neon.isa (13978:896f9f7a1d16) neon.isa (13979:1e0c4607ac12)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <class BaseS, class BaseD>
62 StaticInstPtr
63 decodeNeonSizeSingleDouble(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 2:
69 return new BaseS(machInst, dest, op1, op2);
70 case 3:
71 return new BaseD(machInst, dest, op1, op2);
72 default:
73 return new Unknown(machInst);
74 }
75 }
76
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1061let {{
# Accumulators for the code generated by this let block. The generator
# functions below append to them through "global" declarations.
header_output = ""
exec_output = ""

# C++ helpers for the greater-than, greater-or-equal and equal compares.
# Each returns 2.0 when the operand check fires, 0.0 when the comparison
# holds and 1.0 when it does not. vcgtFunc and vcgeFunc check the operands
# with std::isnan; vceqFunc checks them with isSnan instead.
vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
# Less-or-equal and less-than counterparts with the same 2.0 / 0.0 / 1.0
# return convention. This string is not part of the exec_output append at
# the end of this group.
vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
# Absolute-value compares: the operands are passed through fabsf before
# being compared. Same return convention as the helpers above.
vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

# Only vcompares and vacomparesG are emitted here.
exec_output += vcompares + vacomparesG
1126
# Element type names handed to the instruction generators below. The
# "small" tuples hold the 8, 16 and 32 bit types; the unsigned/signed
# tuples append the 64 bit type, and the combined tuples list the unsigned
# names before the signed ones.
smallUnsignedTypes = tuple("uint%d_t" % width for width in (8, 16, 32))
smallSignedTypes = tuple("int%d_t" % width for width in (8, 16, 32))
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
1133
# Generate an instruction with two sources and one destination that all
# span rCount 32 bit register pieces.
#
#   name, Name    - forwarded to InstObjParams.
#   opClass       - substituted as "op_class".
#   types         - one NeonExecDeclare is emitted per entry ("targs").
#   rCount        - number of register pieces copied per operand.
#   op            - C++ snippet that computes destElem from srcElem1 and
#                   srcElem2.
#   readDest      - also load the destination pieces and preset destElem
#                   from them before op runs.
#   pairwise      - feed op with adjacent elements (2i, 2i+1) taken from
#                   srcReg1 while 2i < eCount and from srcReg2 afterwards.
#   standardFpcsr - declare a local fpscr from fpStandardFPSCRValue before
#                   the loop and copy it to FpscrExc afterwards.
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False,
                      standardFpcsr=False):
    global header_output, exec_output
    fragments = [simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    # Copy the operands into the local vectors.
    for idx in range(rCount):
        fragments.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    if standardFpcsr:
        fragments.append('''
        FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
        ''')
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    if pairwise:
        laneLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        laneLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    fragments.append(laneLoop % { "op" : op, "readDest" : loadDest })
    if standardFpcsr:
        fragments.append('''
        FpscrExc = fpscr;
        ''')
    # Write the result vector back to the destination pieces.
    for idx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": "".join(fragments),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1203
# Generate a floating point instruction with two float sources and one
# destination, walking rCount single registers.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of registers copied per operand.
#   op         - C++ snippet that computes destReg from srcReg1/srcReg2.
#   readDest   - also load the destination and preset destReg from it.
#   pairwise   - feed op with adjacent registers (2r, 2r+1) taken from
#                srcRegs1 while 2r < rCount and from srcRegs2 afterwards.
#   toInt      - the destination is a RegVect of uint32_t values instead
#                of a float array.
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # NOTE(review): this read uses a ".bits" suffix while the
                # write-back below uses "_uw" - confirm it is intended.
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    # A RegVect destination is accessed through its regs member, a float
    # array directly. The same slot expression is used for the optional
    # read and for the write so both always match the declared type.
    if toInt:
        destType = 'uint32_t'
        destSlot = 'destRegs.regs[r]'
    else:
        destType = 'float'
        destSlot = 'destRegs[r]'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    writeDest = '%s = destReg;' % destSlot
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            float srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = srcRegs1[r];
            float srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1288
# Generate a three operand instruction whose operands may differ in width.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   op         - C++ snippet that computes destElem from srcElem1 and
#                srcElem2.
#   bigSrc1, bigSrc2, bigDest
#              - when set, the operand is a BigRegVect copied through four
#                register pieces; otherwise a RegVect copied through two.
#   readDest   - also load the destination pieces and preset destElem.
#
# "r_count" is always 2.
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    global header_output, exec_output

    # Map a "big" flag to (number of register pieces, type name prefix).
    def widthOf(isBig):
        if isBig:
            return 4, 'Big'
        return 2, ''

    src1Cnt, src1Prefix = widthOf(bigSrc1)
    src2Cnt, src2Prefix = widthOf(bigSrc2)
    destCnt, destPrefix = widthOf(bigDest)

    fragments = [simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)]
    for idx in range(src1Cnt):
        fragments.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    for idx in range(src2Cnt):
        fragments.append('''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        for idx in range(destCnt):
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    # NOTE(review): srcElem2 is declared with src1Prefix, so src2Prefix is
    # passed to the substitution but never used by the template. Confirm
    # whether that is intentional before changing it.
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix })
    for idx in range(destCnt):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": "".join(fragments),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1352
# Narrowing form of threeUnequalRegInst: both sources are big, the
# destination is not.
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
1356
# Long form of threeUnequalRegInst: only the destination is big.
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1360
# Wide form of threeUnequalRegInst: the first source and the destination
# are big, the second source is not.
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1364
# Generate an instruction that combines every element of the first source
# with one element of the second source, selected by the immediate.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces copied per operand.
#   op         - C++ snippet that computes destElem from srcElem1 and
#                srcElem2 (srcElem2 is srcReg2.elements[imm]).
#   readDest   - also load the destination pieces and preset destElem.
#
# The generated code raises UndefinedInstruction when imm does not index
# an element of the vector.
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The two range tests are alternatives and must be joined with ||.
    # Joined with && the condition can never hold, which would let an out
    # of range imm index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1413
# Generate the long variant of the "by element" instructions: every
# element of the first source is combined with the element of the second
# source selected by the immediate, and the result is a BigElement.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   op         - C++ snippet that computes destElem (a BigElement) from
#                srcElem1 and srcElem2.
#   readDest   - also load the destination pieces and preset destElem.
#
# The sources use two register pieces each, the destination four. The
# generated code raises UndefinedInstruction when imm does not index an
# element of the source vector.
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The two range tests are alternatives and must be joined with ||.
    # Joined with && the condition can never hold, which would let an out
    # of range imm index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1465
# Generate the floating point "by element" instructions: every register of
# the first source is combined with the register of the second source
# selected by the immediate.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of float registers copied per operand.
#   op         - C++ snippet that computes destReg from srcReg1 and
#                srcReg2 (srcReg2 is srcRegs2[imm]).
#   readDest   - also load the destination and preset destReg from it.
#
# The generated code raises UndefinedInstruction when imm does not index
# one of the rCount source registers.
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The two range tests are alternatives and must be joined with ||;
    # joined with && the condition can never hold. The upper bound is
    # rCount because imm indexes srcRegs2, which has rCount entries.
    eWalkCode += '''
    if (imm < 0 || imm >= rCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            float srcReg1 = srcRegs1[i];
            float srcReg2 = srcRegs2[imm];
            float destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1515
# Generate a one source instruction built on the "RegRegImmOp" base with
# equally sized source and destination.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces copied per operand.
#   op         - C++ snippet run once per loop iteration.
#   readDest   - also load the destination pieces and preset the
#                destination variable before op runs.
#   toInt      - op writes a uint32_t destReg that is stored through
#                destRegs.regs[i] instead of an Element destElem.
#   fromInt    - op reads a uint32_t srcReg1 taken from srcRegs1.regs[i]
#                instead of an Element srcElem1.
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    # NOTE(review): the toInt override is taken to be nested under
    # readDest, so the destination is only read when readDest is set;
    # confirm against the original indentation.
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        if toInt:
            readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    # Source side: element access by default, raw register for fromInt.
    readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if fromInt:
        readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);'
    # Destination side: element access by default, raw register for toInt.
    declDest = 'Element destElem;'
    writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if toInt:
        declDest = 'uint32_t destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1572
# Generate a narrowing one source instruction built on the "RegRegImmOp"
# base: the source is a BigRegVect copied through four register pieces,
# the destination a RegVect copied through two.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   op         - C++ snippet that computes destElem (an Element) from
#                srcElem1 (a BigElement).
#   readDest   - also load the destination pieces and preset destElem.
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    srcPieces, destPieces = 4, 2
    fragments = [simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(srcPieces):
        fragments.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        for idx in range(destPieces):
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest })
    for idx in range(destPieces):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": "".join(fragments),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1616
# Generate a lengthening one source instruction built on the "RegRegImmOp"
# base: the source is a RegVect copied through two register pieces, the
# destination a BigRegVect copied through four.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   op         - C++ snippet that computes destElem (a BigElement) from
#                srcElem1 (an Element).
#   readDest   - also load the destination pieces and preset destElem.
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-element variable is destElem; destReg is the whole
        # vector and cannot take an element value.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1660
# Generate a one source, one destination instruction built on the
# "RegRegOp" base with equally sized operands.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces copied per operand.
#   op         - C++ snippet that computes destElem from srcElem1.
#   readDest   - also load the destination pieces and preset destElem.
#
# The result is stored at index j, which the generated loop initialises to
# i before op runs (presumably so that op can redirect the store).
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    fragments = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        fragments.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest })
    for idx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": "".join(fragments),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1703
# Generate a one source instruction built on the "RegRegImmOp" base in
# which every loop iteration reads the same source element,
# srcReg1.elements[imm].
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces copied per operand.
#   op         - C++ snippet that computes destElem from srcElem1.
#   readDest   - also load the destination pieces and preset destElem.
#
# NOTE(review): the generated code does not range check imm - presumably
# the decoder guarantees a valid index; confirm.
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    fragments = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        fragments.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest })
    for idx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": "".join(fragments),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1745
# Generate an instruction that rewrites both of its registers: op is
# inserted verbatim between loading srcReg1/destReg and writing both of
# them back.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces copied per operand.
#   op         - complete C++ code operating on srcReg1 and destReg; no
#                per-element loop is generated around it.
#   readDest   - accepted for signature compatibility with the other
#                generators. The destination is always loaded, so the
#                flag has no effect on the generated code.
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # Both vectors are written back, the "source" included.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1780
# Generate a floating point instruction with one float source and one
# destination, walking rCount single registers.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of registers copied per operand.
#   op         - C++ snippet that computes destReg from srcReg1.
#   readDest   - also load the destination and preset destReg from it.
#   toInt      - the destination is a RegVect of uint32_t values instead
#                of a float array.
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # NOTE(review): this read uses a ".bits" suffix while the
                # write-back below uses "_uw" - confirm it is intended.
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    # The generated loop counts with r, so the destination slot must be
    # indexed with r as well. A RegVect destination is reached through its
    # regs member, a float array directly.
    if toInt:
        destType = 'uint32_t'
        destSlot = 'destRegs.regs[r]'
    else:
        destType = 'float'
        destSlot = 'destRegs[r]'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    writeDest = '%s = destReg;' % destSlot
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        float srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1846
# Generate an instruction that combines each pair of adjacent source
# elements (2i, 2i+1) into one BigElement of the destination. The loop
# runs eCount / 2 times.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces copied for the source
#                and for the destination.
#   op         - C++ snippet that computes destElem (a BigElement) from
#                srcElem1 and srcElem2.
#   readDest   - also load the destination pieces and preset destElem.
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    fragments = [simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for idx in range(rCount):
        fragments.append('''
            srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    fragments.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest })
    for idx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": "".join(fragments),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1890
# Generate a narrowing one source instruction built on the "RegRegOp"
# base: the source is a BigRegVect copied through four register pieces,
# the destination a RegVect copied through two.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   op         - C++ snippet that computes destElem (an Element) from
#                srcElem1 (a BigElement).
#   readDest   - also load the destination pieces and preset destElem.
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    srcPieces, destPieces = 4, 2
    fragments = [simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(srcPieces):
        fragments.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        for idx in range(destPieces):
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest })
    for idx in range(destPieces):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": "".join(fragments),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1934
# Generate an instruction built on the "RegImmOp" base that has no source
# register: op fills every destination element on its own.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   rCount     - number of 32 bit register pieces of the destination.
#   op         - C++ snippet that computes destElem.
#   readDest   - also load the destination pieces and preset destElem.
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    fragments = [simdEnabledCheckCode + '''
    RegVect destReg;
    ''']
    if readDest:
        for idx in range(rCount):
            fragments.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest })
    for idx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    iop = InstObjParams(name, Name,
                        "RegImmOp",
                        { "code": "".join(fragments),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    exec_output += "".join(
        NeonExecDeclare.subst({ "targs" : elemType, "class_name" : Name })
        for elemType in types)
1972
# Generate a lengthening one source instruction built on the "RegRegOp"
# base: the source is a RegVect copied through two register pieces, the
# destination a BigRegVect copied through four.
#
#   name, Name - forwarded to InstObjParams.
#   opClass    - substituted as "op_class".
#   types      - one NeonExecDeclare is emitted per entry ("targs").
#   op         - C++ snippet that computes destElem (a BigElement) from
#                srcElem1 (an Element).
#   readDest   - also load the destination pieces and preset destElem.
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-element variable is destElem; destReg is the whole
        # vector and cannot take an element value.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2016
# Per-element body for vhadd: each source has its low bit cleared and is
# divided by two before the add; the carry out of the two discarded low
# bits is then added back.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
# D form with rCount 2 and Q form with rCount 4, both for all element
# types.
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2029
2030 vrhaddCode = '''
2031 Element carryBit =
2032 (((unsigned)srcElem1 & 0x1) +
2033 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2034 // Use division instead of a shift to ensure the sign extension works
2035 // right. The compiler will figure out if it can be a shift. Mask the
2036 // inputs so they get truncated correctly.
2037 destElem = (((srcElem1 & ~(Element)1) / 2) +
2038 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2039 '''
2040 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2041 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2042
2043 vhsubCode = '''
2044 Element barrowBit =
2045 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2046 // Use division instead of a shift to ensure the sign extension works
2047 // right. The compiler will figure out if it can be a shift. Mask the
2048 // inputs so they get truncated correctly.
2049 destElem = (((srcElem1 & ~(Element)1) / 2) -
2050 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2051 '''
2052 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2053 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2054
# Lane-wise bitwise operations.  Each is registered for the 64-bit (D,
# two registers) and 128-bit (Q, four registers) forms.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vorrCode)

# The register form of vmov reuses the OR template under a different
# mnemonic and op class.
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + regSuffix, "SimdMiscOp",
                      unsignedTypes, regCount, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, veorCode)

# The three bit-select templates below read the incoming destElem, so
# they pass True as the trailing argument.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbifCode, True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbitCode, True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbslCode, True)
2103
# Lane-wise integer maximum.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + regSuffix, "SimdCmpOp",
                      allTypes, regCount, vmaxCode)

# Lane-wise integer minimum.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + regSuffix, "SimdCmpOp",
                      allTypes, regCount, vminCode)
2115
# Plain lane-wise add; also used for the pairwise form below.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vadd", "NVadd" + regSuffix, "SimdAddOp",
                      unsignedTypes, regCount, vaddCode)

threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)

# Add computed at BigElement width, shared by the long and wide forms.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)

# Narrowing add: the sum is shifted right by the bit width of Element.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)

# Rounding variant: 1 << (bit width of Element - 1) is added before the
# shift.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes,
                   vraddhnCode)

# Plain lane-wise subtract.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vsub", "NVsub" + regSuffix, "SimdAddOp",
                      unsignedTypes, regCount, vsubCode)

# Subtract computed at BigElement width, shared by the long and wide
# forms.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2151
# Unsigned saturating add: a wrapped sum (smaller than either input) is
# replaced by all ones and QC is set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddU" + regSuffix, "SimdAddOp",
                      unsignedTypes, regCount, vqaddUCode)

# Narrowing subtract: the difference is shifted right by the bit width
# of Element.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)

# Rounding variant of the narrowing subtract.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes,
                   vrsubhnCode)

# Signed saturating add: overflow is detected from the operand and
# result signs.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        if (negDest)
            /* If (>=0) plus (>=0) yields (<0), saturate to +. */
            destElem = std::numeric_limits<Element>::max();
        else
            /* If (<0) plus (<0) yields (>=0), saturate to -. */
            destElem = std::numeric_limits<Element>::min();
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddS" + regSuffix, "SimdAddOp",
                      signedTypes, regCount, vqaddSCode)

# Unsigned saturating subtract: a wrapped difference (larger than the
# minuend) is clamped to zero and QC is set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubU" + regSuffix, "SimdAddOp",
                      unsignedTypes, regCount, vqsubUCode)

# Signed saturating subtract: same sign-based overflow test as the add,
# with the second operand's sign inverted.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        if (negDest)
            /* If (>=0) minus (<0) yields (<0), saturate to +. */
            destElem = std::numeric_limits<Element>::max();
        else
            /* If (<0) minus (>=0) yields (>=0), saturate to -. */
            destElem = std::numeric_limits<Element>::min();
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubS" + regSuffix, "SimdAddOp",
                      signedTypes, regCount, vqsubSCode)
2226
# Integer compares: a lane becomes all ones when the relation holds and
# zero otherwise.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcgt", "Vcgt" + regSuffix, "SimdCmpOp",
                      allTypes, regCount, vcgtCode)

vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcge", "Vcge" + regSuffix, "SimdCmpOp",
                      allTypes, regCount, vcgeCode)

vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vceq", "Vceq" + regSuffix, "SimdCmpOp",
                      unsignedTypes, regCount, vceqCode)
2244
# Shift by a per-lane signed amount taken from the low byte of srcElem2:
# negative amounts shift right, non-negative amounts shift left, and
# amounts of at least the element width produce 0 (or the sign fill for
# a negative input shifted right).
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vshl", "Vshl" + regSuffix, "SimdShiftOp",
                      allTypes, regCount, vshlCode)

# Rounding variant: on a right shift the last bit shifted out (rBit) is
# added to the result.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrshl", "Vrshl" + regSuffix, "SimdAluOp",
                      allTypes, regCount, vrshlCode)
2304
# Unsigned saturating shift by a per-lane signed amount.  A left shift
# that would drop set bits yields all ones and sets QC.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqshl", "VqshlU" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vqshlUCode)

# Signed saturating shift.  On a left shift the top shiftAmt + 1 bits
# must all equal the sign; otherwise the result saturates towards the
# sign of the input and QC is set.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqshl", "VqshlS" + regSuffix, "SimdCmpOp",
                      signedTypes, regCount, vqshlSCode)
2386
# Unsigned saturating rounding shift by a per-lane signed amount taken
# from the low byte of srcElem2.
#   * negative amount: shift right and add the last bit shifted out;
#   * positive amount: shift left, saturating to all ones (and setting
#     QC) when set bits would be lost;
#   * zero: the lane is copied unchanged.
# The zero case is handled on its own branch, as in the other saturating
# shift templates.  Previously it fell into the left-shift path and
# evaluated bits(srcElem1, N - 1, N), an out-of-range extraction that
# shifts by the full width of the type.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2,
                  vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4,
                  vqrshlUCode)
2424
# Signed saturating rounding shift: right shifts add the last bit
# shifted out, left shifts saturate towards the sign of the input and
# set QC when significant bits would be lost.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqrshl", "VqrshlS" + regSuffix, "SimdCmpOp",
                      signedTypes, regCount, vqrshlSCode)
2476
# Absolute difference, accumulated into the existing destination lane.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vaba", "Vaba" + regSuffix, "SimdAddAccOp",
                      allTypes, regCount, vabaCode, True)

# Accumulating absolute difference computed at BigElement width.
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode,
                 True)

# Absolute difference without accumulation.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vabd", "Vabd" + regSuffix, "SimdAddOp",
                      allTypes, regCount, vabdCode)

# Absolute difference computed at BigElement width.
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)

# Bit test: all ones when the two lanes share any set bit, else zero.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vtst", "Vtst" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vtstCode)
2508
# Lane-wise integer multiply.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmul", "NVmul" + regSuffix, "SimdMultOp",
                      allTypes, regCount, vmulCode)

# Multiply computed at BigElement width.
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)

# Multiply-accumulate into the existing destination lane.
vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmla", "NVmla" + regSuffix, "SimdMultAccOp",
                      allTypes, regCount, vmlaCode, True)

# Multiply-accumulate computed at BigElement width.
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode,
                 True)
2528
# Saturating doubling multiply-accumulate long.
#
# midElem is the doubled product 2 * srcElem1 * srcElem2.  For signed
# elements of width w that product only leaves the range of the 2w-bit
# BigElement when both inputs are the most negative value (it would be
# 2^(2w-1)), so that is the only case clamped here, matching the check in
# the vqdmull template.  The earlier text also clamped when one input was
# half the most negative value; that product is 2^(2w-2), which fits, so
# the result was wrongly saturated and QC wrongly set.
#
# The accumulate step then saturates on signed overflow, detected from
# the signs of the old destination, the product and the sum.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = std::numeric_limits<Element>::min();
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes,
                 vqdmlalCode, True)

# Saturating doubling multiply-subtract long.  Same product handling as
# above; the doubled product is subtracted from the destination and the
# overflow test uses the sign of the negated product.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = std::numeric_limits<Element>::min();
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes,
                 vqdmlslCode, True)
2578
# Saturating doubling multiply long: when both inputs equal the minimum
# Element value the result is replaced by the complement of that value
# shifted into the upper half, and QC is set.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes,
                 vqdmullCode)

# Multiply-subtract from the existing destination lane.
vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmls", "NVmls" + regSuffix, "SimdMultAccOp",
                      allTypes, regCount, vmlsCode, True)

# Multiply-subtract computed at BigElement width.
vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode,
                 True)

# Polynomial multiply: XOR together copies of srcElem1 shifted left by
# the position of every set bit in srcElem2.
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmul", "NVmulp" + regSuffix, "SimdMultOp",
                      unsignedTypes, regCount, vmulpCode)

# Polynomial multiply with the shifted copies widened to BigElement.
vmullpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes,
                 vmullpCode)

# Pairwise integer max/min reuse the lane-wise max/min templates.
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode,
                  pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode,
                  pairwise=True)
2622
# Saturating doubling multiply returning the high half: the doubled
# product is shifted right by the bit width of Element.  When both
# inputs equal the minimum Element value the result is replaced by the
# complement of that value and QC is set.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqdmulh", "Vqdmulh" + regSuffix, "SimdMultOp",
                      smallSignedTypes, regCount, vqdmulhCode)
2636
# Saturating rounding doubling multiply returning the high half.
#
# The doubled product plus the rounding constant 1 << (w - 1) is shifted
# right by the element width w.  For signed w-bit inputs that value only
# exceeds the Element range when both inputs are the most negative value
# (the result would be 2^(w-1)); that case is clamped to the largest
# positive value and QC is set.  The earlier text also saturated when one
# input was half the most negative value, where the true result 2^(w-2)
# is representable, so it returned min() with QC set (e.g. for 16-bit
# lanes -16384 * -32768 must give 16384).
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = std::numeric_limits<Element>::min();
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
                  "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
                  "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2660
# Floating-point min/max family.  The template calls fplib<Op><Element>
# with the two source lanes and fpscr; fpscr is not declared in the
# snippet itself (presumably supplied via standardFpcsr=True -- confirm
# in threeEqualRegInst).
vMinMaxFpCode = '''
    destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
'''
# Columns: mnemonic, class name, register count, fplib suffix, pairwise.
vMinMaxInsts = [
    ("vmax",   "VmaxDFp",   2, "Max",    False),
    ("vmax",   "VmaxQFp",   4, "Max",    False),
    ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False),
    ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False),
    ("vpmax",  "VpmaxDFp",  2, "Max",    True),
    ("vpmax",  "VpmaxQFp",  4, "Max",    True),
    ("vmin",   "VminDFp",   2, "Min",    False),
    ("vmin",   "VminQFp",   4, "Min",    False),
    ("vminnm", "VminnmDFp", 2, "MinNum", False),
    ("vminnm", "VminnmQFp", 4, "MinNum", False),
    ("vpmin",  "VpminDFp",  2, "Min",    True),
    ("vpmin",  "VpminQFp",  4, "Min",    True),
]
for name, Name, rCount, op, pairwise in vMinMaxInsts:
    threeEqualRegInst(name, Name, "SimdFloatCmpOp", ("uint32_t",), rCount,
                      vMinMaxFpCode % op,
                      pairwise=pairwise, standardFpcsr=True)
2689
# Single-precision arithmetic.  Every template copies FpscrExc into a
# local fpscr, runs binaryOp/ternaryOp with VfpRoundNearest, and writes
# fpscr back.
vaddfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vadd", "Vadd" + regSuffix + "Fp",
                        "SimdFloatAddOp", ("float",), regCount, vaddfpCode)

# Pairwise add reuses the add template.
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vpadd", "Vpadd" + regSuffix + "Fp",
                        "SimdFloatAddOp", ("float",), regCount, vaddfpCode,
                        pairwise=True)

vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vsub", "Vsub" + regSuffix + "Fp",
                        "SimdFloatAddOp", ("float",), regCount, vsubfpCode)

vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmul", "NVmul" + regSuffix + "Fp",
                        "SimdFloatMultOp", ("float",), regCount, vmulfpCode)

# Multiply-accumulate as two separate operations: multiply, then add the
# old destination.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmla", "NVmla" + regSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",), regCount,
                        vmlafpCode, True)

# Multiply-add through a single ternaryOp call to fpMulAdd<float>.
vfmafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
                        true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vfma", "NVfma" + regSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",), regCount,
                        vfmafpCode, True)

# Same as vfma with the first source negated.
vfmsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
                        true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vfms", "NVfms" + regSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",), regCount,
                        vfmsfpCode, True)

# Multiply-subtract as two separate operations: multiply, then subtract
# the product from the old destination.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmls", "NVmls" + regSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",), regCount,
                        vmlsfpCode, True)
2761
# Single-precision compares.  The five templates differ only in the
# helper passed to binaryOp, so they are stamped out of one pattern.
# As written, the destination becomes -1 when the helper returns 0 and 0
# otherwise, and a helper result of 2.0 additionally sets fpscr.ioc.
fpCmpCodePattern = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, %s,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
vcgtfpCode = fpCmpCodePattern % "vcgtFunc"
vcgefpCode = fpCmpCodePattern % "vcgeFunc"
vacgtfpCode = fpCmpCodePattern % "vacgtFunc"
vacgefpCode = fpCmpCodePattern % "vacgeFunc"
vceqfpCode = fpCmpCodePattern % "vceqFunc"

for cmpMnemonic, cmpClass, cmpCode in (("vcgt", "Vcgt", vcgtfpCode),
                                       ("vcge", "Vcge", vcgefpCode),
                                       ("vacgt", "Vacgt", vacgtfpCode),
                                       ("vacge", "Vacge", vacgefpCode),
                                       ("vceq", "Vceq", vceqfpCode)):
    for regSuffix, regCount in (("D", 2), ("Q", 4)):
        threeEqualRegInstFp(cmpMnemonic, cmpClass + regSuffix + "Fp",
                            "SimdFloatCmpOp", ("float",),
                            regCount, cmpCode, toInt = True)
2831
# Step templates: each delegates to a helper (fpRecpsS / fpRSqrtsS)
# through binaryOp.
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vrecps", "Vrecps" + regSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",), regCount,
                        vrecpsCode)

vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vrsqrts", "Vrsqrts" + regSuffix + "Fp",
                        "SimdFloatMiscOp", ("float",), regCount,
                        vrsqrtsCode)

# Floating-point absolute difference: subtract, then take fabs of the
# result.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                         true, true, VfpRoundNearest);
    destReg = fabs(mid);
    FpscrExc = fpscr;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vabd", "Vabd" + regSuffix + "Fp",
                        "SimdFloatAddOp", ("float",), regCount, vabdfpCode)
2859
# Two-register variants (class names carry an extra "s") that reuse the
# three-register code templates.  For each mnemonic the integer D/Q
# forms come first, then the floating-point D/Q forms, then the long
# form.
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vmla", "Vmlas" + regSuffix, "SimdMultAccOp",
                    unsignedTypes, regCount, vmlaCode, True)
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInstFp("vmla", "Vmlas" + regSuffix + "Fp",
                      "SimdFloatMultAccOp", ("float",), regCount,
                      vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode,
               True)

for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vmls", "Vmlss" + regSuffix, "SimdMultAccOp",
                    allTypes, regCount, vmlsCode, True)
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInstFp("vmls", "Vmlss" + regSuffix + "Fp",
                      "SimdFloatMultAccOp", ("float",), regCount,
                      vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode,
               True)

for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vmul", "Vmuls" + regSuffix, "SimdMultOp",
                    allTypes, regCount, vmulCode)
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInstFp("vmul", "Vmuls" + regSuffix + "Fp",
                      "SimdFloatMultOp", ("float",), regCount, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)

twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes,
               vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
               vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
               vqdmlslCode, True)
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vqdmulh", "Vqdmulhs" + regSuffix, "SimdMultOp",
                    smallSignedTypes, regCount, vqdmulhCode)
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vqrdmulh", "Vqrdmulhs" + regSuffix,
                    "SimdMultOp", smallSignedTypes, regCount, vqrdmulhCode)
2887
# VSHR (shift right by immediate), per element. When imm is at least the
# element width in bits the C++ shift is avoided: the result is -1 if
# ltz(srcElem1) reports a negative value and 0 otherwise. Smaller
# immediates use a plain ">>".
2888 vshrCode = '''
2889 if (imm >= sizeof(srcElem1) * 8) {
2890 if (ltz(srcElem1))
2891 destElem = -1;
2892 else
2893 destElem = 0;
2894 } else {
2895 destElem = srcElem1 >> imm;
2896 }
2897 '''
2898 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2899 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2900
# VSRA (shift right by immediate and accumulate), per element.
#
# The shifted value is built in `mid` and then added to destElem:
#   * imm >= element width: mid is -1 for negative inputs (per ltz()) and
#     0 otherwise, so the C++ shift is never executed with an out-of-range
#     count.
#   * otherwise mid = srcElem1 >> imm; if the source was negative but the
#     shifted value is not, the bits above the shifted-in sign position
#     are filled in by hand.
#
# The trailing True passed to twoRegShiftInst is presumably the
# "destination is also read" flag (the snippet uses "+=") -- confirm
# against the helper definition.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2916
# VRSHR (rounding shift right by immediate), per element.
#   imm >  element width : result is 0
#   imm != 0             : shift right by imm (done as (imm - 1) then 1 so
#                          a shift by the full width stays well defined)
#                          and add back bit (imm - 1) as the rounding bit
#   imm == 0             : copy the source
2917 vrshrCode = '''
2918 if (imm > sizeof(srcElem1) * 8) {
2919 destElem = 0;
2920 } else if (imm) {
2921 Element rBit = bits(srcElem1, imm - 1);
2922 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2923 } else {
2924 destElem = srcElem1;
2925 }
2926 '''
2927 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2928 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2929
# VRSRA (rounding shift right and accumulate), per element. Same three
# cases as the rounding shift above, but every result is added to destElem
# instead of assigned (the "+= 0" branch leaves the destination as is).
2930 vrsraCode = '''
2931 if (imm > sizeof(srcElem1) * 8) {
2932 destElem += 0;
2933 } else if (imm) {
2934 Element rBit = bits(srcElem1, imm - 1);
2935 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2936 } else {
2937 destElem += srcElem1;
2938 }
2939 '''
2940 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2941 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2942
# VSRI (shift right and insert), per element. For imm >= element width the
# destination is left untouched. Otherwise the result is srcElem1 >> imm
# OR-ed with the destination bits that lie outside
# mask(element width - imm).
2943 vsriCode = '''
2944 if (imm >= sizeof(Element) * 8) {
2945 destElem = destElem;
2946 } else {
2947 destElem = (srcElem1 >> imm) |
2948 (destElem & ~mask(sizeof(Element) * 8 - imm));
2949 }
2950 '''
2951 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2952 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2953
# VSHL (shift left by immediate), per element. An immediate of at least
# the element width is performed as two shifts ((width - 1) then 1);
# smaller immediates use a single "<<".
2954 vshlCode = '''
2955 if (imm >= sizeof(Element) * 8) {
2956 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2957 } else {
2958 destElem = srcElem1 << imm;
2959 }
2960 '''
2961 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2962 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2963
# VSLI (shift left and insert), per element. For imm >= element width the
# destination is left untouched; otherwise the result is srcElem1 << imm
# with the low imm bits taken from the existing destination.
2964 vsliCode = '''
2965 if (imm >= sizeof(Element) * 8) {
2966 destElem = destElem;
2967 } else {
2968 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2969 }
2970 '''
2971 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2972 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2973
# VQSHL, signed saturating shift left by immediate (instantiated for
# signedTypes). Saturation value: numeric_limits<Element>::min() for
# non-positive sources, its complement for positive ones; fpscr.qc is set
# whenever saturation happens and written back through FpscrQc.
#   imm >= width : any non-zero source saturates, zero stays zero
#   imm != 0     : shift, then inspect the top (imm + 1) source bits; if
#                  they are neither all zero nor all one the value
#                  overflowed and is saturated
#   imm == 0     : copy the source
2974 vqshlCode = '''
2975 FPSCR fpscr = (FPSCR) FpscrQc;
2976 if (imm >= sizeof(Element) * 8) {
2977 if (srcElem1 != 0) {
2978 destElem = std::numeric_limits<Element>::min();
2979 if (srcElem1 > 0)
2980 destElem = ~destElem;
2981 fpscr.qc = 1;
2982 } else {
2983 destElem = 0;
2984 }
2985 } else if (imm) {
2986 destElem = (srcElem1 << imm);
2987 uint64_t topBits = bits((uint64_t)srcElem1,
2988 sizeof(Element) * 8 - 1,
2989 sizeof(Element) * 8 - 1 - imm);
2990 if (topBits != 0 && topBits != mask(imm + 1)) {
2991 destElem = std::numeric_limits<Element>::min();
2992 if (srcElem1 > 0)
2993 destElem = ~destElem;
2994 fpscr.qc = 1;
2995 }
2996 } else {
2997 destElem = srcElem1;
2998 }
2999 FpscrQc = fpscr;
3000 '''
3001 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3002 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3003
# VQSHL on unsigned elements (classes "NVqshluD"/"NVqshluQ", instantiated
# for unsignedTypes). Overflow saturates to all ones
# (mask(element width)) and sets fpscr.qc.
#   imm >= width : any non-zero source saturates
#   imm != 0     : shift, saturate if any of the top imm source bits is set
#   imm == 0     : copy the source
3004 vqshluCode = '''
3005 FPSCR fpscr = (FPSCR) FpscrQc;
3006 if (imm >= sizeof(Element) * 8) {
3007 if (srcElem1 != 0) {
3008 destElem = mask(sizeof(Element) * 8);
3009 fpscr.qc = 1;
3010 } else {
3011 destElem = 0;
3012 }
3013 } else if (imm) {
3014 destElem = (srcElem1 << imm);
3015 uint64_t topBits = bits((uint64_t)srcElem1,
3016 sizeof(Element) * 8 - 1,
3017 sizeof(Element) * 8 - imm);
3018 if (topBits != 0) {
3019 destElem = mask(sizeof(Element) * 8);
3020 fpscr.qc = 1;
3021 }
3022 } else {
3023 destElem = srcElem1;
3024 }
3025 FpscrQc = fpscr;
3026 '''
3027 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3028 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3029
# VQSHLU on signed sources (classes "NVqshlusD"/"NVqshlusQ", instantiated
# for signedTypes). In every branch a negative source yields 0 with
# fpscr.qc set; a positive source that overflows yields
# mask(element width) with fpscr.qc set.
#   imm >= width : every non-zero source saturates
#   imm != 0     : shift; overflow is detected from the top imm source bits
#   imm == 0     : non-negative sources are copied unchanged
3030 vqshlusCode = '''
3031 FPSCR fpscr = (FPSCR) FpscrQc;
3032 if (imm >= sizeof(Element) * 8) {
3033 if (srcElem1 < 0) {
3034 destElem = 0;
3035 fpscr.qc = 1;
3036 } else if (srcElem1 > 0) {
3037 destElem = mask(sizeof(Element) * 8);
3038 fpscr.qc = 1;
3039 } else {
3040 destElem = 0;
3041 }
3042 } else if (imm) {
3043 destElem = (srcElem1 << imm);
3044 uint64_t topBits = bits((uint64_t)srcElem1,
3045 sizeof(Element) * 8 - 1,
3046 sizeof(Element) * 8 - imm);
3047 if (srcElem1 < 0) {
3048 destElem = 0;
3049 fpscr.qc = 1;
3050 } else if (topBits != 0) {
3051 destElem = mask(sizeof(Element) * 8);
3052 fpscr.qc = 1;
3053 }
3054 } else {
3055 if (srcElem1 < 0) {
3056 fpscr.qc = 1;
3057 destElem = 0;
3058 } else {
3059 destElem = srcElem1;
3060 }
3061 }
3062 FpscrQc = fpscr;
3063 '''
3064 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3065 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3066
# VSHRN (shift right and narrow), per element: 0 when imm is at least the
# source element width in bits, otherwise srcElem1 >> imm assigned to the
# destination element. A single class is generated through
# twoRegNarrowShiftInst.
3067 vshrnCode = '''
3068 if (imm >= sizeof(srcElem1) * 8) {
3069 destElem = 0;
3070 } else {
3071 destElem = srcElem1 >> imm;
3072 }
3073 '''
3074 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3075
# VRSHRN (rounding shift right and narrow), per element: 0 when imm
# exceeds the source width; for non-zero imm the source is shifted right
# by imm (as (imm - 1) then 1) and bit (imm - 1) is added as the rounding
# bit; imm == 0 copies the source.
3076 vrshrnCode = '''
3077 if (imm > sizeof(srcElem1) * 8) {
3078 destElem = 0;
3079 } else if (imm) {
3080 Element rBit = bits(srcElem1, imm - 1);
3081 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3082 } else {
3083 destElem = srcElem1;
3084 }
3085 '''
3086 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3087
# VQSHRN, signed saturating shift right and narrow (smallSignedTypes).
#   imm >  source width : result 0; fpscr.qc set unless the source is 0 or -1
#   imm != 0            : shift into a BigElement `mid`, sign-extend it by
#                         hand, and if it does not survive a round trip
#                         through Element saturate to
#                         mask(element width - 1) (complemented for
#                         negative sources) and set fpscr.qc
#   imm == 0            : plain assignment
3088 vqshrnCode = '''
3089 FPSCR fpscr = (FPSCR) FpscrQc;
3090 if (imm > sizeof(srcElem1) * 8) {
3091 if (srcElem1 != 0 && srcElem1 != -1)
3092 fpscr.qc = 1;
3093 destElem = 0;
3094 } else if (imm) {
3095 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3096 mid |= -(mid & ((BigElement)1 <<
3097 (sizeof(BigElement) * 8 - 1 - imm)));
3098 if (mid != (Element)mid) {
3099 destElem = mask(sizeof(Element) * 8 - 1);
3100 if (srcElem1 < 0)
3101 destElem = ~destElem;
3102 fpscr.qc = 1;
3103 } else {
3104 destElem = mid;
3105 }
3106 } else {
3107 destElem = srcElem1;
3108 }
3109 FpscrQc = fpscr;
3110 '''
3111 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3112
# Saturating shift right and narrow for unsigned elements (class
# "NVqshrun", smallUnsignedTypes). For non-zero imm the shifted BigElement
# value is saturated to mask(element width) with fpscr.qc set when it does
# not fit in Element.
# NOTE(review): the imm == 0 branch assigns the wide source directly with
# no saturation check, unlike the rounding variant further down -- confirm
# whether imm == 0 can reach this code.
3113 vqshrunCode = '''
3114 FPSCR fpscr = (FPSCR) FpscrQc;
3115 if (imm > sizeof(srcElem1) * 8) {
3116 if (srcElem1 != 0)
3117 fpscr.qc = 1;
3118 destElem = 0;
3119 } else if (imm) {
3120 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3121 if (mid != (Element)mid) {
3122 destElem = mask(sizeof(Element) * 8);
3123 fpscr.qc = 1;
3124 } else {
3125 destElem = mid;
3126 }
3127 } else {
3128 destElem = srcElem1;
3129 }
3130 FpscrQc = fpscr;
3131 '''
3132 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3133 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3134
# VQSHRUN with signed sources (class "NVqshruns", smallSignedTypes). For
# non-zero imm, if any bit of the shifted value above the destination
# width is set, the result saturates to 0 for negative sources or
# mask(element width) otherwise, and fpscr.qc is set.
3135 vqshrunsCode = '''
3136 FPSCR fpscr = (FPSCR) FpscrQc;
3137 if (imm > sizeof(srcElem1) * 8) {
3138 if (srcElem1 != 0)
3139 fpscr.qc = 1;
3140 destElem = 0;
3141 } else if (imm) {
3142 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3143 if (bits(mid, sizeof(BigElement) * 8 - 1,
3144 sizeof(Element) * 8) != 0) {
3145 if (srcElem1 < 0) {
3146 destElem = 0;
3147 } else {
3148 destElem = mask(sizeof(Element) * 8);
3149 }
3150 fpscr.qc = 1;
3151 } else {
3152 destElem = mid;
3153 }
3154 } else {
3155 destElem = srcElem1;
3156 }
3157 FpscrQc = fpscr;
3158 '''
3159 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3160 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3161
# VQRSHRN, signed saturating rounding shift right and narrow
# (smallSignedTypes). For non-zero imm the rounding bit is captured
# (bit 0 after shifting by imm - 1), the value is shifted once more,
# sign-extended by hand, and the rounding bit added. Values that do not
# round-trip through Element saturate to mask(element width - 1)
# (complemented for negative sources) with fpscr.qc set. The imm == 0
# branch applies the same saturation to the unshifted source.
3162 vqrshrnCode = '''
3163 FPSCR fpscr = (FPSCR) FpscrQc;
3164 if (imm > sizeof(srcElem1) * 8) {
3165 if (srcElem1 != 0 && srcElem1 != -1)
3166 fpscr.qc = 1;
3167 destElem = 0;
3168 } else if (imm) {
3169 BigElement mid = (srcElem1 >> (imm - 1));
3170 uint64_t rBit = mid & 0x1;
3171 mid >>= 1;
3172 mid |= -(mid & ((BigElement)1 <<
3173 (sizeof(BigElement) * 8 - 1 - imm)));
3174 mid += rBit;
3175 if (mid != (Element)mid) {
3176 destElem = mask(sizeof(Element) * 8 - 1);
3177 if (srcElem1 < 0)
3178 destElem = ~destElem;
3179 fpscr.qc = 1;
3180 } else {
3181 destElem = mid;
3182 }
3183 } else {
3184 if (srcElem1 != (Element)srcElem1) {
3185 destElem = mask(sizeof(Element) * 8 - 1);
3186 if (srcElem1 < 0)
3187 destElem = ~destElem;
3188 fpscr.qc = 1;
3189 } else {
3190 destElem = srcElem1;
3191 }
3192 }
3193 FpscrQc = fpscr;
3194 '''
3195 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3196 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3197
# Saturating rounding shift right and narrow for unsigned elements (class
# "NVqrshrun", instantiated for smallUnsignedTypes).
#
#   imm >  source width : result 0; fpscr.qc set for any non-zero source
#   imm != 0            : capture the rounding bit (bit 0 after shifting by
#                         imm - 1), shift once more, add the rounding bit,
#                         then saturate if the value does not fit Element
#   imm == 0            : saturate the unshifted source if it does not fit
#
# Both saturating branches clamp to mask(element width), i.e. all ones,
# and set fpscr.qc. The imm == 0 branch previously clamped to
# mask(element width - 1), which is the signed maximum used by the signed
# vqrshrn snippet and disagreed with the imm != 0 branch of this one.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3227
# VQRSHRUN with signed sources (class "NVqrshruns", smallSignedTypes).
# For non-zero imm the value is shifted, sign-extended by hand and rounded;
# if any bit above the destination width is set the result saturates to 0
# (negative source) or mask(element width) and fpscr.qc is set. With
# imm == 0 negative sources give 0 with fpscr.qc set, others are copied.
3228 vqrshrunsCode = '''
3229 FPSCR fpscr = (FPSCR) FpscrQc;
3230 if (imm > sizeof(srcElem1) * 8) {
3231 if (srcElem1 != 0)
3232 fpscr.qc = 1;
3233 destElem = 0;
3234 } else if (imm) {
3235 BigElement mid = (srcElem1 >> (imm - 1));
3236 uint64_t rBit = mid & 0x1;
3237 mid >>= 1;
3238 mid |= -(mid & ((BigElement)1 <<
3239 (sizeof(BigElement) * 8 - 1 - imm)));
3240 mid += rBit;
3241 if (bits(mid, sizeof(BigElement) * 8 - 1,
3242 sizeof(Element) * 8) != 0) {
3243 if (srcElem1 < 0) {
3244 destElem = 0;
3245 } else {
3246 destElem = mask(sizeof(Element) * 8);
3247 }
3248 fpscr.qc = 1;
3249 } else {
3250 destElem = mid;
3251 }
3252 } else {
3253 if (srcElem1 < 0) {
3254 fpscr.qc = 1;
3255 destElem = 0;
3256 } else {
3257 destElem = srcElem1;
3258 }
3259 }
3260 FpscrQc = fpscr;
3261 '''
3262 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3263 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3264
# VSHLL (shift left long): the source element is widened to BigElement
# before shifting; an immediate of at least the destination width in bits
# yields 0.
3265 vshllCode = '''
3266 if (imm >= sizeof(destElem) * 8) {
3267 destElem = 0;
3268 } else {
3269 destElem = (BigElement)srcElem1 << imm;
3270 }
3271 '''
3272 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3273
# VMOVL: plain element copy, generated through the same long-shift helper
# but with op class "SimdMiscOp".
3274 vmovlCode = '''
3275 destElem = srcElem1;
3276 '''
3277 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3278
# VCVT float -> fixed point, unsigned result. Sets fpscr.idc when
# flushToZero() reports that the input was flushed, then converts with
# vfpFpToFixed<float>(src, false, 32, imm) between prepFpState() and
# finishVfp(). The empty asm statements with "m" constraints presumably
# act as compiler barriers that keep the conversion inside that window --
# confirm. Instantiated with toInt = True.
3279 vcvt2ufxCode = '''
3280 FPSCR fpscr = (FPSCR) FpscrExc;
3281 if (flushToZero(srcElem1))
3282 fpscr.idc = 1;
3283 VfpSavedState state = prepFpState(VfpRoundNearest);
3284 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3285 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3286 __asm__ __volatile__("" :: "m" (destReg));
3287 finishVfp(fpscr, state, true);
3288 FpscrExc = fpscr;
3289 '''
3290 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3291 2, vcvt2ufxCode, toInt = True)
3292 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3293 4, vcvt2ufxCode, toInt = True)
3294
# VCVT float -> fixed point, signed result. Identical to the unsigned
# snippet except that the second vfpFpToFixed argument is true.
3295 vcvt2sfxCode = '''
3296 FPSCR fpscr = (FPSCR) FpscrExc;
3297 if (flushToZero(srcElem1))
3298 fpscr.idc = 1;
3299 VfpSavedState state = prepFpState(VfpRoundNearest);
3300 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3301 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3302 __asm__ __volatile__("" :: "m" (destReg));
3303 finishVfp(fpscr, state, true);
3304 FpscrExc = fpscr;
3305 '''
3306 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3307 2, vcvt2sfxCode, toInt = True)
3308 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3309 4, vcvt2sfxCode, toInt = True)
3310
# VCVT fixed point -> float, unsigned source. Converts with
# vfpUFixedToFpS(true, true, src, 32, imm) between prepFpState() and
# finishVfp(); the empty asm statements presumably act as compiler
# barriers -- confirm. Instantiated with fromInt = True.
3311 vcvtu2fpCode = '''
3312 FPSCR fpscr = (FPSCR) FpscrExc;
3313 VfpSavedState state = prepFpState(VfpRoundNearest);
3314 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3315 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3316 __asm__ __volatile__("" :: "m" (destElem));
3317 finishVfp(fpscr, state, true);
3318 FpscrExc = fpscr;
3319 '''
3320 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3321 2, vcvtu2fpCode, fromInt = True)
3322 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3323 4, vcvtu2fpCode, fromInt = True)
3324
# VCVT fixed point -> float, signed source: same structure, using
# vfpSFixedToFpS instead.
3325 vcvts2fpCode = '''
3326 FPSCR fpscr = (FPSCR) FpscrExc;
3327 VfpSavedState state = prepFpState(VfpRoundNearest);
3328 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3329 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3330 __asm__ __volatile__("" :: "m" (destElem));
3331 finishVfp(fpscr, state, true);
3332 FpscrExc = fpscr;
3333 '''
3334 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3335 2, vcvts2fpCode, fromInt = True)
3336 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3337 4, vcvts2fpCode, fromInt = True)
3338
# VCVT single -> half precision (narrowing; element type "uint16_t").
# The source bits are reinterpreted as a float with bitsToFp(), fpscr.idc
# is set if flushToZero() flushes that value, and vcvtFpSFpH() performs
# the conversion using fpscr.ahp between prepFpState() and finishVfp().
3339 vcvts2hCode = '''
3340 destElem = 0;
3341 FPSCR fpscr = (FPSCR) FpscrExc;
3342 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3343 if (flushToZero(srcFp1))
3344 fpscr.idc = 1;
3345 VfpSavedState state = prepFpState(VfpRoundNearest);
3346 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3347 : "m" (srcFp1), "m" (destElem));
3348 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3349 fpscr.ahp, srcFp1);
3350 __asm__ __volatile__("" :: "m" (destElem));
3351 finishVfp(fpscr, state, true);
3352 FpscrExc = fpscr;
3353 '''
3354 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3355
# VCVT half -> single precision (widening). vcvtFpHFpS() produces a float
# that is stored back as raw bits through fpToBits().
3356 vcvth2sCode = '''
3357 destElem = 0;
3358 FPSCR fpscr = (FPSCR) FpscrExc;
3359 VfpSavedState state = prepFpState(VfpRoundNearest);
3360 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3361 : "m" (srcElem1), "m" (destElem));
3362 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3363 __asm__ __volatile__("" :: "m" (destElem));
3364 finishVfp(fpscr, state, true);
3365 FpscrExc = fpscr;
3366 '''
3367 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3368
# VRSQRTE, integer form: delegates to unsignedRSqrtEstimate(); only
# "uint32_t" elements are instantiated.
3369 vrsqrteCode = '''
3370 destElem = unsignedRSqrtEstimate(srcElem1);
3371 '''
3372 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3373 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3374
# VRSQRTE, float form: sets fpscr.idc when flushToZero() flushes the
# input, then calls fprSqrtEstimate() with the FPSCR copy.
3375 vrsqrtefpCode = '''
3376 FPSCR fpscr = (FPSCR) FpscrExc;
3377 if (flushToZero(srcReg1))
3378 fpscr.idc = 1;
3379 destReg = fprSqrtEstimate(fpscr, srcReg1);
3380 FpscrExc = fpscr;
3381 '''
3382 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3383 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3384
# VRECPE, integer form: delegates to unsignedRecipEstimate().
3385 vrecpeCode = '''
3386 destElem = unsignedRecipEstimate(srcElem1);
3387 '''
3388 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3389 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3390
# VRECPE, float form: same flush-to-zero handling as the float vrsqrte
# snippet, using fpRecipEstimate().
3391 vrecpefpCode = '''
3392 FPSCR fpscr = (FPSCR) FpscrExc;
3393 if (flushToZero(srcReg1))
3394 fpscr.idc = 1;
3395 destReg = fpRecipEstimate(fpscr, srcReg1);
3396 FpscrExc = fpscr;
3397 '''
3398 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3399 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3400
# VREV16/32/64: each snippet copies the element and computes the index `j`
# as i XOR (groupSize - 1), where groupSize is the number of elements in a
# 2-, 4- or 8-byte group ((1 << k) / sizeof(Element)). `i` and `j` are not
# declared here; presumably the twoRegMiscInst template walks source index
# i and stores to destination index j -- confirm in the helper.
3401 vrev16Code = '''
3402 destElem = srcElem1;
3403 unsigned groupSize = ((1 << 1) / sizeof(Element));
3404 unsigned reverseMask = (groupSize - 1);
3405 j = i ^ reverseMask;
3406 '''
3407 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3408 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# 4-byte groups: byte and halfword elements.
3409 vrev32Code = '''
3410 destElem = srcElem1;
3411 unsigned groupSize = ((1 << 2) / sizeof(Element));
3412 unsigned reverseMask = (groupSize - 1);
3413 j = i ^ reverseMask;
3414 '''
3415 twoRegMiscInst("vrev32", "NVrev32D",
3416 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3417 twoRegMiscInst("vrev32", "NVrev32Q",
3418 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# 8-byte groups: all of smallUnsignedTypes.
3419 vrev64Code = '''
3420 destElem = srcElem1;
3421 unsigned groupSize = ((1 << 3) / sizeof(Element));
3422 unsigned reverseMask = (groupSize - 1);
3423 j = i ^ reverseMask;
3424 '''
3425 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3426 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3427
# Calls split() for the 'exec' output (presumably starting a new generated
# exec file -- confirm against the ISA parser), then appends the
# vcompares / vcomparesL text accumulated earlier in the file (both are
# defined outside this excerpt) to exec_output.
3428 split('exec')
3429 exec_output += vcompares + vcomparesL
3430
# VPADDL (pairwise add long): both source elements are widened to
# BigElement before being added.
3431 vpaddlCode = '''
3432 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3433 '''
3434 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3435 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3436
# VPADAL (pairwise add and accumulate long): same widened sum, added to
# the existing destination element.
3437 vpadalCode = '''
3438 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3439 '''
3440 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3441 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3442
# VCLS (count leading sign bits): after discarding the top bit with one
# left shift, counts how many following bits keep the same sign as the
# original value, stopping at (element width - 1).
3443 vclsCode = '''
3444 unsigned count = 0;
3445 if (srcElem1 < 0) {
3446 srcElem1 <<= 1;
3447 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3448 count++;
3449 srcElem1 <<= 1;
3450 }
3451 } else {
3452 srcElem1 <<= 1;
3453 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3454 count++;
3455 srcElem1 <<= 1;
3456 }
3457 }
3458 destElem = count;
3459 '''
3460 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3461 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3462
# VCLZ (count leading zeros): shifts left while the value is non-negative,
# i.e. while the top bit is clear, up to the element width. Instantiated
# for signedTypes, which is what makes the ">= 0" test meaningful.
3463 vclzCode = '''
3464 unsigned count = 0;
3465 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3466 count++;
3467 srcElem1 <<= 1;
3468 }
3469 destElem = count;
3470 '''
3471 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3472 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3473
# VCNT (population count): adds the low bit and shifts right until the
# value is zero.
3474 vcntCode = '''
3475 unsigned count = 0;
3476 while (srcElem1 && count < sizeof(Element) * 8) {
3477 count += srcElem1 & 0x1;
3478 srcElem1 >>= 1;
3479 }
3480 destElem = count;
3481 '''
3482
3483 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3484 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3485
# VMVN (register form): bitwise NOT of each element; instantiated only for
# "uint64_t". The name vmvnCode is re-assigned later in the file for the
# immediate form, after these calls have consumed this value.
3486 vmvnCode = '''
3487 destElem = ~srcElem1;
3488 '''
3489 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3490 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3491
# VQABS (saturating absolute value): the most negative value cannot be
# negated, so it produces ~src (its bitwise complement) and sets fpscr.qc;
# other negative values are negated, non-negative ones copied.
3492 vqabsCode = '''
3493 FPSCR fpscr = (FPSCR) FpscrQc;
3494 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3495 fpscr.qc = 1;
3496 destElem = ~srcElem1;
3497 } else if (srcElem1 < 0) {
3498 destElem = -srcElem1;
3499 } else {
3500 destElem = srcElem1;
3501 }
3502 FpscrQc = fpscr;
3503 '''
3504 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3505 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3506
# VQNEG (saturating negate): same special case for the most negative
# value, plain negation otherwise.
3507 vqnegCode = '''
3508 FPSCR fpscr = (FPSCR) FpscrQc;
3509 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3510 fpscr.qc = 1;
3511 destElem = ~srcElem1;
3512 } else {
3513 destElem = -srcElem1;
3514 }
3515 FpscrQc = fpscr;
3516 '''
3517 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3518 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3519
# VABS, integer form: negate negative elements, copy the rest (no
# saturation handling).
3520 vabsCode = '''
3521 if (srcElem1 < 0) {
3522 destElem = -srcElem1;
3523 } else {
3524 destElem = srcElem1;
3525 }
3526 '''
3527
3528 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3529 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# VABS, float form: views the float through a uint32_t/float union and
# keeps only the low (element width - 1) bits, i.e. clears the top bit.
3530 vabsfpCode = '''
3531 union
3532 {
3533 uint32_t i;
3534 float f;
3535 } cStruct;
3536 cStruct.f = srcReg1;
3537 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3538 destReg = cStruct.f;
3539 '''
3540 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3541 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3542
# VNEG, integer form: arithmetic negation of each element.
3543 vnegCode = '''
3544 destElem = -srcElem1;
3545 '''
3546 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3547 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# VNEG, float form: negation applied to the float register value.
3548 vnegfpCode = '''
3549 destReg = -srcReg1;
3550 '''
3551 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3552 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3553
# Compare-against-zero instructions (VCGT/VCGE/VCEQ/VCLE/VCLT #0).
#
# Integer forms: the element becomes all ones (mask(element width)) when
# the comparison with 0 holds and 0 otherwise.
#
# Float forms: binaryOp() is called with the matching vc*Func helper and a
# literal 0.0 as second operand. A helper result of 0 is turned into -1 in
# the destination, anything else into 0, and a result of 2.0 additionally
# sets fpscr.ioc. The helpers are defined outside this excerpt; presumably
# they return 0 for "true" and 2.0 for an invalid comparison -- confirm.
# Float forms are instantiated with toInt = True.
3554 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3555 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3556 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3557 vcgtfpCode = '''
3558 FPSCR fpscr = (FPSCR) FpscrExc;
3559 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
3560 true, true, VfpRoundNearest);
3561 destReg = (res == 0) ? -1 : 0;
3562 if (res == 2.0)
3563 fpscr.ioc = 1;
3564 FpscrExc = fpscr;
3565 '''
3566 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3567 2, vcgtfpCode, toInt = True)
3568 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3569 4, vcgtfpCode, toInt = True)
3570
# Greater than or equal to zero.
3571 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3572 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3573 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3574 vcgefpCode = '''
3575 FPSCR fpscr = (FPSCR) FpscrExc;
3576 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
3577 true, true, VfpRoundNearest);
3578 destReg = (res == 0) ? -1 : 0;
3579 if (res == 2.0)
3580 fpscr.ioc = 1;
3581 FpscrExc = fpscr;
3582 '''
3583 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3584 2, vcgefpCode, toInt = True)
3585 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3586 4, vcgefpCode, toInt = True)
3587
# Equal to zero.
3588 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3589 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3590 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3591 vceqfpCode = '''
3592 FPSCR fpscr = (FPSCR) FpscrExc;
3593 float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
3594 true, true, VfpRoundNearest);
3595 destReg = (res == 0) ? -1 : 0;
3596 if (res == 2.0)
3597 fpscr.ioc = 1;
3598 FpscrExc = fpscr;
3599 '''
3600 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3601 2, vceqfpCode, toInt = True)
3602 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3603 4, vceqfpCode, toInt = True)
3604
# Less than or equal to zero.
3605 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3606 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3607 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3608 vclefpCode = '''
3609 FPSCR fpscr = (FPSCR) FpscrExc;
3610 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
3611 true, true, VfpRoundNearest);
3612 destReg = (res == 0) ? -1 : 0;
3613 if (res == 2.0)
3614 fpscr.ioc = 1;
3615 FpscrExc = fpscr;
3616 '''
3617 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3618 2, vclefpCode, toInt = True)
3619 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3620 4, vclefpCode, toInt = True)
3621
# Less than zero.
3622 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3623 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3624 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3625 vcltfpCode = '''
3626 FPSCR fpscr = (FPSCR) FpscrExc;
3627 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
3628 true, true, VfpRoundNearest);
3629 destReg = (res == 0) ? -1 : 0;
3630 if (res == 2.0)
3631 fpscr.ioc = 1;
3632 FpscrExc = fpscr;
3633 '''
3634 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3635 2, vcltfpCode, toInt = True)
3636 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3637 4, vcltfpCode, toInt = True)
3638
# VSWP: exchanges srcReg1 and destReg one 32-bit word at a time over
# rCount words. Both registers are written, hence twoRegMiscScramble.
3639 vswpCode = '''
3640 uint32_t mid;
3641 for (unsigned r = 0; r < rCount; r++) {
3642 mid = srcReg1.regs[r];
3643 srcReg1.regs[r] = destReg.regs[r];
3644 destReg.regs[r] = mid;
3645 }
3646 '''
3647 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3648 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3649
# VTRN: for every even index i, swaps srcReg1.elements[i] with
# destReg.elements[i + 1].
3650 vtrnCode = '''
3651 Element mid;
3652 for (unsigned i = 0; i < eCount; i += 2) {
3653 mid = srcReg1.elements[i];
3654 srcReg1.elements[i] = destReg.elements[i + 1];
3655 destReg.elements[i + 1] = mid;
3656 }
3657 '''
3658 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3659 smallUnsignedTypes, 2, vtrnCode)
3660 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3661 smallUnsignedTypes, 4, vtrnCode)
3662
# VUZP (unzip): srcReg1 is first copied to `mid`. Afterwards destReg holds
# the even-indexed elements (its own in the low half, mid's in the high
# half) and srcReg1 the odd-indexed ones (destReg's in the low half, mid's
# in the high half). destReg is compacted in place in the first loop,
# which is safe because each read index (2 * i) is never below the write
# index (i).
3663 vuzpCode = '''
3664 Element mid[eCount];
3665 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3666 for (unsigned i = 0; i < eCount / 2; i++) {
3667 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3668 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3669 destReg.elements[i] = destReg.elements[2 * i];
3670 }
3671 for (unsigned i = 0; i < eCount / 2; i++) {
3672 destReg.elements[eCount / 2 + i] = mid[2 * i];
3673 }
3674 '''
3675 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3676 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3677
# VZIP (zip / interleave): destReg is first copied to `mid`.
#
#   * destReg receives the interleaved low halves:
#       destReg[2i] = mid[i], destReg[2i + 1] = srcReg1[i]
#   * srcReg1 receives the interleaved high halves:
#       srcReg1[2i] = mid[eCount/2 + i],
#       srcReg1[2i + 1] = srcReg1[eCount/2 + i]
#
# srcReg1 is rewritten in place in the second loop; that is safe because
# the element read at (eCount/2 + i) has not been overwritten by any
# earlier iteration (earlier writes stop at index 2i - 1).
#
# Both loops use an unsigned index, matching the other element-walk loops
# in this file; the second loop previously used `int`, giving a
# signed/unsigned comparison against eCount / 2.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3692
# VMOVN: plain assignment of the source element to the destination
# element, generated through the narrowing helper.
3693 vmovnCode = 'destElem = srcElem1;'
3694 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3695
# VDUP (scalar source): the snippet is again a plain element copy; which
# source element is picked is decided inside twoRegMiscScInst (not visible
# here).
3696 vdupCode = 'destElem = srcElem1;'
3697 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3698 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3699
# Generate a VDUP instruction class whose source is Op1 rather than a
# vector element.
#   name    -- mnemonic passed to InstObjParams
#   Name    -- generated C++ class name
#   opClass -- value substituted for "op_class"
#   types   -- element types to emit NeonExecDeclare instantiations for
#   rCount  -- number of 32-bit destination words written
#              (FpDestP0_uw .. FpDestP<rCount-1>_uw)
# Appends to the global header_output and exec_output strings.
3700 def vdupGprInst(name, Name, opClass, types, rCount):
3701 global header_output, exec_output
3702 eWalkCode = simdEnabledCheckCode + '''
3703 RegVect destReg;
3704 for (unsigned i = 0; i < eCount; i++) {
3705 destReg.elements[i] = htog((Element)Op1);
3706 }
3707 '''
# Write each 32-bit word of the filled vector back to the destination.
3708 for reg in range(rCount):
3709 eWalkCode += '''
3710 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3711 ''' % { "reg" : reg }
# Build the instruction object on the "RegRegOp" base and emit the class
# declaration plus the execute method.
3712 iop = InstObjParams(name, Name,
3713 "RegRegOp",
3714 { "code": eWalkCode,
3715 "r_count": rCount,
3716 "predicate_test": predicateTest,
3717 "op_class": opClass }, [])
3718 header_output += NeonRegRegOpDeclare.subst(iop)
3719 exec_output += NeonEqualRegExecute.subst(iop)
# One NeonExecDeclare substitution per requested element type.
3720 for type in types:
3721 substDict = { "targs" : type,
3722 "class_name" : Name }
3723 exec_output += NeonExecDeclare.subst(substDict)
# D form writes 2 words, Q form writes 4.
3724 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3725 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3726
# One-register immediate instructions, all instantiated only for
# "uint64_t" elements through oneRegImmInst.
# VMOV: destination element = imm.
3727 vmovCode = 'destElem = imm;'
3728 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3729 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3730
# VORR: OR imm into the existing destination (trailing True passed).
3731 vorrCode = 'destElem |= imm;'
3732 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3733 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3734
# VMVN: destination element = ~imm. Re-uses the name vmvnCode from the
# register form earlier in the file.
3735 vmvnCode = 'destElem = ~imm;'
3736 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3737 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3738
# VBIC: clear the bits of imm in the existing destination (trailing True
# passed).
3739 vbicCode = 'destElem &= ~imm;'
3740 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3741 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3742
# VQMOVN (signed saturating narrow): assigns the wide source to the narrow
# destination; if widening the result back does not reproduce the source,
# sets fpscr.qc and saturates to mask(element width - 1), complemented for
# negative sources.
3743 vqmovnCode = '''
3744 FPSCR fpscr = (FPSCR) FpscrQc;
3745 destElem = srcElem1;
3746 if ((BigElement)destElem != srcElem1) {
3747 fpscr.qc = 1;
3748 destElem = mask(sizeof(Element) * 8 - 1);
3749 if (srcElem1 < 0)
3750 destElem = ~destElem;
3751 }
3752 FpscrQc = fpscr;
3753 '''
3754 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3755
# Unsigned saturating narrow (class "NVqmovun", smallUnsignedTypes): same
# round-trip check, saturating to mask(element width).
3756 vqmovunCode = '''
3757 FPSCR fpscr = (FPSCR) FpscrQc;
3758 destElem = srcElem1;
3759 if ((BigElement)destElem != srcElem1) {
3760 fpscr.qc = 1;
3761 destElem = mask(sizeof(Element) * 8);
3762 }
3763 FpscrQc = fpscr;
3764 '''
3765 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3766 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3767
# Signed source narrowed with unsigned saturation (class "NVqmovuns",
# smallSignedTypes): negative sources, or sources whose low bits do not
# equal the whole value, set fpscr.qc; the result is mask(element width),
# complemented when the source is negative.
3768 vqmovunsCode = '''
3769 FPSCR fpscr = (FPSCR) FpscrQc;
3770 destElem = srcElem1;
3771 if (srcElem1 < 0 ||
3772 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3773 fpscr.qc = 1;
3774 destElem = mask(sizeof(Element) * 8);
3775 if (srcElem1 < 0)
3776 destElem = ~destElem;
3777 }
3778 FpscrQc = fpscr;
3779 '''
3780 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3781 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3782
# Generate a three-register-plus-immediate instruction class (used for
# VEXT).
#   name    -- mnemonic passed to InstObjParams
#   Name    -- generated C++ class name
#   opClass -- value substituted for "op_class"
#   types   -- element types to emit NeonExecDeclare instantiations for
#   rCount  -- number of 32-bit words per operand register
#   op      -- C++ snippet that fills destReg from srcReg1 / srcReg2
# Appends to the global header_output and exec_output strings.
3783 def buildVext(name, Name, opClass, types, rCount, op):
3784 global header_output, exec_output
3785 eWalkCode = simdEnabledCheckCode + '''
3786 RegVect srcReg1, srcReg2, destReg;
3787 '''
# Load both source operands word by word.
3788 for reg in range(rCount):
3789 eWalkCode += '''
3790 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3791 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3792 ''' % { "reg" : reg }
# Caller-supplied operation, followed by the destination write-back.
3793 eWalkCode += op
3794 for reg in range(rCount):
3795 eWalkCode += '''
3796 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3797 ''' % { "reg" : reg }
# Build the instruction object on the "RegRegRegImmOp" base and emit the
# class declaration plus the execute method.
3798 iop = InstObjParams(name, Name,
3799 "RegRegRegImmOp",
3800 { "code": eWalkCode,
3801 "r_count": rCount,
3802 "predicate_test": predicateTest,
3803 "op_class": opClass }, [])
3804 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3805 exec_output += NeonEqualRegExecute.subst(iop)
# One NeonExecDeclare substitution per requested element type.
3806 for type in types:
3807 substDict = { "targs" : type,
3808 "class_name" : Name }
3809 exec_output += NeonExecDeclare.subst(substDict)
3810
# VEXT: destination element i is taken from position (i + imm) of the
# concatenation srcReg1:srcReg2. Indices below eCount come from srcReg1,
# the next eCount from srcReg2; an index beyond both raises an
# UndefinedInstruction fault. Instantiated for "uint8_t" elements only.
3811 vextCode = '''
3812 for (unsigned i = 0; i < eCount; i++) {
3813 unsigned index = i + imm;
3814 if (index < eCount) {
3815 destReg.elements[i] = srcReg1.elements[index];
3816 } else {
3817 index -= eCount;
3818 if (index >= eCount) {
3819 fault = std::make_shared<UndefinedInstruction>(machInst,
3820 false,
3821 mnemonic);
3822 } else {
3823 destReg.elements[i] = srcReg2.elements[index];
3824 }
3825 }
3826 }
3827 '''
3828 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3829 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3830
def buildVtbxl(name, Name, opClass, length, isVtbl):
    # Emit a "RegRegRegOp" instruction class called Name (mnemonic name)
    # that performs a byte-wise table lookup.
    #
    # length is substituted as an integer and isVtbl as literal C++ text
    # (callers pass "true" or "false"). The first 2 * length words of the
    # table are loaded from the first operand and the remainder zeroed.
    # Each byte of the second operand is an index: indices below
    # 8 * length select a table byte; otherwise the destination byte is
    # zeroed when isVtbl is true and left as read from the destination
    # when it is false.
    global header_output, decoder_output, exec_output

    prologue = '''
        union
        {
            uint8_t bytes[32];
            uint32_t regs[8];
        } table;

        union
        {
            uint8_t bytes[8];
            uint32_t regs[2];
        } destReg, srcReg2;

        const unsigned length = %(length)d;
        const bool isVtbl = %(isVtbl)s;

        srcReg2.regs[0] = htog(FpOp2P0_uw);
        srcReg2.regs[1] = htog(FpOp2P1_uw);

        destReg.regs[0] = htog(FpDestP0_uw);
        destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }

    loadedWords = length * 2
    tableInit = ""
    for word in range(8):
        if word >= loadedWords:
            tableInit += 'table.regs[%(reg)d] = 0;\n' % { "reg" : word }
        else:
            tableInit += \
                'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
                { "reg" : word }

    lookup = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''

    code = simdEnabledCheckCode + prologue + tableInit + lookup
    params = { "code": code,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegOp", params, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3884
# Instantiate the lookup classes for lengths 1 through 4: all vtbl forms
# (isVtbl "true") first, then all vtbx forms (isVtbl "false").
for tblLen in (1, 2, 3, 4):
    buildVtbxl("vtbl", "NVtbl%d" % tblLen, "SimdMiscOp", tblLen, "true")

for tblLen in (1, 2, 3, 4):
    buildVtbxl("vtbx", "NVtbx%d" % tblLen, "SimdMiscOp", tblLen, "false")
3894}};
77 template <template <typename T> class Base>
78 StaticInstPtr
79 decodeNeonSThreeUReg(unsigned size,
80 ExtMachInst machInst, IntRegIndex dest,
81 IntRegIndex op1, IntRegIndex op2)
82 {
83 switch (size) {
84 case 0:
85 return new Base<int8_t>(machInst, dest, op1, op2);
86 case 1:
87 return new Base<int16_t>(machInst, dest, op1, op2);
88 case 2:
89 return new Base<int32_t>(machInst, dest, op1, op2);
90 case 3:
91 return new Base<int64_t>(machInst, dest, op1, op2);
92 default:
93 return new Unknown(machInst);
94 }
95 }
96
97 template <template <typename T> class Base>
98 StaticInstPtr
99 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
100 ExtMachInst machInst, IntRegIndex dest,
101 IntRegIndex op1, IntRegIndex op2)
102 {
103 if (notSigned) {
104 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
105 } else {
106 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
107 }
108 }
109
110 template <template <typename T> class Base>
111 StaticInstPtr
112 decodeNeonUThreeUSReg(unsigned size,
113 ExtMachInst machInst, IntRegIndex dest,
114 IntRegIndex op1, IntRegIndex op2)
115 {
116 switch (size) {
117 case 0:
118 return new Base<uint8_t>(machInst, dest, op1, op2);
119 case 1:
120 return new Base<uint16_t>(machInst, dest, op1, op2);
121 case 2:
122 return new Base<uint32_t>(machInst, dest, op1, op2);
123 default:
124 return new Unknown(machInst);
125 }
126 }
127
128 template <template <typename T> class Base>
129 StaticInstPtr
130 decodeNeonSThreeUSReg(unsigned size,
131 ExtMachInst machInst, IntRegIndex dest,
132 IntRegIndex op1, IntRegIndex op2)
133 {
134 switch (size) {
135 case 0:
136 return new Base<int8_t>(machInst, dest, op1, op2);
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
165 IntRegIndex dest, IntRegIndex op1,
166 IntRegIndex op2, uint64_t imm)
167 {
168 switch (size) {
169 case 1:
170 return new Base<int16_t>(machInst, dest, op1, op2, imm);
171 case 2:
172 return new Base<int32_t>(machInst, dest, op1, op2, imm);
173 default:
174 return new Unknown(machInst);
175 }
176 }
177
178 template <template <typename T> class Base>
179 StaticInstPtr
180 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeUSReg<Base>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeUSReg<Base>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeSReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUSReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonSThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonSThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUThreeXReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (q) {
249 return decodeNeonUThreeUReg<BaseQ>(
250 size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonUThreeUSReg<BaseD>(
253 size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (notSigned) {
265 return decodeNeonUThreeSReg<BaseD, BaseQ>(
266 q, size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonSThreeSReg<BaseD, BaseQ>(
269 q, size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonUThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonUThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonUThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonSThreeReg(bool q, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (q) {
297 return decodeNeonSThreeUReg<BaseQ>(
298 size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeUReg<BaseD>(
301 size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
309 ExtMachInst machInst, IntRegIndex dest,
310 IntRegIndex op1, IntRegIndex op2)
311 {
312 if (notSigned) {
313 return decodeNeonUThreeReg<BaseD, BaseQ>(
314 q, size, machInst, dest, op1, op2);
315 } else {
316 return decodeNeonSThreeReg<BaseD, BaseQ>(
317 q, size, machInst, dest, op1, op2);
318 }
319 }
320
321 template <template <typename T> class BaseD,
322 template <typename T> class BaseQ>
323 StaticInstPtr
324 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
325 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
326 {
327 if (q) {
328 if (size)
329 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
330 else
331 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
332 } else {
333 if (size)
334 return new Unknown(machInst);
335 else
336 return new BaseD<uint32_t>(machInst, dest, op1, op2);
337 }
338 }
339
340 template <template <typename T> class Base>
341 StaticInstPtr
342 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
343 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
344 {
345 if (size)
346 return new Base<uint64_t>(machInst, dest, op1, op2);
347 else
348 return new Base<uint32_t>(machInst, dest, op1, op2);
349 }
350
351 template <template <typename T> class Base>
352 StaticInstPtr
353 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
354 IntRegIndex dest, IntRegIndex op1,
355 IntRegIndex op2, uint64_t imm)
356 {
357 if (size)
358 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
359 else
360 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
361 }
362
363 template <template <typename T> class BaseD,
364 template <typename T> class BaseQ>
365 StaticInstPtr
366 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
367 IntRegIndex dest, IntRegIndex op1,
368 IntRegIndex op2, uint64_t imm)
369 {
370 if (q) {
371 switch (size) {
372 case 1:
373 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
374 case 2:
375 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
376 default:
377 return new Unknown(machInst);
378 }
379 } else {
380 switch (size) {
381 case 1:
382 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
383 case 2:
384 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
385 default:
386 return new Unknown(machInst);
387 }
388 }
389 }
390
391 template <template <typename T> class BaseD,
392 template <typename T> class BaseQ>
393 StaticInstPtr
394 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
395 IntRegIndex dest, IntRegIndex op1,
396 IntRegIndex op2, uint64_t imm)
397 {
398 if (q) {
399 switch (size) {
400 case 1:
401 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
402 case 2:
403 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
404 default:
405 return new Unknown(machInst);
406 }
407 } else {
408 switch (size) {
409 case 1:
410 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
411 case 2:
412 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
413 default:
414 return new Unknown(machInst);
415 }
416 }
417 }
418
419 template <template <typename T> class BaseD,
420 template <typename T> class BaseQ>
421 StaticInstPtr
422 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
423 IntRegIndex dest, IntRegIndex op1,
424 IntRegIndex op2, uint64_t imm)
425 {
426 if (q) {
427 if (size)
428 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
429 else
430 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
431 } else {
432 if (size)
433 return new Unknown(machInst);
434 else
435 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
436 }
437 }
438
439 template <template <typename T> class BaseD,
440 template <typename T> class BaseQ>
441 StaticInstPtr
442 decodeNeonUTwoShiftReg(bool q, unsigned size,
443 ExtMachInst machInst, IntRegIndex dest,
444 IntRegIndex op1, uint64_t imm)
445 {
446 if (q) {
447 switch (size) {
448 case 0:
449 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
450 case 1:
451 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
452 case 2:
453 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
454 case 3:
455 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
456 default:
457 return new Unknown(machInst);
458 }
459 } else {
460 switch (size) {
461 case 0:
462 return new BaseD<uint8_t>(machInst, dest, op1, imm);
463 case 1:
464 return new BaseD<uint16_t>(machInst, dest, op1, imm);
465 case 2:
466 return new BaseD<uint32_t>(machInst, dest, op1, imm);
467 case 3:
468 return new BaseD<uint64_t>(machInst, dest, op1, imm);
469 default:
470 return new Unknown(machInst);
471 }
472 }
473 }
474
475 template <template <typename T> class BaseD,
476 template <typename T> class BaseQ>
477 StaticInstPtr
478 decodeNeonSTwoShiftReg(bool q, unsigned size,
479 ExtMachInst machInst, IntRegIndex dest,
480 IntRegIndex op1, uint64_t imm)
481 {
482 if (q) {
483 switch (size) {
484 case 0:
485 return new BaseQ<int8_t>(machInst, dest, op1, imm);
486 case 1:
487 return new BaseQ<int16_t>(machInst, dest, op1, imm);
488 case 2:
489 return new BaseQ<int32_t>(machInst, dest, op1, imm);
490 case 3:
491 return new BaseQ<int64_t>(machInst, dest, op1, imm);
492 default:
493 return new Unknown(machInst);
494 }
495 } else {
496 switch (size) {
497 case 0:
498 return new BaseD<int8_t>(machInst, dest, op1, imm);
499 case 1:
500 return new BaseD<int16_t>(machInst, dest, op1, imm);
501 case 2:
502 return new BaseD<int32_t>(machInst, dest, op1, imm);
503 case 3:
504 return new BaseD<int64_t>(machInst, dest, op1, imm);
505 default:
506 return new Unknown(machInst);
507 }
508 }
509 }
510
511
512 template <template <typename T> class BaseD,
513 template <typename T> class BaseQ>
514 StaticInstPtr
515 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
516 ExtMachInst machInst, IntRegIndex dest,
517 IntRegIndex op1, uint64_t imm)
518 {
519 if (notSigned) {
520 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
521 q, size, machInst, dest, op1, imm);
522 } else {
523 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
524 q, size, machInst, dest, op1, imm);
525 }
526 }
527
528 template <template <typename T> class Base>
529 StaticInstPtr
530 decodeNeonUTwoShiftUSReg(unsigned size,
531 ExtMachInst machInst, IntRegIndex dest,
532 IntRegIndex op1, uint64_t imm)
533 {
534 switch (size) {
535 case 0:
536 return new Base<uint8_t>(machInst, dest, op1, imm);
537 case 1:
538 return new Base<uint16_t>(machInst, dest, op1, imm);
539 case 2:
540 return new Base<uint32_t>(machInst, dest, op1, imm);
541 default:
542 return new Unknown(machInst);
543 }
544 }
545
546 template <template <typename T> class Base>
547 StaticInstPtr
548 decodeNeonUTwoShiftUReg(unsigned size,
549 ExtMachInst machInst, IntRegIndex dest,
550 IntRegIndex op1, uint64_t imm)
551 {
552 switch (size) {
553 case 0:
554 return new Base<uint8_t>(machInst, dest, op1, imm);
555 case 1:
556 return new Base<uint16_t>(machInst, dest, op1, imm);
557 case 2:
558 return new Base<uint32_t>(machInst, dest, op1, imm);
559 case 3:
560 return new Base<uint64_t>(machInst, dest, op1, imm);
561 default:
562 return new Unknown(machInst);
563 }
564 }
565
566 template <template <typename T> class Base>
567 StaticInstPtr
568 decodeNeonSTwoShiftUReg(unsigned size,
569 ExtMachInst machInst, IntRegIndex dest,
570 IntRegIndex op1, uint64_t imm)
571 {
572 switch (size) {
573 case 0:
574 return new Base<int8_t>(machInst, dest, op1, imm);
575 case 1:
576 return new Base<int16_t>(machInst, dest, op1, imm);
577 case 2:
578 return new Base<int32_t>(machInst, dest, op1, imm);
579 case 3:
580 return new Base<int64_t>(machInst, dest, op1, imm);
581 default:
582 return new Unknown(machInst);
583 }
584 }
585
586 template <template <typename T> class BaseD,
587 template <typename T> class BaseQ>
588 StaticInstPtr
589 decodeNeonUTwoShiftSReg(bool q, unsigned size,
590 ExtMachInst machInst, IntRegIndex dest,
591 IntRegIndex op1, uint64_t imm)
592 {
593 if (q) {
594 return decodeNeonUTwoShiftUSReg<BaseQ>(
595 size, machInst, dest, op1, imm);
596 } else {
597 return decodeNeonUTwoShiftUSReg<BaseD>(
598 size, machInst, dest, op1, imm);
599 }
600 }
601
602 template <template <typename T> class Base>
603 StaticInstPtr
604 decodeNeonSTwoShiftUSReg(unsigned size,
605 ExtMachInst machInst, IntRegIndex dest,
606 IntRegIndex op1, uint64_t imm)
607 {
608 switch (size) {
609 case 0:
610 return new Base<int8_t>(machInst, dest, op1, imm);
611 case 1:
612 return new Base<int16_t>(machInst, dest, op1, imm);
613 case 2:
614 return new Base<int32_t>(machInst, dest, op1, imm);
615 default:
616 return new Unknown(machInst);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonSTwoShiftSReg(bool q, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (q) {
628 return decodeNeonSTwoShiftUSReg<BaseQ>(
629 size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftUSReg<BaseD>(
632 size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
640 ExtMachInst machInst, IntRegIndex dest,
641 IntRegIndex op1, uint64_t imm)
642 {
643 if (notSigned) {
644 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
645 q, size, machInst, dest, op1, imm);
646 } else {
647 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
648 q, size, machInst, dest, op1, imm);
649 }
650 }
651
652 template <template <typename T> class BaseD,
653 template <typename T> class BaseQ>
654 StaticInstPtr
655 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
656 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
657 {
658 if (q) {
659 return decodeNeonUTwoShiftUReg<BaseQ>(
660 size, machInst, dest, op1, imm);
661 } else {
662 return decodeNeonUTwoShiftUSReg<BaseD>(
663 size, machInst, dest, op1, imm);
664 }
665 }
666
667 template <template <typename T> class BaseD,
668 template <typename T> class BaseQ>
669 StaticInstPtr
670 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
671 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
672 {
673 if (q) {
674 return decodeNeonSTwoShiftUReg<BaseQ>(
675 size, machInst, dest, op1, imm);
676 } else {
677 return decodeNeonSTwoShiftUSReg<BaseD>(
678 size, machInst, dest, op1, imm);
679 }
680 }
681
682 template <template <typename T> class Base>
683 StaticInstPtr
684 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
685 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
686 {
687 if (size)
688 return new Base<uint64_t>(machInst, dest, op1, imm);
689 else
690 return new Base<uint32_t>(machInst, dest, op1, imm);
691 }
692
693 template <template <typename T> class BaseD,
694 template <typename T> class BaseQ>
695 StaticInstPtr
696 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
697 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
698 {
699 if (q) {
700 if (size)
701 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
702 else
703 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
704 } else {
705 if (size)
706 return new Unknown(machInst);
707 else
708 return new BaseD<uint32_t>(machInst, dest, op1, imm);
709 }
710 }
711
712 template <template <typename T> class Base>
713 StaticInstPtr
714 decodeNeonUTwoMiscUSReg(unsigned size,
715 ExtMachInst machInst, IntRegIndex dest,
716 IntRegIndex op1)
717 {
718 switch (size) {
719 case 0:
720 return new Base<uint8_t>(machInst, dest, op1);
721 case 1:
722 return new Base<uint16_t>(machInst, dest, op1);
723 case 2:
724 return new Base<uint32_t>(machInst, dest, op1);
725 default:
726 return new Unknown(machInst);
727 }
728 }
729
730 template <template <typename T> class Base>
731 StaticInstPtr
732 decodeNeonSTwoMiscUSReg(unsigned size,
733 ExtMachInst machInst, IntRegIndex dest,
734 IntRegIndex op1)
735 {
736 switch (size) {
737 case 0:
738 return new Base<int8_t>(machInst, dest, op1);
739 case 1:
740 return new Base<int16_t>(machInst, dest, op1);
741 case 2:
742 return new Base<int32_t>(machInst, dest, op1);
743 default:
744 return new Unknown(machInst);
745 }
746 }
747
748 template <template <typename T> class BaseD,
749 template <typename T> class BaseQ>
750 StaticInstPtr
751 decodeNeonUTwoMiscSReg(bool q, unsigned size,
752 ExtMachInst machInst, IntRegIndex dest,
753 IntRegIndex op1)
754 {
755 if (q) {
756 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
757 } else {
758 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
759 }
760 }
761
762 template <template <typename T> class BaseD,
763 template <typename T> class BaseQ>
764 StaticInstPtr
765 decodeNeonSTwoMiscSReg(bool q, unsigned size,
766 ExtMachInst machInst, IntRegIndex dest,
767 IntRegIndex op1)
768 {
769 if (q) {
770 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
771 } else {
772 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
773 }
774 }
775
776 template <template <typename T> class Base>
777 StaticInstPtr
778 decodeNeonUTwoMiscUReg(unsigned size,
779 ExtMachInst machInst, IntRegIndex dest,
780 IntRegIndex op1)
781 {
782 switch (size) {
783 case 0:
784 return new Base<uint8_t>(machInst, dest, op1);
785 case 1:
786 return new Base<uint16_t>(machInst, dest, op1);
787 case 2:
788 return new Base<uint32_t>(machInst, dest, op1);
789 case 3:
790 return new Base<uint64_t>(machInst, dest, op1);
791 default:
792 return new Unknown(machInst);
793 }
794 }
795
796 template <template <typename T> class Base>
797 StaticInstPtr
798 decodeNeonSTwoMiscUReg(unsigned size,
799 ExtMachInst machInst, IntRegIndex dest,
800 IntRegIndex op1)
801 {
802 switch (size) {
803 case 0:
804 return new Base<int8_t>(machInst, dest, op1);
805 case 1:
806 return new Base<int16_t>(machInst, dest, op1);
807 case 2:
808 return new Base<int32_t>(machInst, dest, op1);
809 case 3:
810 return new Base<int64_t>(machInst, dest, op1);
811 default:
812 return new Unknown(machInst);
813 }
814 }
815
816 template <template <typename T> class BaseD,
817 template <typename T> class BaseQ>
818 StaticInstPtr
819 decodeNeonSTwoMiscReg(bool q, unsigned size,
820 ExtMachInst machInst, IntRegIndex dest,
821 IntRegIndex op1)
822 {
823 if (q) {
824 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
825 } else {
826 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
827 }
828 }
829
830 template <template <typename T> class BaseD,
831 template <typename T> class BaseQ>
832 StaticInstPtr
833 decodeNeonUTwoMiscReg(bool q, unsigned size,
834 ExtMachInst machInst, IntRegIndex dest,
835 IntRegIndex op1)
836 {
837 if (q) {
838 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
839 } else {
840 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
848 ExtMachInst machInst, IntRegIndex dest,
849 IntRegIndex op1)
850 {
851 if (notSigned) {
852 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
853 q, size, machInst, dest, op1);
854 } else {
855 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
856 q, size, machInst, dest, op1);
857 }
858 }
859
860 template <template <typename T> class BaseD,
861 template <typename T> class BaseQ>
862 StaticInstPtr
863 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
864 IntRegIndex dest, IntRegIndex op1)
865 {
866 if (q) {
867 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
868 } else {
869 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
870 }
871 }
872
873 template <template <typename T> class BaseD,
874 template <typename T> class BaseQ>
875 StaticInstPtr
876 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
877 IntRegIndex dest, IntRegIndex op1)
878 {
879 if (q) {
880 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
881 } else {
882 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
883 }
884 }
885
886 template <template <typename T> class BaseD,
887 template <typename T> class BaseQ>
888 StaticInstPtr
889 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
890 IntRegIndex dest, IntRegIndex op1)
891 {
892 if (q) {
893 if (size)
894 return new BaseQ<uint64_t>(machInst, dest, op1);
895 else
896 return new BaseQ<uint32_t>(machInst, dest, op1);
897 } else {
898 if (size)
899 return new Unknown(machInst);
900 else
901 return new BaseD<uint32_t>(machInst, dest, op1);
902 }
903 }
904
905 template <template <typename T> class BaseD,
906 template <typename T> class BaseQ>
907 StaticInstPtr
908 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
909 IntRegIndex dest, IntRegIndex op1)
910 {
911 if (size)
912 return new BaseQ<uint64_t>(machInst, dest, op1);
913 else
914 return new BaseD<uint32_t>(machInst, dest, op1);
915 }
916
917 template <template <typename T> class Base>
918 StaticInstPtr
919 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
920 IntRegIndex dest, IntRegIndex op1)
921 {
922 if (size)
923 return new Base<uint64_t>(machInst, dest, op1);
924 else
925 return new Base<uint32_t>(machInst, dest, op1);
926 }
927
928 template <template <typename T> class BaseD,
929 template <typename T> class BaseQ>
930 StaticInstPtr
931 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
932 IntRegIndex dest, IntRegIndex op1)
933 {
934 if (q) {
935 switch (size) {
936 case 0x0:
937 return new BaseQ<uint8_t>(machInst, dest, op1);
938 case 0x1:
939 return new BaseQ<uint16_t>(machInst, dest, op1);
940 case 0x2:
941 return new BaseQ<uint32_t>(machInst, dest, op1);
942 default:
943 return new Unknown(machInst);
944 }
945 } else {
946 switch (size) {
947 case 0x0:
948 return new BaseD<uint8_t>(machInst, dest, op1);
949 case 0x1:
950 return new BaseD<uint16_t>(machInst, dest, op1);
951 default:
952 return new Unknown(machInst);
953 }
954 }
955 }
956
957 template <template <typename T> class BaseD,
958 template <typename T> class BaseQ,
959 template <typename T> class BaseBQ>
960 StaticInstPtr
961 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
962 IntRegIndex dest, IntRegIndex op1)
963 {
964 if (q) {
965 switch (size) {
966 case 0x0:
967 return new BaseQ<uint8_t>(machInst, dest, op1);
968 case 0x1:
969 return new BaseQ<uint16_t>(machInst, dest, op1);
970 case 0x2:
971 return new BaseBQ<uint32_t>(machInst, dest, op1);
972 default:
973 return new Unknown(machInst);
974 }
975 } else {
976 switch (size) {
977 case 0x0:
978 return new BaseD<uint8_t>(machInst, dest, op1);
979 case 0x1:
980 return new BaseD<uint16_t>(machInst, dest, op1);
981 default:
982 return new Unknown(machInst);
983 }
984 }
985 }
986
987 template <template <typename T> class BaseD,
988 template <typename T> class BaseQ>
989 StaticInstPtr
990 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
991 IntRegIndex dest, IntRegIndex op1)
992 {
993 if (q) {
994 switch (size) {
995 case 0x0:
996 return new BaseQ<int8_t>(machInst, dest, op1);
997 case 0x1:
998 return new BaseQ<int16_t>(machInst, dest, op1);
999 case 0x2:
1000 return new BaseQ<int32_t>(machInst, dest, op1);
1001 default:
1002 return new Unknown(machInst);
1003 }
1004 } else {
1005 switch (size) {
1006 case 0x0:
1007 return new BaseD<int8_t>(machInst, dest, op1);
1008 case 0x1:
1009 return new BaseD<int16_t>(machInst, dest, op1);
1010 default:
1011 return new Unknown(machInst);
1012 }
1013 }
1014 }
1015
1016 template <template <typename T> class BaseD,
1017 template <typename T> class BaseQ,
1018 template <typename T> class BaseBQ>
1019 StaticInstPtr
1020 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1021 IntRegIndex dest, IntRegIndex op1)
1022 {
1023 if (q) {
1024 switch (size) {
1025 case 0x0:
1026 return new BaseQ<uint8_t>(machInst, dest, op1);
1027 case 0x1:
1028 return new BaseQ<uint16_t>(machInst, dest, op1);
1029 case 0x2:
1030 return new BaseBQ<uint32_t>(machInst, dest, op1);
1031 default:
1032 return new Unknown(machInst);
1033 }
1034 } else {
1035 switch (size) {
1036 case 0x0:
1037 return new BaseD<uint8_t>(machInst, dest, op1);
1038 case 0x1:
1039 return new BaseD<uint16_t>(machInst, dest, op1);
1040 default:
1041 return new Unknown(machInst);
1042 }
1043 }
1044 }
1045
1046 template <template <typename T> class BaseD,
1047 template <typename T> class BaseQ,
1048 template <typename T> class BaseBQ>
1049 StaticInstPtr
1050 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1051 IntRegIndex dest, IntRegIndex op1)
1052 {
1053 if (q) {
1054 switch (size) {
1055 case 0x0:
1056 return new BaseQ<int8_t>(machInst, dest, op1);
1057 case 0x1:
1058 return new BaseQ<int16_t>(machInst, dest, op1);
1059 case 0x2:
1060 return new BaseBQ<int32_t>(machInst, dest, op1);
1061 default:
1062 return new Unknown(machInst);
1063 }
1064 } else {
1065 switch (size) {
1066 case 0x0:
1067 return new BaseD<int8_t>(machInst, dest, op1);
1068 case 0x1:
1069 return new BaseD<int16_t>(machInst, dest, op1);
1070 default:
1071 return new Unknown(machInst);
1072 }
1073 }
1074 }
1075}};
1076
1077let {{
# Output buffers accumulated by the builder functions in this block.
header_output = exec_output = ""

# C++ float comparison helpers. Each returns 0.0 when the comparison holds,
# 1.0 when it does not, and 2.0 when the NaN check at the top of the helper
# fires (std::isnan for vcgt/vcge, isSnan for vceq).
vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
# Less-than / less-or-equal counterparts. This string is only defined here;
# it is not appended to exec_output below.
vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
# Same convention as above, comparing fabsf() of both operands.
vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

exec_output += vcompares
exec_output += vacomparesG
1142
# Element type name tuples handed to the instruction builders. The "small"
# tuples stop at 32 bits; the full tuples append the 64-bit type.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
1149
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False,
                      standardFpcsr=False):
    # Emit the declaration and execute code for a "RegRegRegOp" NEON
    # instruction class called Name (mnemonic name).
    #
    # The generated code loads rCount regs[] entries from each source
    # operand (and from the destination when readDest is set), applies the
    # op snippet once per element, and writes rCount entries back.
    #   readDest      - pre-load destElem from the destination register.
    #   pairwise      - feed op with adjacent element pairs, drawn from
    #                   srcReg1 while 2 * i < eCount and from srcReg2
    #                   afterwards, instead of matching elements.
    #   standardFpcsr - declare fpscr from fpStandardFPSCRValue(FpscrExc)
    #                   before the loop and store it back to FpscrExc
    #                   after it.
    # One NeonExecDeclare substitution is emitted per entry of types.
    global header_output, exec_output

    chunks = [simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for idx in range(rCount):
        regArg = { "reg" : idx }
        chunks.append('''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % regArg)
        if readDest:
            chunks.append('''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regArg)
    if standardFpcsr:
        chunks.append('''
        FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
        ''')

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    if pairwise:
        loopTemplate = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        loopTemplate = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    chunks.append(loopTemplate % { "op" : op, "readDest" : readDestCode })

    if standardFpcsr:
        chunks.append('''
        FpscrExc = fpscr;
        ''')
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegOp", params, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
1219
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Emit a floating point instruction with two sources and a destination.

    The sources are walked one float register piece at a time; op computes
    destReg from srcReg1 and srcReg2. With pairwise the operands are
    adjacent pieces taken from the first and then the second source. With
    toInt the destination is a RegVect of uint32_t results rather than an
    array of floats. With readDest the old destination value is loaded into
    destReg before op runs.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    destType = 'float'
    readDestExpr = 'destRegs[r]'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        # An integer destination is a RegVect, which is only reachable
        # through its regs member; the read has to match the write.
        destType = 'uint32_t'
        readDestExpr = 'destRegs.regs[r]'
        writeDest = 'destRegs.regs[r] = destReg;'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % readDestExpr
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            float srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = srcRegs1[r];
            float srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1304
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Emit an instruction whose operands may differ in width.

    Each of bigSrc1, bigSrc2 and bigDest selects the wide form of that
    operand: four register pieces held in a BigRegVect of BigElement
    instead of two pieces held in a RegVect of Element. op computes
    destElem from srcElem1 and srcElem2; with readDest the old destination
    element is loaded into destElem first.
    """
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Each source element is declared with the width of its own source
    # vector, so srcElem2 follows src2Prefix rather than src1Prefix.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1368
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Three register form with two wide sources and a normal destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
1372
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Three register form with two normal sources and a wide destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1376
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Three register form with a wide first source and wide destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1380
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction that combines a vector with one indexed element.

    Every element of the first source (srcElem1) is combined by op with the
    single element of the second source selected by imm (srcElem2). With
    readDest the old destination element is loaded into destElem first. An
    imm outside the element range raises an UndefinedInstruction fault.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The two range tests are joined with ||: no index can be both below
    # zero and beyond the end, so && would never reject anything.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1429
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Emit a widening instruction that uses one indexed source element.

    Every element of the first source (srcElem1) is combined by op with the
    element of the second source selected by imm (srcElem2), producing a
    BigElement result. Two register pieces are read per source and four are
    written to the destination. With readDest the old destination element
    is loaded into destElem first. An imm outside the element range raises
    an UndefinedInstruction fault.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The two range tests are joined with ||: no index can be both below
    # zero and beyond the end, so && would never reject anything.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1481
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a floating point instruction that uses one indexed source piece.

    Every float piece of the first source (srcReg1) is combined by op with
    the piece of the second source selected by imm (srcReg2). With readDest
    the old destination value is loaded into destReg first. An imm outside
    the rCount pieces of the source array raises an UndefinedInstruction
    fault.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The index is checked against rCount because srcRegs2 holds rCount
    # floats. The two range tests are joined with ||: no index can be both
    # below zero and beyond the end, so && would never reject anything.
    eWalkCode += '''
    if (imm < 0 || imm >= rCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            float srcReg1 = srcRegs1[i];
            float srcReg2 = srcRegs2[imm];
            float destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1531
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Emit a one source, one destination instruction taking an immediate.

    op runs once per element. By default it reads srcElem1 and writes
    destElem; with fromInt the source is presented as a uint32_t srcReg1,
    and with toInt the result is a uint32_t destReg stored through the
    regs view of the destination. With readDest the old destination value
    is loaded before op runs.
    """
    global header_output, exec_output

    loadSrc = '''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
        destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
    '''

    code = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for idx in range(rCount):
        code += loadSrc % { "reg" : idx }
        if readDest:
            code += loadDest % { "reg" : idx }

    # How the source operand is handed to op.
    if fromInt:
        readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    # How the result is declared and stored.
    if toInt:
        declDest = 'uint32_t destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'

    # How the previous destination value is loaded, if it is at all.
    if not readDest:
        readDestCode = ''
    elif toInt:
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    else:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'

    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for idx in range(rCount):
        code += storeDest % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1588
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a narrowing instruction with one wide source and an immediate.

    Four register pieces are read into a BigRegVect source and two are
    written from a RegVect destination. op computes destElem from the
    BigElement srcElem1; with readDest the old destination element is
    loaded into destElem first.
    """
    global header_output, exec_output

    srcPieces = 4
    destPieces = 2

    code = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcPieces):
        code += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
    if readDest:
        for idx in range(destPieces):
            code += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for idx in range(destPieces):
        code += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": code,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1632
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a widening instruction with one source and an immediate.

    Two register pieces are read into a RegVect source and four are written
    from a BigRegVect destination. op computes the BigElement destElem from
    srcElem1; with readDest the old destination element is loaded into
    destElem first.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value goes into the per-element local destElem; destReg
        # is the whole destination vector and cannot take an element.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1676
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction with one source and one destination of equal size.

    op computes destElem from srcElem1 for each element i. The result is
    stored at index j, which starts out equal to i and is visible to op.
    With readDest the old destination element is loaded into destElem
    first.
    """
    global header_output, exec_output

    loadSrc = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    code = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        code += loadSrc % { "reg" : idx }
        if readDest:
            code += loadDest % { "reg" : idx }

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for idx in range(rCount):
        code += storeDest % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1719
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction that feeds one indexed source element to op.

    For every destination element, srcElem1 is the source element selected
    by imm and op computes destElem from it. With readDest the old
    destination element is loaded into destElem first.
    """
    global header_output, exec_output

    loadSrc = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    code = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        code += loadSrc % { "reg" : idx }
        if readDest:
            code += loadDest % { "reg" : idx }

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for idx in range(rCount):
        code += storeDest % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1761
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction that rewrites both of its registers in place.

    Both srcReg1 and destReg are loaded from their registers, op is
    inserted verbatim (it gets no per-element loop from this generator),
    and both vectors are then written back to FpDestP<n> and FpOp1P<n>.

    readDest is accepted for signature compatibility only: the destination
    is always read, so the flag has no effect on the generated code.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1796
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit a floating point instruction with one source and a destination.

    op computes destReg from the float srcReg1 for each register piece r.
    With toInt the destination is a RegVect of uint32_t results rather than
    an array of floats. With readDest the old destination value is loaded
    into destReg before op runs.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    destType = 'float'
    readDestExpr = 'destRegs[r]'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        # An integer destination is a RegVect, which is only reachable
        # through its regs member; the read has to match the write.
        destType = 'uint32_t'
        readDestExpr = 'destRegs.regs[r]'
        writeDest = 'destRegs.regs[r] = destReg;'
    readDestCode = ''
    if readDest:
        # The generated loop below counts with r, so the read uses r too.
        readDestCode = 'destReg = %s;' % readDestExpr
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        float srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1862
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction that folds adjacent source elements together.

    For each of the eCount / 2 results, srcElem1 and srcElem2 are source
    elements 2 * i and 2 * i + 1, and op computes the BigElement destElem
    from them. With readDest the old destination element is loaded into
    destElem first.
    """
    global header_output, exec_output

    loadSrc = '''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    code = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for idx in range(rCount):
        code += loadSrc % { "reg" : idx }
        if readDest:
            code += loadDest % { "reg" : idx }

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for idx in range(rCount):
        code += storeDest % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1906
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a narrowing instruction with one wide source and no immediate.

    Four register pieces are read into a BigRegVect source and two are
    written from a RegVect destination. op computes destElem from the
    BigElement srcElem1; with readDest the old destination element is
    loaded into destElem first.
    """
    global header_output, exec_output

    srcPieces = 4
    destPieces = 2

    code = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcPieces):
        code += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
    if readDest:
        for idx in range(destPieces):
            code += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for idx in range(destPieces):
        code += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": code,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1950
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction with only a destination register and an immediate.

    op computes destElem for every element; no source register is loaded.
    With readDest the old destination element is loaded into destElem
    first.
    """
    global header_output, exec_output

    loadDest = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    code = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    if readDest:
        for idx in range(rCount):
            code += loadDest % { "reg" : idx }

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for idx in range(rCount):
        code += storeDest % { "reg" : idx }

    params = InstObjParams(name, Name,
                           "RegImmOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1988
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a widening instruction with one source and no immediate.

    Two register pieces are read into a RegVect source and four are written
    from a BigRegVect destination. op computes the BigElement destElem from
    srcElem1; with readDest the old destination element is loaded into
    destElem first.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value goes into the per-element local destElem; destReg
        # is the whole destination vector and cannot take an element.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2032
2033 vhaddCode = '''
2034 Element carryBit =
2035 (((unsigned)srcElem1 & 0x1) +
2036 ((unsigned)srcElem2 & 0x1)) >> 1;
2037 // Use division instead of a shift to ensure the sign extension works
2038 // right. The compiler will figure out if it can be a shift. Mask the
2039 // inputs so they get truncated correctly.
2040 destElem = (((srcElem1 & ~(Element)1) / 2) +
2041 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2042 '''
2043 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
2044 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2045
2046 vrhaddCode = '''
2047 Element carryBit =
2048 (((unsigned)srcElem1 & 0x1) +
2049 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2050 // Use division instead of a shift to ensure the sign extension works
2051 // right. The compiler will figure out if it can be a shift. Mask the
2052 // inputs so they get truncated correctly.
2053 destElem = (((srcElem1 & ~(Element)1) / 2) +
2054 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2055 '''
2056 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2057 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2058
2059 vhsubCode = '''
2060 Element barrowBit =
2061 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2062 // Use division instead of a shift to ensure the sign extension works
2063 // right. The compiler will figure out if it can be a shift. Mask the
2064 // inputs so they get truncated correctly.
2065 destElem = (((srcElem1 & ~(Element)1) / 2) -
2066 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2067 '''
2068 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2069 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2070
2071 vandCode = '''
2072 destElem = srcElem1 & srcElem2;
2073 '''
2074 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
2075 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
2076
2077 vbicCode = '''
2078 destElem = srcElem1 & ~srcElem2;
2079 '''
2080 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
2081 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
2082
2083 vorrCode = '''
2084 destElem = srcElem1 | srcElem2;
2085 '''
2086 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
2087 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
2088
2089 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
2090 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
2091
2092 vornCode = '''
2093 destElem = srcElem1 | ~srcElem2;
2094 '''
2095 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
2096 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
2097
2098 veorCode = '''
2099 destElem = srcElem1 ^ srcElem2;
2100 '''
2101 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
2102 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2103
2104 vbifCode = '''
2105 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
2106 '''
2107 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
2108 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
2109 vbitCode = '''
2110 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
2111 '''
2112 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
2113 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
2114 vbslCode = '''
2115 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
2116 '''
2117 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
2118 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2119
# Integer VMAX / VMIN: pick the larger / smaller of the two source elements.
# Registered over allTypes; the comparison is done in the element type the
# snippet is instantiated with. Both snippets are reused further down for
# the pairwise vpmax / vpmin registrations.
2120 vmaxCode = '''
2121 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2122 '''
2123 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2124 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2125
2126 vminCode = '''
2127 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2128 '''
2129 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2130 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2131
# Integer VADD: plain element-wise sum in the element type. Registered over
# unsignedTypes only.
2132 vaddCode = '''
2133 destElem = srcElem1 + srcElem2;
2134 '''
2135 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2136 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2137
# VPADD reuses the same sum snippet; pairwise=True is handed to the helper
# and only a D form is registered here.
2138 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2139 2, vaddCode, pairwise=True)
# VADDL / VADDW: both operands are cast to BigElement before the add, so the
# sum is formed at the wider width. One snippet serves the long and the wide
# helper.
2140 vaddlwCode = '''
2141 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2142 '''
2143 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2144 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# VADDHN: form the sum in BigElement and shift right by the bit width of
# Element before assigning to the destination.
2145 vaddhnCode = '''
2146 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2147 (sizeof(Element) * 8);
2148 '''
2149 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# VRADDHN: as VADDHN, but 1 << (Element bits - 1) is added before the shift
# so the discarded low part rounds instead of truncating.
2150 vraddhnCode = '''
2151 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2152 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2153 (sizeof(Element) * 8);
2154 '''
2155 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2156
# Integer VSUB: element-wise difference in the element type, registered over
# unsignedTypes in D and Q forms.
2157 vsubCode = '''
2158 destElem = srcElem1 - srcElem2;
2159 '''
2160 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2161 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# VSUBL / VSUBW: both operands are cast to BigElement before subtracting;
# the same snippet is used for the long and the wide helper.
2162 vsublwCode = '''
2163 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2164 '''
2165 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2166 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2167
# VQADD (unsigned): add, then detect wrap-around by checking whether the sum
# is smaller than either operand. On wrap the result is forced to
# (Element)(-1) (all ones) and fpscr.qc is set. FpscrQc is read into a local
# FPSCR at the start and written back at the end.
2168 vqaddUCode = '''
2169 destElem = srcElem1 + srcElem2;
2170 FPSCR fpscr = (FPSCR) FpscrQc;
2171 if (destElem < srcElem1 || destElem < srcElem2) {
2172 destElem = (Element)(-1);
2173 fpscr.qc = 1;
2174 }
2175 FpscrQc = fpscr;
2176 '''
2177 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2178 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# VSUBHN: form the difference in BigElement and shift right by the bit width
# of Element before assigning to the destination.
2179 vsubhnCode = '''
2180 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2181 (sizeof(Element) * 8);
2182 '''
2183 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# VRSUBHN: as VSUBHN, with the rounding constant 1 << (Element bits - 1)
# added before the shift.
2184 vrsubhnCode = '''
2185 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2186 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2187 (sizeof(Element) * 8);
2188 '''
2189 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2190
# VQADD (signed): add, then detect overflow from the operand and result
# signs - overflow happened when both operands share a sign and the result
# has the other one. The result is then replaced by the element type's
# max() or min() and fpscr.qc is set.
2191 vqaddSCode = '''
2192 destElem = srcElem1 + srcElem2;
2193 FPSCR fpscr = (FPSCR) FpscrQc;
2194 bool negDest = (destElem < 0);
2195 bool negSrc1 = (srcElem1 < 0);
2196 bool negSrc2 = (srcElem2 < 0);
2197 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2198 if (negDest)
2199 /* If (>=0) plus (>=0) yields (<0), saturate to +. */
2200 destElem = std::numeric_limits<Element>::max();
2201 else
2202 /* If (<0) plus (<0) yields (>=0), saturate to -. */
2203 destElem = std::numeric_limits<Element>::min();
2204 fpscr.qc = 1;
2205 }
2206 FpscrQc = fpscr;
2207 '''
2208 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2209 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2210
# VQSUB (unsigned): subtract, then detect borrow by checking whether the
# difference is larger than the minuend. On borrow the result is clamped to
# 0 and fpscr.qc is set.
2211 vqsubUCode = '''
2212 destElem = srcElem1 - srcElem2;
2213 FPSCR fpscr = (FPSCR) FpscrQc;
2214 if (destElem > srcElem1) {
2215 destElem = 0;
2216 fpscr.qc = 1;
2217 }
2218 FpscrQc = fpscr;
2219 '''
2220 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2221 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2222
# VQSUB (signed): subtract, then detect overflow from the signs - it can
# only happen when the operands have opposite signs (negSrc1 == posSrc2) and
# the result's sign differs from the minuend's. The result is then replaced
# by the element type's max() or min() and fpscr.qc is set.
2223 vqsubSCode = '''
2224 destElem = srcElem1 - srcElem2;
2225 FPSCR fpscr = (FPSCR) FpscrQc;
2226 bool negDest = (destElem < 0);
2227 bool negSrc1 = (srcElem1 < 0);
2228 bool posSrc2 = (srcElem2 >= 0);
2229 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2230 if (negDest)
2231 /* If (>=0) minus (<0) yields (<0), saturate to +. */
2232 destElem = std::numeric_limits<Element>::max();
2233 else
2234 /* If (<0) minus (>=0) yields (>=0), saturate to -. */
2235 destElem = std::numeric_limits<Element>::min();
2236 fpscr.qc = 1;
2237 }
2238 FpscrQc = fpscr;
2239 '''
2240 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2241 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2242
# Integer compares. Each snippet writes (Element)(-1), i.e. all ones, when
# the relation holds and 0 otherwise. vcgt / vcge are registered over
# allTypes; vceq only over unsignedTypes.
2243 vcgtCode = '''
2244 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2245 '''
2246 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2247 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2248
2249 vcgeCode = '''
2250 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2251 '''
2252 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2253 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2254
2255 vceqCode = '''
2256 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2257 '''
2258 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2259 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2260
# VSHL (register): the shift amount is the low byte of srcElem2 interpreted
# as a signed 8-bit value. A negative amount shifts right by its magnitude,
# a non-negative amount shifts left.
#  - Right shift by the element width or more gives 0, and shiftAmt is
#    pinned to width - 1 so the sign-extension fix-up below ORs in all ones.
#  - The fix-up only applies when ltz(srcElem1) holds and the shifted value
#    is not already negative.
#  - Left shift by the element width or more gives 0.
# The name vshlCode is bound again later in the file for the immediate
# form; these two registrations are passed the string bound here.
2261 vshlCode = '''
2262 int16_t shiftAmt = (int8_t)srcElem2;
2263 if (shiftAmt < 0) {
2264 shiftAmt = -shiftAmt;
2265 if (shiftAmt >= sizeof(Element) * 8) {
2266 shiftAmt = sizeof(Element) * 8 - 1;
2267 destElem = 0;
2268 } else {
2269 destElem = (srcElem1 >> shiftAmt);
2270 }
2271 // Make sure the right shift sign extended when it should.
2272 if (ltz(srcElem1) && !ltz(destElem)) {
2273 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2274 1 - shiftAmt));
2275 }
2276 } else {
2277 if (shiftAmt >= sizeof(Element) * 8) {
2278 destElem = 0;
2279 } else {
2280 destElem = srcElem1 << shiftAmt;
2281 }
2282 }
2283 '''
2284 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2285 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2286
# VRSHL (register): rounding variant of the register shift. The amount is
# the signed low byte of srcElem2.
#  - Negative amount: right shift by the magnitude. rBit is the last bit
#    shifted out (bit shiftAmt - 1) while the magnitude is at most the
#    element width, and is forced to 1 for a larger shift of a value for
#    which ltz() holds. rBit is added after the shift and the sign fix-up.
#  - Positive amount: left shift; the element width or more gives 0.
#  - Zero: the source is copied unchanged.
# NOTE(review): registered under SimdAluOp while the non-rounding vshl above
# uses SimdShiftOp - confirm whether that is intentional.
2287 vrshlCode = '''
2288 int16_t shiftAmt = (int8_t)srcElem2;
2289 if (shiftAmt < 0) {
2290 shiftAmt = -shiftAmt;
2291 Element rBit = 0;
2292 if (shiftAmt <= sizeof(Element) * 8)
2293 rBit = bits(srcElem1, shiftAmt - 1);
2294 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2295 rBit = 1;
2296 if (shiftAmt >= sizeof(Element) * 8) {
2297 shiftAmt = sizeof(Element) * 8 - 1;
2298 destElem = 0;
2299 } else {
2300 destElem = (srcElem1 >> shiftAmt);
2301 }
2302 // Make sure the right shift sign extended when it should.
2303 if (ltz(srcElem1) && !ltz(destElem)) {
2304 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2305 1 - shiftAmt));
2306 }
2307 destElem += rBit;
2308 } else if (shiftAmt > 0) {
2309 if (shiftAmt >= sizeof(Element) * 8) {
2310 destElem = 0;
2311 } else {
2312 destElem = srcElem1 << shiftAmt;
2313 }
2314 } else {
2315 destElem = srcElem1;
2316 }
2317 '''
2318 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2319 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2320
# VQSHL (register, unsigned): saturating shift by the signed low byte of
# srcElem2.
#  - Negative amount: plain right shift by the magnitude (0 once the
#    magnitude reaches the element width); never saturates.
#  - Positive amount: if the shift is at least the element width and the
#    source is non-zero, or if any of the top shiftAmt bits of the source
#    are set, the result saturates to mask(element bits) and fpscr.qc is
#    set. Otherwise the plain left shift is used.
#  - Zero: the source is copied unchanged.
2321 vqshlUCode = '''
2322 int16_t shiftAmt = (int8_t)srcElem2;
2323 FPSCR fpscr = (FPSCR) FpscrQc;
2324 if (shiftAmt < 0) {
2325 shiftAmt = -shiftAmt;
2326 if (shiftAmt >= sizeof(Element) * 8) {
2327 shiftAmt = sizeof(Element) * 8 - 1;
2328 destElem = 0;
2329 } else {
2330 destElem = (srcElem1 >> shiftAmt);
2331 }
2332 } else if (shiftAmt > 0) {
2333 if (shiftAmt >= sizeof(Element) * 8) {
2334 if (srcElem1 != 0) {
2335 destElem = mask(sizeof(Element) * 8);
2336 fpscr.qc = 1;
2337 } else {
2338 destElem = 0;
2339 }
2340 } else {
2341 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2342 sizeof(Element) * 8 - shiftAmt)) {
2343 destElem = mask(sizeof(Element) * 8);
2344 fpscr.qc = 1;
2345 } else {
2346 destElem = srcElem1 << shiftAmt;
2347 }
2348 }
2349 } else {
2350 destElem = srcElem1;
2351 }
2352 FpscrQc = fpscr;
2353 '''
2354 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2355 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2356
# VQSHL (register, signed): saturating shift by the signed low byte of
# srcElem2.
#  - Negative amount: right shift by the magnitude with an explicit
#    sign-extension fix-up for negative sources.
#  - Positive amount: saturation is needed when the shift is at least the
#    element width and the source is non-zero, or when the top
#    shiftAmt + 1 bits of the source are not all copies of the sign (all
#    ones for a negative source, all zeros otherwise). On saturation
#    fpscr.qc is set and the result becomes mask(element bits - 1),
#    complemented when the source is negative.
#  - Zero: the source is copied unchanged.
# NOTE(review): registered under SimdCmpOp while the unsigned form of the
# same instruction uses SimdAluOp - confirm which op class is intended.
2357 vqshlSCode = '''
2358 int16_t shiftAmt = (int8_t)srcElem2;
2359 FPSCR fpscr = (FPSCR) FpscrQc;
2360 if (shiftAmt < 0) {
2361 shiftAmt = -shiftAmt;
2362 if (shiftAmt >= sizeof(Element) * 8) {
2363 shiftAmt = sizeof(Element) * 8 - 1;
2364 destElem = 0;
2365 } else {
2366 destElem = (srcElem1 >> shiftAmt);
2367 }
2368 // Make sure the right shift sign extended when it should.
2369 if (srcElem1 < 0 && destElem >= 0) {
2370 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2371 1 - shiftAmt));
2372 }
2373 } else if (shiftAmt > 0) {
2374 bool sat = false;
2375 if (shiftAmt >= sizeof(Element) * 8) {
2376 if (srcElem1 != 0)
2377 sat = true;
2378 else
2379 destElem = 0;
2380 } else {
2381 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2382 sizeof(Element) * 8 - 1 - shiftAmt) !=
2383 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2384 sat = true;
2385 } else {
2386 destElem = srcElem1 << shiftAmt;
2387 }
2388 }
2389 if (sat) {
2390 fpscr.qc = 1;
2391 destElem = mask(sizeof(Element) * 8 - 1);
2392 if (srcElem1 < 0)
2393 destElem = ~destElem;
2394 }
2395 } else {
2396 destElem = srcElem1;
2397 }
2398 FpscrQc = fpscr;
2399 '''
2400 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2401 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2402
# VQRSHL (register, unsigned): saturating, rounding shift by the signed low
# byte of srcElem2.
#  - Negative amount: right shift by the magnitude, then add the last bit
#    shifted out (rBit) to round. A magnitude of the element width or more
#    leaves only the rounding bit.
#  - Positive amount: left shift; if the shift is at least the element width
#    and the source is non-zero, or any of the top shiftAmt bits are set,
#    the result saturates to all ones and fpscr.qc is set.
#  - Zero: the source is copied unchanged. This case used to fall into the
#    left-shift branch, where it evaluated bits(srcElem1, N - 1, N): an
#    inverted bit range that shifts by the full element width. It is now
#    handled explicitly, the same way vqshlUCode does.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        // A zero shift neither moves nor saturates anything.
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2440
# VQRSHL (register, signed): saturating, rounding shift by the signed low
# byte of srcElem2.
#  - Negative amount: right shift by the magnitude with the sign-extension
#    fix-up, then add rBit. rBit is the last bit shifted out while the
#    magnitude is at most the element width, and is forced to 1 for a larger
#    shift of a negative source.
#  - Positive amount: same saturation test as the non-rounding signed
#    vqshl - the top shiftAmt + 1 bits must all equal the sign, otherwise
#    the result saturates towards the sign of the source and fpscr.qc is
#    set.
#  - Zero: the source is copied unchanged.
2441 vqrshlSCode = '''
2442 int16_t shiftAmt = (int8_t)srcElem2;
2443 FPSCR fpscr = (FPSCR) FpscrQc;
2444 if (shiftAmt < 0) {
2445 shiftAmt = -shiftAmt;
2446 Element rBit = 0;
2447 if (shiftAmt <= sizeof(Element) * 8)
2448 rBit = bits(srcElem1, shiftAmt - 1);
2449 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2450 rBit = 1;
2451 if (shiftAmt >= sizeof(Element) * 8) {
2452 shiftAmt = sizeof(Element) * 8 - 1;
2453 destElem = 0;
2454 } else {
2455 destElem = (srcElem1 >> shiftAmt);
2456 }
2457 // Make sure the right shift sign extended when it should.
2458 if (srcElem1 < 0 && destElem >= 0) {
2459 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2460 1 - shiftAmt));
2461 }
2462 destElem += rBit;
2463 } else if (shiftAmt > 0) {
2464 bool sat = false;
2465 if (shiftAmt >= sizeof(Element) * 8) {
2466 if (srcElem1 != 0)
2467 sat = true;
2468 else
2469 destElem = 0;
2470 } else {
2471 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2472 sizeof(Element) * 8 - 1 - shiftAmt) !=
2473 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2474 sat = true;
2475 } else {
2476 destElem = srcElem1 << shiftAmt;
2477 }
2478 }
2479 if (sat) {
2480 fpscr.qc = 1;
2481 destElem = mask(sizeof(Element) * 8 - 1);
2482 if (srcElem1 < 0)
2483 destElem = ~destElem;
2484 }
2485 } else {
2486 destElem = srcElem1;
2487 }
2488 FpscrQc = fpscr;
2489 '''
2490 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2491 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2492
# Absolute-difference family. Every snippet subtracts the smaller operand
# from the larger one, so the difference is never negative.
#
# VABA: accumulate the absolute difference into the destination; the
# trailing True goes with the snippet reading destElem.
2493 vabaCode = '''
2494 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2495 (srcElem2 - srcElem1);
2496 '''
2497 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2498 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# VABAL: as VABA, with both operands cast to BigElement before subtracting.
2499 vabalCode = '''
2500 destElem += (srcElem1 > srcElem2) ?
2501 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2502 ((BigElement)srcElem2 - (BigElement)srcElem1);
2503 '''
2504 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2505
# VABD: absolute difference without accumulation.
2506 vabdCode = '''
2507 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2508 (srcElem2 - srcElem1);
2509 '''
2510 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2511 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# VABDL: as VABD, with both operands cast to BigElement before subtracting.
2512 vabdlCode = '''
2513 destElem = (srcElem1 > srcElem2) ?
2514 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2515 ((BigElement)srcElem2 - (BigElement)srcElem1);
2516 '''
2517 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2518
# VTST: the result element is all ones when the two sources share at least
# one set bit (srcElem1 & srcElem2 is non-zero), otherwise 0.
2519 vtstCode = '''
2520 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2521 '''
2522 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2523 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2524
# Integer multiply and multiply-accumulate. The "l" variants cast both
# operands to BigElement before multiplying. The accumulate snippets read
# destElem, and their registrations pass the trailing True. These snippets
# are reused further down by the twoEqualRegInst / twoRegLongInst
# registrations.
2525 vmulCode = '''
2526 destElem = srcElem1 * srcElem2;
2527 '''
2528 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2529 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2530 vmullCode = '''
2531 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2532 '''
2533 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2534
2535 vmlaCode = '''
2536 destElem = destElem + srcElem1 * srcElem2;
2537 '''
2538 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2539 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2540 vmlalCode = '''
2541 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2542 '''
2543 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2544
# VQDMLAL: saturating doubling multiply-accumulate long.
#
# Step 1 forms the doubled product 2 * srcElem1 * srcElem2 at BigElement
# width. It exceeds the BigElement range only when both operands are the
# most negative Element value; in that case it saturates to the largest
# positive BigElement and fpscr.qc is set. The earlier code also saturated
# when one operand was half the most negative value, although that product
# (2^(2n-2) for n-bit elements) fits. The saturating case is now decided
# before the multiplication, so the overflowing int64_t product is never
# evaluated for 32-bit elements.
#
# Step 2 adds the product to the destination and saturates the sum towards
# the sign of the old destination when both addends share a sign that the
# sum does not, again setting fpscr.qc.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Only maxNeg * maxNeg doubles past the BigElement range.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2569
# VQDMLSL: saturating doubling multiply-subtract long.
#
# Step 1 forms the doubled product 2 * srcElem1 * srcElem2 at BigElement
# width. It exceeds the BigElement range only when both operands are the
# most negative Element value; in that case it saturates to the largest
# positive BigElement and fpscr.qc is set. The earlier code also saturated
# when one operand was half the most negative value, although that product
# (2^(2n-2) for n-bit elements) fits. The saturating case is now decided
# before the multiplication, so the overflowing int64_t product is never
# evaluated for 32-bit elements.
#
# Step 2 subtracts the product from the destination and saturates the
# difference towards the sign of the old destination when the subtraction
# overflowed, again setting fpscr.qc.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Only maxNeg * maxNeg doubles past the BigElement range.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2594
# VQDMULL: saturating doubling multiply long. The doubled product is
# computed through int64_t. When both operands equal the most negative
# Element value the result is replaced by ~(srcElem1 << Element bits) at
# BigElement width and fpscr.qc is set.
# NOTE(review): the product is evaluated before the saturation check, so
# for 32-bit elements the min * min case overflows int64_t before being
# overwritten - consider testing first.
2595 vqdmullCode = '''
2596 FPSCR fpscr = (FPSCR) FpscrQc;
2597 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2598 if (srcElem1 == srcElem2 &&
2599 srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2600 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2601 fpscr.qc = 1;
2602 }
2603 FpscrQc = fpscr;
2604 '''
2605 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2606
# VMLS / VMLSL: subtract the product of the sources from the destination.
# The long form casts both operands to BigElement before multiplying. Both
# snippets read destElem, and the registrations pass the trailing True.
2607 vmlsCode = '''
2608 destElem = destElem - srcElem1 * srcElem2;
2609 '''
2610 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2611 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2612 vmlslCode = '''
2613 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2614 '''
2615 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2616
# Polynomial (carry-less) multiply: for every set bit j of srcElem2, XOR
# srcElem1 shifted left by j into the result. The instructions are
# registered under the plain "vmul" / "vmull" mnemonics with distinct class
# names (NVmulp*, Vmullp).
2617 vmulpCode = '''
2618 destElem = 0;
2619 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2620 if (bits(srcElem2, j))
2621 destElem ^= srcElem1 << j;
2622 }
2623 '''
2624 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2625 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long form: srcElem1 is cast to BigElement before shifting, so bits moved
# past the Element width are kept.
2626 vmullpCode = '''
2627 destElem = 0;
2628 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2629 if (bits(srcElem2, j))
2630 destElem ^= (BigElement)srcElem1 << j;
2631 }
2632 '''
2633 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2634
# Pairwise integer max / min: these reuse the vmaxCode / vminCode snippets
# bound earlier in the file and pass pairwise=True. Only D forms are
# registered here.
2635 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2636
2637 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2638
# VQDMULH: saturating doubling multiply returning the high half. The
# doubled product is computed through int64_t and shifted right by the
# Element bit width. When both operands equal the most negative Element
# value the result is replaced by ~srcElem1 and fpscr.qc is set.
# NOTE(review): the product is evaluated before the saturation check, so
# for 32-bit elements the min * min case overflows int64_t before being
# overwritten - consider testing first.
2639 vqdmulhCode = '''
2640 FPSCR fpscr = (FPSCR) FpscrQc;
2641 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2642 (sizeof(Element) * 8);
2643 if (srcElem1 == srcElem2 &&
2644 srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2645 destElem = ~srcElem1;
2646 fpscr.qc = 1;
2647 }
2648 FpscrQc = fpscr;
2649 '''
2650 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2651 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2652
# VQRDMULH: saturating rounding doubling multiply returning the high half:
# (2 * srcElem1 * srcElem2 + (1 << (Element bits - 1))) >> Element bits.
#
# The rounded result leaves the Element range only when both operands are
# the most negative Element value; it then saturates to the largest positive
# Element and fpscr.qc is set. The earlier code also treated "half the most
# negative value times the most negative value" as saturating, and because
# that product is positive it wrote the most negative value with QC set.
# For 16-bit elements -16384 * -32768 produced -32768 instead of 16384.
# Deciding the saturating case first also means the overflowing int64_t
# product is never evaluated for 32-bit elements.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // The only operand pair whose rounded high half does not fit.
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    } else {
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
                  "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
                  "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2676
# Floating-point min/max family. vMinMaxFpCode is a template whose %s is
# filled with the operation name, so the generated snippet calls
# fplibMax / fplibMaxNum / fplibMin / fplibMinNum on the two source
# elements together with fpscr.
2677 vMinMaxFpCode = '''
2678 destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
2679 '''
# Each row is (mnemonic, class name, count argument, fplib operation name,
# pairwise flag).
2680 vMinMaxInsts = [
2681 ("vmax", "VmaxDFp", 2, "Max", False, ),
2682 ("vmax", "VmaxQFp", 4, "Max", False, ),
2683 ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ),
2684 ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ),
2685 ("vpmax", "VpmaxDFp", 2, "Max", True, ),
2686 ("vpmax", "VpmaxQFp", 4, "Max", True, ),
2687 ("vmin", "VminDFp", 2, "Min", False, ),
2688 ("vmin", "VminQFp", 4, "Min", False, ),
2689 ("vminnm", "VminnmDFp", 2, "MinNum", False, ),
2690 ("vminnm", "VminnmQFp", 4, "MinNum", False, ),
2691 ("vpmin", "VpminDFp", 2, "Min", True, ),
2692 ("vpmin", "VpminQFp", 4, "Min", True, ),
2693 ]
# Register every row. The element type is uint32_t, so the values are handed
# to fplib as raw 32-bit patterns. standardFpcsr=True is passed to the
# helper; the snippet itself never declares fpscr.
# NOTE(review): presumably the helper supplies fpscr when that flag is set -
# confirm against the helper definition.
2694 for name, Name, rCount, op, pairwise in vMinMaxInsts:
2695 threeEqualRegInst(
2696 name,
2697 Name,
2698 "SimdFloatCmpOp",
2699 ("uint32_t",),
2700 rCount,
2701 vMinMaxFpCode % op,
2702 pairwise=pairwise,
2703 standardFpcsr=True,
2704 )
2705
# Single-precision add / pairwise add / subtract / multiply. Each snippet
# copies FpscrExc into a local FPSCR, runs binaryOp with the matching
# fpAddS / fpSubS / fpMulS function and VfpRoundNearest, and writes the
# FPSCR back to FpscrExc.
# NOTE(review): the meaning of the two literal "true" arguments to binaryOp
# is not visible here - see its definition.
2706 vaddfpCode = '''
2707 FPSCR fpscr = (FPSCR) FpscrExc;
2708 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2709 true, true, VfpRoundNearest);
2710 FpscrExc = fpscr;
2711 '''
2712 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2713 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2714
# The pairwise add reuses the add snippet with pairwise=True.
2715 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2716 2, vaddfpCode, pairwise=True)
2717 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2718 4, vaddfpCode, pairwise=True)
2719
2720 vsubfpCode = '''
2721 FPSCR fpscr = (FPSCR) FpscrExc;
2722 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2723 true, true, VfpRoundNearest);
2724 FpscrExc = fpscr;
2725 '''
2726 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2727 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2728
2729 vmulfpCode = '''
2730 FPSCR fpscr = (FPSCR) FpscrExc;
2731 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2732 true, true, VfpRoundNearest);
2733 FpscrExc = fpscr;
2734 '''
2735 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2736 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2737
# Single-precision multiply-accumulate family. All snippets read destReg and
# are registered with the trailing True.
#
# VMLA: two separate binaryOp steps - multiply into "mid", then add the
# destination.
2738 vmlafpCode = '''
2739 FPSCR fpscr = (FPSCR) FpscrExc;
2740 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2741 true, true, VfpRoundNearest);
2742 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2743 true, true, VfpRoundNearest);
2744 FpscrExc = fpscr;
2745 '''
2746 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2747 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2748
# VFMA: a single ternaryOp using fpMulAdd<float> on the two sources and the
# destination.
2749 vfmafpCode = '''
2750 FPSCR fpscr = (FPSCR) FpscrExc;
2751 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2752 true, true, VfpRoundNearest);
2753 FpscrExc = fpscr;
2754 '''
2755 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2756 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2757
# VFMS: same as VFMA but with srcReg1 negated before the call.
2758 vfmsfpCode = '''
2759 FPSCR fpscr = (FPSCR) FpscrExc;
2760 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2761 true, true, VfpRoundNearest);
2762 FpscrExc = fpscr;
2763 '''
2764 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2765 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2766
# VMLS: multiply into "mid", then subtract it from the destination
# (destReg - mid) in a second binaryOp step.
2767 vmlsfpCode = '''
2768 FPSCR fpscr = (FPSCR) FpscrExc;
2769 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2770 true, true, VfpRoundNearest);
2771 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2772 true, true, VfpRoundNearest);
2773 FpscrExc = fpscr;
2774 '''
2775 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2776 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2777
# Single-precision compares (vcgt, vcge, vacgt, vacge, vceq). All five
# snippets share one shape: binaryOp runs the matching v*Func helper and
# returns a float code in "res".
#  - res == 0 selects the -1 (all ones) result; any other value gives 0.
#  - res == 2.0 additionally sets fpscr.ioc.
# NOTE(review): 2.0 is presumably the helper's marker for an invalid
# comparison such as a NaN operand - confirm in the v*Func definitions.
# Every registration passes toInt = True.
2778 vcgtfpCode = '''
2779 FPSCR fpscr = (FPSCR) FpscrExc;
2780 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2781 true, true, VfpRoundNearest);
2782 destReg = (res == 0) ? -1 : 0;
2783 if (res == 2.0)
2784 fpscr.ioc = 1;
2785 FpscrExc = fpscr;
2786 '''
2787 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2788 2, vcgtfpCode, toInt = True)
2789 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2790 4, vcgtfpCode, toInt = True)
2791
2792 vcgefpCode = '''
2793 FPSCR fpscr = (FPSCR) FpscrExc;
2794 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2795 true, true, VfpRoundNearest);
2796 destReg = (res == 0) ? -1 : 0;
2797 if (res == 2.0)
2798 fpscr.ioc = 1;
2799 FpscrExc = fpscr;
2800 '''
2801 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2802 2, vcgefpCode, toInt = True)
2803 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2804 4, vcgefpCode, toInt = True)
2805
2806 vacgtfpCode = '''
2807 FPSCR fpscr = (FPSCR) FpscrExc;
2808 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2809 true, true, VfpRoundNearest);
2810 destReg = (res == 0) ? -1 : 0;
2811 if (res == 2.0)
2812 fpscr.ioc = 1;
2813 FpscrExc = fpscr;
2814 '''
2815 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2816 2, vacgtfpCode, toInt = True)
2817 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2818 4, vacgtfpCode, toInt = True)
2819
2820 vacgefpCode = '''
2821 FPSCR fpscr = (FPSCR) FpscrExc;
2822 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2823 true, true, VfpRoundNearest);
2824 destReg = (res == 0) ? -1 : 0;
2825 if (res == 2.0)
2826 fpscr.ioc = 1;
2827 FpscrExc = fpscr;
2828 '''
2829 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2830 2, vacgefpCode, toInt = True)
2831 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2832 4, vacgefpCode, toInt = True)
2833
2834 vceqfpCode = '''
2835 FPSCR fpscr = (FPSCR) FpscrExc;
2836 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2837 true, true, VfpRoundNearest);
2838 destReg = (res == 0) ? -1 : 0;
2839 if (res == 2.0)
2840 fpscr.ioc = 1;
2841 FpscrExc = fpscr;
2842 '''
2843 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2844 2, vceqfpCode, toInt = True)
2845 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2846 4, vceqfpCode, toInt = True)
2847
# VRECPS / VRSQRTS: each snippet is a single binaryOp call using fpRecpsS or
# fpRSqrtsS, with FpscrExc read before and written back after. Note the
# differing op classes: SimdFloatMultAccOp for vrecps, SimdFloatMiscOp for
# vrsqrts.
2848 vrecpsCode = '''
2849 FPSCR fpscr = (FPSCR) FpscrExc;
2850 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2851 true, true, VfpRoundNearest);
2852 FpscrExc = fpscr;
2853 '''
2854 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2855 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2856
2857 vrsqrtsCode = '''
2858 FPSCR fpscr = (FPSCR) FpscrExc;
2859 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2860 true, true, VfpRoundNearest);
2861 FpscrExc = fpscr;
2862 '''
2863 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2864 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2865
# Single-precision VABD: subtract through binaryOp / fpSubS, then take the
# absolute value of the difference with fabs().
2866 vabdfpCode = '''
2867 FPSCR fpscr = (FPSCR) FpscrExc;
2868 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2869 true, true, VfpRoundNearest);
2870 destReg = fabs(mid);
2871 FpscrExc = fpscr;
2872 '''
2873 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2874 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2875
# Second set of registrations for the multiply family, built with the
# twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst helpers. They reuse
# the snippets bound earlier in the file (vmlaCode, vmlafpCode, vmlalCode,
# ...) unchanged; only the helper and the class name differ.
# NOTE(review): the class names carry an extra "s", which presumably marks
# the by-scalar encodings - confirm against the helper definitions.
# NOTE(review): the integer vmla rows use unsignedTypes while the vmls and
# vmul rows below use allTypes - confirm this is intentional.
2876 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2877 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2878 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2879 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2880 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2881
2882 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2883 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2884 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2885 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2886 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2887
2888 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2889 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2890 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2891 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2892 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2893
# Saturating doubling forms, again reusing the earlier snippets.
2894 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2895 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2896 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2897 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2898 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2899 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2900 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2901 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2902 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2903
# VSHR (immediate): shift right by imm. When imm is at least the element
# width the shift is not performed; the result is -1 if ltz(srcElem1) holds
# and 0 otherwise.
2904 vshrCode = '''
2905 if (imm >= sizeof(srcElem1) * 8) {
2906 if (ltz(srcElem1))
2907 destElem = -1;
2908 else
2909 destElem = 0;
2910 } else {
2911 destElem = srcElem1 >> imm;
2912 }
2913 '''
2914 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2915 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2916
# VSRA (immediate): shift the source right by imm and accumulate the result
# into the destination.
#  - imm at least the element width: the shifted value is -1 when
#    ltz(srcElem1) holds and 0 otherwise.
#  - Otherwise the plain shift is used, with a fix-up that ORs in the sign
#    bits when the source is negative but the shifted value is not.
# The snippet reads destElem, hence the trailing True on the registrations.
# The stray second semicolon after the declaration of "mid" was removed; it
# only emitted an empty statement into the generated C++.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                        (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2932
# VRSHR (immediate): rounding shift right.
#  - imm greater than the element width: result is 0.
#  - imm non-zero: the shift is split into (>> (imm - 1)) >> 1, so an imm
#    equal to the element width never shifts by the full width in one
#    operation; the last bit shifted out (bit imm - 1) is then added.
#  - imm zero: the source is copied.
2933 vrshrCode = '''
2934 if (imm > sizeof(srcElem1) * 8) {
2935 destElem = 0;
2936 } else if (imm) {
2937 Element rBit = bits(srcElem1, imm - 1);
2938 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2939 } else {
2940 destElem = srcElem1;
2941 }
2942 '''
2943 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2944 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2945
# VRSRA: the same three cases as VRSHR, but the value is accumulated into
# the destination ("+= 0" in the first branch leaves it unchanged).
2946 vrsraCode = '''
2947 if (imm > sizeof(srcElem1) * 8) {
2948 destElem += 0;
2949 } else if (imm) {
2950 Element rBit = bits(srcElem1, imm - 1);
2951 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2952 } else {
2953 destElem += srcElem1;
2954 }
2955 '''
2956 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2957 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2958
# VSRI (immediate): shift right and insert. When imm is at least the element
# width the destination is left as it is (the self-assignment is a no-op).
# Otherwise the shifted source is combined with the destination bits
# selected by ~mask(element bits - imm), i.e. the top imm bits of the old
# destination are kept. Registered over unsignedTypes.
2959 vsriCode = '''
2960 if (imm >= sizeof(Element) * 8) {
2961 destElem = destElem;
2962 } else {
2963 destElem = (srcElem1 >> imm) |
2964 (destElem & ~mask(sizeof(Element) * 8 - imm));
2965 }
2966 '''
2967 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2968 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2969
# VSHL (immediate): shift left by imm. When imm is at least the element
# width the shift is done in two steps (width - 1, then 1), so no single
# shift uses the full width.
# This rebinds the Python name vshlCode, which was used earlier in the file
# for the register-shift form; the earlier registrations were already passed
# the previous string.
2970 vshlCode = '''
2971 if (imm >= sizeof(Element) * 8) {
2972 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2973 } else {
2974 destElem = srcElem1 << imm;
2975 }
2976 '''
2977 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2978 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2979
# VSLI (immediate): shift left and insert. When imm is at least the element
# width the destination is left as it is (the self-assignment is a no-op).
# Otherwise the shifted source is combined with the low imm bits of the old
# destination (destElem & mask(imm)).
2980 vsliCode = '''
2981 if (imm >= sizeof(Element) * 8) {
2982 destElem = destElem;
2983 } else {
2984 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2985 }
2986 '''
2987 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2988 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2989
# VQSHL (immediate, signed): saturating shift left by imm.
#  - imm at least the element width: any non-zero source saturates. The
#    result starts as the type's min() and is complemented to the maximum
#    when the source is positive; fpscr.qc is set.
#  - imm non-zero: the shift is performed, then the top imm + 1 bits of the
#    source are inspected. Unless they are all zeros or all ones, the result
#    is replaced by the saturated value as above and fpscr.qc is set.
#  - imm zero: the source is copied.
2990 vqshlCode = '''
2991 FPSCR fpscr = (FPSCR) FpscrQc;
2992 if (imm >= sizeof(Element) * 8) {
2993 if (srcElem1 != 0) {
2994 destElem = std::numeric_limits<Element>::min();
2995 if (srcElem1 > 0)
2996 destElem = ~destElem;
2997 fpscr.qc = 1;
2998 } else {
2999 destElem = 0;
3000 }
3001 } else if (imm) {
3002 destElem = (srcElem1 << imm);
3003 uint64_t topBits = bits((uint64_t)srcElem1,
3004 sizeof(Element) * 8 - 1,
3005 sizeof(Element) * 8 - 1 - imm);
3006 if (topBits != 0 && topBits != mask(imm + 1)) {
3007 destElem = std::numeric_limits<Element>::min();
3008 if (srcElem1 > 0)
3009 destElem = ~destElem;
3010 fpscr.qc = 1;
3011 }
3012 } else {
3013 destElem = srcElem1;
3014 }
3015 FpscrQc = fpscr;
3016 '''
3017 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3018 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3019
# VQSHL (immediate, unsigned elements): saturating left shift.  If any of
# the top imm source bits are set (or the source is non-zero when imm is
# at least the element width) the result becomes mask(element width) and
# QC is set.  imm == 0 copies the source.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
# One instruction class per register count (D form: 2, Q form: 4).
for vqshluName, vqshluRegs in (("NVqshluD", 2), ("NVqshluQ", 4)):
    twoRegShiftInst("vqshlu", vqshluName, "SimdShiftOp", unsignedTypes,
                    vqshluRegs, vqshluCode)
3045
# VQSHLU (signed source, unsigned saturation): a negative source always
# produces 0 with QC set; a positive source whose shifted-out bits are
# non-zero produces mask(element width) with QC set.  The three branches
# cover imm >= width, 0 < imm < width and imm == 0 respectively.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
# One instruction class per register count (D form: 2, Q form: 4).
for vqshlusName, vqshlusRegs in (("NVqshlusD", 2), ("NVqshlusQ", 4)):
    twoRegShiftInst("vqshlus", vqshlusName, "SimdShiftOp", signedTypes,
                    vqshlusRegs, vqshlusCode)
3082
# VSHRN: shift right and narrow.  A shift of the full source width or
# more yields 0.
vshrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''

# VRSHRN: rounding shift right and narrow.  The rounding increment is
# bit (imm - 1) of the source; the shift is split into two steps.
vrshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''

# Both narrowing shifts share op class and element types; emit them in
# the same order as they are defined above.
for narrowMnem, narrowClass, narrowCode in (
        ("vshrn", "NVshrn", vshrnCode),
        ("vrshrn", "NVrshrn", vrshrnCode)):
    twoRegNarrowShiftInst(narrowMnem, narrowClass, "SimdShiftOp",
                          smallUnsignedTypes, narrowCode)
3103
# VQSHRN (signed): saturating shift right and narrow.  After the shift the
# sign bit is re-extended by hand (the "mid |= -(...)" line); if the value
# does not survive a round trip through the narrow Element type, the
# result saturates to mask(width - 1), or its complement for a negative
# source, and QC is set.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst(
    "vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3128
# Unsigned saturating shift right and narrow (class NVqshrun): if the
# shifted value does not fit the narrow Element type the result becomes
# mask(element width) and QC is set.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst(
    "vqshrun", "NVqshrun", "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3150
# Signed-source, unsigned-result saturating shift right and narrow (class
# NVqshruns): if any bit above the narrow width remains set after the
# shift, a negative source yields 0 and a non-negative one yields
# mask(element width); QC is set in both cases.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst(
    "vqshrun", "NVqshruns", "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3177
# VQRSHRN (signed): saturating rounding shift right and narrow.  The
# rounding bit is taken after shifting by (imm - 1), the sign is
# re-extended by hand, then the rounding bit is added.  Values that do
# not fit the narrow Element type saturate to mask(width - 1), or its
# complement for a negative source, and set QC.  The imm == 0 branch
# applies the same saturation to the unshifted source.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst(
    "vqrshrn", "NVqrshrn", "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3213
# Unsigned saturating rounding shift right and narrow (class NVqrshrun).
# The rounding bit is taken after shifting by (imm - 1) and added once the
# final ">> 1" has been applied.  A value that does not fit the narrow
# Element type saturates to mask(element width) and sets QC.
#
# The imm == 0 branch previously saturated to
# mask(sizeof(Element) * 8 - 1), which is the signed maximum.  That
# disagreed with the imm != 0 branch of this snippet and with vqmovunCode,
# both of which use the all-ones unsigned maximum, so it now uses
# mask(sizeof(Element) * 8) as well.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3243
# Signed-source, unsigned-result saturating rounding shift right and
# narrow (class NVqrshruns).  After the rounded shift, any bit left above
# the narrow width means saturation: 0 for a negative source,
# mask(element width) otherwise, with QC set.  With imm == 0 a negative
# source saturates to 0 and sets QC; anything else is copied.
vqrshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst(
    "vqrshrun", "NVqrshruns", "SimdShiftOp", smallSignedTypes,
    vqrshrunsCode)
3280
# VSHLL: widen the source to BigElement, then shift left.  A shift of the
# full destination width or more yields 0.
vshllCode = '''
    if (imm >= sizeof(destElem) * 8) {
        destElem = 0;
    } else {
        destElem = (BigElement)srcElem1 << imm;
    }
'''
twoRegLongShiftInst(
    "vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)

# VMOVL: plain widening copy, generated through the same long-shift
# helper.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst(
    "vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3294
# VCVT float -> unsigned fixed point (32 bit, imm passed through to
# vfpFpToFixed).  IDC is set when flushToZero() reports true for the
# input.  The empty __asm__ statements constrain srcElem1 / destReg to
# memory around the conversion -- presumably to stop the compiler moving
# the FP work outside the prepFpState / finishVfp window; confirm.
vcvt2ufxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for cvtUfxName, cvtUfxRegs in (("NVcvt2ufxD", 2), ("NVcvt2ufxQ", 4)):
    twoRegShiftInst("vcvt", cvtUfxName, "SimdCvtOp", ("float",),
                    cvtUfxRegs, vcvt2ufxCode, toInt = True)

# VCVT float -> signed fixed point: identical to the unsigned form apart
# from the second argument of vfpFpToFixed being true.
vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for cvtSfxName, cvtSfxRegs in (("NVcvt2sfxD", 2), ("NVcvt2sfxQ", 4)):
    twoRegShiftInst("vcvt", cvtSfxName, "SimdCvtOp", ("float",),
                    cvtSfxRegs, vcvt2sfxCode, toInt = True)
3326
# VCVT unsigned fixed point -> float, via vfpUFixedToFpS with a 32-bit
# width and imm passed through.  The empty __asm__ statements constrain
# the operands to memory around the conversion (purpose presumed to be a
# compiler barrier; confirm).
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for cvtU2fName, cvtU2fRegs in (("NVcvtu2fpD", 2), ("NVcvtu2fpQ", 4)):
    twoRegShiftInst("vcvt", cvtU2fName, "SimdCvtOp", ("float",),
                    cvtU2fRegs, vcvtu2fpCode, fromInt = True)

# VCVT signed fixed point -> float: same shape, using vfpSFixedToFpS.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for cvtS2fName, cvtS2fRegs in (("NVcvts2fpD", 2), ("NVcvts2fpQ", 4)):
    twoRegShiftInst("vcvt", cvtS2fName, "SimdCvtOp", ("float",),
                    cvtS2fRegs, vcvts2fpCode, fromInt = True)
3354
# VCVT single -> half precision.  The source bits are reinterpreted as a
# float with bitsToFp, IDC is set when flushToZero() reports true, and the
# conversion itself is done by vcvtFpSFpH using fpscr.ahp.
vcvts2hCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst(
    "vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)

# VCVT half -> single precision.  vcvtFpHFpS produces a float whose bit
# pattern is stored into the destination element with fpToBits.
vcvth2sCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst(
    "vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3384
# VRSQRTE on unsigned 32-bit elements: delegates to
# unsignedRSqrtEstimate.
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
for rsqrteName, rsqrteRegs in (("NVrsqrteD", 2), ("NVrsqrteQ", 4)):
    twoRegMiscInst("vrsqrte", rsqrteName, "SimdSqrtOp", ("uint32_t",),
                   rsqrteRegs, vrsqrteCode)

# VRSQRTE on floats: sets IDC when flushToZero() reports true for the
# input, then delegates to fprSqrtEstimate.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
for rsqrteFpName, rsqrteFpRegs in (("NVrsqrteDFp", 2), ("NVrsqrteQFp", 4)):
    twoRegMiscInstFp("vrsqrte", rsqrteFpName, "SimdFloatSqrtOp",
                     ("float",), rsqrteFpRegs, vrsqrtefpCode)
3400
# VRECPE on unsigned 32-bit elements: delegates to
# unsignedRecipEstimate.
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
for recpeName, recpeRegs in (("NVrecpeD", 2), ("NVrecpeQ", 4)):
    twoRegMiscInst("vrecpe", recpeName, "SimdMultAccOp", ("uint32_t",),
                   recpeRegs, vrecpeCode)

# VRECPE on floats: sets IDC when flushToZero() reports true for the
# input, then delegates to fpRecipEstimate.
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
for recpeFpName, recpeFpRegs in (("NVrecpeDFp", 2), ("NVrecpeQFp", 4)):
    twoRegMiscInstFp("vrecpe", recpeFpName, "SimdFloatMultAccOp",
                     ("float",), recpeFpRegs, vrecpefpCode)
3416
# VREV16 / VREV32 / VREV64: reverse the order of elements inside each
# 2-, 4- or 8-byte group.  Each snippet copies the element and sets j to
# i XOR (elements-per-group - 1); j and i are not declared here and are
# presumably the destination and source indices supplied by the
# enclosing template -- confirm.
vrev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
for rev16Name, rev16Regs in (("NVrev16D", 2), ("NVrev16Q", 4)):
    twoRegMiscInst("vrev16", rev16Name, "SimdAluOp", ("uint8_t",),
                   rev16Regs, vrev16Code)

vrev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
for rev32Name, rev32Regs in (("NVrev32D", 2), ("NVrev32Q", 4)):
    twoRegMiscInst("vrev32", rev32Name, "SimdAluOp",
                   ("uint8_t", "uint16_t"), rev32Regs, vrev32Code)

vrev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
for rev64Name, rev64Regs in (("NVrev64D", 2), ("NVrev64Q", 4)):
    twoRegMiscInst("vrev64", rev64Name, "SimdAluOp", smallUnsignedTypes,
                   rev64Regs, vrev64Code)
3443
# NOTE(review): split() is defined outside this section; it presumably
# starts a new chunk of the generated exec output -- confirm.  The
# vcompares / vcomparesL text is appended right after it.
split('exec')
exec_output += vcompares + vcomparesL

# VPADDL: add each adjacent pair of source elements in the wider
# BigElement type.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
for vpaddlName, vpaddlRegs in (("NVpaddlD", 2), ("NVpaddlQ", 4)):
    twoRegCondenseInst("vpaddl", vpaddlName, "SimdAddOp", smallTypes,
                       vpaddlRegs, vpaddlCode)

# VPADAL: as VPADDL, but accumulated into the existing destination.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
for vpadalName, vpadalRegs in (("NVpadalD", 2), ("NVpadalQ", 4)):
    twoRegCondenseInst("vpadal", vpadalName, "SimdAddAccOp", smallTypes,
                       vpadalRegs, vpadalCode, True)
3458
# VCLS: count how many bits directly below the sign bit are equal to it.
# The source is shifted left once to drop the sign bit, then shifted
# repeatedly while its (new) top bit still matches the original sign,
# up to (element width - 1) times.
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
for vclsName, vclsRegs in (("NVclsD", 2), ("NVclsQ", 4)):
    twoRegMiscInst("vcls", vclsName, "SimdAluOp", signedTypes,
                   vclsRegs, vclsCode)
3478
# VCLZ: count leading zeros.  Instantiated for signed element types so
# that "top bit clear" can be tested as srcElem1 >= 0; the loop stops at
# the first set top bit or after a full element width of shifts.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
for vclzName, vclzRegs in (("NVclzD", 2), ("NVclzQ", 4)):
    twoRegMiscInst("vclz", vclzName, "SimdAluOp", signedTypes,
                   vclzRegs, vclzCode)
3489
# VCNT: population count.  The low bit is added to the accumulator and
# the source shifted right until it becomes zero.  Note that the loop
# guard compares the running bit total, not an iteration counter,
# against the element width.
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

for vcntName, vcntRegs in (("NVcntD", 2), ("NVcntQ", 4)):
    twoRegMiscInst("vcnt", vcntName, "SimdAluOp", unsignedTypes,
                   vcntRegs, vcntCode)
3501
# VMVN (register): bitwise NOT.  The operation is width-agnostic, so only
# the uint64_t element type is instantiated.
vmvnCode = '''
    destElem = ~srcElem1;
'''
for vmvnName, vmvnRegs in (("NVmvnD", 2), ("NVmvnQ", 4)):
    twoRegMiscInst("vmvn", vmvnName, "SimdAluOp", ("uint64_t",),
                   vmvnRegs, vmvnCode)
3507
# VQABS: saturating absolute value.  The most negative value has no
# positive counterpart, so it becomes its bitwise complement and sets QC.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for vqabsName, vqabsRegs in (("NVqabsD", 2), ("NVqabsQ", 4)):
    twoRegMiscInst("vqabs", vqabsName, "SimdAluOp", signedTypes,
                   vqabsRegs, vqabsCode)
3522
# VQNEG: saturating negate.  The most negative value becomes its bitwise
# complement and sets QC; every other value is simply negated.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
for vqnegName, vqnegRegs in (("NVqnegD", 2), ("NVqnegQ", 4)):
    twoRegMiscInst("vqneg", vqnegName, "SimdAluOp", signedTypes,
                   vqnegRegs, vqnegCode)
3535
# VABS (integer): non-saturating absolute value.
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

for vabsName, vabsRegs in (("NVabsD", 2), ("NVabsQ", 4)):
    twoRegMiscInst("vabs", vabsName, "SimdAluOp", signedTypes,
                   vabsRegs, vabsCode)

# VABS (float): clears the top bit of the 32-bit pattern through a union
# instead of using a floating-point operation.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
for vabsFpName, vabsFpRegs in (("NVabsDFp", 2), ("NVabsQFp", 4)):
    twoRegMiscInstFp("vabs", vabsFpName, "SimdFloatAluOp", ("float",),
                     vabsFpRegs, vabsfpCode)
3558
# VNEG (integer): non-saturating negate.
vnegCode = '''
    destElem = -srcElem1;
'''
for vnegName, vnegRegs in (("NVnegD", 2), ("NVnegQ", 4)):
    twoRegMiscInst("vneg", vnegName, "SimdAluOp", signedTypes,
                   vnegRegs, vnegCode)

# VNEG (float): negates the whole register value.
vnegfpCode = '''
    destReg = -srcReg1;
'''
for vnegFpName, vnegFpRegs in (("NVnegDFp", 2), ("NVnegQFp", 4)):
    twoRegMiscInstFp("vneg", vnegFpName, "SimdFloatAluOp", ("float",),
                     vnegFpRegs, vnegfpCode)
3569
# VCGT against zero (integer): all-ones element when the source is
# greater than 0, otherwise 0.
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
for vcgtName, vcgtRegs in (("NVcgtD", 2), ("NVcgtQ", 4)):
    twoRegMiscInst("vcgt", vcgtName, "SimdCmpOp", signedTypes,
                   vcgtRegs, vcgtCode)

# VCGT against zero (float): evaluated through binaryOp with vcgtFunc.
# A result of 2.0 raises IOC; the meaning of the individual return
# values is defined by vcgtFunc, which is outside this section.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for vcgtFpName, vcgtFpRegs in (("NVcgtDFp", 2), ("NVcgtQFp", 4)):
    twoRegMiscInstFp("vcgt", vcgtFpName, "SimdFloatCmpOp", ("float",),
                     vcgtFpRegs, vcgtfpCode, toInt = True)
3586
# VCGE against zero (integer): all-ones element when the source is
# greater than or equal to 0, otherwise 0.
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
for vcgeName, vcgeRegs in (("NVcgeD", 2), ("NVcgeQ", 4)):
    twoRegMiscInst("vcge", vcgeName, "SimdCmpOp", signedTypes,
                   vcgeRegs, vcgeCode)

# VCGE against zero (float): evaluated through binaryOp with vcgeFunc.
# A result of 2.0 raises IOC; the meaning of the individual return
# values is defined by vcgeFunc, which is outside this section.
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for vcgeFpName, vcgeFpRegs in (("NVcgeDFp", 2), ("NVcgeQFp", 4)):
    twoRegMiscInstFp("vcge", vcgeFpName, "SimdFloatCmpOp", ("float",),
                     vcgeFpRegs, vcgefpCode, toInt = True)
3603
# VCEQ against zero (integer): all-ones element when the source equals 0,
# otherwise 0.
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
for vceqName, vceqRegs in (("NVceqD", 2), ("NVceqQ", 4)):
    twoRegMiscInst("vceq", vceqName, "SimdCmpOp", signedTypes,
                   vceqRegs, vceqCode)

# VCEQ against zero (float): evaluated through binaryOp with vceqFunc.
# A result of 2.0 raises IOC; the meaning of the individual return
# values is defined by vceqFunc, which is outside this section.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for vceqFpName, vceqFpRegs in (("NVceqDFp", 2), ("NVceqQFp", 4)):
    twoRegMiscInstFp("vceq", vceqFpName, "SimdFloatCmpOp", ("float",),
                     vceqFpRegs, vceqfpCode, toInt = True)
3620
# VCLE against zero (integer): all-ones element when the source is less
# than or equal to 0, otherwise 0.
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
for vcleName, vcleRegs in (("NVcleD", 2), ("NVcleQ", 4)):
    twoRegMiscInst("vcle", vcleName, "SimdCmpOp", signedTypes,
                   vcleRegs, vcleCode)

# VCLE against zero (float): evaluated through binaryOp with vcleFunc.
# A result of 2.0 raises IOC; the meaning of the individual return
# values is defined by vcleFunc, which is outside this section.
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for vcleFpName, vcleFpRegs in (("NVcleDFp", 2), ("NVcleQFp", 4)):
    twoRegMiscInstFp("vcle", vcleFpName, "SimdFloatCmpOp", ("float",),
                     vcleFpRegs, vclefpCode, toInt = True)
3637
# VCLT against zero (integer): all-ones element when the source is less
# than 0, otherwise 0.
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
for vcltName, vcltRegs in (("NVcltD", 2), ("NVcltQ", 4)):
    twoRegMiscInst("vclt", vcltName, "SimdCmpOp", signedTypes,
                   vcltRegs, vcltCode)

# VCLT against zero (float): evaluated through binaryOp with vcltFunc.
# A result of 2.0 raises IOC; the meaning of the individual return
# values is defined by vcltFunc, which is outside this section.
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for vcltFpName, vcltFpRegs in (("NVcltDFp", 2), ("NVcltQFp", 4)):
    twoRegMiscInstFp("vclt", vcltFpName, "SimdFloatCmpOp", ("float",),
                     vcltFpRegs, vcltfpCode, toInt = True)
3654
# VSWP: exchange the two operands one 32-bit register word at a time,
# using mid as the temporary.
vswpCode = '''
    uint32_t mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
for vswpName, vswpRegs in (("NVswpD", 2), ("NVswpQ", 4)):
    twoRegMiscScramble("vswp", vswpName, "SimdAluOp", ("uint64_t",),
                       vswpRegs, vswpCode)
3665
# VTRN: for every even index i, exchange srcReg1.elements[i] with
# destReg.elements[i + 1].
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
for vtrnName, vtrnRegs in (("NVtrnD", 2), ("NVtrnQ", 4)):
    twoRegMiscScramble("vtrn", vtrnName, "SimdAluOp",
                       smallUnsignedTypes, vtrnRegs, vtrnCode)
3678
# VUZP: de-interleave.  destReg ends up with the even-indexed elements
# (its own, then srcReg1's) and srcReg1 with the odd-indexed ones.
# srcReg1 is snapshotted into mid first because it is overwritten while
# its original contents are still needed.  The upper half of destReg is
# filled in a second loop, after the first loop has finished reading
# destReg's original elements.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
for vuzpName, vuzpRegs in (("NVuzpD", 2), ("NVuzpQ", 4)):
    twoRegMiscScramble("vuzp", vuzpName, "SimdAluOp", unsignedTypes,
                       vuzpRegs, vuzpCode)
3693
# VZIP: interleave.  destReg receives the interleaved lower halves of the
# two operands and srcReg1 the interleaved upper halves.  destReg is
# snapshotted into mid because it is overwritten in the first loop while
# its upper half is still needed by the second.
#
# The second loop's index is declared unsigned, matching the first loop
# and every other element loop in this file section (it was the lone
# "int i", which is compared against eCount / 2).
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3708
# VMOVN: narrowing copy; the truncation comes from assigning to the
# narrower destination element.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst(
    "vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)

# VDUP (scalar): plain element copy, generated by twoRegMiscScInst.
vdupCode = 'destElem = srcElem1;'
for vdupName, vdupRegs in (("NVdupD", 2), ("NVdupQ", 4)):
    twoRegMiscScInst("vdup", vdupName, "SimdAluOp", smallUnsignedTypes,
                     vdupRegs, vdupCode)
3715
def vdupGprInst(name, Name, opClass, types, rCount):
    # Emit a VDUP that broadcasts Op1 (cast to Element) into every element
    # of the destination vector.
    #   name    - mnemonic handed to InstObjParams
    #   Name    - generated C++ class name
    #   opClass - op class string stored under "op_class"
    #   types   - element types to emit NeonExecDeclare text for
    #   rCount  - number of 32-bit destination registers written back
    global header_output, exec_output
    fillCode = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register word.
    writeBack = "".join('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx } for regIdx in range(rCount))
    eWalkCode = simdEnabledCheckCode + fillCode + writeBack
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })

for vdupGprName, vdupGprRegs in (("NVdupDGpr", 2), ("NVdupQGpr", 4)):
    vdupGprInst("vdup", vdupGprName, "SimdMiscOp", smallUnsignedTypes,
                vdupGprRegs)
3742
# One-register immediate forms.  Each is instantiated for uint64_t only,
# once per register count (D form: 2, Q form: 4).

# VMOV (immediate): overwrite with the immediate.
vmovCode = 'destElem = imm;'
for vmoviName, vmoviRegs in (("NVmoviD", 2), ("NVmoviQ", 4)):
    oneRegImmInst("vmov", vmoviName, "SimdMiscOp", ("uint64_t",),
                  vmoviRegs, vmovCode)

# VORR (immediate): OR the immediate into the destination.
vorrCode = 'destElem |= imm;'
for vorriName, vorriRegs in (("NVorriD", 2), ("NVorriQ", 4)):
    oneRegImmInst("vorr", vorriName, "SimdAluOp", ("uint64_t",),
                  vorriRegs, vorrCode, True)

# VMVN (immediate): overwrite with the complement of the immediate.
# This rebinds vmvnCode, which held the register-form snippet earlier in
# the file; that form has already been emitted by this point.
vmvnCode = 'destElem = ~imm;'
for vmvniName, vmvniRegs in (("NVmvniD", 2), ("NVmvniQ", 4)):
    oneRegImmInst("vmvn", vmvniName, "SimdAluOp", ("uint64_t",),
                  vmvniRegs, vmvnCode)

# VBIC (immediate): clear the destination bits that are set in the
# immediate.
vbicCode = 'destElem &= ~imm;'
for vbiciName, vbiciRegs in (("NVbiciD", 2), ("NVbiciQ", 4)):
    oneRegImmInst("vbic", vbiciName, "SimdAluOp", ("uint64_t",),
                  vbiciRegs, vbicCode, True)
3758
# VQMOVN (signed): saturating narrow.  If widening the truncated value
# back does not reproduce the source, the result saturates to
# mask(width - 1), or its complement for a negative source, and QC is
# set.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst(
    "vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3771
# Unsigned saturating narrow (class NVqmovun): if the truncated value
# does not widen back to the source, the result becomes
# mask(element width) and QC is set.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst(
    "vqmovun", "NVqmovun", "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3783
# Signed-source, unsigned-result saturating narrow (class NVqmovuns).
# A negative source, or one whose low bits do not reproduce it, sets QC;
# the result is then mask(element width), complemented when the source is
# negative.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst(
    "vqmovun", "NVqmovuns", "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3798
def buildVext(name, Name, opClass, types, rCount, op):
    # Emit a three-register-plus-immediate NEON instruction whose body is
    # the C++ snippet in op.  The generated code loads both source
    # vectors register by register, runs op, then writes destReg back.
    #   name    - mnemonic handed to InstObjParams
    #   Name    - generated C++ class name
    #   opClass - op class string stored under "op_class"
    #   types   - element types to emit NeonExecDeclare text for
    #   rCount  - number of 32-bit registers per operand
    #   op      - C++ text operating on srcReg1, srcReg2 and destReg
    global header_output, exec_output
    declCode = '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    loadCode = "".join('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : regIdx } for regIdx in range(rCount))
    storeCode = "".join('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx } for regIdx in range(rCount))
    eWalkCode = simdEnabledCheckCode + declCode + loadCode + op + storeCode
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
3826
# VEXT: destination element i comes from position (i + imm) of the
# concatenation srcReg1:srcReg2.  An index that falls beyond both sources
# raises an UndefinedInstruction fault; the loop is not exited early when
# that happens.
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = std::make_shared<UndefinedInstruction>(machInst,
                                                               false,
                                                               mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
for vextName, vextRegs in (("NVextD", 2), ("NVextQ", 4)):
    buildVext("vext", vextName, "SimdMiscOp", ("uint8_t",),
              vextRegs, vextCode)
3846
def buildVtbxl(name, Name, opClass, length, isVtbl):
    # Emit a VTBL / VTBX table lookup.
    #   name    - mnemonic handed to InstObjParams
    #   Name    - generated C++ class name
    #   opClass - op class string stored under "op_class"
    #   length  - number of 8-byte table registers (callers pass 1..4)
    #   isVtbl  - the text "true" or "false", pasted into the C++ as the
    #             initialiser of a bool: true zeroes out-of-range lanes,
    #             false leaves the destination byte unchanged
    global header_output, decoder_output, exec_output
    # Note: "%" binds tighter than "+", so only the literal below is
    # formatted; simdEnabledCheckCode is prepended untouched.
    code = simdEnabledCheckCode + '''
    union
    {
        uint8_t bytes[32];
        uint32_t regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        uint32_t regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    # The table always has eight 32-bit words; those past the requested
    # length are zero-filled instead of being read from operand registers.
    usedWords = length * 2
    for tblWord in range(8):
        if tblWord >= usedWords:
            wordInit = 'table.regs[%(reg)d] = 0;\n'
        else:
            wordInit = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n'
        code += wordInit % { "reg" : tblWord }
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": code,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)

# Table lengths 1..4, first for VTBL and then for VTBX.
for tblLen in (1, 2, 3, 4):
    buildVtbxl("vtbl", "NVtbl%d" % tblLen, "SimdMiscOp", tblLen, "true")

for tbxLen in (1, 2, 3, 4):
    buildVtbxl("vtbx", "NVtbx%d" % tbxLen, "SimdMiscOp", tbxLen, "false")
3910}};