// sve.isa (13816:5a101ab471c9) sve.isa (13824:54e92033cf67)
1// Copyright (c) 2017-2019 ARM Limited
2// All rights reserved
3//
4// The license below extends only to copyright in the software and shall
5// not be construed as granting a license to any other intellectual
6// property including but not limited to intellectual property relating
7// to a hardware implementation of the functionality of the software
8// licensed hereunder. You may use the software subject to the license
9// terms below provided that you ensure that this notice is replicated
10// unmodified and in its entirety in all distributions of the software,
11// modified or unmodified, in source code or in binary form.
12//
13// Redistribution and use in source and binary forms, with or without
14// modification, are permitted provided that the following conditions are
15// met: redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer;
17// redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution;
20// neither the name of the copyright holders nor the names of its
21// contributors may be used to endorse or promote products derived from
22// this software without specific prior written permission.
23//
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Authors: Giacomo Gabrielli
37
38// @file Definition of SVE instructions.
39
40output header {{
41
42 // Decodes unary, constructive, predicated (merging) SVE instructions,
43 // handling signed and unsigned variants.
44 template <template <typename T> class BaseS,
45 template <typename T> class BaseU>
46 StaticInstPtr
47 decodeSveUnaryPred(unsigned size, unsigned u, ExtMachInst machInst,
48 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
49 {
50 switch (size) {
51 case 0:
52 if (u) {
53 return new BaseU<uint8_t>(machInst, dest, op1, gp);
54 } else {
55 return new BaseS<int8_t>(machInst, dest, op1, gp);
56 }
57 case 1:
58 if (u) {
59 return new BaseU<uint16_t>(machInst, dest, op1, gp);
60 } else {
61 return new BaseS<int16_t>(machInst, dest, op1, gp);
62 }
63 case 2:
64 if (u) {
65 return new BaseU<uint32_t>(machInst, dest, op1, gp);
66 } else {
67 return new BaseS<int32_t>(machInst, dest, op1, gp);
68 }
69 case 3:
70 if (u) {
71 return new BaseU<uint64_t>(machInst, dest, op1, gp);
72 } else {
73 return new BaseS<int64_t>(machInst, dest, op1, gp);
74 }
75 default:
76 return new Unknown64(machInst);
77 }
78 }
79
80 // Decodes SVE widening reductions.
81 // handling signed and unsigned variants.
82 template <template <typename T1, typename T2> class BaseS,
83 template <typename T1, typename T2> class BaseU>
84 StaticInstPtr
85 decodeSveWideningReduc(unsigned size, unsigned u, ExtMachInst machInst,
86 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
87 {
88 switch (size) {
89 case 0:
90 if (u) {
91 return new BaseU<uint8_t, uint64_t>(machInst, dest, op1, gp);
92 } else {
93 return new BaseS<int8_t, int64_t>(machInst, dest, op1, gp);
94 }
95 case 1:
96 if (u) {
97 return new BaseU<uint16_t, uint64_t>(machInst, dest, op1, gp);
98 } else {
99 return new BaseS<int16_t, int64_t>(machInst, dest, op1, gp);
100 }
101 case 2:
102 if (u) {
103 return new BaseU<uint32_t, uint64_t>(machInst, dest, op1, gp);
104 } else {
105 return new BaseS<int32_t, int64_t>(machInst, dest, op1, gp);
106 }
107 case 3:
108 assert(u);
109 return new BaseU<uint64_t, uint64_t>(machInst, dest, op1, gp);
110 default:
111 return new Unknown64(machInst);
112 }
113 }
114
115 // Decodes unary, constructive, predicated (merging) SVE instructions,
116 // handling signed variants only.
117 template <template <typename T> class Base>
118 StaticInstPtr
119 decodeSveUnaryPredS(unsigned size, ExtMachInst machInst,
120 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
121 {
122 switch (size) {
123 case 0:
124 return new Base<int8_t>(machInst, dest, op1, gp);
125 case 1:
126 return new Base<int16_t>(machInst, dest, op1, gp);
127 case 2:
128 return new Base<int32_t>(machInst, dest, op1, gp);
129 case 3:
130 return new Base<int64_t>(machInst, dest, op1, gp);
131 default:
132 return new Unknown64(machInst);
133 }
134 }
135
136 // Decodes unary, constructive, predicated (merging) SVE instructions,
137 // handling unsigned variants only.
138 template <template <typename T> class Base>
139 StaticInstPtr
140 decodeSveUnaryPredU(unsigned size, ExtMachInst machInst,
141 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
142 {
143 switch (size) {
144 case 0:
145 return new Base<uint8_t>(machInst, dest, op1, gp);
146 case 1:
147 return new Base<uint16_t>(machInst, dest, op1, gp);
148 case 2:
149 return new Base<uint32_t>(machInst, dest, op1, gp);
150 case 3:
151 return new Base<uint64_t>(machInst, dest, op1, gp);
152 default:
153 return new Unknown64(machInst);
154 }
155 }
156
157 // Decodes unary, constructive, predicated (merging) SVE instructions,
158 // handling signed and unsigned variants, for small element sizes (8- to
159 // 32-bit).
160 template <template <typename T> class BaseS,
161 template <typename T> class BaseU>
162 StaticInstPtr
163 decodeSveUnaryPredSmall(unsigned size, unsigned u, ExtMachInst machInst,
164 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
165 {
166 switch (size) {
167 case 0:
168 if (u) {
169 return new BaseU<uint8_t>(machInst, dest, op1, gp);
170 } else {
171 return new BaseS<int8_t>(machInst, dest, op1, gp);
172 }
173 case 1:
174 if (u) {
175 return new BaseU<uint16_t>(machInst, dest, op1, gp);
176 } else {
177 return new BaseS<int16_t>(machInst, dest, op1, gp);
178 }
179 case 2:
180 if (u) {
181 return new BaseU<uint32_t>(machInst, dest, op1, gp);
182 } else {
183 return new BaseS<int32_t>(machInst, dest, op1, gp);
184 }
185 default:
186 return new Unknown64(machInst);
187 }
188 }
189
190 // Decodes unary, constructive, predicated (merging) SVE instructions,
191 // handling floating point variants only.
192 template <template <typename T> class Base>
193 StaticInstPtr
194 decodeSveUnaryPredF(unsigned size, ExtMachInst machInst,
195 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
196 {
197 switch (size) {
198 case 1:
199 return new Base<uint16_t>(machInst, dest, op1, gp);
200 case 2:
201 return new Base<uint32_t>(machInst, dest, op1, gp);
202 case 3:
203 return new Base<uint64_t>(machInst, dest, op1, gp);
204 default:
205 return new Unknown64(machInst);
206 }
207 }
208
209 // Decodes unary, constructive, unpredicated SVE instructions, handling
210 // unsigned variants only.
211 template <template <typename T> class Base>
212 StaticInstPtr
213 decodeSveUnaryUnpredU(unsigned size, ExtMachInst machInst,
214 IntRegIndex dest, IntRegIndex op1)
215 {
216 switch (size) {
217 case 0:
218 return new Base<uint8_t>(machInst, dest, op1);
219 case 1:
220 return new Base<uint16_t>(machInst, dest, op1);
221 case 2:
222 return new Base<uint32_t>(machInst, dest, op1);
223 case 3:
224 return new Base<uint64_t>(machInst, dest, op1);
225 default:
226 return new Unknown64(machInst);
227 }
228 }
229
230 // Decodes unary, constructive, unpredicated SVE instructions, handling
231 // floating-point variants only.
232 template <template <typename T> class Base>
233 StaticInstPtr
234 decodeSveUnaryUnpredF(unsigned size, ExtMachInst machInst,
235 IntRegIndex dest, IntRegIndex op1)
236 {
237 switch (size) {
238 case 1:
239 return new Base<uint16_t>(machInst, dest, op1);
240 case 2:
241 return new Base<uint32_t>(machInst, dest, op1);
242 case 3:
243 return new Base<uint64_t>(machInst, dest, op1);
244 default:
245 return new Unknown64(machInst);
246 }
247 }
248
249 // Decodes binary, destructive, predicated (merging) SVE instructions,
250 // handling signed and unsigned variants.
251 template <template <typename T> class BaseS,
252 template <typename T> class BaseU>
253 StaticInstPtr
254 decodeSveBinDestrPred(unsigned size, unsigned u, ExtMachInst machInst,
255 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
256 {
257 switch (size) {
258 case 0:
259 if (u) {
260 return new BaseU<uint8_t>(machInst, dest, op2, gp);
261 } else {
262 return new BaseS<int8_t>(machInst, dest, op2, gp);
263 }
264 case 1:
265 if (u) {
266 return new BaseU<uint16_t>(machInst, dest, op2, gp);
267 } else {
268 return new BaseS<int16_t>(machInst, dest, op2, gp);
269 }
270 case 2:
271 if (u) {
272 return new BaseU<uint32_t>(machInst, dest, op2, gp);
273 } else {
274 return new BaseS<int32_t>(machInst, dest, op2, gp);
275 }
276 case 3:
277 if (u) {
278 return new BaseU<uint64_t>(machInst, dest, op2, gp);
279 } else {
280 return new BaseS<int64_t>(machInst, dest, op2, gp);
281 }
282 default:
283 return new Unknown64(machInst);
284 }
285 }
286
287 // Decodes binary with immediate operand, constructive, unpredicated
288 // SVE instructions, handling signed variants only.
289 template <template <typename T> class Base>
290 StaticInstPtr
291 decodeSveBinImmUnpredS(unsigned size, ExtMachInst machInst,
292 IntRegIndex dest, IntRegIndex op1, unsigned immediate)
293 {
294 switch (size) {
295 case 0:
296 return new Base<int8_t>(machInst, dest, op1, immediate);
297 case 1:
298 return new Base<int16_t>(machInst, dest, op1, immediate);
299 case 2:
300 return new Base<int32_t>(machInst, dest, op1, immediate);
301 case 3:
302 return new Base<int64_t>(machInst, dest, op1, immediate);
303 default:
304 return new Unknown64(machInst);
305 }
306 }
307
308
309 // Decodes binary with immediate operand, constructive, unpredicated
310 // SVE instructions, handling unsigned variants only.
311 template <template <typename T> class Base>
312 StaticInstPtr
313 decodeSveBinImmUnpredU(unsigned size, ExtMachInst machInst,
314 IntRegIndex dest, IntRegIndex op1, unsigned immediate)
315 {
316 switch (size) {
317 case 0:
318 return new Base<uint8_t>(machInst, dest, op1, immediate);
319 case 1:
320 return new Base<uint16_t>(machInst, dest, op1, immediate);
321 case 2:
322 return new Base<uint32_t>(machInst, dest, op1, immediate);
323 case 3:
324 return new Base<uint64_t>(machInst, dest, op1, immediate);
325 default:
326 return new Unknown64(machInst);
327 }
328 }
329
330 // Decodes binary with immediate operand, destructive, predicated (merging)
331 // SVE instructions, handling unsigned variants only.
332 template <template <typename T> class Base>
333 StaticInstPtr
334 decodeSveBinImmPredU(unsigned size, ExtMachInst machInst, IntRegIndex dest,
335 unsigned immediate, IntRegIndex gp)
336 {
337 switch (size) {
338 case 0:
339 return new Base<uint8_t>(machInst, dest, immediate, gp);
340 case 1:
341 return new Base<uint16_t>(machInst, dest, immediate, gp);
342 case 2:
343 return new Base<uint32_t>(machInst, dest, immediate, gp);
344 case 3:
345 return new Base<uint64_t>(machInst, dest, immediate, gp);
346 default:
347 return new Unknown64(machInst);
348 }
349 }
350
351 // Decodes binary with immediate operand, destructive, predicated (merging)
352 // SVE instructions, handling signed variants only.
353 template <template <typename T> class Base>
354 StaticInstPtr
355 decodeSveBinImmPredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
356 unsigned immediate, IntRegIndex gp)
357 {
358 switch (size) {
359 case 0:
360 return new Base<int8_t>(machInst, dest, immediate, gp);
361 case 1:
362 return new Base<int16_t>(machInst, dest, immediate, gp);
363 case 2:
364 return new Base<int32_t>(machInst, dest, immediate, gp);
365 case 3:
366 return new Base<int64_t>(machInst, dest, immediate, gp);
367 default:
368 return new Unknown64(machInst);
369 }
370 }
371
372 // Decodes binary with immediate operand, destructive, predicated (merging)
373 // SVE instructions, handling floating-point variants only.
374 template <template <typename T> class Base>
375 StaticInstPtr
376 decodeSveBinImmPredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
377 uint64_t immediate, IntRegIndex gp)
378 {
379 switch (size) {
380 case 1:
381 return new Base<uint16_t>(machInst, dest, immediate, gp);
382 case 2:
383 return new Base<uint32_t>(machInst, dest, immediate, gp);
384 case 3:
385 return new Base<uint64_t>(machInst, dest, immediate, gp);
386 default:
387 return new Unknown64(machInst);
388 }
389 }
390
391 // Decodes unary/binary with wide immediate operand, destructive,
392 // unpredicated SVE instructions, handling unsigned variants only.
393 template <template <typename T> class Base>
394 StaticInstPtr
395 decodeSveWideImmUnpredU(unsigned size, ExtMachInst machInst,
396 IntRegIndex dest, uint64_t immediate)
397 {
398 switch (size) {
399 case 0:
400 return new Base<uint8_t>(machInst, dest, immediate);
401 case 1:
402 return new Base<uint16_t>(machInst, dest, immediate);
403 case 2:
404 return new Base<uint32_t>(machInst, dest, immediate);
405 case 3:
406 return new Base<uint64_t>(machInst, dest, immediate);
407 default:
408 return new Unknown64(machInst);
409 }
410 }
411
412 // Decodes unary/binary with wide immediate operand, destructive,
413 // unpredicated SVE instructions, handling signed variants only.
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeSveWideImmUnpredS(unsigned size, ExtMachInst machInst,
417 IntRegIndex dest, uint64_t immediate)
418 {
419 switch (size) {
420 case 0:
421 return new Base<int8_t>(machInst, dest, immediate);
422 case 1:
423 return new Base<int16_t>(machInst, dest, immediate);
424 case 2:
425 return new Base<int32_t>(machInst, dest, immediate);
426 case 3:
427 return new Base<int64_t>(machInst, dest, immediate);
428 default:
429 return new Unknown64(machInst);
430 }
431 }
432
433 // Decodes unary/binary with wide immediate operand, destructive,
434 // unpredicated SVE instructions, handling floating-point variants only.
435 template <template <typename T> class Base>
436 StaticInstPtr
437 decodeSveWideImmUnpredF(unsigned size, ExtMachInst machInst,
438 IntRegIndex dest, uint64_t immediate)
439 {
440 switch (size) {
441 case 1:
442 return new Base<uint16_t>(machInst, dest, immediate);
443 case 2:
444 return new Base<uint32_t>(machInst, dest, immediate);
445 case 3:
446 return new Base<uint64_t>(machInst, dest, immediate);
447 default:
448 return new Unknown64(machInst);
449 }
450 }
451
452 // Decodes unary/binary with wide immediate operand, destructive,
453 // predicated SVE instructions, handling unsigned variants only.
454 template <template <typename T> class Base>
455 StaticInstPtr
456 decodeSveWideImmPredU(unsigned size, ExtMachInst machInst,
457 IntRegIndex dest, uint64_t immediate, IntRegIndex gp,
458 bool isMerging = true)
459 {
460 switch (size) {
461 case 0:
462 return new Base<uint8_t>(machInst, dest, immediate, gp,
463 isMerging);
464 case 1:
465 return new Base<uint16_t>(machInst, dest, immediate, gp,
466 isMerging);
467 case 2:
468 return new Base<uint32_t>(machInst, dest, immediate, gp,
469 isMerging);
470 case 3:
471 return new Base<uint64_t>(machInst, dest, immediate, gp,
472 isMerging);
473 default:
474 return new Unknown64(machInst);
475 }
476 }
477
478 // Decodes unary/binary with wide immediate operand, destructive,
479 // predicated SVE instructions, handling floating-point variants only.
480 template <template <typename T> class Base>
481 StaticInstPtr
482 decodeSveWideImmPredF(unsigned size, ExtMachInst machInst,
483 IntRegIndex dest, uint64_t immediate, IntRegIndex gp)
484 {
485 switch (size) {
486 case 1:
487 return new Base<uint16_t>(machInst, dest, immediate, gp);
488 case 2:
489 return new Base<uint32_t>(machInst, dest, immediate, gp);
490 case 3:
491 return new Base<uint64_t>(machInst, dest, immediate, gp);
492 default:
493 return new Unknown64(machInst);
494 }
495 }
496
497 // Decodes binary, destructive, predicated (merging) SVE instructions,
498 // handling unsigned variants only.
499 template <template <typename T> class Base>
500 StaticInstPtr
501 decodeSveBinDestrPredU(unsigned size, ExtMachInst machInst,
502 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
503 {
504 switch (size) {
505 case 0:
506 return new Base<uint8_t>(machInst, dest, op2, gp);
507 case 1:
508 return new Base<uint16_t>(machInst, dest, op2, gp);
509 case 2:
510 return new Base<uint32_t>(machInst, dest, op2, gp);
511 case 3:
512 return new Base<uint64_t>(machInst, dest, op2, gp);
513 default:
514 return new Unknown64(machInst);
515 }
516 }
517
518 // Decodes binary, destructive, predicated (merging) SVE instructions,
519 // handling signed variants only.
520 template <template <typename T> class Base>
521 StaticInstPtr
522 decodeSveBinDestrPredS(unsigned size, ExtMachInst machInst,
523 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
524 {
525 switch (size) {
526 case 0:
527 return new Base<int8_t>(machInst, dest, op2, gp);
528 case 1:
529 return new Base<int16_t>(machInst, dest, op2, gp);
530 case 2:
531 return new Base<int32_t>(machInst, dest, op2, gp);
532 case 3:
533 return new Base<int64_t>(machInst, dest, op2, gp);
534 default:
535 return new Unknown64(machInst);
536 }
537 }
538
539 // Decodes binary, destructive, predicated (merging) SVE instructions,
540 // handling floating-point variants only.
541 template <template <typename T> class Base>
542 StaticInstPtr
543 decodeSveBinDestrPredF(unsigned size, ExtMachInst machInst,
544 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
545 {
546 switch (size) {
547 case 1:
548 return new Base<uint16_t>(machInst, dest, op2, gp);
549 case 2:
550 return new Base<uint32_t>(machInst, dest, op2, gp);
551 case 3:
552 return new Base<uint64_t>(machInst, dest, op2, gp);
553 default:
554 return new Unknown64(machInst);
555 }
556 }
557
558 // Decodes binary, constructive, predicated SVE instructions, handling
559 // unsigned variants only.
560 template <template <typename T> class Base>
561 StaticInstPtr
562 decodeSveBinConstrPredU(unsigned size, ExtMachInst machInst,
563 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
564 IntRegIndex gp, SvePredType predType)
565 {
566 switch (size) {
567 case 0:
568 return new Base<uint8_t>(machInst, dest, op1, op2, gp, predType);
569 case 1:
570 return new Base<uint16_t>(machInst, dest, op1, op2, gp, predType);
571 case 2:
572 return new Base<uint32_t>(machInst, dest, op1, op2, gp, predType);
573 case 3:
574 return new Base<uint64_t>(machInst, dest, op1, op2, gp, predType);
575 default:
576 return new Unknown64(machInst);
577 }
578 }
579
580 // Decodes binary, constructive, unpredicated SVE instructions.
581 template <template <typename T> class Base>
582 StaticInstPtr
583 decodeSveBinUnpred(unsigned size, unsigned u, ExtMachInst machInst,
584 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
585 {
586 switch (size) {
587 case 0:
588 if (u) {
589 return new Base<uint8_t>(machInst, dest, op1, op2);
590 } else {
591 return new Base<int8_t>(machInst, dest, op1, op2);
592 }
593 case 1:
594 if (u) {
595 return new Base<uint16_t>(machInst, dest, op1, op2);
596 } else {
597 return new Base<int16_t>(machInst, dest, op1, op2);
598 }
599 case 2:
600 if (u) {
601 return new Base<uint32_t>(machInst, dest, op1, op2);
602 } else {
603 return new Base<int32_t>(machInst, dest, op1, op2);
604 }
605 case 3:
606 if (u) {
607 return new Base<uint64_t>(machInst, dest, op1, op2);
608 } else {
609 return new Base<int64_t>(machInst, dest, op1, op2);
610 }
611 default:
612 return new Unknown64(machInst);
613 }
614 }
615
616 // Decodes binary, constructive, unpredicated SVE instructions.
617 // Unsigned instructions only.
618 template <template <typename T> class Base>
619 StaticInstPtr
620 decodeSveBinUnpredU(unsigned size, ExtMachInst machInst, IntRegIndex dest,
621 IntRegIndex op1, IntRegIndex op2)
622 {
623 switch (size) {
624 case 0:
625 return new Base<uint8_t>(machInst, dest, op1, op2);
626 case 1:
627 return new Base<uint16_t>(machInst, dest, op1, op2);
628 case 2:
629 return new Base<uint32_t>(machInst, dest, op1, op2);
630 case 3:
631 return new Base<uint64_t>(machInst, dest, op1, op2);
632 default:
633 return new Unknown64(machInst);
634 }
635 }
636
637 // Decodes binary, constructive, unpredicated SVE instructions.
638 // Signed instructions only.
639 template <template <typename T> class Base>
640 StaticInstPtr
641 decodeSveBinUnpredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
642 IntRegIndex op1, IntRegIndex op2)
643 {
644 switch (size) {
645 case 0:
646 return new Base<int8_t>(machInst, dest, op1, op2);
647 case 1:
648 return new Base<int16_t>(machInst, dest, op1, op2);
649 case 2:
650 return new Base<int32_t>(machInst, dest, op1, op2);
651 case 3:
652 return new Base<int64_t>(machInst, dest, op1, op2);
653 default:
654 return new Unknown64(machInst);
655 }
656 }
657
658 // Decodes binary, costructive, unpredicated SVE instructions, handling
659 // floating-point variants only.
660 template <template <typename T> class Base>
661 StaticInstPtr
662 decodeSveBinUnpredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
663 IntRegIndex op1, IntRegIndex op2)
664 {
665 switch (size) {
666 case 1:
667 return new Base<uint16_t>(machInst, dest, op1, op2);
668 case 2:
669 return new Base<uint32_t>(machInst, dest, op1, op2);
670 case 3:
671 return new Base<uint64_t>(machInst, dest, op1, op2);
672 default:
673 return new Unknown64(machInst);
674 }
675 }
676
677 // Decodes SVE compare instructions - binary, predicated (zeroing),
678 // generating a predicate - handling floating-point variants only.
679 template <template <typename T> class Base>
680 StaticInstPtr
681 decodeSveCmpF(unsigned size, ExtMachInst machInst,
682 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
683 IntRegIndex gp)
684 {
685 switch (size) {
686 case 1:
687 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
688 case 2:
689 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
690 case 3:
691 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
692 default:
693 return new Unknown64(machInst);
694 }
695 }
696
697 // Decodes SVE compare-with-immediate instructions - binary, predicated
698 // (zeroing), generating a predicate - handling floating-point variants
699 // only.
700 template <template <typename T> class Base>
701 StaticInstPtr
702 decodeSveCmpImmF(unsigned size, ExtMachInst machInst,
703 IntRegIndex dest, IntRegIndex op1, uint64_t imm,
704 IntRegIndex gp)
705 {
706 switch (size) {
707 case 1:
708 return new Base<uint16_t>(machInst, dest, op1, imm, gp);
709 case 2:
710 return new Base<uint32_t>(machInst, dest, op1, imm, gp);
711 case 3:
712 return new Base<uint64_t>(machInst, dest, op1, imm, gp);
713 default:
714 return new Unknown64(machInst);
715 }
716 }
717
718 // Decodes ternary, destructive, predicated (merging) SVE instructions.
719 template <template <typename T> class Base>
720 StaticInstPtr
721 decodeSveTerPred(unsigned size, unsigned u, ExtMachInst machInst,
722 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
723 IntRegIndex gp)
724 {
725 switch (size) {
726 case 0:
727 if (u) {
728 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
729 } else {
730 return new Base<int8_t>(machInst, dest, op1, op2, gp);
731 }
732 case 1:
733 if (u) {
734 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
735 } else {
736 return new Base<int16_t>(machInst, dest, op1, op2, gp);
737 }
738 case 2:
739 if (u) {
740 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
741 } else {
742 return new Base<int32_t>(machInst, dest, op1, op2, gp);
743 }
744 case 3:
745 if (u) {
746 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
747 } else {
748 return new Base<int64_t>(machInst, dest, op1, op2, gp);
749 }
750 default:
751 return new Unknown64(machInst);
752 }
753 }
754
755 // Decodes ternary, destructive, predicated (merging) SVE instructions,
756 // handling wide signed variants only. XXX: zeroing for CMP instructions.
757 template <template <typename T> class Base>
758 StaticInstPtr
759 decodeSveTerPredWS(unsigned size, ExtMachInst machInst,
760 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
761 IntRegIndex gp)
762 {
763 switch (size) {
764 case 0:
765 return new Base<int8_t>(machInst, dest, op1, op2, gp);
766 case 1:
767 return new Base<int16_t>(machInst, dest, op1, op2, gp);
768 case 2:
769 return new Base<int32_t>(machInst, dest, op1, op2, gp);
770 default:
771 return new Unknown64(machInst);
772 }
773 }
774
775 // Decodes ternary, destructive, predicated (merging) SVE instructions,
776 // handling wide unsigned variants only. XXX: zeroing for CMP instructions.
777 template <template <typename T> class Base>
778 StaticInstPtr
779 decodeSveTerPredWU(unsigned size, ExtMachInst machInst,
780 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
781 IntRegIndex gp)
782 {
783 switch (size) {
784 case 0:
785 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
786 case 1:
787 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
788 case 2:
789 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
790 default:
791 return new Unknown64(machInst);
792 }
793 }
794
795 // Decodes ternary, destructive, predicated (merging) SVE instructions,
796 // handling signed variants only. XXX: zeroing for CMP instructions.
797 template <template <typename T> class Base>
798 StaticInstPtr
799 decodeSveTerPredS(unsigned size, ExtMachInst machInst,
800 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
801 IntRegIndex gp)
802 {
803 switch (size) {
804 case 0:
805 return new Base<int8_t>(machInst, dest, op1, op2, gp);
806 case 1:
807 return new Base<int16_t>(machInst, dest, op1, op2, gp);
808 case 2:
809 return new Base<int32_t>(machInst, dest, op1, op2, gp);
810 case 3:
811 return new Base<int64_t>(machInst, dest, op1, op2, gp);
812 default:
813 return new Unknown64(machInst);
814 }
815 }
816
817 // Decodes ternary, destructive, predicated (merging) SVE instructions,
818 // handling unsigned variants only. XXX: zeroing for CMP instructions.
819 template <template <typename T> class Base>
820 StaticInstPtr
821 decodeSveTerPredU(unsigned size, ExtMachInst machInst,
822 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
823 IntRegIndex gp)
824 {
825 switch (size) {
826 case 0:
827 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
828 case 1:
829 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
830 case 2:
831 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
832 case 3:
833 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
834 default:
835 return new Unknown64(machInst);
836 }
837 }
838
839 // Decodes SVE signed unary extension instructions (8-bit source element
840 // size)
841 template <template <typename TS, typename TD> class Base>
842 StaticInstPtr
843 decodeSveUnaryExtendFromBPredS(unsigned dsize, ExtMachInst machInst,
844 IntRegIndex dest, IntRegIndex op1,
845 IntRegIndex gp)
846 {
847 switch (dsize) {
848 case 1:
849 return new Base<int8_t, int16_t>(machInst, dest, op1, gp);
850 case 2:
851 return new Base<int8_t, int32_t>(machInst, dest, op1, gp);
852 case 3:
853 return new Base<int8_t, int64_t>(machInst, dest, op1, gp);
854 }
855 return new Unknown64(machInst);
856 }
857
858 // Decodes SVE unsigned unary extension instructions (8-bit source element
859 // size)
860 template <template <typename TS, typename TD> class Base>
861 StaticInstPtr
862 decodeSveUnaryExtendFromBPredU(unsigned dsize, ExtMachInst machInst,
863 IntRegIndex dest, IntRegIndex op1,
864 IntRegIndex gp)
865 {
866 switch (dsize) {
867 case 1:
868 return new Base<uint8_t, uint16_t>(machInst, dest, op1, gp);
869 case 2:
870 return new Base<uint8_t, uint32_t>(machInst, dest, op1, gp);
871 case 3:
872 return new Base<uint8_t, uint64_t>(machInst, dest, op1, gp);
873 }
874 return new Unknown64(machInst);
875 }
876
877 // Decodes SVE signed unary extension instructions (16-bit source element
878 // size)
879 template <template <typename TS, typename TD> class Base>
880 StaticInstPtr
881 decodeSveUnaryExtendFromHPredS(unsigned dsize, ExtMachInst machInst,
882 IntRegIndex dest, IntRegIndex op1,
883 IntRegIndex gp)
884 {
885 switch (dsize) {
886 case 2:
887 return new Base<int16_t, int32_t>(machInst, dest, op1, gp);
888 case 3:
889 return new Base<int16_t, int64_t>(machInst, dest, op1, gp);
890 }
891 return new Unknown64(machInst);
892 }
893
894 // Decodes SVE unsigned unary extension instructions (16-bit source element
895 // size)
896 template <template <typename TS, typename TD> class Base>
897 StaticInstPtr
898 decodeSveUnaryExtendFromHPredU(unsigned dsize, ExtMachInst machInst,
899 IntRegIndex dest, IntRegIndex op1,
900 IntRegIndex gp)
901 {
902 switch (dsize) {
903 case 2:
904 return new Base<uint16_t, uint32_t>(machInst, dest, op1, gp);
905 case 3:
906 return new Base<uint16_t, uint64_t>(machInst, dest, op1, gp);
907 }
908 return new Unknown64(machInst);
909 }
910
911 // Decodes ternary, destructive, predicated (merging) SVE instructions,
912 // handling floating-point variants only.
913 template <template <typename T> class Base>
914 StaticInstPtr
915 decodeSveTerPredF(unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
917 IntRegIndex gp)
918 {
919 switch (size) {
920 case 1:
921 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
922 case 2:
923 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
924 case 3:
925 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
926 default:
927 return new Unknown64(machInst);
928 }
929 }
930
931 // Decodes ternary with immediate operand, destructive, unpredicated SVE
932 // instructions handling floating-point variants only.
933 template <template <typename T> class Base>
934 StaticInstPtr
935 decodeSveTerImmUnpredF(unsigned size, ExtMachInst machInst,
936 IntRegIndex dest, IntRegIndex op2, uint8_t imm)
937 {
938 switch (size) {
939 case 1:
940 return new Base<uint16_t>(machInst, dest, op2, imm);
941 case 2:
942 return new Base<uint32_t>(machInst, dest, op2, imm);
943 case 3:
944 return new Base<uint64_t>(machInst, dest, op2, imm);
945 default:
946 return new Unknown64(machInst);
947 }
948 }
949
950 // Decodes SVE PTRUE(S) instructions.
951 template <template <typename T> class Base>
952 StaticInstPtr
953 decodeSvePtrue(unsigned size, ExtMachInst machInst,
954 IntRegIndex dest, uint8_t imm)
955 {
956 switch (size) {
957 case 0:
958 return new Base<uint8_t>(machInst, dest, imm);
959 case 1:
960 return new Base<uint16_t>(machInst, dest, imm);
961 case 2:
962 return new Base<uint32_t>(machInst, dest, imm);
963 case 3:
964 return new Base<uint64_t>(machInst, dest, imm);
965 default:
966 return new Unknown64(machInst);
967 }
968 }
969
970 // Decodes SVE predicate count instructions, scalar signed variant only
971 template <template <typename T> class Base>
972 StaticInstPtr
973 decodeSvePredCountS(unsigned size, ExtMachInst machInst,
974 IntRegIndex dest, IntRegIndex op1)
975 {
976 switch (size) {
977 case 0:
978 return new Base<int8_t>(machInst, dest, op1);
979 case 1:
980 return new Base<int16_t>(machInst, dest, op1);
981 case 2:
982 return new Base<int32_t>(machInst, dest, op1);
983 case 3:
984 return new Base<int64_t>(machInst, dest, op1);
985 default:
986 return new Unknown64(machInst);
987 }
988 }
989
990 // Decodes SVE predicate count instructions, scalar unsigned variant only
991 template <template <typename T> class Base>
992 StaticInstPtr
993 decodeSvePredCountU(unsigned size, ExtMachInst machInst,
994 IntRegIndex dest, IntRegIndex op1)
995 {
996 switch (size) {
997 case 0:
998 return new Base<uint8_t>(machInst, dest, op1);
999 case 1:
1000 return new Base<uint16_t>(machInst, dest, op1);
1001 case 2:
1002 return new Base<uint32_t>(machInst, dest, op1);
1003 case 3:
1004 return new Base<uint64_t>(machInst, dest, op1);
1005 default:
1006 return new Unknown64(machInst);
1007 }
1008 }
1009
1010 // Decodes SVE predicate count instructions, vector signed variant only
1011 template <template <typename T> class Base>
1012 StaticInstPtr
1013 decodeSvePredCountVS(unsigned size, ExtMachInst machInst,
1014 IntRegIndex dest, IntRegIndex op1)
1015 {
1016 switch (size) {
1017 case 1:
1018 return new Base<int16_t>(machInst, dest, op1);
1019 case 2:
1020 return new Base<int32_t>(machInst, dest, op1);
1021 case 3:
1022 return new Base<int64_t>(machInst, dest, op1);
1023 default:
1024 return new Unknown64(machInst);
1025 }
1026 }
1027
1028 // Decodes SVE predicate count instructions, vector unsigned variant only
1029 template <template <typename T> class Base>
1030 StaticInstPtr
1031 decodeSvePredCountVU(unsigned size, ExtMachInst machInst,
1032 IntRegIndex dest, IntRegIndex op1)
1033 {
1034 switch (size) {
1035 case 1:
1036 return new Base<uint16_t>(machInst, dest, op1);
1037 case 2:
1038 return new Base<uint32_t>(machInst, dest, op1);
1039 case 3:
1040 return new Base<uint64_t>(machInst, dest, op1);
1041 default:
1042 return new Unknown64(machInst);
1043 }
1044 }
1045
1046 // Decodes ternary with immediate operand, predicated SVE
1047 // instructions handling unsigned variants only.
1048 template <template <typename T> class Base>
1049 StaticInstPtr
1050 decodeSveTerImmPredU(unsigned size, ExtMachInst machInst,
1051 IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp)
1052 {
1053 switch (size) {
1054 case 0:
1055 return new Base<uint8_t>(machInst, dest, op1, imm, gp);
1056 case 1:
1057 return new Base<uint16_t>(machInst, dest, op1, imm, gp);
1058 case 2:
1059 return new Base<uint32_t>(machInst, dest, op1, imm, gp);
1060 case 3:
1061 return new Base<uint64_t>(machInst, dest, op1, imm, gp);
1062 default:
1063 return new Unknown64(machInst);
1064 }
1065 }
1066
1067 // Decodes ternary with immediate operand, predicated SVE
1068 // instructions handling signed variants only.
1069 template <template <typename T> class Base>
1070 StaticInstPtr
1071 decodeSveTerImmPredS(unsigned size, ExtMachInst machInst,
1072 IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp)
1073 {
1074 switch (size) {
1075 case 0:
1076 return new Base<int8_t>(machInst, dest, op1, imm, gp);
1077 case 1:
1078 return new Base<int16_t>(machInst, dest, op1, imm, gp);
1079 case 2:
1080 return new Base<int32_t>(machInst, dest, op1, imm, gp);
1081 case 3:
1082 return new Base<int64_t>(machInst, dest, op1, imm, gp);
1083 default:
1084 return new Unknown64(machInst);
1085 }
1086 }
1087
1088 // Decodes integer element count SVE instructions, handling
1089 // signed variants only.
1090 template <template <typename T> class Base>
1091 StaticInstPtr
1092 decodeSveElemIntCountS(unsigned size, ExtMachInst machInst,
1093 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1094 {
1095 switch (size) {
1096 case 0:
1097 return new Base<int8_t>(machInst, dest, pattern, imm4);
1098 case 1:
1099 return new Base<int16_t>(machInst, dest, pattern, imm4);
1100 case 2:
1101 return new Base<int32_t>(machInst, dest, pattern, imm4);
1102 case 3:
1103 return new Base<int64_t>(machInst, dest, pattern, imm4);
1104 default:
1105 return new Unknown64(machInst);
1106 }
1107 }
1108
1109 // Decodes integer element count SVE instructions, handling
1110 // unsigned variants only.
1111 template <template <typename T> class Base>
1112 StaticInstPtr
1113 decodeSveElemIntCountU(unsigned size, ExtMachInst machInst,
1114 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1115 {
1116 switch (size) {
1117 case 0:
1118 return new Base<uint8_t>(machInst, dest, pattern, imm4);
1119 case 1:
1120 return new Base<uint16_t>(machInst, dest, pattern, imm4);
1121 case 2:
1122 return new Base<uint32_t>(machInst, dest, pattern, imm4);
1123 case 3:
1124 return new Base<uint64_t>(machInst, dest, pattern, imm4);
1125 default:
1126 return new Unknown64(machInst);
1127 }
1128 }
1129
1130 // Decodes integer element count SVE instructions, handling
1131 // signed variants from 16 to 64 bits only.
1132 template <template <typename T> class Base>
1133 StaticInstPtr
1134 decodeSveElemIntCountLS(unsigned size, ExtMachInst machInst,
1135 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1136 {
1137 switch (size) {
1138 case 1:
1139 return new Base<int16_t>(machInst, dest, pattern, imm4);
1140 case 2:
1141 return new Base<int32_t>(machInst, dest, pattern, imm4);
1142 case 3:
1143 return new Base<int64_t>(machInst, dest, pattern, imm4);
1144 default:
1145 return new Unknown64(machInst);
1146 }
1147 }
1148
1149 // Decodes integer element count SVE instructions, handling
1150 // unsigned variants from 16 to 64 bits only.
1151 template <template <typename T> class Base>
1152 StaticInstPtr
1153 decodeSveElemIntCountLU(unsigned size, ExtMachInst machInst,
1154 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1155 {
1156 switch (size) {
1157 case 1:
1158 return new Base<uint16_t>(machInst, dest, pattern, imm4);
1159 case 2:
1160 return new Base<uint32_t>(machInst, dest, pattern, imm4);
1161 case 3:
1162 return new Base<uint64_t>(machInst, dest, pattern, imm4);
1163 default:
1164 return new Unknown64(machInst);
1165 }
1166 }
1167
1168 // Decodes SVE unpack instructions. Handling signed variants.
1169 template <template <typename T1, typename T2> class Base>
1170 StaticInstPtr
1171 decodeSveUnpackS(unsigned size, ExtMachInst machInst,
1172 IntRegIndex dest, IntRegIndex op1)
1173 {
1174 switch (size) {
1175 case 1:
1176 return new Base<int8_t, int16_t>(machInst, dest, op1);
1177 case 2:
1178 return new Base<int16_t, int32_t>(machInst, dest, op1);
1179 case 3:
1180 return new Base<int32_t, int64_t>(machInst, dest, op1);
1181 default:
1182 return new Unknown64(machInst);
1183 }
1184 }
1185
1186 // Decodes SVE unpack instructions. Handling unsigned variants.
1187 template <template <typename T1, typename T2> class Base>
1188 StaticInstPtr
1189 decodeSveUnpackU(unsigned size, ExtMachInst machInst,
1190 IntRegIndex dest, IntRegIndex op1)
1191 {
1192 switch (size) {
1193 case 1:
1194 return new Base<uint8_t, uint16_t>(machInst, dest, op1);
1195 case 2:
1196 return new Base<uint16_t, uint32_t>(machInst, dest, op1);
1197 case 3:
1198 return new Base<uint32_t, uint64_t>(machInst, dest, op1);
1199 default:
1200 return new Unknown64(machInst);
1201 }
1202 }
1203}};
1204
let {{

    # Accumulators for the generated C++ output: class declarations go to
    # header_output, execute() definitions to exec_output; decoders maps
    # decoder names to their generated decode snippets.
    header_output = ''
    exec_output = ''
    decoders = { 'Generic': {} }
1210
    # Predication modes used by the instruction generators below.
    class PredType:
        NONE = 0    # unpredicated
        MERGE = 1   # inactive elements keep the destination's old value
        ZERO = 2    # inactive elements are zeroed
        SELECT = 3  # inactive elements take the second source element
1216
    # Direction of a floating-point conversion (see sveCvtInst): selects
    # which of the source/destination element types is the wider one.
    class CvtDir:
        Narrow = 0
        Widen = 1
1220
    # Operand formats for INDEX generation (see sveIndex): each letter is
    # Immediate or Register, for the start and increment operands.
    class IndexFormat(object):
        ImmImm = 'II'
        ImmReg = 'IR'
        RegImm = 'RI'
        RegReg = 'RR'
1226
    # Kind of source register an instruction reads (see e.g. sveUnaryInst,
    # which picks the operand accessor based on this).
    class SrcRegType(object):
        Vector = 0
        Scalar = 1
        SimdFpScalar = 2
        Predicate = 3
1232
    # Kind of destination register an instruction writes; mirrors
    # SrcRegType (used by generators outside this chunk).
    class DstRegType(object):
        Vector = 0
        Scalar = 1
        SimdFpScalar = 2
        Predicate = 3
1238
    # Destination kind for predicate count instructions; the values are the
    # literal C++ booleans substituted for the 'destIsVec' template argument
    # (see svePredCountInst).
    class DestType(object):
        Scalar = 'false'
        Vector = 'true'
1242
    # Scalar source operand width; the values are the literal C++ booleans
    # substituted for the 'srcIs32b' template argument (see sveWhileInst and
    # svePredCountInst).
    class SrcSize(object):
        Src32bit = 'true'
        Src64bit = 'false'
1246
    # Break position selector — presumably for the BRKA/BRKB-style
    # generators (defined outside this chunk); TODO confirm.
    class Break(object):
        Before = 0
        After = 1
1250
    # Half selector — presumably for the PUNPKHI/PUNPKLO-style generators
    # (defined outside this chunk); TODO confirm.
    class Unpack(object):
        High = 0
        Low = 1
1254
    # Generates definitions for SVE ADR instructions.
    #
    # 'op' is a C++ snippet computing destElem from srcElem1/srcElem2; one
    # execute() instantiation is emitted per element type in 'types'.
    def sveAdrInst(name, Name, opClass, types, op):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            const Element& srcElem1 = AA64FpOp1_x[i];
            Element srcElem2 = AA64FpOp2_x[i];
            Element destElem = 0;
            %(op)s
            AA64FpDest_x[i] = destElem;
        }''' % {'op': op}
        iop = InstObjParams(name, 'Sve' + Name, 'SveAdrOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveAdrOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1276
1277 # Generates definition for SVE while predicate generation instructions
1278 def sveWhileInst(name, Name, opClass, types, op,
1279 srcSize = SrcSize.Src64bit):
1280 global header_output, exec_output, decoders
1281 extraPrologCode = '''
1282 auto& destPred = PDest;'''
1283 if 'int32_t' in types:
1284 srcType = 'int64_t' if srcSize == SrcSize.Src64bit else 'int32_t'
1285 else:
1286 srcType = 'uint64_t' if srcSize == SrcSize.Src64bit else 'uint32_t'
1287 code = sveEnabledCheckCode + '''
1288 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1289 xc->tcBase());
1290 %(stype)s srcElem1 = static_cast<%(stype)s>(XOp1);
1291 %(stype)s srcElem2 = static_cast<%(stype)s>(XOp2);
1292 bool cond, first = false, none = true, last = true;
1293 destPred.reset();
1294 for (unsigned i = 0; i < eCount; i++) {
1295 %(op)s;
1296 last = last && cond;
1297 none = none && !cond;
1298 first = first || (i == 0 && cond);
1299 PDest_x[i] = last;
1300 srcElem1++;
1301 }
1302 CondCodesNZ = (first << 1) | none;
1303 CondCodesC = !last;
1304 CondCodesV = false;
1305 '''%{'op': op, 'stype': srcType}
1306 iop = InstObjParams(name, 'Sve' + Name, 'SveWhileOp',
1307 {'code': code, 'op_class': opClass, 'srcIs32b': srcSize}, [])
1308 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
1309 header_output += SveWhileOpDeclare.subst(iop)
1310 exec_output += SveOpExecute.subst(iop)
1311 for type in types:
1312 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1313 exec_output += SveOpExecDeclare.subst(substDict);
1314
1315 # Generate definition for SVE compare & terminate instructions
1316 def sveCompTermInst(name, Name, opClass, types, op):
1317 global header_output, exec_output, decoders
1318 code = sveEnabledCheckCode + '''
1319 bool destElem;
1320 Element srcElem1 = static_cast<Element>(XOp1);
1321 Element srcElem2 = static_cast<Element>(XOp2);
1322 %(op)s;
1323 if (destElem) {
1324 CondCodesNZ = CondCodesNZ | 0x2;
1325 CondCodesV = 0;
1326 } else {
1327 CondCodesNZ = CondCodesNZ & ~0x2;
1328 CondCodesV = !CondCodesC;
1329 }
1330 ''' % {'op': op}
1331 iop = InstObjParams(name, 'Sve' + Name, 'SveCompTermOp',
1332 {'code': code, 'op_class': opClass}, [])
1333 header_output += SveCompTermOpDeclare.subst(iop)
1334 exec_output += SveOpExecute.subst(iop)
1335 for type in types:
1336 substDict = {'targs' : type, 'class_name': 'Sve' + Name}
1337 exec_output += SveOpExecDeclare.subst(substDict);
1338
1339 # Generates definition for SVE predicate count instructions
1340 def svePredCountInst(name, Name, opClass, types, op,
1341 destType=DestType.Vector,
1342 srcSize=SrcSize.Src64bit):
1343 global header_output, exec_output, decoders
1344 assert not (destType == DestType.Vector and
1345 srcSize != SrcSize.Src64bit)
1346 code = sveEnabledCheckCode + '''
1347 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1348 xc->tcBase());
1349 int count = 0;
1350 for (unsigned i = 0; i < eCount; i++) {
1351 if (GpOp_x[i]) {
1352 count++;
1353 }
1354 }'''
1355 if destType == DestType.Vector:
1356 code += '''
1357 for (unsigned i = 0; i < eCount; i++) {
1358 Element destElem = 0;
1359 const Element& srcElem = AA64FpDestMerge_x[i];
1360 %(op)s
1361 AA64FpDest_x[i] = destElem;
1362 }''' % {'op': op}
1363 else:
1364 code += '''
1365 %(op)s''' % {'op': op}
1366 iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountOp',
1367 {'code': code, 'op_class': opClass, 'srcIs32b': srcSize,
1368 'destIsVec': destType}, [])
1369 header_output += SvePredCountOpDeclare.subst(iop)
1370 exec_output += SveOpExecute.subst(iop)
1371 for type in types:
1372 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1373 exec_output += SveOpExecDeclare.subst(substDict);
1374
    # Generates definition for SVE predicate count instructions (predicated).
    #
    # The generated code writes to the X destination the number of elements
    # active in both the source predicate and the governing predicate.
    def svePredCountPredInst(name, Name, opClass, types):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        int count = 0;
        for (unsigned i = 0; i < eCount; i++) {
            if (POp1_x[i] && GpOp_x[i]) {
                count++;
            }
        }
        XDest = count;
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountPredOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SvePredCountPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1396
    # Generates definition for SVE Index generation instructions.
    #
    # 'fmt' (an IndexFormat value) selects whether the start (srcElem1) and
    # increment (srcElem2) come from immediates or X registers; the element
    # values are start + i * increment.
    def sveIndex(fmt):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        # Start operand: immediate or scalar register.
        if fmt == IndexFormat.ImmReg or fmt == IndexFormat.ImmImm:
            code += '''
        const Element& srcElem1 = imm1;'''
        if fmt == IndexFormat.RegImm or fmt == IndexFormat.RegReg:
            code += '''
        const Element& srcElem1 = XOp1;'''
        # Increment operand: immediate or scalar register.
        if fmt == IndexFormat.RegImm or fmt == IndexFormat.ImmImm:
            code += '''
        const Element& srcElem2 = imm2;'''
        if fmt == IndexFormat.ImmReg or fmt == IndexFormat.RegReg:
            code += '''
        const Element& srcElem2 = XOp2;'''
        code +='''
        for (unsigned i = 0; i < eCount; i++) {
            AA64FpDest_x[i] = srcElem1 + i * srcElem2;
        }'''
        iop = InstObjParams('index', 'SveIndex'+fmt, 'SveIndex'+fmt+'Op',
                {'code': code, 'op_class': 'SimdAluOp'})
        if fmt == IndexFormat.ImmImm:
            header_output += SveIndexIIOpDeclare.subst(iop)
        elif fmt == IndexFormat.ImmReg:
            header_output += SveIndexIROpDeclare.subst(iop)
        elif fmt == IndexFormat.RegImm:
            header_output += SveIndexRIOpDeclare.subst(iop)
        elif fmt == IndexFormat.RegReg:
            header_output += SveIndexRROpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # INDEX only has signed element variants.
        for type in ['int8_t', 'int16_t', 'int32_t', 'int64_t']:
            substDict = {'targs': type, 'class_name': 'SveIndex'+fmt}
            exec_output += SveOpExecDeclare.subst(substDict)
1433
    # Generates definitions for widening unary SVE instructions
    # (always constructive).
    #
    # 'op' computes a DElement destElem from an SElement srcElem1; elements
    # are indexed at the destination (wide) granularity ('_xd' accessors).
    def sveWidenUnaryInst(name, Name, opClass, types, op,
                          predType=PredType.NONE, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            SElement srcElem1 = AA64FpOp1_xd[i];
            DElement destElem = 0;'''
        if predType != PredType.NONE:
            # Predicated: inactive elements merge the old value or zero.
            code += '''
            if (GpOp_xd[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_xd[i]'
                                 if predType == PredType.MERGE
                                 else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_xd[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                'SveUnaryPredOp' if predType != PredType.NONE
                                 else 'SveUnaryUnpredOp',
                {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveWideningUnaryPredOpDeclare.subst(iop)
        else:
            header_output += SveWideningUnaryUnpredOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        # One execute() template instantiation per element type pair.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1474
    # Generates definitions for unary SVE instructions (always constructive).
    #
    # 'srcRegType' picks the source operand accessor: a vector element, a
    # scalar X register, or element 0 of a SIMD/FP register.
    def sveUnaryInst(name, Name, opClass, types, op, predType=PredType.NONE,
                     srcRegType=SrcRegType.Vector, decoder='Generic'):
        global header_output, exec_output, decoders
        op1 = ('AA64FpOp1_x[i]' if srcRegType == SrcRegType.Vector
               else 'XOp1' if srcRegType == SrcRegType.Scalar
               else 'AA64FpOp1_x[0]')
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = %s;
            Element destElem = 0;''' % op1
        if predType != PredType.NONE:
            # Predicated: inactive elements merge the old value or zero.
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE
                                 else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                'SveUnaryPredOp' if predType != PredType.NONE
                                 else 'SveUnaryUnpredOp',
                {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveUnaryPredOpDeclare.subst(iop)
        else:
            header_output += SveUnaryUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1517
    # Generates definitions for SVE floating-point conversions (always
    # unary, constructive, merging).
    #
    # Elements are indexed at the granularity of the wider of the two types:
    # the source for narrowing conversions, the destination for widening.
    def sveCvtInst(name, Name, opClass, types, op, direction=CvtDir.Narrow,
                   decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<%(bigElemType)s>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            SElement srcElem1 = AA64FpOp1_x%(bigElemSuffix)s[i] &
                    mask(sizeof(SElement) * 8);
            DElement destElem = 0;
            if (GpOp_x%(bigElemSuffix)s[i]) {
                %(op)s
                AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
            } else {
                AA64FpDest_x%(bigElemSuffix)s[i] =
                        AA64FpDestMerge_x%(bigElemSuffix)s[i];
            }
        }
        ''' % {'op': op,
               'bigElemType': 'SElement' if direction == CvtDir.Narrow
                              else 'DElement',
               'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'}
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveWideningUnaryPredOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        # One execute() template instantiation per element type pair.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1550
    # Generates definitions for associative SVE reductions.
    #
    # 'op' folds srcElem1 into destElem, which starts at 'identity'; the
    # scalar result lands in element 0 and the rest are zeroed.
    def sveAssocReducInst(name, Name, opClass, types, op, identity,
                          decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxOp1 = tmpVecC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_x[i];
        }
        Element destElem = %(identity)s;
        for (unsigned i = 0; i < eCount; i++) {
            AA64FpDest_x[i] = 0; // zero upper part
            if (GpOp_x[i]) {
                const Element& srcElem1 = auxOp1[i];
                %(op)s
            }
        }
        AA64FpDest_x[0] = destElem;
        ''' % {'op': op, 'identity': identity}
        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveReducOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1581
    # Generates definitions for widening associative SVE reductions.
    #
    # Source elements (SElement) are accumulated into a wider DElement; the
    # result goes to destination element 0, the remaining wide elements are
    # zeroed.
    def sveWideningAssocReducInst(name, Name, opClass, types, op, identity,
                                  decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<SElement>(
                xc->tcBase());
        unsigned eWideCount = ArmStaticInst::getCurSveVecLen<DElement>(
                xc->tcBase());
        DElement destElem = %(identity)s;
        for (unsigned i = 0; i < eCount; i++) {
            if (GpOp_xs[i]) {
                DElement srcElem1 = AA64FpOp1_xs[i];
                %(op)s
            }
        }
        AA64FpDest_xd[0] = destElem;
        for (int i = 1; i < eWideCount; i++) {
            AA64FpDest_xd[i] = 0;
        }
        ''' % {'op': op, 'identity': identity}
        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveWideningReducOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        # One execute() template instantiation per element type pair.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1611
    # Generates definitions for non-associative SVE reductions.
    #
    # The input is padded with 'identity' up to the next power of two, then
    # reduced pairwise in a fixed tree order so the (non-associative) 'op'
    # is applied in a well-defined sequence.
    def sveNonAssocReducInst(name, Name, opClass, types, op, identity,
                             decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto tmpVec = tmpVecC.as<Element>();
        int ePow2Count = 1;
        while (ePow2Count < eCount) {
            ePow2Count *= 2;
        }

        for (unsigned i = 0; i < ePow2Count; i++) {
            if (i < eCount && GpOp_x[i]) {
                tmpVec[i] = AA64FpOp1_x[i];
            } else {
                tmpVec[i] = %(identity)s;
            }
        }

        unsigned n = ePow2Count;
        while (n > 1) {
            unsigned max = n;
            n = 0;
            for (unsigned i = 0; i < max; i += 2) {
                Element srcElem1 = tmpVec[i];
                Element srcElem2 = tmpVec[i + 1];
                Element destElem = 0;
                %(op)s
                tmpVec[n] = destElem;
                n++;
            }
        }
        AA64FpDest_x[0] = tmpVec[0];
        for (unsigned i = 1; i < eCount; i++) {
            AA64FpDest_x[i] = 0; // zero upper part
        }
        ''' % {'op': op, 'identity': identity}
        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveReducOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1660
    # Generates definitions for binary SVE instructions with an immediate
    # operand.
    #
    # Predicated forms are destructive: srcElem1 is the old destination
    # value; unpredicated forms read srcElem1 from the first source vector.
    def sveBinImmInst(name, Name, opClass, types, op, predType=PredType.NONE,
                      decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {'''
        if predType != PredType.NONE:
            code += '''
            const Element& srcElem1 = %s;''' % (
                'AA64FpDestMerge_x[i]' if predType == PredType.MERGE else '0')
        else:
            code += '''
            const Element& srcElem1 = AA64FpOp1_x[i];'''
        code += '''
            Element srcElem2 = imm;
            Element destElem = 0;'''
        if predType != PredType.NONE:
            # Predicated: inactive elements merge the old value or zero.
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                'SveBinImmPredOp' if predType != PredType.NONE
                                  else 'SveBinImmUnpredConstrOp',
                {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveBinImmPredOpDeclare.subst(iop)
        else:
            header_output += SveBinImmUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1707
    # Generates definitions for unary and binary SVE instructions with a
    # wide immediate operand.
    #
    # Unary forms just broadcast the immediate; binary forms are
    # destructive, combining the immediate with the old destination value.
    def sveWideImmInst(name, Name, opClass, types, op, predType=PredType.NONE,
                       isUnary=False, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {'''
        # TODO: handle unsigned-to-signed conversion properly...
        if isUnary:
            code += '''
            Element srcElem1 = imm;'''
        else:
            code += '''
            const Element& srcElem1 = AA64FpDestMerge_x[i];
            Element srcElem2 = imm;'''
        code += '''
            Element destElem = 0;'''
        if predType != PredType.NONE:
            # Predicated: inactive elements merge the old value or zero.
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                'Sve%sWideImm%sOp' % (
                    'Unary' if isUnary else 'Bin',
                    'Unpred' if predType == PredType.NONE else 'Pred'),
                {'code': code, 'op_class': opClass}, [])
        if predType == PredType.NONE:
            header_output += SveWideImmUnpredOpDeclare.subst(iop)
        else:
            header_output += SveWideImmPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1756
    # Generates definitions for shift SVE instructions whose shift amounts
    # come from 64-bit ("wide") elements of the second source.
    #
    # The wide operand is copied to a temporary first — presumably because
    # the destination may alias the second source register; TODO confirm.
    def sveShiftByWideElemsInst(name, Name, opClass, types, op,
                                predType=PredType.NONE, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxOp2 = tmpVecC.as<Element>();
        for (unsigned i = 0; i < eCount; i++) {
            auxOp2[i] = AA64FpOp2_ud[i];
        }
        for (unsigned i = 0; i < eCount; i++) {'''
        if predType != PredType.NONE:
            # Predicated forms are destructive: srcElem1 is the old dest.
            code += '''
            const Element& srcElem1 = AA64FpDestMerge_x[i];'''
        else:
            code += '''
            const Element& srcElem1 = AA64FpOp1_x[i];'''
        # Each element uses the 64-bit lane that covers its position.
        code += '''
            const auto& srcElem2 = auxOp2[
                    (i * sizeof(Element) * 8) / 64];
            Element destElem = 0;'''
        if predType != PredType.NONE:
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                'SveBinDestrPredOp' if predType != PredType.NONE
                                    else 'SveBinUnpredOp',
                {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveBinDestrPredOpDeclare.subst(iop)
        else:
            header_output += SveBinUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1808
    # Generates definitions for binary indexed SVE instructions
    # (always unpredicated).
    #
    # The second source element is selected by 'index' within each 128-bit
    # segment, so all elements of a segment share the same operand.
    def sveBinIdxInst(name, Name, opClass, types, op, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());

        // Number of elements in a 128 bit segment
        constexpr unsigned ePerSegment = 128 / sizeof(Element);

        '''

        code += '''
        for (unsigned i = 0; i < eCount; i++) {
            const auto segmentBase = i - i % ePerSegment;
            const auto segmentIdx = segmentBase + index;

            const Element& srcElem1 = AA64FpOp1_x[i];
            const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
            Element destElem = 0;

        '''

        code += '''
            %(op)s
            AA64FpDest_x[i] = destElem;
        }
        ''' % {'op': op}

        baseClass = 'SveBinIdxUnpredOp'

        iop = InstObjParams(name, 'Sve' + Name, baseClass,
                {'code': code, 'op_class': opClass}, [])
        header_output += SveBinIdxUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1849
    # Generates definitions for binary SVE instructions.
    #
    # Destructive predicated forms read srcElem1 from the old destination
    # (MERGE); constructive forms read it from the first source vector.
    # 'customIterCode' replaces the whole generated element loop when given.
    def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
                   isDestructive=False, customIterCode=None,
                   decoder='Generic'):
        # Unpredicated and select-predicated forms cannot be destructive.
        assert not (predType in (PredType.NONE, PredType.SELECT) and
                    isDestructive)
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        if customIterCode is None:
            code += '''
        for (unsigned i = 0; i < eCount; i++) {'''
            if predType == PredType.MERGE:
                code += '''
            const Element& srcElem1 = AA64FpDestMerge_x[i];'''
            else:
                code += '''
            const Element& srcElem1 = AA64FpOp1_x[i];'''
            code += '''
            const Element& srcElem2 = AA64FpOp2_x[i];
            Element destElem = 0;'''
            if predType != PredType.NONE:
                # Inactive elements: merge old value, zero, or select the
                # second source, depending on the predication type.
                code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem':
                        'AA64FpDestMerge_x[i]' if predType == PredType.MERGE
                        else '0' if predType == PredType.ZERO
                        else 'srcElem2'}
            else:
                code += '''
            %(op)s''' % {'op': op}
            code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        else:
            code += customIterCode
        if predType == PredType.NONE:
            baseClass = 'SveBinUnpredOp'
        elif isDestructive:
            baseClass = 'SveBinDestrPredOp'
        else:
            baseClass = 'SveBinConstrPredOp'
        iop = InstObjParams(name, 'Sve' + Name, baseClass,
                {'code': code, 'op_class': opClass}, [])
        if predType == PredType.NONE:
            header_output += SveBinUnpredOpDeclare.subst(iop)
        elif isDestructive:
            header_output += SveBinDestrPredOpDeclare.subst(iop)
        else:
            header_output += SveBinConstrPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1910
    # Generates definitions for predicate logical instructions.
    #
    # The governing predicate is copied to a temporary first — presumably
    # because the destination predicate may alias it; TODO confirm.  Flag-
    # setting variants compute NZCV from the result and governing predicate.
    def svePredLogicalInst(name, Name, opClass, types, op,
                           predType=PredType.ZERO, isFlagSetting=False,
                           decoder='Generic'):
        global header_output, exec_output, decoders
        # Only zeroing and select predication are architecturally defined
        # for these forms.
        assert predType in (PredType.ZERO, PredType.SELECT)
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; i++) {
            auxGpOp[i] = GpOp_x[i];
        }
        for (unsigned i = 0; i < eCount; i++) {
            bool srcElem1 = POp1_x[i];
            bool srcElem2 = POp2_x[i];
            bool destElem = false;
            if (auxGpOp[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }
            PDest_x[i] = destElem;
        }''' % {'op': op,
                'dest_elem': 'false' if predType == PredType.ZERO
                             else 'srcElem2'}
        extraPrologCode = ''
        if isFlagSetting:
            code += '''
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
                destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePredLogicalOp',
                {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePredLogicalOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() template instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1956
1957 # Generates definitions for predicate permute instructions
1958 def svePredBinPermInst(name, Name, opClass, types, iterCode,
1959 decoder='Generic'):
1960 global header_output, exec_output, decoders
1961 code = sveEnabledCheckCode + '''
1962 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1963 xc->tcBase());'''
1964 code += iterCode
1965 iop = InstObjParams(name, 'Sve' + Name, 'SvePredBinPermOp',
1966 {'code': code, 'op_class': opClass}, [])
1967 header_output += SveBinUnpredOpDeclare.subst(iop)
1968 exec_output += SveOpExecute.subst(iop)
1969 for type in types:
1970 substDict = {'targs' : type,
1971 'class_name' : 'Sve' + Name}
1972 exec_output += SveOpExecDeclare.subst(substDict)
1973
1974 # Generates definitions for SVE compare instructions
1975 # NOTE: compares are all predicated zeroing
    def sveCmpInst(name, Name, opClass, types, op, isImm=False,
                   decoder='Generic'):
        # Emits vector-vs-vector (or vector-vs-immediate when isImm is set)
        # compare instructions; inactive destination elements are zeroed.
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto tmpPred = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            tmpPred[i] = GpOp_x[i];
        destPred.reset();
        for (unsigned i = 0; i < eCount; i++) {
            const Element& srcElem1 = AA64FpOp1_x[i];
            %(src_elem_2_ty)s srcElem2 __attribute__((unused)) =
                %(src_elem_2)s;
            bool destElem = false;
            if (tmpPred[i]) {
                %(op)s
            } else {
                destElem = false;
            }
            PDest_x[i] = destElem;
        }''' % {'op': op,
                'src_elem_2_ty': 'Element' if isImm else 'const Element&',
                'src_elem_2': 'imm' if isImm else 'AA64FpOp2_x[i]'}
        iop = InstObjParams(name, 'Sve' + Name,
                            'SveCmpImmOp' if isImm else 'SveCmpOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        if isImm:
            header_output += SveCmpImmOpDeclare.subst(iop)
        else:
            header_output += SveCmpOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2016
    # Generates definitions for ternary SVE instructions (always predicated -
    # merging)
2019 def sveTerInst(name, Name, opClass, types, op, decoder='Generic'):
2020 global header_output, exec_output, decoders
2021 code = sveEnabledCheckCode + '''
2022 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2023 xc->tcBase());
2024 for (unsigned i = 0; i < eCount; i++) {
2025 const Element& srcElem1 = AA64FpOp1_x[i];
2026 const Element& srcElem2 = AA64FpOp2_x[i];
2027 Element destElem = AA64FpDestMerge_x[i];
2028 if (GpOp_x[i]) {
2029 %(op)s
2030 }
2031 AA64FpDest_x[i] = destElem;
2032 }''' % {'op': op}
2033 iop = InstObjParams(name, 'Sve' + Name, 'SveTerPredOp',
2034 {'code': code, 'op_class': opClass}, [])
2035 header_output += SveTerPredOpDeclare.subst(iop)
2036 exec_output += SveOpExecute.subst(iop)
2037 for type in types:
2038 substDict = {'targs' : type,
2039 'class_name' : 'Sve' + Name}
2040 exec_output += SveOpExecDeclare.subst(substDict)
2041
2042 # Generates definitions for ternary SVE instructions with indexed operand
2043 def sveTerIdxInst(name, Name, opClass, types, op, decoder='Generic'):
2044 global header_output, exec_output, decoders
2045 code = sveEnabledCheckCode + '''
2046 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2047 xc->tcBase());
2048
2049 // Number of elements in a 128 bit segment
2050 constexpr unsigned ePerSegment = 128 / sizeof(Element);
2051
2052 for (unsigned i = 0; i < eCount; i++) {
2053 const auto segmentBase = i - i % ePerSegment;
2054 const auto segmentIdx = segmentBase + index;
2055
2056 const Element& srcElem1 = AA64FpOp1_x[i];
2057 const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
2058 Element destElem = AA64FpDestMerge_x[i];
2059 '''
2060
2061 code += '''
2062 %(op)s
2063 AA64FpDest_x[i] = destElem;
2064 }''' % {'op': op}
2065
2066 iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp',
2067 {'code': code, 'op_class': opClass}, [])
2068 header_output += SveBinIdxUnpredOpDeclare.subst(iop)
2069 exec_output += SveOpExecute.subst(iop)
2070 for type in types:
2071 substDict = {'targs' : type,
2072 'class_name' : 'Sve' + Name}
2073 exec_output += SveOpExecDeclare.subst(substDict)
2074
    # Generates definitions for ternary SVE instructions with immediate operand
    # (always unpredicated)
2077 def sveTerImmInst(name, Name, opClass, types, op, decoder='Generic'):
2078 global header_output, exec_output, decoders
2079 code = sveEnabledCheckCode + '''
2080 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2081 xc->tcBase());
2082 for (unsigned i = 0; i < eCount; i++) {
2083 const Element& srcElem2 = AA64FpOp2_x[i];
2084 Element srcElem3 = imm;
2085 Element destElem = AA64FpDestMerge_x[i];
2086 %(op)s
2087 AA64FpDest_x[i] = destElem;
2088 }''' % {'op': op}
2089 iop = InstObjParams(name, 'Sve' + Name, 'SveTerImmUnpredOp',
2090 {'code': code, 'op_class': opClass}, [])
2091 header_output += SveTerImmUnpredOpDeclare.subst(iop)
2092 exec_output += SveOpExecute.subst(iop)
2093 for type in types:
2094 substDict = {'targs' : type,
2095 'class_name' : 'Sve' + Name}
2096 exec_output += SveOpExecDeclare.subst(substDict)
2097
2098 # Generates definitions for PTRUE and PTRUES instructions.
    def svePtrueInst(name, Name, opClass, types, isFlagSetting=False,
                     decoder='Generic'):
        # PTRUE/PTRUES: set the first sveDecodePredCount(imm, eCount)
        # elements of the destination predicate, clear the rest; PTRUES
        # additionally sets NZCV from the result.
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        unsigned predCount = sveDecodePredCount(imm, eCount);
        destPred.reset();
        for (unsigned i = 0; i < eCount; i++) {
            PDest_x[i] = (i < predCount);
        }'''
        if isFlagSetting:
            # Flags are computed with the destination itself as governing
            # predicate.
            code += '''
        CondCodesNZ = (destPred.firstActive(destPred, eCount) << 1) |
                      destPred.noneActive(destPred, eCount);
        CondCodesC = !destPred.lastActive(destPred, eCount);
        CondCodesV = 0;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePtrueOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePtrueOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2127
2128 # Generate definitions for integer CMP<cc> instructions
    def sveIntCmpInst(name, Name, opClass, types, op, wideop = False,
                      decoder = 'Generic'):
        # Integer CMP<cc> instructions.  When wideop is set, the second
        # operand is read as 64-bit doubleword elements ('sd'/'ud' suffix)
        # regardless of the element type being compared.
        global header_output, exec_output, decoders
        # Signedness is inferred from the instantiated type list: signed
        # variants include int8_t, unsigned ones uint8_t.
        signed = 'int8_t' in types
        srcType = 'Element'
        op2Suffix = 'x'
        if wideop:
            srcType = 'int64_t' if signed else 'uint64_t'
            op2Suffix = 'sd' if signed else 'ud'
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto tmpPred = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            tmpPred[i] = GpOp_x[i];
        destPred.reset();
        for (unsigned i = 0; i < eCount; ++i) {
            %(srcType)s srcElem1 = (%(srcType)s) AA64FpOp1_x[i];
            %(srcType)s srcElem2 = AA64FpOp2_%(op2Suffix)s[%(op2Index)s];
            bool destElem = false;
            if (tmpPred[i]) {
                %(op)s
            }
            PDest_x[i] = destElem;
        }
        CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) |
                      destPred.noneActive(tmpPred, eCount);
        CondCodesC = !destPred.lastActive(tmpPred, eCount);
        CondCodesV = 0;''' % {
            'op': op,
            'srcType': srcType,
            'op2Suffix': op2Suffix,
            'op2Index': '(i * sizeof(Element)) / 8' if wideop else 'i'
        }
        iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpOp',
            {
                'code': code,
                'op_class': opClass,
                'op2IsWide': 'true' if wideop else 'false',
            }, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveIntCmpOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2179
2180 # Generate definitions for integer CMP<cc> instructions (with immediate)
    def sveIntCmpImmInst(name, Name, opClass, types, op, decoder = 'Generic'):
        # Integer CMP<cc> against an immediate; NZCV is always set from the
        # result predicate, governed by the saved copy of GpOp.
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto tmpPred = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            tmpPred[i] = GpOp_x[i];
        destPred.reset();
        for (unsigned i = 0; i < eCount; ++i) {
            Element srcElem1 = AA64FpOp1_x[i];
            Element srcElem2 = static_cast<Element>(imm);
            bool destElem = false;
            if (tmpPred[i]) {
                %(op)s
            }
            PDest_x[i] = destElem;
        }
        CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) |
                      destPred.noneActive(tmpPred, eCount);
        CondCodesC = !destPred.lastActive(tmpPred, eCount);
        CondCodesV = 0;'''%{'op': op}
        iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpImmOp',
                            {'code': code, 'op_class': opClass,}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveIntCmpImmOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2215
2216 # Generate definitions for SVE element count instructions
2217 def sveElemCountInst(name, Name, opClass, types, op,
2218 destType = DestType.Scalar, dstIs32b = False,
2219 dstAcc = True, decoder = 'Generic'):
2220 global header_output, exec_output, decoders
2221 code = sveEnabledCheckCode + '''
2222 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2223 xc->tcBase());
2224 unsigned count = sveDecodePredCount(pattern, eCount);
2225 '''
2226 if destType == DestType.Vector:
2227 code += '''
2228 for (unsigned i = 0; i < eCount; ++i) {
2229 Element srcElem1 = AA64FpDestMerge_x[i];
2230 Element destElem = 0;
2231 %(op)s
2232 AA64FpDest_x[i] = destElem;
2233 }'''%{'op': op}
2234 else:
2235 if 'uint16_t' in types:
2236 if dstIs32b:
2237 dstType = 'uint32_t'
2238 else:
2239 dstType = 'uint64_t'
2240 else:
2241 if dstIs32b:
2242 dstType = 'int32_t'
2243 else:
2244 dstType = 'int64_t'
2245 if dstAcc:
2246 code += '''
2247 %(dstType)s srcElem1 = XDest;
2248 '''%{'dstType': dstType}
2249 code += '''
2250 %(dstType)s destElem = 0;
2251 %(op)s;
2252 XDest = destElem;
2253 '''%{'op': op, 'dstType': dstType}
2254 iop = InstObjParams(name, 'Sve' + Name, 'SveElemCountOp',
2255 {'code': code, 'op_class': opClass, 'dstIsVec': destType,
2256 'dstIs32b': 'true' if dstIs32b else 'false'}, [])
2257 header_output += SveElemCountOpDeclare.subst(iop)
2258 exec_output += SveOpExecute.subst(iop)
2259 for type in types:
2260 substDict = {'targs' : type,
2261 'class_name' : 'Sve' + Name}
2262 exec_output += SveOpExecDeclare.subst(substDict);
2263
    def svePartBrkInst(name, Name, opClass, isFlagSetting, predType, whenBrk,
                       decoder = 'Generic'):
        # Partition-break instructions (BRKA/BRKB and flag-setting forms):
        # active elements up to the break point are set, the rest cleared.
        # whenBrk selects whether the break takes effect before or after the
        # current element; inactive elements are zeroed or merged per
        # predType.  Predicates are processed at byte ('ub') granularity.
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        bool dobreak = false;
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<uint8_t>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxGpOp[i] = GpOp_ub[i];
        }
        for (unsigned i = 0; i < eCount; ++i) {
            bool element = POp1_ub[i] == 1;
            if (auxGpOp[i]) {'''
        # Latches the break once an active source element is true.
        breakCode = '''
                dobreak = dobreak || element;'''
        if whenBrk == Break.Before:
            code += breakCode
        code += '''
                PDest_ub[i] = !dobreak;'''
        if whenBrk == Break.After:
            code += breakCode
        code += '''
            }'''
        if predType == PredType.ZERO:
            code += ''' else {
                PDest_ub[i] = 0;
            }'''
        elif predType == PredType.MERGE:
            code += ''' else {
                PDest_ub[i] = PDestMerge_ub[i];
            }'''
        code += '''
        }'''
        extraPrologCode = ''
        if isFlagSetting:
            code += '''
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
                      destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkOp',
                            {'code': code, 'op_class': opClass,
                             'isMerging': 'true' if predType == PredType.MERGE
                                          else 'false'}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePartBrkOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2315
    def svePartBrkPropPrevInst(name, Name, opClass, isFlagSetting, whenBrk,
                               decoder = 'Generic'):
        # Propagating partition-break (BRKPA/BRKPB and flag-setting forms):
        # the initial state is propagated from the previous partition via
        # POp1's last active element, then cleared once POp2 has an active
        # true element (before or after the current element per whenBrk).
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        bool last = POp1_ub.lastActive(GpOp_ub, eCount);
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<uint8_t>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxGpOp[i] = GpOp_ub[i];
        }
        for (unsigned i = 0; i < eCount; ++i) {
            if (auxGpOp[i]) {'''
        breakCode = '''
                last = last && (POp2_ub[i] == 0);'''
        if whenBrk == Break.Before:
            code += breakCode
        code += '''
                PDest_ub[i] = last;'''
        if whenBrk == Break.After:
            code += breakCode
        code += '''
            } else {
                PDest_ub[i] = 0;
            }
        }'''
        extraPrologCode = ''
        if isFlagSetting:
            code += '''
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
                      destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePartBrkPropOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2357
    def svePartBrkPropNextInst(name, Name, opClass, isFlagSetting,
                               decoder = 'Generic'):
        # BRKN(S): if the last active element of POp1 is false, clear the
        # destination; otherwise pass PDestMerge through unchanged.
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        bool last = POp1_ub.lastActive(GpOp_ub, eCount);
        for (unsigned i = 0; i < eCount; i++) {
            if (!last) {
                PDest_ub[i] = 0;
            } else {
                PDest_ub[i] = PDestMerge_ub[i];
            }
        }'''
        extraPrologCode = ''
        if isFlagSetting:
            # Flags are computed against an all-ones governing predicate.
            code += '''
        VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false>::Container c;
        VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false> predOnes(c);
        for (unsigned i = 0; i < eCount; i++) {
            predOnes[i] = 1;
        }
        CondCodesNZ = (destPred.firstActive(predOnes, eCount) << 1) |
                      destPred.noneActive(predOnes, eCount);
        CondCodesC = !destPred.lastActive(predOnes, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePartBrkPropOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2391
2392 # Generate definitions for scalar select instructions
    def sveSelectInst(name, Name, opClass, types, op, isCond,
                      destType = DstRegType.Scalar, decoder = 'Generic'):
        # Select/extract-last style instructions (LASTA/LASTB/CLASTA/...):
        # 'last' is the index of the last active governing element (or -1 if
        # none); 'op' computes destElem from it.  With isCond, the update is
        # skipped when no element is active and the destination is instead
        # preserved (vector/SIMD&FP) or truncated in place (scalar).
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        int last;
        for (last = eCount - 1; last >= 0; --last) {
            if (GpOp_x[last]) {
                break;
            }
        }
        '''
        if isCond:
            code += '''
        if (last >= 0) {'''
        code += '''
        Element destElem;
        %(op)s'''%{'op': op}
        if destType == DstRegType.Vector:
            code += '''
        for (unsigned i = 0; i < eCount; ++i)
            AA64FpDest_x[i] = destElem;'''
        elif destType == DstRegType.Scalar:
            code += '''
        XDest = destElem;'''
        elif destType == DstRegType.SimdFpScalar:
            code += '''
        AA64FpDest_x[0] = destElem;'''
        if isCond:
            code += '''
        }'''
            # No-active-element fallback, one per destination kind.
            if destType == DstRegType.Scalar:
                code += ''' else {
        XDest = (Element) XDest;
        }'''
            elif destType == DstRegType.Vector:
                code += ''' else {
        for (unsigned i = 0; i < eCount; ++i)
            AA64FpDest_x[i] = AA64FpDestMerge_x[i];
        }'''
            elif destType == DstRegType.SimdFpScalar:
                code += ''' else {
        AA64FpDest_x[0] = AA64FpDestMerge_x[0];
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveSelectOp',
                            {'code': code, 'op_class': opClass,
                             'isCond': 'true' if isCond else 'false',
                             'isScalar': 'true'
                                 if destType == DstRegType.Scalar else 'false',
                             'isSimdFp': 'true'
                                 if destType == DstRegType.SimdFpScalar
                                 else 'false'},
                            [])
        header_output += SveSelectOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2453
2454 # Generate definitions for PNEXT (find next active predicate)
2455 # instructions
    def svePNextInst(name, Name, opClass, types, decoder = 'Generic'):
        # PNEXT: find the last true element of POp1, then set in PDest the
        # next governing-active element after it (if any); flags reflect the
        # result under the saved copy of GpOp.
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxGpOp[i] = GpOp_x[i];
        }
        int last;
        for (last = eCount - 1; last >= 0; --last) {
            if (POp1_x[last]) {
                break;
            }
        }
        int next = last + 1;
        while (next < eCount && GpOp_x[next] == 0) {
            next++;
        }
        destPred.reset();
        if (next < eCount) {
            PDest_x[next] = 1;
        }
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
                      destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
        extraPrologCode = '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnaryPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2495
2496 # Generate definitions for PFIRST (set first active predicate)
2497 # instructions
    def svePFirstInst(name, Name, opClass, decoder = 'Generic'):
        # PFIRST: copy PDestMerge through, additionally setting the first
        # governing-active element; flags reflect the result.  Instantiated
        # for uint8_t only (PFIRST operates on byte granularity).
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            auxGpOp[i] = GpOp_x[i];
        int first = -1;
        for (int i = 0; i < eCount; ++i) {
            if (auxGpOp[i] && first == -1) {
                first = i;
            }
        }
        for (int i = 0; i < eCount; ++i) {
            PDest_x[i] = PDestMerge_x[i];
        }
        if (first >= 0) {
            PDest_x[first] = 1;
        }
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
                      destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
        extraPrologCode = '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnaryPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        substDict = {'targs' : 'uint8_t',
                     'class_name' : 'Sve' + Name}
        exec_output += SveOpExecDeclare.subst(substDict)
2533
2534 # Generate definitions for SVE TBL instructions
2535 def sveTblInst(name, Name, opClass, decoder = 'Generic'):
2536 global header_output, exec_output, decoders
2537 code = sveEnabledCheckCode + '''
2538 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2539 xc->tcBase());
2540 for (int i = 0; i < eCount; ++i) {
2541 Element idx = AA64FpOp2_x[i];
2542 Element val;
2543 if (idx < eCount) {
2544 val = AA64FpOp1_x[idx];
2545 } else {
2546 val = 0;
2547 }
2548 AA64FpDest_x[i] = val;
2549 }'''
2550 iop = InstObjParams(name, 'Sve' + Name, 'SveTblOp',
2551 {'code': code, 'op_class': opClass}, [])
2552 header_output += SveBinUnpredOpDeclare.subst(iop)
2553 exec_output += SveOpExecute.subst(iop)
2554 for type in unsignedTypes:
2555 substDict = {'targs' : type,
2556 'class_name' : 'Sve' + Name}
2557 exec_output += SveOpExecDeclare.subst(substDict)
2558
2559 # Generate definitions for SVE Unpack instructions
    def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
                      regType, decoder = 'Generic'):
        # Unpack instructions (PUNPK{LO,HI}, {S,U}UNPK{LO,HI}): widen the
        # low or high half of a vector or predicate source into the whole
        # destination.  sdtypes holds (source, destination) type pairs.
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>(
                xc->tcBase());'''
        if unpackHalf == Unpack.Low:
            # The low half overlaps the destination, so buffer the source
            # first — presumably because source and destination registers
            # may alias (TODO confirm).
            if regType == SrcRegType.Predicate:
                code += '''
        TheISA::VecPredRegContainer tmpPredC;
        auto auxPOp1 = tmpPredC.as<SElement>();
        for (int i = 0; i < eCount; ++i) {
            auxPOp1[i] = POp1_xs[i];
        }'''
            else:
                code += '''
        TheISA::VecRegContainer tmpVecC;
        auto auxOp1 = tmpVecC.as<SElement>();
        for (int i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_xs[i];
        }'''
        code += '''
        for (int i = 0; i < eCount; ++i) {'''
        if regType == SrcRegType.Predicate:
            if unpackHalf == Unpack.High:
                code +='''
            const SElement& srcElem1 = POp1_xs[i + eCount];'''
            else:
                code +='''
            const SElement& srcElem1 = auxPOp1[i];'''
            code += '''
            destPred.set_raw(i, 0);
            PDest_xd[i] = srcElem1;'''
        else:
            if unpackHalf == Unpack.High:
                code +='''
            const SElement& srcElem1 = AA64FpOp1_xs[i + eCount];'''
            else:
                code +='''
            const SElement& srcElem1 = auxOp1[i];'''
            code += '''
            AA64FpDest_xd[i] = static_cast<DElement>(srcElem1);'''
        code += '''
        }
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnpackOp',
                            {'code': code, 'op_class': opClass}, [])
        if regType == SrcRegType.Predicate:
            iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnpackOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        # One execute() instantiation per (source, destination) type pair.
        for srcType, dstType in sdtypes:
            substDict = {'targs': srcType + ', ' + dstType,
                         'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2617
2618 # Generate definition for SVE predicate test instructions
2619 def svePredTestInst(name, Name, opClass, decoder = 'Generic'):
2620 global header_output, exec_output, decoders
2621 code = sveEnabledCheckCode + '''
2622 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
2623 xc->tcBase());
2624 CondCodesNZ = (POp1_ub.firstActive(GpOp_ub, eCount) << 1) |
2625 POp1_ub.noneActive(GpOp_ub, eCount);
2626 CondCodesC = !POp1_ub.lastActive(GpOp_ub, eCount);
2627 CondCodesV = 0;'''
2628 iop = InstObjParams(name, 'Sve' + Name, 'SvePredTestOp',
2629 {'code': code, 'op_class': opClass}, [])
2630 header_output += SvePredicateTestOpDeclare.subst(iop)
2631 exec_output += SveNonTemplatedOpExecute.subst(iop)
2632
2633 # Generate definition for SVE predicate compact operations
    def sveCompactInst(name, Name, opClass, types, decoder = 'Generic'):
        # COMPACT: pack the active elements of the source contiguously into
        # the low end of the destination, zero-filling the remainder.  The
        # source is buffered first — presumably because source and
        # destination registers may alias (TODO confirm).
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxOp1 = tmpVecC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_x[i];
        }
        unsigned x = 0;
        for (unsigned i = 0; i < eCount; ++i) {
            AA64FpDest_x[i] = 0;
            if (GpOp_x[i]) {
                AA64FpDest_x[x] = auxOp1[i];
                x++;
            }
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveUnaryPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2659
2660 # Generate definition for unary SVE predicate instructions with implicit
2661 # source operand (PFALSE, RDFFR(S))
    def svePredUnaryWImplicitSrcInst(name, Name, opClass, op,
            predType=PredType.NONE, isFlagSetting=False, decoder='Generic'):
        # Unary predicate instructions with an implicit source (PFALSE,
        # RDFFR(S)).  'op' is the complete C++ body; the flag-setting code
        # assumes it defines eCount and writes destPred.
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + op
        if isFlagSetting:
            code += '''
        CondCodesNZ = (destPred.firstActive(GpOp, eCount) << 1) |
                      destPred.noneActive(GpOp, eCount);
        CondCodesC = !destPred.lastActive(GpOp, eCount);
        CondCodesV = 0;'''
        extraPrologCode = '''
        auto& destPred M5_VAR_USED = PDest;'''
        # Predicated variants use the governing-predicate base class.
        baseClass = ('SvePredUnaryWImplicitSrcOp' if predType == PredType.NONE
                     else 'SvePredUnaryWImplicitSrcPredOp')
        iop = InstObjParams(name, 'Sve' + Name, baseClass,
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        if predType == PredType.NONE:
            header_output += SvePredUnaryOpWImplicitSrcDeclare.subst(iop)
        else:
            header_output += SvePredUnaryPredOpWImplicitSrcDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2684
2685 # Generate definition for SVE instructions writing to the FFR (SETFFR,
2686 # WRFFR)
    def svePredWriteFfrInst(name, Name, opClass, op, isSetFfr,
            decoder='Generic'):
        # Instructions writing the FFR (SETFFR, WRFFR).  'op' is the
        # complete C++ body; the prolog aliases destPred to the FFR.
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + op
        extraPrologCode = '''
        auto& destPred M5_VAR_USED = Ffr;'''
        # SETFFR has no explicit source register, hence a different base
        # class and declaration template.
        baseClass = ('SveWImplicitSrcDstOp' if isSetFfr
                     else 'SvePredUnaryWImplicitDstOp')
        iop = InstObjParams(name, 'Sve' + Name, baseClass,
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        if isSetFfr:
            header_output += SveOpWImplicitSrcDstDeclare.subst(iop)
        else:
            header_output += SvePredUnaryOpWImplicitDstDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2703
2704 # Generate definition for SVE Ext instruction
2705 def sveExtInst(name, Name, opClass, decoder = 'Generic'):
2706 global header_output, exec_output, decoders
2707 code = sveEnabledCheckCode + '''
2708 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2709 xc->tcBase());
2710 TheISA::VecRegContainer tmpVecC;
2711 auto auxOp1 = tmpVecC.as<Element>();
2712 for (unsigned i = 0; i < eCount; ++i) {
2713 auxOp1[i] = AA64FpOp1_x[i];
2714 }
2715 uint64_t pos = imm;
2716 if (pos >= eCount)
2717 pos = 0;
2718 for (int i = 0; i < eCount; ++i, ++pos)
2719 {
2720 if (pos < eCount)
2721 AA64FpDest_x[i] = AA64FpDestMerge_x[pos];
2722 else
2723 AA64FpDest_x[i] = auxOp1[pos-eCount];
2724 }
2725 '''
2726 iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmUnpredDestrOp',
2727 {'code': code, 'op_class': opClass}, [])
2728 header_output += SveBinImmUnpredOpDeclare.subst(iop);
2729 exec_output += SveOpExecute.subst(iop)
2730 substDict = {'targs': 'uint8_t', 'class_name': 'Sve' + Name}
2731 exec_output += SveOpExecDeclare.subst(substDict)
2732
    # Generate definition for SVE Splice instruction
    def sveSpliceInst(name, Name, opClass, types, decoder = 'Generic'):
        # SPLICE: concatenate the active segment of the destructive
        # destination (from the first to the last governing-active element)
        # with leading elements of the second operand, filling the result
        # vector.  The sentinel pair firstelem=-1/lastelem=-2 makes the copy
        # loop empty when no element is active.
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxDest = tmpVecC.as<Element>();
        int firstelem = -1, lastelem = -2;
        for (int i = 0; i < eCount; ++i) {
            if (GpOp_x[i]) {
                lastelem = i;
                if (firstelem < 0)
                    firstelem = i;
            }
        }
        int x = 0;
        for (int i = firstelem; i <= lastelem; ++i, ++x) {
            auxDest[x] = AA64FpDestMerge_x[i];
        }
        int remaining = eCount - x;
        for (int i = 0; i < remaining; ++i, ++x) {
            auxDest[x] = AA64FpOp2_x[i];
        }
        for (int i = 0; i < eCount; ++i) {
            AA64FpDest_x[i] = auxDest[i];
        }
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SveBinDestrPredOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveBinDestrPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2768
2769 # Generate definition for SVE DUP (index) instruction
2770 def sveDupIndexInst(name, Name, opClass, types, decoder = 'Generic'):
2771 global header_output, exec_output, decoders
2772 code = sveEnabledCheckCode + '''
2773 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2774 xc->tcBase());
2775 Element srcElem1 = 0;
2776 if (imm < eCount) {
2777 srcElem1 = AA64FpOp1_x[imm];
2778 }
2779 for (int i = 0; i < eCount; ++i) {
2780 AA64FpDest_x[i] = srcElem1;
2781 }'''
2782 iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmIdxUnpredOp',
2783 {'code': code, 'op_class': opClass}, [])
2784 header_output += SveBinImmUnpredOpDeclare.subst(iop)
2785 exec_output += SveOpExecute.subst(iop)
2786 for type in types:
2787 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2788 exec_output += SveOpExecDeclare.subst(substDict)
2789
2790 # Generate definition for SVE reverse elements instructions
    def sveReverseElementsInst(name, Name, opClass, types,
                               srcType = SrcRegType.Vector, decoder = 'Generic'):
        # REV (vector or predicate): reverse the order of the source
        # elements.  The source is buffered first — presumably because the
        # source may alias the destination (TODO confirm).  Predicate
        # sources are copied via raw accessors to preserve the per-element
        # predicate encoding.
        assert srcType in (SrcRegType.Vector, SrcRegType.Predicate)
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        if srcType == SrcRegType.Predicate:
            code += '''
        TheISA::VecPredRegContainer tmpPredC;
        auto auxPOp1 = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            uint8_t v = POp1_x.get_raw(i);
            auxPOp1.set_raw(i, v);
        }
        PDest_x[0] = 0;'''
        else:
            code += '''
        TheISA::VecRegContainer tmpRegC;
        auto auxOp1 = tmpRegC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_x[i];
        }'''
        code += '''
        for (int i = 0; i < eCount; ++i) {'''
        if srcType == SrcRegType.Vector:
            code += '''
            AA64FpDest_x[i] = auxOp1[eCount - i - 1];'''
        else:
            code += '''
            destPred.set_raw(i, auxPOp1.get_raw(eCount - i - 1));'''
        code += '''
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryUnpredOp',
                            {'code': code, 'op_class': opClass}, [])
        if srcType == SrcRegType.Predicate:
            iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnaryUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One execute() instantiation per element type.
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2835
    # Generate definition for shift & insert instructions
    def sveShiftAndInsertInst(name, Name, opClass, types,
            srcType = SrcRegType.Scalar, decoder = 'Generic'):
        """Generate declaration and execution code for INSR: shift the
        destination vector up by one element and insert the scalar operand
        (general-purpose or SIMD&FP register) at element 0."""
        assert srcType in (SrcRegType.SimdFpScalar, SrcRegType.Scalar)
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        if srcType == SrcRegType.Scalar:
            code += '''
        auto& srcElem1 = XOp1;'''
        elif srcType == SrcRegType.SimdFpScalar:
            code += '''
        auto& srcElem1 = AA64FpOp1_x[0];'''
        # Walk downwards so the shift works in place on the dest register.
        code += '''
        for (int i = eCount - 1; i > 0; --i) {
            AA64FpDest_x[i] = AA64FpDestMerge_x[i-1];
        }
        AA64FpDest_x[0] = srcElem1;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnarySca2VecUnpredOp',
                {'code': code, 'op_class': opClass,
                 'isSimdFp': 'true' if srcType == SrcRegType.SimdFpScalar
                             else 'false'}, [])
        header_output += SveShiftAndInsertOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2864
2865 # Generate definition for DOT instructions
2866 def sveDotInst(name, Name, opClass, types, isIndexed = True):
2867 global header_output, exec_output, decoders
2868 code = sveEnabledCheckCode + '''
2869 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2870 xc->tcBase());
2871 for (int i = 0; i < eCount; ++i) {'''
2872 if isIndexed:
2873 code += '''
2874 int segbase = i - i % (16 / sizeof(Element));
2875 int s = segbase + imm;'''
2876 code += '''
2877 DElement res = AA64FpDest_xd[i];
2878 DElement srcElem1, srcElem2;
2879 for (int j = 0; j <= 3; ++j) {
2880 srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);'''
2881 if isIndexed:
2882 code += '''
2883 srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);'''
2884 else:
2885 code += '''
2886 srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);'''
2887 code += '''
2888 res += srcElem1 * srcElem2;
2889 }
2890 AA64FpDestMerge_xd[i] = res;
2891 }'''
2892 iop = InstObjParams(name, 'Sve' + Name,
2893 'SveDotProdIdxOp' if isIndexed else
2894 'SveDotProdOp',
2895 {'code': code, 'op_class': opClass}, [])
2896 if isIndexed:
2897 header_output += SveWideningTerImmOpDeclare.subst(iop)
2898 else:
2899 header_output += SveWideningTerOpDeclare.subst(iop)
2900 exec_output += SveWideningOpExecute.subst(iop)
2901 for type in types:
2902 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2903 exec_output += SveOpExecDeclare.subst(substDict)
2904
    # Generate definition for ordered reduction
    def sveOrderedReduction(name, Name, opClass, types, op,
            decoder = 'Generic'):
        """Generate declaration and execution code for an ordered FP
        reduction (FADDA): the scalar accumulator in destination element 0
        is combined with every active source element in element order, and
        all higher destination elements are zeroed.

        @param op C++ snippet combining destElem with srcElem1.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        code += '''
        Element destElem = AA64FpDestMerge_x[0];
        for (int i = 0; i < eCount; ++i) {
            if (GpOp_x[i]) {
                Element srcElem1 = AA64FpOp1_x[i];
                %(op)s
            }
        }
        for (int i = 1; i < eCount; ++i) {
            AA64FpDest_x[i] = 0;
        }
        AA64FpDest_x[0] = destElem;'''%{'op': op}
        iop = InstObjParams(name, 'Sve' + Name, 'SveOrdReducOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveReducOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2932
    # Generate definitions for complex addition instructions
    def sveComplexAddInst(name, Name, opClass, types,
            decoder = 'Generic'):
        """Generate declaration and execution code for FCADD.  Elements
        are processed as (real, imaginary) pairs; rot selects which side
        is negated before the add: rot == 1 negates the imaginary operand
        added to the real accumulator, rot == 3 negates the real operand
        added to the imaginary accumulator (presumably the 90/270 degree
        encodings -- confirm against the decoder)."""
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        code += '''
        bool sub_i = (rot == 1);
        bool sub_r = (rot == 3);
        for (int i = 0; i < eCount / 2; ++i) {
            Element acc_r = AA64FpOp1_x[2 * i];
            Element acc_i = AA64FpOp1_x[2 * i + 1];
            Element elt2_r = AA64FpOp2_x[2 * i];
            Element elt2_i = AA64FpOp2_x[2 * i + 1];

            FPSCR fpscr;
            if (GpOp_x[2 * i]) {
                if (sub_i) {
                    elt2_i = fplibNeg<Element>(elt2_i);
                }
                fpscr = (FPSCR) FpscrExc;
                acc_r = fplibAdd<Element>(acc_r, elt2_i, fpscr);
                FpscrExc = fpscr;
            }
            if (GpOp_x[2 * i + 1]) {
                if (sub_r) {
                    elt2_r = fplibNeg<Element>(elt2_r);
                }
                fpscr = (FPSCR) FpscrExc;
                acc_i = fplibAdd<Element>(acc_i, elt2_r, fpscr);
                FpscrExc = fpscr;
            }

            AA64FpDest_x[2 * i] = acc_r;
            AA64FpDest_x[2 * i + 1] = acc_i;
        }
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SveComplexOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveComplexOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2979
2980 # Generate definitions for complex multiply and accumulate instructions
2981 def sveComplexMulAddInst(name, Name, opClass, types,
2982 predType=PredType.NONE, decoder='Generic'):
2983 assert predType in (PredType.NONE, PredType.MERGE)
2984 global header_output, exec_output, decoders
2985 code = sveEnabledCheckCode + '''
2986 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2987 xc->tcBase());'''
2988 code += '''
2989 uint32_t sel_a = rot & 0x1;
2990 uint32_t sel_b = sel_a ? 0 : 1;
2991 bool neg_i = (rot & 0x2) == 1;
2992 bool neg_r = (rot & 0x1) != (rot & 0x2);'''
2993 if predType == PredType.NONE:
2994 code += '''
2995 uint32_t eltspersegment = 16 / (2 * sizeof(Element));'''
2996 code += '''
2997 for (int i = 0; i < eCount / 2; ++i) {'''
2998 if predType == PredType.NONE:
2999 code += '''
3000 uint32_t segmentbase = i - (i % eltspersegment);
3001 uint32_t s = segmentbase + imm;'''
3002 else:
3003 code += '''
3004 uint32_t s = i;'''
3005 code += '''
3006 Element addend_r = AA64FpDestMerge_x[2 * i];
3007 Element addend_i = AA64FpDestMerge_x[2 * i + 1];
3008 Element elt1_a = AA64FpOp1_x[2 * i + sel_a];
3009 Element elt2_a = AA64FpOp2_x[2 * s + sel_a];
3010 Element elt2_b = AA64FpOp2_x[2 * s + sel_b];
3011 FPSCR fpscr;
3012 '''
3013 if predType != PredType.NONE:
3014 code += '''
3015 if (GpOp_x[2 * i]) {'''
3016 code += '''
3017 if (neg_r) {
3018 elt2_a = fplibNeg<Element>(elt2_a);
3019 }
3020 fpscr = (FPSCR) FpscrExc;
3021 addend_r = fplibMulAdd<Element>(addend_r, elt1_a, elt2_a, fpscr);
3022 FpscrExc = fpscr;'''
3023 if predType != PredType.NONE:
3024 code += '''
3025 }'''
3026 if predType != PredType.NONE:
3027 code += '''
3028 if (GpOp_x[2 * i + 1]) {'''
3029 code += '''
3030 if (neg_i) {
3031 elt2_b = fplibNeg<Element>(elt2_b);
3032 }
3033 fpscr = (FPSCR) FpscrExc;
3034 addend_i = fplibMulAdd<Element>(addend_i, elt1_a, elt2_b, fpscr);
3035 FpscrExc = fpscr;'''
3036 if predType != PredType.NONE:
3037 code += '''
3038 }'''
3039 code += '''
3040 AA64FpDest_x[2 * i] = addend_r;
3041 AA64FpDest_x[2 * i + 1] = addend_i;
3042 }'''
3043 iop = InstObjParams(name, 'Sve' + Name,
3044 'SveComplexIdxOp' if predType == PredType.NONE
3045 else 'SveComplexOp',
3046 {'code': code, 'op_class': opClass}, [])
3047 if predType == PredType.NONE:
3048 header_output += SveComplexIndexOpDeclare.subst(iop)
3049 else:
3050 header_output += SveComplexOpDeclare.subst(iop)
3051 exec_output += SveOpExecute.subst(iop)
3052 for type in types:
3053 substDict = {'targs' : type,
3054 'class_name' : 'Sve' + Name}
3055 exec_output += SveOpExecDeclare.subst(substDict)
3056
3057 fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
3058 signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
3059 unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
3060
3061 smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
3062 bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
3063 smallUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t')
3064 bigUnsignedTypes = ('uint16_t', 'uint32_t', 'uint64_t')
3065
3066 unsignedWideSDTypes = (('uint8_t', 'uint16_t'),
3067 ('uint16_t', 'uint32_t'), ('uint32_t', 'uint64_t'))
3068 signedWideSDTypes = (('int8_t', 'int16_t'),
3069 ('int16_t', 'int32_t'), ('int32_t', 'int64_t'))
3070
3071 # ABS
3072 absCode = 'destElem = (Element) std::abs(srcElem1);'
3073 sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
3074 PredType.MERGE)
3075 # ADD (immediate)
3076 sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
3077 # ADD (vectors, predicated)
3078 addCode = 'destElem = srcElem1 + srcElem2;'
3079 sveBinInst('add', 'AddPred', 'SimdAddOp', unsignedTypes, addCode,
3080 PredType.MERGE, True)
3081 # ADD (vectors, unpredicated)
3082 addCode = 'destElem = srcElem1 + srcElem2;'
3083 sveBinInst('add', 'AddUnpred', 'SimdAddOp', unsignedTypes, addCode)
    # ADDPL
    # addvlCode is parameterized on the element width: instantiated with
    # uint64_t it yields the predicate-length multiple (VL / 64 bytes per
    # count unit), with uint8_t the vector-length multiple (VL / 8).
    addvlCode = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint%d_t>(
                xc->tcBase());
        XDest = XOp1 + eCount * (int64_t) imm;
    '''
    buildXImmDataInst('addpl', addvlCode % 64, buildCc=False)
    # ADDVL
    buildXImmDataInst('addvl', addvlCode % 8, buildCc=False)
    # ADR
    adrCode = '''
        if (offsetFormat == SveAdrOffsetUnpackedSigned) {
            srcElem2 = sext<32>(srcElem2 & mask(32));
        } else if (offsetFormat == SveAdrOffsetUnpackedUnsigned) {
            srcElem2 = srcElem2 & mask(32);
        }
        destElem = srcElem1 + srcElem2 * mult;
    '''
    sveAdrInst('adr', 'Adr', 'SimdAddOp', ('uint32_t', 'uint64_t'), adrCode)
    # AND (immediate)
    andCode = 'destElem = srcElem1 & srcElem2;'
    sveWideImmInst('and', 'AndImm', 'SimdAluOp', ('uint64_t',), andCode)
    # AND (vectors, predicated)
    sveBinInst('and', 'AndPred', 'SimdAluOp', unsignedTypes, andCode,
               PredType.MERGE, True)
    # AND (vectors, unpredicated)
    andCode = 'destElem = srcElem1 & srcElem2;'
    sveBinInst('and', 'AndUnpred', 'SimdAluOp', ('uint64_t',), andCode)
    # AND, ANDS (predicates)
    svePredLogicalInst('and', 'PredAnd', 'SimdPredAluOp', ('uint8_t',),
                       andCode)
    svePredLogicalInst('ands', 'PredAnds', 'SimdPredAluOp', ('uint8_t',),
                       andCode, isFlagSetting=True)
    # ANDV
    andvCode = 'destElem &= srcElem1;'
    sveAssocReducInst('andv', 'Andv', 'SimdReduceAluOp', unsignedTypes,
                      andvCode, 'std::numeric_limits<Element>::max()')
    # ASR (immediate, predicated)
    # Arithmetic shift right done on the unsigned container type: the
    # sign is re-created by OR-ing in the high-order mask bits.
    asrCode = '''
        int sign_bit = bits(srcElem1, sizeof(Element) * 8 - 1);
        if (srcElem2 == 0) {
            destElem = srcElem1;
        } else if (srcElem2 >= sizeof(Element) * 8) {
            destElem = sign_bit ? std::numeric_limits<Element>::max() : 0;
        } else {
            destElem = srcElem1 >> srcElem2;
            if (sign_bit) {
                destElem |= ~mask(sizeof(Element) * 8 - srcElem2);
            }
        }
    '''
    sveBinImmInst('asr', 'AsrImmPred', 'SimdAluOp', unsignedTypes, asrCode,
                  PredType.MERGE)
    # ASR (immediate, unpredicated)
    sveBinImmInst('asr', 'AsrImmUnpred', 'SimdAluOp', unsignedTypes, asrCode)
    # ASR (vectors)
    sveBinInst('asr', 'AsrPred', 'SimdAluOp', unsignedTypes, asrCode,
               PredType.MERGE, True)
    # ASR (wide elements, predicated)
    sveShiftByWideElemsInst('asr', 'AsrWidePred', 'SimdAluOp', unsignedTypes,
                            asrCode, PredType.MERGE)
    # ASR (wide elements, unpredicated)
    sveShiftByWideElemsInst('asr', 'AsrWideUnpred', 'SimdAluOp', unsignedTypes,
                            asrCode)
    # ASRD
    # Signed divide by 2^shift rounding towards zero: negative inputs are
    # biased by (2^shift - 1) before the shift.
    asrdCode = '''
        Element element1 = srcElem1;
        Element shift = srcElem2;
        if (srcElem1 < 0) {
            Element tmp = ((1L << shift) - 1L);
            if (tmp == -1L) {
                element1 = 0;
            } else {
                element1 = element1 + tmp;
            }
        }
        destElem = (element1 >> shift);
    '''
    sveBinImmInst('asrd', 'Asrd', 'SimdAluOp', signedTypes, asrdCode,
                  PredType.MERGE)
    # ASRR
    # Reversed-operand form of ASR: shifts srcElem2 by srcElem1.
    asrrCode = '''
        int sign_bit = bits(srcElem2, sizeof(Element) * 8 - 1);
        if (srcElem1 == 0) {
            destElem = srcElem2;
        } else if (srcElem1 >= sizeof(Element) * 8) {
            destElem = sign_bit ? std::numeric_limits<Element>::max() : 0;
        } else {
            destElem = srcElem2 >> srcElem1;
            if (sign_bit) {
                destElem |= ~mask(sizeof(Element) * 8 - srcElem1);
            }
        }
    '''
    sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
               PredType.MERGE, True)
    # BIC (vectors, predicated)
    bicCode = 'destElem = srcElem1 & ~srcElem2;'
    sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
               PredType.MERGE, True)
    # BIC (vectors, unpredicated)
    sveBinInst('bic', 'BicUnpred', 'SimdAluOp', unsignedTypes, bicCode)
    # BIC, BICS (predicates)
    # Predicate forms operate on single active bits, hence logical ops.
    bicCode = 'destElem = srcElem1 && !srcElem2;'
    svePredLogicalInst('bic', 'PredBic', 'SimdPredAluOp', ('uint8_t',),
                       bicCode)
    svePredLogicalInst('bics', 'PredBics', 'SimdPredAluOp', ('uint8_t',),
                       bicCode, isFlagSetting=True)
    # BRKA (merging)
    svePartBrkInst('brka', 'Brkam', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.MERGE, whenBrk = Break.After)
    # BRKA (zeroing)
    svePartBrkInst('brka', 'Brkaz', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.ZERO, whenBrk = Break.After)
    # BRKAS
    svePartBrkInst('brkas', 'Brkas', 'SimdPredAluOp', isFlagSetting = True,
                   predType = PredType.ZERO, whenBrk = Break.After)
    # BRKB (merging)
    svePartBrkInst('brkb', 'Brkbm', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.MERGE, whenBrk = Break.Before)
    # BRKB (zeroing)
    svePartBrkInst('brkb', 'Brkbz', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.ZERO, whenBrk = Break.Before)
    # BRKBS
    svePartBrkInst('brkbs', 'Brkbs', 'SimdPredAluOp', isFlagSetting = True,
                   predType = PredType.ZERO, whenBrk = Break.Before)
    # BRKN
    svePartBrkPropNextInst('brkn', 'Brkn', 'SimdPredAluOp',
                           isFlagSetting = False)
    # BRKNS
    svePartBrkPropNextInst('brkns', 'Brkns', 'SimdPredAluOp',
                           isFlagSetting = True)
    # BRKPA
    svePartBrkPropPrevInst('brkpa', 'Brkpa', 'SimdPredAluOp',
                           isFlagSetting = False, whenBrk = Break.After)
    # BRKPAS
    svePartBrkPropPrevInst('brkpas', 'Brkpas', 'SimdPredAluOp',
                           isFlagSetting = True, whenBrk = Break.After)
    # BRKPB
    svePartBrkPropPrevInst('brkpb', 'Brkpb', 'SimdPredAluOp',
                           isFlagSetting = False, whenBrk = Break.Before)
    # BRKPBS
    svePartBrkPropPrevInst('brkpbs', 'Brkpbs', 'SimdPredAluOp',
                           isFlagSetting = True, whenBrk = Break.Before)
    # CLASTA (scalar)
    # 'last' holds the index of the last active element; CLASTA selects
    # the element after it (wrapping to 0), CLASTB the element itself.
    clastaCode = '''
        last++;
        if (last >= eCount)
            last = 0;
        destElem = AA64FpOp1_x[last];'''
    sveSelectInst('clasta', 'Clasta', 'SimdAluOp', unsignedTypes, clastaCode,
                  isCond = True, destType = DstRegType.Scalar)
    # CLASTA (SIMD&FP scalar)
    sveSelectInst('clasta', 'Clastaf', 'SimdAluOp', unsignedTypes, clastaCode,
                  isCond = True, destType = DstRegType.SimdFpScalar)
    # CLASTA (vector)
    sveSelectInst('clasta', 'Clastav', 'SimdAluOp', unsignedTypes, clastaCode,
                  isCond = True, destType = DstRegType.Vector)
    # CLASTB (scalar)
    clastbCode = '''
        destElem = AA64FpOp1_x[last];'''
    sveSelectInst('clastb', 'Clastb', 'SimdAluOp', unsignedTypes, clastbCode,
                  isCond = True, destType = DstRegType.Scalar)
    # CLASTB (SIMD&FP scalar)
    sveSelectInst('clastb', 'Clastbf', 'SimdAluOp', unsignedTypes, clastbCode,
                  isCond = True, destType = DstRegType.SimdFpScalar)
    # CLASTB (vectors)
    sveSelectInst('clastb', 'Clastbv', 'SimdAluOp', unsignedTypes, clastbCode,
                  isCond = True, destType = DstRegType.Vector)
    # CLS
    # Count leading sign bits (excluding the sign bit itself) by shifting
    # left until the top bit flips.
    clsCode = '''
        destElem = 0;
        Element val = srcElem1;
        if (val < 0) {
            val <<= 1;
            while (val < 0) {
                destElem++;
                val <<= 1;
            }
        } else {
            val <<= 1;
            while (val >= 0 && destElem < sizeof(Element) * 8 - 1) {
                destElem++;
                val <<= 1;
            }
        }
    '''
    sveUnaryInst('cls', 'Cls', 'SimdAluOp', signedTypes, clsCode,
                 PredType.MERGE)
    # CLZ
    # Signed element type is required so the 'val >= 0' test detects the
    # first set bit reaching the MSB.
    clzCode = '''
        destElem = 0;
        Element val = srcElem1;
        while (val >= 0 && destElem < sizeof(Element) * 8) {
            destElem++;
            val <<= 1;
        }
    '''
    sveUnaryInst('clz', 'Clz', 'SimdAluOp', signedTypes, clzCode,
                 PredType.MERGE)
    # CMPEQ (immediate)
    cmpeqCode = '''
        destElem = (srcElem1 == srcElem2);
    '''
    sveIntCmpImmInst('cmpeq', 'Cmpeqi', 'SimdCmpOp', unsignedTypes, cmpeqCode)
    # CMPEQ (vectors)
    sveIntCmpInst('cmpeq', 'Cmpeq', 'SimdCmpOp', unsignedTypes, cmpeqCode)
    # CMPEQ (wide elements)
    sveIntCmpInst('cmpeq', 'Cmpeqw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpeqCode, True)
    # CMPGE (immediate)
    cmpgeCode = '''
        destElem = (srcElem1 >= srcElem2);
    '''
    sveIntCmpImmInst('cmpge', 'Cmpgei', 'SimdCmpOp', signedTypes, cmpgeCode)
    # CMPGE (vectors)
    sveIntCmpInst('cmpge', 'Cmpge', 'SimdCmpOp', signedTypes, cmpgeCode)
    # CMPGE (wide elements)
    sveIntCmpInst('cmpge', 'Cmpgew', 'SimdCmpOp', smallSignedTypes,
                  cmpgeCode, True)
3304 # CMPGT (immediate)
3305 cmpgtCode = '''
3306 destElem = (srcElem1 > srcElem2);
3307 '''
3308 sveIntCmpImmInst('cmpge', 'Cmpgti', 'SimdCmpOp', signedTypes, cmpgtCode)
3309 # CMPGT (vectors)
3310 sveIntCmpInst('cmpge', 'Cmpgt', 'SimdCmpOp', signedTypes, cmpgtCode)
3311 # CMPGT (wide elements)
3312 sveIntCmpInst('cmpge', 'Cmpgtw', 'SimdCmpOp', smallSignedTypes,
3313 cmpgtCode, True)
    # CMPHI (immediate)
    # Unsigned compares reuse the GT/GE/LT/LE snippets; the unsigned
    # element types passed in make the C++ comparison unsigned.
    sveIntCmpImmInst('cmphi', 'Cmphii', 'SimdCmpOp', unsignedTypes, cmpgtCode)
    # CMPHI (vectors)
    sveIntCmpInst('cmphi', 'Cmphi', 'SimdCmpOp', unsignedTypes, cmpgtCode)
    # CMPHI (wide elements)
    sveIntCmpInst('cmphi', 'Cmphiw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpgtCode, True)
    # CMPHS (immediate)
    sveIntCmpImmInst('cmphs', 'Cmphsi', 'SimdCmpOp', unsignedTypes, cmpgeCode)
    # CMPHS (vectors)
    sveIntCmpInst('cmphs', 'Cmphs', 'SimdCmpOp', unsignedTypes, cmpgeCode)
    # CMPHS (wide elements)
    sveIntCmpInst('cmphs', 'Cmphsw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpgeCode, True)
    # CMPLE (immediate)
    cmpleCode = '''
        destElem = (srcElem1 <= srcElem2);
    '''
    sveIntCmpImmInst('cmple', 'Cmplei', 'SimdCmpOp', signedTypes, cmpleCode)
    # CMPLE (wide elements)
    sveIntCmpInst('cmple', 'Cmplew', 'SimdCmpOp', smallSignedTypes,
                  cmpleCode, True)
    # CMPLO (immediate)
    cmpltCode = '''
        destElem = (srcElem1 < srcElem2);
    '''
    sveIntCmpImmInst('cmplo', 'Cmploi', 'SimdCmpOp', unsignedTypes, cmpltCode)
    # CMPLO (wide elements)
    sveIntCmpInst('cmplo', 'Cmplow', 'SimdCmpOp', smallUnsignedTypes,
                  cmpltCode, True)
    # CMPLS (immediate)
    sveIntCmpImmInst('cmpls', 'Cmplsi', 'SimdCmpOp', unsignedTypes, cmpleCode)
    # CMPLS (wide elements)
    sveIntCmpInst('cmpls', 'Cmplsw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpleCode, True)
    # CMPLT (immediate)
    sveIntCmpImmInst('cmplt', 'Cmplti', 'SimdCmpOp', signedTypes, cmpltCode)
    # CMPLT (wide elements)
    sveIntCmpInst('cmplt', 'Cmpltw', 'SimdCmpOp', smallSignedTypes,
                  cmpltCode, True)
3354 # CMPNE (immediate)
3355 cmpneCode = '''
3356 destElem = (srcElem1 != srcElem2);
3357 '''
3358 sveIntCmpImmInst('cmpeq', 'Cmpnei', 'SimdCmpOp', unsignedTypes, cmpneCode)
3359 # CMPNE (vectors)
3360 sveIntCmpInst('cmpeq', 'Cmpne', 'SimdCmpOp', unsignedTypes, cmpneCode)
3361 # CMPNE (wide elements)
3362 sveIntCmpInst('cmpeq', 'Cmpnew', 'SimdCmpOp', smallUnsignedTypes,
3363 cmpneCode, True)
3364 # CNOT
3365 cnotCode = '''
3366 destElem = srcElem1?0:1;
3367 '''
3368 sveUnaryInst('cnot', 'Cnot', 'SimdAluOp', unsignedTypes, cnotCode,
3369 PredType.MERGE)
3370 # CNT
3371 cntCode = '''
3372 destElem = 0;
3373 Element val = srcElem1;
3374 while (val) {
3375 destElem += val & 0x1;
3376 val >>= 1;
3377 }
3378 '''
3379 sveUnaryInst('cnt', 'Cnt', 'SimdAluOp', unsignedTypes, cntCode,
3380 PredType.MERGE)
3381 # CNTB, CNTD, CNTH, CNTW
3382 cntxCode = '''
3383 destElem = (count * imm);
3384 '''
3385 sveElemCountInst('cnt', 'Cntx', 'SimdAluOp', unsignedTypes, cntxCode,
3386 destType = DestType.Scalar, dstIs32b = False, dstAcc = False)
3387 # COMPACT
3388 sveCompactInst('compact', 'Compact', 'SimdPredAluOp',
3389 ('uint32_t', 'uint64_t'))
3390 # CPY (immediate)
3391 dupCode = 'destElem = srcElem1;'
3392 sveWideImmInst('cpy', 'CpyImmMerge', 'SimdAluOp', unsignedTypes, dupCode,
3393 predType=PredType.MERGE, isUnary=True)
3394 sveWideImmInst('cpy', 'CpyImmZero', 'SimdAluOp', unsignedTypes, dupCode,
3395 predType=PredType.ZERO, isUnary=True)
3396 # CPY (scalar)
3397 sveUnaryInst('cpy', 'CpyScalar', 'SimdAluOp', unsignedTypes, dupCode,
3398 PredType.MERGE, srcRegType=SrcRegType.Scalar)
3399 # CPY (SIMD&FP scalar)
3400 sveUnaryInst('cpy', 'CpySimdFpScalar', 'SimdAluOp', unsignedTypes, dupCode,
3401 PredType.MERGE, srcRegType=SrcRegType.SimdFpScalar)
3402 # CNTP
3403 svePredCountPredInst('cntp', 'Cntp', 'SimdAluOp', unsignedTypes)
3404 # CTERMEQ
3405 cteqCode = '''
3406 destElem = srcElem1 == srcElem2;
3407 '''
3408 sveCompTermInst('ctermeq', 'Ctermeq', 'IntAluOp',
3409 ['uint32_t', 'uint64_t'], cteqCode)
3410 # CTERMNE
3411 ctneCode = '''
3412 destElem = srcElem1 != srcElem2;
3413 '''
3414 sveCompTermInst('ctermne', 'Ctermne', 'IntAluOp',
3415 ['uint32_t', 'uint64_t'], ctneCode)
3416 # DECB, DECH, DECW, DECD (scalar)
3417 decxCode = '''
3418 destElem = srcElem1 - (count * imm);
3419 '''
3420 sveElemCountInst('dec', 'Dec', 'SimdAluOp', unsignedTypes, decxCode,
3421 destType = DestType.Scalar, dstIs32b = False)
3422 # DECH, DECW, DECD (vector)
3423 sveElemCountInst('dec', 'Decv', 'SimdAluOp', bigUnsignedTypes, decxCode,
3424 destType = DestType.Vector, dstIs32b = False)
3425 # DECP (scalar)
3426 decpCode = '''
3427 XDest = XDest - count;
3428 '''
3429 svePredCountInst('decp', 'Decp', 'SimdAluOp', unsignedTypes, decpCode,
3430 DestType.Scalar, SrcSize.Src64bit)
3431 # DECP (vector)
3432 decpvCode = '''
3433 destElem = srcElem - count;
3434 '''
3435 svePredCountInst('decp', 'Decpv', 'SimdAluOp', unsignedTypes, decpvCode,
3436 DestType.Vector)
3437 # DUP (immediate)
3438 sveWideImmInst('dup', 'DupImm', 'SimdAluOp', unsignedTypes, dupCode,
3439 isUnary=True)
3440 # DUP (indexed)
3441 sveDupIndexInst('mov', 'DupIdx', 'SimdAluOp',
3442 list(unsignedTypes) + ['__uint128_t'])
3443 # DUP (scalar)
3444 sveUnaryInst('dup', 'DupScalar', 'SimdAluOp', unsignedTypes, dupCode,
3445 PredType.NONE, srcRegType=SrcRegType.Scalar)
3446 # DUPM
3447 sveWideImmInst('dupm', 'Dupm', 'SimdAluOp', unsignedTypes, dupCode,
3448 isUnary=True)
3449 # EOR (immediate)
3450 eorCode = 'destElem = srcElem1 ^ srcElem2;'
3451 sveWideImmInst('eor', 'EorImm', 'SimdAluOp', ('uint64_t',), eorCode)
3452 # EOR (vectors, predicated)
3453 sveBinInst('eor', 'EorPred', 'SimdAluOp', unsignedTypes, eorCode,
3454 PredType.MERGE, True)
3455 # EOR (vectors, unpredicated)
3456 eorCode = 'destElem = srcElem1 ^ srcElem2;'
3457 sveBinInst('eor', 'EorUnpred', 'SimdAluOp', ('uint64_t',), eorCode)
3458 # EOR, EORS (predicates)
3459 svePredLogicalInst('eor', 'PredEor', 'SimdPredAluOp', ('uint8_t',),
3460 eorCode)
3461 svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
3462 eorCode, isFlagSetting=True)
3463 # EORV
3464 eorvCode = 'destElem ^= srcElem1;'
3465 sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
3466 eorvCode, '0')
3467 # EXT
3468 sveExtInst('ext', 'Ext', 'SimdAluOp')
3469 # FABD
3470 fpOp = '''
3471 FPSCR fpscr = (FPSCR) FpscrExc;
3472 destElem = %s;
3473 FpscrExc = fpscr;
3474 '''
3475 fabdCode = fpOp % 'fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))'
3476 sveBinInst('fabd', 'Fabd', 'SimdFloatAddOp', floatTypes, fabdCode,
3477 PredType.MERGE, True)
3478 # FABS
3479 fabsCode = 'destElem = fplibAbs<Element>(srcElem1);'
3480 sveUnaryInst('fabs', 'Fabs', 'SimdFloatAluOp', fpTypes, fabsCode,
3481 PredType.MERGE)
3482 # FACGE
3483 fpCmpAbsOp = fpOp % ('fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),'
3484 ' fplibAbs<Element>(srcElem2), fpscr)')
3485 facgeCode = fpCmpAbsOp % 'GE'
3486 sveCmpInst('facge', 'Facge', 'SimdFloatCmpOp', fpTypes, facgeCode)
3487 # FACGT
3488 facgtCode = fpCmpAbsOp % 'GT'
3489 sveCmpInst('facgt', 'Facgt', 'SimdFloatCmpOp', fpTypes, facgtCode)
3490 # FADD (immediate)
3491 fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)'
3492 faddCode = fpBinOp % 'Add'
3493 sveBinImmInst('fadd', 'FaddImm', 'SimdFloatAddOp', floatTypes, faddCode,
3494 PredType.MERGE)
3495 # FADD (vectors, predicated)
3496 sveBinInst('fadd', 'FaddPred', 'SimdFloatAddOp', floatTypes, faddCode,
3497 PredType.MERGE, True)
3498 # FADD (vectors, unpredicated)
3499 sveBinInst('fadd', 'FaddUnpred', 'SimdFloatAddOp', floatTypes, faddCode)
3500 # FADDA
3501 fpAddaOp = '''
3502 FPSCR fpscr = (FPSCR) FpscrExc;
3503 destElem = fplibAdd<Element>(destElem, srcElem1, fpscr);
3504 FpscrExc = FpscrExc | fpscr;
3505 '''
3506 sveOrderedReduction('fadda', 'Fadda', 'SimdFloatReduceAddOp', floatTypes,
3507 fpAddaOp)
3508 # FADDV
3509 fpReduceOp = '''
3510 FPSCR fpscr = (FPSCR) FpscrExc;
3511 destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
3512 FpscrExc = FpscrExc | fpscr;
3513 '''
3514 faddvCode = fpReduceOp % 'Add'
3515 sveNonAssocReducInst('faddv', 'Faddv', 'SimdFloatReduceAddOp', floatTypes,
3516 faddvCode, '0')
3517 # FCADD
3518 sveComplexAddInst('fcadd','Fcadd', 'SimdFloatAddOp', fpTypes)
3519 # FCMEQ (vectors)
3520 fpCmpOp = fpOp % ('fplibCompare%s<Element>(srcElem1, srcElem2, fpscr)')
3521 fcmeqCode = fpCmpOp % 'EQ'
3522 sveCmpInst('fcmeq', 'Fcmeq', 'SimdFloatCmpOp', fpTypes, fcmeqCode)
3523 # FCMEQ (zero)
3524 fpCmpZeroOp = fpOp % 'fplibCompare%s<Element>(srcElem1, 0, fpscr)'
3525 fcmeqZeroCode = fpCmpZeroOp % 'EQ'
3526 sveCmpInst('fcmeq', 'FcmeqZero', 'SimdFloatCmpOp', fpTypes, fcmeqZeroCode,
3527 True)
3528 # FCMGE (vectors)
3529 fcmgeCode = fpCmpOp % 'GE'
3530 sveCmpInst('fcmge', 'Fcmge', 'SimdFloatCmpOp', fpTypes, fcmgeCode)
3531 # FCMGE (zero)
3532 fcmgeZeroCode = fpCmpZeroOp % 'GE'
3533 sveCmpInst('fcmge', 'FcmgeZero', 'SimdFloatCmpOp', fpTypes, fcmgeZeroCode,
3534 True)
3535 # FCMGT (vectors)
3536 fcmgtCode = fpCmpOp % 'GT'
3537 sveCmpInst('fcmgt', 'Fcmgt', 'SimdFloatCmpOp', fpTypes, fcmgtCode)
3538 # FCMGT (zero)
3539 fcmgtZeroCode = fpCmpZeroOp % 'GT'
3540 sveCmpInst('fcmgt', 'FcmgtZero', 'SimdFloatCmpOp', fpTypes, fcmgtZeroCode,
3541 True)
3542 # FCMLE (zero)
3543 fpCmpRevZeroOp = fpOp % ('fplibCompare%s<Element>(0, srcElem1, fpscr)')
3544 fcmleZeroCode = fpCmpRevZeroOp % 'GE'
3545 sveCmpInst('fcmle', 'FcmleZero', 'SimdFloatCmpOp', fpTypes, fcmleZeroCode,
3546 True)
3547 # FCMLT (zero)
3548 fcmltZeroCode = fpCmpRevZeroOp % 'GT'
3549 sveCmpInst('fcmlt', 'FcmltZero', 'SimdFloatCmpOp', fpTypes, fcmltZeroCode,
3550 True)
3551 # FCMNE (vectors)
3552 fcmneCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, srcElem2, fpscr)')
3553 sveCmpInst('fcmne', 'Fcmne', 'SimdFloatCmpOp', fpTypes, fcmneCode)
3554 # FCMNE (zero)
3555 fcmneZeroCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, 0, fpscr)')
3556 sveCmpInst('fcmne', 'FcmneZero', 'SimdFloatCmpOp', fpTypes, fcmneZeroCode,
3557 True)
3558 # FCMUO (vectors)
3559 fcmuoCode = fpCmpOp % 'UN'
3560 sveCmpInst('fcmuo', 'Fcmuo', 'SimdFloatCmpOp', fpTypes, fcmuoCode)
3561 # FCMLA (indexed)
3562 sveComplexMulAddInst('fcmla', 'Fcmlai', 'SimdFloatMultAccOp',
3563 fpTypes[1:], predType = PredType.NONE)
3564 # FCMLA (vectors)
3565 sveComplexMulAddInst('fcmla', 'Fcmlav', 'SimdFloatMultAccOp',
3566 fpTypes, predType = PredType.MERGE)
3567 # FCPY
3568 sveWideImmInst('fcpy', 'Fcpy', 'SimdAluOp', unsignedTypes, dupCode,
3569 predType=PredType.MERGE, isUnary=True)
3570 # FCVT
3571 fcvtCode = fpOp % ('fplibConvert<SElement, DElement>('
3572 'srcElem1, FPCRRounding(fpscr), fpscr)')
3573 sveCvtInst('fcvt', 'FcvtNarrow', 'SimdCvtOp',
3574 ('uint32_t, uint16_t',
3575 'uint64_t, uint16_t',
3576 'uint64_t, uint32_t'),
3577 fcvtCode, CvtDir.Narrow)
3578 sveCvtInst('fcvt', 'FcvtWiden', 'SimdCvtOp',
3579 ('uint16_t, uint32_t',
3580 'uint16_t, uint64_t',
3581 'uint32_t, uint64_t'),
3582 fcvtCode, CvtDir.Widen)
3583 # FCVTZS
3584 fcvtIntCode = fpOp % ('fplibFPToFixed<SElement, DElement>('
3585 'srcElem1, %s, %s, %s, fpscr)')
3586 fcvtzsCode = fcvtIntCode % ('0', 'false', 'FPRounding_ZERO')
3587 sveCvtInst('fcvtzs', 'FcvtzsNarrow', 'SimdCvtOp',
3588 ('uint16_t, uint16_t',
3589 'uint32_t, uint32_t',
3590 'uint64_t, uint32_t',
3591 'uint64_t, uint64_t'),
3592 fcvtzsCode, CvtDir.Narrow)
3593 sveCvtInst('fcvtzs', 'FcvtzsWiden', 'SimdCvtOp',
3594 ('uint16_t, uint32_t',
3595 'uint16_t, uint64_t',
3596 'uint32_t, uint64_t'),
3597 fcvtzsCode, CvtDir.Widen)
3598 # FCVTZU
3599 fcvtzuCode = fcvtIntCode % ('0', 'true', 'FPRounding_ZERO')
3600 sveCvtInst('fcvtzu', 'FcvtzuNarrow', 'SimdCvtOp',
3601 ('uint16_t, uint16_t',
3602 'uint32_t, uint32_t',
3603 'uint64_t, uint32_t',
3604 'uint64_t, uint64_t'),
3605 fcvtzuCode, CvtDir.Narrow)
3606 sveCvtInst('fcvtzu', 'FcvtzuWiden', 'SimdCvtOp',
3607 ('uint16_t, uint32_t',
3608 'uint16_t, uint64_t',
3609 'uint32_t, uint64_t'),
3610 fcvtzuCode, CvtDir.Widen)
3611 # FDIV
# FDIV: predicated floating-point divide (merging).
fdivCode = fpBinOp % 'Div'
sveBinInst('fdiv', 'Fdiv', 'SimdFloatDivOp', floatTypes, fdivCode,
           PredType.MERGE, True)
# FDIVR: reversed divide; fpBinRevOp swaps the operand order
# (srcElem2 op srcElem1) relative to fpBinOp.
fpBinRevOp = fpOp % 'fplib%s<Element>(srcElem2, srcElem1, fpscr)'
fdivrCode = fpBinRevOp % 'Div'
sveBinInst('fdivr', 'Fdivr', 'SimdFloatDivOp', floatTypes, fdivrCode,
           PredType.MERGE, True)
# FDUP
sveWideImmInst('fdup', 'Fdup', 'SimdFloatAluOp', floatTypes, dupCode,
               isUnary=True)
# FEXPA
fexpaCode = 'destElem = fplibExpA<Element>(srcElem1);'
sveUnaryInst('fexpa', 'Fexpa', 'SimdFloatAluOp', fpTypes, fexpaCode)
# FMAD: fused multiply-add, destructive in the multiplicand
# (destElem is the second product operand).
fmadCode = fpOp % ('fplibMulAdd<Element>('
                   'srcElem1, destElem, srcElem2, fpscr)')
sveTerInst('fmad', 'Fmad', 'SimdFloatMultAccOp', floatTypes, fmadCode,
           PredType.MERGE)
# FMAX (immediate)
fmaxCode = fpBinOp % 'Max'
sveBinImmInst('fmax', 'FmaxImm', 'SimdFloatCmpOp', floatTypes, fmaxCode,
              PredType.MERGE)
# FMAX (vectors)
sveBinInst('fmax', 'Fmax', 'SimdFloatCmpOp', floatTypes, fmaxCode,
           PredType.MERGE, True)
# FMAXNM (immediate)
fmaxnmCode = fpBinOp % 'MaxNum'
sveBinImmInst('fmaxnm', 'FmaxnmImm', 'SimdFloatCmpOp', floatTypes,
              fmaxnmCode, PredType.MERGE)
# FMAXNM (vectors)
sveBinInst('fmaxnm', 'Fmaxnm', 'SimdFloatCmpOp', floatTypes, fmaxnmCode,
           PredType.MERGE, True)
# FMAXNMV: non-associative reduction; identity value is the default NaN.
fmaxnmvCode = fpReduceOp % 'MaxNum'
sveNonAssocReducInst('fmaxnmv', 'Fmaxnmv', 'SimdFloatReduceCmpOp',
                     floatTypes, fmaxnmvCode, 'fplibDefaultNaN<Element>()')
# FMAXV: identity value is negative infinity (fplibInfinity sign bit 1).
fmaxvCode = fpReduceOp % 'Max'
sveNonAssocReducInst('fmaxv', 'Fmaxv', 'SimdFloatReduceCmpOp', floatTypes,
                     fmaxvCode, 'fplibInfinity<Element>(1)')
# FMIN (immediate)
fminCode = fpBinOp % 'Min'
sveBinImmInst('fmin', 'FminImm', 'SimdFloatCmpOp', floatTypes, fminCode,
              PredType.MERGE)
# FMIN (vectors)
sveBinInst('fmin', 'Fmin', 'SimdFloatCmpOp', floatTypes, fminCode,
           PredType.MERGE, True)
# FMINNM (immediate)
fminnmCode = fpBinOp % 'MinNum'
sveBinImmInst('fminnm', 'FminnmImm', 'SimdFloatCmpOp', floatTypes,
              fminnmCode, PredType.MERGE)
# FMINNM (vectors)
sveBinInst('fminnm', 'Fminnm', 'SimdFloatCmpOp', floatTypes, fminnmCode,
           PredType.MERGE, True)
# FMINNMV
fminnmvCode = fpReduceOp % 'MinNum'
sveNonAssocReducInst('fminnmv', 'Fminnmv', 'SimdFloatReduceCmpOp',
                     floatTypes, fminnmvCode, 'fplibDefaultNaN<Element>()')
# FMINV: identity value is positive infinity (fplibInfinity sign bit 0).
fminvCode = fpReduceOp % 'Min'
sveNonAssocReducInst('fminv', 'Fminv', 'SimdFloatReduceCmpOp', floatTypes,
                     fminvCode, 'fplibInfinity<Element>(0)')
# FMLA: fused multiply-accumulate into the destination.
fmlaCode = fpOp % ('fplibMulAdd<Element>('
                   'destElem, srcElem1, srcElem2, fpscr)')
# FMLA (indexed)
sveTerIdxInst('fmla', 'FmlaIdx', 'SimdFloatMultAccOp', floatTypes,
              fmlaCode, PredType.MERGE)
# FMLA (vectors)
sveTerInst('fmla', 'Fmla', 'SimdFloatMultAccOp', floatTypes, fmlaCode,
           PredType.MERGE)
# FMLS: like FMLA but with the first product operand negated.
fmlsCode = fpOp % ('fplibMulAdd<Element>(destElem, '
                   'fplibNeg<Element>(srcElem1), srcElem2, fpscr)')
# FMLS (indexed)
sveTerIdxInst('fmls', 'FmlsIdx', 'SimdFloatMultAccOp', floatTypes,
              fmlsCode, PredType.MERGE)
# FMLS (vectors)
sveTerInst('fmls', 'Fmls', 'SimdFloatMultAccOp', floatTypes, fmlsCode,
           PredType.MERGE)
# FMSB: multiply-subtract, destructive in the multiplicand.
fmsbCode = fpOp % ('fplibMulAdd<Element>(srcElem1, '
                   'fplibNeg<Element>(destElem), srcElem2, fpscr)')
sveTerInst('fmsb', 'Fmsb', 'SimdFloatMultAccOp', floatTypes, fmsbCode,
           PredType.MERGE)
# FMUL: all four encodings share the same per-element code.
# fpBinOp expands an fplib binary helper into the common FP wrapper.
fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)'
fmulCode = fpBinOp % 'Mul'
# FMUL (immediate)
sveBinImmInst('fmul', 'FmulImm', 'SimdFloatMultOp', floatTypes, fmulCode,
              PredType.MERGE)
# FMUL (vectors, predicated)
sveBinInst('fmul', 'FmulPred', 'SimdFloatMultOp', floatTypes, fmulCode,
           PredType.MERGE, True)
# FMUL (vectors, unpredicated)
sveBinInst('fmul', 'FmulUnpred', 'SimdFloatMultOp', floatTypes, fmulCode)
# FMUL (indexed)
sveBinIdxInst('fmul', 'FmulIdx', 'SimdFloatMultOp', floatTypes, fmulCode)
3710
# FMULX
fmulxCode = fpBinOp % 'MulX'
sveBinInst('fmulx', 'Fmulx', 'SimdFloatMultOp', floatTypes, fmulxCode,
           PredType.MERGE, True)
# FNEG
fnegCode = 'destElem = fplibNeg<Element>(srcElem1);'
sveUnaryInst('fneg', 'Fneg', 'SimdFloatAluOp', fpTypes, fnegCode,
             PredType.MERGE)
# FNMAD: negated multiply-add; both the multiplicand (destElem) and the
# first product operand are negated before the fused multiply-add.
fnmadCode = fpOp % ('fplibMulAdd<Element>('
                    'fplibNeg<Element>(srcElem1), '
                    'fplibNeg<Element>(destElem), srcElem2, fpscr)')
sveTerInst('fnmad', 'Fnmad', 'SimdFloatMultAccOp', floatTypes, fnmadCode,
           PredType.MERGE)
# FNMLA: negated multiply-accumulate (addend and product operand negated).
fnmlaCode = fpOp % ('fplibMulAdd<Element>('
                    'fplibNeg<Element>(destElem), '
                    'fplibNeg<Element>(srcElem1), srcElem2, fpscr)')
sveTerInst('fnmla', 'Fnmla', 'SimdFloatMultAccOp', floatTypes, fnmlaCode,
           PredType.MERGE)
# FNMLS: only the addend (destElem) is negated.
fnmlsCode = fpOp % ('fplibMulAdd<Element>('
                    'fplibNeg<Element>(destElem), srcElem1, srcElem2, '
                    'fpscr)')
sveTerInst('fnmls', 'Fnmls', 'SimdFloatMultAccOp', floatTypes, fnmlsCode,
           PredType.MERGE)
# FNMSB: only the addend (srcElem1) is negated.
fnmsbCode = fpOp % ('fplibMulAdd<Element>('
                    'fplibNeg<Element>(srcElem1), destElem, srcElem2, '
                    'fpscr)')
sveTerInst('fnmsb', 'Fnmsb', 'SimdFloatMultAccOp', floatTypes, fnmsbCode,
           PredType.MERGE)
# FRECPE
frecpeCode = fpOp % 'fplibRecipEstimate<Element>(srcElem1, fpscr)'
sveUnaryInst('frecpe', 'Frecpe', 'SimdFloatMultAccOp', floatTypes,
             frecpeCode)
# FRECPS
frecpsCode = fpBinOp % 'RecipStepFused'
sveBinInst('frecps', 'Frecps', 'SimdFloatMultAccOp', floatTypes,
           frecpsCode)
# FRECPX
frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
sveUnaryInst('frecpx', 'Frecpx', 'SimdFloatMultAccOp', floatTypes,
             frecpxCode, PredType.MERGE)
# FRINT*: frintCode is a two-level template — the first %s selects the
# rounding mode, the second %s is the boolean passed to fplibRoundInt
# (true only for FRINTX below).
frintCode = fpOp % 'fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)'
# FRINTA: round, ties away from zero.
frintaCode = frintCode % ('FPRounding_TIEAWAY', 'false')
sveUnaryInst('frinta', 'Frinta', 'SimdCvtOp', floatTypes, frintaCode,
             PredType.MERGE)
# FRINTI: round using the current FPCR rounding mode.
frintiCode = frintCode % ('FPCRRounding(fpscr)', 'false')
sveUnaryInst('frinti', 'Frinti', 'SimdCvtOp', floatTypes, frintiCode,
             PredType.MERGE)
# FRINTM: round toward minus infinity.
frintmCode = frintCode % ('FPRounding_NEGINF', 'false')
sveUnaryInst('frintm', 'Frintm', 'SimdCvtOp', floatTypes, frintmCode,
             PredType.MERGE)
# FRINTN: round, ties to even.
frintnCode = frintCode % ('FPRounding_TIEEVEN', 'false')
sveUnaryInst('frintn', 'Frintn', 'SimdCvtOp', floatTypes, frintnCode,
             PredType.MERGE)
# FRINTP: round toward plus infinity.
frintpCode = frintCode % ('FPRounding_POSINF', 'false')
sveUnaryInst('frintp', 'Frintp', 'SimdCvtOp', floatTypes, frintpCode,
             PredType.MERGE)
# FRINTX: FPCR rounding mode, second argument true (unlike the others).
frintxCode = frintCode % ('FPCRRounding(fpscr)', 'true')
sveUnaryInst('frintx', 'Frintx', 'SimdCvtOp', floatTypes, frintxCode,
             PredType.MERGE)
# FRINTZ: round toward zero.
frintzCode = frintCode % ('FPRounding_ZERO', 'false')
sveUnaryInst('frintz', 'Frintz', 'SimdCvtOp', floatTypes, frintzCode,
             PredType.MERGE)
# FRSQRTE
frsqrteCode = fpOp % 'fplibRSqrtEstimate<Element>(srcElem1, fpscr)'
sveUnaryInst('frsqrte', 'Frsqrte', 'SimdFloatSqrtOp', floatTypes,
             frsqrteCode)
# FRSQRTS
frsqrtsCode = fpBinOp % 'RSqrtStepFused'
sveBinInst('frsqrts', 'Frsqrts', 'SimdFloatMiscOp', floatTypes,
           frsqrtsCode)
# FSCALE
fscaleCode = fpBinOp % 'Scale'
sveBinInst('fscale', 'Fscale', 'SimdFloatMiscOp', floatTypes, fscaleCode,
           PredType.MERGE, True)
# FSQRT
fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
sveUnaryInst('fsqrt', 'Fsqrt', 'SimdFloatSqrtOp', floatTypes, fsqrtCode,
             PredType.MERGE)
# FSUB (immediate)
fsubCode = fpBinOp % 'Sub'
sveBinImmInst('fsub', 'FsubImm', 'SimdFloatAddOp', floatTypes, fsubCode,
              PredType.MERGE)
# FSUB (vectors, predicated)
sveBinInst('fsub', 'FsubPred', 'SimdFloatAddOp', floatTypes, fsubCode,
           PredType.MERGE, True)
# FSUB (vectors, unpredicated)
sveBinInst('fsub', 'FsubUnpred', 'SimdFloatAddOp', floatTypes, fsubCode)
# FSUBR (immediate): reversed subtract (srcElem2 - srcElem1).
fsubrCode = fpBinRevOp % 'Sub'
sveBinImmInst('fsubr', 'FsubrImm', 'SimdFloatAddOp', floatTypes, fsubrCode,
              PredType.MERGE)
# FSUBR (vectors)
sveBinInst('fsubr', 'Fsubr', 'SimdFloatAddOp', floatTypes, fsubrCode,
           PredType.MERGE, True)
# FTMAD
ftmadCode = fpOp % ('fplibTrigMulAdd<Element>('
                    'srcElem3, destElem, srcElem2, fpscr)')
sveTerImmInst('ftmad', 'Ftmad', 'SimdFloatMultAccOp', floatTypes,
              ftmadCode)
# FTSMUL
ftsmulCode = fpBinOp % 'TrigSMul'
sveBinInst('ftsmul', 'Ftsmul', 'SimdFloatMiscOp', floatTypes, ftsmulCode)
# FTSSEL
ftsselCode = fpBinOp % 'TrigSSel'
sveBinInst('ftssel', 'Ftssel', 'SimdFloatMultOp', floatTypes, ftsselCode)
# INCB, INCH, INCW, INCD (scalar): add (element count * multiplier imm)
# to the source.
incxCode = '''
        destElem = srcElem1 + (count * imm);
        '''
sveElemCountInst('inc', 'Inc', 'SimdAluOp', unsignedTypes, incxCode,
                 destType = DestType.Scalar, dstIs32b = False)
# INCH, INCW, INCD (vector)
sveElemCountInst('inc', 'Incv', 'SimdAluOp', bigUnsignedTypes, incxCode,
                 destType = DestType.Vector, dstIs32b = False)
# INCP (scalar): increment by the count of active predicate elements.
incpCode = '''
        XDest = XDest + count;
        '''
svePredCountInst('incp', 'Incp', 'SimdAluOp', unsignedTypes, incpCode,
                 DestType.Scalar, SrcSize.Src64bit)
# INCP (vector)
incpvCode = '''
        destElem = srcElem + count;
        '''
svePredCountInst('incp', 'Incpv', 'SimdAluOp', unsignedTypes, incpvCode,
                 DestType.Vector)
# INDEX (immediate, scalar)
sveIndex(IndexFormat.ImmReg)
# INDEX (immediates)
sveIndex(IndexFormat.ImmImm)
# INDEX (scalar, immediate)
sveIndex(IndexFormat.RegImm)
# INDEX (scalars)
sveIndex(IndexFormat.RegReg)
# INSR (scalar)
sveShiftAndInsertInst('insr', 'Insr', 'SimdAluOp', unsignedTypes,
                      srcType = SrcRegType.Scalar)
# INSR (SIMD&FP scalar)
sveShiftAndInsertInst('insr', 'Insrf', 'SimdAluOp', unsignedTypes,
                      srcType = SrcRegType.SimdFpScalar)
# LASTA (scalar): takes the element after `last`, wrapping to element 0
# at the end of the vector. `last` is assumed to be set up by the
# sveSelectInst wrapper to the index of the last active element
# (TODO confirm against the template definition).
lastaCode = '''
        last++;
        if (last >= eCount) {
            last = 0;
        }
        destElem = AA64FpOp1_x[last];'''
sveSelectInst('lasta', 'Lasta', 'SimdAluOp', unsignedTypes, lastaCode,
              isCond = False)
# LASTA (SIMD&FP scalar)
sveSelectInst('lasta', 'Lastaf', 'SimdAluOp', unsignedTypes, lastaCode,
              isCond = False, destType = DstRegType.SimdFpScalar)
# LASTB (scalar): takes element `last`, falling back to the final
# element when last < 0 (no active element).
lastbCode = '''
        if (last < 0) {
            last = eCount - 1;
        }
        destElem = AA64FpOp1_x[last];'''
sveSelectInst('lastb', 'Lastb', 'SimdAluOp', unsignedTypes, lastbCode,
              isCond = False)
# LASTB (SIMD&FP scalar)
sveSelectInst('lastb', 'Lastbf', 'SimdAluOp', unsignedTypes, lastbCode,
              isCond = False, destType = DstRegType.SimdFpScalar)
# LSL (immediate, predicated). Shift amounts >= element width produce 0
# explicitly, since the C++ << would be undefined for them.
lslCode = '''
        if (srcElem2 == 0) {
            destElem = srcElem1;
        } else if (srcElem2 >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << srcElem2;
        }
        '''
sveBinImmInst('lsl', 'LslImmPred', 'SimdAluOp', unsignedTypes, lslCode,
              PredType.MERGE)
# LSL (immediate, unpredicated)
sveBinImmInst('lsl', 'LslImmUnpred', 'SimdAluOp', unsignedTypes, lslCode)
# LSL (vectors)
sveBinInst('lsl', 'LslPred', 'SimdAluOp', unsignedTypes, lslCode,
           PredType.MERGE, True)
# LSL (wide elements, predicated)
sveShiftByWideElemsInst('lsl', 'LslWidePred', 'SimdAluOp', unsignedTypes,
                        lslCode, PredType.MERGE)
# LSL (wide elements, unpredicated)
sveShiftByWideElemsInst('lsl', 'LslWideUnpred', 'SimdAluOp', unsignedTypes,
                        lslCode)
# LSLR: reversed form — the shift amount comes from srcElem1.
lslrCode = '''
        if (srcElem1 == 0) {
            destElem = srcElem2;
        } else if (srcElem1 >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem2 << srcElem1;
        }
        '''
sveBinInst('lslr', 'Lslr', 'SimdAluOp', unsignedTypes, lslrCode,
           PredType.MERGE, True)
# LSR (immediate, predicated). Same >= element-width guard as LSL.
lsrCode = '''
        if (srcElem2 >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 >> srcElem2;
        }
        '''
sveBinImmInst('lsr', 'LsrImmPred', 'SimdAluOp', unsignedTypes, lsrCode,
              PredType.MERGE)
# LSR (immediate, unpredicated)
sveBinImmInst('lsr', 'LsrImmUnpred', 'SimdAluOp', unsignedTypes, lsrCode)
# LSR (vectors)
sveBinInst('lsr', 'LsrPred', 'SimdAluOp', unsignedTypes, lsrCode,
           PredType.MERGE, True)
# LSR (wide elements, predicated)
sveShiftByWideElemsInst('lsr', 'LsrWidePred', 'SimdAluOp', unsignedTypes,
                        lsrCode, PredType.MERGE)
# LSR (wide elements, unpredicated)
sveShiftByWideElemsInst('lsr', 'LsrWideUnpred', 'SimdAluOp', unsignedTypes,
                        lsrCode)
# LSRR: reversed shift right — the shift amount comes from srcElem1.
lsrrCode = '''
        if (srcElem1 >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem2 >> srcElem1;
        }
        '''
sveBinInst('lsrr', 'Lsrr', 'SimdAluOp', unsignedTypes, lsrrCode,
           PredType.MERGE, True)
# MAD: multiply-add, destructive in the multiplicand (destElem).
madCode = 'destElem = srcElem1 + destElem * srcElem2;'
sveTerInst('mad', 'Mad', 'SimdMultAccOp', signedTypes, madCode)
# MLA: multiply-accumulate into the destination.
mlaCode = 'destElem += srcElem1 * srcElem2;'
sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode)
# MLS: multiply-subtract from the destination.
mlsCode = 'destElem -= srcElem1 * srcElem2;'
sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode)
# MOVPRFX (predicated): a plain element copy; merging and zeroing
# variants are generated separately.
movCode = 'destElem = srcElem1;'
sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes,
             movCode, PredType.MERGE)
sveUnaryInst('movprfx', 'MovprfxPredZ', 'SimdMiscOp', unsignedTypes,
             movCode, PredType.ZERO)
# MOVPRFX (unpredicated)
sveUnaryInst('movprfx', 'MovprfxUnpred', 'SimdMiscOp', ('uint64_t',),
             movCode)
# MSB: multiply-subtract, destructive in the multiplicand.
msbCode = 'destElem = srcElem1 - destElem * srcElem2;'
sveTerInst('msb', 'Msb', 'SimdMultAccOp', signedTypes, msbCode)
# MUL (immediate)
mulCode = 'destElem = srcElem1 * srcElem2;'
sveWideImmInst('mul', 'MulImm', 'SimdMultOp', unsignedTypes, mulCode)
# MUL (vectors)
sveBinInst('mul', 'Mul', 'SimdMultOp', unsignedTypes, mulCode,
           PredType.MERGE, True)
# NAND, NANDS (predicates)
nandCode = 'destElem = !(srcElem1 & srcElem2);'
svePredLogicalInst('nand', 'PredNand', 'SimdPredAluOp', ('uint8_t',),
                   nandCode)
svePredLogicalInst('nands', 'PredNands', 'SimdPredAluOp', ('uint8_t',),
                   nandCode, isFlagSetting=True)
# NEG
negCode = 'destElem = -srcElem1;'
sveUnaryInst('neg', 'Neg', 'SimdAluOp', signedTypes, negCode,
             PredType.MERGE)
# NOR, NORS (predicates)
norCode = 'destElem = !(srcElem1 | srcElem2);'
svePredLogicalInst('nor', 'PredNor', 'SimdPredAluOp', ('uint8_t',),
                   norCode)
svePredLogicalInst('nors', 'PredNors', 'SimdPredAluOp', ('uint8_t',),
                   norCode, isFlagSetting=True)
# NOT (vector)
notCode = 'destElem = ~srcElem1;'
sveUnaryInst('not', 'Not', 'SimdAluOp', unsignedTypes, notCode,
             PredType.MERGE)
# ORN, ORNS (predicates)
ornCode = 'destElem = srcElem1 | !srcElem2;'
svePredLogicalInst('orn', 'PredOrn', 'SimdPredAluOp', ('uint8_t',),
                   ornCode)
svePredLogicalInst('orns', 'PredOrns', 'SimdPredAluOp', ('uint8_t',),
                   ornCode, isFlagSetting=True)
# ORR: all variants share the same per-element code.
orCode = 'destElem = srcElem1 | srcElem2;'
# ORR (immediate)
sveWideImmInst('orr', 'OrrImm', 'SimdAluOp', ('uint64_t',), orCode)
# ORR (vectors, predicated)
sveBinInst('orr', 'OrrPred', 'SimdAluOp', unsignedTypes, orCode,
           PredType.MERGE, True)
# ORR (vectors, unpredicated)
sveBinInst('orr', 'OrrUnpred', 'SimdAluOp', ('uint64_t',), orCode)
# ORR, ORRS (predicates)
svePredLogicalInst('orr', 'PredOrr', 'SimdPredAluOp', ('uint8_t',), orCode)
svePredLogicalInst('orrs', 'PredOrrs', 'SimdPredAluOp', ('uint8_t',),
                   orCode, isFlagSetting=True)
# ORV: OR reduction with identity 0.
orvCode = 'destElem |= srcElem1;'
sveAssocReducInst('orv', 'Orv', 'SimdReduceAluOp', unsignedTypes,
                  orvCode, '0')
# PFALSE: clear the whole destination predicate.
pfalseCode = '''
        PDest_ub[0] = 0;
        destPred.reset();
        '''
svePredUnaryWImplicitSrcInst('pfalse', 'Pfalse', 'SimdPredAluOp',
                             pfalseCode)
# PFIRST
svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
# PNEXT
svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
# PTEST
svePredTestInst('ptest', 'Ptest', 'SimdPredAluOp')
# PTRUE
svePtrueInst('ptrue', 'Ptrue', 'SimdPredAluOp', unsignedTypes, False)
# PTRUES
svePtrueInst('ptrues', 'Ptrues', 'SimdPredAluOp', unsignedTypes, True)
# PUNPKHI
sveUnpackInst('punpkhi', 'Punpkhi', 'SimdPredAluOp', unsignedWideSDTypes,
              unpackHalf = Unpack.High, regType = SrcRegType.Predicate)
# PUNPKLO
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes,
              unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
# RBIT: reverse the bit order within each element.
rbitCode = '''
        destElem = reverseBits(srcElem1);'''
sveUnaryInst('rbit', 'Rbit', 'SimdAluOp', unsignedTypes, rbitCode,
             predType=PredType.MERGE, srcRegType=SrcRegType.Vector)
# RDFFR (unpredicated): copy the FFR into the destination predicate.
rdffrUnpredCode = '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            PDest_ub[i] = Ffr_ub[i];
        }'''
svePredUnaryWImplicitSrcInst('rdffr', 'RdffrUnpred', 'SimdPredAluOp',
                             rdffrUnpredCode)
# RDFFR, RDFFRS (predicated): copy FFR bits where the governing
# predicate is active, zeroing inactive elements.
rdffrPredCode = '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            if (GpOp_ub[i]) {
                PDest_ub[i] = Ffr_ub[i];
            } else {
                PDest_ub[i] = false;
            }
        }'''
svePredUnaryWImplicitSrcInst('rdffr', 'RdffrPred', 'SimdPredAluOp',
                             rdffrPredCode, PredType.ZERO, False)
svePredUnaryWImplicitSrcInst('rdffrs', 'RdffrsPred', 'SimdPredAluOp',
                             rdffrPredCode, PredType.ZERO, True)
# RDVL: write the current vector length in bytes, scaled by the signed
# immediate, to a scalar register. Built directly from InstObjParams
# rather than through an sve* helper.
rdvlCode = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        XDest = eCount * (int64_t) imm;
        '''
rdvlIop = InstObjParams('rdvl', 'SveRdvl', 'RegImmOp', rdvlCode, [])
header_output += RegImmOpDeclare.subst(rdvlIop)
decoder_output += RegImmOpConstructor.subst(rdvlIop)
exec_output += BasicExecute.subst(rdvlIop)
# REV (predicate)
sveReverseElementsInst('rev', 'Revp', 'SimdPredAluOp', unsignedTypes,
                       srcType = SrcRegType.Predicate)
# REV (vector)
sveReverseElementsInst('rev', 'Revv', 'SimdAluOp', unsignedTypes,
                       srcType = SrcRegType.Vector)
# REVB: revCode is a template that reverses the order of
# %(revtype)s-sized subelements inside each element.
revCode = '''
        %(revtype)s* srcPtr = reinterpret_cast<%(revtype)s*>(&srcElem1);
        %(revtype)s* dstPtr = reinterpret_cast<%(revtype)s*>(&destElem);
        uint8_t subelements = sizeof(Element) / sizeof(%(revtype)s);
        for(int i = 0; i < subelements; ++i) {
            dstPtr[subelements - i - 1] = srcPtr[i];
        }'''
sveUnaryInst('revb', 'Revb', 'SimdAluOp',
             ['uint16_t', 'uint32_t', 'uint64_t'],
             revCode % {'revtype' : 'uint8_t'}, predType=PredType.MERGE,
             srcRegType=SrcRegType.Vector, decoder='Generic')
# REVH: reverse 16-bit halfwords within 32/64-bit elements.
sveUnaryInst('revh', 'Revh', 'SimdAluOp', ['uint32_t', 'uint64_t'],
             revCode % {'revtype' : 'uint16_t'}, predType=PredType.MERGE,
             srcRegType=SrcRegType.Vector, decoder='Generic')
# REVW: reverse 32-bit words within 64-bit elements.
sveUnaryInst('revw', 'Revw', 'SimdAluOp', ['uint64_t'],
             revCode % {'revtype' : 'uint32_t'}, predType=PredType.MERGE,
             srcRegType=SrcRegType.Vector, decoder='Generic')
# SABD: signed absolute difference.
abdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                   (srcElem2 - srcElem1);
        '''
sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
           PredType.MERGE, True)
# SADDV: signed add reduction widening into a 64-bit accumulator.
addvCode = 'destElem += srcElem1;'
sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
        ['int8_t, int64_t', 'int16_t, int64_t', 'int32_t, int64_t'],
        addvCode, '0')
# SCVTF: signed integer to floating point; the source is sign-extended
# from the source element width before the fplib conversion.
scvtfCode = fpOp % ('fplibFixedToFP<DElement>('
                    'sext<sizeof(SElement) * 8>(srcElem1), 0,'
                    ' false, FPCRRounding(fpscr), fpscr)')
sveCvtInst('scvtf', 'ScvtfNarrow', 'SimdCvtOp',
           ('uint16_t, uint16_t',
            'uint32_t, uint16_t',
            'uint64_t, uint16_t',
            'uint32_t, uint32_t',
            'uint64_t, uint32_t',
            'uint64_t, uint64_t'),
           scvtfCode, CvtDir.Narrow)
sveCvtInst('scvtf', 'ScvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',),
           scvtfCode, CvtDir.Widen)
# SDIV: signed divide. Division by zero yields 0, and the
# ELEM_MIN / -1 overflow case yields ELEM_MIN, matching the
# architected (non-trapping) behavior.
sdivCode = '''
        constexpr Element ELEM_MIN = std::numeric_limits<Element>::min();
        destElem = (srcElem2 == 0) ? 0 :
            (srcElem2 == -1 && srcElem1 == ELEM_MIN) ? ELEM_MIN :
            (srcElem1 / srcElem2);
        '''
sveBinInst('sdiv', 'Sdiv', 'SimdDivOp', signedTypes, sdivCode,
           PredType.MERGE, True)
# SDIVR: reversed divide (srcElem2 / srcElem1), same special cases.
sdivrCode = '''
        constexpr Element ELEM_MIN = std::numeric_limits<Element>::min();
        destElem = (srcElem1 == 0) ? 0 :
            (srcElem1 == -1 && srcElem2 == ELEM_MIN) ? ELEM_MIN :
            (srcElem2 / srcElem1);
        '''
sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
           PredType.MERGE, True)
# SDOT (indexed)
sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t',
           'int16_t, int64_t'], isIndexed = True)
# SDOT (vectors)
sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t',
           'int16_t, int64_t'], isIndexed = False)
# SEL (predicates): per-element copy; PredType.SELECT picks between the
# two sources based on the governing predicate.
selCode = 'destElem = srcElem1;'
svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),
                   selCode, PredType.SELECT)
# SEL (vectors)
sveBinInst('sel', 'Sel', 'SimdAluOp', unsignedTypes, selCode,
           PredType.SELECT, False)
# SETFFR: set all bits of the FFR.
setffrCode = '''
        Ffr_ub[0] = true;
        destPred.set();'''
svePredWriteFfrInst('setffr', 'Setffr', 'SimdPredAluOp', setffrCode, True)
# SMAX (immediate)
maxCode = 'destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;'
sveWideImmInst('smax', 'SmaxImm', 'SimdCmpOp', signedTypes, maxCode)
# SMAX (vectors)
sveBinInst('smax', 'Smax', 'SimdCmpOp', signedTypes, maxCode,
           PredType.MERGE, True)
# SMAXV: max reduction seeded with the smallest representable value.
maxvCode = '''
        if (srcElem1 > destElem)
            destElem = srcElem1;
        '''
sveAssocReducInst('smaxv', 'Smaxv', 'SimdReduceCmpOp', signedTypes,
                  maxvCode, 'std::numeric_limits<Element>::min()')
# SMIN (immediate)
minCode = 'destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;'
sveWideImmInst('smin', 'SminImm', 'SimdCmpOp', signedTypes, minCode)
# SMIN (vectors)
sveBinInst('smin', 'Smin', 'SimdCmpOp', signedTypes, minCode,
           PredType.MERGE, True)
# SMINV: min reduction seeded with the largest representable value.
minvCode = '''
        if (srcElem1 < destElem)
            destElem = srcElem1;
        '''
sveAssocReducInst('sminv', 'Sminv', 'SimdReduceCmpOp', signedTypes,
                  minvCode, 'std::numeric_limits<Element>::max()')
# SMULH: the do_mulh overloads return the high half of a widening
# multiply. The narrow template works through int64_t; the 64-bit
# overloads build the 128-bit product by hand from 32x32-bit partial
# products, with manual carry propagation, since no portable 128-bit
# integer type is available here.
exec_output += '''
template <class T>
T do_mulh(T srcElem1, T srcElem2)
{
    return ((int64_t)srcElem1 * (int64_t)srcElem2) >> sizeof(T) * 8;
}

int64_t do_mulh(int64_t srcElem1, int64_t srcElem2)
{
    uint64_t x = (uint64_t) llabs(srcElem1);
    uint64_t y = (uint64_t) llabs(srcElem2);

    uint64_t a = x >> 32;
    uint64_t b = x & 0xFFFFFFFF;
    uint64_t c = y >> 32;
    uint64_t d = y & 0xFFFFFFFF;

    uint64_t hi = a * c;
    uint64_t lo = b * d;

    hi += (a * d) >> 32;
    uint64_t tmp = lo;
    lo += ((a * d) & 0xFFFFFFFF) << 32;
    if (lo < tmp)
        hi++;

    hi += (b * c) >> 32;
    tmp = lo;
    lo += ((b * c) & 0xFFFFFFFF) << 32;
    if (lo < tmp)
        hi++;

    uint64_t destElem = hi;
    if ((srcElem1 < 0) ^ (srcElem2 < 0)) {
        uint64_t tmp = lo = ~lo;
        destElem = ~hi;
        if (++lo < tmp)
            destElem++;
    }

    return destElem;
}

uint64_t do_mulh(uint64_t srcElem1, uint64_t srcElem2)
{
    uint64_t x = srcElem1;
    uint64_t y = srcElem2;

    uint64_t a = x >> 32;
    uint64_t b = x & 0xFFFFFFFF;
    uint64_t c = y >> 32;
    uint64_t d = y & 0xFFFFFFFF;

    uint64_t hi = a * c;
    uint64_t lo = b * d;

    hi += (a * d) >> 32;
    uint64_t tmp = lo;
    lo += ((a * d) & 0xFFFFFFFF) << 32;
    if (lo < tmp)
        hi++;

    hi += (b * c) >> 32;
    tmp = lo;
    lo += ((b * c) & 0xFFFFFFFF) << 32;
    if (lo < tmp)
        hi++;

    return hi;
}'''
mulhCode = '''
        destElem = do_mulh(srcElem1, srcElem2);'''
sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
           PredType.MERGE, True)
# SPLICE
sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
4274 # SQADD (immediate)
4275 sqaddCode = '''
4276 destElem = srcElem1 + srcElem2;
4277 bool negDest = (destElem < 0);
4278 bool negSrc1 = (srcElem1 < 0);
4279 bool negSrc2 = (srcElem2 < 0);
4280 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1// Copyright (c) 2017-2019 ARM Limited
2// All rights reserved
3//
4// The license below extends only to copyright in the software and shall
5// not be construed as granting a license to any other intellectual
6// property including but not limited to intellectual property relating
7// to a hardware implementation of the functionality of the software
8// licensed hereunder. You may use the software subject to the license
9// terms below provided that you ensure that this notice is replicated
10// unmodified and in its entirety in all distributions of the software,
11// modified or unmodified, in source code or in binary form.
12//
13// Redistribution and use in source and binary forms, with or without
14// modification, are permitted provided that the following conditions are
15// met: redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer;
17// redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution;
20// neither the name of the copyright holders nor the names of its
21// contributors may be used to endorse or promote products derived from
22// this software without specific prior written permission.
23//
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Authors: Giacomo Gabrielli
37
38// @file Definition of SVE instructions.
39
40output header {{
41
42 // Decodes unary, constructive, predicated (merging) SVE instructions,
43 // handling signed and unsigned variants.
44 template <template <typename T> class BaseS,
45 template <typename T> class BaseU>
46 StaticInstPtr
47 decodeSveUnaryPred(unsigned size, unsigned u, ExtMachInst machInst,
48 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
49 {
50 switch (size) {
51 case 0:
52 if (u) {
53 return new BaseU<uint8_t>(machInst, dest, op1, gp);
54 } else {
55 return new BaseS<int8_t>(machInst, dest, op1, gp);
56 }
57 case 1:
58 if (u) {
59 return new BaseU<uint16_t>(machInst, dest, op1, gp);
60 } else {
61 return new BaseS<int16_t>(machInst, dest, op1, gp);
62 }
63 case 2:
64 if (u) {
65 return new BaseU<uint32_t>(machInst, dest, op1, gp);
66 } else {
67 return new BaseS<int32_t>(machInst, dest, op1, gp);
68 }
69 case 3:
70 if (u) {
71 return new BaseU<uint64_t>(machInst, dest, op1, gp);
72 } else {
73 return new BaseS<int64_t>(machInst, dest, op1, gp);
74 }
75 default:
76 return new Unknown64(machInst);
77 }
78 }
79
80 // Decodes SVE widening reductions.
81 // handling signed and unsigned variants.
82 template <template <typename T1, typename T2> class BaseS,
83 template <typename T1, typename T2> class BaseU>
84 StaticInstPtr
85 decodeSveWideningReduc(unsigned size, unsigned u, ExtMachInst machInst,
86 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
87 {
88 switch (size) {
89 case 0:
90 if (u) {
91 return new BaseU<uint8_t, uint64_t>(machInst, dest, op1, gp);
92 } else {
93 return new BaseS<int8_t, int64_t>(machInst, dest, op1, gp);
94 }
95 case 1:
96 if (u) {
97 return new BaseU<uint16_t, uint64_t>(machInst, dest, op1, gp);
98 } else {
99 return new BaseS<int16_t, int64_t>(machInst, dest, op1, gp);
100 }
101 case 2:
102 if (u) {
103 return new BaseU<uint32_t, uint64_t>(machInst, dest, op1, gp);
104 } else {
105 return new BaseS<int32_t, int64_t>(machInst, dest, op1, gp);
106 }
107 case 3:
108 assert(u);
109 return new BaseU<uint64_t, uint64_t>(machInst, dest, op1, gp);
110 default:
111 return new Unknown64(machInst);
112 }
113 }
114
115 // Decodes unary, constructive, predicated (merging) SVE instructions,
116 // handling signed variants only.
117 template <template <typename T> class Base>
118 StaticInstPtr
119 decodeSveUnaryPredS(unsigned size, ExtMachInst machInst,
120 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
121 {
122 switch (size) {
123 case 0:
124 return new Base<int8_t>(machInst, dest, op1, gp);
125 case 1:
126 return new Base<int16_t>(machInst, dest, op1, gp);
127 case 2:
128 return new Base<int32_t>(machInst, dest, op1, gp);
129 case 3:
130 return new Base<int64_t>(machInst, dest, op1, gp);
131 default:
132 return new Unknown64(machInst);
133 }
134 }
135
136 // Decodes unary, constructive, predicated (merging) SVE instructions,
137 // handling unsigned variants only.
138 template <template <typename T> class Base>
139 StaticInstPtr
140 decodeSveUnaryPredU(unsigned size, ExtMachInst machInst,
141 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
142 {
143 switch (size) {
144 case 0:
145 return new Base<uint8_t>(machInst, dest, op1, gp);
146 case 1:
147 return new Base<uint16_t>(machInst, dest, op1, gp);
148 case 2:
149 return new Base<uint32_t>(machInst, dest, op1, gp);
150 case 3:
151 return new Base<uint64_t>(machInst, dest, op1, gp);
152 default:
153 return new Unknown64(machInst);
154 }
155 }
156
157 // Decodes unary, constructive, predicated (merging) SVE instructions,
158 // handling signed and unsigned variants, for small element sizes (8- to
159 // 32-bit).
160 template <template <typename T> class BaseS,
161 template <typename T> class BaseU>
162 StaticInstPtr
163 decodeSveUnaryPredSmall(unsigned size, unsigned u, ExtMachInst machInst,
164 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
165 {
166 switch (size) {
167 case 0:
168 if (u) {
169 return new BaseU<uint8_t>(machInst, dest, op1, gp);
170 } else {
171 return new BaseS<int8_t>(machInst, dest, op1, gp);
172 }
173 case 1:
174 if (u) {
175 return new BaseU<uint16_t>(machInst, dest, op1, gp);
176 } else {
177 return new BaseS<int16_t>(machInst, dest, op1, gp);
178 }
179 case 2:
180 if (u) {
181 return new BaseU<uint32_t>(machInst, dest, op1, gp);
182 } else {
183 return new BaseS<int32_t>(machInst, dest, op1, gp);
184 }
185 default:
186 return new Unknown64(machInst);
187 }
188 }
189
190 // Decodes unary, constructive, predicated (merging) SVE instructions,
191 // handling floating point variants only.
192 template <template <typename T> class Base>
193 StaticInstPtr
194 decodeSveUnaryPredF(unsigned size, ExtMachInst machInst,
195 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
196 {
197 switch (size) {
198 case 1:
199 return new Base<uint16_t>(machInst, dest, op1, gp);
200 case 2:
201 return new Base<uint32_t>(machInst, dest, op1, gp);
202 case 3:
203 return new Base<uint64_t>(machInst, dest, op1, gp);
204 default:
205 return new Unknown64(machInst);
206 }
207 }
208
209 // Decodes unary, constructive, unpredicated SVE instructions, handling
210 // unsigned variants only.
211 template <template <typename T> class Base>
212 StaticInstPtr
213 decodeSveUnaryUnpredU(unsigned size, ExtMachInst machInst,
214 IntRegIndex dest, IntRegIndex op1)
215 {
216 switch (size) {
217 case 0:
218 return new Base<uint8_t>(machInst, dest, op1);
219 case 1:
220 return new Base<uint16_t>(machInst, dest, op1);
221 case 2:
222 return new Base<uint32_t>(machInst, dest, op1);
223 case 3:
224 return new Base<uint64_t>(machInst, dest, op1);
225 default:
226 return new Unknown64(machInst);
227 }
228 }
229
230 // Decodes unary, constructive, unpredicated SVE instructions, handling
231 // floating-point variants only.
232 template <template <typename T> class Base>
233 StaticInstPtr
234 decodeSveUnaryUnpredF(unsigned size, ExtMachInst machInst,
235 IntRegIndex dest, IntRegIndex op1)
236 {
237 switch (size) {
238 case 1:
239 return new Base<uint16_t>(machInst, dest, op1);
240 case 2:
241 return new Base<uint32_t>(machInst, dest, op1);
242 case 3:
243 return new Base<uint64_t>(machInst, dest, op1);
244 default:
245 return new Unknown64(machInst);
246 }
247 }
248
249 // Decodes binary, destructive, predicated (merging) SVE instructions,
250 // handling signed and unsigned variants.
251 template <template <typename T> class BaseS,
252 template <typename T> class BaseU>
253 StaticInstPtr
254 decodeSveBinDestrPred(unsigned size, unsigned u, ExtMachInst machInst,
255 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
256 {
257 switch (size) {
258 case 0:
259 if (u) {
260 return new BaseU<uint8_t>(machInst, dest, op2, gp);
261 } else {
262 return new BaseS<int8_t>(machInst, dest, op2, gp);
263 }
264 case 1:
265 if (u) {
266 return new BaseU<uint16_t>(machInst, dest, op2, gp);
267 } else {
268 return new BaseS<int16_t>(machInst, dest, op2, gp);
269 }
270 case 2:
271 if (u) {
272 return new BaseU<uint32_t>(machInst, dest, op2, gp);
273 } else {
274 return new BaseS<int32_t>(machInst, dest, op2, gp);
275 }
276 case 3:
277 if (u) {
278 return new BaseU<uint64_t>(machInst, dest, op2, gp);
279 } else {
280 return new BaseS<int64_t>(machInst, dest, op2, gp);
281 }
282 default:
283 return new Unknown64(machInst);
284 }
285 }
286
287 // Decodes binary with immediate operand, constructive, unpredicated
288 // SVE instructions, handling signed variants only.
289 template <template <typename T> class Base>
290 StaticInstPtr
291 decodeSveBinImmUnpredS(unsigned size, ExtMachInst machInst,
292 IntRegIndex dest, IntRegIndex op1, unsigned immediate)
293 {
294 switch (size) {
295 case 0:
296 return new Base<int8_t>(machInst, dest, op1, immediate);
297 case 1:
298 return new Base<int16_t>(machInst, dest, op1, immediate);
299 case 2:
300 return new Base<int32_t>(machInst, dest, op1, immediate);
301 case 3:
302 return new Base<int64_t>(machInst, dest, op1, immediate);
303 default:
304 return new Unknown64(machInst);
305 }
306 }
307
308
309 // Decodes binary with immediate operand, constructive, unpredicated
310 // SVE instructions, handling unsigned variants only.
311 template <template <typename T> class Base>
312 StaticInstPtr
313 decodeSveBinImmUnpredU(unsigned size, ExtMachInst machInst,
314 IntRegIndex dest, IntRegIndex op1, unsigned immediate)
315 {
316 switch (size) {
317 case 0:
318 return new Base<uint8_t>(machInst, dest, op1, immediate);
319 case 1:
320 return new Base<uint16_t>(machInst, dest, op1, immediate);
321 case 2:
322 return new Base<uint32_t>(machInst, dest, op1, immediate);
323 case 3:
324 return new Base<uint64_t>(machInst, dest, op1, immediate);
325 default:
326 return new Unknown64(machInst);
327 }
328 }
329
330 // Decodes binary with immediate operand, destructive, predicated (merging)
331 // SVE instructions, handling unsigned variants only.
332 template <template <typename T> class Base>
333 StaticInstPtr
334 decodeSveBinImmPredU(unsigned size, ExtMachInst machInst, IntRegIndex dest,
335 unsigned immediate, IntRegIndex gp)
336 {
337 switch (size) {
338 case 0:
339 return new Base<uint8_t>(machInst, dest, immediate, gp);
340 case 1:
341 return new Base<uint16_t>(machInst, dest, immediate, gp);
342 case 2:
343 return new Base<uint32_t>(machInst, dest, immediate, gp);
344 case 3:
345 return new Base<uint64_t>(machInst, dest, immediate, gp);
346 default:
347 return new Unknown64(machInst);
348 }
349 }
350
351 // Decodes binary with immediate operand, destructive, predicated (merging)
352 // SVE instructions, handling signed variants only.
353 template <template <typename T> class Base>
354 StaticInstPtr
355 decodeSveBinImmPredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
356 unsigned immediate, IntRegIndex gp)
357 {
358 switch (size) {
359 case 0:
360 return new Base<int8_t>(machInst, dest, immediate, gp);
361 case 1:
362 return new Base<int16_t>(machInst, dest, immediate, gp);
363 case 2:
364 return new Base<int32_t>(machInst, dest, immediate, gp);
365 case 3:
366 return new Base<int64_t>(machInst, dest, immediate, gp);
367 default:
368 return new Unknown64(machInst);
369 }
370 }
371
372 // Decodes binary with immediate operand, destructive, predicated (merging)
373 // SVE instructions, handling floating-point variants only.
374 template <template <typename T> class Base>
375 StaticInstPtr
376 decodeSveBinImmPredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
377 uint64_t immediate, IntRegIndex gp)
378 {
379 switch (size) {
380 case 1:
381 return new Base<uint16_t>(machInst, dest, immediate, gp);
382 case 2:
383 return new Base<uint32_t>(machInst, dest, immediate, gp);
384 case 3:
385 return new Base<uint64_t>(machInst, dest, immediate, gp);
386 default:
387 return new Unknown64(machInst);
388 }
389 }
390
391 // Decodes unary/binary with wide immediate operand, destructive,
392 // unpredicated SVE instructions, handling unsigned variants only.
393 template <template <typename T> class Base>
394 StaticInstPtr
395 decodeSveWideImmUnpredU(unsigned size, ExtMachInst machInst,
396 IntRegIndex dest, uint64_t immediate)
397 {
398 switch (size) {
399 case 0:
400 return new Base<uint8_t>(machInst, dest, immediate);
401 case 1:
402 return new Base<uint16_t>(machInst, dest, immediate);
403 case 2:
404 return new Base<uint32_t>(machInst, dest, immediate);
405 case 3:
406 return new Base<uint64_t>(machInst, dest, immediate);
407 default:
408 return new Unknown64(machInst);
409 }
410 }
411
412 // Decodes unary/binary with wide immediate operand, destructive,
413 // unpredicated SVE instructions, handling signed variants only.
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeSveWideImmUnpredS(unsigned size, ExtMachInst machInst,
417 IntRegIndex dest, uint64_t immediate)
418 {
419 switch (size) {
420 case 0:
421 return new Base<int8_t>(machInst, dest, immediate);
422 case 1:
423 return new Base<int16_t>(machInst, dest, immediate);
424 case 2:
425 return new Base<int32_t>(machInst, dest, immediate);
426 case 3:
427 return new Base<int64_t>(machInst, dest, immediate);
428 default:
429 return new Unknown64(machInst);
430 }
431 }
432
433 // Decodes unary/binary with wide immediate operand, destructive,
434 // unpredicated SVE instructions, handling floating-point variants only.
435 template <template <typename T> class Base>
436 StaticInstPtr
437 decodeSveWideImmUnpredF(unsigned size, ExtMachInst machInst,
438 IntRegIndex dest, uint64_t immediate)
439 {
440 switch (size) {
441 case 1:
442 return new Base<uint16_t>(machInst, dest, immediate);
443 case 2:
444 return new Base<uint32_t>(machInst, dest, immediate);
445 case 3:
446 return new Base<uint64_t>(machInst, dest, immediate);
447 default:
448 return new Unknown64(machInst);
449 }
450 }
451
452 // Decodes unary/binary with wide immediate operand, destructive,
453 // predicated SVE instructions, handling unsigned variants only.
454 template <template <typename T> class Base>
455 StaticInstPtr
456 decodeSveWideImmPredU(unsigned size, ExtMachInst machInst,
457 IntRegIndex dest, uint64_t immediate, IntRegIndex gp,
458 bool isMerging = true)
459 {
460 switch (size) {
461 case 0:
462 return new Base<uint8_t>(machInst, dest, immediate, gp,
463 isMerging);
464 case 1:
465 return new Base<uint16_t>(machInst, dest, immediate, gp,
466 isMerging);
467 case 2:
468 return new Base<uint32_t>(machInst, dest, immediate, gp,
469 isMerging);
470 case 3:
471 return new Base<uint64_t>(machInst, dest, immediate, gp,
472 isMerging);
473 default:
474 return new Unknown64(machInst);
475 }
476 }
477
478 // Decodes unary/binary with wide immediate operand, destructive,
479 // predicated SVE instructions, handling floating-point variants only.
480 template <template <typename T> class Base>
481 StaticInstPtr
482 decodeSveWideImmPredF(unsigned size, ExtMachInst machInst,
483 IntRegIndex dest, uint64_t immediate, IntRegIndex gp)
484 {
485 switch (size) {
486 case 1:
487 return new Base<uint16_t>(machInst, dest, immediate, gp);
488 case 2:
489 return new Base<uint32_t>(machInst, dest, immediate, gp);
490 case 3:
491 return new Base<uint64_t>(machInst, dest, immediate, gp);
492 default:
493 return new Unknown64(machInst);
494 }
495 }
496
497 // Decodes binary, destructive, predicated (merging) SVE instructions,
498 // handling unsigned variants only.
499 template <template <typename T> class Base>
500 StaticInstPtr
501 decodeSveBinDestrPredU(unsigned size, ExtMachInst machInst,
502 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
503 {
504 switch (size) {
505 case 0:
506 return new Base<uint8_t>(machInst, dest, op2, gp);
507 case 1:
508 return new Base<uint16_t>(machInst, dest, op2, gp);
509 case 2:
510 return new Base<uint32_t>(machInst, dest, op2, gp);
511 case 3:
512 return new Base<uint64_t>(machInst, dest, op2, gp);
513 default:
514 return new Unknown64(machInst);
515 }
516 }
517
518 // Decodes binary, destructive, predicated (merging) SVE instructions,
519 // handling signed variants only.
520 template <template <typename T> class Base>
521 StaticInstPtr
522 decodeSveBinDestrPredS(unsigned size, ExtMachInst machInst,
523 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
524 {
525 switch (size) {
526 case 0:
527 return new Base<int8_t>(machInst, dest, op2, gp);
528 case 1:
529 return new Base<int16_t>(machInst, dest, op2, gp);
530 case 2:
531 return new Base<int32_t>(machInst, dest, op2, gp);
532 case 3:
533 return new Base<int64_t>(machInst, dest, op2, gp);
534 default:
535 return new Unknown64(machInst);
536 }
537 }
538
539 // Decodes binary, destructive, predicated (merging) SVE instructions,
540 // handling floating-point variants only.
541 template <template <typename T> class Base>
542 StaticInstPtr
543 decodeSveBinDestrPredF(unsigned size, ExtMachInst machInst,
544 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
545 {
546 switch (size) {
547 case 1:
548 return new Base<uint16_t>(machInst, dest, op2, gp);
549 case 2:
550 return new Base<uint32_t>(machInst, dest, op2, gp);
551 case 3:
552 return new Base<uint64_t>(machInst, dest, op2, gp);
553 default:
554 return new Unknown64(machInst);
555 }
556 }
557
558 // Decodes binary, constructive, predicated SVE instructions, handling
559 // unsigned variants only.
560 template <template <typename T> class Base>
561 StaticInstPtr
562 decodeSveBinConstrPredU(unsigned size, ExtMachInst machInst,
563 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
564 IntRegIndex gp, SvePredType predType)
565 {
566 switch (size) {
567 case 0:
568 return new Base<uint8_t>(machInst, dest, op1, op2, gp, predType);
569 case 1:
570 return new Base<uint16_t>(machInst, dest, op1, op2, gp, predType);
571 case 2:
572 return new Base<uint32_t>(machInst, dest, op1, op2, gp, predType);
573 case 3:
574 return new Base<uint64_t>(machInst, dest, op1, op2, gp, predType);
575 default:
576 return new Unknown64(machInst);
577 }
578 }
579
580 // Decodes binary, constructive, unpredicated SVE instructions.
581 template <template <typename T> class Base>
582 StaticInstPtr
583 decodeSveBinUnpred(unsigned size, unsigned u, ExtMachInst machInst,
584 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
585 {
586 switch (size) {
587 case 0:
588 if (u) {
589 return new Base<uint8_t>(machInst, dest, op1, op2);
590 } else {
591 return new Base<int8_t>(machInst, dest, op1, op2);
592 }
593 case 1:
594 if (u) {
595 return new Base<uint16_t>(machInst, dest, op1, op2);
596 } else {
597 return new Base<int16_t>(machInst, dest, op1, op2);
598 }
599 case 2:
600 if (u) {
601 return new Base<uint32_t>(machInst, dest, op1, op2);
602 } else {
603 return new Base<int32_t>(machInst, dest, op1, op2);
604 }
605 case 3:
606 if (u) {
607 return new Base<uint64_t>(machInst, dest, op1, op2);
608 } else {
609 return new Base<int64_t>(machInst, dest, op1, op2);
610 }
611 default:
612 return new Unknown64(machInst);
613 }
614 }
615
616 // Decodes binary, constructive, unpredicated SVE instructions.
617 // Unsigned instructions only.
618 template <template <typename T> class Base>
619 StaticInstPtr
620 decodeSveBinUnpredU(unsigned size, ExtMachInst machInst, IntRegIndex dest,
621 IntRegIndex op1, IntRegIndex op2)
622 {
623 switch (size) {
624 case 0:
625 return new Base<uint8_t>(machInst, dest, op1, op2);
626 case 1:
627 return new Base<uint16_t>(machInst, dest, op1, op2);
628 case 2:
629 return new Base<uint32_t>(machInst, dest, op1, op2);
630 case 3:
631 return new Base<uint64_t>(machInst, dest, op1, op2);
632 default:
633 return new Unknown64(machInst);
634 }
635 }
636
637 // Decodes binary, constructive, unpredicated SVE instructions.
638 // Signed instructions only.
639 template <template <typename T> class Base>
640 StaticInstPtr
641 decodeSveBinUnpredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
642 IntRegIndex op1, IntRegIndex op2)
643 {
644 switch (size) {
645 case 0:
646 return new Base<int8_t>(machInst, dest, op1, op2);
647 case 1:
648 return new Base<int16_t>(machInst, dest, op1, op2);
649 case 2:
650 return new Base<int32_t>(machInst, dest, op1, op2);
651 case 3:
652 return new Base<int64_t>(machInst, dest, op1, op2);
653 default:
654 return new Unknown64(machInst);
655 }
656 }
657
658 // Decodes binary, costructive, unpredicated SVE instructions, handling
659 // floating-point variants only.
660 template <template <typename T> class Base>
661 StaticInstPtr
662 decodeSveBinUnpredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
663 IntRegIndex op1, IntRegIndex op2)
664 {
665 switch (size) {
666 case 1:
667 return new Base<uint16_t>(machInst, dest, op1, op2);
668 case 2:
669 return new Base<uint32_t>(machInst, dest, op1, op2);
670 case 3:
671 return new Base<uint64_t>(machInst, dest, op1, op2);
672 default:
673 return new Unknown64(machInst);
674 }
675 }
676
677 // Decodes SVE compare instructions - binary, predicated (zeroing),
678 // generating a predicate - handling floating-point variants only.
679 template <template <typename T> class Base>
680 StaticInstPtr
681 decodeSveCmpF(unsigned size, ExtMachInst machInst,
682 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
683 IntRegIndex gp)
684 {
685 switch (size) {
686 case 1:
687 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
688 case 2:
689 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
690 case 3:
691 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
692 default:
693 return new Unknown64(machInst);
694 }
695 }
696
697 // Decodes SVE compare-with-immediate instructions - binary, predicated
698 // (zeroing), generating a predicate - handling floating-point variants
699 // only.
700 template <template <typename T> class Base>
701 StaticInstPtr
702 decodeSveCmpImmF(unsigned size, ExtMachInst machInst,
703 IntRegIndex dest, IntRegIndex op1, uint64_t imm,
704 IntRegIndex gp)
705 {
706 switch (size) {
707 case 1:
708 return new Base<uint16_t>(machInst, dest, op1, imm, gp);
709 case 2:
710 return new Base<uint32_t>(machInst, dest, op1, imm, gp);
711 case 3:
712 return new Base<uint64_t>(machInst, dest, op1, imm, gp);
713 default:
714 return new Unknown64(machInst);
715 }
716 }
717
718 // Decodes ternary, destructive, predicated (merging) SVE instructions.
719 template <template <typename T> class Base>
720 StaticInstPtr
721 decodeSveTerPred(unsigned size, unsigned u, ExtMachInst machInst,
722 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
723 IntRegIndex gp)
724 {
725 switch (size) {
726 case 0:
727 if (u) {
728 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
729 } else {
730 return new Base<int8_t>(machInst, dest, op1, op2, gp);
731 }
732 case 1:
733 if (u) {
734 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
735 } else {
736 return new Base<int16_t>(machInst, dest, op1, op2, gp);
737 }
738 case 2:
739 if (u) {
740 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
741 } else {
742 return new Base<int32_t>(machInst, dest, op1, op2, gp);
743 }
744 case 3:
745 if (u) {
746 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
747 } else {
748 return new Base<int64_t>(machInst, dest, op1, op2, gp);
749 }
750 default:
751 return new Unknown64(machInst);
752 }
753 }
754
755 // Decodes ternary, destructive, predicated (merging) SVE instructions,
756 // handling wide signed variants only. XXX: zeroing for CMP instructions.
757 template <template <typename T> class Base>
758 StaticInstPtr
759 decodeSveTerPredWS(unsigned size, ExtMachInst machInst,
760 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
761 IntRegIndex gp)
762 {
763 switch (size) {
764 case 0:
765 return new Base<int8_t>(machInst, dest, op1, op2, gp);
766 case 1:
767 return new Base<int16_t>(machInst, dest, op1, op2, gp);
768 case 2:
769 return new Base<int32_t>(machInst, dest, op1, op2, gp);
770 default:
771 return new Unknown64(machInst);
772 }
773 }
774
775 // Decodes ternary, destructive, predicated (merging) SVE instructions,
776 // handling wide unsigned variants only. XXX: zeroing for CMP instructions.
777 template <template <typename T> class Base>
778 StaticInstPtr
779 decodeSveTerPredWU(unsigned size, ExtMachInst machInst,
780 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
781 IntRegIndex gp)
782 {
783 switch (size) {
784 case 0:
785 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
786 case 1:
787 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
788 case 2:
789 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
790 default:
791 return new Unknown64(machInst);
792 }
793 }
794
795 // Decodes ternary, destructive, predicated (merging) SVE instructions,
796 // handling signed variants only. XXX: zeroing for CMP instructions.
797 template <template <typename T> class Base>
798 StaticInstPtr
799 decodeSveTerPredS(unsigned size, ExtMachInst machInst,
800 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
801 IntRegIndex gp)
802 {
803 switch (size) {
804 case 0:
805 return new Base<int8_t>(machInst, dest, op1, op2, gp);
806 case 1:
807 return new Base<int16_t>(machInst, dest, op1, op2, gp);
808 case 2:
809 return new Base<int32_t>(machInst, dest, op1, op2, gp);
810 case 3:
811 return new Base<int64_t>(machInst, dest, op1, op2, gp);
812 default:
813 return new Unknown64(machInst);
814 }
815 }
816
817 // Decodes ternary, destructive, predicated (merging) SVE instructions,
818 // handling unsigned variants only. XXX: zeroing for CMP instructions.
819 template <template <typename T> class Base>
820 StaticInstPtr
821 decodeSveTerPredU(unsigned size, ExtMachInst machInst,
822 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
823 IntRegIndex gp)
824 {
825 switch (size) {
826 case 0:
827 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
828 case 1:
829 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
830 case 2:
831 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
832 case 3:
833 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
834 default:
835 return new Unknown64(machInst);
836 }
837 }
838
839 // Decodes SVE signed unary extension instructions (8-bit source element
840 // size)
841 template <template <typename TS, typename TD> class Base>
842 StaticInstPtr
843 decodeSveUnaryExtendFromBPredS(unsigned dsize, ExtMachInst machInst,
844 IntRegIndex dest, IntRegIndex op1,
845 IntRegIndex gp)
846 {
847 switch (dsize) {
848 case 1:
849 return new Base<int8_t, int16_t>(machInst, dest, op1, gp);
850 case 2:
851 return new Base<int8_t, int32_t>(machInst, dest, op1, gp);
852 case 3:
853 return new Base<int8_t, int64_t>(machInst, dest, op1, gp);
854 }
855 return new Unknown64(machInst);
856 }
857
858 // Decodes SVE unsigned unary extension instructions (8-bit source element
859 // size)
860 template <template <typename TS, typename TD> class Base>
861 StaticInstPtr
862 decodeSveUnaryExtendFromBPredU(unsigned dsize, ExtMachInst machInst,
863 IntRegIndex dest, IntRegIndex op1,
864 IntRegIndex gp)
865 {
866 switch (dsize) {
867 case 1:
868 return new Base<uint8_t, uint16_t>(machInst, dest, op1, gp);
869 case 2:
870 return new Base<uint8_t, uint32_t>(machInst, dest, op1, gp);
871 case 3:
872 return new Base<uint8_t, uint64_t>(machInst, dest, op1, gp);
873 }
874 return new Unknown64(machInst);
875 }
876
877 // Decodes SVE signed unary extension instructions (16-bit source element
878 // size)
879 template <template <typename TS, typename TD> class Base>
880 StaticInstPtr
881 decodeSveUnaryExtendFromHPredS(unsigned dsize, ExtMachInst machInst,
882 IntRegIndex dest, IntRegIndex op1,
883 IntRegIndex gp)
884 {
885 switch (dsize) {
886 case 2:
887 return new Base<int16_t, int32_t>(machInst, dest, op1, gp);
888 case 3:
889 return new Base<int16_t, int64_t>(machInst, dest, op1, gp);
890 }
891 return new Unknown64(machInst);
892 }
893
894 // Decodes SVE unsigned unary extension instructions (16-bit source element
895 // size)
896 template <template <typename TS, typename TD> class Base>
897 StaticInstPtr
898 decodeSveUnaryExtendFromHPredU(unsigned dsize, ExtMachInst machInst,
899 IntRegIndex dest, IntRegIndex op1,
900 IntRegIndex gp)
901 {
902 switch (dsize) {
903 case 2:
904 return new Base<uint16_t, uint32_t>(machInst, dest, op1, gp);
905 case 3:
906 return new Base<uint16_t, uint64_t>(machInst, dest, op1, gp);
907 }
908 return new Unknown64(machInst);
909 }
910
911 // Decodes ternary, destructive, predicated (merging) SVE instructions,
912 // handling floating-point variants only.
913 template <template <typename T> class Base>
914 StaticInstPtr
915 decodeSveTerPredF(unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
917 IntRegIndex gp)
918 {
919 switch (size) {
920 case 1:
921 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
922 case 2:
923 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
924 case 3:
925 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
926 default:
927 return new Unknown64(machInst);
928 }
929 }
930
931 // Decodes ternary with immediate operand, destructive, unpredicated SVE
932 // instructions handling floating-point variants only.
933 template <template <typename T> class Base>
934 StaticInstPtr
935 decodeSveTerImmUnpredF(unsigned size, ExtMachInst machInst,
936 IntRegIndex dest, IntRegIndex op2, uint8_t imm)
937 {
938 switch (size) {
939 case 1:
940 return new Base<uint16_t>(machInst, dest, op2, imm);
941 case 2:
942 return new Base<uint32_t>(machInst, dest, op2, imm);
943 case 3:
944 return new Base<uint64_t>(machInst, dest, op2, imm);
945 default:
946 return new Unknown64(machInst);
947 }
948 }
949
950 // Decodes SVE PTRUE(S) instructions.
951 template <template <typename T> class Base>
952 StaticInstPtr
953 decodeSvePtrue(unsigned size, ExtMachInst machInst,
954 IntRegIndex dest, uint8_t imm)
955 {
956 switch (size) {
957 case 0:
958 return new Base<uint8_t>(machInst, dest, imm);
959 case 1:
960 return new Base<uint16_t>(machInst, dest, imm);
961 case 2:
962 return new Base<uint32_t>(machInst, dest, imm);
963 case 3:
964 return new Base<uint64_t>(machInst, dest, imm);
965 default:
966 return new Unknown64(machInst);
967 }
968 }
969
970 // Decodes SVE predicate count instructions, scalar signed variant only
971 template <template <typename T> class Base>
972 StaticInstPtr
973 decodeSvePredCountS(unsigned size, ExtMachInst machInst,
974 IntRegIndex dest, IntRegIndex op1)
975 {
976 switch (size) {
977 case 0:
978 return new Base<int8_t>(machInst, dest, op1);
979 case 1:
980 return new Base<int16_t>(machInst, dest, op1);
981 case 2:
982 return new Base<int32_t>(machInst, dest, op1);
983 case 3:
984 return new Base<int64_t>(machInst, dest, op1);
985 default:
986 return new Unknown64(machInst);
987 }
988 }
989
990 // Decodes SVE predicate count instructions, scalar unsigned variant only
991 template <template <typename T> class Base>
992 StaticInstPtr
993 decodeSvePredCountU(unsigned size, ExtMachInst machInst,
994 IntRegIndex dest, IntRegIndex op1)
995 {
996 switch (size) {
997 case 0:
998 return new Base<uint8_t>(machInst, dest, op1);
999 case 1:
1000 return new Base<uint16_t>(machInst, dest, op1);
1001 case 2:
1002 return new Base<uint32_t>(machInst, dest, op1);
1003 case 3:
1004 return new Base<uint64_t>(machInst, dest, op1);
1005 default:
1006 return new Unknown64(machInst);
1007 }
1008 }
1009
1010 // Decodes SVE predicate count instructions, vector signed variant only
1011 template <template <typename T> class Base>
1012 StaticInstPtr
1013 decodeSvePredCountVS(unsigned size, ExtMachInst machInst,
1014 IntRegIndex dest, IntRegIndex op1)
1015 {
1016 switch (size) {
1017 case 1:
1018 return new Base<int16_t>(machInst, dest, op1);
1019 case 2:
1020 return new Base<int32_t>(machInst, dest, op1);
1021 case 3:
1022 return new Base<int64_t>(machInst, dest, op1);
1023 default:
1024 return new Unknown64(machInst);
1025 }
1026 }
1027
1028 // Decodes SVE predicate count instructions, vector unsigned variant only
1029 template <template <typename T> class Base>
1030 StaticInstPtr
1031 decodeSvePredCountVU(unsigned size, ExtMachInst machInst,
1032 IntRegIndex dest, IntRegIndex op1)
1033 {
1034 switch (size) {
1035 case 1:
1036 return new Base<uint16_t>(machInst, dest, op1);
1037 case 2:
1038 return new Base<uint32_t>(machInst, dest, op1);
1039 case 3:
1040 return new Base<uint64_t>(machInst, dest, op1);
1041 default:
1042 return new Unknown64(machInst);
1043 }
1044 }
1045
1046 // Decodes ternary with immediate operand, predicated SVE
1047 // instructions handling unsigned variants only.
1048 template <template <typename T> class Base>
1049 StaticInstPtr
1050 decodeSveTerImmPredU(unsigned size, ExtMachInst machInst,
1051 IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp)
1052 {
1053 switch (size) {
1054 case 0:
1055 return new Base<uint8_t>(machInst, dest, op1, imm, gp);
1056 case 1:
1057 return new Base<uint16_t>(machInst, dest, op1, imm, gp);
1058 case 2:
1059 return new Base<uint32_t>(machInst, dest, op1, imm, gp);
1060 case 3:
1061 return new Base<uint64_t>(machInst, dest, op1, imm, gp);
1062 default:
1063 return new Unknown64(machInst);
1064 }
1065 }
1066
1067 // Decodes ternary with immediate operand, predicated SVE
1068 // instructions handling signed variants only.
1069 template <template <typename T> class Base>
1070 StaticInstPtr
1071 decodeSveTerImmPredS(unsigned size, ExtMachInst machInst,
1072 IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp)
1073 {
1074 switch (size) {
1075 case 0:
1076 return new Base<int8_t>(machInst, dest, op1, imm, gp);
1077 case 1:
1078 return new Base<int16_t>(machInst, dest, op1, imm, gp);
1079 case 2:
1080 return new Base<int32_t>(machInst, dest, op1, imm, gp);
1081 case 3:
1082 return new Base<int64_t>(machInst, dest, op1, imm, gp);
1083 default:
1084 return new Unknown64(machInst);
1085 }
1086 }
1087
1088 // Decodes integer element count SVE instructions, handling
1089 // signed variants only.
1090 template <template <typename T> class Base>
1091 StaticInstPtr
1092 decodeSveElemIntCountS(unsigned size, ExtMachInst machInst,
1093 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1094 {
1095 switch (size) {
1096 case 0:
1097 return new Base<int8_t>(machInst, dest, pattern, imm4);
1098 case 1:
1099 return new Base<int16_t>(machInst, dest, pattern, imm4);
1100 case 2:
1101 return new Base<int32_t>(machInst, dest, pattern, imm4);
1102 case 3:
1103 return new Base<int64_t>(machInst, dest, pattern, imm4);
1104 default:
1105 return new Unknown64(machInst);
1106 }
1107 }
1108
1109 // Decodes integer element count SVE instructions, handling
1110 // unsigned variants only.
1111 template <template <typename T> class Base>
1112 StaticInstPtr
1113 decodeSveElemIntCountU(unsigned size, ExtMachInst machInst,
1114 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1115 {
1116 switch (size) {
1117 case 0:
1118 return new Base<uint8_t>(machInst, dest, pattern, imm4);
1119 case 1:
1120 return new Base<uint16_t>(machInst, dest, pattern, imm4);
1121 case 2:
1122 return new Base<uint32_t>(machInst, dest, pattern, imm4);
1123 case 3:
1124 return new Base<uint64_t>(machInst, dest, pattern, imm4);
1125 default:
1126 return new Unknown64(machInst);
1127 }
1128 }
1129
1130 // Decodes integer element count SVE instructions, handling
1131 // signed variants from 16 to 64 bits only.
1132 template <template <typename T> class Base>
1133 StaticInstPtr
1134 decodeSveElemIntCountLS(unsigned size, ExtMachInst machInst,
1135 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1136 {
1137 switch (size) {
1138 case 1:
1139 return new Base<int16_t>(machInst, dest, pattern, imm4);
1140 case 2:
1141 return new Base<int32_t>(machInst, dest, pattern, imm4);
1142 case 3:
1143 return new Base<int64_t>(machInst, dest, pattern, imm4);
1144 default:
1145 return new Unknown64(machInst);
1146 }
1147 }
1148
1149 // Decodes integer element count SVE instructions, handling
1150 // unsigned variants from 16 to 64 bits only.
1151 template <template <typename T> class Base>
1152 StaticInstPtr
1153 decodeSveElemIntCountLU(unsigned size, ExtMachInst machInst,
1154 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1155 {
1156 switch (size) {
1157 case 1:
1158 return new Base<uint16_t>(machInst, dest, pattern, imm4);
1159 case 2:
1160 return new Base<uint32_t>(machInst, dest, pattern, imm4);
1161 case 3:
1162 return new Base<uint64_t>(machInst, dest, pattern, imm4);
1163 default:
1164 return new Unknown64(machInst);
1165 }
1166 }
1167
1168 // Decodes SVE unpack instructions. Handling signed variants.
1169 template <template <typename T1, typename T2> class Base>
1170 StaticInstPtr
1171 decodeSveUnpackS(unsigned size, ExtMachInst machInst,
1172 IntRegIndex dest, IntRegIndex op1)
1173 {
1174 switch (size) {
1175 case 1:
1176 return new Base<int8_t, int16_t>(machInst, dest, op1);
1177 case 2:
1178 return new Base<int16_t, int32_t>(machInst, dest, op1);
1179 case 3:
1180 return new Base<int32_t, int64_t>(machInst, dest, op1);
1181 default:
1182 return new Unknown64(machInst);
1183 }
1184 }
1185
1186 // Decodes SVE unpack instructions. Handling unsigned variants.
1187 template <template <typename T1, typename T2> class Base>
1188 StaticInstPtr
1189 decodeSveUnpackU(unsigned size, ExtMachInst machInst,
1190 IntRegIndex dest, IntRegIndex op1)
1191 {
1192 switch (size) {
1193 case 1:
1194 return new Base<uint8_t, uint16_t>(machInst, dest, op1);
1195 case 2:
1196 return new Base<uint16_t, uint32_t>(machInst, dest, op1);
1197 case 3:
1198 return new Base<uint32_t, uint64_t>(machInst, dest, op1);
1199 default:
1200 return new Unknown64(machInst);
1201 }
1202 }
1203}};
1204
1205let {{
1206
    # Accumulators for generated C++: declarations go to header_output,
    # execute-method definitions to exec_output; decoders collects
    # per-ISA decode snippets (only the 'Generic' decoder is used here).
    header_output = ''
    exec_output = ''
    decoders = { 'Generic': {} }
1210
    # Predication behavior of an SVE instruction: unpredicated, merging
    # (inactive elements keep their old value), zeroing (inactive
    # elements are cleared), or select.
    class PredType:
        NONE = 0
        MERGE = 1
        ZERO = 2
        SELECT = 3
1216
    # Direction of an element-size conversion (narrowing vs. widening).
    class CvtDir:
        Narrow = 0
        Widen = 1
1220
    # Operand-form tags (immediate/register combinations), presumably
    # for the SVE INDEX instruction variants -- 'I' = immediate,
    # 'R' = register, in (start, increment) order.
    class IndexFormat(object):
        ImmImm = 'II'
        ImmReg = 'IR'
        RegImm = 'RI'
        RegReg = 'RR'
1226
    # Kind of source register operand an instruction reads.
    class SrcRegType(object):
        Vector = 0
        Scalar = 1
        SimdFpScalar = 2
        Predicate = 3
1232
    # Kind of destination register operand an instruction writes.
    class DstRegType(object):
        Vector = 0
        Scalar = 1
        SimdFpScalar = 2
        Predicate = 3
1238
    # Destination kind flag; values are C++ bool tokens spliced verbatim
    # into generated code (NOTE(review): exact template parameter they
    # feed is outside this chunk -- confirm against the op templates).
    class DestType(object):
        Scalar = 'false'
        Vector = 'true'
1242
    # Scalar source width flag; values are C++ bool tokens used as the
    # 'srcIs32b' constructor argument (see sveWhileInst below).
    class SrcSize(object):
        Src32bit = 'true'
        Src64bit = 'false'
1246
    # Break position selector -- presumably distinguishes break-before
    # vs. break-after predicate instructions (BRKA/BRKB style); confirm
    # against the generator that consumes it.
    class Break(object):
        Before = 0
        After = 1
1250
    # Selects which half of the source vector an unpack reads --
    # presumably the high vs. low half (UNPKHI/UNPKLO style); confirm
    # against the generator that consumes it.
    class Unpack(object):
        High = 0
        Low = 1
1254
    # Generates definitions for SVE ADR instructions.
    #
    # Builds a per-element C++ loop that reads srcElem1/srcElem2 from the
    # two vector sources, runs the caller-supplied 'op' snippet to
    # produce destElem, and writes it to the destination vector.  The
    # result is substituted into the SveAdrOpDeclare/SveOpExecute
    # templates, and one execute declaration is emitted per element type
    # in 'types'.
    def sveAdrInst(name, Name, opClass, types, op):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            const Element& srcElem1 = AA64FpOp1_x[i];
            Element srcElem2 = AA64FpOp2_x[i];
            Element destElem = 0;
            %(op)s
            AA64FpDest_x[i] = destElem;
        }''' % {'op': op}
        iop = InstObjParams(name, 'Sve' + Name, 'SveAdrOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveAdrOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        # One templated execute declaration per requested element type.
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1276
1277 # Generates definition for SVE while predicate generation instructions
1278 def sveWhileInst(name, Name, opClass, types, op,
1279 srcSize = SrcSize.Src64bit):
1280 global header_output, exec_output, decoders
1281 extraPrologCode = '''
1282 auto& destPred = PDest;'''
1283 if 'int32_t' in types:
1284 srcType = 'int64_t' if srcSize == SrcSize.Src64bit else 'int32_t'
1285 else:
1286 srcType = 'uint64_t' if srcSize == SrcSize.Src64bit else 'uint32_t'
1287 code = sveEnabledCheckCode + '''
1288 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1289 xc->tcBase());
1290 %(stype)s srcElem1 = static_cast<%(stype)s>(XOp1);
1291 %(stype)s srcElem2 = static_cast<%(stype)s>(XOp2);
1292 bool cond, first = false, none = true, last = true;
1293 destPred.reset();
1294 for (unsigned i = 0; i < eCount; i++) {
1295 %(op)s;
1296 last = last && cond;
1297 none = none && !cond;
1298 first = first || (i == 0 && cond);
1299 PDest_x[i] = last;
1300 srcElem1++;
1301 }
1302 CondCodesNZ = (first << 1) | none;
1303 CondCodesC = !last;
1304 CondCodesV = false;
1305 '''%{'op': op, 'stype': srcType}
1306 iop = InstObjParams(name, 'Sve' + Name, 'SveWhileOp',
1307 {'code': code, 'op_class': opClass, 'srcIs32b': srcSize}, [])
1308 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
1309 header_output += SveWhileOpDeclare.subst(iop)
1310 exec_output += SveOpExecute.subst(iop)
1311 for type in types:
1312 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1313 exec_output += SveOpExecDeclare.subst(substDict);
1314
1315 # Generate definition for SVE compare & terminate instructions
1316 def sveCompTermInst(name, Name, opClass, types, op):
1317 global header_output, exec_output, decoders
1318 code = sveEnabledCheckCode + '''
1319 bool destElem;
1320 Element srcElem1 = static_cast<Element>(XOp1);
1321 Element srcElem2 = static_cast<Element>(XOp2);
1322 %(op)s;
1323 if (destElem) {
1324 CondCodesNZ = CondCodesNZ | 0x2;
1325 CondCodesV = 0;
1326 } else {
1327 CondCodesNZ = CondCodesNZ & ~0x2;
1328 CondCodesV = !CondCodesC;
1329 }
1330 ''' % {'op': op}
1331 iop = InstObjParams(name, 'Sve' + Name, 'SveCompTermOp',
1332 {'code': code, 'op_class': opClass}, [])
1333 header_output += SveCompTermOpDeclare.subst(iop)
1334 exec_output += SveOpExecute.subst(iop)
1335 for type in types:
1336 substDict = {'targs' : type, 'class_name': 'Sve' + Name}
1337 exec_output += SveOpExecDeclare.subst(substDict);
1338
1339 # Generates definition for SVE predicate count instructions
1340 def svePredCountInst(name, Name, opClass, types, op,
1341 destType=DestType.Vector,
1342 srcSize=SrcSize.Src64bit):
1343 global header_output, exec_output, decoders
1344 assert not (destType == DestType.Vector and
1345 srcSize != SrcSize.Src64bit)
1346 code = sveEnabledCheckCode + '''
1347 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1348 xc->tcBase());
1349 int count = 0;
1350 for (unsigned i = 0; i < eCount; i++) {
1351 if (GpOp_x[i]) {
1352 count++;
1353 }
1354 }'''
1355 if destType == DestType.Vector:
1356 code += '''
1357 for (unsigned i = 0; i < eCount; i++) {
1358 Element destElem = 0;
1359 const Element& srcElem = AA64FpDestMerge_x[i];
1360 %(op)s
1361 AA64FpDest_x[i] = destElem;
1362 }''' % {'op': op}
1363 else:
1364 code += '''
1365 %(op)s''' % {'op': op}
1366 iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountOp',
1367 {'code': code, 'op_class': opClass, 'srcIs32b': srcSize,
1368 'destIsVec': destType}, [])
1369 header_output += SvePredCountOpDeclare.subst(iop)
1370 exec_output += SveOpExecute.subst(iop)
1371 for type in types:
1372 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1373 exec_output += SveOpExecDeclare.subst(substDict);
1374
1375 # Generates definition for SVE predicate count instructions (predicated)
1376 def svePredCountPredInst(name, Name, opClass, types):
1377 global header_output, exec_output, decoders
1378 code = sveEnabledCheckCode + '''
1379 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1380 xc->tcBase());
1381 int count = 0;
1382 for (unsigned i = 0; i < eCount; i++) {
1383 if (POp1_x[i] && GpOp_x[i]) {
1384 count++;
1385 }
1386 }
1387 XDest = count;
1388 '''
1389 iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountPredOp',
1390 {'code': code, 'op_class': opClass}, [])
1391 header_output += SvePredCountPredOpDeclare.subst(iop)
1392 exec_output += SveOpExecute.subst(iop)
1393 for type in types:
1394 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1395 exec_output += SveOpExecDeclare.subst(substDict)
1396
1397 # Generates definition for SVE Index generation instructions
1398 def sveIndex(fmt):
1399 global header_output, exec_output, decoders
1400 code = sveEnabledCheckCode + '''
1401 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1402 xc->tcBase());'''
1403 if fmt == IndexFormat.ImmReg or fmt == IndexFormat.ImmImm:
1404 code += '''
1405 const Element& srcElem1 = imm1;'''
1406 if fmt == IndexFormat.RegImm or fmt == IndexFormat.RegReg:
1407 code += '''
1408 const Element& srcElem1 = XOp1;'''
1409 if fmt == IndexFormat.RegImm or fmt == IndexFormat.ImmImm:
1410 code += '''
1411 const Element& srcElem2 = imm2;'''
1412 if fmt == IndexFormat.ImmReg or fmt == IndexFormat.RegReg:
1413 code += '''
1414 const Element& srcElem2 = XOp2;'''
1415 code +='''
1416 for (unsigned i = 0; i < eCount; i++) {
1417 AA64FpDest_x[i] = srcElem1 + i * srcElem2;
1418 }'''
1419 iop = InstObjParams('index', 'SveIndex'+fmt, 'SveIndex'+fmt+'Op',
1420 {'code': code, 'op_class': 'SimdAluOp'})
1421 if fmt == IndexFormat.ImmImm:
1422 header_output += SveIndexIIOpDeclare.subst(iop)
1423 elif fmt == IndexFormat.ImmReg:
1424 header_output += SveIndexIROpDeclare.subst(iop)
1425 elif fmt == IndexFormat.RegImm:
1426 header_output += SveIndexRIOpDeclare.subst(iop)
1427 elif fmt == IndexFormat.RegReg:
1428 header_output += SveIndexRROpDeclare.subst(iop)
1429 exec_output += SveOpExecute.subst(iop)
1430 for type in ['int8_t', 'int16_t', 'int32_t', 'int64_t']:
1431 substDict = {'targs': type, 'class_name': 'SveIndex'+fmt}
1432 exec_output += SveOpExecDeclare.subst(substDict)
1433
    # Generates definitions for widening unary SVE instructions
    # (always constructive). Iterates over destination (DElement) lanes,
    # reading narrow SElement sources through the '_xd' accessors.
    def sveWidenUnaryInst(name, Name, opClass, types, op,
                          predType=PredType.NONE, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            SElement srcElem1 = AA64FpOp1_xd[i];
            DElement destElem = 0;'''
        if predType != PredType.NONE:
            # Predicated: inactive elements keep the old destination value
            # (merging) or are zeroed.
            code += '''
            if (GpOp_xd[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_xd[i]'
                                 if predType == PredType.MERGE
                                 else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_xd[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                            'SveUnaryPredOp' if predType != PredType.NONE
                            else 'SveUnaryUnpredOp',
                            {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveWideningUnaryPredOpDeclare.subst(iop)
        else:
            header_output += SveWideningUnaryUnpredOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1474
    # Generates definitions for unary SVE instructions (always constructive)
    def sveUnaryInst(name, Name, opClass, types, op, predType=PredType.NONE,
                     srcRegType=SrcRegType.Vector, decoder='Generic'):
        global header_output, exec_output, decoders
        # Pick the source accessor: per-lane vector element, scalar X
        # register, or element 0 of a SIMD&FP scalar register.
        op1 = ('AA64FpOp1_x[i]' if srcRegType == SrcRegType.Vector
               else 'XOp1' if srcRegType == SrcRegType.Scalar
               else 'AA64FpOp1_x[0]')
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = %s;
            Element destElem = 0;''' % op1
        if predType != PredType.NONE:
            # Predicated: inactive elements keep the old destination value
            # (merging) or are zeroed.
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE
                                 else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                            'SveUnaryPredOp' if predType != PredType.NONE
                            else 'SveUnaryUnpredOp',
                            {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveUnaryPredOpDeclare.subst(iop)
        else:
            header_output += SveUnaryUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1517
    # Generates definitions for SVE floating-point conversions (always
    # unary, constructive, merging).
    def sveCvtInst(name, Name, opClass, types, op, direction=CvtDir.Narrow,
                   decoder='Generic'):
        global header_output, exec_output, decoders
        # Iterate over the wider of the two element types; the source is
        # masked down to its own width before the conversion snippet runs.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<%(bigElemType)s>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            SElement srcElem1 = AA64FpOp1_x%(bigElemSuffix)s[i] &
                    mask(sizeof(SElement) * 8);
            DElement destElem = 0;
            if (GpOp_x%(bigElemSuffix)s[i]) {
                %(op)s
                AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
            } else {
                AA64FpDest_x%(bigElemSuffix)s[i] =
                        AA64FpDestMerge_x%(bigElemSuffix)s[i];
            }
        }
        ''' % {'op': op,
               'bigElemType': 'SElement' if direction == CvtDir.Narrow
                              else 'DElement',
               'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'}
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveWideningUnaryPredOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1550
    # Generates definitions for associative SVE reductions.
    # 'op' folds srcElem1 into destElem; 'identity' is the reduction's
    # neutral starting value.
    def sveAssocReducInst(name, Name, opClass, types, op, identity,
                          decoder='Generic'):
        global header_output, exec_output, decoders
        # The source is copied to a temporary buffer up front because the
        # destination register, which may alias it, is zeroed inside the
        # reduction loop below.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxOp1 = tmpVecC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_x[i];
        }
        Element destElem = %(identity)s;
        for (unsigned i = 0; i < eCount; i++) {
            AA64FpDest_x[i] = 0; // zero upper part
            if (GpOp_x[i]) {
                const Element& srcElem1 = auxOp1[i];
                %(op)s
            }
        }
        AA64FpDest_x[0] = destElem;
        ''' % {'op': op, 'identity': identity}
        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveReducOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1581
    # Generates definitions for widening associative SVE reductions.
    # Narrow (SElement) sources are accumulated into a wide (DElement)
    # scalar; the rest of the destination vector is zeroed.
    def sveWideningAssocReducInst(name, Name, opClass, types, op, identity,
                                  decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<SElement>(
                xc->tcBase());
        unsigned eWideCount = ArmStaticInst::getCurSveVecLen<DElement>(
                xc->tcBase());
        DElement destElem = %(identity)s;
        for (unsigned i = 0; i < eCount; i++) {
            if (GpOp_xs[i]) {
                DElement srcElem1 = AA64FpOp1_xs[i];
                %(op)s
            }
        }
        AA64FpDest_xd[0] = destElem;
        for (int i = 1; i < eWideCount; i++) {
            AA64FpDest_xd[i] = 0;
        }
        ''' % {'op': op, 'identity': identity}
        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveWideningReducOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1611
    # Generates definitions for non-associative SVE reductions.
    # The input is padded up to a power of two with the identity element
    # (inactive lanes also get the identity), then reduced pairwise in
    # log2 passes so the combining order is fixed regardless of the
    # vector length -- required when 'op' is not associative.
    def sveNonAssocReducInst(name, Name, opClass, types, op, identity,
                             decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto tmpVec = tmpVecC.as<Element>();
        int ePow2Count = 1;
        while (ePow2Count < eCount) {
            ePow2Count *= 2;
        }

        for (unsigned i = 0; i < ePow2Count; i++) {
            if (i < eCount && GpOp_x[i]) {
                tmpVec[i] = AA64FpOp1_x[i];
            } else {
                tmpVec[i] = %(identity)s;
            }
        }

        unsigned n = ePow2Count;
        while (n > 1) {
            unsigned max = n;
            n = 0;
            for (unsigned i = 0; i < max; i += 2) {
                Element srcElem1 = tmpVec[i];
                Element srcElem2 = tmpVec[i + 1];
                Element destElem = 0;
                %(op)s
                tmpVec[n] = destElem;
                n++;
            }
        }
        AA64FpDest_x[0] = tmpVec[0];
        for (unsigned i = 1; i < eCount; i++) {
            AA64FpDest_x[i] = 0; // zero upper part
        }
        ''' % {'op': op, 'identity': identity}
        iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveReducOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1660
    # Generates definitions for binary SVE instructions with immediate
    # operand. Predicated forms are destructive: srcElem1 is read from the
    # destination register itself.
    def sveBinImmInst(name, Name, opClass, types, op, predType=PredType.NONE,
                      decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {'''
        if predType != PredType.NONE:
            code += '''
            const Element& srcElem1 = %s;''' % (
                'AA64FpDestMerge_x[i]' if predType == PredType.MERGE else '0')
        else:
            code += '''
            const Element& srcElem1 = AA64FpOp1_x[i];'''
        code += '''
            Element srcElem2 = imm;
            Element destElem = 0;'''
        if predType != PredType.NONE:
            # Inactive elements keep the old destination value (merging)
            # or are zeroed.
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                            'SveBinImmPredOp' if predType != PredType.NONE
                            else 'SveBinImmUnpredConstrOp',
                            {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveBinImmPredOpDeclare.subst(iop)
        else:
            header_output += SveBinImmUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1707
    # Generates definitions for unary and binary SVE instructions with wide
    # immediate operand. Binary forms are destructive: srcElem1 comes from
    # the destination register.
    def sveWideImmInst(name, Name, opClass, types, op, predType=PredType.NONE,
                       isUnary=False, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {'''
        # TODO: handle unsigned-to-signed conversion properly...
        if isUnary:
            code += '''
            Element srcElem1 = imm;'''
        else:
            code += '''
            const Element& srcElem1 = AA64FpDestMerge_x[i];
            Element srcElem2 = imm;'''
        code += '''
            Element destElem = 0;'''
        if predType != PredType.NONE:
            # Inactive elements keep the old destination value (merging)
            # or are zeroed.
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                            'Sve%sWideImm%sOp' % (
                                'Unary' if isUnary else 'Bin',
                                'Unpred' if predType == PredType.NONE
                                else 'Pred'),
                            {'code': code, 'op_class': opClass}, [])
        if predType == PredType.NONE:
            header_output += SveWideImmUnpredOpDeclare.subst(iop)
        else:
            header_output += SveWideImmPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1756
    # Generates definitions for shift SVE instructions with wide elements.
    # The shift amounts are read as 64-bit elements of the second source;
    # each Element-sized lane uses the 64-bit element that covers it. The
    # amounts are buffered first since the destination may alias Op2.
    def sveShiftByWideElemsInst(name, Name, opClass, types, op,
                                predType=PredType.NONE, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxOp2 = tmpVecC.as<Element>();
        for (unsigned i = 0; i < eCount; i++) {
            auxOp2[i] = AA64FpOp2_ud[i];
        }
        for (unsigned i = 0; i < eCount; i++) {'''
        if predType != PredType.NONE:
            # Predicated form is destructive: srcElem1 comes from the
            # destination register.
            code += '''
            const Element& srcElem1 = AA64FpDestMerge_x[i];'''
        else:
            code += '''
            const Element& srcElem1 = AA64FpOp1_x[i];'''
        code += '''
            const auto& srcElem2 = auxOp2[
                (i * sizeof(Element) * 8) / 64];
            Element destElem = 0;'''
        if predType != PredType.NONE:
            code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem': 'AA64FpDestMerge_x[i]'
                                 if predType == PredType.MERGE else '0'}
        else:
            code += '''
            %(op)s''' % {'op': op}
        code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
                            'SveBinDestrPredOp' if predType != PredType.NONE
                            else 'SveBinUnpredOp',
                            {'code': code, 'op_class': opClass}, [])
        if predType != PredType.NONE:
            header_output += SveBinDestrPredOpDeclare.subst(iop)
        else:
            header_output += SveBinUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1808
1809 # Generates definitions for binary indexed SVE instructions
1810 # (always unpredicated)
1811 def sveBinIdxInst(name, Name, opClass, types, op, decoder='Generic'):
1812 global header_output, exec_output, decoders
1813 code = sveEnabledCheckCode + '''
1814 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1815 xc->tcBase());
1816
1817 // Number of elements in a 128 bit segment
1818 constexpr unsigned ePerSegment = 128 / sizeof(Element);
1819
1820 '''
1821
1822 code += '''
1823 for (unsigned i = 0; i < eCount; i++) {
1824 const auto segmentBase = i - i % ePerSegment;
1825 const auto segmentIdx = segmentBase + index;
1826
1827 const Element& srcElem1 = AA64FpOp1_x[i];
1828 const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
1829 Element destElem = 0;
1830
1831 '''
1832
1833 code += '''
1834 %(op)s
1835 AA64FpDest_x[i] = destElem;
1836 }
1837 ''' % {'op': op}
1838
1839 baseClass = 'SveBinIdxUnpredOp'
1840
1841 iop = InstObjParams(name, 'Sve' + Name, baseClass,
1842 {'code': code, 'op_class': opClass}, [])
1843 header_output += SveBinIdxUnpredOpDeclare.subst(iop)
1844 exec_output += SveOpExecute.subst(iop)
1845 for type in types:
1846 substDict = {'targs' : type,
1847 'class_name' : 'Sve' + Name}
1848 exec_output += SveOpExecDeclare.subst(substDict)
1849
    # Generates definitions for binary SVE instructions.
    # predType selects what inactive elements receive; destructive forms
    # read srcElem1 from the destination register. A caller may supply
    # 'customIterCode' to replace the whole per-element loop.
    def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
                   isDestructive=False, customIterCode=None,
                   decoder='Generic'):
        # Unpredicated and select-predicated forms cannot be destructive.
        assert not (predType in (PredType.NONE, PredType.SELECT) and
                    isDestructive)
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        if customIterCode is None:
            code += '''
        for (unsigned i = 0; i < eCount; i++) {'''
            if predType == PredType.MERGE:
                code += '''
            const Element& srcElem1 = AA64FpDestMerge_x[i];'''
            else:
                code += '''
            const Element& srcElem1 = AA64FpOp1_x[i];'''
            code += '''
            const Element& srcElem2 = AA64FpOp2_x[i];
            Element destElem = 0;'''
            if predType != PredType.NONE:
                # Inactive elements: keep dest (MERGE), zero (ZERO), or
                # take the second source (SELECT).
                code += '''
            if (GpOp_x[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }''' % {'op': op,
                    'dest_elem':
                        'AA64FpDestMerge_x[i]' if predType == PredType.MERGE
                        else '0' if predType == PredType.ZERO
                        else 'srcElem2'}
            else:
                code += '''
            %(op)s''' % {'op': op}
            code += '''
            AA64FpDest_x[i] = destElem;
        }'''
        else:
            code += customIterCode
        if predType == PredType.NONE:
            baseClass = 'SveBinUnpredOp'
        elif isDestructive:
            baseClass = 'SveBinDestrPredOp'
        else:
            baseClass = 'SveBinConstrPredOp'
        iop = InstObjParams(name, 'Sve' + Name, baseClass,
                            {'code': code, 'op_class': opClass}, [])
        if predType == PredType.NONE:
            header_output += SveBinUnpredOpDeclare.subst(iop)
        elif isDestructive:
            header_output += SveBinDestrPredOpDeclare.subst(iop)
        else:
            header_output += SveBinConstrPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1910
    # Generates definitions for predicate logical instructions.
    # 'op' combines the two boolean source predicates into destElem; only
    # zeroing and select predication are architecturally valid here.
    def svePredLogicalInst(name, Name, opClass, types, op,
                           predType=PredType.ZERO, isFlagSetting=False,
                           decoder='Generic'):
        global header_output, exec_output, decoders
        assert predType in (PredType.ZERO, PredType.SELECT)
        # The governing predicate is buffered first, since the destination
        # predicate written in the second loop may alias it.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; i++) {
            auxGpOp[i] = GpOp_x[i];
        }
        for (unsigned i = 0; i < eCount; i++) {
            bool srcElem1 = POp1_x[i];
            bool srcElem2 = POp2_x[i];
            bool destElem = false;
            if (auxGpOp[i]) {
                %(op)s
            } else {
                destElem = %(dest_elem)s;
            }
            PDest_x[i] = destElem;
        }''' % {'op': op,
                'dest_elem': 'false' if predType == PredType.ZERO
                             else 'srcElem2'}
        extraPrologCode = ''
        if isFlagSetting:
            # Flag-setting variants (e.g. ANDS) update NZCV from the
            # first/none/last active results of the destination predicate.
            code += '''
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
                destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePredLogicalOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePredLogicalOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
1956
1957 # Generates definitions for predicate permute instructions
1958 def svePredBinPermInst(name, Name, opClass, types, iterCode,
1959 decoder='Generic'):
1960 global header_output, exec_output, decoders
1961 code = sveEnabledCheckCode + '''
1962 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1963 xc->tcBase());'''
1964 code += iterCode
1965 iop = InstObjParams(name, 'Sve' + Name, 'SvePredBinPermOp',
1966 {'code': code, 'op_class': opClass}, [])
1967 header_output += SveBinUnpredOpDeclare.subst(iop)
1968 exec_output += SveOpExecute.subst(iop)
1969 for type in types:
1970 substDict = {'targs' : type,
1971 'class_name' : 'Sve' + Name}
1972 exec_output += SveOpExecDeclare.subst(substDict)
1973
    # Generates definitions for SVE compare instructions
    # NOTE: compares are all predicated zeroing
    def sveCmpInst(name, Name, opClass, types, op, isImm=False,
                   decoder='Generic'):
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        # The governing predicate is buffered because the destination
        # predicate (reset and rewritten below) may alias it. srcElem2 is
        # marked unused since some 'op' snippets do not reference it.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto tmpPred = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            tmpPred[i] = GpOp_x[i];
        destPred.reset();
        for (unsigned i = 0; i < eCount; i++) {
            const Element& srcElem1 = AA64FpOp1_x[i];
            %(src_elem_2_ty)s srcElem2 __attribute__((unused)) =
                %(src_elem_2)s;
            bool destElem = false;
            if (tmpPred[i]) {
                %(op)s
            } else {
                destElem = false;
            }
            PDest_x[i] = destElem;
        }''' % {'op': op,
                'src_elem_2_ty': 'Element' if isImm else 'const Element&',
                'src_elem_2': 'imm' if isImm else 'AA64FpOp2_x[i]'}
        iop = InstObjParams(name, 'Sve' + Name,
                            'SveCmpImmOp' if isImm else 'SveCmpOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        if isImm:
            header_output += SveCmpImmOpDeclare.subst(iop)
        else:
            header_output += SveCmpOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2016
2017 # Generates definitions for ternary SVE intructions (always predicated -
2018 # merging)
2019 def sveTerInst(name, Name, opClass, types, op, decoder='Generic'):
2020 global header_output, exec_output, decoders
2021 code = sveEnabledCheckCode + '''
2022 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2023 xc->tcBase());
2024 for (unsigned i = 0; i < eCount; i++) {
2025 const Element& srcElem1 = AA64FpOp1_x[i];
2026 const Element& srcElem2 = AA64FpOp2_x[i];
2027 Element destElem = AA64FpDestMerge_x[i];
2028 if (GpOp_x[i]) {
2029 %(op)s
2030 }
2031 AA64FpDest_x[i] = destElem;
2032 }''' % {'op': op}
2033 iop = InstObjParams(name, 'Sve' + Name, 'SveTerPredOp',
2034 {'code': code, 'op_class': opClass}, [])
2035 header_output += SveTerPredOpDeclare.subst(iop)
2036 exec_output += SveOpExecute.subst(iop)
2037 for type in types:
2038 substDict = {'targs' : type,
2039 'class_name' : 'Sve' + Name}
2040 exec_output += SveOpExecDeclare.subst(substDict)
2041
    # Generates definitions for ternary SVE instructions with indexed
    # operand. The indexed operand selects one element per 128-bit segment
    # of the second source vector; destElem starts from the destination
    # value (merging/destructive accumulate).
    def sveTerIdxInst(name, Name, opClass, types, op, decoder='Generic'):
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());

        // Number of elements in a 128 bit segment
        constexpr unsigned ePerSegment = 128 / sizeof(Element);

        for (unsigned i = 0; i < eCount; i++) {
            const auto segmentBase = i - i % ePerSegment;
            const auto segmentIdx = segmentBase + index;

            const Element& srcElem1 = AA64FpOp1_x[i];
            const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
            Element destElem = AA64FpDestMerge_x[i];
        '''

        code += '''
            %(op)s
            AA64FpDest_x[i] = destElem;
        }''' % {'op': op}

        iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp',
                            {'code': code, 'op_class': opClass}, [])
        header_output += SveBinIdxUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2074
2075 # Generates definitions for ternary SVE intructions with immediate operand
2076 # (always unpredicated)
2077 def sveTerImmInst(name, Name, opClass, types, op, decoder='Generic'):
2078 global header_output, exec_output, decoders
2079 code = sveEnabledCheckCode + '''
2080 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2081 xc->tcBase());
2082 for (unsigned i = 0; i < eCount; i++) {
2083 const Element& srcElem2 = AA64FpOp2_x[i];
2084 Element srcElem3 = imm;
2085 Element destElem = AA64FpDestMerge_x[i];
2086 %(op)s
2087 AA64FpDest_x[i] = destElem;
2088 }''' % {'op': op}
2089 iop = InstObjParams(name, 'Sve' + Name, 'SveTerImmUnpredOp',
2090 {'code': code, 'op_class': opClass}, [])
2091 header_output += SveTerImmUnpredOpDeclare.subst(iop)
2092 exec_output += SveOpExecute.subst(iop)
2093 for type in types:
2094 substDict = {'targs' : type,
2095 'class_name' : 'Sve' + Name}
2096 exec_output += SveOpExecDeclare.subst(substDict)
2097
    # Generates definitions for PTRUE and PTRUES instructions.
    # The immediate is decoded to an element count by sveDecodePredCount;
    # the first 'predCount' elements are set true, the rest false.
    def svePtrueInst(name, Name, opClass, types, isFlagSetting=False,
                     decoder='Generic'):
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        unsigned predCount = sveDecodePredCount(imm, eCount);
        destPred.reset();
        for (unsigned i = 0; i < eCount; i++) {
            PDest_x[i] = (i < predCount);
        }'''
        if isFlagSetting:
            # PTRUES also updates NZCV from the resulting predicate.
            code += '''
        CondCodesNZ = (destPred.firstActive(destPred, eCount) << 1) |
                destPred.noneActive(destPred, eCount);
        CondCodesC = !destPred.lastActive(destPred, eCount);
        CondCodesV = 0;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePtrueOp',
                            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePtrueOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2127
    # Generate definitions for integer CMP<cc> instructions.
    # With 'wideop' the second source is read as 64-bit elements, one per
    # 64-bit chunk of the first source. Signedness is inferred from the
    # element types. NZCV is set from the resulting predicate.
    def sveIntCmpInst(name, Name, opClass, types, op, wideop = False,
                      decoder = 'Generic'):
        global header_output, exec_output, decoders
        signed = 'int8_t' in types
        srcType = 'Element'
        op2Suffix = 'x'
        if wideop:
            srcType = 'int64_t' if signed else 'uint64_t'
            op2Suffix = 'sd' if signed else 'ud'
        extraPrologCode = '''
        auto& destPred = PDest;'''
        # The governing predicate is buffered because the destination
        # predicate (reset and rewritten below) may alias it.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto tmpPred = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            tmpPred[i] = GpOp_x[i];
        destPred.reset();
        for (unsigned i = 0; i < eCount; ++i) {
            %(srcType)s srcElem1 = (%(srcType)s) AA64FpOp1_x[i];
            %(srcType)s srcElem2 = AA64FpOp2_%(op2Suffix)s[%(op2Index)s];
            bool destElem = false;
            if (tmpPred[i]) {
                %(op)s
            }
            PDest_x[i] = destElem;
        }
        CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) |
                destPred.noneActive(tmpPred, eCount);
        CondCodesC = !destPred.lastActive(tmpPred, eCount);
        CondCodesV = 0;''' % {
            'op': op,
            'srcType': srcType,
            'op2Suffix': op2Suffix,
            # For wide ops, lane i reads the 64-bit element covering it.
            'op2Index': '(i * sizeof(Element)) / 8' if wideop else 'i'
        }
        iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpOp',
                            {
                                'code': code,
                                'op_class': opClass,
                                'op2IsWide': 'true' if wideop else 'false',
                            }, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveIntCmpOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2179
2180 # Generate definitions for integer CMP<cc> instructions (with immediate)
    def sveIntCmpImmInst(name, Name, opClass, types, op, decoder = 'Generic'):
        """Generate declaration and execution code for a predicated SVE
        integer compare-with-immediate (CMP<cc> imm) instruction; writes
        a predicate destination and sets NZCV from it.

        op: C++ snippet computing `destElem` from `srcElem1`/`srcElem2`
        (srcElem2 is the decoded immediate cast to Element).
        """
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        # Copy the governing predicate before writing destPred, which may
        # alias it.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto tmpPred = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            tmpPred[i] = GpOp_x[i];
        destPred.reset();
        for (unsigned i = 0; i < eCount; ++i) {
            Element srcElem1 = AA64FpOp1_x[i];
            Element srcElem2 = static_cast<Element>(imm);
            bool destElem = false;
            if (tmpPred[i]) {
                %(op)s
            }
            PDest_x[i] = destElem;
        }
        CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) |
            destPred.noneActive(tmpPred, eCount);
        CondCodesC = !destPred.lastActive(tmpPred, eCount);
        CondCodesV = 0;'''%{'op': op}
        iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpImmOp',
            {'code': code, 'op_class': opClass,}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveIntCmpImmOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2215
2216 # Generate definitions for SVE element count instructions
2217 def sveElemCountInst(name, Name, opClass, types, op,
2218 destType = DestType.Scalar, dstIs32b = False,
2219 dstAcc = True, decoder = 'Generic'):
2220 global header_output, exec_output, decoders
2221 code = sveEnabledCheckCode + '''
2222 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2223 xc->tcBase());
2224 unsigned count = sveDecodePredCount(pattern, eCount);
2225 '''
2226 if destType == DestType.Vector:
2227 code += '''
2228 for (unsigned i = 0; i < eCount; ++i) {
2229 Element srcElem1 = AA64FpDestMerge_x[i];
2230 Element destElem = 0;
2231 %(op)s
2232 AA64FpDest_x[i] = destElem;
2233 }'''%{'op': op}
2234 else:
2235 if 'uint16_t' in types:
2236 if dstIs32b:
2237 dstType = 'uint32_t'
2238 else:
2239 dstType = 'uint64_t'
2240 else:
2241 if dstIs32b:
2242 dstType = 'int32_t'
2243 else:
2244 dstType = 'int64_t'
2245 if dstAcc:
2246 code += '''
2247 %(dstType)s srcElem1 = XDest;
2248 '''%{'dstType': dstType}
2249 code += '''
2250 %(dstType)s destElem = 0;
2251 %(op)s;
2252 XDest = destElem;
2253 '''%{'op': op, 'dstType': dstType}
2254 iop = InstObjParams(name, 'Sve' + Name, 'SveElemCountOp',
2255 {'code': code, 'op_class': opClass, 'dstIsVec': destType,
2256 'dstIs32b': 'true' if dstIs32b else 'false'}, [])
2257 header_output += SveElemCountOpDeclare.subst(iop)
2258 exec_output += SveOpExecute.subst(iop)
2259 for type in types:
2260 substDict = {'targs' : type,
2261 'class_name' : 'Sve' + Name}
2262 exec_output += SveOpExecDeclare.subst(substDict);
2263
    def svePartBrkInst(name, Name, opClass, isFlagSetting, predType, whenBrk,
            decoder = 'Generic'):
        """Generate declaration and execution code for SVE partition
        break instructions (BRKA/BRKB and their flag-setting forms).

        isFlagSetting: also set NZCV from the result; predType: zeroing
        or merging behavior for inactive elements; whenBrk: whether the
        break condition is evaluated before (BRKB) or after (BRKA)
        writing the destination element.
        """
        global header_output, exec_output, decoders
        # The governing predicate is copied to auxGpOp up front because
        # PDest may alias GpOp and is written inside the main loop.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        bool dobreak = false;
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<uint8_t>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxGpOp[i] = GpOp_ub[i];
        }
        for (unsigned i = 0; i < eCount; ++i) {
            bool element = POp1_ub[i] == 1;
            if (auxGpOp[i]) {'''
        breakCode = '''
                dobreak = dobreak || element;'''
        if whenBrk == Break.Before:
            code += breakCode
        code += '''
                PDest_ub[i] = !dobreak;'''
        if whenBrk == Break.After:
            code += breakCode
        code += '''
            }'''
        if predType == PredType.ZERO:
            code += ''' else {
                PDest_ub[i] = 0;
            }'''
        elif predType == PredType.MERGE:
            code += ''' else {
                PDest_ub[i] = PDestMerge_ub[i];
            }'''
        code += '''
        }'''
        extraPrologCode = ''
        if isFlagSetting:
            # destPred (an alias of PDest, bound in the prolog below) is
            # examined against the saved governing predicate.
            code += '''
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
            destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkOp',
            {'code': code, 'op_class': opClass,
             'isMerging': 'true' if predType == PredType.MERGE
                          else 'false'}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePartBrkOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2315
    def svePartBrkPropPrevInst(name, Name, opClass, isFlagSetting, whenBrk,
            decoder = 'Generic'):
        """Generate declaration and execution code for SVE propagating
        partition break instructions (BRKPA/BRKPB and flag-setting
        forms), which propagate the break state from the previous
        partition via POp1's last active element.
        """
        global header_output, exec_output, decoders
        # 'last' starts as the break state carried over from the first
        # source predicate; it is cleared once POp2 has an active element.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        bool last = POp1_ub.lastActive(GpOp_ub, eCount);
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<uint8_t>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxGpOp[i] = GpOp_ub[i];
        }
        for (unsigned i = 0; i < eCount; ++i) {
            if (auxGpOp[i]) {'''
        breakCode = '''
                last = last && (POp2_ub[i] == 0);'''
        if whenBrk == Break.Before:
            code += breakCode
        code += '''
                PDest_ub[i] = last;'''
        if whenBrk == Break.After:
            code += breakCode
        code += '''
            } else {
                PDest_ub[i] = 0;
            }
        }'''
        extraPrologCode = ''
        if isFlagSetting:
            code += '''
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
            destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp',
            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePartBrkPropOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2357
    def svePartBrkPropNextInst(name, Name, opClass, isFlagSetting,
            decoder = 'Generic'):
        """Generate declaration and execution code for SVE BRKN(S):
        propagate the break state to the next partition.  If POp1's last
        active element (under GpOp) is clear, the destination is zeroed;
        otherwise it keeps its previous contents.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        bool last = POp1_ub.lastActive(GpOp_ub, eCount);
        for (unsigned i = 0; i < eCount; i++) {
            if (!last) {
                PDest_ub[i] = 0;
            } else {
                PDest_ub[i] = PDestMerge_ub[i];
            }
        }'''
        extraPrologCode = ''
        if isFlagSetting:
            # BRKNS sets the flags as if governed by an all-true
            # predicate, hence the temporary predOnes register.
            code += '''
        VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false>::Container c;
        VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false> predOnes(c);
        for (unsigned i = 0; i < eCount; i++) {
            predOnes[i] = 1;
        }
        CondCodesNZ = (destPred.firstActive(predOnes, eCount) << 1) |
            destPred.noneActive(predOnes, eCount);
        CondCodesC = !destPred.lastActive(predOnes, eCount);
        CondCodesV = 0;'''
            extraPrologCode += '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp',
            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SvePartBrkPropOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2391
2392 # Generate definitions for scalar select instructions
    def sveSelectInst(name, Name, opClass, types, op, isCond,
            destType = DstRegType.Scalar, decoder = 'Generic'):
        """Generate declaration and execution code for SVE last-active
        element extract/select instructions (LASTA/LASTB/CLASTA/CLASTB
        style operations).

        op: C++ snippet computing `destElem` using `last` (index of the
        last active element, or -1 if none); isCond: conditional form
        that leaves/merges the destination when no element is active;
        destType: scalar, vector, or SIMD&FP-scalar destination.
        """
        global header_output, exec_output, decoders
        # Scan backwards for the index of the last active element.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        int last;
        for (last = eCount - 1; last >= 0; --last) {
            if (GpOp_x[last]) {
                break;
            }
        }
        '''
        if isCond:
            code += '''
        if (last >= 0) {'''
        code += '''
        Element destElem;
        %(op)s'''%{'op': op}
        if destType == DstRegType.Vector:
            code += '''
        for (unsigned i = 0; i < eCount; ++i)
            AA64FpDest_x[i] = destElem;'''
        elif destType == DstRegType.Scalar:
            code += '''
        XDest = destElem;'''
        elif destType == DstRegType.SimdFpScalar:
            code += '''
        AA64FpDest_x[0] = destElem;'''
        if isCond:
            # Conditional forms: when no element is active, the scalar
            # destination is truncated to the element size; vector and
            # SIMD&FP destinations merge their previous contents.
            code += '''
        }'''
            if destType == DstRegType.Scalar:
                code += ''' else {
            XDest = (Element) XDest;
        }'''
            elif destType == DstRegType.Vector:
                code += ''' else {
            for (unsigned i = 0; i < eCount; ++i)
                AA64FpDest_x[i] = AA64FpDestMerge_x[i];
        }'''
            elif destType == DstRegType.SimdFpScalar:
                code += ''' else {
            AA64FpDest_x[0] = AA64FpDestMerge_x[0];
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveSelectOp',
            {'code': code, 'op_class': opClass,
             'isCond': 'true' if isCond else 'false',
             'isScalar': 'true'
                 if destType == DstRegType.Scalar else 'false',
             'isSimdFp': 'true'
                 if destType == DstRegType.SimdFpScalar
                 else 'false'},
            [])
        header_output += SveSelectOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2453
2454 # Generate definitions for PNEXT (find next active predicate)
2455 # instructions
    def svePNextInst(name, Name, opClass, types, decoder = 'Generic'):
        """Generate declaration and execution code for SVE PNEXT: find
        the next active element after the last one set in POp1 and set
        only that element in the destination; NZCV is set from the
        result against the (saved) governing predicate.
        """
        global header_output, exec_output, decoders
        # The governing predicate is saved because destPred (PDest) may
        # alias GpOp and is reset below.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxGpOp[i] = GpOp_x[i];
        }
        int last;
        for (last = eCount - 1; last >= 0; --last) {
            if (POp1_x[last]) {
                break;
            }
        }
        int next = last + 1;
        while (next < eCount && GpOp_x[next] == 0) {
            next++;
        }
        destPred.reset();
        if (next < eCount) {
            PDest_x[next] = 1;
        }
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
            destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
        extraPrologCode = '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp',
            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnaryPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2495
2496 # Generate definitions for PFIRST (set first active predicate)
2497 # instructions
    def svePFirstInst(name, Name, opClass, decoder = 'Generic'):
        """Generate declaration and execution code for SVE PFIRST: set
        the first active element of the destination predicate (merging
        the rest from its previous contents) and set NZCV from the
        result.  Instantiated for uint8_t only.
        """
        global header_output, exec_output, decoders
        # Save the governing predicate first; PDest may alias GpOp.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecPredRegContainer tmpPredC;
        auto auxGpOp = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i)
            auxGpOp[i] = GpOp_x[i];
        int first = -1;
        for (int i = 0; i < eCount; ++i) {
            if (auxGpOp[i] && first == -1) {
                first = i;
            }
        }
        for (int i = 0; i < eCount; ++i) {
            PDest_x[i] = PDestMerge_x[i];
        }
        if (first >= 0) {
            PDest_x[first] = 1;
        }
        CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
            destPred.noneActive(auxGpOp, eCount);
        CondCodesC = !destPred.lastActive(auxGpOp, eCount);
        CondCodesV = 0;'''
        extraPrologCode = '''
        auto& destPred = PDest;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp',
            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnaryPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        substDict = {'targs' : 'uint8_t',
                     'class_name' : 'Sve' + Name}
        exec_output += SveOpExecDeclare.subst(substDict)
2533
2534 # Generate definitions for SVE TBL instructions
    def sveTblInst(name, Name, opClass, decoder = 'Generic'):
        """Generate declaration and execution code for SVE TBL: table
        lookup of Op1 elements using Op2 as indices; out-of-range
        indices yield zero.  Instantiated for all unsigned types.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (int i = 0; i < eCount; ++i) {
            Element idx = AA64FpOp2_x[i];
            Element val;
            if (idx < eCount) {
                val = AA64FpOp1_x[idx];
            } else {
                val = 0;
            }
            AA64FpDest_x[i] = val;
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveTblOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SveBinUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in unsignedTypes:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2558
2559 # Generate definitions for SVE Unpack instructions
    def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
            regType, decoder = 'Generic'):
        """Generate declaration and execution code for SVE unpack
        instructions (PUNPKLO/HI, SUNPKLO/HI, UUNPKLO/HI): widen the
        low or high half of a vector or predicate source.

        sdtypes: (source, destination) element type pairs; unpackHalf:
        which half of the source to widen; regType: vector or predicate
        source register.
        """
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        # eCount is the number of (wider) destination elements.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>(
                xc->tcBase());'''
        if unpackHalf == Unpack.Low:
            # The low half overlaps the destination, so the source is
            # copied to a temporary before being widened in place.
            if regType == SrcRegType.Predicate:
                code += '''
        TheISA::VecPredRegContainer tmpPredC;
        auto auxPOp1 = tmpPredC.as<SElement>();
        for (int i = 0; i < eCount; ++i) {
            auxPOp1[i] = POp1_xs[i];
        }'''
            else:
                code += '''
        TheISA::VecRegContainer tmpVecC;
        auto auxOp1 = tmpVecC.as<SElement>();
        for (int i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_xs[i];
        }'''
        code += '''
        for (int i = 0; i < eCount; ++i) {'''
        if regType == SrcRegType.Predicate:
            if unpackHalf == Unpack.High:
                code +='''
            const SElement& srcElem1 = POp1_xs[i + eCount];'''
            else:
                code +='''
            const SElement& srcElem1 = auxPOp1[i];'''
            # Clear the whole destination slot before setting the LSB.
            code += '''
            destPred.set_raw(i, 0);
            PDest_xd[i] = srcElem1;'''
        else:
            if unpackHalf == Unpack.High:
                code +='''
            const SElement& srcElem1 = AA64FpOp1_xs[i + eCount];'''
            else:
                code +='''
            const SElement& srcElem1 = auxOp1[i];'''
            code += '''
            AA64FpDest_xd[i] = static_cast<DElement>(srcElem1);'''
        code += '''
        }
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnpackOp',
            {'code': code, 'op_class': opClass}, [])
        if regType == SrcRegType.Predicate:
            iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnpackOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        for srcType, dstType in sdtypes:
            substDict = {'targs': srcType + ', ' + dstType,
                         'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2617
2618 # Generate definition for SVE predicate test instructions
    def svePredTestInst(name, Name, opClass, decoder = 'Generic'):
        """Generate declaration and execution code for SVE PTEST: set
        NZCV from POp1 examined under the governing predicate; no
        destination register is written.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        CondCodesNZ = (POp1_ub.firstActive(GpOp_ub, eCount) << 1) |
            POp1_ub.noneActive(GpOp_ub, eCount);
        CondCodesC = !POp1_ub.lastActive(GpOp_ub, eCount);
        CondCodesV = 0;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SvePredTestOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SvePredicateTestOpDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2632
2633 # Generate definition for SVE predicate compact operations
    def sveCompactInst(name, Name, opClass, types, decoder = 'Generic'):
        """Generate declaration and execution code for SVE COMPACT:
        gather the active elements of Op1 to the low end of the
        destination, zero-filling the rest.
        """
        global header_output, exec_output, decoders
        # Op1 is copied first since the destination may alias it.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxOp1 = tmpVecC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_x[i];
        }
        unsigned x = 0;
        for (unsigned i = 0; i < eCount; ++i) {
            AA64FpDest_x[i] = 0;
            if (GpOp_x[i]) {
                AA64FpDest_x[x] = auxOp1[i];
                x++;
            }
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SveUnaryPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2659
2660 # Generate definition for unary SVE predicate instructions with implicit
2661 # source operand (PFALSE, RDFFR(S))
    def svePredUnaryWImplicitSrcInst(name, Name, opClass, op,
            predType=PredType.NONE, isFlagSetting=False, decoder='Generic'):
        """Generate definitions for unary SVE predicate instructions
        with an implicit source operand (PFALSE, RDFFR(S)).

        op: complete C++ body snippet (it is expected to define eCount
        itself when the flag-setting epilogue is used); predType: NONE
        selects the unpredicated base class.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + op
        if isFlagSetting:
            code += '''
        CondCodesNZ = (destPred.firstActive(GpOp, eCount) << 1) |
            destPred.noneActive(GpOp, eCount);
        CondCodesC = !destPred.lastActive(GpOp, eCount);
        CondCodesV = 0;'''
        extraPrologCode = '''
        auto& destPred M5_VAR_USED = PDest;'''
        baseClass = ('SvePredUnaryWImplicitSrcOp' if predType == PredType.NONE
                     else 'SvePredUnaryWImplicitSrcPredOp')
        iop = InstObjParams(name, 'Sve' + Name, baseClass,
            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        if predType == PredType.NONE:
            header_output += SvePredUnaryOpWImplicitSrcDeclare.subst(iop)
        else:
            header_output += SvePredUnaryPredOpWImplicitSrcDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2684
2685 # Generate definition for SVE instructions writing to the FFR (SETFFR,
2686 # WRFFR)
    def svePredWriteFfrInst(name, Name, opClass, op, isSetFfr,
            decoder='Generic'):
        """Generate definitions for SVE instructions writing to the FFR
        (SETFFR, WRFFR).  op is the complete C++ body snippet; isSetFfr
        selects the implicit-source-and-destination base class.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + op
        # destPred aliases the FFR here, unlike the other generators.
        extraPrologCode = '''
        auto& destPred M5_VAR_USED = Ffr;'''
        baseClass = ('SveWImplicitSrcDstOp' if isSetFfr
                     else 'SvePredUnaryWImplicitDstOp')
        iop = InstObjParams(name, 'Sve' + Name, baseClass,
            {'code': code, 'op_class': opClass}, [])
        iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        if isSetFfr:
            header_output += SveOpWImplicitSrcDstDeclare.subst(iop)
        else:
            header_output += SvePredUnaryOpWImplicitDstDeclare.subst(iop)
        exec_output += SveNonTemplatedOpExecute.subst(iop)
2703
2704 # Generate definition for SVE Ext instruction
2705 def sveExtInst(name, Name, opClass, decoder = 'Generic'):
2706 global header_output, exec_output, decoders
2707 code = sveEnabledCheckCode + '''
2708 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2709 xc->tcBase());
2710 TheISA::VecRegContainer tmpVecC;
2711 auto auxOp1 = tmpVecC.as<Element>();
2712 for (unsigned i = 0; i < eCount; ++i) {
2713 auxOp1[i] = AA64FpOp1_x[i];
2714 }
2715 uint64_t pos = imm;
2716 if (pos >= eCount)
2717 pos = 0;
2718 for (int i = 0; i < eCount; ++i, ++pos)
2719 {
2720 if (pos < eCount)
2721 AA64FpDest_x[i] = AA64FpDestMerge_x[pos];
2722 else
2723 AA64FpDest_x[i] = auxOp1[pos-eCount];
2724 }
2725 '''
2726 iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmUnpredDestrOp',
2727 {'code': code, 'op_class': opClass}, [])
2728 header_output += SveBinImmUnpredOpDeclare.subst(iop);
2729 exec_output += SveOpExecute.subst(iop)
2730 substDict = {'targs': 'uint8_t', 'class_name': 'Sve' + Name}
2731 exec_output += SveOpExecDeclare.subst(substDict)
2732
2733 # Generate definition for SVE Slice instruction
    def sveSpliceInst(name, Name, opClass, types, decoder = 'Generic'):
        """Generate declaration and execution code for SVE SPLICE:
        concatenate the active segment of the (destructive) first
        operand with leading elements of the second operand.
        """
        global header_output, exec_output, decoders
        # The result is assembled in a temporary, then copied out, since
        # the destination aliases the first source.
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        TheISA::VecRegContainer tmpVecC;
        auto auxDest = tmpVecC.as<Element>();
        int firstelem = -1, lastelem = -2;
        for (int i = 0; i < eCount; ++i) {
            if (GpOp_x[i]) {
                lastelem = i;
                if (firstelem < 0)
                    firstelem = i;
            }
        }
        int x = 0;
        for (int i = firstelem; i <= lastelem; ++i, ++x) {
            auxDest[x] = AA64FpDestMerge_x[i];
        }
        int remaining = eCount - x;
        for (int i = 0; i < remaining; ++i, ++x) {
            auxDest[x] = AA64FpOp2_x[i];
        }
        for (int i = 0; i < eCount; ++i) {
            AA64FpDest_x[i] = auxDest[i];
        }
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SveBinDestrPredOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SveBinDestrPredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2768
2769 # Generate definition for SVE DUP (index) instruction
    def sveDupIndexInst(name, Name, opClass, types, decoder = 'Generic'):
        """Generate declaration and execution code for SVE DUP (index):
        broadcast element imm of Op1 to every destination element;
        out-of-range indices broadcast zero.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        Element srcElem1 = 0;
        if (imm < eCount) {
            srcElem1 = AA64FpOp1_x[imm];
        }
        for (int i = 0; i < eCount; ++i) {
            AA64FpDest_x[i] = srcElem1;
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmIdxUnpredOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SveBinImmUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2789
2790 # Generate definition for SVE reverse elements instructions
    def sveReverseElementsInst(name, Name, opClass, types,
            srcType = SrcRegType.Vector, decoder = 'Generic'):
        """Generate declaration and execution code for SVE REV (vector
        or predicate): reverse the order of the elements of the source.
        """
        assert srcType in (SrcRegType.Vector, SrcRegType.Predicate)
        global header_output, exec_output, decoders
        extraPrologCode = '''
        auto& destPred = PDest;'''
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        # The source is copied first since the destination may alias it.
        # Predicate elements are moved with get_raw/set_raw to preserve
        # the full per-element bit pattern.
        if srcType == SrcRegType.Predicate:
            code += '''
        TheISA::VecPredRegContainer tmpPredC;
        auto auxPOp1 = tmpPredC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            uint8_t v = POp1_x.get_raw(i);
            auxPOp1.set_raw(i, v);
        }
        PDest_x[0] = 0;'''
        else:
            code += '''
        TheISA::VecRegContainer tmpRegC;
        auto auxOp1 = tmpRegC.as<Element>();
        for (unsigned i = 0; i < eCount; ++i) {
            auxOp1[i] = AA64FpOp1_x[i];
        }'''
        code += '''
        for (int i = 0; i < eCount; ++i) {'''
        if srcType == SrcRegType.Vector:
            code += '''
            AA64FpDest_x[i] = auxOp1[eCount - i - 1];'''
        else:
            code += '''
            destPred.set_raw(i, auxPOp1.get_raw(eCount - i - 1));'''
        code += '''
        }'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryUnpredOp',
            {'code': code, 'op_class': opClass}, [])
        if srcType == SrcRegType.Predicate:
            iop.snippets['code'] = extraPrologCode + iop.snippets['code']
        header_output += SveUnaryUnpredOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2835
2836 # Generate definition for shift & insert instructions
    def sveShiftAndInsertInst(name, Name, opClass, types,
            srcType = SrcRegType.Scalar, decoder = 'Generic'):
        """Generate declaration and execution code for SVE INSR: shift
        the destination vector up by one element and insert a scalar
        (general-purpose or SIMD&FP) into element 0.
        """
        assert srcType in (SrcRegType.SimdFpScalar, SrcRegType.Scalar)
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        if srcType == SrcRegType.Scalar:
            code += '''
        auto& srcElem1 = XOp1;'''
        elif srcType == SrcRegType.SimdFpScalar:
            code += '''
        auto& srcElem1 = AA64FpOp1_x[0];'''
        # Shift down from the top so the destructive update is safe.
        code += '''
        for (int i = eCount - 1; i > 0; --i) {
            AA64FpDest_x[i] = AA64FpDestMerge_x[i-1];
        }
        AA64FpDest_x[0] = srcElem1;'''
        iop = InstObjParams(name, 'Sve' + Name, 'SveUnarySca2VecUnpredOp',
            {'code': code, 'op_class': opClass,
             'isSimdFp': 'true' if srcType == SrcRegType.SimdFpScalar
                         else 'false'}, [])
        header_output += SveShiftAndInsertOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2864
2865 # Generate definition for DOT instructions
    def sveDotInst(name, Name, opClass, types, isIndexed = True):
        """Generate declaration and execution code for SVE SDOT/UDOT
        (vector and indexed forms): widening 4-way dot product
        accumulated into the destination.

        isIndexed: the second operand element is selected by imm within
        each 128-bit segment instead of per-element.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (int i = 0; i < eCount; ++i) {'''
        if isIndexed:
            # Indexed form: pick the imm-th group within the 128-bit
            # segment containing element i.
            code += '''
            int segbase = i - i % (16 / sizeof(Element));
            int s = segbase + imm;'''
        code += '''
            DElement res = AA64FpDest_xd[i];
            DElement srcElem1, srcElem2;
            for (int j = 0; j <= 3; ++j) {
                srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);'''
        if isIndexed:
            code += '''
                srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);'''
        else:
            code += '''
                srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);'''
        # NOTE(review): the accumulator is read through AA64FpDest but
        # written through AA64FpDestMerge; both name the same
        # architectural register here — confirm against the operand
        # definitions if modifying.
        code += '''
                res += srcElem1 * srcElem2;
            }
            AA64FpDestMerge_xd[i] = res;
        }'''
        iop = InstObjParams(name, 'Sve' + Name,
            'SveDotProdIdxOp' if isIndexed else
            'SveDotProdOp',
            {'code': code, 'op_class': opClass}, [])
        if isIndexed:
            header_output += SveWideningTerImmOpDeclare.subst(iop)
        else:
            header_output += SveWideningTerOpDeclare.subst(iop)
        exec_output += SveWideningOpExecute.subst(iop)
        for type in types:
            substDict = {'targs': type, 'class_name': 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2904
2905 # Generate definition for ordered reduction
    def sveOrderedReduction(name, Name, opClass, types, op,
            decoder = 'Generic'):
        """Generate declaration and execution code for SVE ordered
        reductions (FADDA style): fold the active elements of Op1 into
        the scalar held in destination element 0, in element order.

        op: C++ snippet updating `destElem` from `srcElem1`.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        # Elements above 0 of the destination are zeroed, as the result
        # is a scalar living in element 0.
        code += '''
        Element destElem = AA64FpDestMerge_x[0];
        for (int i = 0; i < eCount; ++i) {
            if (GpOp_x[i]) {
                Element srcElem1 = AA64FpOp1_x[i];
                %(op)s
            }
        }
        for (int i = 1; i < eCount; ++i) {
            AA64FpDest_x[i] = 0;
        }
        AA64FpDest_x[0] = destElem;'''%{'op': op}
        iop = InstObjParams(name, 'Sve' + Name, 'SveOrdReducOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SveReducOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2932
2933 # Generate definitions for complex addition instructions
    def sveComplexAddInst(name, Name, opClass, types,
            decoder = 'Generic'):
        """Generate declaration and execution code for SVE FCADD:
        predicated complex add with rotation.  Elements are processed
        as (real, imaginary) pairs; rot selects 90 deg (rot == 1,
        subtract imaginary into real) or 270 deg (rot == 3, subtract
        real into imaginary) rotation of the second operand.
        """
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());'''
        code += '''
        bool sub_i = (rot == 1);
        bool sub_r = (rot == 3);
        for (int i = 0; i < eCount / 2; ++i) {
            Element acc_r = AA64FpOp1_x[2 * i];
            Element acc_i = AA64FpOp1_x[2 * i + 1];
            Element elt2_r = AA64FpOp2_x[2 * i];
            Element elt2_i = AA64FpOp2_x[2 * i + 1];

            FPSCR fpscr;
            if (GpOp_x[2 * i]) {
                if (sub_i) {
                    elt2_i = fplibNeg<Element>(elt2_i);
                }
                fpscr = (FPSCR) FpscrExc;
                acc_r = fplibAdd<Element>(acc_r, elt2_i, fpscr);
                FpscrExc = fpscr;
            }
            if (GpOp_x[2 * i + 1]) {
                if (sub_r) {
                    elt2_r = fplibNeg<Element>(elt2_r);
                }
                fpscr = (FPSCR) FpscrExc;
                acc_i = fplibAdd<Element>(acc_i, elt2_r, fpscr);
                FpscrExc = fpscr;
            }

            AA64FpDest_x[2 * i] = acc_r;
            AA64FpDest_x[2 * i + 1] = acc_i;
        }
        '''
        iop = InstObjParams(name, 'Sve' + Name, 'SveComplexOp',
            {'code': code, 'op_class': opClass}, [])
        header_output += SveComplexOpDeclare.subst(iop)
        exec_output += SveOpExecute.subst(iop)
        for type in types:
            substDict = {'targs' : type,
                         'class_name' : 'Sve' + Name}
            exec_output += SveOpExecDeclare.subst(substDict)
2979
2980 # Generate definitions for complex multiply and accumulate instructions
2981 def sveComplexMulAddInst(name, Name, opClass, types,
2982 predType=PredType.NONE, decoder='Generic'):
2983 assert predType in (PredType.NONE, PredType.MERGE)
2984 global header_output, exec_output, decoders
2985 code = sveEnabledCheckCode + '''
2986 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2987 xc->tcBase());'''
2988 code += '''
2989 uint32_t sel_a = rot & 0x1;
2990 uint32_t sel_b = sel_a ? 0 : 1;
2991 bool neg_i = (rot & 0x2) == 1;
2992 bool neg_r = (rot & 0x1) != (rot & 0x2);'''
2993 if predType == PredType.NONE:
2994 code += '''
2995 uint32_t eltspersegment = 16 / (2 * sizeof(Element));'''
2996 code += '''
2997 for (int i = 0; i < eCount / 2; ++i) {'''
2998 if predType == PredType.NONE:
2999 code += '''
3000 uint32_t segmentbase = i - (i % eltspersegment);
3001 uint32_t s = segmentbase + imm;'''
3002 else:
3003 code += '''
3004 uint32_t s = i;'''
3005 code += '''
3006 Element addend_r = AA64FpDestMerge_x[2 * i];
3007 Element addend_i = AA64FpDestMerge_x[2 * i + 1];
3008 Element elt1_a = AA64FpOp1_x[2 * i + sel_a];
3009 Element elt2_a = AA64FpOp2_x[2 * s + sel_a];
3010 Element elt2_b = AA64FpOp2_x[2 * s + sel_b];
3011 FPSCR fpscr;
3012 '''
3013 if predType != PredType.NONE:
3014 code += '''
3015 if (GpOp_x[2 * i]) {'''
3016 code += '''
3017 if (neg_r) {
3018 elt2_a = fplibNeg<Element>(elt2_a);
3019 }
3020 fpscr = (FPSCR) FpscrExc;
3021 addend_r = fplibMulAdd<Element>(addend_r, elt1_a, elt2_a, fpscr);
3022 FpscrExc = fpscr;'''
3023 if predType != PredType.NONE:
3024 code += '''
3025 }'''
3026 if predType != PredType.NONE:
3027 code += '''
3028 if (GpOp_x[2 * i + 1]) {'''
3029 code += '''
3030 if (neg_i) {
3031 elt2_b = fplibNeg<Element>(elt2_b);
3032 }
3033 fpscr = (FPSCR) FpscrExc;
3034 addend_i = fplibMulAdd<Element>(addend_i, elt1_a, elt2_b, fpscr);
3035 FpscrExc = fpscr;'''
3036 if predType != PredType.NONE:
3037 code += '''
3038 }'''
3039 code += '''
3040 AA64FpDest_x[2 * i] = addend_r;
3041 AA64FpDest_x[2 * i + 1] = addend_i;
3042 }'''
3043 iop = InstObjParams(name, 'Sve' + Name,
3044 'SveComplexIdxOp' if predType == PredType.NONE
3045 else 'SveComplexOp',
3046 {'code': code, 'op_class': opClass}, [])
3047 if predType == PredType.NONE:
3048 header_output += SveComplexIndexOpDeclare.subst(iop)
3049 else:
3050 header_output += SveComplexOpDeclare.subst(iop)
3051 exec_output += SveOpExecute.subst(iop)
3052 for type in types:
3053 substDict = {'targs' : type,
3054 'class_name' : 'Sve' + Name}
3055 exec_output += SveOpExecDeclare.subst(substDict)
3056
    # Element-type lists used to instantiate the generators above.
    # FP types are spelled as the unsigned containers of their bit width.
    fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
    signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
    unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')

    # Subsets excluding the widest (small*) or narrowest (big*) width.
    smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
    bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
    smallUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t')
    bigUnsignedTypes = ('uint16_t', 'uint32_t', 'uint64_t')

    # (source, destination) pairs for the widening (unpack) generators.
    unsignedWideSDTypes = (('uint8_t', 'uint16_t'),
            ('uint16_t', 'uint32_t'), ('uint32_t', 'uint64_t'))
    signedWideSDTypes = (('int8_t', 'int16_t'),
            ('int16_t', 'int32_t'), ('int32_t', 'int64_t'))
3070
3071 # ABS
3072 absCode = 'destElem = (Element) std::abs(srcElem1);'
3073 sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
3074 PredType.MERGE)
3075 # ADD (immediate)
3076 sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
3077 # ADD (vectors, predicated)
3078 addCode = 'destElem = srcElem1 + srcElem2;'
3079 sveBinInst('add', 'AddPred', 'SimdAddOp', unsignedTypes, addCode,
3080 PredType.MERGE, True)
3081 # ADD (vectors, unpredicated)
3082 addCode = 'destElem = srcElem1 + srcElem2;'
3083 sveBinInst('add', 'AddUnpred', 'SimdAddOp', unsignedTypes, addCode)
    # ADDPL
    # Shared template: %d selects the element width whose per-vector count
    # is added to the base register (64 -> count of 64-bit elements, i.e.
    # the predicate length in bytes; 8 -> vector length in bytes).
    addvlCode = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint%d_t>(
            xc->tcBase());
        XDest = XOp1 + eCount * (int64_t) imm;
    '''
    buildXImmDataInst('addpl', addvlCode % 64, buildCc=False)
    # ADDVL
    buildXImmDataInst('addvl', addvlCode % 8, buildCc=False)
    # ADR
    # offsetFormat selects how the 32-bit offset packed in srcElem2 is
    # extended before scaling by 'mult'.
    adrCode = '''
        if (offsetFormat == SveAdrOffsetUnpackedSigned) {
            srcElem2 = sext<32>(srcElem2 & mask(32));
        } else if (offsetFormat == SveAdrOffsetUnpackedUnsigned) {
            srcElem2 = srcElem2 & mask(32);
        }
        destElem = srcElem1 + srcElem2 * mult;
    '''
    sveAdrInst('adr', 'Adr', 'SimdAddOp', ('uint32_t', 'uint64_t'), adrCode)
    # AND (immediate)
    andCode = 'destElem = srcElem1 & srcElem2;'
    sveWideImmInst('and', 'AndImm', 'SimdAluOp', ('uint64_t',), andCode)
    # AND (vectors, predicated)
    sveBinInst('and', 'AndPred', 'SimdAluOp', unsignedTypes, andCode,
               PredType.MERGE, True)
    # AND (vectors, unpredicated)
    andCode = 'destElem = srcElem1 & srcElem2;'
    sveBinInst('and', 'AndUnpred', 'SimdAluOp', ('uint64_t',), andCode)
    # AND, ANDS (predicates)
    svePredLogicalInst('and', 'PredAnd', 'SimdPredAluOp', ('uint8_t',),
                       andCode)
    svePredLogicalInst('ands', 'PredAnds', 'SimdPredAluOp', ('uint8_t',),
                       andCode, isFlagSetting=True)
    # ANDV
    # Associative reduction; the identity value (all ones) leaves the AND
    # unaffected for inactive elements.
    andvCode = 'destElem &= srcElem1;'
    sveAssocReducInst('andv', 'Andv', 'SimdReduceAluOp', unsignedTypes,
                      andvCode, 'std::numeric_limits<Element>::max()')
    # ASR (immediate, predicated)
    # Arithmetic shift right built on unsigned element types: the sign bit
    # is sampled first and, after the (logical) shift, replicated into the
    # vacated high bits by OR-ing in the inverted mask. Shift amounts of
    # the element width or more saturate to all-ones/zero by the sign.
    asrCode = '''
        int sign_bit = bits(srcElem1, sizeof(Element) * 8 - 1);
        if (srcElem2 == 0) {
            destElem = srcElem1;
        } else if (srcElem2 >= sizeof(Element) * 8) {
            destElem = sign_bit ? std::numeric_limits<Element>::max() : 0;
        } else {
            destElem = srcElem1 >> srcElem2;
            if (sign_bit) {
                destElem |= ~mask(sizeof(Element) * 8 - srcElem2);
            }
        }
    '''
    sveBinImmInst('asr', 'AsrImmPred', 'SimdAluOp', unsignedTypes, asrCode,
                  PredType.MERGE)
    # ASR (immediate, unpredicated)
    sveBinImmInst('asr', 'AsrImmUnpred', 'SimdAluOp', unsignedTypes, asrCode)
    # ASR (vectors)
    sveBinInst('asr', 'AsrPred', 'SimdAluOp', unsignedTypes, asrCode,
               PredType.MERGE, True)
    # ASR (wide elements, predicated)
    sveShiftByWideElemsInst('asr', 'AsrWidePred', 'SimdAluOp', unsignedTypes,
                            asrCode, PredType.MERGE)
    # ASR (wide elements, unpredicated)
    sveShiftByWideElemsInst('asr', 'AsrWideUnpred', 'SimdAluOp', unsignedTypes,
                            asrCode)
    # ASRD
    # Arithmetic shift right for divide: negative inputs are biased by
    # (1 << shift) - 1 before the shift so the division rounds towards
    # zero instead of towards minus infinity.
    asrdCode = '''
        Element element1 = srcElem1;
        Element shift = srcElem2;
        if (srcElem1 < 0) {
            Element tmp = ((1L << shift) - 1L);
            if (tmp == -1L) {
                element1 = 0;
            } else {
                element1 = element1 + tmp;
            }
        }
        destElem = (element1 >> shift);
    '''
    sveBinImmInst('asrd', 'Asrd', 'SimdAluOp', signedTypes, asrdCode,
                  PredType.MERGE)
    # ASRR
    # Reversed-operand form: shifts srcElem2 right by srcElem1.
    asrrCode = '''
        int sign_bit = bits(srcElem2, sizeof(Element) * 8 - 1);
        if (srcElem1 == 0) {
            destElem = srcElem2;
        } else if (srcElem1 >= sizeof(Element) * 8) {
            destElem = sign_bit ? std::numeric_limits<Element>::max() : 0;
        } else {
            destElem = srcElem2 >> srcElem1;
            if (sign_bit) {
                destElem |= ~mask(sizeof(Element) * 8 - srcElem1);
            }
        }
    '''
    sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
               PredType.MERGE, True)
    # BIC (vectors, predicated)
    bicCode = 'destElem = srcElem1 & ~srcElem2;'
    sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
               PredType.MERGE, True)
    # BIC (vectors, unpredicated)
    sveBinInst('bic', 'BicUnpred', 'SimdAluOp', unsignedTypes, bicCode)
    # BIC, BICS (predicates)
    # Predicate form re-assigns bicCode using logical (single-bit)
    # operators rather than the bitwise form above.
    bicCode = 'destElem = srcElem1 && !srcElem2;'
    svePredLogicalInst('bic', 'PredBic', 'SimdPredAluOp', ('uint8_t',),
                       bicCode)
    svePredLogicalInst('bics', 'PredBics', 'SimdPredAluOp', ('uint8_t',),
                       bicCode, isFlagSetting=True)
    # BRKA (merging)
    svePartBrkInst('brka', 'Brkam', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.MERGE, whenBrk = Break.After)
    # BRKA (zeroing)
    svePartBrkInst('brka', 'Brkaz', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.ZERO, whenBrk = Break.After)
    # BRKAS
    svePartBrkInst('brkas', 'Brkas', 'SimdPredAluOp', isFlagSetting = True,
                   predType = PredType.ZERO, whenBrk = Break.After)
    # BRKB (merging)
    svePartBrkInst('brkb', 'Brkbm', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.MERGE, whenBrk = Break.Before)
    # BRKB (zeroing)
    svePartBrkInst('brkb', 'Brkbz', 'SimdPredAluOp', isFlagSetting = False,
                   predType = PredType.ZERO, whenBrk = Break.Before)
    # BRKBS
    svePartBrkInst('brkbs', 'Brkbs', 'SimdPredAluOp', isFlagSetting = True,
                   predType = PredType.ZERO, whenBrk = Break.Before)
    # BRKN
    svePartBrkPropNextInst('brkn', 'Brkn', 'SimdPredAluOp',
                           isFlagSetting = False)
    # BRKNS
    svePartBrkPropNextInst('brkns', 'Brkns', 'SimdPredAluOp',
                           isFlagSetting = True)
    # BRKPA
    svePartBrkPropPrevInst('brkpa', 'Brkpa', 'SimdPredAluOp',
                           isFlagSetting = False, whenBrk = Break.After)
    # BRKPAS
    svePartBrkPropPrevInst('brkpas', 'Brkpas', 'SimdPredAluOp',
                           isFlagSetting = True, whenBrk = Break.After)
    # BRKPB
    svePartBrkPropPrevInst('brkpb', 'Brkpb', 'SimdPredAluOp',
                           isFlagSetting = False, whenBrk = Break.Before)
    # BRKPBS
    svePartBrkPropPrevInst('brkpbs', 'Brkpbs', 'SimdPredAluOp',
                           isFlagSetting = True, whenBrk = Break.Before)
    # CLASTA (scalar)
    # 'last' and 'eCount' are supplied by the surrounding sveSelectInst
    # template; CLASTA takes the element after the last active one,
    # wrapping around to element 0.
    clastaCode = '''
        last++;
        if (last >= eCount)
            last = 0;
        destElem = AA64FpOp1_x[last];'''
    sveSelectInst('clasta', 'Clasta', 'SimdAluOp', unsignedTypes, clastaCode,
                  isCond = True, destType = DstRegType.Scalar)
    # CLASTA (SIMD&FP scalar)
    sveSelectInst('clasta', 'Clastaf', 'SimdAluOp', unsignedTypes, clastaCode,
                  isCond = True, destType = DstRegType.SimdFpScalar)
    # CLASTA (vector)
    sveSelectInst('clasta', 'Clastav', 'SimdAluOp', unsignedTypes, clastaCode,
                  isCond = True, destType = DstRegType.Vector)
    # CLASTB (scalar)
    # CLASTB takes the last active element itself.
    clastbCode = '''
        destElem = AA64FpOp1_x[last];'''
    sveSelectInst('clastb', 'Clastb', 'SimdAluOp', unsignedTypes, clastbCode,
                  isCond = True, destType = DstRegType.Scalar)
    # CLASTB (SIMD&FP scalar)
    sveSelectInst('clastb', 'Clastbf', 'SimdAluOp', unsignedTypes, clastbCode,
                  isCond = True, destType = DstRegType.SimdFpScalar)
    # CLASTB (vectors)
    sveSelectInst('clastb', 'Clastbv', 'SimdAluOp', unsignedTypes, clastbCode,
                  isCond = True, destType = DstRegType.Vector)
    # CLS
    # Count leading sign bits: discard the sign bit, then count how many
    # following bits equal it (shifting left, testing the sign each time).
    clsCode = '''
        destElem = 0;
        Element val = srcElem1;
        if (val < 0) {
            val <<= 1;
            while (val < 0) {
                destElem++;
                val <<= 1;
            }
        } else {
            val <<= 1;
            while (val >= 0 && destElem < sizeof(Element) * 8 - 1) {
                destElem++;
                val <<= 1;
            }
        }
    '''
    sveUnaryInst('cls', 'Cls', 'SimdAluOp', signedTypes, clsCode,
                 PredType.MERGE)
    # CLZ
    # Count leading zeros; signed element types are used so 'val >= 0'
    # tests the current top bit while shifting left.
    clzCode = '''
        destElem = 0;
        Element val = srcElem1;
        while (val >= 0 && destElem < sizeof(Element) * 8) {
            destElem++;
            val <<= 1;
        }
    '''
    sveUnaryInst('clz', 'Clz', 'SimdAluOp', signedTypes, clzCode,
                 PredType.MERGE)
    # CMPEQ (immediate)
    # Integer compares write a boolean per element; the element-type tuple
    # selects signed vs. unsigned comparison semantics.
    cmpeqCode = '''
        destElem = (srcElem1 == srcElem2);
    '''
    sveIntCmpImmInst('cmpeq', 'Cmpeqi', 'SimdCmpOp', unsignedTypes, cmpeqCode)
    # CMPEQ (vectors)
    sveIntCmpInst('cmpeq', 'Cmpeq', 'SimdCmpOp', unsignedTypes, cmpeqCode)
    # CMPEQ (wide elements)
    sveIntCmpInst('cmpeq', 'Cmpeqw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpeqCode, True)
    # CMPGE (immediate)
    cmpgeCode = '''
        destElem = (srcElem1 >= srcElem2);
    '''
    sveIntCmpImmInst('cmpge', 'Cmpgei', 'SimdCmpOp', signedTypes, cmpgeCode)
    # CMPGE (vectors)
    sveIntCmpInst('cmpge', 'Cmpge', 'SimdCmpOp', signedTypes, cmpgeCode)
    # CMPGE (wide elements)
    sveIntCmpInst('cmpge', 'Cmpgew', 'SimdCmpOp', smallSignedTypes,
                  cmpgeCode, True)
3304 # CMPGT (immediate)
3305 cmpgtCode = '''
3306 destElem = (srcElem1 > srcElem2);
3307 '''
3308 sveIntCmpImmInst('cmpge', 'Cmpgti', 'SimdCmpOp', signedTypes, cmpgtCode)
3309 # CMPGT (vectors)
3310 sveIntCmpInst('cmpge', 'Cmpgt', 'SimdCmpOp', signedTypes, cmpgtCode)
3311 # CMPGT (wide elements)
3312 sveIntCmpInst('cmpge', 'Cmpgtw', 'SimdCmpOp', smallSignedTypes,
3313 cmpgtCode, True)
3314 # CMPHI (immediate)
3315 sveIntCmpImmInst('cmphi', 'Cmphii', 'SimdCmpOp', unsignedTypes, cmpgtCode)
3316 # CMPHI (vectors)
3317 sveIntCmpInst('cmphi', 'Cmphi', 'SimdCmpOp', unsignedTypes, cmpgtCode)
3318 # CMPHI (wide elements)
3319 sveIntCmpInst('cmphi', 'Cmphiw', 'SimdCmpOp', smallUnsignedTypes,
3320 cmpgtCode, True)
    # CMPHS (immediate)
    # Unsigned higher-or-same: reuses cmpgeCode with unsigned types.
    sveIntCmpImmInst('cmphs', 'Cmphsi', 'SimdCmpOp', unsignedTypes, cmpgeCode)
    # CMPHS (vectors)
    sveIntCmpInst('cmphs', 'Cmphs', 'SimdCmpOp', unsignedTypes, cmpgeCode)
    # CMPHS (wide elements)
    sveIntCmpInst('cmphs', 'Cmphsw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpgeCode, True)
    # CMPLE (immediate)
    cmpleCode = '''
        destElem = (srcElem1 <= srcElem2);
    '''
    sveIntCmpImmInst('cmple', 'Cmplei', 'SimdCmpOp', signedTypes, cmpleCode)
    # CMPLE (wide elements)
    sveIntCmpInst('cmple', 'Cmplew', 'SimdCmpOp', smallSignedTypes,
                  cmpleCode, True)
    # CMPLO (immediate)
    # Unsigned lower: '<' with unsigned element types.
    cmpltCode = '''
        destElem = (srcElem1 < srcElem2);
    '''
    sveIntCmpImmInst('cmplo', 'Cmploi', 'SimdCmpOp', unsignedTypes, cmpltCode)
    # CMPLO (wide elements)
    sveIntCmpInst('cmplo', 'Cmplow', 'SimdCmpOp', smallUnsignedTypes,
                  cmpltCode, True)
    # CMPLS (immediate)
    # Unsigned lower-or-same: reuses cmpleCode with unsigned types.
    sveIntCmpImmInst('cmpls', 'Cmplsi', 'SimdCmpOp', unsignedTypes, cmpleCode)
    # CMPLS (wide elements)
    sveIntCmpInst('cmpls', 'Cmplsw', 'SimdCmpOp', smallUnsignedTypes,
                  cmpleCode, True)
    # CMPLT (immediate)
    sveIntCmpImmInst('cmplt', 'Cmplti', 'SimdCmpOp', signedTypes, cmpltCode)
    # CMPLT (wide elements)
    sveIntCmpInst('cmplt', 'Cmpltw', 'SimdCmpOp', smallSignedTypes,
                  cmpltCode, True)
3354 # CMPNE (immediate)
3355 cmpneCode = '''
3356 destElem = (srcElem1 != srcElem2);
3357 '''
3358 sveIntCmpImmInst('cmpeq', 'Cmpnei', 'SimdCmpOp', unsignedTypes, cmpneCode)
3359 # CMPNE (vectors)
3360 sveIntCmpInst('cmpeq', 'Cmpne', 'SimdCmpOp', unsignedTypes, cmpneCode)
3361 # CMPNE (wide elements)
3362 sveIntCmpInst('cmpeq', 'Cmpnew', 'SimdCmpOp', smallUnsignedTypes,
3363 cmpneCode, True)
    # CNOT
    # Logical NOT per element: 1 if the element is zero, else 0.
    cnotCode = '''
        destElem = srcElem1?0:1;
    '''
    sveUnaryInst('cnot', 'Cnot', 'SimdAluOp', unsignedTypes, cnotCode,
                 PredType.MERGE)
    # CNT
    # Population count per element.
    cntCode = '''
        destElem = 0;
        Element val = srcElem1;
        while (val) {
            destElem += val & 0x1;
            val >>= 1;
        }
    '''
    sveUnaryInst('cnt', 'Cnt', 'SimdAluOp', unsignedTypes, cntCode,
                 PredType.MERGE)
    # CNTB, CNTD, CNTH, CNTW
    cntxCode = '''
        destElem = (count * imm);
    '''
    sveElemCountInst('cnt', 'Cntx', 'SimdAluOp', unsignedTypes, cntxCode,
                     destType = DestType.Scalar, dstIs32b = False, dstAcc = False)
    # COMPACT
    sveCompactInst('compact', 'Compact', 'SimdPredAluOp',
                   ('uint32_t', 'uint64_t'))
    # CPY (immediate)
    # dupCode simply copies the source element; reused by the DUP/FCPY
    # definitions further below.
    dupCode = 'destElem = srcElem1;'
    sveWideImmInst('cpy', 'CpyImmMerge', 'SimdAluOp', unsignedTypes, dupCode,
                   predType=PredType.MERGE, isUnary=True)
    sveWideImmInst('cpy', 'CpyImmZero', 'SimdAluOp', unsignedTypes, dupCode,
                   predType=PredType.ZERO, isUnary=True)
    # CPY (scalar)
    sveUnaryInst('cpy', 'CpyScalar', 'SimdAluOp', unsignedTypes, dupCode,
                 PredType.MERGE, srcRegType=SrcRegType.Scalar)
    # CPY (SIMD&FP scalar)
    sveUnaryInst('cpy', 'CpySimdFpScalar', 'SimdAluOp', unsignedTypes, dupCode,
                 PredType.MERGE, srcRegType=SrcRegType.SimdFpScalar)
    # CNTP
    svePredCountPredInst('cntp', 'Cntp', 'SimdAluOp', unsignedTypes)
    # CTERMEQ
    cteqCode = '''
        destElem = srcElem1 == srcElem2;
    '''
    sveCompTermInst('ctermeq', 'Ctermeq', 'IntAluOp',
                    ['uint32_t', 'uint64_t'], cteqCode)
    # CTERMNE
    ctneCode = '''
        destElem = srcElem1 != srcElem2;
    '''
    sveCompTermInst('ctermne', 'Ctermne', 'IntAluOp',
                    ['uint32_t', 'uint64_t'], ctneCode)
    # DECB, DECH, DECW, DECD (scalar)
    # Decrement by (element count * imm); 'count' and 'imm' are provided
    # by the sveElemCountInst template.
    decxCode = '''
        destElem = srcElem1 - (count * imm);
    '''
    sveElemCountInst('dec', 'Dec', 'SimdAluOp', unsignedTypes, decxCode,
                     destType = DestType.Scalar, dstIs32b = False)
    # DECH, DECW, DECD (vector)
    sveElemCountInst('dec', 'Decv', 'SimdAluOp', bigUnsignedTypes, decxCode,
                     destType = DestType.Vector, dstIs32b = False)
    # DECP (scalar)
    decpCode = '''
        XDest = XDest - count;
    '''
    svePredCountInst('decp', 'Decp', 'SimdAluOp', unsignedTypes, decpCode,
                     DestType.Scalar, SrcSize.Src64bit)
    # DECP (vector)
    decpvCode = '''
        destElem = srcElem - count;
    '''
    svePredCountInst('decp', 'Decpv', 'SimdAluOp', unsignedTypes, decpvCode,
                     DestType.Vector)
    # DUP (immediate)
    sveWideImmInst('dup', 'DupImm', 'SimdAluOp', unsignedTypes, dupCode,
                   isUnary=True)
    # DUP (indexed); registered with the 'mov' disassembly mnemonic.
    sveDupIndexInst('mov', 'DupIdx', 'SimdAluOp',
                    list(unsignedTypes) + ['__uint128_t'])
    # DUP (scalar)
    sveUnaryInst('dup', 'DupScalar', 'SimdAluOp', unsignedTypes, dupCode,
                 PredType.NONE, srcRegType=SrcRegType.Scalar)
    # DUPM
    sveWideImmInst('dupm', 'Dupm', 'SimdAluOp', unsignedTypes, dupCode,
                   isUnary=True)
    # EOR (immediate)
    eorCode = 'destElem = srcElem1 ^ srcElem2;'
    sveWideImmInst('eor', 'EorImm', 'SimdAluOp', ('uint64_t',), eorCode)
    # EOR (vectors, predicated)
    sveBinInst('eor', 'EorPred', 'SimdAluOp', unsignedTypes, eorCode,
               PredType.MERGE, True)
    # EOR (vectors, unpredicated)
    eorCode = 'destElem = srcElem1 ^ srcElem2;'
    sveBinInst('eor', 'EorUnpred', 'SimdAluOp', ('uint64_t',), eorCode)
    # EOR, EORS (predicates)
    svePredLogicalInst('eor', 'PredEor', 'SimdPredAluOp', ('uint8_t',),
                       eorCode)
    svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
                       eorCode, isFlagSetting=True)
    # EORV
    # Associative reduction with identity 0.
    eorvCode = 'destElem ^= srcElem1;'
    sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
                      eorvCode, '0')
    # EXT
    sveExtInst('ext', 'Ext', 'SimdAluOp')
    # FABD
    # Shared template for FP ops that read and write FPSCR exception
    # flags; '%s' is replaced with the expression producing destElem.
    # Reused throughout the FP definitions below.
    fpOp = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destElem = %s;
        FpscrExc = fpscr;
    '''
    fabdCode = fpOp % 'fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))'
    sveBinInst('fabd', 'Fabd', 'SimdFloatAddOp', floatTypes, fabdCode,
               PredType.MERGE, True)
    # FABS
    fabsCode = 'destElem = fplibAbs<Element>(srcElem1);'
    sveUnaryInst('fabs', 'Fabs', 'SimdFloatAluOp', fpTypes, fabsCode,
                 PredType.MERGE)
    # FACGE
    # Two-stage substitution: the first '%' fills the fpOp body, leaving a
    # '%s' that the second '%' ('GE'/'GT') resolves to the fplibCompare
    # variant. Absolute-value compare.
    fpCmpAbsOp = fpOp % ('fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),'
                         ' fplibAbs<Element>(srcElem2), fpscr)')
    facgeCode = fpCmpAbsOp % 'GE'
    sveCmpInst('facge', 'Facge', 'SimdFloatCmpOp', fpTypes, facgeCode)
    # FACGT
    facgtCode = fpCmpAbsOp % 'GT'
    sveCmpInst('facgt', 'Facgt', 'SimdFloatCmpOp', fpTypes, facgtCode)
    # FADD (immediate)
    # Generic two-operand fplib template (Add/Sub/Mul/... via second '%').
    fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)'
    faddCode = fpBinOp % 'Add'
    sveBinImmInst('fadd', 'FaddImm', 'SimdFloatAddOp', floatTypes, faddCode,
                  PredType.MERGE)
    # FADD (vectors, predicated)
    sveBinInst('fadd', 'FaddPred', 'SimdFloatAddOp', floatTypes, faddCode,
               PredType.MERGE, True)
    # FADD (vectors, unpredicated)
    sveBinInst('fadd', 'FaddUnpred', 'SimdFloatAddOp', floatTypes, faddCode)
    # FADDA
    # Strictly-ordered reduction; exception flags are OR-accumulated
    # across elements rather than overwritten.
    fpAddaOp = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destElem = fplibAdd<Element>(destElem, srcElem1, fpscr);
        FpscrExc = FpscrExc | fpscr;
    '''
    sveOrderedReduction('fadda', 'Fadda', 'SimdFloatReduceAddOp', floatTypes,
                        fpAddaOp)
    # FADDV
    # Reduction template with OR-accumulated exception flags.
    fpReduceOp = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
        FpscrExc = FpscrExc | fpscr;
    '''
    faddvCode = fpReduceOp % 'Add'
    sveNonAssocReducInst('faddv', 'Faddv', 'SimdFloatReduceAddOp', floatTypes,
                         faddvCode, '0')
    # FCADD
    sveComplexAddInst('fcadd','Fcadd', 'SimdFloatAddOp', fpTypes)
    # FCMEQ (vectors)
    fpCmpOp = fpOp % ('fplibCompare%s<Element>(srcElem1, srcElem2, fpscr)')
    fcmeqCode = fpCmpOp % 'EQ'
    sveCmpInst('fcmeq', 'Fcmeq', 'SimdFloatCmpOp', fpTypes, fcmeqCode)
    # FCMEQ (zero)
    fpCmpZeroOp = fpOp % 'fplibCompare%s<Element>(srcElem1, 0, fpscr)'
    fcmeqZeroCode = fpCmpZeroOp % 'EQ'
    sveCmpInst('fcmeq', 'FcmeqZero', 'SimdFloatCmpOp', fpTypes, fcmeqZeroCode,
               True)
    # FCMGE (vectors)
    fcmgeCode = fpCmpOp % 'GE'
    sveCmpInst('fcmge', 'Fcmge', 'SimdFloatCmpOp', fpTypes, fcmgeCode)
    # FCMGE (zero)
    fcmgeZeroCode = fpCmpZeroOp % 'GE'
    sveCmpInst('fcmge', 'FcmgeZero', 'SimdFloatCmpOp', fpTypes, fcmgeZeroCode,
               True)
    # FCMGT (vectors)
    fcmgtCode = fpCmpOp % 'GT'
    sveCmpInst('fcmgt', 'Fcmgt', 'SimdFloatCmpOp', fpTypes, fcmgtCode)
    # FCMGT (zero)
    fcmgtZeroCode = fpCmpZeroOp % 'GT'
    sveCmpInst('fcmgt', 'FcmgtZero', 'SimdFloatCmpOp', fpTypes, fcmgtZeroCode,
               True)
    # FCMLE (zero): implemented with swapped operands as 0 >= srcElem1.
    fpCmpRevZeroOp = fpOp % ('fplibCompare%s<Element>(0, srcElem1, fpscr)')
    fcmleZeroCode = fpCmpRevZeroOp % 'GE'
    sveCmpInst('fcmle', 'FcmleZero', 'SimdFloatCmpOp', fpTypes, fcmleZeroCode,
               True)
    # FCMLT (zero): implemented as 0 > srcElem1.
    fcmltZeroCode = fpCmpRevZeroOp % 'GT'
    sveCmpInst('fcmlt', 'FcmltZero', 'SimdFloatCmpOp', fpTypes, fcmltZeroCode,
               True)
    # FCMNE (vectors): negated EQ comparison.
    fcmneCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, srcElem2, fpscr)')
    sveCmpInst('fcmne', 'Fcmne', 'SimdFloatCmpOp', fpTypes, fcmneCode)
    # FCMNE (zero)
    fcmneZeroCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, 0, fpscr)')
    sveCmpInst('fcmne', 'FcmneZero', 'SimdFloatCmpOp', fpTypes, fcmneZeroCode,
               True)
    # FCMUO (vectors): unordered compare.
    fcmuoCode = fpCmpOp % 'UN'
    sveCmpInst('fcmuo', 'Fcmuo', 'SimdFloatCmpOp', fpTypes, fcmuoCode)
    # FCMLA (indexed)
    # NOTE(review): instantiated for fpTypes[1:] (32/64-bit element
    # types); confirm this matches the decoder's element-size mapping for
    # the indexed form.
    sveComplexMulAddInst('fcmla', 'Fcmlai', 'SimdFloatMultAccOp',
                         fpTypes[1:], predType = PredType.NONE)
    # FCMLA (vectors)
    sveComplexMulAddInst('fcmla', 'Fcmlav', 'SimdFloatMultAccOp',
                         fpTypes, predType = PredType.MERGE)
    # FCPY
    sveWideImmInst('fcpy', 'Fcpy', 'SimdAluOp', unsignedTypes, dupCode,
                   predType=PredType.MERGE, isUnary=True)
    # FCVT
    # Conversion type strings are 'SElement, DElement' pairs (source
    # width, destination width).
    fcvtCode = fpOp % ('fplibConvert<SElement, DElement>('
                       'srcElem1, FPCRRounding(fpscr), fpscr)')
    sveCvtInst('fcvt', 'FcvtNarrow', 'SimdCvtOp',
               ('uint32_t, uint16_t',
                'uint64_t, uint16_t',
                'uint64_t, uint32_t'),
               fcvtCode, CvtDir.Narrow)
    sveCvtInst('fcvt', 'FcvtWiden', 'SimdCvtOp',
               ('uint16_t, uint32_t',
                'uint16_t, uint64_t',
                'uint32_t, uint64_t'),
               fcvtCode, CvtDir.Widen)
    # FCVTZS
    # FP-to-integer template; the three '%s' later select fixed-point
    # bits, unsignedness, and rounding mode. FCVTZS/FCVTZU both round
    # towards zero (FPRounding_ZERO).
    fcvtIntCode = fpOp % ('fplibFPToFixed<SElement, DElement>('
                          'srcElem1, %s, %s, %s, fpscr)')
    fcvtzsCode = fcvtIntCode % ('0', 'false', 'FPRounding_ZERO')
    sveCvtInst('fcvtzs', 'FcvtzsNarrow', 'SimdCvtOp',
               ('uint16_t, uint16_t',
                'uint32_t, uint32_t',
                'uint64_t, uint32_t',
                'uint64_t, uint64_t'),
               fcvtzsCode, CvtDir.Narrow)
    sveCvtInst('fcvtzs', 'FcvtzsWiden', 'SimdCvtOp',
               ('uint16_t, uint32_t',
                'uint16_t, uint64_t',
                'uint32_t, uint64_t'),
               fcvtzsCode, CvtDir.Widen)
    # FCVTZU ('true' selects the unsigned conversion)
    fcvtzuCode = fcvtIntCode % ('0', 'true', 'FPRounding_ZERO')
    sveCvtInst('fcvtzu', 'FcvtzuNarrow', 'SimdCvtOp',
               ('uint16_t, uint16_t',
                'uint32_t, uint32_t',
                'uint64_t, uint32_t',
                'uint64_t, uint64_t'),
               fcvtzuCode, CvtDir.Narrow)
    sveCvtInst('fcvtzu', 'FcvtzuWiden', 'SimdCvtOp',
               ('uint16_t, uint32_t',
                'uint16_t, uint64_t',
                'uint32_t, uint64_t'),
               fcvtzuCode, CvtDir.Widen)
    # FDIV
    fdivCode = fpBinOp % 'Div'
    sveBinInst('fdiv', 'Fdiv', 'SimdFloatDivOp', floatTypes, fdivCode,
               PredType.MERGE, True)
    # FDIVR
    # Reversed-operand template: srcElem2 op srcElem1.
    fpBinRevOp = fpOp % 'fplib%s<Element>(srcElem2, srcElem1, fpscr)'
    fdivrCode = fpBinRevOp % 'Div'
    sveBinInst('fdivr', 'Fdivr', 'SimdFloatDivOp', floatTypes, fdivrCode,
               PredType.MERGE, True)
    # FDUP
    sveWideImmInst('fdup', 'Fdup', 'SimdFloatAluOp', floatTypes, dupCode,
                   isUnary=True)
    # FEXPA
    fexpaCode = 'destElem = fplibExpA<Element>(srcElem1);'
    sveUnaryInst('fexpa', 'Fexpa', 'SimdFloatAluOp', fpTypes, fexpaCode)
    # FMAD
    # Fused multiply-accumulate via fplibMulAdd; the FMAD/FMLA/FMLS/FMSB/
    # FNM* variants below differ only in operand order and which operands
    # are negated with fplibNeg.
    fmadCode = fpOp % ('fplibMulAdd<Element>('
                       'srcElem1, destElem, srcElem2, fpscr)')
    sveTerInst('fmad', 'Fmad', 'SimdFloatMultAccOp', floatTypes, fmadCode,
               PredType.MERGE)
    # FMAX (immediate)
    fmaxCode = fpBinOp % 'Max'
    sveBinImmInst('fmax', 'FmaxImm', 'SimdFloatCmpOp', floatTypes, fmaxCode,
                  PredType.MERGE)
    # FMAX (vectors)
    sveBinInst('fmax', 'Fmax', 'SimdFloatCmpOp', floatTypes, fmaxCode,
               PredType.MERGE, True)
    # FMAXNM (immediate)
    fmaxnmCode = fpBinOp % 'MaxNum'
    sveBinImmInst('fmaxnm', 'FmaxnmImm', 'SimdFloatCmpOp', floatTypes,
                  fmaxnmCode, PredType.MERGE)
    # FMAXNM (vectors)
    sveBinInst('fmaxnm', 'Fmaxnm', 'SimdFloatCmpOp', floatTypes, fmaxnmCode,
               PredType.MERGE, True)
    # FMAXNMV
    fmaxnmvCode = fpReduceOp % 'MaxNum'
    sveNonAssocReducInst('fmaxnmv', 'Fmaxnmv', 'SimdFloatReduceCmpOp',
                         floatTypes, fmaxnmvCode, 'fplibDefaultNaN<Element>()')
    # FMAXV
    fmaxvCode = fpReduceOp % 'Max'
    sveNonAssocReducInst('fmaxv', 'Fmaxv', 'SimdFloatReduceCmpOp', floatTypes,
                         fmaxvCode, 'fplibInfinity<Element>(1)')
    # FMIN (immediate)
    fminCode = fpBinOp % 'Min'
    sveBinImmInst('fmin', 'FminImm', 'SimdFloatCmpOp', floatTypes, fminCode,
                  PredType.MERGE)
    # FMIN (vectors)
    sveBinInst('fmin', 'Fmin', 'SimdFloatCmpOp', floatTypes, fminCode,
               PredType.MERGE, True)
    # FMINNM (immediate)
    fminnmCode = fpBinOp % 'MinNum'
    sveBinImmInst('fminnm', 'FminnmImm', 'SimdFloatCmpOp', floatTypes,
                  fminnmCode, PredType.MERGE)
    # FMINNM (vectors)
    sveBinInst('fminnm', 'Fminnm', 'SimdFloatCmpOp', floatTypes, fminnmCode,
               PredType.MERGE, True)
    # FMINNMV
    fminnmvCode = fpReduceOp % 'MinNum'
    sveNonAssocReducInst('fminnmv', 'Fminnmv', 'SimdFloatReduceCmpOp',
                         floatTypes, fminnmvCode, 'fplibDefaultNaN<Element>()')
    # FMINV
    fminvCode = fpReduceOp % 'Min'
    sveNonAssocReducInst('fminv', 'Fminv', 'SimdFloatReduceCmpOp', floatTypes,
                         fminvCode, 'fplibInfinity<Element>(0)')
    fmlaCode = fpOp % ('fplibMulAdd<Element>('
                       'destElem, srcElem1, srcElem2, fpscr)')
    # FMLA (indexed)
    sveTerIdxInst('fmla', 'FmlaIdx', 'SimdFloatMultAccOp', floatTypes,
                  fmlaCode, PredType.MERGE)
    # FMLA (vectors)
    sveTerInst('fmla', 'Fmla', 'SimdFloatMultAccOp', floatTypes, fmlaCode,
               PredType.MERGE)
    # FMLS: like FMLA with srcElem1 negated.
    fmlsCode = fpOp % ('fplibMulAdd<Element>(destElem, '
                       'fplibNeg<Element>(srcElem1), srcElem2, fpscr)')
    # FMLS (indexed)
    sveTerIdxInst('fmls', 'FmlsIdx', 'SimdFloatMultAccOp', floatTypes,
                  fmlsCode, PredType.MERGE)
    # FMLS (vectors)
    sveTerInst('fmls', 'Fmls', 'SimdFloatMultAccOp', floatTypes, fmlsCode,
               PredType.MERGE)
    # FMSB: like FMAD with destElem negated.
    fmsbCode = fpOp % ('fplibMulAdd<Element>(srcElem1, '
                       'fplibNeg<Element>(destElem), srcElem2, fpscr)')
    sveTerInst('fmsb', 'Fmsb', 'SimdFloatMultAccOp', floatTypes, fmsbCode,
               PredType.MERGE)
    # FMUL (immediate)
    # Note: re-assigns fpBinOp/fmulCode with the same values defined
    # earlier; kept for safety, harmless.
    fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)'
    fmulCode = fpBinOp % 'Mul'
    sveBinImmInst('fmul', 'FmulImm', 'SimdFloatMultOp', floatTypes, fmulCode,
                  PredType.MERGE)
    # FMUL (vectors, predicated)
    fmulCode = fpBinOp % 'Mul'
    sveBinInst('fmul', 'FmulPred', 'SimdFloatMultOp', floatTypes, fmulCode,
               PredType.MERGE, True)
    # FMUL (vectors, unpredicated)
    sveBinInst('fmul', 'FmulUnpred', 'SimdFloatMultOp', floatTypes, fmulCode)
    # FMUL (indexed)
    sveBinIdxInst('fmul', 'FmulIdx', 'SimdFloatMultOp', floatTypes, fmulCode)

    # FMULX
    fmulxCode = fpBinOp % 'MulX'
    sveBinInst('fmulx', 'Fmulx', 'SimdFloatMultOp', floatTypes, fmulxCode,
               PredType.MERGE, True)
    # FNEG
    fnegCode = 'destElem = fplibNeg<Element>(srcElem1);'
    sveUnaryInst('fneg', 'Fneg', 'SimdFloatAluOp', fpTypes, fnegCode,
                 PredType.MERGE)
    # FNMAD: FMAD with both srcElem1 and destElem negated.
    fnmadCode = fpOp % ('fplibMulAdd<Element>('
                        'fplibNeg<Element>(srcElem1), '
                        'fplibNeg<Element>(destElem), srcElem2, fpscr)')
    sveTerInst('fnmad', 'Fnmad', 'SimdFloatMultAccOp', floatTypes, fnmadCode,
               PredType.MERGE)
    # FNMLA: FMLA with both destElem and srcElem1 negated.
    fnmlaCode = fpOp % ('fplibMulAdd<Element>('
                        'fplibNeg<Element>(destElem), '
                        'fplibNeg<Element>(srcElem1), srcElem2, fpscr)')
    sveTerInst('fnmla', 'Fnmla', 'SimdFloatMultAccOp', floatTypes, fnmlaCode,
               PredType.MERGE)
    # FNMLS: FMLA with destElem negated.
    fnmlsCode = fpOp % ('fplibMulAdd<Element>('
                        'fplibNeg<Element>(destElem), srcElem1, srcElem2, '
                        'fpscr)')
    sveTerInst('fnmls', 'Fnmls', 'SimdFloatMultAccOp', floatTypes, fnmlsCode,
               PredType.MERGE)
    # FNMSB: FMAD with srcElem1 negated.
    fnmsbCode = fpOp % ('fplibMulAdd<Element>('
                        'fplibNeg<Element>(srcElem1), destElem, srcElem2, '
                        'fpscr)')
    sveTerInst('fnmsb', 'Fnmsb', 'SimdFloatMultAccOp', floatTypes, fnmsbCode,
               PredType.MERGE)
    # FRECPE
    frecpeCode = fpOp % 'fplibRecipEstimate<Element>(srcElem1, fpscr)'
    sveUnaryInst('frecpe', 'Frecpe', 'SimdFloatMultAccOp', floatTypes,
                 frecpeCode)
    # FRECPS
    frecpsCode = fpBinOp % 'RecipStepFused'
    sveBinInst('frecps', 'Frecps', 'SimdFloatMultAccOp', floatTypes,
               frecpsCode)
    # FRECPX
    frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
    sveUnaryInst('frecpx', 'Frecpx', 'SimdFloatMultAccOp', floatTypes,
                 frecpxCode, PredType.MERGE)
    # FRINTA (round to nearest, ties away from zero)
    # Template args: rounding mode, 'exact' flag.
    frintCode = fpOp % 'fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)'
    frintaCode = frintCode % ('FPRounding_TIEAWAY', 'false')
    sveUnaryInst('frinta', 'Frinta', 'SimdCvtOp', floatTypes, frintaCode,
                 PredType.MERGE)
    # FRINTI (current FPCR rounding mode)
    frintiCode = frintCode % ('FPCRRounding(fpscr)', 'false')
    sveUnaryInst('frinti', 'Frinti', 'SimdCvtOp', floatTypes, frintiCode,
                 PredType.MERGE)
    # FRINTM (towards minus infinity)
    frintmCode = frintCode % ('FPRounding_NEGINF', 'false')
    sveUnaryInst('frintm', 'Frintm', 'SimdCvtOp', floatTypes, frintmCode,
                 PredType.MERGE)
    # FRINTN (to nearest, ties to even)
    frintnCode = frintCode % ('FPRounding_TIEEVEN', 'false')
    sveUnaryInst('frintn', 'Frintn', 'SimdCvtOp', floatTypes, frintnCode,
                 PredType.MERGE)
    # FRINTP (towards plus infinity)
    frintpCode = frintCode % ('FPRounding_POSINF', 'false')
    sveUnaryInst('frintp', 'Frintp', 'SimdCvtOp', floatTypes, frintpCode,
                 PredType.MERGE)
    # FRINTX (current rounding mode, exact — signals inexact)
    frintxCode = frintCode % ('FPCRRounding(fpscr)', 'true')
    sveUnaryInst('frintx', 'Frintx', 'SimdCvtOp', floatTypes, frintxCode,
                 PredType.MERGE)
    # FRINTZ (towards zero)
    frintzCode = frintCode % ('FPRounding_ZERO', 'false')
    sveUnaryInst('frintz', 'Frintz', 'SimdCvtOp', floatTypes, frintzCode,
                 PredType.MERGE)
    # FRSQRTE
    frsqrteCode = fpOp % 'fplibRSqrtEstimate<Element>(srcElem1, fpscr)'
    sveUnaryInst('frsqrte', 'Frsqrte', 'SimdFloatSqrtOp', floatTypes,
                 frsqrteCode)
    # FRSQRTS
    frsqrtsCode = fpBinOp % 'RSqrtStepFused'
    sveBinInst('frsqrts', 'Frsqrts', 'SimdFloatMiscOp', floatTypes,
               frsqrtsCode)
    # FSCALE
    fscaleCode = fpBinOp % 'Scale'
    sveBinInst('fscale', 'Fscale', 'SimdFloatMiscOp', floatTypes, fscaleCode,
               PredType.MERGE, True)
    # FSQRT
    fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
    sveUnaryInst('fsqrt', 'Fsqrt', 'SimdFloatSqrtOp', floatTypes, fsqrtCode,
                 PredType.MERGE)
    # FSUB (immediate)
    fsubCode = fpBinOp % 'Sub'
    sveBinImmInst('fsub', 'FsubImm', 'SimdFloatAddOp', floatTypes, fsubCode,
                  PredType.MERGE)
    # FSUB (vectors, predicated)
    sveBinInst('fsub', 'FsubPred', 'SimdFloatAddOp', floatTypes, fsubCode,
               PredType.MERGE, True)
    # FSUB (vectors, unpredicated)
    sveBinInst('fsub', 'FsubUnpred', 'SimdFloatAddOp', floatTypes, fsubCode)
    # FSUBR (immediate; reversed operands)
    fsubrCode = fpBinRevOp % 'Sub'
    sveBinImmInst('fsubr', 'FsubrImm', 'SimdFloatAddOp', floatTypes, fsubrCode,
                  PredType.MERGE)
    # FSUBR (vectors)
    sveBinInst('fsubr', 'Fsubr', 'SimdFloatAddOp', floatTypes, fsubrCode,
               PredType.MERGE, True)
    # FTMAD
    ftmadCode = fpOp % ('fplibTrigMulAdd<Element>('
                        'srcElem3, destElem, srcElem2, fpscr)')
    sveTerImmInst('ftmad', 'Ftmad', 'SimdFloatMultAccOp', floatTypes,
                  ftmadCode)
    # FTSMUL
    ftsmulCode = fpBinOp % 'TrigSMul'
    sveBinInst('ftsmul', 'Ftsmul', 'SimdFloatMiscOp', floatTypes, ftsmulCode)
    # FTSSEL
    ftsselCode = fpBinOp % 'TrigSSel'
    sveBinInst('ftssel', 'Ftssel', 'SimdFloatMultOp', floatTypes, ftsselCode)
    # INCB, INCH, INCW, INCD (scalar)
    # Increment by (element count * imm); mirrors the DEC definitions.
    incxCode = '''
        destElem = srcElem1 + (count * imm);
    '''
    sveElemCountInst('inc', 'Inc', 'SimdAluOp', unsignedTypes, incxCode,
                     destType = DestType.Scalar, dstIs32b = False)
    # INCH, INCW, INCD (vector)
    sveElemCountInst('inc', 'Incv', 'SimdAluOp', bigUnsignedTypes, incxCode,
                     destType = DestType.Vector, dstIs32b = False)
    # INCP (scalar)
    incpCode = '''
        XDest = XDest + count;
    '''
    svePredCountInst('incp', 'Incp', 'SimdAluOp', unsignedTypes, incpCode,
                     DestType.Scalar, SrcSize.Src64bit)
    # INCP (vector)
    incpvCode = '''
        destElem = srcElem + count;
    '''
    svePredCountInst('incp', 'Incpv', 'SimdAluOp', unsignedTypes, incpvCode,
                     DestType.Vector)
    # INDEX (immediate, scalar)
    sveIndex(IndexFormat.ImmReg)
    # INDEX (immediates)
    sveIndex(IndexFormat.ImmImm)
    # INDEX (scalar, immediate)
    sveIndex(IndexFormat.RegImm)
    # INDEX (scalars)
    sveIndex(IndexFormat.RegReg)
    # INSR (scalar)
    sveShiftAndInsertInst('insr', 'Insr', 'SimdAluOp', unsignedTypes,
                          srcType = SrcRegType.Scalar)
    # INSR (SIMD&FP scalar)
    sveShiftAndInsertInst('insr', 'Insrf', 'SimdAluOp', unsignedTypes,
                          srcType = SrcRegType.SimdFpScalar)
    # LASTA (scalar)
    # 'last' and 'eCount' come from the sveSelectInst template; LASTA
    # takes the element after 'last', wrapping around to element 0.
    lastaCode = '''
        last++;
        if (last >= eCount) {
            last = 0;
        }
        destElem = AA64FpOp1_x[last];'''
    sveSelectInst('lasta', 'Lasta', 'SimdAluOp', unsignedTypes, lastaCode,
                  isCond = False)
    # LASTA (SIMD&FP scalar)
    sveSelectInst('lasta', 'Lastaf', 'SimdAluOp', unsignedTypes, lastaCode,
                  isCond = False, destType = DstRegType.SimdFpScalar)
    # LASTB (scalar)
    # A negative 'last' (presumably no active element — see sveSelectInst)
    # falls back to the highest-numbered element.
    lastbCode = '''
        if (last < 0) {
            last = eCount - 1;
        }
        destElem = AA64FpOp1_x[last];'''
    sveSelectInst('lastb', 'Lastb', 'SimdAluOp', unsignedTypes, lastbCode,
                  isCond = False)
    # LASTB (SIMD&FP scalar)
    sveSelectInst('lastb', 'Lastbf', 'SimdAluOp', unsignedTypes, lastbCode,
                  isCond = False, destType = DstRegType.SimdFpScalar)
# Shift, multiply-accumulate, and bitwise-logical instructions.
# The shift bodies check 'shift >= element width' explicitly because SVE
# defines the result as 0 there, while the corresponding C++ shift would
# be undefined behavior.
# LSL (immediate, predicated)
lslCode = '''
if (srcElem2 == 0) {
destElem = srcElem1;
} else if (srcElem2 >= sizeof(Element) * 8) {
destElem = 0;
} else {
destElem = srcElem1 << srcElem2;
}
'''
sveBinImmInst('lsl', 'LslImmPred', 'SimdAluOp', unsignedTypes, lslCode,
PredType.MERGE)
# LSL (immediate, unpredicated)
sveBinImmInst('lsl', 'LslImmUnpred', 'SimdAluOp', unsignedTypes, lslCode)
# LSL (vectors)
sveBinInst('lsl', 'LslPred', 'SimdAluOp', unsignedTypes, lslCode,
PredType.MERGE, True)
# LSL (wide elements, predicated)
sveShiftByWideElemsInst('lsl', 'LslWidePred', 'SimdAluOp', unsignedTypes,
lslCode, PredType.MERGE)
# LSL (wide elements, unpredicated)
sveShiftByWideElemsInst('lsl', 'LslWideUnpred', 'SimdAluOp', unsignedTypes,
lslCode)
# LSLR: reversed-operand form — shift amount comes from srcElem1,
# shifted value from srcElem2.
lslrCode = '''
if (srcElem1 == 0) {
destElem = srcElem2;
} else if (srcElem1 >= sizeof(Element) * 8) {
destElem = 0;
} else {
destElem = srcElem2 << srcElem1;
}
'''
sveBinInst('lslr', 'Lslr', 'SimdAluOp', unsignedTypes, lslrCode,
PredType.MERGE, True)
# LSR (immediate, predicated)
lsrCode = '''
if (srcElem2 >= sizeof(Element) * 8) {
destElem = 0;
} else {
destElem = srcElem1 >> srcElem2;
}
'''
sveBinImmInst('lsr', 'LsrImmPred', 'SimdAluOp', unsignedTypes, lsrCode,
PredType.MERGE)
# LSR (immediate, unpredicated)
sveBinImmInst('lsr', 'LsrImmUnpred', 'SimdAluOp', unsignedTypes, lsrCode)
# LSR (vectors)
sveBinInst('lsr', 'LsrPred', 'SimdAluOp', unsignedTypes, lsrCode,
PredType.MERGE, True)
# LSR (wide elements, predicated)
sveShiftByWideElemsInst('lsr', 'LsrWidePred', 'SimdAluOp', unsignedTypes,
lsrCode, PredType.MERGE)
# LSR (wide elements, unpredicated)
sveShiftByWideElemsInst('lsr', 'LsrWideUnpred', 'SimdAluOp', unsignedTypes,
lsrCode)
# LSRR: reversed-operand right shift (see LSLR).
lsrrCode = '''
if (srcElem1 >= sizeof(Element) * 8) {
destElem = 0;
} else {
destElem = srcElem2 >> srcElem1;
}
'''
sveBinInst('lsrr', 'Lsrr', 'SimdAluOp', unsignedTypes, lsrrCode,
PredType.MERGE, True)
# MAD: multiply-add, destination is the addend source.
madCode = 'destElem = srcElem1 + destElem * srcElem2;'
sveTerInst('mad', 'Mad', 'SimdMultAccOp', signedTypes, madCode)
# MLA: multiply-accumulate into the destination.
mlaCode = 'destElem += srcElem1 * srcElem2;'
sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode)
# MLS: multiply-subtract from the destination.
mlsCode = 'destElem -= srcElem1 * srcElem2;'
sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode)
# MOVPRFX (predicated): a plain element copy; pairing with the
# prefixed instruction is handled elsewhere.
movCode = 'destElem = srcElem1;'
sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes,
movCode, PredType.MERGE)
sveUnaryInst('movprfx', 'MovprfxPredZ', 'SimdMiscOp', unsignedTypes,
movCode, PredType.ZERO)
# MOVPRFX (unpredicated)
sveUnaryInst('movprfx', 'MovprfxUnpred', 'SimdMiscOp', ('uint64_t',),
movCode)
# MSB: multiply-subtract, destination is the minuend source.
msbCode = 'destElem = srcElem1 - destElem * srcElem2;'
sveTerInst('msb', 'Msb', 'SimdMultAccOp', signedTypes, msbCode)
# MUL (immediate)
mulCode = 'destElem = srcElem1 * srcElem2;'
sveWideImmInst('mul', 'MulImm', 'SimdMultOp', unsignedTypes, mulCode)
# MUL (vectors)
sveBinInst('mul', 'Mul', 'SimdMultOp', unsignedTypes, mulCode,
PredType.MERGE, True)
# NAND, NANDS — predicate elements are booleans, hence logical '!'.
nandCode = 'destElem = !(srcElem1 & srcElem2);';
svePredLogicalInst('nand', 'PredNand', 'SimdPredAluOp', ('uint8_t',),
nandCode)
svePredLogicalInst('nands', 'PredNands', 'SimdPredAluOp', ('uint8_t',),
nandCode, isFlagSetting=True)
# NEG
negCode = 'destElem = -srcElem1;'
sveUnaryInst('neg', 'Neg', 'SimdAluOp', signedTypes, negCode,
PredType.MERGE)
# NOR, NORS
norCode = 'destElem = !(srcElem1 | srcElem2);';
svePredLogicalInst('nor', 'PredNor', 'SimdPredAluOp', ('uint8_t',),
norCode)
svePredLogicalInst('nors', 'PredNors', 'SimdPredAluOp', ('uint8_t',),
norCode, isFlagSetting=True)
# NOT (vector)
notCode = 'destElem = ~srcElem1;'
sveUnaryInst('not', 'Not', 'SimdAluOp', unsignedTypes, notCode,
PredType.MERGE)
# ORN, ORNS (predicates)
ornCode = 'destElem = srcElem1 | !srcElem2;';
svePredLogicalInst('orn', 'PredOrn', 'SimdPredAluOp', ('uint8_t',),
ornCode)
svePredLogicalInst('orns', 'PredOrns', 'SimdPredAluOp', ('uint8_t',),
ornCode, isFlagSetting=True)
# ORR (immediate)
orCode = 'destElem = srcElem1 | srcElem2;'
sveWideImmInst('orr', 'OrrImm', 'SimdAluOp', ('uint64_t',), orCode)
# ORR (vectors, predicated)
sveBinInst('orr', 'OrrPred', 'SimdAluOp', unsignedTypes, orCode,
PredType.MERGE, True)
# ORR (vectors, unpredicated)
orCode = 'destElem = srcElem1 | srcElem2;'
sveBinInst('orr', 'OrrUnpred', 'SimdAluOp', ('uint64_t',), orCode)
# ORR, ORRS (predicates)
svePredLogicalInst('orr', 'PredOrr', 'SimdPredAluOp', ('uint8_t',), orCode)
svePredLogicalInst('orrs', 'PredOrrs', 'SimdPredAluOp', ('uint8_t',),
orCode, isFlagSetting=True)
# ORV: bitwise-OR reduction, identity element 0.
orvCode = 'destElem |= srcElem1;'
sveAssocReducInst('orv', 'Orv', 'SimdReduceAluOp', unsignedTypes,
orvCode, '0')
# Predicate-generation, FFR-read, and element-reverse instructions.
# PFALSE: set the whole destination predicate to false.
pfalseCode = '''
PDest_ub[0] = 0;
destPred.reset();
'''
svePredUnaryWImplicitSrcInst('pfalse', 'Pfalse', 'SimdPredAluOp',
pfalseCode)
# PFIRST
svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
# PNEXT
svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
# PTEST
svePredTestInst('ptest', 'Ptest', 'SimdPredAluOp')
# PTRUE (last arg selects the flag-setting variant)
svePtrueInst('ptrue', 'Ptrue', 'SimdPredAluOp', unsignedTypes, False)
# PTRUES
svePtrueInst('ptrues', 'Ptrues', 'SimdPredAluOp', unsignedTypes, True)
# PUNPKHI
sveUnpackInst('punpkhi', 'Punpkhi', 'SimdPredAluOp', unsignedWideSDTypes,
unpackHalf = Unpack.High, regType = SrcRegType.Predicate)
# PUNPKLO
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes,
unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
# RBIT
rbitCode = '''
destElem = reverseBits(srcElem1);'''
sveUnaryInst('rbit', 'Rbit', 'SimdAluOp', unsignedTypes, rbitCode,
predType=PredType.MERGE, srcRegType=SrcRegType.Vector)
# RDFFR (unpredicated): copy the FFR into the destination predicate,
# one byte flag per byte element of the current vector length.
rdffrUnpredCode = '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
xc->tcBase());
for (unsigned i = 0; i < eCount; i++) {
PDest_ub[i] = Ffr_ub[i];
}'''
svePredUnaryWImplicitSrcInst('rdffr', 'RdffrUnpred', 'SimdPredAluOp',
rdffrUnpredCode)
# RDFFR, RDFFRS (predicated): zero the elements whose governing
# predicate bit is clear.
rdffrPredCode = '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
xc->tcBase());
for (unsigned i = 0; i < eCount; i++) {
if (GpOp_ub[i]) {
PDest_ub[i] = Ffr_ub[i];
} else {
PDest_ub[i] = false;
}
}'''
svePredUnaryWImplicitSrcInst('rdffr', 'RdffrPred', 'SimdPredAluOp',
rdffrPredCode, PredType.ZERO, False)
svePredUnaryWImplicitSrcInst('rdffrs', 'RdffrsPred', 'SimdPredAluOp',
rdffrPredCode, PredType.ZERO, True)
# RDVL: XDest = (vector length in bytes) * imm.  Built directly from
# RegImmOp templates rather than an sve*Inst helper.
rdvlCode = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
xc->tcBase());
XDest = eCount * (int64_t) imm;
'''
rdvlIop = InstObjParams('rdvl', 'SveRdvl', 'RegImmOp', rdvlCode, [])
header_output += RegImmOpDeclare.subst(rdvlIop)
decoder_output += RegImmOpConstructor.subst(rdvlIop)
exec_output += BasicExecute.subst(rdvlIop)
# REV (predicate)
sveReverseElementsInst('rev', 'Revp', 'SimdPredAluOp', unsignedTypes,
srcType = SrcRegType.Predicate)
# REV (vector)
sveReverseElementsInst('rev', 'Revv', 'SimdAluOp', unsignedTypes,
srcType = SrcRegType.Vector)
# REVB: reverse the order of the %(revtype)s-sized sub-elements inside
# each element (shared template, also used by REVH/REVW below).
revCode = '''
%(revtype)s* srcPtr = reinterpret_cast<%(revtype)s*>(&srcElem1);
%(revtype)s* dstPtr = reinterpret_cast<%(revtype)s*>(&destElem);
uint8_t subelements = sizeof(Element) / sizeof(%(revtype)s);
for(int i = 0; i < subelements; ++i) {
dstPtr[subelements - i - 1] = srcPtr[i];
}'''
sveUnaryInst('revb', 'Revb', 'SimdAluOp',
['uint16_t', 'uint32_t', 'uint64_t'],
revCode % {'revtype' : 'uint8_t'}, predType=PredType.MERGE,
srcRegType=SrcRegType.Vector, decoder='Generic')
# REVH
sveUnaryInst('revh', 'Revh', 'SimdAluOp', ['uint32_t', 'uint64_t'],
revCode % {'revtype' : 'uint16_t'}, predType=PredType.MERGE,
srcRegType=SrcRegType.Vector, decoder='Generic')
# REVW
sveUnaryInst('revw', 'Revw', 'SimdAluOp', ['uint64_t'],
revCode % {'revtype' : 'uint32_t'}, predType=PredType.MERGE,
srcRegType=SrcRegType.Vector, decoder='Generic')
# Signed arithmetic, conversion, reduction, and high-multiply
# instructions.
# SABD: absolute difference (template shared with UABD below).
abdCode = '''
destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
(srcElem2 - srcElem1);
'''
sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
PredType.MERGE, True)
# SADDV: widening sum reduction into a 64-bit accumulator
# (addvCode is reused by UADDV below).
addvCode = 'destElem += srcElem1;'
sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
['int8_t, int64_t', 'int16_t, int64_t', 'int32_t, int64_t'],
addvCode, '0')
# SCVTF: signed fixed-point to FP; source is sign-extended before the
# fplib conversion.
scvtfCode = fpOp % ('fplibFixedToFP<DElement>('
'sext<sizeof(SElement) * 8>(srcElem1), 0,'
' false, FPCRRounding(fpscr), fpscr)')
sveCvtInst('scvtf', 'ScvtfNarrow', 'SimdCvtOp',
('uint16_t, uint16_t',
'uint32_t, uint16_t',
'uint64_t, uint16_t',
'uint32_t, uint32_t',
'uint64_t, uint32_t',
'uint64_t, uint64_t'),
scvtfCode, CvtDir.Narrow)
sveCvtInst('scvtf', 'ScvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',),
scvtfCode, CvtDir.Widen)
# SDIV: divide-by-zero yields 0 and INT_MIN / -1 yields INT_MIN,
# avoiding the C++ UB cases.
sdivCode = '''
constexpr Element ELEM_MIN = std::numeric_limits<Element>::min();
destElem = (srcElem2 == 0) ? 0 :
(srcElem2 == -1 && srcElem1 == ELEM_MIN) ? ELEM_MIN :
(srcElem1 / srcElem2);
'''
sveBinInst('sdiv', 'Sdiv', 'SimdDivOp', signedTypes, sdivCode,
PredType.MERGE, True)
# SDIVR: reversed-operand form of SDIV.
sdivrCode = '''
constexpr Element ELEM_MIN = std::numeric_limits<Element>::min();
destElem = (srcElem1 == 0) ? 0 :
(srcElem1 == -1 && srcElem2 == ELEM_MIN) ? ELEM_MIN :
(srcElem2 / srcElem1);
'''
sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
PredType.MERGE, True)
# SDOT (indexed)
sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t',
'int16_t, int64_t'], isIndexed = True)
# SDOT (vectors)
sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t',
'int16_t, int64_t'], isIndexed = False)
# SEL (predicates) — per-element select; the choice between the two
# sources is driven by PredType.SELECT in the generator.
selCode = 'destElem = srcElem1;'
svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),
selCode, PredType.SELECT)
# SEL (vectors)
sveBinInst('sel', 'Sel', 'SimdAluOp', unsignedTypes, selCode,
PredType.SELECT, False)
# SETFFR: set the whole FFR to true.
setffrCode = '''
Ffr_ub[0] = true;
destPred.set();'''
svePredWriteFfrInst('setffr', 'Setffr', 'SimdPredAluOp', setffrCode, True)
# SMAX (immediate) — maxCode/minCode are reused by UMAX/UMIN below.
maxCode = 'destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;'
sveWideImmInst('smax', 'SmaxImm', 'SimdCmpOp', signedTypes, maxCode)
# SMAX (vectors)
sveBinInst('smax', 'Smax', 'SimdCmpOp', signedTypes, maxCode,
PredType.MERGE, True)
# SMAXV: max reduction, identity = type minimum.
maxvCode = '''
if (srcElem1 > destElem)
destElem = srcElem1;
'''
sveAssocReducInst('smaxv', 'Smaxv', 'SimdReduceCmpOp', signedTypes,
maxvCode, 'std::numeric_limits<Element>::min()')
# SMIN (immediate)
minCode = 'destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;'
sveWideImmInst('smin', 'SminImm', 'SimdCmpOp', signedTypes, minCode)
# SMIN (vectors)
sveBinInst('smin', 'Smin', 'SimdCmpOp', signedTypes, minCode,
PredType.MERGE, True)
# SMINV: min reduction, identity = type maximum.
minvCode = '''
if (srcElem1 < destElem)
destElem = srcElem1;
'''
sveAssocReducInst('sminv', 'Sminv', 'SimdReduceCmpOp', signedTypes,
minvCode, 'std::numeric_limits<Element>::max()')
# SMULH — emit C++ helpers returning the high half of the product.
# The narrow template widens to int64_t; the 64-bit overloads build the
# 128-bit product from four 32x32 partial products with manual carry
# propagation, and the signed version works on magnitudes and negates
# (two's complement across hi:lo) when the signs differ.
exec_output += '''
template <class T>
T do_mulh(T srcElem1, T srcElem2)
{
return ((int64_t)srcElem1 * (int64_t)srcElem2) >> sizeof(T) * 8;
}

int64_t do_mulh(int64_t srcElem1, int64_t srcElem2)
{
uint64_t x = (uint64_t) llabs(srcElem1);
uint64_t y = (uint64_t) llabs(srcElem2);

uint64_t a = x >> 32;
uint64_t b = x & 0xFFFFFFFF;
uint64_t c = y >> 32;
uint64_t d = y & 0xFFFFFFFF;

uint64_t hi = a * c;
uint64_t lo = b * d;

hi += (a * d) >> 32;
uint64_t tmp = lo;
lo += ((a * d) & 0xFFFFFFFF) << 32;
if (lo < tmp)
hi++;

hi += (b * c) >> 32;
tmp = lo;
lo += ((b * c) & 0xFFFFFFFF) << 32;
if (lo < tmp)
hi++;

uint64_t destElem = hi;
if ((srcElem1 < 0) ^ (srcElem2 < 0)) {
uint64_t tmp = lo = ~lo;
destElem = ~hi;
if (++lo < tmp)
destElem++;
}

return destElem;
}

uint64_t do_mulh(uint64_t srcElem1, uint64_t srcElem2)
{
uint64_t x = srcElem1;
uint64_t y = srcElem2;

uint64_t a = x >> 32;
uint64_t b = x & 0xFFFFFFFF;
uint64_t c = y >> 32;
uint64_t d = y & 0xFFFFFFFF;

uint64_t hi = a * c;
uint64_t lo = b * d;

hi += (a * d) >> 32;
uint64_t tmp = lo;
lo += ((a * d) & 0xFFFFFFFF) << 32;
if (lo < tmp)
hi++;

hi += (b * c) >> 32;
tmp = lo;
lo += ((b * c) & 0xFFFFFFFF) << 32;
if (lo < tmp)
hi++;

return hi;
}'''
# mulhCode is shared with UMULH below; overload resolution picks the
# right do_mulh for the element type.
mulhCode = '''
destElem = do_mulh(srcElem1, srcElem2);'''
sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
PredType.MERGE, True)
# SPLICE
sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
# Signed saturating add/sub and saturating element/predicate count
# instructions.  (This region was corrupted by a flattened two-column
# diff: both revisions' lines and duplicated context were present; it is
# reconstructed here from the newer revision, which wraps the INT_MIN
# computation in static_cast to silence narrowing warnings.)
# SQADD (immediate)
# Overflow is detected from the operand/result signs; the result is
# clamped to INT_MIN (negDest false) or INT_MAX (negDest true).
sqaddCode = '''
destElem = srcElem1 + srcElem2;
bool negDest = (destElem < 0);
bool negSrc1 = (srcElem1 < 0);
bool negSrc2 = (srcElem2 < 0);
if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
    destElem = static_cast<Element>(
        (Element)1 << (sizeof(Element) * 8 - 1)
    );
    if (negDest)
        destElem -= 1;
}
'''
sveWideImmInst('sqadd', 'SqaddImm', 'SimdAddOp', signedTypes, sqaddCode)
# SQADD (vectors)
sveBinInst('sqadd', 'Sqadd', 'SimdAddOp', signedTypes, sqaddCode)
# SQDECB, SQDECH, SQDECW, SQDECD (scalar, 32-bit)
# Saturating decrement by (element count * imm); same clamping scheme.
sqdecCode = '''
destElem = srcElem1 - (count * imm);
bool negDest = (destElem < 0);
bool negSrc = (srcElem1 < 0);
bool posCount = ((count * imm) >= 0);
if ((negDest != negSrc) && (negSrc == posCount)) {
    destElem = static_cast<%(dstType)s>(
        (%(dstType)s)1 << (sizeof(%(dstType)s) * 8 - 1)
    );
    if (negDest)
        destElem -= 1;
}
'''
sveElemCountInst('sqdec', 'Sqdec32', 'SimdAluOp', signedTypes,
sqdecCode%{'dstType':'int32_t'}, destType = DestType.Scalar,
dstIs32b = True)
# SQDECB, SQDECH, SQDECW, SQDECD (scalar, 64-bit)
sveElemCountInst('sqdec', 'Sqdec', 'SimdAluOp', signedTypes,
sqdecCode%{'dstType':'int64_t'}, destType = DestType.Scalar,
dstIs32b = False)
# SQDECH, SQDECW, SQDECD (vector)
sveElemCountInst('sqdec', 'Sqdecv', 'SimdAluOp', bigSignedTypes,
sqdecCode%{'dstType':'Element'}, destType = DestType.Vector,
dstIs32b = False)
# SQDECP (scalar, 32-bit)
# Saturating decrement by the active-predicate count.
sqdecpCode = '''
destElem = srcElem - count;
bool negDest = (destElem < 0);
bool negSrc = (srcElem < 0);
bool posCount = (count >= 0);
if ((negDest != negSrc) && (negSrc == posCount)) {
    destElem = std::numeric_limits<%s>::min();
    if (negDest)
        destElem -= 1;
}
'''
# 32-bit form sign-extends the result into the 64-bit X register.
sqdecp32Code = '''
int32_t srcElem = WDest;
int32_t destElem;''' + (sqdecpCode % 'int32_t') + '''
if (destElem < 0) {
    XDest = static_cast<uint32_t>(destElem) | ~mask(32);
} else {
    XDest = destElem;
}
'''
svePredCountInst('sqdecp', 'Sqdecp32', 'SimdAluOp', signedTypes,
sqdecp32Code, DestType.Scalar, SrcSize.Src32bit)
# SQDECP (scalar, 64-bit)
sqdecp64Code = '''
int64_t srcElem = XDest;
int64_t destElem;''' + (sqdecpCode % 'int64_t') + '''
XDest = destElem;
'''
svePredCountInst('sqdecp', 'Sqdecp64', 'SimdAluOp', signedTypes,
sqdecp64Code, DestType.Scalar, SrcSize.Src64bit)
# SQDECP (vector)
svePredCountInst('sqdecp', 'Sqdecpv', 'SimdAluOp', signedTypes,
sqdecpCode % 'Element', DestType.Vector)
# SQINCB, SQINCH, SQINCW, SQINCD (scalar, 32-bit)
# Saturating increment by (element count * imm).
sqincCode = '''
destElem = srcElem1 + (count * imm);
bool negDest = (destElem < 0);
bool negSrc = (srcElem1 < 0);
bool negCount = ((count * imm) < 0);
if ((negDest != negSrc) && (negSrc == negCount)) {
    destElem = static_cast<%(dstType)s>(
        (%(dstType)s)1 << (sizeof(%(dstType)s) * 8 - 1)
    );
    if (negDest)
        destElem -= 1;
}
'''
sveElemCountInst('sqinc', 'Sqinc32', 'SimdAluOp', signedTypes,
sqincCode%{'dstType':'int32_t'}, destType = DestType.Scalar,
dstIs32b = True)
# SQINCB, SQINCH, SQINCW, SQINCD (scalar, 64-bit)
sveElemCountInst('sqinc', 'Sqinc', 'SimdAluOp', signedTypes,
sqincCode%{'dstType':'int64_t'}, destType = DestType.Scalar,
dstIs32b = False)
# SQINCH, SQINCW, SQINCD (vector)
sveElemCountInst('sqinc', 'Sqincv', 'SimdAluOp', bigSignedTypes,
sqincCode%{'dstType':'Element'}, destType = DestType.Vector,
dstIs32b = False)
# SQINCP (scalar, 32-bit)
sqincpCode = '''
destElem = srcElem + count;
bool negDest = (destElem < 0);
bool negSrc = (srcElem < 0);
bool negCount = (count < 0);
if ((negDest != negSrc) && (negSrc == negCount)) {
    destElem = std::numeric_limits<%s>::min();
    if (negDest)
        destElem -= 1;
}
'''
sqincp32Code = '''
int32_t srcElem = WDest;
int32_t destElem;''' + (sqincpCode % 'int32_t') + '''
if (destElem < 0) {
    XDest = static_cast<uint32_t>(destElem) | ~mask(32);
} else {
    XDest = destElem;
}
'''
svePredCountInst('sqincp', 'Sqincp32', 'SimdAluOp', signedTypes,
sqincp32Code, DestType.Scalar, SrcSize.Src32bit)
# SQINCP (scalar, 64-bit)
sqincp64Code = '''
int64_t srcElem = XDest;
int64_t destElem;''' + (sqincpCode % 'int64_t') + '''
XDest = destElem;
'''
svePredCountInst('sqincp', 'Sqincp64', 'SimdAluOp', signedTypes,
sqincp64Code, DestType.Scalar, SrcSize.Src64bit)
# SQINCP (vector)
svePredCountInst('sqincp', 'Sqincpv', 'SimdAluOp', signedTypes,
sqincpCode % 'Element', DestType.Vector)
# SQSUB (immediate)
sqsubCode = '''
destElem = srcElem1 - srcElem2;
bool negDest = (destElem < 0);
bool negSrc1 = (srcElem1 < 0);
bool posSrc2 = (srcElem2 >= 0);
if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
    destElem = static_cast<Element>(
        (Element)1 << (sizeof(Element) * 8 - 1)
    );
    if (negDest)
        destElem -= 1;
}
'''
sveWideImmInst('sqsub', 'SqsubImm', 'SimdAddOp', signedTypes, sqsubCode)
# SQSUB (vectors)
sveBinInst('sqsub', 'Sqsub', 'SimdAddOp', signedTypes, sqsubCode)
4418 # SUB (immediate)
4419 subCode = 'destElem = srcElem1 - srcElem2;'
4420 sveWideImmInst('sub', 'SubImm', 'SimdAddOp', unsignedTypes, subCode)
4421 # SUB (vectors, predicated)
4422 sveBinInst('sub', 'SubPred', 'SimdAddOp', unsignedTypes, subCode,
4423 PredType.MERGE, True)
4424 # SUB (vectors, unpredicated)
4425 subCode = 'destElem = srcElem1 - srcElem2;'
4426 sveBinInst('sub', 'SubUnpred', 'SimdAddOp', unsignedTypes, subCode)
4427 # SUBR (immediate)
4428 subrCode = 'destElem = srcElem2 - srcElem1;'
4429 sveWideImmInst('subr', 'SubrImm', 'SimdAddOp', unsignedTypes, subrCode)
4430 # SUBR (vectors)
4431 sveBinInst('subr', 'Subr', 'SimdAddOp', unsignedTypes, subrCode,
4432 PredType.MERGE, True)
4433 # SUNPKHI
4434 sveUnpackInst('sunpkhi', 'Sunpkhi', 'SimdAluOp', signedWideSDTypes,
4435 unpackHalf = Unpack.High, regType = SrcRegType.Vector)
4436 # SUNPKLO
4437 sveUnpackInst('sunpklo', 'Sunpklo', 'SimdAluOp', signedWideSDTypes,
4438 unpackHalf = Unpack.Low, regType = SrcRegType.Vector)
4439 # SXTB
4440 sxtCode = 'destElem = sext<8 * sizeof(SElement)>(srcElem1);'
4441 sveWidenUnaryInst('sxtb', 'Sxtb', 'SimdAluOp',
4442 ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'],
4443 sxtCode, PredType.MERGE)
4444 # SXTH
4445 sveWidenUnaryInst('sxth', 'Sxth', 'SimdAluOp',
4446 ['uint16_t, uint32_t', 'uint16_t, uint64_t'],
4447 sxtCode, PredType.MERGE)
4448 # SXTW
4449 sveWidenUnaryInst('sxtw', 'Sxtw', 'SimdAluOp',
4450 ['uint32_t, uint64_t'],
4451 sxtCode, PredType.MERGE)
4452 # TBL
4453 sveTblInst('tbl', 'Tbl', 'SimdAluOp')
4454 # TRN1, TRN2 (predicates)
4455 trnPredIterCode = '''
4456 constexpr unsigned sz = sizeof(Element);
4457 int s;
4458 int part = %d;
4459 TheISA::VecPredRegContainer tmpPredC;
4460 auto auxPDest = tmpPredC.as<uint8_t>();
4461 for (unsigned i = 0; i < eCount / 2; i++) {
4462 s = 2 * i + part;
4463 for (unsigned j = 0; j < sz; j++) {
4464 auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j];
4465 auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j];
4466 }
4467 }
4468 for (unsigned i = 0; i < eCount * sz; i++) {
4469 PDest_pb[i] = auxPDest[i];
4470 }
4471 '''
4472 svePredBinPermInst('trn1', 'Trn1Pred', 'SimdPredAluOp', unsignedTypes,
4473 trnPredIterCode % 0)
4474 svePredBinPermInst('trn2', 'Trn2Pred', 'SimdPredAluOp', unsignedTypes,
4475 trnPredIterCode % 1)
4476 # TRN1, TRN2 (vectors)
4477 trnIterCode = '''
4478 int s;
4479 int part = %d;
4480 TheISA::VecRegContainer tmpVecC;
4481 auto auxDest = tmpVecC.as<Element>();
4482 for (unsigned i = 0; i < eCount / 2; i++) {
4483 s = 2 * i + part;
4484 auxDest[2 * i] = AA64FpOp1_x[s];
4485 auxDest[2 * i + 1] = AA64FpOp2_x[s];
4486 }
4487 for (unsigned i = 0; i < eCount; i++) {
4488 AA64FpDest_x[i] = auxDest[i];
4489 }
4490 '''
4491 sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
4492 customIterCode=trnIterCode % 0)
4493 sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
4494 customIterCode=trnIterCode % 1)
4495 # UABD
4496 sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
4497 PredType.MERGE, True)
4498 # UADDV
4499 sveWideningAssocReducInst('uaddv', 'Uaddv', 'SimdReduceAddOp',
4500 ['uint8_t, uint64_t', 'uint16_t, uint64_t', 'uint32_t, uint64_t',
4501 'uint64_t, uint64_t'],
4502 addvCode, '0')
4503 # UCVTF
4504 ucvtfCode = fpOp % ('fplibFixedToFP<DElement>(srcElem1, 0, true,'
4505 ' FPCRRounding(fpscr), fpscr)')
4506 sveCvtInst('ucvtf', 'UcvtfNarrow', 'SimdCvtOp',
4507 ('uint16_t, uint16_t',
4508 'uint32_t, uint16_t',
4509 'uint64_t, uint16_t',
4510 'uint32_t, uint32_t',
4511 'uint64_t, uint32_t',
4512 'uint64_t, uint64_t'),
4513 ucvtfCode, CvtDir.Narrow)
4514 sveCvtInst('ucvtf', 'UcvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',),
4515 ucvtfCode, CvtDir.Widen)
4516 # UDIV
4517 udivCode = 'destElem = (srcElem2 == 0) ? 0 : (srcElem1 / srcElem2);'
4518 sveBinInst('udiv', 'Udiv', 'SimdDivOp', unsignedTypes, udivCode,
4519 PredType.MERGE, True)
4520 # UDIVR
4521 udivrCode = 'destElem = (srcElem1 == 0) ? 0 : (srcElem2 / srcElem1);'
4522 sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
4523 PredType.MERGE, True)
4524 # UDOT (indexed)
4525 sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t',
4526 'uint16_t, uint64_t'], isIndexed = True)
4527 # UDOT (vectors)
4528 sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t',
4529 'uint16_t, uint64_t'], isIndexed = False)
4530 # UMAX (immediate)
4531 sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
4532 # UMAX (vectors)
4533 sveBinInst('umax', 'Umax', 'SimdCmpOp', unsignedTypes, maxCode,
4534 PredType.MERGE, True)
4535 # UMAXV
4536 sveAssocReducInst('umaxv', 'Umaxv', 'SimdReduceCmpOp', unsignedTypes,
4537 maxvCode, 'std::numeric_limits<Element>::min()')
4538 # UMIN (immediate)
4539 sveWideImmInst('umin', 'UminImm', 'SimdCmpOp', unsignedTypes, minCode)
4540 # UMIN (vectors)
4541 sveBinInst('umin', 'Umin', 'SimdCmpOp', unsignedTypes, minCode,
4542 PredType.MERGE, True)
4543 # UMINV
4544 sveAssocReducInst('uminv', 'Uminv', 'SimdReduceCmpOp', unsignedTypes,
4545 minvCode, 'std::numeric_limits<Element>::max()')
4546 # UMULH
4547 sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
4548 PredType.MERGE, True)
4549 # UQADD (immediate)
4550 uqaddCode = '''
4551 destElem = srcElem1 + srcElem2;
4552 if (destElem < srcElem1 || destElem < srcElem2) {
4553 destElem = (Element)(-1);
4554 }
4555 '''
4556 sveWideImmInst('uqadd', 'UqaddImm', 'SimdAddOp', unsignedTypes, uqaddCode)
4557 # UQADD (vectors)
4558 sveBinInst('uqadd', 'Uqadd', 'SimdAddOp', unsignedTypes, uqaddCode)
4559 # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 32-bit)
4560 uqdecCode = '''
4561 destElem = srcElem1 - (imm * count);
4562 if (destElem > srcElem1) {
4563 destElem = 0;
4564 }
4565 '''
4566 sveElemCountInst('uqdec', 'Uqdec32', 'SimdAluOp', unsignedTypes,
4567 uqdecCode, destType = DestType.Scalar, dstIs32b = True)
4568 # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 64-bit)
4569 sveElemCountInst('uqdec', 'Uqdec', 'SimdAluOp', unsignedTypes,
4570 uqdecCode, destType = DestType.Scalar, dstIs32b = False)
4571 # UQDECH, UQDECW, UQDECD (vector)
4572 sveElemCountInst('uqdec', 'Uqdecv', 'SimdAluOp', bigUnsignedTypes,
4573 uqdecCode, destType = DestType.Vector, dstIs32b = False)
4574 # UQDECP (scalar, 32-bit)
4575 uqdecpCode = '''
4576 destElem = srcElem - count;
4577 if (destElem > srcElem) {
4578 destElem = 0;
4579 }
4580 '''
4581 uqdecp32Code = '''
4582 uint32_t srcElem = WDest;
4583 uint32_t destElem;''' + uqdecpCode + '''
4584 WDest = destElem;
4585 '''
4586 svePredCountInst('uqdecp', 'Uqdecp32', 'SimdAluOp', unsignedTypes,
4587 uqdecp32Code, DestType.Scalar, SrcSize.Src32bit)
4588 # UQDECP (scalar, 64-bit)
4589 uqdecp64Code = '''
4590 uint64_t srcElem = XDest;
4591 uint64_t destElem;''' + uqdecpCode + '''
4592 XDest = destElem;
4593 '''
4594 svePredCountInst('uqdecp', 'Uqdecp64', 'SimdAluOp', unsignedTypes,
4595 uqdecp64Code, DestType.Scalar, SrcSize.Src64bit)
4596 # UQDECP (vector)
4597 svePredCountInst('uqdecp', 'Uqdecpv', 'SimdAluOp', unsignedTypes,
4598 uqdecpCode, DestType.Vector)
4599 # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 32-bit)
4600 uqincCode = '''
4601 destElem = srcElem1 + (imm * count);
4602 if (destElem < srcElem1 || destElem < (imm * count)) {
4603 destElem = static_cast<%(destType)s>(-1);
4604 }
4605 '''
4606 sveElemCountInst('uqinc', 'Uqinc32', 'SimdAluOp', unsignedTypes,
4607 uqincCode%{'destType': 'uint32_t'}, destType = DestType.Scalar,
4608 dstIs32b = True)
4609 # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 64-bit)
4610 sveElemCountInst('uqinc', 'Uqinc', 'SimdAluOp', unsignedTypes,
4611 uqincCode%{'destType': 'uint64_t'}, destType = DestType.Scalar,
4612 dstIs32b = False)
4613 # UQINCH, UQINCW, UQINCD (vector)
4614 sveElemCountInst('uqinc', 'Uqincv', 'SimdAluOp', bigUnsignedTypes,
4615 uqincCode%{'destType': 'Element'}, destType = DestType.Vector,
4616 dstIs32b = False)
4617 # UQINCP (scalar, 32-bit)
# C++ template for unsigned saturating increment-by-predicate-count:
# adds `count` and clamps to the destination type's maximum when the
# unsigned addition wraps (wrap detected by the sum comparing less than
# either operand). The %s placeholder is filled with the concrete
# destination type at each use site below.
4618 uqincpCode = '''
4619 destElem = srcElem + count;
4620 if (destElem < srcElem || destElem < count) {
4621 destElem = std::numeric_limits<%s>::max();
4622 }
4623 '''
# NOTE(review): reads the 32-bit view (WDest) but writes the 64-bit view
# (XDest). destElem is uint32_t, so the saturated result is zero-extended
# into the X register; uqdecp32Code writes WDest instead — confirm the
# asymmetry is intentional.
4624 uqincp32Code = '''
4625 uint32_t srcElem = WDest;
4626 uint32_t destElem;''' + (uqincpCode % 'uint32_t') + '''
4627 XDest = destElem;
4628 '''
4629 svePredCountInst('uqincp', 'Uqincp32', 'SimdAluOp', unsignedTypes,
4630 uqincp32Code, DestType.Scalar, SrcSize.Src32bit)
4631 # UQINCP (scalar, 64-bit)
# 64-bit scalar form: source and destination are the full X register.
4632 uqincp64Code = '''
4633 uint64_t srcElem = XDest;
4634 uint64_t destElem;''' + (uqincpCode % 'uint64_t') + '''
4635 XDest = destElem;
4636 '''
4637 svePredCountInst('uqincp', 'Uqincp64', 'SimdAluOp', unsignedTypes,
4638 uqincp64Code, DestType.Scalar, SrcSize.Src64bit)
4639 # UQINCP (vector)
# Vector form: saturates each element at its own element type's maximum.
4640 svePredCountInst('uqincp', 'Uqincpv', 'SimdAluOp', unsignedTypes,
4641 uqincpCode % 'Element', DestType.Vector)
4642 # UQSUB (immediate)
4643 uqsubCode = '''
4644 destElem = srcElem1 - srcElem2;
4645 if (destElem > srcElem1) {
4646 destElem = 0;
4647 }
4648 '''
4649 sveWideImmInst('uqsub', 'UqsubImm', 'SimdAddOp', unsignedTypes, uqsubCode)
4650 # UQSUB (vectors)
4651 sveBinInst('uqsub', 'Uqsub', 'SimdAddOp', unsignedTypes, uqsubCode)
4652 # UUNPKHI
4653 sveUnpackInst('uunpkhi', 'Uunpkhi', 'SimdAluOp', unsignedWideSDTypes,
4654 unpackHalf = Unpack.High, regType = SrcRegType.Vector)
4655 # UUNPKLO
4656 sveUnpackInst('uunpklo', 'Uunpklo', 'SimdAluOp', unsignedWideSDTypes,
4657 unpackHalf = Unpack.Low, regType = SrcRegType.Vector)
4658 # UXTB
4659 uxtCode = 'destElem = srcElem1;'
4660 sveWidenUnaryInst('uxtb', 'Uxtb', 'SimdAluOp',
4661 ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'],
4662 uxtCode, PredType.MERGE)
4663 # UXTH
4664 sveWidenUnaryInst('uxth', 'Uxth', 'SimdAluOp',
4665 ['uint16_t, uint32_t', 'uint16_t, uint64_t'],
4666 uxtCode, PredType.MERGE)
4667 # UXTW
4668 sveWidenUnaryInst('uxtw', 'Uxtw', 'SimdAluOp',
4669 ['uint32_t, uint64_t'],
4670 uxtCode, PredType.MERGE)
4671 # UZP1, UZP2 (predicates)
4672 uzpPredIterCode = '''
4673 constexpr unsigned sz = sizeof(Element);
4674 int s;
4675 int part = %d;
4676 TheISA::VecPredRegContainer tmpPredC;
4677 auto auxPDest = tmpPredC.as<uint8_t>();
4678 for (unsigned i = 0; i < eCount; i++) {
4679 s = 2 * i + part;
4680 for (unsigned j = 0; j < sz; j++) {
4681 if (s < eCount) {
4682 auxPDest[i * sz + j] = POp1_pb[s * sz + j];
4683 } else {
4684 auxPDest[i * sz + j] = POp2_pb[(s - eCount) * sz + j];
4685 }
4686 }
4687 }
4688 for (unsigned i = 0; i < eCount * sz; i++) {
4689 PDest_pb[i] = auxPDest[i];
4690 }
4691 '''
4692 svePredBinPermInst('uzp1', 'Uzp1Pred', 'SimdPredAluOp', unsignedTypes,
4693 uzpPredIterCode % 0)
4694 svePredBinPermInst('uzp2', 'Uzp2Pred', 'SimdPredAluOp', unsignedTypes,
4695 uzpPredIterCode % 1)
4696 # UZP1, UZP2 (vectors)
4697 uzpIterCode = '''
4698 int s;
4699 int part = %d;
4700 TheISA::VecRegContainer tmpVecC;
4701 auto auxDest = tmpVecC.as<Element>();
4702 for (unsigned i = 0; i < eCount; i++) {
4703 s = 2 * i + part;
4704 if (s < eCount) {
4705 auxDest[i] = AA64FpOp1_x[s];
4706 } else {
4707 auxDest[i] = AA64FpOp2_x[s - eCount];
4708 }
4709 }
4710 for (unsigned i = 0; i < eCount; i++) {
4711 AA64FpDest_x[i] = auxDest[i];
4712 }
4713 '''
4714 sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
4715 customIterCode=uzpIterCode % 0)
4716 sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
4717 customIterCode=uzpIterCode % 1)
4718 # WHILELE (32-bit)
4719 whileLECode = '''
4720 cond = srcElem1 <= srcElem2;
4721 '''
4722 sveWhileInst('whilele', 'Whilele32', 'SimdCmpOp', signedTypes, whileLECode,
4723 SrcSize.Src32bit)
4724 # WHILELE (64-bit)
4725 sveWhileInst('whilele', 'Whilele64', 'SimdCmpOp', signedTypes, whileLECode,
4726 SrcSize.Src64bit)
4727 # WHILELO (32-bit)
4728 whileLTCode = '''
4729 cond = srcElem1 < srcElem2;
4730 '''
4731 sveWhileInst('whilelo', 'Whilelo32', 'SimdCmpOp', unsignedTypes,
4732 whileLTCode, SrcSize.Src32bit)
4733 # WHILELO (64-bit)
4734 sveWhileInst('whilelo', 'Whilelo64', 'SimdCmpOp', unsignedTypes,
4735 whileLTCode, SrcSize.Src64bit)
4736 # WHILELS (32-bit)
4737 sveWhileInst('whilels', 'Whilels32', 'SimdCmpOp', unsignedTypes,
4738 whileLECode, SrcSize.Src32bit)
4739 # WHILELS (64-bit)
4740 sveWhileInst('whilels', 'Whilels64', 'SimdCmpOp', unsignedTypes,
4741 whileLECode, SrcSize.Src64bit)
4742 # WHILELT (32-bit)
4743 sveWhileInst('whilelt', 'Whilelt32', 'SimdCmpOp', signedTypes,
4744 whileLTCode, SrcSize.Src32bit)
4745 # WHILELT (64-bit)
4746 sveWhileInst('whilelt', 'Whilelt64', 'SimdCmpOp', signedTypes,
4747 whileLTCode, SrcSize.Src64bit)
4748 # WRFFR
# Generated C++ for WRFFR: copy the source predicate register into the
# First Fault Register (FFR) byte by byte. eCount is the current SVE
# vector length expressed in bytes (uint8_t elements) for this context.
4749 wrffrCode = '''
4750 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
4751 xc->tcBase());
4752 for (unsigned i = 0; i < eCount; i++) {
4753 Ffr_ub[i] = POp1_ub[i];
4754 }'''
4755 svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
4756 # ZIP1, ZIP2 (predicates)
4757 zipPredIterCode = '''
4758 constexpr unsigned sz = sizeof(Element);
4759 int s;
4760 int part = %d;
4761 TheISA::VecPredRegContainer tmpPredC;
4762 auto auxPDest = tmpPredC.as<uint8_t>();
4763 for (unsigned i = 0; i < eCount / 2; i++) {
4764 s = i + (part * (eCount / 2));
4765 for (unsigned j = 0; j < sz; j++) {
4766 auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j];
4767 auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j];
4768 }
4769 }
4770 for (unsigned i = 0; i < eCount * sz; i++) {
4771 PDest_pb[i] = auxPDest[i];
4772 }
4773 '''
4774 svePredBinPermInst('zip1', 'Zip1Pred', 'SimdPredAluOp', unsignedTypes,
4775 zipPredIterCode % 0)
4776 svePredBinPermInst('zip2', 'Zip2Pred', 'SimdPredAluOp', unsignedTypes,
4777 zipPredIterCode % 1)
4778 # ZIP1, ZIP2 (vectors)
4779 zipIterCode = '''
4780 int s;
4781 int part = %d;
4782 TheISA::VecRegContainer tmpVecC;
4783 auto auxDest = tmpVecC.as<Element>();
4784 for (unsigned i = 0; i < eCount / 2; i++) {
4785 s = i + (part * (eCount / 2));
4786 auxDest[2 * i] = AA64FpOp1_x[s];
4787 auxDest[2 * i + 1] = AA64FpOp2_x[s];
4788 }
4789 for (unsigned i = 0; i < eCount; i++) {
4790 AA64FpDest_x[i] = auxDest[i];
4791 }
4792 '''
4793 sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
4794 customIterCode=zipIterCode % 0)
4795 sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
4796 customIterCode=zipIterCode % 1)
4797
4798}};
4419 if (negDest)
4420 destElem -= 1;
4421 }
4422 '''
4423 sveWideImmInst('sqsub', 'SqsubImm', 'SimdAddOp', signedTypes, sqsubCode)
4424 # SQSUB (vectors)
4425 sveBinInst('sqsub', 'Sqsub', 'SimdAddOp', signedTypes, sqsubCode)
4426 # SUB (immediate)
4427 subCode = 'destElem = srcElem1 - srcElem2;'
4428 sveWideImmInst('sub', 'SubImm', 'SimdAddOp', unsignedTypes, subCode)
4429 # SUB (vectors, predicated)
4430 sveBinInst('sub', 'SubPred', 'SimdAddOp', unsignedTypes, subCode,
4431 PredType.MERGE, True)
4432 # SUB (vectors, unpredicated)
4433 subCode = 'destElem = srcElem1 - srcElem2;'
4434 sveBinInst('sub', 'SubUnpred', 'SimdAddOp', unsignedTypes, subCode)
4435 # SUBR (immediate)
4436 subrCode = 'destElem = srcElem2 - srcElem1;'
4437 sveWideImmInst('subr', 'SubrImm', 'SimdAddOp', unsignedTypes, subrCode)
4438 # SUBR (vectors)
4439 sveBinInst('subr', 'Subr', 'SimdAddOp', unsignedTypes, subrCode,
4440 PredType.MERGE, True)
4441 # SUNPKHI
4442 sveUnpackInst('sunpkhi', 'Sunpkhi', 'SimdAluOp', signedWideSDTypes,
4443 unpackHalf = Unpack.High, regType = SrcRegType.Vector)
4444 # SUNPKLO
4445 sveUnpackInst('sunpklo', 'Sunpklo', 'SimdAluOp', signedWideSDTypes,
4446 unpackHalf = Unpack.Low, regType = SrcRegType.Vector)
4447 # SXTB
4448 sxtCode = 'destElem = sext<8 * sizeof(SElement)>(srcElem1);'
4449 sveWidenUnaryInst('sxtb', 'Sxtb', 'SimdAluOp',
4450 ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'],
4451 sxtCode, PredType.MERGE)
4452 # SXTH
4453 sveWidenUnaryInst('sxth', 'Sxth', 'SimdAluOp',
4454 ['uint16_t, uint32_t', 'uint16_t, uint64_t'],
4455 sxtCode, PredType.MERGE)
4456 # SXTW
4457 sveWidenUnaryInst('sxtw', 'Sxtw', 'SimdAluOp',
4458 ['uint32_t, uint64_t'],
4459 sxtCode, PredType.MERGE)
4460 # TBL
4461 sveTblInst('tbl', 'Tbl', 'SimdAluOp')
4462 # TRN1, TRN2 (predicates)
4463 trnPredIterCode = '''
4464 constexpr unsigned sz = sizeof(Element);
4465 int s;
4466 int part = %d;
4467 TheISA::VecPredRegContainer tmpPredC;
4468 auto auxPDest = tmpPredC.as<uint8_t>();
4469 for (unsigned i = 0; i < eCount / 2; i++) {
4470 s = 2 * i + part;
4471 for (unsigned j = 0; j < sz; j++) {
4472 auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j];
4473 auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j];
4474 }
4475 }
4476 for (unsigned i = 0; i < eCount * sz; i++) {
4477 PDest_pb[i] = auxPDest[i];
4478 }
4479 '''
4480 svePredBinPermInst('trn1', 'Trn1Pred', 'SimdPredAluOp', unsignedTypes,
4481 trnPredIterCode % 0)
4482 svePredBinPermInst('trn2', 'Trn2Pred', 'SimdPredAluOp', unsignedTypes,
4483 trnPredIterCode % 1)
4484 # TRN1, TRN2 (vectors)
4485 trnIterCode = '''
4486 int s;
4487 int part = %d;
4488 TheISA::VecRegContainer tmpVecC;
4489 auto auxDest = tmpVecC.as<Element>();
4490 for (unsigned i = 0; i < eCount / 2; i++) {
4491 s = 2 * i + part;
4492 auxDest[2 * i] = AA64FpOp1_x[s];
4493 auxDest[2 * i + 1] = AA64FpOp2_x[s];
4494 }
4495 for (unsigned i = 0; i < eCount; i++) {
4496 AA64FpDest_x[i] = auxDest[i];
4497 }
4498 '''
4499 sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
4500 customIterCode=trnIterCode % 0)
4501 sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
4502 customIterCode=trnIterCode % 1)
4503 # UABD
4504 sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
4505 PredType.MERGE, True)
4506 # UADDV
4507 sveWideningAssocReducInst('uaddv', 'Uaddv', 'SimdReduceAddOp',
4508 ['uint8_t, uint64_t', 'uint16_t, uint64_t', 'uint32_t, uint64_t',
4509 'uint64_t, uint64_t'],
4510 addvCode, '0')
4511 # UCVTF
4512 ucvtfCode = fpOp % ('fplibFixedToFP<DElement>(srcElem1, 0, true,'
4513 ' FPCRRounding(fpscr), fpscr)')
4514 sveCvtInst('ucvtf', 'UcvtfNarrow', 'SimdCvtOp',
4515 ('uint16_t, uint16_t',
4516 'uint32_t, uint16_t',
4517 'uint64_t, uint16_t',
4518 'uint32_t, uint32_t',
4519 'uint64_t, uint32_t',
4520 'uint64_t, uint64_t'),
4521 ucvtfCode, CvtDir.Narrow)
4522 sveCvtInst('ucvtf', 'UcvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',),
4523 ucvtfCode, CvtDir.Widen)
4524 # UDIV
4525 udivCode = 'destElem = (srcElem2 == 0) ? 0 : (srcElem1 / srcElem2);'
4526 sveBinInst('udiv', 'Udiv', 'SimdDivOp', unsignedTypes, udivCode,
4527 PredType.MERGE, True)
4528 # UDIVR
4529 udivrCode = 'destElem = (srcElem1 == 0) ? 0 : (srcElem2 / srcElem1);'
4530 sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
4531 PredType.MERGE, True)
4532 # UDOT (indexed)
4533 sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t',
4534 'uint16_t, uint64_t'], isIndexed = True)
4535 # UDOT (vectors)
4536 sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t',
4537 'uint16_t, uint64_t'], isIndexed = False)
4538 # UMAX (immediate)
4539 sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
4540 # UMAX (vectors)
4541 sveBinInst('umax', 'Umax', 'SimdCmpOp', unsignedTypes, maxCode,
4542 PredType.MERGE, True)
4543 # UMAXV
4544 sveAssocReducInst('umaxv', 'Umaxv', 'SimdReduceCmpOp', unsignedTypes,
4545 maxvCode, 'std::numeric_limits<Element>::min()')
4546 # UMIN (immediate)
4547 sveWideImmInst('umin', 'UminImm', 'SimdCmpOp', unsignedTypes, minCode)
4548 # UMIN (vectors)
4549 sveBinInst('umin', 'Umin', 'SimdCmpOp', unsignedTypes, minCode,
4550 PredType.MERGE, True)
4551 # UMINV
4552 sveAssocReducInst('uminv', 'Uminv', 'SimdReduceCmpOp', unsignedTypes,
4553 minvCode, 'std::numeric_limits<Element>::max()')
4554 # UMULH
4555 sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
4556 PredType.MERGE, True)
4557 # UQADD (immediate)
4558 uqaddCode = '''
4559 destElem = srcElem1 + srcElem2;
4560 if (destElem < srcElem1 || destElem < srcElem2) {
4561 destElem = (Element)(-1);
4562 }
4563 '''
4564 sveWideImmInst('uqadd', 'UqaddImm', 'SimdAddOp', unsignedTypes, uqaddCode)
4565 # UQADD (vectors)
4566 sveBinInst('uqadd', 'Uqadd', 'SimdAddOp', unsignedTypes, uqaddCode)
4567 # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 32-bit)
4568 uqdecCode = '''
4569 destElem = srcElem1 - (imm * count);
4570 if (destElem > srcElem1) {
4571 destElem = 0;
4572 }
4573 '''
4574 sveElemCountInst('uqdec', 'Uqdec32', 'SimdAluOp', unsignedTypes,
4575 uqdecCode, destType = DestType.Scalar, dstIs32b = True)
4576 # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 64-bit)
4577 sveElemCountInst('uqdec', 'Uqdec', 'SimdAluOp', unsignedTypes,
4578 uqdecCode, destType = DestType.Scalar, dstIs32b = False)
4579 # UQDECH, UQDECW, UQDECD (vector)
4580 sveElemCountInst('uqdec', 'Uqdecv', 'SimdAluOp', bigUnsignedTypes,
4581 uqdecCode, destType = DestType.Vector, dstIs32b = False)
4582 # UQDECP (scalar, 32-bit)
4583 uqdecpCode = '''
4584 destElem = srcElem - count;
4585 if (destElem > srcElem) {
4586 destElem = 0;
4587 }
4588 '''
4589 uqdecp32Code = '''
4590 uint32_t srcElem = WDest;
4591 uint32_t destElem;''' + uqdecpCode + '''
4592 WDest = destElem;
4593 '''
4594 svePredCountInst('uqdecp', 'Uqdecp32', 'SimdAluOp', unsignedTypes,
4595 uqdecp32Code, DestType.Scalar, SrcSize.Src32bit)
4596 # UQDECP (scalar, 64-bit)
4597 uqdecp64Code = '''
4598 uint64_t srcElem = XDest;
4599 uint64_t destElem;''' + uqdecpCode + '''
4600 XDest = destElem;
4601 '''
4602 svePredCountInst('uqdecp', 'Uqdecp64', 'SimdAluOp', unsignedTypes,
4603 uqdecp64Code, DestType.Scalar, SrcSize.Src64bit)
4604 # UQDECP (vector)
4605 svePredCountInst('uqdecp', 'Uqdecpv', 'SimdAluOp', unsignedTypes,
4606 uqdecpCode, DestType.Vector)
4607 # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 32-bit)
4608 uqincCode = '''
4609 destElem = srcElem1 + (imm * count);
4610 if (destElem < srcElem1 || destElem < (imm * count)) {
4611 destElem = static_cast<%(destType)s>(-1);
4612 }
4613 '''
4614 sveElemCountInst('uqinc', 'Uqinc32', 'SimdAluOp', unsignedTypes,
4615 uqincCode%{'destType': 'uint32_t'}, destType = DestType.Scalar,
4616 dstIs32b = True)
4617 # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 64-bit)
4618 sveElemCountInst('uqinc', 'Uqinc', 'SimdAluOp', unsignedTypes,
4619 uqincCode%{'destType': 'uint64_t'}, destType = DestType.Scalar,
4620 dstIs32b = False)
4621 # UQINCH, UQINCW, UQINCD (vector)
4622 sveElemCountInst('uqinc', 'Uqincv', 'SimdAluOp', bigUnsignedTypes,
4623 uqincCode%{'destType': 'Element'}, destType = DestType.Vector,
4624 dstIs32b = False)
4625 # UQINCP (scalar, 32-bit)
4626 uqincpCode = '''
4627 destElem = srcElem + count;
4628 if (destElem < srcElem || destElem < count) {
4629 destElem = std::numeric_limits<%s>::max();
4630 }
4631 '''
4632 uqincp32Code = '''
4633 uint32_t srcElem = WDest;
4634 uint32_t destElem;''' + (uqincpCode % 'uint32_t') + '''
4635 XDest = destElem;
4636 '''
4637 svePredCountInst('uqincp', 'Uqincp32', 'SimdAluOp', unsignedTypes,
4638 uqincp32Code, DestType.Scalar, SrcSize.Src32bit)
4639 # UQINCP (scalar, 64-bit)
4640 uqincp64Code = '''
4641 uint64_t srcElem = XDest;
4642 uint64_t destElem;''' + (uqincpCode % 'uint64_t') + '''
4643 XDest = destElem;
4644 '''
4645 svePredCountInst('uqincp', 'Uqincp64', 'SimdAluOp', unsignedTypes,
4646 uqincp64Code, DestType.Scalar, SrcSize.Src64bit)
4647 # UQINCP (vector)
4648 svePredCountInst('uqincp', 'Uqincpv', 'SimdAluOp', unsignedTypes,
4649 uqincpCode % 'Element', DestType.Vector)
4650 # UQSUB (immediate)
4651 uqsubCode = '''
4652 destElem = srcElem1 - srcElem2;
4653 if (destElem > srcElem1) {
4654 destElem = 0;
4655 }
4656 '''
4657 sveWideImmInst('uqsub', 'UqsubImm', 'SimdAddOp', unsignedTypes, uqsubCode)
4658 # UQSUB (vectors)
4659 sveBinInst('uqsub', 'Uqsub', 'SimdAddOp', unsignedTypes, uqsubCode)
4660 # UUNPKHI
4661 sveUnpackInst('uunpkhi', 'Uunpkhi', 'SimdAluOp', unsignedWideSDTypes,
4662 unpackHalf = Unpack.High, regType = SrcRegType.Vector)
4663 # UUNPKLO
4664 sveUnpackInst('uunpklo', 'Uunpklo', 'SimdAluOp', unsignedWideSDTypes,
4665 unpackHalf = Unpack.Low, regType = SrcRegType.Vector)
4666 # UXTB
4667 uxtCode = 'destElem = srcElem1;'
4668 sveWidenUnaryInst('uxtb', 'Uxtb', 'SimdAluOp',
4669 ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'],
4670 uxtCode, PredType.MERGE)
4671 # UXTH
4672 sveWidenUnaryInst('uxth', 'Uxth', 'SimdAluOp',
4673 ['uint16_t, uint32_t', 'uint16_t, uint64_t'],
4674 uxtCode, PredType.MERGE)
4675 # UXTW
4676 sveWidenUnaryInst('uxtw', 'Uxtw', 'SimdAluOp',
4677 ['uint32_t, uint64_t'],
4678 uxtCode, PredType.MERGE)
4679 # UZP1, UZP2 (predicates)
4680 uzpPredIterCode = '''
4681 constexpr unsigned sz = sizeof(Element);
4682 int s;
4683 int part = %d;
4684 TheISA::VecPredRegContainer tmpPredC;
4685 auto auxPDest = tmpPredC.as<uint8_t>();
4686 for (unsigned i = 0; i < eCount; i++) {
4687 s = 2 * i + part;
4688 for (unsigned j = 0; j < sz; j++) {
4689 if (s < eCount) {
4690 auxPDest[i * sz + j] = POp1_pb[s * sz + j];
4691 } else {
4692 auxPDest[i * sz + j] = POp2_pb[(s - eCount) * sz + j];
4693 }
4694 }
4695 }
4696 for (unsigned i = 0; i < eCount * sz; i++) {
4697 PDest_pb[i] = auxPDest[i];
4698 }
4699 '''
4700 svePredBinPermInst('uzp1', 'Uzp1Pred', 'SimdPredAluOp', unsignedTypes,
4701 uzpPredIterCode % 0)
4702 svePredBinPermInst('uzp2', 'Uzp2Pred', 'SimdPredAluOp', unsignedTypes,
4703 uzpPredIterCode % 1)
4704 # UZP1, UZP2 (vectors)
4705 uzpIterCode = '''
4706 int s;
4707 int part = %d;
4708 TheISA::VecRegContainer tmpVecC;
4709 auto auxDest = tmpVecC.as<Element>();
4710 for (unsigned i = 0; i < eCount; i++) {
4711 s = 2 * i + part;
4712 if (s < eCount) {
4713 auxDest[i] = AA64FpOp1_x[s];
4714 } else {
4715 auxDest[i] = AA64FpOp2_x[s - eCount];
4716 }
4717 }
4718 for (unsigned i = 0; i < eCount; i++) {
4719 AA64FpDest_x[i] = auxDest[i];
4720 }
4721 '''
4722 sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
4723 customIterCode=uzpIterCode % 0)
4724 sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
4725 customIterCode=uzpIterCode % 1)
4726 # WHILELE (32-bit)
4727 whileLECode = '''
4728 cond = srcElem1 <= srcElem2;
4729 '''
4730 sveWhileInst('whilele', 'Whilele32', 'SimdCmpOp', signedTypes, whileLECode,
4731 SrcSize.Src32bit)
4732 # WHILELE (64-bit)
4733 sveWhileInst('whilele', 'Whilele64', 'SimdCmpOp', signedTypes, whileLECode,
4734 SrcSize.Src64bit)
4735 # WHILELO (32-bit)
4736 whileLTCode = '''
4737 cond = srcElem1 < srcElem2;
4738 '''
4739 sveWhileInst('whilelo', 'Whilelo32', 'SimdCmpOp', unsignedTypes,
4740 whileLTCode, SrcSize.Src32bit)
4741 # WHILELO (64-bit)
4742 sveWhileInst('whilelo', 'Whilelo64', 'SimdCmpOp', unsignedTypes,
4743 whileLTCode, SrcSize.Src64bit)
4744 # WHILELS (32-bit)
4745 sveWhileInst('whilels', 'Whilels32', 'SimdCmpOp', unsignedTypes,
4746 whileLECode, SrcSize.Src32bit)
4747 # WHILELS (64-bit)
4748 sveWhileInst('whilels', 'Whilels64', 'SimdCmpOp', unsignedTypes,
4749 whileLECode, SrcSize.Src64bit)
4750 # WHILELT (32-bit)
4751 sveWhileInst('whilelt', 'Whilelt32', 'SimdCmpOp', signedTypes,
4752 whileLTCode, SrcSize.Src32bit)
4753 # WHILELT (64-bit)
4754 sveWhileInst('whilelt', 'Whilelt64', 'SimdCmpOp', signedTypes,
4755 whileLTCode, SrcSize.Src64bit)
4756 # WRFFR
4757 wrffrCode = '''
4758 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
4759 xc->tcBase());
4760 for (unsigned i = 0; i < eCount; i++) {
4761 Ffr_ub[i] = POp1_ub[i];
4762 }'''
4763 svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
4764 # ZIP1, ZIP2 (predicates)
4765 zipPredIterCode = '''
4766 constexpr unsigned sz = sizeof(Element);
4767 int s;
4768 int part = %d;
4769 TheISA::VecPredRegContainer tmpPredC;
4770 auto auxPDest = tmpPredC.as<uint8_t>();
4771 for (unsigned i = 0; i < eCount / 2; i++) {
4772 s = i + (part * (eCount / 2));
4773 for (unsigned j = 0; j < sz; j++) {
4774 auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j];
4775 auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j];
4776 }
4777 }
4778 for (unsigned i = 0; i < eCount * sz; i++) {
4779 PDest_pb[i] = auxPDest[i];
4780 }
4781 '''
4782 svePredBinPermInst('zip1', 'Zip1Pred', 'SimdPredAluOp', unsignedTypes,
4783 zipPredIterCode % 0)
4784 svePredBinPermInst('zip2', 'Zip2Pred', 'SimdPredAluOp', unsignedTypes,
4785 zipPredIterCode % 1)
4786 # ZIP1, ZIP2 (vectors)
4787 zipIterCode = '''
4788 int s;
4789 int part = %d;
4790 TheISA::VecRegContainer tmpVecC;
4791 auto auxDest = tmpVecC.as<Element>();
4792 for (unsigned i = 0; i < eCount / 2; i++) {
4793 s = i + (part * (eCount / 2));
4794 auxDest[2 * i] = AA64FpOp1_x[s];
4795 auxDest[2 * i + 1] = AA64FpOp2_x[s];
4796 }
4797 for (unsigned i = 0; i < eCount; i++) {
4798 AA64FpDest_x[i] = auxDest[i];
4799 }
4800 '''
4801 sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
4802 customIterCode=zipIterCode % 0)
4803 sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
4804 customIterCode=zipIterCode % 1)
4805
4806}};