// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder.  You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Giacomo Gabrielli

// @file Definition of SVE memory access instructions.

output header {{

    // Decodes SVE contiguous load instructions, scalar plus scalar form.
    template <template <typename T1, typename T2> class Base>
    StaticInstPtr
    decodeSveContigLoadSSInsts(uint8_t dtype, ExtMachInst machInst,
                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
                               IntRegIndex rm, bool firstFaulting)
    {
        const char* mn = firstFaulting ? "ldff1" : "ld1";
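        // The 4-bit dtype field jointly selects the destination register
        // element type (T1) and the memory element type (T2); the cases
        // where T1 is a wider signed type correspond to the sign-extending
        // LD1S* variants.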
        switch (dtype) {
          case 0x0:
            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x1:
            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x2:
            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x3:
            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x4:
            return new Base<int64_t, int32_t>(mn, machInst, zt, pg, rn, rm);
          case 0x5:
            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
          case 0x6:
            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
          case 0x7:
            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
          case 0x8:
            return new Base<int64_t, int16_t>(mn, machInst, zt, pg, rn, rm);
          case 0x9:
            return new Base<int32_t, int16_t>(mn, machInst, zt, pg, rn, rm);
          case 0xa:
            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
          case 0xb:
            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
          case 0xc:
            return new Base<int64_t, int8_t>(mn, machInst, zt, pg, rn, rm);
          case 0xd:
            return new Base<int32_t, int8_t>(mn, machInst, zt, pg, rn, rm);
          case 0xe:
            return new Base<int16_t, int8_t>(mn, machInst, zt, pg, rn, rm);
          case 0xf:
            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, rm);
        }
        return new Unknown64(machInst);
    }
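
    // For illustration only: a hypothetical decode rule for a contiguous
    // load in scalar-plus-scalar form could instantiate this helper as
    // shown below. The actual call sites live in the AArch64 decoder, and
    // the dtype position (assumed here to be bits 24:21, as in the SVE
    // LD1 scalar-plus-scalar encodings) is not defined in this file.
    //
    //     return decodeSveContigLoadSSInsts<SveContigLoadSS>(
    //         bits(machInst, 24, 21), machInst, zt, pg, rn, rm, false);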

    // Decodes SVE contiguous load instructions, scalar plus immediate form.
    template <template <typename T1, typename T2> class Base>
    StaticInstPtr
    decodeSveContigLoadSIInsts(uint8_t dtype, ExtMachInst machInst,
                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
                               uint64_t imm, bool firstFaulting,
                               bool replicate = false)
    {
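        // Load-and-replicate instructions have no first-faulting variant,
        // so the two flags are mutually exclusive.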
        assert(!(replicate && firstFaulting));

        const char* mn = replicate ? "ld1r" :
                                     (firstFaulting ? "ldff1" : "ld1");
        switch (dtype) {
          case 0x0:
            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x1:
            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x2:
            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x3:
            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x4:
            return new Base<int64_t, int32_t>(mn, machInst, zt, pg, rn, imm);
          case 0x5:
            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
          case 0x6:
            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
          case 0x7:
            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
          case 0x8:
            return new Base<int64_t, int16_t>(mn, machInst, zt, pg, rn, imm);
          case 0x9:
            return new Base<int32_t, int16_t>(mn, machInst, zt, pg, rn, imm);
          case 0xa:
            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
          case 0xb:
            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
          case 0xc:
            return new Base<int64_t, int8_t>(mn, machInst, zt, pg, rn, imm);
          case 0xd:
            return new Base<int32_t, int8_t>(mn, machInst, zt, pg, rn, imm);
          case 0xe:
            return new Base<int16_t, int8_t>(mn, machInst, zt, pg, rn, imm);
          case 0xf:
            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, imm);
        }
        return new Unknown64(machInst);
    }

    // Decodes SVE contiguous store instructions, scalar plus scalar form.
    template <template <typename T1, typename T2> class Base>
    StaticInstPtr
    decodeSveContigStoreSSInsts(uint8_t dtype, ExtMachInst machInst,
                                IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
                                IntRegIndex rm)
    {
        const char* mn = "st1";
        switch (dtype) {
          case 0x0:
            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x1:
            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x2:
            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x3:
            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
          case 0x5:
            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
          case 0x6:
            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
          case 0x7:
            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
          case 0xa:
            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
          case 0xb:
            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
          case 0xf:
            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, rm);
        }
        return new Unknown64(machInst);
    }

    // Decodes SVE contiguous store instructions, scalar plus immediate form.
    template <template <typename T1, typename T2> class Base>
    StaticInstPtr
    decodeSveContigStoreSIInsts(uint8_t dtype, ExtMachInst machInst,
                                IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
                                int8_t imm)
    {
        const char* mn = "st1";
        switch (dtype) {
          case 0x0:
            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x1:
            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x2:
            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x3:
            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
          case 0x5:
            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
          case 0x6:
            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
          case 0x7:
            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
          case 0xa:
            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
          case 0xb:
            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
          case 0xf:
            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, imm);
        }
        return new Unknown64(machInst);
    }

    // NOTE: SVE load-and-replicate instructions are decoded with
    // decodeSveContigLoadSIInsts(...).
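    // For example, LD1RB is decoded through that path with replicate set
    // to true, which selects the "ld1r" mnemonic above.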

}};

output decoder {{

    StaticInstPtr
    decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
                               IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
                               uint64_t imm, bool esizeIs32,
                               bool firstFaulting)
    {
        const char* mn = firstFaulting ? "ldff1" : "ld1";
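        // dtype combinations that have no 32-bit-element form (the
        // sign-extended word and the doubleword cases) break out of the
        // switch and decode to Unknown64; the same pattern recurs in the
        // other gather/scatter decoders below.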
        switch (dtype) {
          case 0x0:
            if (esizeIs32) {
                return new SveIndexedMemVI<int32_t, int8_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<int64_t, int8_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
          case 0x1:
            if (esizeIs32) {
                return new SveIndexedMemVI<uint32_t, uint8_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<uint64_t, uint8_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
          case 0x2:
            if (esizeIs32) {
                return new SveIndexedMemVI<int32_t, int16_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<int64_t, int16_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
          case 0x3:
            if (esizeIs32) {
                return new SveIndexedMemVI<uint32_t, uint16_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<uint64_t, uint16_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
          case 0x4:
            if (esizeIs32) {
                break;
            } else {
                return new SveIndexedMemVI<int64_t, int32_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
          case 0x5:
            if (esizeIs32) {
                return new SveIndexedMemVI<uint32_t, uint32_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<uint64_t, uint32_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
          case 0x7:
            if (esizeIs32) {
                break;
            } else {
                return new SveIndexedMemVI<uint64_t, uint64_t,
                                           SveGatherLoadVIMicroop>(
                    mn, machInst, MemReadOp, zt, pg, zn, imm);
            }
        }
        return new Unknown64(machInst);
    }

    StaticInstPtr
    decodeSveGatherLoadSVInsts(uint8_t dtype, ExtMachInst machInst,
                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
                               IntRegIndex zm, bool esizeIs32, bool offsetIs32,
                               bool offsetIsSigned, bool offsetIsScaled,
                               bool firstFaulting)
    {
        const char* mn = firstFaulting ? "ldff1" : "ld1";
        switch (dtype) {
          case 0x0:
            if (esizeIs32) {
                return new SveIndexedMemSV<int32_t, int8_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<int64_t, int8_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x1:
            if (esizeIs32) {
                return new SveIndexedMemSV<uint32_t, uint8_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<uint64_t, uint8_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x2:
            if (esizeIs32) {
                return new SveIndexedMemSV<int32_t, int16_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<int64_t, int16_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x3:
            if (esizeIs32) {
                return new SveIndexedMemSV<uint32_t, uint16_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<uint64_t, uint16_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x4:
            if (esizeIs32) {
                break;
            } else {
                return new SveIndexedMemSV<int64_t, int32_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x5:
            if (esizeIs32) {
                return new SveIndexedMemSV<uint32_t, uint32_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<uint64_t, uint32_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x7:
            if (esizeIs32) {
                break;
            } else {
                return new SveIndexedMemSV<uint64_t, uint64_t,
                                           SveGatherLoadSVMicroop>(
                    mn, machInst, MemReadOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
        }
        return new Unknown64(machInst);
    }

    StaticInstPtr
    decodeSveScatterStoreVIInsts(uint8_t msz, ExtMachInst machInst,
                                 IntRegIndex zt, IntRegIndex pg,
                                 IntRegIndex zn, uint64_t imm,
                                 bool esizeIs32)
    {
        const char* mn = "st1";
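        // msz encodes the memory element size in log2 bytes (0 = B, 1 = H,
        // 2 = W, 3 = D); doubleword accesses are only valid for 64-bit
        // vector elements.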
        switch (msz) {
          case 0x0:
            if (esizeIs32) {
                return new SveIndexedMemVI<uint32_t, uint8_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<uint64_t, uint8_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            }
          case 0x1:
            if (esizeIs32) {
                return new SveIndexedMemVI<uint32_t, uint16_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<uint64_t, uint16_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            }
          case 0x2:
            if (esizeIs32) {
                return new SveIndexedMemVI<uint32_t, uint32_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            } else {
                return new SveIndexedMemVI<uint64_t, uint32_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            }
          case 0x3:
            if (esizeIs32) {
                break;
            } else {
                return new SveIndexedMemVI<uint64_t, uint64_t,
                                           SveScatterStoreVIMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, zn, imm);
            }
        }
        return new Unknown64(machInst);
    }

    StaticInstPtr
    decodeSveScatterStoreSVInsts(uint8_t msz, ExtMachInst machInst,
                                 IntRegIndex zt, IntRegIndex pg,
                                 IntRegIndex rn, IntRegIndex zm,
                                 bool esizeIs32, bool offsetIs32,
                                 bool offsetIsSigned, bool offsetIsScaled)
    {
        const char* mn = "st1";
        switch (msz) {
          case 0x0:
            if (esizeIs32) {
                return new SveIndexedMemSV<uint32_t, uint8_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<uint64_t, uint8_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x1:
            if (esizeIs32) {
                return new SveIndexedMemSV<uint32_t, uint16_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<uint64_t, uint16_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x2:
            if (esizeIs32) {
                return new SveIndexedMemSV<uint32_t, uint32_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            } else {
                return new SveIndexedMemSV<uint64_t, uint32_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
          case 0x3:
            if (esizeIs32) {
                break;
            } else {
                return new SveIndexedMemSV<uint64_t, uint64_t,
                                           SveScatterStoreSVMicroop>(
                    mn, machInst, MemWriteOp, zt, pg, rn, zm,
                    offsetIs32, offsetIsSigned, offsetIsScaled);
            }
        }
        return new Unknown64(machInst);
    }

}};


let {{

    header_output = ''
    exec_output = ''
    decoders = { 'Generic': {} }

    SPAlignmentCheckCode = '''
        if (this->baseIsSP && bits(XBase, 3, 0) &&
            SPAlignmentCheckEnabled(xc->tcBase())) {
            return std::make_shared<SPAlignmentFault>();
        }
    '''
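    # When the base register is the stack pointer, any of address bits<3:0>
    # being set means the base is not 16-byte aligned, which faults if SP
    # alignment checking is enabled.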

    def emitSveMemFillSpill(isPred):
        global header_output, exec_output, decoders
        eaCode = SPAlignmentCheckCode + '''
        int memAccessSize = %(memacc_size)s;
        EA = XBase + ((int64_t) imm * %(memacc_size)s)''' % {
            'memacc_size': 'eCount / 8' if isPred else 'eCount'}
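        # A predicate register holds one bit per byte of vector state, so a
        # predicate fill/spill transfers eCount / 8 bytes, while a vector
        # fill/spill transfers eCount bytes.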
        if isPred:
            loadMemAccCode = '''
            int index = 0;
            uint8_t byte;
            for (int i = 0; i < eCount / 8; i++) {
                byte = memDataView[i];
                for (int j = 0; j < 8; j++, index++) {
                    PDest_x[index] = (byte >> j) & 1;
                }
            }
            '''
            storeMemAccCode = '''
            int index = 0;
            uint8_t byte;
            for (int i = 0; i < eCount / 8; i++) {
                byte = 0;
                for (int j = 0; j < 8; j++, index++) {
                    byte |= PDest_x[index] << j;
                }
                memDataView[i] = byte;
            }
            '''
            storeWrEnableCode = '''
            auto wrEn = std::vector<bool>(eCount / 8, true);
            '''
        else:
            loadMemAccCode = '''
            for (int i = 0; i < eCount; i++) {
                AA64FpDest_x[i] = memDataView[i];
            }
            '''
            storeMemAccCode = '''
            for (int i = 0; i < eCount; i++) {
                memDataView[i] = AA64FpDest_x[i];
            }
            '''
            storeWrEnableCode = '''
            auto wrEn = std::vector<bool>(eCount, true);
            '''
        loadIop = InstObjParams('ldr',
            'SveLdrPred' if isPred else 'SveLdrVec',
            'SveMemPredFillSpill' if isPred else 'SveMemVecFillSpill',
            {'tpl_header': '',
             'tpl_args': '',
             'memacc_code': loadMemAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'fa_code' : ''},
            ['IsMemRef', 'IsLoad'])
        storeIop = InstObjParams('str',
            'SveStrPred' if isPred else 'SveStrVec',
            'SveMemPredFillSpill' if isPred else 'SveMemVecFillSpill',
            {'tpl_header': '',
             'tpl_args': '',
             'wren_code': storeWrEnableCode,
             'memacc_code': storeMemAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'fa_code' : ''},
            ['IsMemRef', 'IsStore'])
        header_output += SveMemFillSpillOpDeclare.subst(loadIop)
        header_output += SveMemFillSpillOpDeclare.subst(storeIop)
        exec_output += (
            SveContigLoadExecute.subst(loadIop) +
            SveContigLoadInitiateAcc.subst(loadIop) +
            SveContigLoadCompleteAcc.subst(loadIop) +
            SveContigStoreExecute.subst(storeIop) +
            SveContigStoreInitiateAcc.subst(storeIop) +
            SveContigStoreCompleteAcc.subst(storeIop))

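    # The (RegElemType, MemElemType) pairs below mirror, in order, the
    # dtype cases handled by the decode helpers above; the store tuples
    # contain only unsigned pairs because there are no sign-extending
    # store forms.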
    loadTplArgs = (
        ('uint8_t', 'uint8_t'),
        ('uint16_t', 'uint8_t'),
        ('uint32_t', 'uint8_t'),
        ('uint64_t', 'uint8_t'),
        ('int64_t', 'int32_t'),
        ('uint16_t', 'uint16_t'),
        ('uint32_t', 'uint16_t'),
        ('uint64_t', 'uint16_t'),
        ('int64_t', 'int16_t'),
        ('int32_t', 'int16_t'),
        ('uint32_t', 'uint32_t'),
        ('uint64_t', 'uint32_t'),
        ('int64_t', 'int8_t'),
        ('int32_t', 'int8_t'),
        ('int16_t', 'int8_t'),
        ('uint64_t', 'uint64_t'),
    )

    storeTplArgs = (
        ('uint8_t', 'uint8_t'),
        ('uint16_t', 'uint8_t'),
        ('uint32_t', 'uint8_t'),
        ('uint64_t', 'uint8_t'),
        ('uint16_t', 'uint16_t'),
        ('uint32_t', 'uint16_t'),
        ('uint64_t', 'uint16_t'),
        ('uint32_t', 'uint32_t'),
        ('uint64_t', 'uint32_t'),
        ('uint64_t', 'uint64_t'),
    )

    gatherLoadTplArgs = (
        ('int32_t', 'int8_t'),
        ('int64_t', 'int8_t'),
        ('uint32_t', 'uint8_t'),
        ('uint64_t', 'uint8_t'),
        ('int32_t', 'int16_t'),
        ('int64_t', 'int16_t'),
        ('uint32_t', 'uint16_t'),
        ('uint64_t', 'uint16_t'),
        ('int64_t', 'int32_t'),
        ('uint32_t', 'uint32_t'),
        ('uint64_t', 'uint32_t'),
        ('uint64_t', 'uint64_t'),
    )

    scatterStoreTplArgs = (
        ('uint32_t', 'uint8_t'),
        ('uint64_t', 'uint8_t'),
        ('uint32_t', 'uint16_t'),
        ('uint64_t', 'uint16_t'),
        ('uint32_t', 'uint32_t'),
        ('uint64_t', 'uint32_t'),
        ('uint64_t', 'uint64_t'),
    )

    # Generates definitions for SVE contiguous loads and stores
    def emitSveContigMemInsts(offsetIsImm):
        global header_output, exec_output, decoders
        tplHeader = 'template <class RegElemType, class MemElemType>'
        tplArgs = '<RegElemType, MemElemType>'
        eaCode = SPAlignmentCheckCode + '''
        int memAccessSize = eCount * sizeof(MemElemType);
        EA = XBase + '''
        if offsetIsImm:
            eaCode += '((int64_t) this->imm * eCount * sizeof(MemElemType))'
        else:
            eaCode += '(XOffset * sizeof(MemElemType))'
        loadMemAccCode = '''
        for (int i = 0; i < eCount; i++) {
            if (GpOp_x[i]) {
                AA64FpDest_x[i] = memDataView[i];
            } else {
                AA64FpDest_x[i] = 0;
            }
        }
        '''
        storeMemAccCode = '''
        for (int i = 0; i < eCount; i++) {
            if (GpOp_x[i]) {
                memDataView[i] = AA64FpDest_x[i];
            } else {
                memDataView[i] = 0;
                for (int j = 0; j < sizeof(MemElemType); j++) {
                    wrEn[sizeof(MemElemType) * i + j] = false;
                }
            }
        }
        '''
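        # wrEn provides one write-enable flag per byte of the store; the
        # store access code above clears the flags for bytes belonging to
        # inactive elements so they are not written to memory.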
        storeWrEnableCode = '''
        auto wrEn = std::vector<bool>(sizeof(MemElemType) * eCount, true);
        '''
        loadIop = InstObjParams('ld1',
            'SveContigLoadSI' if offsetIsImm else 'SveContigLoadSS',
            'SveContigMemSI' if offsetIsImm else 'SveContigMemSS',
            {'tpl_header': tplHeader,
             'tpl_args': tplArgs,
             'memacc_code': loadMemAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'fa_code' : ''},
            ['IsMemRef', 'IsLoad'])
        storeIop = InstObjParams('st1',
            'SveContigStoreSI' if offsetIsImm else 'SveContigStoreSS',
            'SveContigMemSI' if offsetIsImm else 'SveContigMemSS',
            {'tpl_header': tplHeader,
             'tpl_args': tplArgs,
             'wren_code': storeWrEnableCode,
             'memacc_code': storeMemAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'fa_code' : ''},
            ['IsMemRef', 'IsStore'])
        if offsetIsImm:
            header_output += SveContigMemSIOpDeclare.subst(loadIop)
            header_output += SveContigMemSIOpDeclare.subst(storeIop)
        else:
            header_output += SveContigMemSSOpDeclare.subst(loadIop)
            header_output += SveContigMemSSOpDeclare.subst(storeIop)
        exec_output += (
            SveContigLoadExecute.subst(loadIop) +
            SveContigLoadInitiateAcc.subst(loadIop) +
            SveContigLoadCompleteAcc.subst(loadIop) +
            SveContigStoreExecute.subst(storeIop) +
            SveContigStoreInitiateAcc.subst(storeIop) +
            SveContigStoreCompleteAcc.subst(storeIop))
        for args in loadTplArgs:
            substDict = {'tpl_args': '<%s>' % ', '.join(args),
                         'class_name': 'SveContigLoadSI' if offsetIsImm
                                       else 'SveContigLoadSS'}
            exec_output += SveContigMemExecDeclare.subst(substDict)
        for args in storeTplArgs:
            substDict = {'tpl_args': '<%s>' % ', '.join(args),
                         'class_name': 'SveContigStoreSI' if offsetIsImm
                                       else 'SveContigStoreSS'}
            exec_output += SveContigMemExecDeclare.subst(substDict)

    # Generates definitions for SVE load-and-replicate instructions
    def emitSveLoadAndRepl():
        global header_output, exec_output, decoders
        tplHeader = 'template <class RegElemType, class MemElemType>'
        tplArgs = '<RegElemType, MemElemType>'
        eaCode = SPAlignmentCheckCode + '''
        EA = XBase + imm * sizeof(MemElemType);'''
        memAccCode = '''
        for (int i = 0; i < eCount; i++) {
            if (GpOp_x[i]) {
                AA64FpDest_x[i] = memData;
            } else {
                AA64FpDest_x[i] = 0;
            }
        }
        '''
        iop = InstObjParams('ld1r',
            'SveLoadAndRepl',
            'SveContigMemSI',
            {'tpl_header': tplHeader,
             'tpl_args': tplArgs,
             'memacc_code': memAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'fa_code' : ''},
            ['IsMemRef', 'IsLoad'])
        header_output += SveContigMemSIOpDeclare.subst(iop)
        exec_output += (
            SveLoadAndReplExecute.subst(iop) +
            SveLoadAndReplInitiateAcc.subst(iop) +
            SveLoadAndReplCompleteAcc.subst(iop))
        for args in loadTplArgs:
            substDict = {'tpl_args': '<%s>' % ', '.join(args),
                         'class_name': 'SveLoadAndRepl'}
            exec_output += SveContigMemExecDeclare.subst(substDict)

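    # The two addressing forms of SVE indexed (gather/scatter) accesses:
    # a vector of base addresses plus an immediate offset, or a scalar
    # base plus a vector of offsets.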
    class IndexedAddrForm:
        VEC_PLUS_IMM = 0
        SCA_PLUS_VEC = 1

    # Generates definitions for the transfer microops of SVE indexed memory
    # operations (gather loads, scatter stores)
    def emitSveIndexedMemMicroops(indexed_addr_form):
        assert indexed_addr_form in (IndexedAddrForm.VEC_PLUS_IMM,
                                     IndexedAddrForm.SCA_PLUS_VEC)
        global header_output, exec_output, decoders
        tplHeader = 'template <class RegElemType, class MemElemType>'
        tplArgs = '<RegElemType, MemElemType>'
        if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
            eaCode = '''
        EA = AA64FpBase_x[elemIndex] + imm * sizeof(MemElemType)'''
        else:
            eaCode = '''
        uint64_t offset = AA64FpOffset_x[elemIndex];
        if (offsetIs32) {
            offset &= (1ULL << 32) - 1;
        }
        if (offsetIsSigned) {
            offset = sext<32>(offset);
        }
        if (offsetIsScaled) {
            offset *= sizeof(MemElemType);
        }
        EA = XBase + offset'''
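        # For the scalar-plus-vector form the offset is, in this order,
        # optionally truncated to 32 bits, optionally sign-extended from
        # bit 31, and optionally scaled by the memory element size.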
        loadMemAccCode = '''
            if (GpOp_x[elemIndex]) {
                AA64FpDest_x[elemIndex] = memData;
            } else {
                AA64FpDest_x[elemIndex] = 0;
            }
        '''
        storeMemAccCode = '''
            memData = AA64FpDest_x[elemIndex];
        '''
        predCheckCode = 'GpOp_x[elemIndex]'
        loadIop = InstObjParams('ld1',
            ('SveGatherLoadVIMicroop'
             if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
             else 'SveGatherLoadSVMicroop'),
            'MicroOp',
            {'tpl_header': tplHeader,
             'tpl_args': tplArgs,
             'memacc_code': loadMemAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'pred_check_code' : predCheckCode,
             'fa_code' : ''},
            ['IsMicroop', 'IsMemRef', 'IsLoad'])
        storeIop = InstObjParams('st1',
            ('SveScatterStoreVIMicroop'
             if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
             else 'SveScatterStoreSVMicroop'),
            'MicroOp',
            {'tpl_header': tplHeader,
             'tpl_args': tplArgs,
             'memacc_code': storeMemAccCode,
             'ea_code' : sveEnabledCheckCode + eaCode,
             'pred_check_code' : predCheckCode,
             'fa_code' : ''},
            ['IsMicroop', 'IsMemRef', 'IsStore'])
        if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
            header_output += SveIndexedMemVIMicroopDeclare.subst(loadIop)
            header_output += SveIndexedMemVIMicroopDeclare.subst(storeIop)
        else:
            header_output += SveIndexedMemSVMicroopDeclare.subst(loadIop)
            header_output += SveIndexedMemSVMicroopDeclare.subst(storeIop)
        exec_output += (
            SveGatherLoadMicroopExecute.subst(loadIop) +
            SveGatherLoadMicroopInitiateAcc.subst(loadIop) +
            SveGatherLoadMicroopCompleteAcc.subst(loadIop) +
            SveScatterStoreMicroopExecute.subst(storeIop) +
            SveScatterStoreMicroopInitiateAcc.subst(storeIop) +
            SveScatterStoreMicroopCompleteAcc.subst(storeIop))
        for args in gatherLoadTplArgs:
            substDict = {'tpl_args': '<%s>' % ', '.join(args),
                         'class_name': (
                             'SveGatherLoadVIMicroop'
                             if indexed_addr_form == \
                                 IndexedAddrForm.VEC_PLUS_IMM
                             else 'SveGatherLoadSVMicroop')}
            # TODO: this should become SveMemExecDeclare
            exec_output += SveContigMemExecDeclare.subst(substDict)
        for args in scatterStoreTplArgs:
            substDict = {'tpl_args': '<%s>' % ', '.join(args),
                         'class_name': (
                             'SveScatterStoreVIMicroop'
                             if indexed_addr_form == \
                                 IndexedAddrForm.VEC_PLUS_IMM
                             else 'SveScatterStoreSVMicroop')}
            # TODO: this should become SveMemExecDeclare
            exec_output += SveContigMemExecDeclare.subst(substDict)

    # Generates definitions for the first microop of SVE gather loads,
    # required to propagate the source vector register to the transfer
    # microops
    def emitSveGatherLoadCpySrcVecMicroop():
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
                xc->tcBase());
        for (unsigned i = 0; i < eCount; i++) {
            AA64FpUreg0_ub[i] = AA64FpOp1_ub[i];
        }'''
        iop = InstObjParams('ld1',
            'SveGatherLoadCpySrcVecMicroop',
            'MicroOp',
            {'code': code},
            ['IsMicroop'])
        header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
        exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)

    # LD1[S]{B,H,W,D} (scalar plus immediate)
    # ST1[S]{B,H,W,D} (scalar plus immediate)
    emitSveContigMemInsts(True)
    # LD1[S]{B,H,W,D} (scalar plus scalar)
    # ST1[S]{B,H,W,D} (scalar plus scalar)
    emitSveContigMemInsts(False)

    # LD1R[S]{B,H,W,D}
    emitSveLoadAndRepl()

    # LDR (predicate), STR (predicate)
    emitSveMemFillSpill(True)
    # LDR (vector), STR (vector)
    emitSveMemFillSpill(False)

    # LD1[S]{B,H,W,D} (vector plus immediate)
    # ST1[S]{B,H,W,D} (vector plus immediate)
    emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM)
    # LD1[S]{B,H,W,D} (scalar plus vector)
    # ST1[S]{B,H,W,D} (scalar plus vector)
    emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC)

    # Source vector copy microop for gather loads
    emitSveGatherLoadCpySrcVecMicroop()

}};