sve_mem.isa revision 13955:e0f46be83fc7
1// Copyright (c) 2017 ARM Limited
2// All rights reserved
3//
4// The license below extends only to copyright in the software and shall
5// not be construed as granting a license to any other intellectual
6// property including but not limited to intellectual property relating
7// to a hardware implementation of the functionality of the software
8// licensed hereunder.  You may use the software subject to the license
9// terms below provided that you ensure that this notice is replicated
10// unmodified and in its entirety in all distributions of the software,
11// modified or unmodified, in source code or in binary form.
12//
13// Redistribution and use in source and binary forms, with or without
14// modification, are permitted provided that the following conditions are
15// met: redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer;
17// redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution;
20// neither the name of the copyright holders nor the names of its
21// contributors may be used to endorse or promote products derived from
22// this software without specific prior written permission.
23//
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Authors: Giacomo Gabrielli
37
38// @file Definition of SVE memory access instructions.
39
40output header {{
41
42    // Decodes SVE contiguous load instructions, scalar plus scalar form.
43    template <template <typename T1, typename T2> class Base>
44    StaticInstPtr
45    decodeSveContigLoadSSInsts(uint8_t dtype, ExtMachInst machInst,
46                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
47                               IntRegIndex rm, bool firstFaulting)
48    {
49        const char* mn = firstFaulting ? "ldff1" : "ld1";
50        switch (dtype) {
51          case 0x0:
52            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
53          case 0x1:
54            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
55          case 0x2:
56            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
57          case 0x3:
58            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
59          case 0x4:
60            return new Base<int64_t, int32_t>(mn, machInst, zt, pg, rn, rm);
61          case 0x5:
62            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
63          case 0x6:
64            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
65          case 0x7:
66            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
67          case 0x8:
68            return new Base<int64_t, int16_t>(mn, machInst, zt, pg, rn, rm);
69          case 0x9:
70            return new Base<int32_t, int16_t>(mn, machInst, zt, pg, rn, rm);
71          case 0xa:
72            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
73          case 0xb:
74            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
75          case 0xc:
76            return new Base<int64_t, int8_t>(mn, machInst, zt, pg, rn, rm);
77          case 0xd:
78            return new Base<int32_t, int8_t>(mn, machInst, zt, pg, rn, rm);
79          case 0xe:
80            return new Base<int16_t, int8_t>(mn, machInst, zt, pg, rn, rm);
81          case 0xf:
82            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, rm);
83        }
84        return new Unknown64(machInst);
85    }
86
87    // Decodes SVE contiguous load instructions, scalar plus immediate form.
88    template <template <typename T1, typename T2> class Base>
89    StaticInstPtr
90    decodeSveContigLoadSIInsts(uint8_t dtype, ExtMachInst machInst,
91                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
92                               uint64_t imm, bool firstFaulting,
93                               bool replicate = false)
94    {
95        assert(!(replicate && firstFaulting));
96
97        const char* mn = replicate ? "ld1r" :
98                                     (firstFaulting ? "ldff1" : "ld1");
99        switch (dtype) {
100          case 0x0:
101            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
102          case 0x1:
103            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
104          case 0x2:
105            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
106          case 0x3:
107            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
108          case 0x4:
109            return new Base<int64_t, int32_t>(mn, machInst, zt, pg, rn, imm);
110          case 0x5:
111            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
112          case 0x6:
113            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
114          case 0x7:
115            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
116          case 0x8:
117            return new Base<int64_t, int16_t>(mn, machInst, zt, pg, rn, imm);
118          case 0x9:
119            return new Base<int32_t, int16_t>(mn, machInst, zt, pg, rn, imm);
120          case 0xa:
121            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
122          case 0xb:
123            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
124          case 0xc:
125            return new Base<int64_t, int8_t>(mn, machInst, zt, pg, rn, imm);
126          case 0xd:
127            return new Base<int32_t, int8_t>(mn, machInst, zt, pg, rn, imm);
128          case 0xe:
129            return new Base<int16_t, int8_t>(mn, machInst, zt, pg, rn, imm);
130          case 0xf:
131            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, imm);
132        }
133        return new Unknown64(machInst);
134    }
135
136    // Decodes SVE contiguous store instructions, scalar plus scalar form.
137    template <template <typename T1, typename T2> class Base>
138    StaticInstPtr
139    decodeSveContigStoreSSInsts(uint8_t dtype, ExtMachInst machInst,
140                                IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
141                                IntRegIndex rm)
142    {
143        const char* mn = "st1";
144        switch (dtype) {
145          case 0x0:
146            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
147          case 0x1:
148            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
149          case 0x2:
150            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
151          case 0x3:
152            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, rm);
153          case 0x5:
154            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
155          case 0x6:
156            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
157          case 0x7:
158            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, rm);
159          case 0xa:
160            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
161          case 0xb:
162            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, rm);
163          case 0xf:
164            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, rm);
165        }
166        return new Unknown64(machInst);
167    }
168
169    // Decodes SVE contiguous store instructions, scalar plus immediate form.
170    template <template <typename T1, typename T2> class Base>
171    StaticInstPtr
172    decodeSveContigStoreSIInsts(uint8_t dtype, ExtMachInst machInst,
173                                IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
174                                int8_t imm)
175    {
176        const char* mn = "st1";
177        switch (dtype) {
178          case 0x0:
179            return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
180          case 0x1:
181            return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
182          case 0x2:
183            return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
184          case 0x3:
185            return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, imm);
186          case 0x5:
187            return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
188          case 0x6:
189            return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
190          case 0x7:
191            return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, imm);
192          case 0xa:
193            return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
194          case 0xb:
195            return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, imm);
196          case 0xf:
197            return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, imm);
198        }
199        return new Unknown64(machInst);
200    }
201
202    // NOTE: SVE load-and-replicate instructions are decoded with
203    // decodeSveContigLoadSIInsts(...).
204
205}};
206
207let {{
208
209    header_output = ''
210    exec_output = ''
211    decoders = { 'Generic': {} }
212
213    SPAlignmentCheckCode = '''
214        if (this->baseIsSP && bits(XBase, 3, 0) &&
215            SPAlignmentCheckEnabled(xc->tcBase())) {
216            return std::make_shared<SPAlignmentFault>();
217        }
218    '''
219
220    def emitSveMemFillSpill(isPred):
221        global header_output, exec_output, decoders
222        eaCode = SPAlignmentCheckCode + '''
223        int memAccessSize = %(memacc_size)s;
224        EA = XBase + ((int64_t) imm * %(memacc_size)s)''' % {
225            'memacc_size': 'eCount / 8' if isPred else 'eCount'}
226        if isPred:
227            loadMemAccCode = '''
228            int index = 0;
229            uint8_t byte;
230            for (int i = 0; i < eCount / 8; i++) {
231                byte = memDataView[i];
232                for (int j = 0; j < 8; j++, index++) {
233                    PDest_x[index] = (byte >> j) & 1;
234                }
235            }
236            '''
237            storeMemAccCode = '''
238            int index = 0;
239            uint8_t byte;
240            for (int i = 0; i < eCount / 8; i++) {
241                byte = 0;
242                for (int j = 0; j < 8; j++, index++) {
243                    byte |= PDest_x[index] << j;
244                }
245                memDataView[i] = byte;
246            }
247            '''
248            storeWrEnableCode = '''
249            auto wrEn = std::vector<bool>(eCount / 8, true);
250            '''
251        else:
252            loadMemAccCode = '''
253            for (int i = 0; i < eCount; i++) {
254                AA64FpDest_x[i] = memDataView[i];
255            }
256            '''
257            storeMemAccCode = '''
258            for (int i = 0; i < eCount; i++) {
259                memDataView[i] = AA64FpDest_x[i];
260            }
261            '''
262            storeWrEnableCode = '''
263            auto wrEn = std::vector<bool>(sizeof(MemElemType) * eCount, true);
264            '''
265        loadIop = InstObjParams('ldr',
266            'SveLdrPred' if isPred else 'SveLdrVec',
267            'SveMemPredFillSpill' if isPred else 'SveMemVecFillSpill',
268            {'tpl_header': '',
269             'tpl_args': '',
270             'memacc_code': loadMemAccCode,
271             'ea_code' : sveEnabledCheckCode + eaCode,
272             'fa_code' : ''},
273            ['IsMemRef', 'IsLoad'])
274        storeIop = InstObjParams('str',
275            'SveStrPred' if isPred else 'SveStrVec',
276            'SveMemPredFillSpill' if isPred else 'SveMemVecFillSpill',
277            {'tpl_header': '',
278             'tpl_args': '',
279             'wren_code': storeWrEnableCode,
280             'memacc_code': storeMemAccCode,
281             'ea_code' : sveEnabledCheckCode + eaCode,
282             'fa_code' : ''},
283            ['IsMemRef', 'IsStore'])
284        header_output += SveMemFillSpillOpDeclare.subst(loadIop)
285        header_output += SveMemFillSpillOpDeclare.subst(storeIop)
286        exec_output += (
287            SveContigLoadExecute.subst(loadIop) +
288            SveContigLoadInitiateAcc.subst(loadIop) +
289            SveContigLoadCompleteAcc.subst(loadIop) +
290            SveContigStoreExecute.subst(storeIop) +
291            SveContigStoreInitiateAcc.subst(storeIop) +
292            SveContigStoreCompleteAcc.subst(storeIop))
293
294    loadTplArgs = (
295        ('uint8_t', 'uint8_t'),
296        ('uint16_t', 'uint8_t'),
297        ('uint32_t', 'uint8_t'),
298        ('uint64_t', 'uint8_t'),
299        ('int64_t', 'int32_t'),
300        ('uint16_t', 'uint16_t'),
301        ('uint32_t', 'uint16_t'),
302        ('uint64_t', 'uint16_t'),
303        ('int64_t', 'int16_t'),
304        ('int32_t', 'int16_t'),
305        ('uint32_t', 'uint32_t'),
306        ('uint64_t', 'uint32_t'),
307        ('int64_t', 'int8_t'),
308        ('int32_t', 'int8_t'),
309        ('int16_t', 'int8_t'),
310        ('uint64_t', 'uint64_t'),
311    )
312
313    storeTplArgs = (
314        ('uint8_t', 'uint8_t'),
315        ('uint16_t', 'uint8_t'),
316        ('uint32_t', 'uint8_t'),
317        ('uint64_t', 'uint8_t'),
318        ('uint16_t', 'uint16_t'),
319        ('uint32_t', 'uint16_t'),
320        ('uint64_t', 'uint16_t'),
321        ('uint32_t', 'uint32_t'),
322        ('uint64_t', 'uint32_t'),
323        ('uint64_t', 'uint64_t'),
324    )
325
326    # Generates definitions for SVE contiguous loads
327    def emitSveContigMemInsts(offsetIsImm):
328        global header_output, exec_output, decoders
329        tplHeader = 'template <class RegElemType, class MemElemType>'
330        tplArgs = '<RegElemType, MemElemType>'
331        eaCode = SPAlignmentCheckCode + '''
332        int memAccessSize = eCount * sizeof(MemElemType);
333        EA = XBase + '''
334        if offsetIsImm:
335            eaCode += '((int64_t) this->imm * eCount * sizeof(MemElemType))'
336        else:
337            eaCode += '(XOffset * sizeof(MemElemType));'
338        loadMemAccCode = '''
339        for (int i = 0; i < eCount; i++) {
340            if (GpOp_x[i]) {
341                AA64FpDest_x[i] = memDataView[i];
342            } else {
343                AA64FpDest_x[i] = 0;
344            }
345        }
346        '''
347        storeMemAccCode = '''
348        for (int i = 0; i < eCount; i++) {
349            if (GpOp_x[i]) {
350                memDataView[i] = AA64FpDest_x[i];
351            } else {
352                memDataView[i] = 0;
353                for (int j = 0; j < sizeof(MemElemType); j++) {
354                    wrEn[sizeof(MemElemType) * i + j] = false;
355                }
356            }
357        }
358        '''
359        storeWrEnableCode = '''
360        auto wrEn = std::vector<bool>(sizeof(MemElemType) * eCount, true);
361        '''
362        loadIop = InstObjParams('ld1',
363            'SveContigLoadSI' if offsetIsImm else 'SveContigLoadSS',
364            'SveContigMemSI' if offsetIsImm else 'SveContigMemSS',
365            {'tpl_header': tplHeader,
366             'tpl_args': tplArgs,
367             'memacc_code': loadMemAccCode,
368             'ea_code' : sveEnabledCheckCode + eaCode,
369             'fa_code' : ''},
370            ['IsMemRef', 'IsLoad'])
371        storeIop = InstObjParams('st1',
372            'SveContigStoreSI' if offsetIsImm else 'SveContigStoreSS',
373            'SveContigMemSI' if offsetIsImm else 'SveContigMemSS',
374            {'tpl_header': tplHeader,
375             'tpl_args': tplArgs,
376             'wren_code': storeWrEnableCode,
377             'memacc_code': storeMemAccCode,
378             'ea_code' : sveEnabledCheckCode + eaCode,
379             'fa_code' : ''},
380            ['IsMemRef', 'IsStore'])
381        if offsetIsImm:
382            header_output += SveContigMemSIOpDeclare.subst(loadIop)
383            header_output += SveContigMemSIOpDeclare.subst(storeIop)
384        else:
385            header_output += SveContigMemSSOpDeclare.subst(loadIop)
386            header_output += SveContigMemSSOpDeclare.subst(storeIop)
387        exec_output += (
388            SveContigLoadExecute.subst(loadIop) +
389            SveContigLoadInitiateAcc.subst(loadIop) +
390            SveContigLoadCompleteAcc.subst(loadIop) +
391            SveContigStoreExecute.subst(storeIop) +
392            SveContigStoreInitiateAcc.subst(storeIop) +
393            SveContigStoreCompleteAcc.subst(storeIop))
394        for args in loadTplArgs:
395            substDict = {'tpl_args': '<%s>' % ', '.join(args),
396                         'class_name': 'SveContigLoadSI' if offsetIsImm
397                                       else 'SveContigLoadSS'}
398            exec_output += SveContigMemExecDeclare.subst(substDict)
399        for args in storeTplArgs:
400            substDict = {'tpl_args': '<%s>' % ', '.join(args),
401                         'class_name': 'SveContigStoreSI' if offsetIsImm
402                                       else 'SveContigStoreSS'}
403            exec_output += SveContigMemExecDeclare.subst(substDict)
404
405    # Generates definitions for SVE load-and-replicate instructions
406    def emitSveLoadAndRepl():
407        global header_output, exec_output, decoders
408        tplHeader = 'template <class RegElemType, class MemElemType>'
409        tplArgs = '<RegElemType, MemElemType>'
410        eaCode = SPAlignmentCheckCode + '''
411        EA = XBase + imm * sizeof(MemElemType);'''
412        memAccCode = '''
413        for (int i = 0; i < eCount; i++) {
414            if (GpOp_x[i]) {
415                AA64FpDest_x[i] = memData;
416            } else {
417                AA64FpDest_x[i] = 0;
418            }
419        }
420        '''
421        iop = InstObjParams('ld1r',
422            'SveLoadAndRepl',
423            'SveContigMemSI',
424            {'tpl_header': tplHeader,
425             'tpl_args': tplArgs,
426             'memacc_code': memAccCode,
427             'ea_code' : sveEnabledCheckCode + eaCode,
428             'fa_code' : ''},
429            ['IsMemRef', 'IsLoad'])
430        header_output += SveContigMemSIOpDeclare.subst(iop)
431        exec_output += (
432            SveLoadAndReplExecute.subst(iop) +
433            SveLoadAndReplInitiateAcc.subst(iop) +
434            SveLoadAndReplCompleteAcc.subst(iop))
435        for args in loadTplArgs:
436            substDict = {'tpl_args': '<%s>' % ', '.join(args),
437                         'class_name': 'SveLoadAndRepl'}
438            exec_output += SveContigMemExecDeclare.subst(substDict)
439
440    # LD1[S]{B,H,W,D} (scalar plus immediate)
441    emitSveContigMemInsts(True)
442    # LD1[S]{B,H,W,D} (scalar plus scalar)
443    emitSveContigMemInsts(False)
444
445    # LD1R[S]{B,H,W,D}
446    emitSveLoadAndRepl()
447
448    # LDR (predicate), STR (predicate)
449    emitSveMemFillSpill(True)
450    # LDR (vector), STR (vector)
451    emitSveMemFillSpill(False)
452
453}};
454