fp64.isa revision 13120:690a0db8e58b
1// -*- mode:c++ -*-
2
3// Copyright (c) 2012-2013, 2016-2018 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Thomas Grocutt
39//          Edmund Grimley Evans
40
41let {{
42
43    header_output = ""
44    decoder_output = ""
45    exec_output = ""
46
47    fmovImmSCode = vfp64EnabledCheckCode + '''
48        AA64FpDestP0_uw = bits(imm, 31, 0);
49        AA64FpDestP1_uw = 0;
50        AA64FpDestP2_uw = 0;
51        AA64FpDestP3_uw = 0;
52    '''
53    fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
54                                { "code": fmovImmSCode,
55                                  "op_class": "FloatMiscOp" }, [])
56    header_output  += FpRegImmOpDeclare.subst(fmovImmSIop);
57    decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
58    exec_output    += BasicExecute.subst(fmovImmSIop);
59
60    fmovImmDCode = vfp64EnabledCheckCode + '''
61        AA64FpDestP0_uw = bits(imm, 31, 0);
62        AA64FpDestP1_uw = bits(imm, 63, 32);
63        AA64FpDestP2_uw = 0;
64        AA64FpDestP3_uw = 0;
65    '''
66    fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
67                                { "code": fmovImmDCode,
68                                  "op_class": "FloatMiscOp" }, [])
69    header_output  += FpRegImmOpDeclare.subst(fmovImmDIop);
70    decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
71    exec_output    += BasicExecute.subst(fmovImmDIop);
72
73    fmovRegSCode = vfp64EnabledCheckCode + '''
74        AA64FpDestP0_uw = AA64FpOp1P0_uw;
75        AA64FpDestP1_uw = 0;
76        AA64FpDestP2_uw = 0;
77        AA64FpDestP3_uw = 0;
78    '''
79    fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
80                                { "code": fmovRegSCode,
81                                  "op_class": "FloatMiscOp" }, [])
82    header_output  += FpRegRegOpDeclare.subst(fmovRegSIop);
83    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
84    exec_output    += BasicExecute.subst(fmovRegSIop);
85
86    fmovRegDCode = vfp64EnabledCheckCode + '''
87        AA64FpDestP0_uw = AA64FpOp1P0_uw;
88        AA64FpDestP1_uw = AA64FpOp1P1_uw;
89        AA64FpDestP2_uw = 0;
90        AA64FpDestP3_uw = 0;
91    '''
92    fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
93                                { "code": fmovRegDCode,
94                                  "op_class": "FloatMiscOp" }, [])
95    header_output  += FpRegRegOpDeclare.subst(fmovRegDIop);
96    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
97    exec_output    += BasicExecute.subst(fmovRegDIop);
98
99    fmovCoreRegWCode = vfp64EnabledCheckCode + '''
100        AA64FpDestP0_uw = WOp1_uw;
101        AA64FpDestP1_uw = 0;
102        AA64FpDestP2_uw = 0;
103        AA64FpDestP3_uw = 0;
104    '''
105    fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
106                                    { "code": fmovCoreRegWCode,
107                                      "op_class": "FloatMiscOp" }, [])
108    header_output  += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
109    decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
110    exec_output    += BasicExecute.subst(fmovCoreRegWIop);
111
112    fmovCoreRegXCode = vfp64EnabledCheckCode + '''
113        AA64FpDestP0_uw = XOp1_ud;
114        AA64FpDestP1_uw = XOp1_ud >> 32;
115        AA64FpDestP2_uw = 0;
116        AA64FpDestP3_uw = 0;
117    '''
118    fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
119                                    { "code": fmovCoreRegXCode,
120                                      "op_class": "FloatMiscOp" }, [])
121    header_output  += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
122    decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
123    exec_output    += BasicExecute.subst(fmovCoreRegXIop);
124
125    fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
126        /* Explicitly merge with previous value */
127        AA64FpDestP0_uw = AA64FpDestP0_uw;
128        AA64FpDestP1_uw = AA64FpDestP1_uw;
129        AA64FpDestP2_uw = XOp1_ud;
130        AA64FpDestP3_uw = XOp1_ud >> 32;'''
131    fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
132                                    { "code": fmovUCoreRegXCode,
133                                      "op_class": "FloatMiscOp" }, [])
134    header_output  += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
135    decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
136    exec_output    += BasicExecute.subst(fmovUCoreRegXIop);
137
138    fmovRegCoreWCode = vfp64EnabledCheckCode + '''
139        WDest = AA64FpOp1P0_uw;
140    '''
141    fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
142                                     { "code": fmovRegCoreWCode,
143                                       "op_class": "FloatMiscOp" }, [])
144    header_output  += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
145    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
146    exec_output    += BasicExecute.subst(fmovRegCoreWIop);
147
148    fmovRegCoreXCode = vfp64EnabledCheckCode + '''
149        XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
150    '''
151    fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
152                                     { "code": fmovRegCoreXCode,
153                                       "op_class": "FloatMiscOp" }, [])
154    header_output  += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
155    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
156    exec_output    += BasicExecute.subst(fmovRegCoreXIop);
157
158    fmovURegCoreXCode = vfp64EnabledCheckCode + '''
159        XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
160    '''
161    fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
162                                    { "code":     fmovURegCoreXCode,
163                                      "op_class": "FloatMiscOp" }, [])
164    header_output  += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
165    decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
166    exec_output    += BasicExecute.subst(fmovURegCoreXIop);
167}};
168
169let {{
170
171    header_output = ""
172    decoder_output = ""
173    exec_output = ""
174
175    halfIntConvCode = vfp64EnabledCheckCode + '''
176        FPSCR fpscr = (FPSCR) FpscrExc;
177        uint16_t cOp1  = AA64FpOp1P0_uw;
178        uint16_t cDest = %(op)s;
179        AA64FpDestP0_uw = cDest;
180        AA64FpDestP1_uw = 0;
181        AA64FpDestP2_uw = 0;
182        AA64FpDestP3_uw = 0;
183        FpscrExc = fpscr;
184    '''
185
186    halfIntConvCode2 = vfp64EnabledCheckCode + '''
187        FPSCR fpscr = (FPSCR) FpscrExc;
188        uint16_t cOp1  = AA64FpOp1P0_uw;
189        uint16_t cOp2  = AA64FpOp2P0_uw;
190        uint16_t cDest = %(op)s;
191        AA64FpDestP0_uw = cDest;
192        AA64FpDestP1_uw = 0;
193        AA64FpDestP2_uw = 0;
194        AA64FpDestP3_uw = 0;
195        FpscrExc = fpscr;
196    '''
197
198    halfBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
199                "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
200    halfUnaryOp = "unaryOp(fpscr, AA64FpOp1P0," + \
201                  "%(func)s, fpscr.fz, fpscr.rMode)"
202
203    singleIntConvCode = vfp64EnabledCheckCode + '''
204        FPSCR fpscr = (FPSCR) FpscrExc;
205        uint32_t cOp1  = AA64FpOp1P0_uw;
206        uint32_t cDest = %(op)s;
207        AA64FpDestP0_uw = cDest;
208        AA64FpDestP1_uw = 0;
209        AA64FpDestP2_uw = 0;
210        AA64FpDestP3_uw = 0;
211        FpscrExc = fpscr;
212    '''
213
214    singleIntConvCode2 = vfp64EnabledCheckCode + '''
215        FPSCR fpscr = (FPSCR) FpscrExc;
216        uint32_t cOp1  = AA64FpOp1P0_uw;
217        uint32_t cOp2  = AA64FpOp2P0_uw;
218        uint32_t cDest = %(op)s;
219        AA64FpDestP0_uw = cDest;
220        AA64FpDestP1_uw = 0;
221        AA64FpDestP2_uw = 0;
222        AA64FpDestP3_uw = 0;
223        FpscrExc = fpscr;
224    '''
225
226    singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
227                "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
228    singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
229
230    doubleIntConvCode = vfp64EnabledCheckCode + '''
231        FPSCR fpscr = (FPSCR) FpscrExc;
232        uint64_t cOp1  = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
233        uint64_t cDest = %(op)s;
234        AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
235        AA64FpDestP1_uw = cDest >> 32;
236        AA64FpDestP2_uw = 0;
237        AA64FpDestP3_uw = 0;
238        FpscrExc = fpscr;
239    '''
240
241    doubleIntConvCode2 = vfp64EnabledCheckCode + '''
242        FPSCR fpscr = (FPSCR) FpscrExc;
243        uint64_t cOp1  = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
244        uint64_t cOp2  = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
245        uint64_t cDest = %(op)s;
246        AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
247        AA64FpDestP1_uw = cDest >> 32;
248        AA64FpDestP2_uw = 0;
249        AA64FpDestP3_uw = 0;
250        FpscrExc = fpscr;
251    '''
252
253    doubleBinOp = '''
254        binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
255                        dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
256                        %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
257    '''
258    doubleUnaryOp = '''
259        unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
260                fpscr.fz, fpscr.rMode)
261    '''
262
263    def buildTernaryFpOp(name, opClass, hOp, sOp, dOp):
264        global header_output, decoder_output, exec_output
265        for suffix in "D", "S", "H":
266            code = vfp64EnabledCheckCode + '''
267                FPSCR fpscr = (FPSCR) FpscrExc;
268            '''
269            if suffix == "H":
270                code += '''
271                    uint16_t cOp1 = AA64FpOp1P0_uw;
272                    uint16_t cOp2 = AA64FpOp2P0_uw;
273                    uint16_t cOp3 = AA64FpOp3P0_uw;
274                    uint16_t cDest;
275                ''' "cDest = " + hOp + ";" + '''
276                    AA64FpDestP0_uw = cDest;
277                    AA64FpDestP1_uw = 0;
278                '''
279            elif suffix == "S":
280                code += '''
281                    uint32_t cOp1 = AA64FpOp1P0_uw;
282                    uint32_t cOp2 = AA64FpOp2P0_uw;
283                    uint32_t cOp3 = AA64FpOp3P0_uw;
284                    uint32_t cDest;
285                ''' "cDest = " + sOp + ";" + '''
286                    AA64FpDestP0_uw = cDest;
287                    AA64FpDestP1_uw = 0;
288                '''
289            elif suffix == "D":
290                code += '''
291                    uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
292                    uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
293                    uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
294                    uint64_t cDest;
295                ''' "cDest = " + dOp + ";" + '''
296                    AA64FpDestP0_uw = cDest;
297                    AA64FpDestP1_uw = cDest >> 32;
298                '''
299            code += '''
300                AA64FpDestP2_uw = 0;
301                AA64FpDestP3_uw = 0;
302                FpscrExc = fpscr;
303            '''
304
305            iop = InstObjParams(name.lower(), name + suffix,
306                                "FpRegRegRegRegOp",
307                                { "code": code, "op_class": opClass }, [])
308
309            header_output  += AA64FpRegRegRegRegOpDeclare.subst(iop)
310            decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
311            exec_output    += BasicExecute.subst(iop)
312
313    buildTernaryFpOp("FMAdd", "FloatMultAccOp",
314                     "fplibMulAdd<uint16_t>(cOp3, cOp1, cOp2, fpscr)",
315                     "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
316                     "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
317    buildTernaryFpOp("FMSub", "FloatMultAccOp",
318        "fplibMulAdd<uint16_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
319        "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
320        "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
321    buildTernaryFpOp("FNMAdd", "FloatMultAccOp",
322                     "fplibMulAdd<uint16_t>(fplibNeg<uint16_t>(cOp3), " +
323                     "fplibNeg<uint16_t>(cOp1), cOp2, fpscr)",
324                     "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), " +
325                     "fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
326                     "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), " +
327                     "fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
328    buildTernaryFpOp("FNMSub", "FloatMultAccOp",
329        "fplibMulAdd<uint16_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
330        "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
331        "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
332
333    def buildBinFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp):
334        global header_output, decoder_output, exec_output
335
336        code = halfIntConvCode2 % { "op": halfOp }
337        hIop = InstObjParams(name, Name + "H", base,
338                { "code": code,
339                  "op_class": opClass }, [])
340
341        code = singleIntConvCode2 % { "op": singleOp }
342        sIop = InstObjParams(name, Name + "S", base,
343                { "code": code,
344                  "op_class": opClass }, [])
345
346        code = doubleIntConvCode2 % { "op": doubleOp }
347        dIop = InstObjParams(name, Name + "D", base,
348                { "code": code,
349                  "op_class": opClass }, [])
350
351        declareTempl     = eval(         base + "Declare");
352        constructorTempl = eval("AA64" + base + "Constructor");
353
354        for iop in hIop, sIop, dIop:
355            header_output  += declareTempl.subst(iop)
356            decoder_output += constructorTempl.subst(iop)
357            exec_output    += BasicExecute.subst(iop)
358
359    buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp",
360                 "fplibAdd<uint16_t>(cOp1, cOp2, fpscr)",
361                 "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
362                 "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
363    buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp",
364                 "fplibSub<uint16_t>(cOp1, cOp2, fpscr)",
365                 "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
366                 "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
367    buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp",
368                 "fplibDiv<uint16_t>(cOp1, cOp2, fpscr)",
369                 "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
370                 "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
371    buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp",
372                 "fplibMul<uint16_t>(cOp1, cOp2, fpscr)",
373                 "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
374                 "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
375    buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp",
376                 "fplibNeg<uint16_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
377                 "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
378                 "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
379    buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp",
380                 "fplibMin<uint16_t>(cOp1, cOp2, fpscr)",
381                 "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
382                 "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
383    buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp",
384                 "fplibMax<uint16_t>(cOp1, cOp2, fpscr)",
385                 "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
386                 "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
387    buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp",
388                 "fplibMinNum<uint16_t>(cOp1, cOp2, fpscr)",
389                 "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
390                 "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
391    buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp",
392                 "fplibMaxNum<uint16_t>(cOp1, cOp2, fpscr)",
393                 "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
394                 "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
395
396    def buildUnaryFpOp(name, Name, base, opClass,
397                       halfOp, singleOp, doubleOp = None):
398        if doubleOp is None:
399            doubleOp = singleOp
400        global header_output, decoder_output, exec_output
401
402        code = halfIntConvCode % { "op": halfOp }
403        hIop = InstObjParams(name, Name + "H", base,
404                { "code": code,
405                  "op_class": opClass }, [])
406        code = singleIntConvCode % { "op": singleOp }
407        sIop = InstObjParams(name, Name + "S", base,
408                { "code": code,
409                  "op_class": opClass }, [])
410        code = doubleIntConvCode % { "op": doubleOp }
411        dIop = InstObjParams(name, Name + "D", base,
412                { "code": code,
413                  "op_class": opClass }, [])
414
415        declareTempl     = eval(         base + "Declare");
416        constructorTempl = eval("AA64" + base + "Constructor");
417
418        for iop in hIop, sIop, dIop:
419            header_output  += declareTempl.subst(iop)
420            decoder_output += constructorTempl.subst(iop)
421            exec_output    += BasicExecute.subst(iop)
422
423    buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp",
424                   "fplibSqrt<uint16_t>(cOp1, fpscr)",
425                   "fplibSqrt<uint32_t>(cOp1, fpscr)",
426                   "fplibSqrt<uint64_t>(cOp1, fpscr)")
427
428    def buildSimpleUnaryFpOp(name, Name, base, opClass, halfOp, singleOp,
429                             doubleOp = None, isIntConv = True):
430        if doubleOp is None:
431            doubleOp = singleOp
432        global header_output, decoder_output, exec_output
433
434        if isIntConv:
435            hCode = halfIntConvCode
436            sCode = singleIntConvCode
437            dCode = doubleIntConvCode
438        else:
439            hCode = halfCode
440            sCode = singleCode
441            dCode = doubleCode
442
443        for code, op, suffix in [[hCode, halfOp, "H"],
444                                 [sCode, singleOp, "S"],
445                                 [dCode, doubleOp, "D"]]:
446            iop = InstObjParams(name, Name + suffix, base,
447                { "code": code % { "op": op },
448                  "op_class": opClass }, [])
449
450            declareTempl     = eval(         base + "Declare");
451            constructorTempl = eval("AA64" + base + "Constructor");
452
453            header_output  += declareTempl.subst(iop)
454            decoder_output += constructorTempl.subst(iop)
455            exec_output    += BasicExecute.subst(iop)
456
457    buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp",
458                         "fplibNeg<uint16_t>(cOp1)",
459                         "fplibNeg<uint32_t>(cOp1)",
460                         "fplibNeg<uint64_t>(cOp1)")
461    buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp",
462                         "fplibAbs<uint16_t>(cOp1)",
463                         "fplibAbs<uint32_t>(cOp1)",
464                         "fplibAbs<uint64_t>(cOp1)")
465    buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp",
466        "fplibRoundInt<uint16_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
467        "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
468        "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
469    buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp",
470        "fplibRoundInt<uint16_t>(cOp1, FPRounding_POSINF, false, fpscr)",
471        "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
472        "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
473    buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp",
474        "fplibRoundInt<uint16_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
475        "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
476        "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
477    buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp",
478        "fplibRoundInt<uint16_t>(cOp1, FPRounding_ZERO, false, fpscr)",
479        "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
480        "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
481    buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp",
482        "fplibRoundInt<uint16_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
483        "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
484        "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
485    buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp",
486        "fplibRoundInt<uint16_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
487        "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
488        "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
489    buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp",
490        "fplibRoundInt<uint16_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
491        "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
492        "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
493}};
494
495let {{
496
497    header_output = ""
498    decoder_output = ""
499    exec_output = ""
500
501    # Creates the integer to floating point instructions, including variants for
502    # signed/unsigned, float/double, etc
503    for regL, regOpL, width in [["W", "w", 32],
504                                ["X", "d", 64]]:
505        for isDouble in True, False:
506            for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)],
507                               ["S", "int%d_t  cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]:
508                fcvtIntFpDCode = vfp64EnabledCheckCode + '''
509                    FPSCR fpscr = (FPSCR) FpscrExc;
510                    %s
511                ''' %(usCode)
512
513                if isDouble:
514                    fcvtIntFpDCode += '''
515                        uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
516                            %s, FPCRRounding(fpscr), fpscr);
517                        AA64FpDestP0_uw = cDest;
518                        AA64FpDestP1_uw = cDest >> 32;
519                    ''' % ("true" if us == "U" else "false")
520                else:
521                    fcvtIntFpDCode += '''
522                        uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
523                            %s, FPCRRounding(fpscr), fpscr);
524                        AA64FpDestP0_uw = cDest;
525                        AA64FpDestP1_uw = 0;
526                    ''' % ("true" if us == "U" else "false")
527                fcvtIntFpDCode += '''
528                    AA64FpDestP2_uw = 0;
529                    AA64FpDestP3_uw = 0;
530                    FpscrExc = fpscr;
531                '''
532
533                instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
534                mnem     = "%scvtf" %(us.lower())
535                fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
536                                              { "code": fcvtIntFpDCode,
537                                                "op_class": "FloatCvtOp" }, [])
538                header_output  += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
539                decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
540                exec_output    += BasicExecute.subst(fcvtIntFpDIop);
541
542    # Generates the floating point to integer conversion instructions in various
543    # variants, eg signed/unsigned
544    def buildFpCvtIntOp(isDouble, isSigned, isXReg):
545        global header_output, decoder_output, exec_output
546
547        for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
548                                    ["P", "FPRounding_POSINF"],
549                                    ["M", "FPRounding_NEGINF"],
550                                    ["Z", "FPRounding_ZERO"],
551                                    ["A", "FPRounding_TIEAWAY"]]:
552            fcvtFpIntCode = vfp64EnabledCheckCode + '''
553                FPSCR fpscr = (FPSCR) FpscrExc;'''
554            if isDouble:
555                fcvtFpIntCode += '''
556                uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
557                '''
558            else:
559                fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
560
561            fcvtFpIntCode += '''
562                %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
563                FpscrExc = fpscr;
564            ''' %("X"      if isXReg   else "W",
565                  "64"     if isDouble else "32",
566                  "64"     if isXReg   else "32",
567                  "false"  if isSigned else "true",
568                  roundingMode)
569
570            instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U",
571                                             "X" if isXReg   else "W",
572                                             "D" if isDouble else "S", rmode)
573            mnem     = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
574            fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
575                                        { "code": fcvtFpIntCode,
576                                        "op_class": "FloatCvtOp" }, [])
577            header_output  += FpRegRegOpDeclare.subst(fcvtFpIntIop);
578            decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
579            exec_output    += BasicExecute.subst(fcvtFpIntIop);
580
581    # Now actually do the building with the different variants
582    for isDouble in True, False:
583       for isSigned in True, False:
584           for isXReg in True, False:
585             buildFpCvtIntOp(isDouble, isSigned, isXReg)
586
587    fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
588        FPSCR fpscr = (FPSCR) FpscrExc;
589        uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
590            FPCRRounding(fpscr), fpscr);
591        AA64FpDestP0_uw = cDest;
592        AA64FpDestP1_uw = cDest >> 32;
593        AA64FpDestP2_uw = 0;
594        AA64FpDestP3_uw = 0;
595        FpscrExc = fpscr;
596    '''
597    fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
598                                     { "code": fcvtFpSFpDCode,
599                                       "op_class": "FloatCvtOp" }, [])
600    header_output  += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
601    decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
602    exec_output    += BasicExecute.subst(fcvtFpSFpDIop);
603
604    fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
605        FPSCR fpscr = (FPSCR) FpscrExc;
606        uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
607        AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
608            FPCRRounding(fpscr), fpscr);
609        AA64FpDestP1_uw = 0;
610        AA64FpDestP2_uw = 0;
611        AA64FpDestP3_uw = 0;
612        FpscrExc = fpscr;
613    '''
614    fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
615                                 {"code":     fcvtFpDFpSCode,
616                                  "op_class": "FloatCvtOp" }, [])
617    header_output  += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
618    decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
619    exec_output    += BasicExecute.subst(fcvtFpDFpSIop);
620
621    # Half precision to single or double precision conversion
622    for isDouble in True, False:
623        code = vfp64EnabledCheckCode + '''
624            FPSCR fpscr = (FPSCR) FpscrExc;
625            %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
626                FPCRRounding(fpscr), fpscr);
627        ''' % ("uint64_t" if isDouble else "uint32_t",
628               "64" if isDouble else "32")
629        if isDouble:
630            code += '''
631                AA64FpDestP0_uw = cDest;
632                AA64FpDestP1_uw = cDest >> 32;
633            '''
634        else:
635            code += '''
636                AA64FpDestP0_uw = cDest;
637                AA64FpDestP1_uw = 0;
638            '''
639        code += '''
640            AA64FpDestP2_uw = 0;
641            AA64FpDestP3_uw = 0;
642            FpscrExc = fpscr;
643        '''
644
645        instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
646        fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
647                                     { "code": code,
648                                       "op_class": "FloatCvtOp" }, [])
649        header_output  += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
650        decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
651        exec_output    += BasicExecute.subst(fcvtFpHFpIop);
652
653    # single or double precision to Half precision conversion
654    for isDouble in True, False:
655        code = vfp64EnabledCheckCode + '''
656            FPSCR fpscr = (FPSCR) FpscrExc;
657            %s;
658            AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
659                FPCRRounding(fpscr), fpscr);
660            AA64FpDestP1_uw = 0;
661            AA64FpDestP2_uw = 0;
662            AA64FpDestP3_uw = 0;
663            FpscrExc = fpscr;
664        ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
665               if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw",
666               "64" if isDouble else "32")
667
668        instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
669        fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
670                                     { "code": code,
671                                       "op_class": "FloatCvtOp" }, [])
672        header_output  += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
673        decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
674        exec_output    += BasicExecute.subst(fcvtFpFpHIop);
675
676    # Build the various versions of the floating point compare instructions
677    def buildFCmpOp(isQuiet, isDouble, isImm):
678        global header_output, decoder_output, exec_output
679
680        fcmpCode = vfp64EnabledCheckCode + '''
681            FPSCR fpscr = (FPSCR) FpscrExc;
682            %s cOp1 = %s;
683        ''' % ("uint64_t" if isDouble else "uint32_t",
684               "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
685               if isDouble else "AA64FpDestP0_uw")
686        if isImm:
687            fcmpCode += '''
688                %s cOp2 = imm;
689            ''' % ("uint64_t" if isDouble else "uint32_t")
690        else:
691            fcmpCode += '''
692                %s cOp2  = %s;
693            ''' % ("uint64_t" if isDouble else "uint32_t",
694                   "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
695                   if isDouble else "AA64FpOp1P0_uw")
696        fcmpCode += '''
697            int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
698            CondCodesNZ = cc >> 2 & 3;
699            CondCodesC = cc >> 1 & 1;
700            CondCodesV = cc & 1;
701            FpCondCodes = fpscr & FpCondCodesMask;
702            FpscrExc    = fpscr;
703        ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")
704
705        typeName = "Imm" if isImm else "Reg"
706        instName = "FCmp%s%s%s" %(""  if isQuiet  else "E", typeName,
707                                  "D" if isDouble else "S")
708        fcmpIop = InstObjParams("fcmp%s" %(""  if isQuiet else "e"), instName,
709                                "FpReg%sOp" %(typeName),
710                               {"code":     fcmpCode,
711                                "op_class": "FloatCmpOp"}, [])
712
713        declareTemp     = eval("FpReg%sOpDeclare"         %(typeName));
714        constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
715        header_output  += declareTemp.subst(fcmpIop);
716        decoder_output += constructorTemp.subst(fcmpIop);
717        exec_output    += BasicExecute.subst(fcmpIop);
718
719    for isQuiet in True, False:
720        for isDouble in True, False:
721            for isImm in True, False:
722                buildFCmpOp(isQuiet, isDouble, isImm)
723
724    # Build the various versions of the conditional floating point compare
725    # instructions
726    def buildFCCmpOp(isQuiet, isDouble):
727        global header_output, decoder_output, exec_output
728
729        fccmpCode = vfp64EnabledCheckCode + '''
730            FPSCR fpscr = (FPSCR) FpscrExc;
731            if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
732                %s cOp1 = %s;
733                %s cOp2 = %s;
734                int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
735                CondCodesNZ = cc >> 2 & 3;
736                CondCodesC = cc >> 1 & 1;
737                CondCodesV = cc & 1;
738            } else {
739                CondCodesNZ = (defCc >> 2) & 0x3;
740                CondCodesC  = (defCc >> 1) & 0x1;
741                CondCodesV  = defCc & 0x1;
742            }
743            FpCondCodes = fpscr & FpCondCodesMask;
744            FpscrExc    = fpscr;
745        ''' % ("uint64_t" if isDouble else "uint32_t",
746               "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
747               if isDouble else "AA64FpOp1P0_uw",
748               "uint64_t" if isDouble else "uint32_t",
749               "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
750               if isDouble else "AA64FpOp2P0_uw",
751               "64" if isDouble else "32", "false" if isQuiet else "true")
752
753        instName = "FCCmp%sReg%s" %(""  if isQuiet  else "E",
754                                    "D" if isDouble else "S")
755        fccmpIop = InstObjParams("fccmp%s" %(""  if isQuiet  else "e"),
756                                 instName, "FpCondCompRegOp",
757                                {"code":           fccmpCode,
758                                 "op_class":       "FloatCmpOp"}, [])
759        header_output  += DataXCondCompRegDeclare.subst(fccmpIop);
760        decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
761        exec_output    += BasicExecute.subst(fccmpIop);
762
763    for isQuiet in True, False:
764        for isDouble in True, False:
765            buildFCCmpOp(isQuiet, isDouble)
766
767}};
768
769let {{
770
771    header_output = ""
772    decoder_output = ""
773    exec_output = ""
774
775    # Generates the variants of the floating to fixed point instructions
776    def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
777        global header_output, decoder_output, exec_output
778
779        fcvtFpFixedCode = vfp64EnabledCheckCode + '''
780            FPSCR fpscr = (FPSCR) FpscrExc;
781        '''
782        if isDouble:
783            fcvtFpFixedCode += '''
784                uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
785            '''
786        else:
787            fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
788        fcvtFpFixedCode += '''
789            %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
790                FPRounding_ZERO, fpscr);
791            FpscrExc = fpscr;
792        ''' %("X"      if isXReg   else "W",
793              "64"     if isDouble else "32",
794              "64"     if isXReg   else "32",
795              "false"  if isSigned else "true")
796
797        instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
798                                         "D" if isDouble else "S",
799                                         "X" if isXReg   else "W")
800        mnem = "fcvtz%s" %("s" if isSigned else "u")
801        fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
802                                       { "code": fcvtFpFixedCode,
803                                         "op_class": "FloatCvtOp" }, [])
804        header_output  += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
805        decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
806        exec_output    += BasicExecute.subst(fcvtFpFixedIop);
807
808    # Generates the variants of the fixed to floating point instructions
809    def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
810        global header_output, decoder_output, exec_output
811
812        srcRegType = "X" if isXReg   else "W"
813        fcvtFixedFpCode = vfp64EnabledCheckCode + '''
814            FPSCR fpscr = (FPSCR) FpscrExc;
815            %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
816                %s, FPCRRounding(fpscr), fpscr);
817        ''' %("uint64_t" if isDouble else "uint32_t",
818              "64" if isDouble else "32",
819              "int" if isSigned else "uint", "64" if isXReg else "32",
820              srcRegType,
821              "false" if isSigned else "true")
822        if isDouble:
823            fcvtFixedFpCode += '''
824                AA64FpDestP0_uw = result;
825                AA64FpDestP1_uw = result >> 32;
826            '''
827        else:
828            fcvtFixedFpCode += '''
829                AA64FpDestP0_uw = result;
830                AA64FpDestP1_uw = 0;
831            '''
832        fcvtFixedFpCode += '''
833            AA64FpDestP2_uw = 0;
834            AA64FpDestP3_uw = 0;
835            FpscrExc = fpscr;
836        '''
837
838        instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
839                                         "D" if isDouble else "S",
840                                         srcRegType)
841        mnem = "%scvtf" %("s" if isSigned else "u")
842        fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
843                                       { "code":     fcvtFixedFpCode,
844                                         "op_class": "FloatCvtOp" }, [])
845        header_output  += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
846        decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
847        exec_output    += BasicExecute.subst(fcvtFixedFpIop);
848
849    # loop over the variants building the instructions for each
850    for isXReg in True, False:
851        for isDouble in True, False:
852            for isSigned in True, False:
853                buildFpCvtFixedOp(isSigned, isDouble, isXReg)
854                buildFixedCvtFpOp(isSigned, isDouble, isXReg)
855}};
856
857let {{
858
859    header_output  = ""
860    decoder_output = ""
861    exec_output    = ""
862
863    for isDouble in True, False:
864        code = '''
865            if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
866                AA64FpDestP0_uw = AA64FpOp1P0_uw;
867        '''
868        if isDouble:
869            code += '''
870                    AA64FpDestP1_uw = AA64FpOp1P1_uw;
871                } else {
872                    AA64FpDestP0_uw = AA64FpOp2P0_uw;
873                    AA64FpDestP1_uw = AA64FpOp2P1_uw;
874                }
875            '''
876        else:
877            code += '''
878                } else {
879                    AA64FpDestP0_uw = AA64FpOp2P0_uw;
880                }
881                AA64FpDestP1_uw = 0;
882            '''
883        code += '''
884            AA64FpDestP2_uw = 0;
885            AA64FpDestP3_uw = 0;
886        '''
887
888        iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
889                            "FpCondSelOp", { "code":     code,
890                                             "op_class": "FloatCvtOp" })
891        header_output  += DataXCondSelDeclare.subst(iop)
892        decoder_output += DataXCondSelConstructor.subst(iop)
893        exec_output    += BasicExecute.subst(iop)
894}};
895