// fp64.isa revision 10037:5cac77888310
// -*- mode:c++ -*-

// Copyright (c) 2012-2013 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder.  You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Thomas Grocutt
//          Edmund Grimley Evans

// Register moves (FMOV): immediate, FP register to FP register, and moves
// between core (W/X) registers and FP registers.
let {{

    header_output = ""
    decoder_output = ""
    exec_output = ""

    # FMOV immediate, single: low 32 bits of imm go to P0, P1-P3 are zeroed.
    fmovImmSCode = vfp64EnabledCheckCode + '''
        AA64FpDestP0_uw = bits(imm, 31, 0);
        AA64FpDestP1_uw = 0;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
    '''
    fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
                                { "code": fmovImmSCode,
                                  "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegImmOpDeclare.subst(fmovImmSIop)
    # Use the AA64 constructor template, exactly as FmovImmD below does for
    # the same base class and declare template.
    decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmSIop)
    exec_output += BasicExecute.subst(fmovImmSIop)

    # FMOV immediate, double: imm is split over P0 (low) and P1 (high).
    fmovImmDCode = vfp64EnabledCheckCode + '''
        AA64FpDestP0_uw = bits(imm, 31, 0);
        AA64FpDestP1_uw = bits(imm, 63, 32);
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
    '''
    fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
                                { "code": fmovImmDCode,
                                  "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegImmOpDeclare.subst(fmovImmDIop)
    decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop)
    exec_output += BasicExecute.subst(fmovImmDIop)

    # FMOV FP register to FP register, single: copies P0, zeroes P1-P3.
    fmovRegSCode = vfp64EnabledCheckCode + '''
        AA64FpDestP0_uw = AA64FpOp1P0_uw;
        AA64FpDestP1_uw = 0;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
    '''
    fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
                                { "code": fmovRegSCode,
                                  "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovRegSIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop)
    exec_output += BasicExecute.subst(fmovRegSIop)

    # FMOV FP register to FP register, double: copies P0/P1, zeroes P2/P3.
    fmovRegDCode = vfp64EnabledCheckCode + '''
        AA64FpDestP0_uw = AA64FpOp1P0_uw;
        AA64FpDestP1_uw = AA64FpOp1P1_uw;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
    '''
    fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
                                { "code": fmovRegDCode,
                                  "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovRegDIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop)
    exec_output += BasicExecute.subst(fmovRegDIop)

    # FMOV W core register to FP register: WOp1 goes to P0, rest zeroed.
    fmovCoreRegWCode = vfp64EnabledCheckCode + '''
        AA64FpDestP0_uw = WOp1_uw;
        AA64FpDestP1_uw = 0;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
    '''
    fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
                                    { "code": fmovCoreRegWCode,
                                      "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop)
    exec_output += BasicExecute.subst(fmovCoreRegWIop)

    # FMOV X core register to FP register: XOp1 is split over P0 (low) and
    # P1 (high), P2/P3 are zeroed.
    fmovCoreRegXCode = vfp64EnabledCheckCode + '''
        AA64FpDestP0_uw = XOp1_ud;
        AA64FpDestP1_uw = XOp1_ud >> 32;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
    '''
    fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
                                    { "code": fmovCoreRegXCode,
                                      "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop)
    exec_output += BasicExecute.subst(fmovCoreRegXIop)

    # FMOV X core register to the upper part of an FP register: only P2/P3
    # are assigned here; P0/P1 are not written by this snippet.
    fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
        AA64FpDestP2_uw = XOp1_ud;
        AA64FpDestP3_uw = XOp1_ud >> 32;
    '''
    fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
                                     { "code": fmovUCoreRegXCode,
                                       "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop)
    exec_output += BasicExecute.subst(fmovUCoreRegXIop)

    # FMOV FP register to W core register: P0 is copied to WDest.
    fmovRegCoreWCode = vfp64EnabledCheckCode + '''
        WDest = AA64FpOp1P0_uw;
    '''
    fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
                                    { "code": fmovRegCoreWCode,
                                      "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop)
    exec_output += BasicExecute.subst(fmovRegCoreWIop)

    # FMOV FP register to X core register: P1:P0 are packed into XDest.
    fmovRegCoreXCode = vfp64EnabledCheckCode + '''
        XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
    '''
    fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
                                    { "code": fmovRegCoreXCode,
                                      "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop)
    exec_output += BasicExecute.subst(fmovRegCoreXIop)

    # FMOV upper part of an FP register to X core register: P3:P2 are packed
    # into XDest.
    fmovURegCoreXCode = vfp64EnabledCheckCode + '''
        XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
    '''
    fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
                                     { "code": fmovURegCoreXCode,
                                       "op_class": "SimdFloatMiscOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop)
    exec_output += BasicExecute.subst(fmovURegCoreXIop)
}};

// Scalar arithmetic: fused multiply-add family, binary and unary operations.
let {{

    header_output = ""
    decoder_output = ""
    exec_output = ""

    # Code skeletons shared by the builders below. "%(op)s" is replaced with
    # a C++ expression that computes cDest from cOp1 (and cOp2 for the "2"
    # variants). Each skeleton reads FpscrExc into a local fpscr, evaluates
    # the expression, writes the result to the destination pieces, zeroes
    # the remaining pieces and writes fpscr back.
    singleIntConvCode = vfp64EnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        uint32_t cOp1 = AA64FpOp1P0_uw;
        uint32_t cDest = %(op)s;
        AA64FpDestP0_uw = cDest;
        AA64FpDestP1_uw = 0;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
        FpscrExc = fpscr;
    '''

    singleIntConvCode2 = vfp64EnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        uint32_t cOp1 = AA64FpOp1P0_uw;
        uint32_t cOp2 = AA64FpOp2P0_uw;
        uint32_t cDest = %(op)s;
        AA64FpDestP0_uw = cDest;
        AA64FpDestP1_uw = 0;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
        FpscrExc = fpscr;
    '''

    # NOTE(review): singleBinOp, singleUnaryOp, doubleBinOp and doubleUnaryOp
    # are not used anywhere in this file; they are kept because other ISA
    # description files may rely on them -- confirm before removing.
    singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
                "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
    singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"

    doubleIntConvCode = vfp64EnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
        uint64_t cDest = %(op)s;
        AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
        AA64FpDestP1_uw = cDest >> 32;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
        FpscrExc = fpscr;
    '''

    doubleIntConvCode2 = vfp64EnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
        uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
        uint64_t cDest = %(op)s;
        AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
        AA64FpDestP1_uw = cDest >> 32;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
        FpscrExc = fpscr;
    '''

    doubleBinOp = '''
        binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
                        dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
                 %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
    '''
    doubleUnaryOp = '''
        unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
                fpscr.fz, fpscr.rMode)
    '''

    # Emits the double ("D") and single ("S") variants of a three-source
    # instruction.
    #   name    - class name stem; the mnemonic is name.lower()
    #   opClass - op_class string passed to InstObjParams
    #   sOp/dOp - C++ expressions over cOp1, cOp2, cOp3 and fpscr producing
    #             the 32-bit / 64-bit result
    def buildTernaryFpOp(name, opClass, sOp, dOp):
        global header_output, decoder_output, exec_output
        for isDouble in True, False:
            code = vfp64EnabledCheckCode + '''
                FPSCR fpscr = (FPSCR) FpscrExc;
            '''
            if isDouble:
                code += '''
                    uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
                    uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
                    uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
                    uint64_t cDest;
                ''' "cDest = " + dOp + ";" + '''
                    AA64FpDestP0_uw = cDest;
                    AA64FpDestP1_uw = cDest >> 32;
                '''
            else:
                code += '''
                    uint32_t cOp1 = AA64FpOp1P0_uw;
                    uint32_t cOp2 = AA64FpOp2P0_uw;
                    uint32_t cOp3 = AA64FpOp3P0_uw;
                    uint32_t cDest;
                ''' "cDest = " + sOp + ";" + '''
                    AA64FpDestP0_uw = cDest;
                    AA64FpDestP1_uw = 0;
                '''
            code += '''
                AA64FpDestP2_uw = 0;
                AA64FpDestP3_uw = 0;
                FpscrExc = fpscr;
            '''

            iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"),
                                "FpRegRegRegRegOp",
                                { "code": code, "op_class": opClass }, [])

            header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
            decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
            exec_output += BasicExecute.subst(iop)

    # All four variants call fplibMulAdd(cOp3, cOp1, cOp2) and differ only
    # in which of cOp3 / cOp1 is passed through fplibNeg first.
    buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
                     "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
                     "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
    buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
        "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
        "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
    buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
        "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
        "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
    buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
        "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
        "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )

    # Emits the single ("S") and double ("D") variants of a two-source
    # instruction.
    #   name     - mnemonic
    #   Name     - class name stem
    #   base     - base class name; the templates "<base>Declare" and
    #              "AA64<base>Constructor" are looked up by name
    #   opClass  - op_class string passed to InstObjParams
    #   singleOp/doubleOp - C++ expressions over cOp1, cOp2 and fpscr
    def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
        global header_output, decoder_output, exec_output

        code = singleIntConvCode2 % { "op": singleOp }
        sIop = InstObjParams(name, Name + "S", base,
                { "code": code,
                  "op_class": opClass }, [])

        code = doubleIntConvCode2 % { "op": doubleOp }
        dIop = InstObjParams(name, Name + "D", base,
                { "code": code,
                  "op_class": opClass }, [])

        # The template names are assembled from literals supplied by the
        # calls below, not from external input.
        declareTempl = eval(base + "Declare")
        constructorTempl = eval("AA64" + base + "Constructor")

        for iop in sIop, dIop:
            header_output += declareTempl.subst(iop)
            decoder_output += constructorTempl.subst(iop)
            exec_output += BasicExecute.subst(iop)

    buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp",
                 "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp",
                 "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp",
                 "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp",
                 "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp",
                 "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
                 "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
    buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp",
                 "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp",
                 "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp",
                 "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
    buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp",
                 "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
                 "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")

    # Emits the single ("S") and double ("D") variants of a one-source
    # instruction. Parameters are as for buildBinFpOp; doubleOp defaults to
    # singleOp when omitted.
    def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
        if doubleOp is None:
            doubleOp = singleOp
        global header_output, decoder_output, exec_output

        code = singleIntConvCode % { "op": singleOp }
        sIop = InstObjParams(name, Name + "S", base,
                { "code": code,
                  "op_class": opClass }, [])
        code = doubleIntConvCode % { "op": doubleOp }
        dIop = InstObjParams(name, Name + "D", base,
                { "code": code,
                  "op_class": opClass }, [])

        declareTempl = eval(base + "Declare")
        constructorTempl = eval("AA64" + base + "Constructor")

        for iop in sIop, dIop:
            header_output += declareTempl.subst(iop)
            decoder_output += constructorTempl.subst(iop)
            exec_output += BasicExecute.subst(iop)

    buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp",
                   "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)")

    # Same contract as buildUnaryFpOp, with an extra isIntConv switch that
    # selects which pair of code skeletons is used.
    def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
                             doubleOp = None, isIntConv = True):
        if doubleOp is None:
            doubleOp = singleOp
        global header_output, decoder_output, exec_output

        if isIntConv:
            sCode = singleIntConvCode
            dCode = doubleIntConvCode
        else:
            # NOTE(review): singleCode / doubleCode are not defined in this
            # file, and no call below passes isIntConv = False. Presumably
            # they are expected to come from another ISA description file --
            # confirm before relying on this branch.
            sCode = singleCode
            dCode = doubleCode

        # Both variants share the same templates, so look them up once.
        declareTempl = eval(base + "Declare")
        constructorTempl = eval("AA64" + base + "Constructor")

        for code, op, suffix in [[sCode, singleOp, "S"],
                                 [dCode, doubleOp, "D"]]:
            iop = InstObjParams(name, Name + suffix, base,
                { "code": code % { "op": op },
                  "op_class": opClass }, [])

            header_output += declareTempl.subst(iop)
            decoder_output += constructorTempl.subst(iop)
            exec_output += BasicExecute.subst(iop)

    buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")
    buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)")
    # The FRINT* variants all call fplibRoundInt and differ in the rounding
    # mode argument and, for FRINTX only, in the boolean argument.
    buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
    buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
    buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
    buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
    buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
    buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
    buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp",
                         "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
                         "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
}};

// Conversions (integer <-> FP, FP <-> FP) and comparisons.
let {{

    header_output = ""
    decoder_output = ""
    exec_output = ""

    # Creates the integer to floating point instructions, including variants for
    # signed/unsigned, float/double, etc
    for regL, regOpL, width in [["W", "w", 32],
                                ["X", "d", 64]]:
        for isDouble in True, False:
            for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)],
                               ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]:
                fcvtIntFpDCode = vfp64EnabledCheckCode + '''
                    FPSCR fpscr = (FPSCR) FpscrExc;
                    %s
                ''' %(usCode)

                # The third fplibFixedToFP argument is "true" for the
                # unsigned variants and "false" for the signed ones.
                if isDouble:
                    fcvtIntFpDCode += '''
                        uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
                            %s, FPCRRounding(fpscr), fpscr);
                        AA64FpDestP0_uw = cDest;
                        AA64FpDestP1_uw = cDest >> 32;
                    ''' % ("true" if us == "U" else "false")
                else:
                    fcvtIntFpDCode += '''
                        uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
                            %s, FPCRRounding(fpscr), fpscr);
                        AA64FpDestP0_uw = cDest;
                        AA64FpDestP1_uw = 0;
                    ''' % ("true" if us == "U" else "false")
                fcvtIntFpDCode += '''
                    AA64FpDestP2_uw = 0;
                    AA64FpDestP3_uw = 0;
                    FpscrExc = fpscr;
                '''

                instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
                mnem = "%scvtf" %(us.lower())
                fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
                                                { "code": fcvtIntFpDCode,
                                                  "op_class": "SimdFloatCvtOp" }, [])
                header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop)
                decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop)
                exec_output += BasicExecute.subst(fcvtIntFpDIop)

    # Generates the floating point to integer conversion instructions in various
    # variants, eg signed/unsigned
    #   isDouble - source is 64-bit (P1:P0) rather than 32-bit (P0)
    #   isSigned - selects the "s"/"u" mnemonic suffix and the boolean passed
    #              to fplibFPToFixed ("false" when signed)
    #   isXReg   - destination is XDest rather than WDest
    # One instruction is emitted per rounding mode in the table below.
    def buildFpCvtIntOp(isDouble, isSigned, isXReg):
        global header_output, decoder_output, exec_output

        for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
                                    ["P", "FPRounding_POSINF"],
                                    ["M", "FPRounding_NEGINF"],
                                    ["Z", "FPRounding_ZERO"],
                                    ["A", "FPRounding_TIEAWAY"]]:
            fcvtFpIntCode = vfp64EnabledCheckCode + '''
                FPSCR fpscr = (FPSCR) FpscrExc;'''
            if isDouble:
                fcvtFpIntCode += '''
                uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
                '''
            else:
                fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"

            fcvtFpIntCode += '''
                %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
                FpscrExc = fpscr;
            ''' %("X" if isXReg else "W",
                  "64" if isDouble else "32",
                  "64" if isXReg else "32",
                  "false" if isSigned else "true",
                  roundingMode)

            instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U",
                                             "X" if isXReg else "W",
                                             "D" if isDouble else "S", rmode)
            mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
            fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
                                        { "code": fcvtFpIntCode,
                                          "op_class": "SimdFloatCvtOp" }, [])
            header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop)
            # Use the AA64 constructor template, as every other FpRegRegOp
            # instruction in this file does.
            decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpIntIop)
            exec_output += BasicExecute.subst(fcvtFpIntIop)

    # Now actually do the building with the different variants
    for isDouble in True, False:
        for isSigned in True, False:
            for isXReg in True, False:
                buildFpCvtIntOp(isDouble, isSigned, isXReg)

    # FCVT single to double precision.
    fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
            FPCRRounding(fpscr), fpscr);
        AA64FpDestP0_uw = cDest;
        AA64FpDestP1_uw = cDest >> 32;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
        FpscrExc = fpscr;
    '''
    fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
                                     { "code": fcvtFpSFpDCode,
                                       "op_class": "SimdFloatCvtOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop)
    exec_output += BasicExecute.subst(fcvtFpSFpDIop)

    # FCVT double to single precision.
    fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
        AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
            FPCRRounding(fpscr), fpscr);
        AA64FpDestP1_uw = 0;
        AA64FpDestP2_uw = 0;
        AA64FpDestP3_uw = 0;
        FpscrExc = fpscr;
    '''
    fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
                                 {"code": fcvtFpDFpSCode,
                                  "op_class": "SimdFloatCvtOp" }, [])
    header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop)
    exec_output += BasicExecute.subst(fcvtFpDFpSIop)

    # Half precision to single or double precision conversion
    for isDouble in True, False:
        code = vfp64EnabledCheckCode + '''
            FPSCR fpscr = (FPSCR) FpscrExc;
            %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
                FPCRRounding(fpscr), fpscr);
        ''' % ("uint64_t" if isDouble else "uint32_t",
               "64" if isDouble else "32")
        if isDouble:
            code += '''
                AA64FpDestP0_uw = cDest;
                AA64FpDestP1_uw = cDest >> 32;
            '''
        else:
            code += '''
                AA64FpDestP0_uw = cDest;
                AA64FpDestP1_uw = 0;
            '''
        code += '''
            AA64FpDestP2_uw = 0;
            AA64FpDestP3_uw = 0;
            FpscrExc = fpscr;
        '''

        instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
        fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
                                     { "code": code,
                                       "op_class": "SimdFloatCvtOp" }, [])
        header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop)
        decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop)
        exec_output += BasicExecute.subst(fcvtFpHFpIop)

    # single or double precision to Half precision conversion
    for isDouble in True, False:
        code = vfp64EnabledCheckCode + '''
            FPSCR fpscr = (FPSCR) FpscrExc;
            %s;
            AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
                FPCRRounding(fpscr), fpscr);
            AA64FpDestP1_uw = 0;
            AA64FpDestP2_uw = 0;
            AA64FpDestP3_uw = 0;
            FpscrExc = fpscr;
        ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
               if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw",
               "64" if isDouble else "32")

        instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
        fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
                                     { "code": code,
                                       "op_class": "SimdFloatCvtOp" }, [])
        header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop)
        decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop)
        exec_output += BasicExecute.subst(fcvtFpFpHIop)

    # Build the various versions of the floating point compare instructions
    #   isQuiet  - selects "fcmp" (quiet) vs "fcmpe"; the boolean passed to
    #              fplibCompare is "false" when quiet
    #   isDouble - operands are 64-bit (P1:P0) rather than 32-bit (P0)
    #   isImm    - second operand is imm rather than a register
    # Note that the first compared value is read from the AA64FpDest pieces
    # and the second (register form) from the AA64FpOp1 pieces.
    def buildFCmpOp(isQuiet, isDouble, isImm):
        global header_output, decoder_output, exec_output

        fcmpCode = vfp64EnabledCheckCode + '''
            FPSCR fpscr = (FPSCR) FpscrExc;
            %s cOp1 = %s;
        ''' % ("uint64_t" if isDouble else "uint32_t",
               "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
               if isDouble else "AA64FpDestP0_uw")
        if isImm:
            fcmpCode += '''
                %s cOp2 = imm;
            ''' % ("uint64_t" if isDouble else "uint32_t")
        else:
            fcmpCode += '''
                %s cOp2  = %s;
            ''' % ("uint64_t" if isDouble else "uint32_t",
                   "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
                   if isDouble else "AA64FpOp1P0_uw")
        # The 4-bit result cc is unpacked as: bits 3:2 -> NZ, bit 1 -> C,
        # bit 0 -> V.
        fcmpCode += '''
            int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
            CondCodesNZ = cc >> 2 & 3;
            CondCodesC = cc >> 1 & 1;
            CondCodesV = cc & 1;
            FpCondCodes = fpscr & FpCondCodesMask;
            FpscrExc    = fpscr;
        ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")

        typeName = "Imm" if isImm else "Reg"
        instName = "FCmp%s%s%s" %(""  if isQuiet  else "E", typeName,
                                  "D" if isDouble else "S")
        fcmpIop = InstObjParams("fcmp%s" %(""  if isQuiet else "e"), instName,
                                "FpReg%sOp" %(typeName),
                               {"code": fcmpCode,
                                "op_class": "SimdFloatCmpOp"}, [])

        declareTemp = eval("FpReg%sOpDeclare" %(typeName))
        constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName))
        header_output += declareTemp.subst(fcmpIop)
        decoder_output += constructorTemp.subst(fcmpIop)
        exec_output += BasicExecute.subst(fcmpIop)

    for isQuiet in True, False:
        for isDouble in True, False:
            for isImm in True, False:
                buildFCmpOp(isQuiet, isDouble, isImm)

    # Build the various versions of the conditional floating point compare
    # instructions
    #   isQuiet  - selects "fccmp" (quiet) vs "fccmpe"
    #   isDouble - operands are 64-bit (P1:P0) rather than 32-bit (P0)
    # When the predicate holds the flags come from fplibCompare, otherwise
    # they are unpacked from defCc using the same bit layout.
    def buildFCCmpOp(isQuiet, isDouble):
        global header_output, decoder_output, exec_output

        fccmpCode = vfp64EnabledCheckCode + '''
            FPSCR fpscr = (FPSCR) FpscrExc;
            if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
                %s cOp1 = %s;
                %s cOp2 = %s;
                int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
                CondCodesNZ = cc >> 2 & 3;
                CondCodesC = cc >> 1 & 1;
                CondCodesV = cc & 1;
            } else {
                CondCodesNZ = (defCc >> 2) & 0x3;
                CondCodesC = (defCc >> 1) & 0x1;
                CondCodesV = defCc & 0x1;
            }
            FpCondCodes = fpscr & FpCondCodesMask;
            FpscrExc    = fpscr;
        ''' % ("uint64_t" if isDouble else "uint32_t",
               "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
               if isDouble else "AA64FpOp1P0_uw",
               "uint64_t" if isDouble else "uint32_t",
               "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
               if isDouble else "AA64FpOp2P0_uw",
               "64" if isDouble else "32", "false" if isQuiet else "true")

        instName = "FCCmp%sReg%s" %(""  if isQuiet  else "E",
                                    "D" if isDouble else "S")
        fccmpIop = InstObjParams("fccmp%s" %(""  if isQuiet  else "e"),
                                 instName, "FpCondCompRegOp",
                                {"code": fccmpCode,
                                 "op_class": "SimdFloatCmpOp"}, [])
        header_output += DataXCondCompRegDeclare.subst(fccmpIop)
        decoder_output += DataXCondCompRegConstructor.subst(fccmpIop)
        exec_output += BasicExecute.subst(fccmpIop)

    for isQuiet in True, False:
        for isDouble in True, False:
            buildFCCmpOp(isQuiet, isDouble)

}};

// Fixed point <-> floating point conversions.
let {{

    header_output = ""
    decoder_output = ""
    exec_output = ""

    # Generates the variants of the floating to fixed point instructions
    #   isSigned - selects "fcvtzs"/"fcvtzu" and the boolean passed to
    #              fplibFPToFixed ("false" when signed)
    #   isDouble - source is 64-bit (P1:P0) rather than 32-bit (P0)
    #   isXReg   - destination is XDest rather than WDest
    # The second fplibFPToFixed argument is "64 - imm" and the rounding mode
    # is fixed to FPRounding_ZERO.
    def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
        global header_output, decoder_output, exec_output

        fcvtFpFixedCode = vfp64EnabledCheckCode + '''
            FPSCR fpscr = (FPSCR) FpscrExc;
        '''
        if isDouble:
            fcvtFpFixedCode += '''
                uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
            '''
        else:
            fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
        fcvtFpFixedCode += '''
            %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
                FPRounding_ZERO, fpscr);
            FpscrExc = fpscr;
        ''' %("X" if isXReg else "W",
              "64" if isDouble else "32",
              "64" if isXReg else "32",
              "false" if isSigned else "true")

        instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
                                         "D" if isDouble else "S",
                                         "X" if isXReg else "W")
        mnem = "fcvtz%s" %("s" if isSigned else "u")
        fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
                                       { "code": fcvtFpFixedCode,
                                         "op_class": "SimdFloatCvtOp" }, [])
        header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop)
        decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop)
        exec_output += BasicExecute.subst(fcvtFpFixedIop)

    # Generates the variants of the fixed to floating point instructions
    #   isSigned - selects "scvtf"/"ucvtf", the cast applied to the source
    #              register and the boolean passed to fplibFixedToFP
    #   isDouble - result is 64-bit (written to P1:P0) rather than 32-bit
    #   isXReg   - source is XOp1 rather than WOp1
    def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
        global header_output, decoder_output, exec_output

        srcRegType = "X" if isXReg else "W"
        fcvtFixedFpCode = vfp64EnabledCheckCode + '''
            FPSCR fpscr = (FPSCR) FpscrExc;
            %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
                %s, FPCRRounding(fpscr), fpscr);
        ''' %("uint64_t" if isDouble else "uint32_t",
              "64" if isDouble else "32",
              "int" if isSigned else "uint", "64" if isXReg else "32",
              srcRegType,
              "false" if isSigned else "true")
        if isDouble:
            fcvtFixedFpCode += '''
                AA64FpDestP0_uw = result;
                AA64FpDestP1_uw = result >> 32;
            '''
        else:
            fcvtFixedFpCode += '''
                AA64FpDestP0_uw = result;
                AA64FpDestP1_uw = 0;
            '''
        fcvtFixedFpCode += '''
            AA64FpDestP2_uw = 0;
            AA64FpDestP3_uw = 0;
            FpscrExc = fpscr;
        '''

        instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
                                         "D" if isDouble else "S",
                                         srcRegType)
        mnem = "%scvtf" %("s" if isSigned else "u")
        fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
                                       { "code": fcvtFixedFpCode,
                                         "op_class": "SimdFloatCvtOp" }, [])
        header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop)
        # Use the AA64 constructor template, matching buildFpCvtFixedOp
        # above, which has the same base class and declare template.
        decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFixedFpIop)
        exec_output += BasicExecute.subst(fcvtFixedFpIop)

    # loop over the variants building the instructions for each
    for isXReg in True, False:
        for isDouble in True, False:
            for isSigned in True, False:
                buildFpCvtFixedOp(isSigned, isDouble, isXReg)
                buildFixedCvtFpOp(isSigned, isDouble, isXReg)
}};

// Conditional select (FCSEL).
let {{

    header_output = ""
    decoder_output = ""
    exec_output = ""

    # FCSEL: copies Op1 to Dest when the predicate holds and Op2 otherwise;
    # the unused upper pieces of Dest are zeroed.
    for isDouble in True, False:
        # Prefix the FP-enabled check that every other instruction in this
        # file carries; the original snippet omitted it for FCSEL only.
        code = vfp64EnabledCheckCode + '''
            if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
                AA64FpDestP0_uw = AA64FpOp1P0_uw;
        '''
        if isDouble:
            code += '''
                    AA64FpDestP1_uw = AA64FpOp1P1_uw;
                } else {
                    AA64FpDestP0_uw = AA64FpOp2P0_uw;
                    AA64FpDestP1_uw = AA64FpOp2P1_uw;
                }
            '''
        else:
            code += '''
                } else {
                    AA64FpDestP0_uw = AA64FpOp2P0_uw;
                }
                AA64FpDestP1_uw = 0;
            '''
        code += '''
            AA64FpDestP2_uw = 0;
            AA64FpDestP3_uw = 0;
        '''

        # NOTE(review): unlike the rest of the file no op_class is passed
        # here; left unchanged -- confirm whether one is intended.
        iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
                            "FpCondSelOp", code)
        header_output += DataXCondSelDeclare.subst(iop)
        decoder_output += DataXCondSelConstructor.subst(iop)
        exec_output += BasicExecute.subst(iop)
}};