vfp.cc (7434:dd5a09b86b14) → vfp.cc (7639:8c09b7ff5b57)
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#include "arch/arm/insts/vfp.hh"
41
42/*
43 * The asm statements below are to keep gcc from reordering code. Otherwise
44 * the rounding mode might be set after the operation it was intended for, the
45 * exception bits read before it, etc.
46 */
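/*
 * For example, in the following sketch of the idiom used throughout this
 * file, the empty asm forces op1 to be re-read from memory, so the divide
 * that consumes it cannot be scheduled before the rounding mode change:
 *
 *     fesetround(FeRoundZero);
 *     __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
 *     fpType temp = op1 / op2;
 */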
47
48std::string
49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
50{
51 std::stringstream ss;
52 printMnemonic(ss);
53 printReg(ss, dest + FP_Base_DepTag);
54 ss << ", ";
55 printReg(ss, op1 + FP_Base_DepTag);
56 return ss.str();
57}
58
59std::string
60FpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
61{
62 std::stringstream ss;
63 printMnemonic(ss);
64 printReg(ss, dest + FP_Base_DepTag);
65 ccprintf(ss, ", #%d", imm);
66 return ss.str();
67}
68
69std::string
70FpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
71{
72 std::stringstream ss;
73 printMnemonic(ss);
74 printReg(ss, dest + FP_Base_DepTag);
75 ss << ", ";
76 printReg(ss, op1 + FP_Base_DepTag);
77 ccprintf(ss, ", #%d", imm);
78 return ss.str();
79}
80
81std::string
82FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
83{
84 std::stringstream ss;
85 printMnemonic(ss);
86 printReg(ss, dest + FP_Base_DepTag);
87 ss << ", ";
88 printReg(ss, op1 + FP_Base_DepTag);
89 ss << ", ";
90 printReg(ss, op2 + FP_Base_DepTag);
91 return ss.str();
92}
93
94std::string
95FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
96{
97 std::stringstream ss;
98 printMnemonic(ss);
99 printReg(ss, dest + FP_Base_DepTag);
100 ss << ", ";
101 printReg(ss, op1 + FP_Base_DepTag);
102 ss << ", ";
103 printReg(ss, op2 + FP_Base_DepTag);
104 ccprintf(ss, ", #%d", imm);
105 return ss.str();
106}
107
108namespace ArmISA
109{
110
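/*
 * Save the host rounding mode, clear any stale host exception flags, and
 * install the guest's VFP rounding mode for the duration of the emulated
 * operation. The returned state is handed back to finishVfp() below.
 */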
111VfpSavedState
112prepFpState(uint32_t rMode)
113{
114 int roundingMode = fegetround();
115 feclearexcept(FeAllExceptions);
116 switch (rMode) {
117 case VfpRoundNearest:
118 fesetround(FeRoundNearest);
119 break;
120 case VfpRoundUpward:
121 fesetround(FeRoundUpward);
122 break;
123 case VfpRoundDown:
124 fesetround(FeRoundDown);
125 break;
126 case VfpRoundZero:
127 fesetround(FeRoundZero);
128 break;
129 }
130 return roundingMode;
131}
132
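/*
 * Fold the exceptions the host FPU raised since prepFpState() into the
 * guest FPSCR and restore the saved rounding mode. Callers pair the two
 * around the emulated operation; a minimal sketch, mirroring binaryOp()
 * below (where flush reflects FPSCR.FZ):
 *
 *     VfpSavedState state = prepFpState(rMode);
 *     fpType dest = func(op1, op2);
 *     finishVfp(fpscr, state, flush);
 */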
133void
134finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)
135{
136 int exceptions = fetestexcept(FeAllExceptions);
137 bool underflow = false;
138 if (exceptions & FeInvalid) {
139 fpscr.ioc = 1;
140 }
141 if (exceptions & FeDivByZero) {
142 fpscr.dzc = 1;
143 }
144 if (exceptions & FeOverflow) {
145 fpscr.ofc = 1;
146 }
147 if (exceptions & FeUnderflow) {
148 underflow = true;
149 fpscr.ufc = 1;
150 }
151 if ((exceptions & FeInexact) && !(underflow && flush)) {
152 fpscr.ixc = 1;
153 }
154 fesetround(state);
155}
156
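/*
 * fixDest() patches the host result of a one-operand operation to match
 * ARM semantics: NaNs are quieted (or replaced by the default NaN when
 * defaultNan is set), and with flush set a subnormal result becomes a
 * correctly signed zero with underflow raised in place of inexact.
 */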
157template <class fpType>
158fpType
159fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
160{
161 int fpClass = std::fpclassify(val);
162 fpType junk = 0.0;
163 if (fpClass == FP_NAN) {
164 const bool single = (sizeof(val) == sizeof(float));
165 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
166 const bool nan = std::isnan(op1);
167 if (!nan || defaultNan) {
168 val = bitsToFp(qnan, junk);
169 } else if (nan) {
170 val = bitsToFp(fpToBits(op1) | qnan, junk);
171 }
172 } else if (fpClass == FP_SUBNORMAL && flush) {
173 // Turn val into a zero with the correct sign.
174 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
175 val = bitsToFp(fpToBits(val) & bitMask, junk);
176 feclearexcept(FeInexact);
177 feraiseexcept(FeUnderflow);
178 }
179 return val;
180}
181
182template
183float fixDest<float>(bool flush, bool defaultNan, float val, float op1);
184template
185double fixDest<double>(bool flush, bool defaultNan, double val, double op1);
186
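/*
 * Two-operand variant: a signaling NaN takes priority over a quiet one,
 * and op1 over op2, matching the architecture's NaN selection order.
 */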
187template <class fpType>
188fpType
189fixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
190{
191 int fpClass = std::fpclassify(val);
192 fpType junk = 0.0;
193 if (fpClass == FP_NAN) {
194 const bool single = (sizeof(val) == sizeof(float));
195 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
196 const bool nan1 = std::isnan(op1);
197 const bool nan2 = std::isnan(op2);
198 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
199 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
200 if ((!nan1 && !nan2) || defaultNan) {
201 val = bitsToFp(qnan, junk);
202 } else if (signal1) {
203 val = bitsToFp(fpToBits(op1) | qnan, junk);
204 } else if (signal2) {
205 val = bitsToFp(fpToBits(op2) | qnan, junk);
206 } else if (nan1) {
207 val = op1;
208 } else if (nan2) {
209 val = op2;
210 }
211 } else if (fpClass == FP_SUBNORMAL && flush) {
212 // Turn val into a zero with the correct sign.
213 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
214 val = bitsToFp(fpToBits(val) & bitMask, junk);
215 feclearexcept(FeInexact);
216 feraiseexcept(FeUnderflow);
217 }
218 return val;
219}
220
221template
222float fixDest<float>(bool flush, bool defaultNan,
223 float val, float op1, float op2);
224template
225double fixDest<double>(bool flush, bool defaultNan,
226 double val, double op1, double op2);
227
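/*
 * Quotients that land exactly on the minimum normal (0x00800000 single,
 * 0x0010000000000000 double, either sign) are recomputed under
 * round-to-zero: ARM detects underflow before rounding and x86 after it,
 * so a result that rounded up into the normal range may still need to be
 * treated as an underflow.
 */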
228template <class fpType>
229fpType
230fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
231{
232 fpType mid = fixDest(flush, defaultNan, val, op1, op2);
233 const bool single = (sizeof(fpType) == sizeof(float));
234 const fpType junk = 0.0;
235 if ((single && (val == bitsToFp(0x00800000, junk) ||
236 val == bitsToFp(0x80800000, junk))) ||
237 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
238 val == bitsToFp(ULL(0x8010000000000000), junk)))
239 ) {
240 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
241 fesetround(FeRoundZero);
242 fpType temp = 0.0;
243 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
244 temp = op1 / op2;
245 if (flushToZero(temp)) {
246 feraiseexcept(FeUnderflow);
247 if (flush) {
248 feclearexcept(FeInexact);
249 mid = temp;
250 }
251 }
252 __asm__ __volatile__("" :: "m" (temp));
253 }
254 return mid;
255}
256
257template
258float fixDivDest<float>(bool flush, bool defaultNan,
259 float val, float op1, float op2);
260template
261double fixDivDest<double>(bool flush, bool defaultNan,
262 double val, double op1, double op2);
263
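/*
 * Narrowing VCVT (double to single): a NaN payload is moved from bits
 * 50:29 of the double into bits 21:0 of the single fraction before the
 * usual fixDest() treatment, and borderline results get the same
 * underflow re-check as above.
 */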
264float
265fixFpDFpSDest(FPSCR fpscr, double val)
266{
267 const float junk = 0.0;
268 float op1 = 0.0;
269 if (std::isnan(val)) {
270 uint64_t valBits = fpToBits(val);
271 uint32_t op1Bits = bits(valBits, 50, 29) |
272 (mask(9) << 22) |
273 (bits(valBits, 63) << 31);
274 op1 = bitsToFp(op1Bits, junk);
275 }
276 float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1);
277 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
278 (FeUnderflow | FeInexact)) {
279 feclearexcept(FeInexact);
280 }
281 if (mid == bitsToFp(0x00800000, junk) ||
282 mid == bitsToFp(0x80800000, junk)) {
283 __asm__ __volatile__("" : "=m" (val) : "m" (val));
284 fesetround(FeRoundZero);
285 float temp = 0.0;
286 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
287 temp = val;
288 if (flushToZero(temp)) {
289 feraiseexcept(FeUnderflow);
290 if (fpscr.fz) {
291 feclearexcept(FeInexact);
292 mid = temp;
293 }
294 }
295 __asm__ __volatile__("" :: "m" (temp));
296 }
297 return mid;
298}
299
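/*
 * Widening VCVT (single to double), mirroring the narrowing case: the
 * NaN payload moves from bits 21:0 up into bits 50:29 of the double
 * fraction.
 */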
300double
301fixFpSFpDDest(FPSCR fpscr, float val)
302{
303 const double junk = 0.0;
304 double op1 = 0.0;
305 if (std::isnan(val)) {
306 uint32_t valBits = fpToBits(val);
307 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
308 (mask(12) << 51) |
309 ((uint64_t)bits(valBits, 31) << 63);
310 op1 = bitsToFp(op1Bits, junk);
311 }
312 double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1);
313 if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
314 mid == bitsToFp(ULL(0x8010000000000000), junk)) {
315 __asm__ __volatile__("" : "=m" (val) : "m" (val));
316 fesetround(FeRoundZero);
317 double temp = 0.0;
318 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
319 temp = val;
320 if (flushToZero(temp)) {
321 feraiseexcept(FeUnderflow);
322 if (fpscr.fz) {
323 feclearexcept(FeInexact);
324 mid = temp;
325 }
326 }
327 __asm__ __volatile__("" :: "m" (temp));
328 }
329 return mid;
330}
331
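/*
 * Single-to-half conversion done by hand on the raw bit patterns, since
 * the host has no native half-precision type. ahp selects the
 * alternative half-precision format (no infinities or NaNs, larger
 * finite range); rounding, denormalization and overflow are all handled
 * explicitly below.
 */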
332uint16_t
333vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
334 uint32_t rMode, bool ahp, float op)
335{
336 uint32_t opBits = fpToBits(op);
337 // Extract the operand.
338 bool neg = bits(opBits, 31);
339 uint32_t exponent = bits(opBits, 30, 23);
340 uint32_t oldMantissa = bits(opBits, 22, 0);
341 uint32_t mantissa = oldMantissa >> (23 - 10);
342 // Do the conversion.
343 uint32_t extra = oldMantissa & mask(23 - 10);
344 if (exponent == 0xff) {
345 if (oldMantissa != 0) {
346 // Nans.
347 if (bits(mantissa, 9) == 0) {
348 // Signalling nan.
349 fpscr.ioc = 1;
350 }
351 if (ahp) {
352 mantissa = 0;
353 exponent = 0;
354 fpscr.ioc = 1;
355 } else if (defaultNan) {
356 mantissa = (1 << 9);
357 exponent = 0x1f;
358 neg = false;
359 } else {
360 exponent = 0x1f;
361 mantissa |= (1 << 9);
362 }
363 } else {
364 // Infinities.
365 exponent = 0x1F;
366 if (ahp) {
367 fpscr.ioc = 1;
368 mantissa = 0x3ff;
369 } else {
370 mantissa = 0;
371 }
372 }
373 } else if (exponent == 0 && oldMantissa == 0) {
374 // Zero, don't need to do anything.
375 } else {
376 // Normalized or denormalized numbers.
377
378 bool inexact = (extra != 0);
379
380 if (exponent == 0) {
381 // Denormalized.
382
383 // If flush to zero is on, this shouldn't happen.
384 assert(!flush);
385
386 // Check for underflow
387 if (inexact || fpscr.ufe)
388 fpscr.ufc = 1;
389
390 // Handle rounding.
391 unsigned mode = rMode;
392 if ((mode == VfpRoundUpward && !neg && extra) ||
393 (mode == VfpRoundDown && neg && extra) ||
394 (mode == VfpRoundNearest &&
395 (extra > (1 << 9) ||
396 (extra == (1 << 9) && bits(mantissa, 0))))) {
397 mantissa++;
398 }
399
400 // See if the number became normalized after rounding.
401 if (mantissa == (1 << 10)) {
402 mantissa = 0;
403 exponent = 1;
404 }
405 } else {
406 // Normalized.
407
408 // We need to track the dropped bits differently since
409 // more can be dropped by denormalizing.
410 bool topOne = bits(extra, 12);
411 bool restZeros = bits(extra, 11, 0) == 0;
412
413 if (exponent <= (127 - 15)) {
414 // The result is too small. Denormalize.
415 mantissa |= (1 << 10);
416 while (mantissa && exponent <= (127 - 15)) {
417 restZeros = restZeros && !topOne;
418 topOne = bits(mantissa, 0);
419 mantissa = mantissa >> 1;
420 exponent++;
421 }
422 if (topOne || !restZeros)
423 inexact = true;
424 exponent = 0;
425 } else {
426 // Change bias.
427 exponent -= (127 - 15);
428 }
429
430 if (exponent == 0 && (inexact || fpscr.ufe)) {
431 // Underflow
432 fpscr.ufc = 1;
433 }
434
435 // Handle rounding.
436 unsigned mode = rMode;
437 bool nonZero = topOne || !restZeros;
438 if ((mode == VfpRoundUpward && !neg && nonZero) ||
439 (mode == VfpRoundDown && neg && nonZero) ||
440 (mode == VfpRoundNearest && topOne &&
441 (!restZeros || bits(mantissa, 0)))) {
442 mantissa++;
443 }
444
445 // See if we rounded up and need to bump the exponent.
446 if (mantissa == (1 << 10)) {
447 mantissa = 0;
448 exponent++;
449 }
450
451 // Deal with overflow
452 if (ahp) {
453 if (exponent >= 0x20) {
454 exponent = 0x1f;
455 mantissa = 0x3ff;
456 fpscr.ioc = 1;
457 // Suppress inexact exception.
458 inexact = false;
459 }
460 } else {
461 if (exponent >= 0x1f) {
462 if ((mode == VfpRoundNearest) ||
463 (mode == VfpRoundUpward && !neg) ||
464 (mode == VfpRoundDown && neg)) {
465 // Overflow to infinity.
466 exponent = 0x1f;
467 mantissa = 0;
468 } else {
469 // Overflow to max normal.
470 exponent = 0x1e;
471 mantissa = 0x3ff;
472 }
473 fpscr.ofc = 1;
474 inexact = true;
475 }
476 }
477 }
478
479 if (inexact) {
480 fpscr.ixc = 1;
481 }
482 }
483 // Reassemble and install the result.
484 uint32_t result = bits(mantissa, 9, 0);
485 replaceBits(result, 14, 10, exponent);
486 if (neg)
487 result |= (1 << 15);
488 return result;
489}
490
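/*
 * Half-to-single conversion, again on raw bit patterns. Half denormals
 * are normalized exactly (single precision has exponent range to spare),
 * and signaling NaNs are quieted with IOC set.
 */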
491float
492vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
493{
494 float junk = 0.0;
495 // Extract the bitfields.
496 bool neg = bits(op, 15);
497 uint32_t exponent = bits(op, 14, 10);
498 uint32_t mantissa = bits(op, 9, 0);
499 // Do the conversion.
500 if (exponent == 0) {
501 if (mantissa != 0) {
502 // Normalize the value.
503 exponent = exponent + (127 - 15) + 1;
504 while (mantissa < (1 << 10)) {
505 mantissa = mantissa << 1;
506 exponent--;
507 }
508 }
509 mantissa = mantissa << (23 - 10);
510 } else if (exponent == 0x1f && !ahp) {
511 // Infinities and nans.
512 exponent = 0xff;
513 if (mantissa != 0) {
514 // Nans.
515 mantissa = mantissa << (23 - 10);
516 if (bits(mantissa, 22) == 0) {
517 // Signalling nan.
518 fpscr.ioc = 1;
519 mantissa |= (1 << 22);
520 }
521 if (defaultNan) {
522 mantissa &= ~mask(22);
523 neg = false;
524 }
525 }
526 } else {
527 exponent = exponent + (127 - 15);
528 mantissa = mantissa << (23 - 10);
529 }
530 // Reassemble the result.
531 uint32_t result = bits(mantissa, 22, 0);
532 replaceBits(result, 30, 23, exponent);
533 if (neg)
534 result |= (1 << 31);
535 return bitsToFp(result, junk);
536}
537
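/*
 * Float to fixed point/integer: scale by 2^imm, round in the mode the
 * caller left in the host FPU (or round-to-zero when rzero is set),
 * then saturate to the signed or unsigned 16/32-bit range. Saturation
 * raises Invalid and suppresses Inexact.
 */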
538uint64_t
539vfpFpSToFixed(float val, bool isSigned, bool half,
540 uint8_t imm, bool rzero)
541{
542 int rmode = rzero ? FeRoundZero : fegetround();
543 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
544 fesetround(FeRoundNearest);
545 val = val * powf(2.0, imm);
546 __asm__ __volatile__("" : "=m" (val) : "m" (val));
547 fesetround(rmode);
548 feclearexcept(FeAllExceptions);
549 __asm__ __volatile__("" : "=m" (val) : "m" (val));
550 float origVal = val;
551 val = rintf(val);
552 int fpType = std::fpclassify(val);
553 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
554 if (fpType == FP_NAN) {
555 feraiseexcept(FeInvalid);
556 }
557 val = 0.0;
558 } else if (origVal != val) {
559 switch (rmode) {
560 case FeRoundNearest:
561 if (origVal - val > 0.5)
562 val += 1.0;
563 else if (val - origVal > 0.5)
564 val -= 1.0;
565 break;
566 case FeRoundDown:
567 if (origVal < val)
568 val -= 1.0;
569 break;
570 case FeRoundUpward:
571 if (origVal > val)
572 val += 1.0;
573 break;
574 }
575 feraiseexcept(FeInexact);
576 }
577
578 if (isSigned) {
579 if (half) {
580 if ((double)val < (int16_t)(1 << 15)) {
581 feraiseexcept(FeInvalid);
582 feclearexcept(FeInexact);
583 return (int16_t)(1 << 15);
584 }
585 if ((double)val > (int16_t)mask(15)) {
586 feraiseexcept(FeInvalid);
587 feclearexcept(FeInexact);
588 return (int16_t)mask(15);
589 }
590 return (int16_t)val;
591 } else {
592 if ((double)val < (int32_t)(1 << 31)) {
593 feraiseexcept(FeInvalid);
594 feclearexcept(FeInexact);
595 return (int32_t)(1 << 31);
596 }
597 if ((double)val > (int32_t)mask(31)) {
598 feraiseexcept(FeInvalid);
599 feclearexcept(FeInexact);
600 return (int32_t)mask(31);
601 }
602 return (int32_t)val;
603 }
604 } else {
605 if (half) {
606 if ((double)val < 0) {
607 feraiseexcept(FeInvalid);
608 feclearexcept(FeInexact);
609 return 0;
610 }
611 if ((double)val > (mask(16))) {
612 feraiseexcept(FeInvalid);
613 feclearexcept(FeInexact);
614 return mask(16);
615 }
616 return (uint16_t)val;
617 } else {
618 if ((double)val < 0) {
619 feraiseexcept(FeInvalid);
620 feclearexcept(FeInexact);
621 return 0;
622 }
623 if ((double)val > (mask(32))) {
624 feraiseexcept(FeInvalid);
625 feclearexcept(FeInexact);
626 return mask(32);
627 }
628 return (uint32_t)val;
629 }
630 }
631}
632
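/*
 * Unsigned fixed point to float: convert, divide by 2^imm, and let
 * fixDivDest() apply the ARM-specific underflow and NaN corrections to
 * the quotient. The signed and double-precision variants below follow
 * the same pattern.
 */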
633float
634vfpUFixedToFpS(bool flush, bool defaultNan,
635 uint32_t val, bool half, uint8_t imm)
636{
637 fesetround(FeRoundNearest);
638 if (half)
639 val = (uint16_t)val;
640 float scale = powf(2.0, imm);
641 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
642 feclearexcept(FeAllExceptions);
643 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
644 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
645}
646
647float
648vfpSFixedToFpS(bool flush, bool defaultNan,
649 int32_t val, bool half, uint8_t imm)
650{
651 fesetround(FeRoundNearest);
652 if (half)
653 val = sext<16>(val & mask(16));
654 float scale = powf(2.0, imm);
655 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
656 feclearexcept(FeAllExceptions);
657 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
658 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
659}
660
661uint64_t
662vfpFpDToFixed(double val, bool isSigned, bool half,
663 uint8_t imm, bool rzero)
664{
665 int rmode = rzero ? FeRoundZero : fegetround();
666 fesetround(FeRoundNearest);
667 val = val * pow(2.0, imm);
668 __asm__ __volatile__("" : "=m" (val) : "m" (val));
669 fesetround(rmode);
670 feclearexcept(FeAllExceptions);
671 __asm__ __volatile__("" : "=m" (val) : "m" (val));
672 double origVal = val;
673 val = rint(val);
674 int fpType = std::fpclassify(val);
675 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
676 if (fpType == FP_NAN) {
677 feraiseexcept(FeInvalid);
678 }
679 val = 0.0;
680 } else if (origVal != val) {
681 switch (rmode) {
682 case FeRoundNearest:
683 if (origVal - val > 0.5)
684 val += 1.0;
685 else if (val - origVal > 0.5)
686 val -= 1.0;
687 break;
688 case FeRoundDown:
689 if (origVal < val)
690 val -= 1.0;
691 break;
692 case FeRoundUpward:
693 if (origVal > val)
694 val += 1.0;
695 break;
696 }
697 feraiseexcept(FeInexact);
698 }
699 if (isSigned) {
700 if (half) {
701 if (val < (int16_t)(1 << 15)) {
702 feraiseexcept(FeInvalid);
703 feclearexcept(FeInexact);
704 return (int16_t)(1 << 15);
705 }
706 if (val > (int16_t)mask(15)) {
707 feraiseexcept(FeInvalid);
708 feclearexcept(FeInexact);
709 return (int16_t)mask(15);
710 }
711 return (int16_t)val;
712 } else {
713 if (val < (int32_t)(1 << 31)) {
714 feraiseexcept(FeInvalid);
715 feclearexcept(FeInexact);
716 return (int32_t)(1 << 31);
717 }
718 if (val > (int32_t)mask(31)) {
719 feraiseexcept(FeInvalid);
720 feclearexcept(FeInexact);
721 return (int32_t)mask(31);
722 }
723 return (int32_t)val;
724 }
725 } else {
726 if (half) {
727 if (val < 0) {
728 feraiseexcept(FeInvalid);
729 feclearexcept(FeInexact);
730 return 0;
731 }
732 if (val > mask(16)) {
733 feraiseexcept(FeInvalid);
734 feclearexcept(FeInexact);
735 return mask(16);
736 }
737 return (uint16_t)val;
738 } else {
739 if (val < 0) {
740 feraiseexcept(FeInvalid);
741 feclearexcept(FeInexact);
742 return 0;
743 }
744 if (val > mask(32)) {
745 feraiseexcept(FeInvalid);
746 feclearexcept(FeInexact);
747 return mask(32);
748 }
749 return (uint32_t)val;
750 }
751 }
752}
753
754double
755vfpUFixedToFpD(bool flush, bool defaultNan,
756 uint32_t val, bool half, uint8_t imm)
757{
758 fesetround(FeRoundNearest);
759 if (half)
760 val = (uint16_t)val;
761 double scale = pow(2.0, imm);
762 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
763 feclearexcept(FeAllExceptions);
764 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
765 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
766}
767
768double
769vfpSFixedToFpD(bool flush, bool defaultNan,
770 int32_t val, bool half, uint8_t imm)
771{
772 fesetround(FeRoundNearest);
773 if (half)
774 val = sext<16>(val & mask(16));
775 double scale = pow(2.0, imm);
776 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
777 feclearexcept(FeAllExceptions);
778 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
779 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
780}
781
782// This function implements a magic formula taken from the architecture
783// reference manual. It was originally called recip_sqrt_estimate.
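// The operand is quantized to nine bits and the result to eight
// fractional bits, giving the table-lookup accuracy the pseudocode
// specifies rather than full host precision.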
784static double
785recipSqrtEstimate(double a)
786{
787 int64_t q0, q1, s;
788 double r;
789 if (a < 0.5) {
790 q0 = (int64_t)(a * 512.0);
791 r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0);
792 } else {
793 q1 = (int64_t)(a * 256.0);
794 r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0);
795 }
796 s = (int64_t)(256.0 * r + 0.5);
797 return (double)s / 256.0;
798}
799
800// This function is only intended for use in Neon instructions because
801// it ignores certain bits in the FPSCR.
802float
803fprSqrtEstimate(FPSCR &fpscr, float op)
804{
805 const uint32_t qnan = 0x7fc00000;
806 float junk = 0.0;
807 int fpClass = std::fpclassify(op);
808 if (fpClass == FP_NAN) {
809 if ((fpToBits(op) & qnan) != qnan)
810 fpscr.ioc = 1;
811 return bitsToFp(qnan, junk);
812 } else if (fpClass == FP_ZERO) {
813 fpscr.dzc = 1;
814 // Return infinity with the same sign as the operand.
815 return bitsToFp((std::signbit(op) << 31) |
816 (0xFF << 23) | (0 << 0), junk);
817 } else if (std::signbit(op)) {
818 // Set invalid op bit.
819 fpscr.ioc = 1;
820 return bitsToFp(qnan, junk);
821 } else if (fpClass == FP_INFINITE) {
822 return 0.0;
823 } else {
824 uint64_t opBits = fpToBits(op);
825 double scaled;
826 if (bits(opBits, 23)) {
827 scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
828 (ULL(0x3fd) << 52) | (bits(opBits, 31) << 63),
829 (double)0.0);
830 } else {
831 scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
832 (ULL(0x3fe) << 52) | (bits(opBits, 31) << 63),
833 (double)0.0);
834 }
835 uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2;
836
837 uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));
838
839 return bitsToFp((bits(estimate, 63) << 31) |
840 (bits(resultExp, 7, 0) << 23) |
841 (bits(estimate, 51, 29) << 0), junk);
842 }
843}
844
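/*
 * URSQRTE: the 32-bit operand is treated as a fixed-point fraction,
 * repacked as a double in [0.25, 1.0), and run through the same
 * estimate. Operands below 0.25 (top two bits clear) return all ones.
 */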
845uint32_t
846unsignedRSqrtEstimate(uint32_t op)
847{
848 if (bits(op, 31, 30) == 0) {
849 return -1;
850 } else {
851 double dpOp;
852 if (bits(op, 31)) {
853 dpOp = bitsToFp((ULL(0) << 63) |
854 (ULL(0x3fe) << 52) |
855 (bits((uint64_t)op, 30, 0) << 21) |
856 (0 << 0), (double)0.0);
857 } else {
858 dpOp = bitsToFp((ULL(0) << 63) |
859 (ULL(0x3fd) << 52) |
860 (bits((uint64_t)op, 29, 0) << 22) |
861 (0 << 0), (double)0.0);
862 }
863 uint64_t estimate = fpToBits(recipSqrtEstimate(dpOp));
864 return (1 << 31) | bits(estimate, 51, 21);
865 }
866}
867
868// This function implements a magic formula taken from the architecture
869// reference manual. It was originally called recip_estimate.
870
871static double
872recipEstimate(double a)
873{
874 int64_t q, s;
875 double r;
876 q = (int64_t)(a * 512.0);
877 r = 1.0 / (((double)q + 0.5) / 512.0);
878 s = (int64_t)(256.0 * r + 0.5);
879 return (double)s / 256.0;
880}
881
882// This function is only intended for use in Neon instructions because
883// it ignores certain bits in the FPSCR.
884float
885fpRecipEstimate(FPSCR &fpscr, float op)
886{
887 const uint32_t qnan = 0x7fc00000;
888 float junk = 0.0;
889 int fpClass = std::fpclassify(op);
890 if (fpClass == FP_NAN) {
891 if ((fpToBits(op) & qnan) != qnan)
892 fpscr.ioc = 1;
893 return bitsToFp(qnan, junk);
894 } else if (fpClass == FP_INFINITE) {
895 return bitsToFp(std::signbit(op) << 31, junk);
896 } else if (fpClass == FP_ZERO) {
897 fpscr.dzc = 1;
898 // Return infinity with the same sign as the operand.
899 return bitsToFp((std::signbit(op) << 31) |
900 (0xFF << 23) | (0 << 0), junk);
901 } else if (fabs(op) >= pow(2.0, 126)) {
902 fpscr.ufc = 1;
903 return bitsToFp(std::signbit(op) << 31, junk);
904 } else {
905 uint64_t opBits = fpToBits(op);
906 double scaled;
907 scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
908 (ULL(0x3fe) << 52) | (ULL(0) << 63),
909 (double)0.0);
910 uint64_t resultExp = 253 - bits(opBits, 30, 23);
911
912 uint64_t estimate = fpToBits(recipEstimate(scaled));
913
914 return bitsToFp((bits(opBits, 31) << 31) |
915 (bits(resultExp, 7, 0) << 23) |
916 (bits(estimate, 51, 29) << 0), junk);
917 }
918}
919
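/*
 * URECPE: the same repacking for the reciprocal estimate. Operands with
 * bit 31 clear (below 0.5 as a fraction) return all ones.
 */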
920uint32_t
921unsignedRecipEstimate(uint32_t op)
922{
923 if (bits(op, 31) == 0) {
924 return -1;
925 } else {
926 double dpOp;
927 dpOp = bitsToFp((ULL(0) << 63) |
928 (ULL(0x3fe) << 52) |
929 (bits((uint64_t)op, 30, 0) << 21) |
930 (0 << 0), (double)0.0);
931 uint64_t estimate = fpToBits(recipEstimate(dpOp));
932 return (1 << 31) | bits(estimate, 51, 21);
933 }
934}
935
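/*
 * Common NaN screen for two-operand instructions: if either operand is
 * a NaN, produce the architecturally required result (the default NaN,
 * or the quieted/propagated operand NaN) and set done, flagging IOC for
 * signaling NaNs; otherwise leave done false so the caller computes the
 * real result.
 */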
936template <class fpType>
937fpType
938FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
939 fpType op1, fpType op2) const
940{
941 done = true;
942 fpType junk = 0.0;
943 fpType dest = 0.0;
944 const bool single = (sizeof(fpType) == sizeof(float));
945 const uint64_t qnan =
946 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
947 const bool nan1 = std::isnan(op1);
948 const bool nan2 = std::isnan(op2);
949 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
950 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
951 if (nan1 || nan2) {
952 if (defaultNan) {
953 dest = bitsToFp(qnan, junk);
954 } else if (signal1) {
955 dest = bitsToFp(fpToBits(op1) | qnan, junk);
956 } else if (signal2) {
957 dest = bitsToFp(fpToBits(op2) | qnan, junk);
958 } else if (nan1) {
959 dest = op1;
960 } else if (nan2) {
961 dest = op2;
962 }
963 if (signal1 || signal2) {
964 fpscr.ioc = 1;
965 }
966 } else {
967 done = false;
968 }
969 return dest;
970}
971
972template
973float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
974 float op1, float op2) const;
975template
976double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
977 double op1, double op2) const;
978
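/*
 * binaryOp() runs the host implementation of a two-operand function
 * under the guest rounding mode, then repairs the differences between
 * x86 and ARM semantics: NaN selection, flush-to-zero of inputs and
 * outputs, and underflow being detected before rounding on ARM but
 * after it on x86.
 */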
979template <class fpType>
980fpType
981FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
982 fpType (*func)(fpType, fpType),
983 bool flush, bool defaultNan, uint32_t rMode) const
984{
985 const bool single = (sizeof(fpType) == sizeof(float));
986 fpType junk = 0.0;
987
988 if (flush && flushToZero(op1, op2))
989 fpscr.idc = 1;
990 VfpSavedState state = prepFpState(rMode);
991 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
992 : "m" (op1), "m" (op2), "m" (state));
993 fpType dest = func(op1, op2);
994 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
995
996 int fpClass = std::fpclassify(dest);
997 // Get NAN behavior right. This varies between x86 and ARM.
998 if (fpClass == FP_NAN) {
999 const bool single = (sizeof(fpType) == sizeof(float));
1000 const uint64_t qnan =
1001 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
1002 const bool nan1 = std::isnan(op1);
1003 const bool nan2 = std::isnan(op2);
1004 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1005 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1006 if ((!nan1 && !nan2) || defaultNan) {
1007 dest = bitsToFp(qnan, junk);
1008 } else if (signal1) {
1009 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1010 } else if (signal2) {
1011 dest = bitsToFp(fpToBits(op2) | qnan, junk);
1012 } else if (nan1) {
1013 dest = op1;
1014 } else if (nan2) {
1015 dest = op2;
1016 }
1017 } else if (flush && flushToZero(dest)) {
1018 feraiseexcept(FeUnderflow);
1019 } else if ((
1020 (single && (dest == bitsToFp(0x00800000, junk) ||
1021 dest == bitsToFp(0x80800000, junk))) ||
1022 (!single &&
1023 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
1024 dest == bitsToFp(ULL(0x8010000000000000), junk)))
1025 ) && rMode != VfpRoundZero) {
1026 /*
1027 * Correct for the fact that underflow is detected -before- rounding
1028 * in ARM and -after- rounding in x86.
1029 */
1030 fesetround(FeRoundZero);
1031 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
1032 : "m" (op1), "m" (op2));
1033 fpType temp = func(op1, op2);
1034 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1035 if (flush && flushToZero(temp)) {
1036 dest = temp;
1037 }
1038 }
1039 finishVfp(fpscr, state, flush);
1040 return dest;
1041}
1042
1043template
1044float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
1045 float (*func)(float, float),
1046 bool flush, bool defaultNan, uint32_t rMode) const;
1047template
1048double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
1049 double (*func)(double, double),
1050 bool flush, bool defaultNan, uint32_t rMode) const;
1051
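/*
 * unaryOp() is the same recipe as binaryOp() for one-operand functions
 * (e.g. a square root routine passed in as func).
 */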
1052template <class fpType>
1053fpType
1054FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
1055 bool flush, uint32_t rMode) const
1056{
1057 const bool single = (sizeof(fpType) == sizeof(float));
1058 fpType junk = 0.0;
1059
1060 if (flush && flushToZero(op1))
1061 fpscr.idc = 1;
1062 VfpSavedState state = prepFpState(rMode);
1063 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
1064 : "m" (op1), "m" (state));
1065 fpType dest = func(op1);
1066 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1067
1068 int fpClass = std::fpclassify(dest);
1069 // Get NAN behavior right. This varies between x86 and ARM.
1070 if (fpClass == FP_NAN) {
1071 const bool single = (sizeof(fpType) == sizeof(float));
1072 const uint64_t qnan =
1073 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
1074 const bool nan = std::isnan(op1);
1075 if (!nan || fpscr.dn == 1) {
1076 dest = bitsToFp(qnan, junk);
1077 } else if (nan) {
1078 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1079 }
1080 } else if (flush && flushToZero(dest)) {
1081 feraiseexcept(FeUnderflow);
1082 } else if ((
1083 (single && (dest == bitsToFp(0x00800000, junk) ||
1084 dest == bitsToFp(0x80800000, junk))) ||
1085 (!single &&
1086 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
1087 dest == bitsToFp(ULL(0x8010000000000000), junk)))
1088 ) && rMode != VfpRoundZero) {
1089 /*
1090 * Correct for the fact that underflow is detected -before- rounding
1091 * in ARM and -after- rounding in x86.
1092 */
1093 fesetround(FeRoundZero);
1094 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
1095 fpType temp = func(op1);
1096 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1097 if (flush && flushToZero(temp)) {
1098 dest = temp;
1099 }
1100 }
1101 finishVfp(fpscr, state, flush);
1102 return dest;
1103}
1104
1105template
1106float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
1107 bool flush, uint32_t rMode) const;
1108template
1109double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
1110 bool flush, uint32_t rMode) const;
1111
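/*
 * Short-vector support: addStride() advances a register index by the
 * FPSCR stride (doubled for wide, i.e. double-precision, operands),
 * wrapping within the index's eight-register bank so vector operands
 * never cross banks.
 */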
1112IntRegIndex
1113VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
1114{
1115 if (wide) {
1116 stride *= 2;
1117 }
1118 unsigned offset = idx % 8;
1119 idx = (IntRegIndex)(idx - offset);
1120 offset += stride;
1121 idx = (IntRegIndex)(idx + (offset % 8));
1122 return idx;
1123}
1124
1125void
1126VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
1127{
1128 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1129 assert(!inScalarBank(dest));
1130 dest = addStride(dest, stride);
1131 op1 = addStride(op1, stride);
1132 if (!inScalarBank(op2)) {
1133 op2 = addStride(op2, stride);
1134 }
1135}
1136
1137void
1138VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
1139{
1140 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1141 assert(!inScalarBank(dest));
1142 dest = addStride(dest, stride);
1143 if (!inScalarBank(op1)) {
1144 op1 = addStride(op1, stride);
1145 }
1146}
1147
1148void
1149VfpMacroOp::nextIdxs(IntRegIndex &dest)
1150{
1151 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1152 assert(!inScalarBank(dest));
1153 dest = addStride(dest, stride);
1154}
1155
1156}