ARM: Implement flush to zero mode for VFP, and clean up some corner cases.

This commit is contained in:
Gabe Black 2010-06-02 12:58:15 -05:00
parent efbceff96a
commit 8466999aef
2 changed files with 134 additions and 5 deletions

View file

@ -43,6 +43,7 @@
#include "arch/arm/insts/misc.hh" #include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh" #include "arch/arm/miscregs.hh"
#include <fenv.h> #include <fenv.h>
#include <cmath>
enum VfpMicroMode { enum VfpMicroMode {
VfpNotAMicroop, VfpNotAMicroop,
@ -101,6 +102,26 @@ enum VfpRoundingMode
VfpRoundZero = 3 VfpRoundZero = 3
}; };
/**
 * Apply flush-to-zero handling to a single floating point operand.
 *
 * When the fz field of the supplied FPSCR value is 1 and op classifies as
 * subnormal, op is replaced by a zero carrying the same sign and the idc
 * field is set to 1. In every other case op is left untouched.
 *
 * @param _fpscr Raw FPSCR value. It is decoded into an FPSCR, possibly
 *               updated (idc), and written back.
 * @param op Operand to inspect and, if required, flush in place.
 */
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &_fpscr, fpType &op)
{
    FPSCR fpscr = _fpscr;
    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
        fpscr.idc = 1;
        // Preserve the sign of the flushed operand: a negative subnormal
        // has to behave as -0 so that results whose sign depends on the
        // operand's sign (products, quotients, square roots) stay correct.
        op = std::signbit(op) ? -fpType(0) : fpType(0);
    }
    _fpscr = fpscr;
}
/**
 * Two-operand convenience wrapper around the single-operand
 * vfpFlushToZero: each operand is handed to it in turn (op1 first, then
 * op2) together with the same FPSCR reference.
 *
 * @param fpscr Raw FPSCR value forwarded to the single-operand overload.
 * @param op1 First operand, processed first.
 * @param op2 Second operand, processed second.
 */
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    // Walk the operands in declaration order so op1 is always handled
    // before op2.
    fpType *const operands[] = { &op1, &op2 };
    for (unsigned idx = 0; idx < 2; ++idx) {
        vfpFlushToZero(fpscr, *operands[idx]);
    }
}
static inline uint64_t static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
{ {
@ -108,24 +129,41 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
val = val * powf(2.0, imm); val = val * powf(2.0, imm);
__asm__ __volatile__("" : "=m" (val) : "m" (val)); __asm__ __volatile__("" : "=m" (val) : "m" (val));
feclearexcept(FeAllExceptions); feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (val) : "m" (val));
float origVal = val;
val = rintf(val);
int fpType = std::fpclassify(val);
if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
if (fpType == FP_NAN) {
feraiseexcept(FeInvalid);
}
val = 0.0;
} else if (origVal != val) {
feraiseexcept(FeInexact);
}
if (isSigned) { if (isSigned) {
if (half) { if (half) {
if ((double)val < (int16_t)(1 << 15)) { if ((double)val < (int16_t)(1 << 15)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int16_t)(1 << 15); return (int16_t)(1 << 15);
} }
if ((double)val > (int16_t)mask(15)) { if ((double)val > (int16_t)mask(15)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int16_t)mask(15); return (int16_t)mask(15);
} }
return (int16_t)val; return (int16_t)val;
} else { } else {
if ((double)val < (int32_t)(1 << 31)) { if ((double)val < (int32_t)(1 << 31)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int32_t)(1 << 31); return (int32_t)(1 << 31);
} }
if ((double)val > (int32_t)mask(31)) { if ((double)val > (int32_t)mask(31)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int32_t)mask(31); return (int32_t)mask(31);
} }
return (int32_t)val; return (int32_t)val;
@ -134,20 +172,24 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
if (half) { if (half) {
if ((double)val < 0) { if ((double)val < 0) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return 0; return 0;
} }
if ((double)val > (mask(16))) { if ((double)val > (mask(16))) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return mask(16); return mask(16);
} }
return (uint16_t)val; return (uint16_t)val;
} else { } else {
if ((double)val < 0) { if ((double)val < 0) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return 0; return 0;
} }
if ((double)val > (mask(32))) { if ((double)val > (mask(32))) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return mask(32); return mask(32);
} }
return (uint32_t)val; return (uint32_t)val;
@ -161,7 +203,11 @@ vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
fesetround(FeRoundNearest); fesetround(FeRoundNearest);
if (half) if (half)
val = (uint16_t)val; val = (uint16_t)val;
return val / powf(2.0, imm); float scale = powf(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return val / scale;
} }
static inline float static inline float
@ -170,34 +216,55 @@ vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
fesetround(FeRoundNearest); fesetround(FeRoundNearest);
if (half) if (half)
val = sext<16>(val & mask(16)); val = sext<16>(val & mask(16));
return val / powf(2.0, imm); float scale = powf(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return val / scale;
} }
static inline uint64_t static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm) vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
{ {
fesetround(FeRoundZero); fesetround(FeRoundNearest);
val = val * pow(2.0, imm); val = val * pow(2.0, imm);
__asm__ __volatile__("" : "=m" (val) : "m" (val)); __asm__ __volatile__("" : "=m" (val) : "m" (val));
fesetround(FeRoundZero);
feclearexcept(FeAllExceptions); feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (val) : "m" (val));
double origVal = val;
val = rint(val);
int fpType = std::fpclassify(val);
if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
if (fpType == FP_NAN) {
feraiseexcept(FeInvalid);
}
val = 0.0;
} else if (origVal != val) {
feraiseexcept(FeInexact);
}
if (isSigned) { if (isSigned) {
if (half) { if (half) {
if (val < (int16_t)(1 << 15)) { if (val < (int16_t)(1 << 15)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int16_t)(1 << 15); return (int16_t)(1 << 15);
} }
if (val > (int16_t)mask(15)) { if (val > (int16_t)mask(15)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int16_t)mask(15); return (int16_t)mask(15);
} }
return (int16_t)val; return (int16_t)val;
} else { } else {
if (val < (int32_t)(1 << 31)) { if (val < (int32_t)(1 << 31)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int32_t)(1 << 31); return (int32_t)(1 << 31);
} }
if (val > (int32_t)mask(31)) { if (val > (int32_t)mask(31)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return (int32_t)mask(31); return (int32_t)mask(31);
} }
return (int32_t)val; return (int32_t)val;
@ -206,20 +273,24 @@ vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
if (half) { if (half) {
if (val < 0) { if (val < 0) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return 0; return 0;
} }
if (val > mask(16)) { if (val > mask(16)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return mask(16); return mask(16);
} }
return (uint16_t)val; return (uint16_t)val;
} else { } else {
if (val < 0) { if (val < 0) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return 0; return 0;
} }
if (val > mask(32)) { if (val > mask(32)) {
feraiseexcept(FeInvalid); feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
return mask(32); return mask(32);
} }
return (uint32_t)val; return (uint32_t)val;
@ -233,7 +304,11 @@ vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
fesetround(FeRoundNearest); fesetround(FeRoundNearest);
if (half) if (half)
val = (uint16_t)val; val = (uint16_t)val;
return val / pow(2.0, imm); double scale = pow(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return val / scale;
} }
static inline double static inline double
@ -242,7 +317,11 @@ vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
fesetround(FeRoundNearest); fesetround(FeRoundNearest);
if (half) if (half)
val = sext<16>(val & mask(16)); val = sext<16>(val & mask(16));
return val / pow(2.0, imm); double scale = pow(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return val / scale;
} }
typedef int VfpSavedState; typedef int VfpSavedState;

View file

@ -383,6 +383,7 @@ let {{
exec_output = "" exec_output = ""
vmulSCode = ''' vmulSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = FpOp1 * FpOp2; FpDest = FpOp1 * FpOp2;
@ -403,6 +404,7 @@ let {{
IntDoubleUnion cOp1, cOp2, cDest; IntDoubleUnion cOp1, cOp2, cDest;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
cDest.fp = cOp1.fp * cOp2.fp; cDest.fp = cOp1.fp * cOp2.fp;
@ -471,6 +473,7 @@ let {{
exec_output += PredOpExecute.subst(vabsDIop); exec_output += PredOpExecute.subst(vabsDIop);
vaddSCode = ''' vaddSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = FpOp1 + FpOp2; FpDest = FpOp1 + FpOp2;
@ -488,6 +491,7 @@ let {{
IntDoubleUnion cOp1, cOp2, cDest; IntDoubleUnion cOp1, cOp2, cDest;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
cDest.fp = cOp1.fp + cOp2.fp; cDest.fp = cOp1.fp + cOp2.fp;
@ -504,6 +508,7 @@ let {{
exec_output += PredOpExecute.subst(vaddDIop); exec_output += PredOpExecute.subst(vaddDIop);
vsubSCode = ''' vsubSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = FpOp1 - FpOp2; FpDest = FpOp1 - FpOp2;
@ -521,6 +526,7 @@ let {{
IntDoubleUnion cOp1, cOp2, cDest; IntDoubleUnion cOp1, cOp2, cDest;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
cDest.fp = cOp1.fp - cOp2.fp; cDest.fp = cOp1.fp - cOp2.fp;
@ -537,6 +543,7 @@ let {{
exec_output += PredOpExecute.subst(vsubDIop); exec_output += PredOpExecute.subst(vsubDIop);
vdivSCode = ''' vdivSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = FpOp1 / FpOp2; FpDest = FpOp1 / FpOp2;
@ -554,6 +561,7 @@ let {{
IntDoubleUnion cOp1, cOp2, cDest; IntDoubleUnion cOp1, cOp2, cDest;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp));
cDest.fp = cOp1.fp / cOp2.fp; cDest.fp = cOp1.fp / cOp2.fp;
@ -570,6 +578,7 @@ let {{
exec_output += PredOpExecute.subst(vdivDIop); exec_output += PredOpExecute.subst(vdivDIop);
vsqrtSCode = ''' vsqrtSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = sqrtf(FpOp1); FpDest = sqrtf(FpOp1);
@ -589,6 +598,7 @@ let {{
vsqrtDCode = ''' vsqrtDCode = '''
IntDoubleUnion cOp1, cDest; IntDoubleUnion cOp1, cDest;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp));
cDest.fp = sqrt(cOp1.fp); cDest.fp = sqrt(cOp1.fp);
@ -615,12 +625,14 @@ let {{
exec_output = "" exec_output = ""
vmlaSCode = ''' vmlaSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
float mid = FpOp1 * FpOp2; float mid = FpOp1 * FpOp2;
if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, FpDest, mid);
FpDest = FpDest + mid; FpDest = FpDest + mid;
__asm__ __volatile__("" :: "m" (FpDest)); __asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -637,6 +649,7 @@ let {{
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
double mid = cOp1.fp * cOp2.fp; double mid = cOp1.fp * cOp2.fp;
@ -644,6 +657,7 @@ let {{
(isinf(cOp2.fp) && cOp1.fp == 0)) { (isinf(cOp2.fp) && cOp1.fp == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, cDest.fp, mid);
cDest.fp = cDest.fp + mid; cDest.fp = cDest.fp + mid;
__asm__ __volatile__("" :: "m" (cDest.fp)); __asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -658,12 +672,14 @@ let {{
exec_output += PredOpExecute.subst(vmlaDIop); exec_output += PredOpExecute.subst(vmlaDIop);
vmlsSCode = ''' vmlsSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
float mid = FpOp1 * FpOp2; float mid = FpOp1 * FpOp2;
if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, FpDest, mid);
FpDest = FpDest - mid; FpDest = FpDest - mid;
__asm__ __volatile__("" :: "m" (FpDest)); __asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -680,6 +696,7 @@ let {{
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
double mid = cOp1.fp * cOp2.fp; double mid = cOp1.fp * cOp2.fp;
@ -688,6 +705,7 @@ let {{
mid = NAN; mid = NAN;
} }
cDest.fp = cDest.fp - mid; cDest.fp = cDest.fp - mid;
vfpFlushToZero(Fpscr, cDest.fp, mid);
__asm__ __volatile__("" :: "m" (cDest.fp)); __asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits; FpDestP0.uw = cDest.bits;
@ -701,12 +719,14 @@ let {{
exec_output += PredOpExecute.subst(vmlsDIop); exec_output += PredOpExecute.subst(vmlsDIop);
vnmlaSCode = ''' vnmlaSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
float mid = FpOp1 * FpOp2; float mid = FpOp1 * FpOp2;
if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, FpDest, mid);
FpDest = -FpDest - mid; FpDest = -FpDest - mid;
__asm__ __volatile__("" :: "m" (FpDest)); __asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -723,6 +743,7 @@ let {{
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
double mid = cOp1.fp * cOp2.fp; double mid = cOp1.fp * cOp2.fp;
@ -730,6 +751,7 @@ let {{
(isinf(cOp2.fp) && cOp1.fp == 0)) { (isinf(cOp2.fp) && cOp1.fp == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, cDest.fp, mid);
cDest.fp = -cDest.fp - mid; cDest.fp = -cDest.fp - mid;
__asm__ __volatile__("" :: "m" (cDest.fp)); __asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -744,12 +766,14 @@ let {{
exec_output += PredOpExecute.subst(vnmlaDIop); exec_output += PredOpExecute.subst(vnmlaDIop);
vnmlsSCode = ''' vnmlsSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
float mid = FpOp1 * FpOp2; float mid = FpOp1 * FpOp2;
if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, FpDest, mid);
FpDest = -FpDest + mid; FpDest = -FpDest + mid;
__asm__ __volatile__("" :: "m" (FpDest)); __asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -766,6 +790,7 @@ let {{
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
double mid = cOp1.fp * cOp2.fp; double mid = cOp1.fp * cOp2.fp;
@ -773,6 +798,7 @@ let {{
(isinf(cOp2.fp) && cOp1.fp == 0)) { (isinf(cOp2.fp) && cOp1.fp == 0)) {
mid = NAN; mid = NAN;
} }
vfpFlushToZero(Fpscr, cDest.fp, mid);
cDest.fp = -cDest.fp + mid; cDest.fp = -cDest.fp + mid;
__asm__ __volatile__("" :: "m" (cDest.fp)); __asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state); Fpscr = setVfpFpscr(Fpscr, state);
@ -787,6 +813,7 @@ let {{
exec_output += PredOpExecute.subst(vnmlsDIop); exec_output += PredOpExecute.subst(vnmlsDIop);
vnmulSCode = ''' vnmulSCode = '''
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
float mid = FpOp1 * FpOp2; float mid = FpOp1 * FpOp2;
@ -809,6 +836,7 @@ let {{
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
double mid = cOp1.fp * cOp2.fp; double mid = cOp1.fp * cOp2.fp;
@ -899,6 +927,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtSIntFpDIop); exec_output += PredOpExecute.subst(vcvtSIntFpDIop);
vcvtFpUIntSRCode = ''' vcvtFpUIntSRCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = FpOp1; FpDest.uw = FpOp1;
@ -915,6 +944,7 @@ let {{
vcvtFpUIntDRCode = ''' vcvtFpUIntDRCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
uint64_t result = cOp1.fp; uint64_t result = cOp1.fp;
@ -930,6 +960,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtFpUIntDRIop); exec_output += PredOpExecute.subst(vcvtFpUIntDRIop);
vcvtFpSIntSRCode = ''' vcvtFpSIntSRCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = FpOp1; FpDest.sw = FpOp1;
@ -946,6 +977,7 @@ let {{
vcvtFpSIntDRCode = ''' vcvtFpSIntDRCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
int64_t result = cOp1.fp; int64_t result = cOp1.fp;
@ -961,6 +993,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtFpSIntDRIop); exec_output += PredOpExecute.subst(vcvtFpSIntDRIop);
vcvtFpUIntSCode = ''' vcvtFpUIntSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
fesetround(FeRoundZero); fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
@ -978,6 +1011,7 @@ let {{
vcvtFpUIntDCode = ''' vcvtFpUIntDCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
fesetround(FeRoundZero); fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
@ -994,6 +1028,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtFpUIntDIop); exec_output += PredOpExecute.subst(vcvtFpUIntDIop);
vcvtFpSIntSCode = ''' vcvtFpSIntSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
fesetround(FeRoundZero); fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
@ -1011,6 +1046,7 @@ let {{
vcvtFpSIntDCode = ''' vcvtFpSIntDCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
fesetround(FeRoundZero); fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
@ -1028,6 +1064,7 @@ let {{
vcvtFpSFpDCode = ''' vcvtFpSFpDCode = '''
IntDoubleUnion cDest; IntDoubleUnion cDest;
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
cDest.fp = FpOp1; cDest.fp = FpOp1;
@ -1046,6 +1083,7 @@ let {{
vcvtFpDFpSCode = ''' vcvtFpDFpSCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
FpDest = cOp1.fp; FpDest = cOp1.fp;
@ -1061,6 +1099,7 @@ let {{
vcmpSCode = ''' vcmpSCode = '''
FPSCR fpscr = Fpscr; FPSCR fpscr = Fpscr;
vfpFlushToZero(Fpscr, FpDest, FpOp1);
if (FpDest == FpOp1) { if (FpDest == FpOp1) {
fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
} else if (FpDest < FpOp1) { } else if (FpDest < FpOp1) {
@ -1083,6 +1122,7 @@ let {{
IntDoubleUnion cOp1, cDest; IntDoubleUnion cOp1, cDest;
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cDest.fp, cOp1.fp);
FPSCR fpscr = Fpscr; FPSCR fpscr = Fpscr;
if (cDest.fp == cOp1.fp) { if (cDest.fp == cOp1.fp) {
fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
@ -1104,6 +1144,7 @@ let {{
vcmpZeroSCode = ''' vcmpZeroSCode = '''
FPSCR fpscr = Fpscr; FPSCR fpscr = Fpscr;
vfpFlushToZero(Fpscr, FpDest);
if (FpDest == imm) { if (FpDest == imm) {
fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
} else if (FpDest < imm) { } else if (FpDest < imm) {
@ -1125,6 +1166,7 @@ let {{
vcmpZeroDCode = ''' vcmpZeroDCode = '''
IntDoubleUnion cDest; IntDoubleUnion cDest;
cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32)); cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
vfpFlushToZero(Fpscr, cDest.fp);
FPSCR fpscr = Fpscr; FPSCR fpscr = Fpscr;
if (cDest.fp == imm) { if (cDest.fp == imm) {
fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
@ -1152,6 +1194,7 @@ let {{
exec_output = "" exec_output = ""
vcvtFpSFixedSCode = ''' vcvtFpSFixedSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm); FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
@ -1168,6 +1211,7 @@ let {{
vcvtFpSFixedDCode = ''' vcvtFpSFixedDCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
uint64_t mid = vfpFpDToFixed(cOp1.fp, true, false, imm); uint64_t mid = vfpFpDToFixed(cOp1.fp, true, false, imm);
@ -1184,6 +1228,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtFpSFixedDIop); exec_output += PredOpExecute.subst(vcvtFpSFixedDIop);
vcvtFpUFixedSCode = ''' vcvtFpUFixedSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm); FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
@ -1200,6 +1245,7 @@ let {{
vcvtFpUFixedDCode = ''' vcvtFpUFixedDCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
uint64_t mid = vfpFpDToFixed(cOp1.fp, false, false, imm); uint64_t mid = vfpFpDToFixed(cOp1.fp, false, false, imm);
@ -1280,6 +1326,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtUFixedFpDIop); exec_output += PredOpExecute.subst(vcvtUFixedFpDIop);
vcvtFpSHFixedSCode = ''' vcvtFpSHFixedSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm); FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
@ -1297,6 +1344,7 @@ let {{
vcvtFpSHFixedDCode = ''' vcvtFpSHFixedDCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
uint64_t result = vfpFpDToFixed(cOp1.fp, true, true, imm); uint64_t result = vfpFpDToFixed(cOp1.fp, true, true, imm);
@ -1314,6 +1362,7 @@ let {{
exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop); exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop);
vcvtFpUHFixedSCode = ''' vcvtFpUHFixedSCode = '''
vfpFlushToZero(Fpscr, FpOp1);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm); FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
@ -1331,6 +1380,7 @@ let {{
vcvtFpUHFixedDCode = ''' vcvtFpUHFixedDCode = '''
IntDoubleUnion cOp1; IntDoubleUnion cOp1;
cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr); VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
uint64_t mid = vfpFpDToFixed(cOp1.fp, false, true, imm); uint64_t mid = vfpFpDToFixed(cOp1.fp, false, true, imm);