From 186273e5f318786cd4db1b652b9b0332c18ea7bf Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 2 Jun 2010 12:58:15 -0500 Subject: [PATCH] ARM: Fix up nans to match ARM's expected behavior. --- src/arch/arm/insts/vfp.hh | 80 +++++++++++++++++++++++++++++++++++ src/arch/arm/isa/insts/fp.isa | 52 +++++++++++------------ 2 files changed, 106 insertions(+), 26 deletions(-) diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index b0fc8b6dc..b69f93598 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -45,6 +45,9 @@ #include #include +namespace ArmISA +{ + enum VfpMicroMode { VfpNotAMicroop, VfpMicroop, @@ -122,6 +125,81 @@ vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2) vfpFlushToZero(fpscr, op2); } +static inline uint32_t +fpToBits(float fp) +{ + union + { + float fp; + uint32_t bits; + } val; + val.fp = fp; + return val.bits; +} + +static inline uint64_t +fpToBits(double fp) +{ + union + { + double fp; + uint64_t bits; + } val; + val.fp = fp; + return val.bits; +} + +static inline float +bitsToFp(uint64_t bits, float junk) +{ + union + { + float fp; + uint32_t bits; + } val; + val.bits = bits; + return val.fp; +} + +static inline double +bitsToFp(uint64_t bits, double junk) +{ + union + { + double fp; + uint64_t bits; + } val; + val.bits = bits; + return val.fp; +} + +template +static inline fpType +fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2) +{ + if (std::isnan(val)) { + const bool single = (sizeof(val) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan1 = std::isnan(op1); + const bool nan2 = std::isnan(op2); + const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); + const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); + fpType junk = 0.0; + if ((!nan1 && !nan2) || (fpscr.dn == 1)) { + val = bitsToFp(qnan, junk); + } else if (signal1) { + val = bitsToFp(fpToBits(op1) | qnan, junk); + } else if (signal2) { + val = bitsToFp(fpToBits(op2) | qnan, junk); + } else if (nan1) { + val = op1; + } else if (nan2) { + val = op2; + } + } + return val; +} + static inline uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) { @@ -481,4 +559,6 @@ class VfpRegRegRegOp : public RegRegRegOp } }; +} + #endif //__ARCH_ARM_INSTS_VFP_HH__ diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index cd1ddc498..6cd3c1345 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -386,7 +386,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = FpOp1 * FpOp2; + FpDest = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { @@ -407,7 +407,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = cOp1.fp * cOp2.fp; + cDest.fp = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(cOp1.fp) && cOp2.fp == 0) || @@ -476,7 +476,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = FpOp1 + FpOp2; + FpDest = fixNan(Fpscr, FpOp1 + FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -494,7 +494,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = cOp1.fp + cOp2.fp; + cDest.fp = fixNan(Fpscr, cOp1.fp + cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -511,7 +511,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = FpOp1 - FpOp2; + FpDest = fixNan(Fpscr, FpOp1 - FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state) ''' @@ -529,7 +529,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = cOp1.fp - cOp2.fp; + cDest.fp = fixNan(Fpscr, cOp1.fp - cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -546,7 +546,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = FpOp1 / FpOp2; + FpDest = fixNan(Fpscr, FpOp1 / FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -564,7 +564,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp)); - cDest.fp = cOp1.fp / cOp2.fp; + cDest.fp = fixNan(Fpscr, cOp1.fp / cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -628,12 +628,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = FpOp1 * FpOp2; + float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = FpDest + mid; + FpDest = fixNan(Fpscr, FpDest + mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -652,13 +652,13 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = cOp1.fp * cOp2.fp; + double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = cDest.fp + mid; + cDest.fp = fixNan(Fpscr, cDest.fp + mid, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -675,12 +675,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = FpOp1 * FpOp2; + float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = FpDest - mid; + FpDest = fixNan(Fpscr, FpDest - mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -699,12 +699,12 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = cOp1.fp * cOp2.fp; + double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } - cDest.fp = cDest.fp - mid; + cDest.fp = fixNan(Fpscr, cDest.fp - mid, cDest.fp, mid); vfpFlushToZero(Fpscr, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); @@ -722,12 +722,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = FpOp1 * FpOp2; + float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = -FpDest - mid; + FpDest = fixNan(Fpscr, -FpDest - mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -746,13 +746,13 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = cOp1.fp * cOp2.fp; + double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = -cDest.fp - mid; + cDest.fp = fixNan(Fpscr, -cDest.fp - mid, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -769,12 +769,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = FpOp1 * FpOp2; + float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = -FpDest + mid; + FpDest = fixNan(Fpscr, -FpDest + mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -793,13 +793,13 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = cOp1.fp * cOp2.fp; + double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = -cDest.fp + mid; + cDest.fp = fixNan(Fpscr, -cDest.fp + mid, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -816,7 +816,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = FpOp1 * FpOp2; + float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } @@ -839,7 +839,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = cOp1.fp * cOp2.fp; + double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN;