From fd82a47b964016332611dbe768762377531a3619 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 2 Jun 2010 12:58:15 -0500 Subject: [PATCH] ARM: Implement flush to zero for destinations as well. --- src/arch/arm/insts/vfp.hh | 12 ++++++-- src/arch/arm/isa/insts/fp.isa | 55 ++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index b69f93598..e32aac721 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -175,16 +175,17 @@ bitsToFp(uint64_t bits, double junk) template static inline fpType -fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2) +fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) { - if (std::isnan(val)) { + int fpClass = std::fpclassify(val); + fpType junk = 0.0; + if (fpClass == FP_NAN) { const bool single = (sizeof(val) == sizeof(float)); const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); const bool nan1 = std::isnan(op1); const bool nan2 = std::isnan(op2); const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); - fpType junk = 0.0; if ((!nan1 && !nan2) || (fpscr.dn == 1)) { val = bitsToFp(qnan, junk); } else if (signal1) { @@ -196,6 +197,11 @@ fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2) } else if (nan2) { val = op2; } + } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { + // Turn val into a zero with the correct sign; + uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); + val = bitsToFp(fpToBits(val) & bitMask, junk); + feraiseexcept(FeUnderflow); } return val; } diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index 6cd3c1345..4f693f46a 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -386,7 +386,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + FpDest = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { @@ -407,7 +407,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + cDest.fp = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); if ((isinf(cOp1.fp) && cOp2.fp == 0) || @@ -476,7 +476,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = fixNan(Fpscr, FpOp1 + FpOp2, FpOp1, FpOp2); + FpDest = fixDest(Fpscr, FpOp1 + FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -491,10 +491,13 @@ let {{ IntDoubleUnion cOp1, cOp2, cDest; cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32)); + DPRINTFN("cOp1.bits = %#x, cOp1.fp = %f.\\n", cOp1.bits, cOp1.fp); + DPRINTFN("cOp2.bits = %#x, cOp2.fp = %f.\\n", cOp2.bits, cOp2.fp); vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = fixNan(Fpscr, cOp1.fp + cOp2.fp, cOp1.fp, cOp2.fp); + cDest.fp = fixDest(Fpscr, cOp1.fp + cOp2.fp, cOp1.fp, cOp2.fp); + DPRINTFN("cDest.bits = %#x, cDest.fp = %f.\\n", cDest.bits, cDest.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -511,7 +514,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = fixNan(Fpscr, FpOp1 - FpOp2, FpOp1, FpOp2); + FpDest = fixDest(Fpscr, FpOp1 - FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state) ''' @@ -529,7 +532,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - cDest.fp = fixNan(Fpscr, cOp1.fp - cOp2.fp, cOp1.fp, cOp2.fp); + cDest.fp = fixDest(Fpscr, cOp1.fp - cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -546,7 +549,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest = fixNan(Fpscr, FpOp1 / FpOp2, FpOp1, FpOp2); + FpDest = fixDest(Fpscr, FpOp1 / FpOp2, FpOp1, FpOp2); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -564,7 +567,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp)); - cDest.fp = fixNan(Fpscr, cOp1.fp / cOp2.fp, cOp1.fp, cOp2.fp); + cDest.fp = fixDest(Fpscr, cOp1.fp / cOp2.fp, cOp1.fp, cOp2.fp); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -628,12 +631,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + float mid = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixNan(Fpscr, FpDest + mid, FpDest, mid); + FpDest = fixDest(Fpscr, FpDest + mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -652,13 +655,13 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + double mid = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = fixNan(Fpscr, cDest.fp + mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, cDest.fp + mid, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -675,12 +678,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + float mid = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixNan(Fpscr, FpDest - mid, FpDest, mid); + FpDest = fixDest(Fpscr, FpDest - mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -699,12 +702,12 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + double mid = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } - cDest.fp = fixNan(Fpscr, cDest.fp - mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, cDest.fp - mid, cDest.fp, mid); vfpFlushToZero(Fpscr, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); @@ -722,12 +725,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + float mid = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixNan(Fpscr, -FpDest - mid, FpDest, mid); + FpDest = fixDest(Fpscr, -FpDest - mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -746,13 +749,13 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + double mid = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = fixNan(Fpscr, -cDest.fp - mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, -cDest.fp - mid, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -769,12 +772,12 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + float mid = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, FpDest, mid); - FpDest = fixNan(Fpscr, -FpDest + mid, FpDest, mid); + FpDest = fixDest(Fpscr, -FpDest + mid, FpDest, mid); __asm__ __volatile__("" :: "m" (FpDest)); Fpscr = setVfpFpscr(Fpscr, state); ''' @@ -793,13 +796,13 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + double mid = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN; } vfpFlushToZero(Fpscr, cDest.fp, mid); - cDest.fp = fixNan(Fpscr, -cDest.fp + mid, cDest.fp, mid); + cDest.fp = fixDest(Fpscr, -cDest.fp + mid, cDest.fp, mid); __asm__ __volatile__("" :: "m" (cDest.fp)); Fpscr = setVfpFpscr(Fpscr, state); FpDestP0.uw = cDest.bits; @@ -816,7 +819,7 @@ let {{ vfpFlushToZero(Fpscr, FpOp1, FpOp2); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - float mid = fixNan(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); + float mid = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2); if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) { mid = NAN; } @@ -839,7 +842,7 @@ let {{ vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp); VfpSavedState state = prepVfpFpscr(Fpscr); __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp)); - double mid = fixNan(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); + double mid = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp); if ((isinf(cOp1.fp) && cOp2.fp == 0) || (isinf(cOp2.fp) && cOp1.fp == 0)) { mid = NAN;