From 90d70a22cb15e6461fc7397a0f55322dc163f701 Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 2 Jun 2010 12:58:14 -0500
Subject: [PATCH] ARM: Implement the VFP version of vdiv and vsqrt.

---
# Reviewer notes (commentary only; this text sits between the "---" line and
# the first "diff --git" header and is not part of any hunk).
#
# NOTE(review): the line structure of this patch was restored from a
# whitespace-collapsed copy. Leading indentation inside the hunks is a
# conventional reconstruction -- confirm against the target files before
# applying.
#
# formats/fp.isa
#   * The WarnUnimplemented("vdiv") and WarnUnimplemented("vsqrt") stubs are
#     replaced by real decode paths.
#   * bits(machInst, 8) selects the variant: 0 builds VdivS / VsqrtS, any
#     other value builds VdivD / VsqrtD. (The S/D suffixes presumably mean
#     single/double precision -- confirm against the instruction classes.)
#   * Register indices when bit 8 == 0:
#         vd = bit 22 | (bits 15..12 << 1)
#         vm = bit 5  | (bits 3..0   << 1)
#         vn = bit 7  | (bits 19..16 << 1)   (vdiv only)
#     When bit 8 != 0 the same single bit is shifted left by 5 instead of
#     landing in bit 0, so bit 0 of the resulting index is always 0.
#   * Constructor argument order is (machInst, vd, vn, vm) for vdiv and
#     (machInst, vd, vm) for vsqrt.
#
# insts/fp.isa
#   * VdivS / VdivD are declared from the RegRegRegOp templates, VsqrtS /
#     VsqrtD from the RegRegOp templates; all four use PredOpExecute.
#   * The D variants build a 64-bit value from two 32-bit halves (P0 is the
#     low word, P1 the high word) through IntDoubleUnion, operate on its .fp
#     member, then write the low and high halves of the result back to
#     FpDestP0 / FpDestP1.
#   * Both vsqrt snippets overwrite the result with NAN when the operand
#     compares < 0. NOTE(review): presumably this pins the result for
#     negative inputs independent of the host sqrt()/sqrtf() -- confirm.
#
 src/arch/arm/isa/formats/fp.isa | 36 +++++++++++++++++++--
 src/arch/arm/isa/insts/fp.isa   | 55 +++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index b6fcf4ac7..2cca96bea 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -552,7 +552,25 @@ let {{
             }
           case 0x8:
             if ((opc3 & 0x1) == 0) {
-                return new WarnUnimplemented("vdiv", machInst);
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VdivS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VdivD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
             }
             break;
           case 0xb:
@@ -624,7 +642,21 @@ let {{
                             (IntRegIndex)vd, (IntRegIndex)vm);
                     }
                 } else {
-                    return new WarnUnimplemented("vsqrt", machInst);
+                    uint32_t vd;
+                    uint32_t vm;
+                    if (bits(machInst, 8) == 0) {
+                        vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                        vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                        return new VsqrtS(machInst,
+                                (IntRegIndex)vd, (IntRegIndex)vm);
+                    } else {
+                        vd = (bits(machInst, 22) << 5) |
+                             (bits(machInst, 15, 12) << 1);
+                        vm = (bits(machInst, 5) << 5) |
+                             (bits(machInst, 3, 0) << 1);
+                        return new VsqrtD(machInst,
+                                (IntRegIndex)vd, (IntRegIndex)vm);
+                    }
                 }
               case 0x2:
               case 0x3:
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index dd3f6598c..99efcec32 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -356,4 +356,59 @@ let {{
     header_output += RegRegRegOpDeclare.subst(vsubDIop);
     decoder_output += RegRegRegOpConstructor.subst(vsubDIop);
     exec_output += PredOpExecute.subst(vsubDIop);
+
+    vdivSCode = '''
+        FpDest = FpOp1 / FpOp2;
+    '''
+    vdivSIop = InstObjParams("vdivs", "VdivS", "RegRegRegOp",
+                                     { "code": vdivSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vdivSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vdivSIop);
+    exec_output += PredOpExecute.subst(vdivSIop);
+
+    vdivDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.fp = cOp1.fp / cOp2.fp;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vdivDIop = InstObjParams("vdivd", "VdivD", "RegRegRegOp",
+                                     { "code": vdivDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vdivDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vdivDIop);
+    exec_output += PredOpExecute.subst(vdivDIop);
+
+    vsqrtSCode = '''
+        FpDest = sqrtf(FpOp1);
+        if (FpOp1 < 0) {
+            FpDest = NAN;
+        }
+    '''
+    vsqrtSIop = InstObjParams("vsqrts", "VsqrtS", "RegRegOp",
+                                     { "code": vsqrtSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegOpDeclare.subst(vsqrtSIop);
+    decoder_output += RegRegOpConstructor.subst(vsqrtSIop);
+    exec_output += PredOpExecute.subst(vsqrtSIop);
+
+    vsqrtDCode = '''
+        IntDoubleUnion cOp1, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cDest.fp = sqrt(cOp1.fp);
+        if (cOp1.fp < 0) {
+            cDest.fp = NAN;
+        }
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vsqrtDIop = InstObjParams("vsqrtd", "VsqrtD", "RegRegOp",
+                                     { "code": vsqrtDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegOpDeclare.subst(vsqrtDIop);
+    decoder_output += RegRegOpConstructor.subst(vsqrtDIop);
+    exec_output += PredOpExecute.subst(vsqrtDIop);
 }};