From 80fa3a7ccfd930b87c9702f33e0f8461c1eb9e5b Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 2 Jun 2010 12:58:14 -0500
Subject: [PATCH] ARM: Implement the VFP negated multiplies.

---
 src/arch/arm/isa/formats/fp.isa |  64 ++++++++++++++++++-
 src/arch/arm/isa/insts/fp.isa   | 105 ++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 9bb062a2e..850f761d7 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -524,6 +524,48 @@ let {{
                                       (IntRegIndex)vn, (IntRegIndex)vm);
                 }
             }
+          case 0x1:
+            if (bits(machInst, 6) == 1) {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VnmlaS(machInst, (IntRegIndex)vd,
+                                      (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VnmlaD(machInst, (IntRegIndex)vd,
+                                      (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            } else {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VnmlsS(machInst, (IntRegIndex)vd,
+                                      (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VnmlsD(machInst, (IntRegIndex)vd,
+                                      (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            }
           case 0x2:
             if ((opc3 & 0x1) == 0) {
                 uint32_t vd;
@@ -545,9 +587,27 @@ let {{
                     return new VmulD(machInst, (IntRegIndex)vd,
                                      (IntRegIndex)vn, (IntRegIndex)vm);
                 }
+            } else {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VnmulS(machInst, (IntRegIndex)vd,
+                                      (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VnmulD(machInst, (IntRegIndex)vd,
+                                      (IntRegIndex)vn, (IntRegIndex)vm);
+                }
             }
-          case 0x1:
-            return new WarnUnimplemented("vnmla, vnmls, vnmul", machInst);
           case 0x3:
             if ((opc3 & 0x1) == 0) {
                 uint32_t vd;
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 58c2cafa7..d40b00176 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -481,4 +481,109 @@ let {{
     header_output += RegRegRegOpDeclare.subst(vmlsDIop);
     decoder_output += RegRegRegOpConstructor.subst(vmlsDIop);
     exec_output += PredOpExecute.subst(vmlsDIop);
+
+    vnmlaSCode = '''
+    float mid = FpOp1 * FpOp2;
+    if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+        mid = NAN;
+    }
+    FpDest = -FpDest - mid;
+    '''
+    vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "RegRegRegOp",
+                              { "code": vnmlaSCode,
+                                "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlaSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlaSIop);
+    exec_output += PredOpExecute.subst(vnmlaSIop);
+
+    vnmlaDCode = '''
+    IntDoubleUnion cOp1, cOp2, cDest;
+    cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+    cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+    cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+    double mid = cOp1.fp * cOp2.fp;
+    if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+        (isinf(cOp2.fp) && cOp1.fp == 0)) {
+        mid = NAN;
+    }
+    cDest.fp = -cDest.fp - mid;
+    FpDestP0.uw = cDest.bits;
+    FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vnmlaDIop = InstObjParams("vnmlad", "VnmlaD", "RegRegRegOp",
+                              { "code": vnmlaDCode,
+                                "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlaDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlaDIop);
+    exec_output += PredOpExecute.subst(vnmlaDIop);
+
+    vnmlsSCode = '''
+    float mid = FpOp1 * FpOp2;
+    if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+        mid = NAN;
+    }
+    FpDest = -FpDest + mid;
+    '''
+    vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "RegRegRegOp",
+                              { "code": vnmlsSCode,
+                                "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlsSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlsSIop);
+    exec_output += PredOpExecute.subst(vnmlsSIop);
+
+    vnmlsDCode = '''
+    IntDoubleUnion cOp1, cOp2, cDest;
+    cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+    cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+    cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+    double mid = cOp1.fp * cOp2.fp;
+    if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+        (isinf(cOp2.fp) && cOp1.fp == 0)) {
+        mid = NAN;
+    }
+    cDest.fp = -cDest.fp + mid;
+    FpDestP0.uw = cDest.bits;
+    FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vnmlsDIop = InstObjParams("vnmlsd", "VnmlsD", "RegRegRegOp",
+                              { "code": vnmlsDCode,
+                                "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlsDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlsDIop);
+    exec_output += PredOpExecute.subst(vnmlsDIop);
+
+    vnmulSCode = '''
+    float mid = FpOp1 * FpOp2;
+    if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+        mid = NAN;
+    }
+    FpDest = -mid;
+    '''
+    vnmulSIop = InstObjParams("vnmuls", "VnmulS", "RegRegRegOp",
+                              { "code": vnmulSCode,
+                                "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmulSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmulSIop);
+    exec_output += PredOpExecute.subst(vnmulSIop);
+
+    vnmulDCode = '''
+    IntDoubleUnion cOp1, cOp2, cDest;
+    cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+    cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+    cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+    double mid = cOp1.fp * cOp2.fp;
+    if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+        (isinf(cOp2.fp) && cOp1.fp == 0)) {
+        mid = NAN;
+    }
+    cDest.fp = -mid;
+    FpDestP0.uw = cDest.bits;
+    FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vnmulDIop = InstObjParams("vnmuld", "VnmulD", "RegRegRegOp",
+                              { "code": vnmulDCode,
+                                "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmulDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmulDIop);
+    exec_output += PredOpExecute.subst(vnmulDIop);
 }};