cpu, arm: Distinguish Float* and SimdFloat*, create FloatMem* opClass

Modify the opClass assigned to AArch64 FP instructions from SimdFloat* to
Float*. Also create the FloatMemRead and FloatMemWrite opClasses, which
distinguish writes to the INT and FP register banks.
Change the latency of (Simd)FloatMultAcc to 5, based on the Cortex-A72,
where the "latency" of FMADD is 3 cycles if the next instruction is an
FMADD whose only dependency is augend-to-destination; otherwise it is
7 cycles.

Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
This commit is contained in:
Fernando Endo 2016-10-15 14:58:45 -05:00
parent 2f5262eb67
commit 6c72c35519
7 changed files with 88 additions and 59 deletions

View file

@ -62,24 +62,28 @@ class O3_ARM_v7a_FP(FUDesc):
OpDesc(opClass='SimdFloatDiv', opLat=3), OpDesc(opClass='SimdFloatDiv', opLat=3),
OpDesc(opClass='SimdFloatMisc', opLat=3), OpDesc(opClass='SimdFloatMisc', opLat=3),
OpDesc(opClass='SimdFloatMult', opLat=3), OpDesc(opClass='SimdFloatMult', opLat=3),
OpDesc(opClass='SimdFloatMultAcc',opLat=1), OpDesc(opClass='SimdFloatMultAcc',opLat=5),
OpDesc(opClass='SimdFloatSqrt', opLat=9), OpDesc(opClass='SimdFloatSqrt', opLat=9),
OpDesc(opClass='FloatAdd', opLat=5), OpDesc(opClass='FloatAdd', opLat=5),
OpDesc(opClass='FloatCmp', opLat=5), OpDesc(opClass='FloatCmp', opLat=5),
OpDesc(opClass='FloatCvt', opLat=5), OpDesc(opClass='FloatCvt', opLat=5),
OpDesc(opClass='FloatDiv', opLat=9, pipelined=False), OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False), OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
OpDesc(opClass='FloatMult', opLat=4) ] OpDesc(opClass='FloatMult', opLat=4),
OpDesc(opClass='FloatMultAcc', opLat=5),
OpDesc(opClass='FloatMisc', opLat=3) ]
count = 2 count = 2
# Load/Store Units # Load/Store Units
class O3_ARM_v7a_Load(FUDesc): class O3_ARM_v7a_Load(FUDesc):
opList = [ OpDesc(opClass='MemRead',opLat=2) ] opList = [ OpDesc(opClass='MemRead',opLat=2),
OpDesc(opClass='FloatMemRead',opLat=2) ]
count = 1 count = 1
class O3_ARM_v7a_Store(FUDesc): class O3_ARM_v7a_Store(FUDesc):
opList = [OpDesc(opClass='MemWrite',opLat=2) ] opList = [ OpDesc(opClass='MemWrite',opLat=2),
OpDesc(opClass='FloatMemWrite',opLat=2) ]
count = 1 count = 1
# Functional Units for this CPU # Functional Units for this CPU

View file

@ -52,7 +52,7 @@ let {{
''' '''
fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
{ "code": fmovImmSCode, { "code": fmovImmSCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegImmOpDeclare.subst(fmovImmSIop); header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
exec_output += BasicExecute.subst(fmovImmSIop); exec_output += BasicExecute.subst(fmovImmSIop);
@ -65,7 +65,7 @@ let {{
''' '''
fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
{ "code": fmovImmDCode, { "code": fmovImmDCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegImmOpDeclare.subst(fmovImmDIop); header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
exec_output += BasicExecute.subst(fmovImmDIop); exec_output += BasicExecute.subst(fmovImmDIop);
@ -78,7 +78,7 @@ let {{
''' '''
fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
{ "code": fmovRegSCode, { "code": fmovRegSCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegSIop); header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
exec_output += BasicExecute.subst(fmovRegSIop); exec_output += BasicExecute.subst(fmovRegSIop);
@ -91,7 +91,7 @@ let {{
''' '''
fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
{ "code": fmovRegDCode, { "code": fmovRegDCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegDIop); header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
exec_output += BasicExecute.subst(fmovRegDIop); exec_output += BasicExecute.subst(fmovRegDIop);
@ -104,7 +104,7 @@ let {{
''' '''
fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
{ "code": fmovCoreRegWCode, { "code": fmovCoreRegWCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
exec_output += BasicExecute.subst(fmovCoreRegWIop); exec_output += BasicExecute.subst(fmovCoreRegWIop);
@ -117,7 +117,7 @@ let {{
''' '''
fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
{ "code": fmovCoreRegXCode, { "code": fmovCoreRegXCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
exec_output += BasicExecute.subst(fmovCoreRegXIop); exec_output += BasicExecute.subst(fmovCoreRegXIop);
@ -128,7 +128,7 @@ let {{
''' '''
fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
{ "code": fmovUCoreRegXCode, { "code": fmovUCoreRegXCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
exec_output += BasicExecute.subst(fmovUCoreRegXIop); exec_output += BasicExecute.subst(fmovUCoreRegXIop);
@ -138,7 +138,7 @@ let {{
''' '''
fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp", fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
{ "code": fmovRegCoreWCode, { "code": fmovRegCoreWCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop); header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
exec_output += BasicExecute.subst(fmovRegCoreWIop); exec_output += BasicExecute.subst(fmovRegCoreWIop);
@ -148,7 +148,7 @@ let {{
''' '''
fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp", fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
{ "code": fmovRegCoreXCode, { "code": fmovRegCoreXCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop); header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
exec_output += BasicExecute.subst(fmovRegCoreXIop); exec_output += BasicExecute.subst(fmovRegCoreXIop);
@ -158,7 +158,7 @@ let {{
''' '''
fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp", fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
{ "code": fmovURegCoreXCode, { "code": fmovURegCoreXCode,
"op_class": "SimdFloatMiscOp" }, []) "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop); header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop); decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
exec_output += BasicExecute.subst(fmovURegCoreXIop); exec_output += BasicExecute.subst(fmovURegCoreXIop);
@ -270,16 +270,16 @@ let {{
decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop) decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
exec_output += BasicExecute.subst(iop) exec_output += BasicExecute.subst(iop)
buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp", buildTernaryFpOp("FMAdd", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)", "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
"fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" ) "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
buildTernaryFpOp("FMSub", "SimdFloatMultAccOp", buildTernaryFpOp("FMSub", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
"fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp", buildTernaryFpOp("FNMAdd", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
"fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp", buildTernaryFpOp("FNMSub", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)", "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
"fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" ) "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
@ -304,31 +304,31 @@ let {{
decoder_output += constructorTempl.subst(iop) decoder_output += constructorTempl.subst(iop)
exec_output += BasicExecute.subst(iop) exec_output += BasicExecute.subst(iop)
buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp", buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp",
"fplibAdd<uint32_t>(cOp1, cOp2, fpscr)", "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
"fplibAdd<uint64_t>(cOp1, cOp2, fpscr)") "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp", buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp",
"fplibSub<uint32_t>(cOp1, cOp2, fpscr)", "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
"fplibSub<uint64_t>(cOp1, cOp2, fpscr)") "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp", buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp",
"fplibDiv<uint32_t>(cOp1, cOp2, fpscr)", "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
"fplibDiv<uint64_t>(cOp1, cOp2, fpscr)") "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp", buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp",
"fplibMul<uint32_t>(cOp1, cOp2, fpscr)", "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMul<uint64_t>(cOp1, cOp2, fpscr)") "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp", buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp",
"fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))", "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
"fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))") "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp", buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp",
"fplibMin<uint32_t>(cOp1, cOp2, fpscr)", "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMin<uint64_t>(cOp1, cOp2, fpscr)") "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp", buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp",
"fplibMax<uint32_t>(cOp1, cOp2, fpscr)", "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMax<uint64_t>(cOp1, cOp2, fpscr)") "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp", buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp",
"fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)", "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)") "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp", buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp",
"fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)", "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)") "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
@ -354,7 +354,7 @@ let {{
decoder_output += constructorTempl.subst(iop) decoder_output += constructorTempl.subst(iop)
exec_output += BasicExecute.subst(iop) exec_output += BasicExecute.subst(iop)
buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp", buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp",
"fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)") "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)")
def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
@ -383,29 +383,29 @@ let {{
decoder_output += constructorTempl.subst(iop) decoder_output += constructorTempl.subst(iop)
exec_output += BasicExecute.subst(iop) exec_output += BasicExecute.subst(iop)
buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp",
"fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)") "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")
buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp",
"fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)") "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)")
buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp", buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)", "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)") "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
}}; }};
@ -451,8 +451,8 @@ let {{
instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S") instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
mnem = "%scvtf" %(us.lower()) mnem = "%scvtf" %(us.lower())
fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
{ "code": fcvtIntFpDCode, { "code": fcvtIntFpDCode,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
exec_output += BasicExecute.subst(fcvtIntFpDIop); exec_output += BasicExecute.subst(fcvtIntFpDIop);
@ -491,7 +491,7 @@ let {{
mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u") mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp", fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
{ "code": fcvtFpIntCode, { "code": fcvtFpIntCode,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop); header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop);
decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop); decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
exec_output += BasicExecute.subst(fcvtFpIntIop); exec_output += BasicExecute.subst(fcvtFpIntIop);
@ -514,7 +514,7 @@ let {{
''' '''
fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
{ "code": fcvtFpSFpDCode, { "code": fcvtFpSFpDCode,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
exec_output += BasicExecute.subst(fcvtFpSFpDIop); exec_output += BasicExecute.subst(fcvtFpSFpDIop);
@ -531,7 +531,7 @@ let {{
''' '''
fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
{"code": fcvtFpDFpSCode, {"code": fcvtFpDFpSCode,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
exec_output += BasicExecute.subst(fcvtFpDFpSIop); exec_output += BasicExecute.subst(fcvtFpDFpSIop);
@ -563,7 +563,7 @@ let {{
instName = "FcvtFpHFp%s" %("D" if isDouble else "S") instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
{ "code": code, { "code": code,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
exec_output += BasicExecute.subst(fcvtFpHFpIop); exec_output += BasicExecute.subst(fcvtFpHFpIop);
@ -586,7 +586,7 @@ let {{
instName = "FcvtFp%sFpH" %("D" if isDouble else "S") instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
{ "code": code, { "code": code,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
exec_output += BasicExecute.subst(fcvtFpFpHIop); exec_output += BasicExecute.subst(fcvtFpFpHIop);
@ -626,7 +626,7 @@ let {{
fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName, fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
"FpReg%sOp" %(typeName), "FpReg%sOp" %(typeName),
{"code": fcmpCode, {"code": fcmpCode,
"op_class": "SimdFloatCmpOp"}, []) "op_class": "FloatCmpOp"}, [])
declareTemp = eval("FpReg%sOpDeclare" %(typeName)); declareTemp = eval("FpReg%sOpDeclare" %(typeName));
constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName)); constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
@ -673,7 +673,7 @@ let {{
fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"), fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"),
instName, "FpCondCompRegOp", instName, "FpCondCompRegOp",
{"code": fccmpCode, {"code": fccmpCode,
"op_class": "SimdFloatCmpOp"}, []) "op_class": "FloatCmpOp"}, [])
header_output += DataXCondCompRegDeclare.subst(fccmpIop); header_output += DataXCondCompRegDeclare.subst(fccmpIop);
decoder_output += DataXCondCompRegConstructor.subst(fccmpIop); decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
exec_output += BasicExecute.subst(fccmpIop); exec_output += BasicExecute.subst(fccmpIop);
@ -718,7 +718,7 @@ let {{
mnem = "fcvtz%s" %("s" if isSigned else "u") mnem = "fcvtz%s" %("s" if isSigned else "u")
fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp", fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
{ "code": fcvtFpFixedCode, { "code": fcvtFpFixedCode,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop); header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop); decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
exec_output += BasicExecute.subst(fcvtFpFixedIop); exec_output += BasicExecute.subst(fcvtFpFixedIop);
@ -759,7 +759,7 @@ let {{
mnem = "%scvtf" %("s" if isSigned else "u") mnem = "%scvtf" %("s" if isSigned else "u")
fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp", fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
{ "code": fcvtFixedFpCode, { "code": fcvtFixedFpCode,
"op_class": "SimdFloatCvtOp" }, []) "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop); header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop); decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
exec_output += BasicExecute.subst(fcvtFixedFpIop); exec_output += BasicExecute.subst(fcvtFixedFpIop);
@ -804,7 +804,8 @@ let {{
''' '''
iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"), iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
"FpCondSelOp", code) "FpCondSelOp", { "code": code,
"op_class": "FloatCvtOp" })
header_output += DataXCondSelDeclare.subst(iop) header_output += DataXCondSelDeclare.subst(iop)
decoder_output += DataXCondSelConstructor.subst(iop) decoder_output += DataXCondSelConstructor.subst(iop)
exec_output += BasicExecute.subst(iop) exec_output += BasicExecute.subst(iop)

View file

@ -1130,9 +1130,21 @@ class InstObjParams(object):
# These are good enough for most cases. # These are good enough for most cases.
if not self.op_class: if not self.op_class:
if 'IsStore' in self.flags: if 'IsStore' in self.flags:
self.op_class = 'MemWriteOp' # The order matters here: 'IsFloating' and 'IsInteger' are
# usually set in FP instructions because of the base
# register
if 'IsFloating' in self.flags:
self.op_class = 'FloatMemWriteOp'
else:
self.op_class = 'MemWriteOp'
elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags: elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
self.op_class = 'MemReadOp' # The order matters here: 'IsFloating' and 'IsInteger' are
# usually set in FP instructions because of the base
# register
if 'IsFloating' in self.flags:
self.op_class = 'FloatMemReadOp'
else:
self.op_class = 'MemReadOp'
elif 'IsFloating' in self.flags: elif 'IsFloating' in self.flags:
self.op_class = 'FloatAddOp' self.op_class = 'FloatAddOp'
else: else:

View file

@ -43,13 +43,15 @@ from m5.params import *
class OpClass(Enum): class OpClass(Enum):
vals = ['No_OpClass', 'IntAlu', 'IntMult', 'IntDiv', 'FloatAdd', vals = ['No_OpClass', 'IntAlu', 'IntMult', 'IntDiv', 'FloatAdd',
'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', 'FloatSqrt', 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatMultAcc', 'FloatDiv',
'FloatMisc', 'FloatSqrt',
'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt', 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt',
'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc', 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc',
'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp',
'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult', 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult',
'SimdFloatMultAcc', 'SimdFloatSqrt', 'SimdFloatMultAcc', 'SimdFloatSqrt',
'MemRead', 'MemWrite', 'IprAccess', 'InstPrefetch'] 'MemRead', 'MemWrite', 'FloatMemRead', 'FloatMemWrite',
'IprAccess', 'InstPrefetch']
class OpDesc(SimObject): class OpDesc(SimObject):
type = 'OpDesc' type = 'OpDesc'

View file

@ -142,8 +142,8 @@ class MinorDefaultIntDivFU(MinorFU):
class MinorDefaultFloatSimdFU(MinorFU): class MinorDefaultFloatSimdFU(MinorFU):
opClasses = minorMakeOpClassSet([ opClasses = minorMakeOpClassSet([
'FloatAdd', 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', 'FloatAdd', 'FloatCmp', 'FloatCvt', 'FloatMisc', 'FloatMult',
'FloatSqrt', 'FloatMultAcc', 'FloatDiv', 'FloatSqrt',
'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt', 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt',
'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc', 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc',
'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp',
@ -154,7 +154,8 @@ class MinorDefaultFloatSimdFU(MinorFU):
opLat = 6 opLat = 6
class MinorDefaultMemFU(MinorFU): class MinorDefaultMemFU(MinorFU):
opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite']) opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite', 'FloatMemRead',
'FloatMemWrite'])
timings = [MinorFUTiming(description='Mem', timings = [MinorFUTiming(description='Mem',
srcRegsRelativeLats=[1], extraAssumedLat=2)] srcRegsRelativeLats=[1], extraAssumedLat=2)]
opLat = 1 opLat = 1

View file

@ -68,6 +68,8 @@ class FP_ALU(FUDesc):
class FP_MultDiv(FUDesc): class FP_MultDiv(FUDesc):
opList = [ OpDesc(opClass='FloatMult', opLat=4), opList = [ OpDesc(opClass='FloatMult', opLat=4),
OpDesc(opClass='FloatMultAcc', opLat=5),
OpDesc(opClass='FloatMisc', opLat=3),
OpDesc(opClass='FloatDiv', opLat=12, pipelined=False), OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
OpDesc(opClass='FloatSqrt', opLat=24, pipelined=False) ] OpDesc(opClass='FloatSqrt', opLat=24, pipelined=False) ]
count = 2 count = 2
@ -96,15 +98,18 @@ class SIMD_Unit(FUDesc):
count = 4 count = 4
class ReadPort(FUDesc): class ReadPort(FUDesc):
opList = [ OpDesc(opClass='MemRead') ] opList = [ OpDesc(opClass='MemRead'),
OpDesc(opClass='FloatMemRead') ]
count = 0 count = 0
class WritePort(FUDesc): class WritePort(FUDesc):
opList = [ OpDesc(opClass='MemWrite') ] opList = [ OpDesc(opClass='MemWrite'),
OpDesc(opClass='FloatMemWrite') ]
count = 0 count = 0
class RdWrPort(FUDesc): class RdWrPort(FUDesc):
opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='MemWrite') ] opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='MemWrite'),
OpDesc(opClass='FloatMemRead'), OpDesc(opClass='FloatMemWrite')]
count = 4 count = 4
class IprPort(FUDesc): class IprPort(FUDesc):

View file

@ -59,7 +59,9 @@ static const OpClass FloatAddOp = Enums::FloatAdd;
static const OpClass FloatCmpOp = Enums::FloatCmp; static const OpClass FloatCmpOp = Enums::FloatCmp;
static const OpClass FloatCvtOp = Enums::FloatCvt; static const OpClass FloatCvtOp = Enums::FloatCvt;
static const OpClass FloatMultOp = Enums::FloatMult; static const OpClass FloatMultOp = Enums::FloatMult;
static const OpClass FloatMultAccOp = Enums::FloatMultAcc;
static const OpClass FloatDivOp = Enums::FloatDiv; static const OpClass FloatDivOp = Enums::FloatDiv;
static const OpClass FloatMiscOp = Enums::FloatMisc;
static const OpClass FloatSqrtOp = Enums::FloatSqrt; static const OpClass FloatSqrtOp = Enums::FloatSqrt;
static const OpClass SimdAddOp = Enums::SimdAdd; static const OpClass SimdAddOp = Enums::SimdAdd;
static const OpClass SimdAddAccOp = Enums::SimdAddAcc; static const OpClass SimdAddAccOp = Enums::SimdAddAcc;
@ -83,6 +85,8 @@ static const OpClass SimdFloatMultAccOp = Enums::SimdFloatMultAcc;
static const OpClass SimdFloatSqrtOp = Enums::SimdFloatSqrt; static const OpClass SimdFloatSqrtOp = Enums::SimdFloatSqrt;
static const OpClass MemReadOp = Enums::MemRead; static const OpClass MemReadOp = Enums::MemRead;
static const OpClass MemWriteOp = Enums::MemWrite; static const OpClass MemWriteOp = Enums::MemWrite;
static const OpClass FloatMemReadOp = Enums::FloatMemRead;
static const OpClass FloatMemWriteOp = Enums::FloatMemWrite;
static const OpClass IprAccessOp = Enums::IprAccess; static const OpClass IprAccessOp = Enums::IprAccess;
static const OpClass InstPrefetchOp = Enums::InstPrefetch; static const OpClass InstPrefetchOp = Enums::InstPrefetch;
static const OpClass Num_OpClasses = Enums::Num_OpClass; static const OpClass Num_OpClasses = Enums::Num_OpClass;