Explicitly handle rounding on FP-to-integer conversions.
Seems to avoid the significant problems on platforms w/o fenv.h.

arch/alpha/isa_desc:
    Explicitly handle rounding on FP-to-integer conversions.
    Seems to avoid the significant problems on platforms w/o fenv.h.
    Get rid of FP "Fast" vs "General" distinction... more headache than it's worth.
arch/isa_parser.py:
    Fix bug with "%s" in C++ templates (must escape properly to pass through Python string interpolation).

--HG--
extra : convert_revision : de964d764e67e0934ac0ef535f53c974640731fb
This commit is contained in:
parent
845bdb0d8e
commit
11cb904ad7
2 changed files with 175 additions and 88 deletions
|
@ -565,7 +565,7 @@ output header {{
|
||||||
* instructions that require this support are derived from this
|
* instructions that require this support are derived from this
|
||||||
* class; the rest derive directly from AlphaStaticInst.
|
* class; the rest derive directly from AlphaStaticInst.
|
||||||
*/
|
*/
|
||||||
class AlphaFP : public AlphaStaticInst
|
class AlphaFP : public AlphaStaticInst
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
/// Alpha FP rounding modes.
|
/// Alpha FP rounding modes.
|
||||||
|
@ -607,15 +607,22 @@ output header {{
|
||||||
/// This instruction's trapping mode.
|
/// This instruction's trapping mode.
|
||||||
TrappingMode trappingMode;
|
TrappingMode trappingMode;
|
||||||
|
|
||||||
|
/// Have we warned about this instruction's unsupported
|
||||||
|
/// rounding mode (if applicable)?
|
||||||
|
mutable bool warnedOnRounding;
|
||||||
|
|
||||||
|
/// Have we warned about this instruction's unsupported
|
||||||
|
/// trapping mode (if applicable)?
|
||||||
|
mutable bool warnedOnTrapping;
|
||||||
|
|
||||||
/// Constructor
|
/// Constructor
|
||||||
AlphaFP(const char *mnem, MachInst _machInst, OpClass __opClass)
|
AlphaFP(const char *mnem, MachInst _machInst, OpClass __opClass)
|
||||||
: AlphaStaticInst(mnem, _machInst, __opClass),
|
: AlphaStaticInst(mnem, _machInst, __opClass),
|
||||||
roundingMode((enum RoundingMode)FP_ROUNDMODE),
|
roundingMode((enum RoundingMode)FP_ROUNDMODE),
|
||||||
trappingMode((enum TrappingMode)FP_TRAPMODE)
|
trappingMode((enum TrappingMode)FP_TRAPMODE),
|
||||||
|
warnedOnRounding(false),
|
||||||
|
warnedOnTrapping(false)
|
||||||
{
|
{
|
||||||
if (trappingMode != Imprecise) {
|
|
||||||
warn("precise FP traps unimplemented\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int getC99RoundingMode(uint64_t fpcr_val) const;
|
int getC99RoundingMode(uint64_t fpcr_val) const;
|
||||||
|
@ -629,22 +636,6 @@ output header {{
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
|
||||||
def template FloatingPointDecode {{
|
|
||||||
{
|
|
||||||
bool fast = (FP_TRAPMODE == AlphaFP::Imprecise
|
|
||||||
&& FP_ROUNDMODE == AlphaFP::Normal);
|
|
||||||
AlphaStaticInst *i =
|
|
||||||
fast ? (AlphaStaticInst *)new %(class_name)sFast(machInst) :
|
|
||||||
(AlphaStaticInst *)new %(class_name)sGeneral(machInst);
|
|
||||||
|
|
||||||
if (FC == 31) {
|
|
||||||
i = makeNop(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}};
|
|
||||||
|
|
||||||
output decoder {{
|
output decoder {{
|
||||||
int
|
int
|
||||||
AlphaFP::getC99RoundingMode(uint64_t fpcr_val) const
|
AlphaFP::getC99RoundingMode(uint64_t fpcr_val) const
|
||||||
|
@ -715,6 +706,86 @@ output decoder {{
|
||||||
{ "", "v", "INVTM2", "INVTM3", "INVTM4", "sv", "INVTM6", "svi" };
|
{ "", "v", "INVTM2", "INVTM3", "INVTM4", "sv", "INVTM6", "svi" };
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
// FP instruction class execute method template. Handles non-standard
|
||||||
|
// rounding modes: with USE_FENV the host rounding mode is switched
|
||||||
|
// around the operation; without it a one-time warning is issued and
|
||||||
|
// the operation runs with the rounding mode left unchanged.
|
||||||
|
// A non-Imprecise trapping mode also yields a one-time warning.
|
||||||
|
def template FloatingPointExecute {{
|
||||||
|
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
|
||||||
|
Trace::InstRecord *traceData) const
|
||||||
|
{
|
||||||
|
// Test the flag so the warning is emitted only on the first call,
|
||||||
|
// matching the warnedOnRounding guard further down.
|
||||||
|
if (trappingMode != Imprecise && !warnedOnTrapping) {
|
||||||
|
warn("%s: non-standard trapping mode not supported",
|
||||||
|
generateDisassembly(0, NULL));
|
||||||
|
warnedOnTrapping = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Fault fault = No_Fault;
|
||||||
|
|
||||||
|
%(fp_enable_check)s;
|
||||||
|
%(op_decl)s;
|
||||||
|
%(op_rd)s;
|
||||||
|
#if USE_FENV
|
||||||
|
if (roundingMode == Normal) {
|
||||||
|
%(code)s;
|
||||||
|
} else {
|
||||||
|
// Run the operation under the rounding mode derived from the
|
||||||
|
// FPCR, then put the host back to round-to-nearest.
|
||||||
|
fesetround(getC99RoundingMode(xc->readFpcr()));
|
||||||
|
%(code)s;
|
||||||
|
fesetround(FE_TONEAREST);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// No fenv support: warn once, then execute without changing the
|
||||||
|
// rounding mode.
|
||||||
|
if (roundingMode != Normal && !warnedOnRounding) {
|
||||||
|
warn("%s: non-standard rounding mode not supported",
|
||||||
|
generateDisassembly(0, NULL));
|
||||||
|
warnedOnRounding = true;
|
||||||
|
}
|
||||||
|
%(code)s;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Write results back only when no fault was raised.
|
||||||
|
if (fault == No_Fault) {
|
||||||
|
%(op_wb)s;
|
||||||
|
}
|
||||||
|
|
||||||
|
return fault;
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
|
||||||
|
// FP instruction class execute method template where no dynamic
|
||||||
|
// rounding mode control is needed. Like BasicExecute, but includes
|
||||||
|
// a one-time check & warning for non-standard trapping mode.
|
||||||
|
def template FPFixedRoundingExecute {{
|
||||||
|
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
|
||||||
|
Trace::InstRecord *traceData) const
|
||||||
|
{
|
||||||
|
// Test the flag so the warning is emitted only on the first call;
|
||||||
|
// otherwise it would repeat every time this method runs.
|
||||||
|
if (trappingMode != Imprecise && !warnedOnTrapping) {
|
||||||
|
warn("%s: non-standard trapping mode not supported",
|
||||||
|
generateDisassembly(0, NULL));
|
||||||
|
warnedOnTrapping = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Fault fault = No_Fault;
|
||||||
|
|
||||||
|
%(fp_enable_check)s;
|
||||||
|
%(op_decl)s;
|
||||||
|
%(op_rd)s;
|
||||||
|
%(code)s;
|
||||||
|
|
||||||
|
// Write results back only when no fault was raised.
|
||||||
|
if (fault == No_Fault) {
|
||||||
|
%(op_wb)s;
|
||||||
|
}
|
||||||
|
|
||||||
|
return fault;
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
|
||||||
|
def template FloatingPointDecode {{
|
||||||
|
{
|
||||||
|
AlphaStaticInst *i = new %(class_name)s(machInst);  // one class per instruction; no Fast/General selection is made here
|
||||||
|
if (FC == 31) {  // NOTE(review): FC == 31 presumably means the destination is the always-zero FP register -- confirm
|
||||||
|
i = makeNop(i);  // hand the instruction to makeNop() and decode to whatever it returns
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
|
||||||
// General format for floating-point operate instructions:
|
// General format for floating-point operate instructions:
|
||||||
// - Checks trapping and rounding mode flags. Trapping modes
|
// - Checks trapping and rounding mode flags. Trapping modes
|
||||||
// currently unimplemented (will fail).
|
// currently unimplemented (will fail).
|
||||||
|
@ -722,28 +793,20 @@ output decoder {{
|
||||||
def format FloatingPointOperate(code, *opt_args) {{
|
def format FloatingPointOperate(code, *opt_args) {{
|
||||||
iop = InstObjParams(name, Name, 'AlphaFP', CodeBlock(code), opt_args)
|
iop = InstObjParams(name, Name, 'AlphaFP', CodeBlock(code), opt_args)
|
||||||
decode_block = FloatingPointDecode.subst(iop)
|
decode_block = FloatingPointDecode.subst(iop)
|
||||||
|
header_output = BasicDeclare.subst(iop)
|
||||||
fast_iop = InstObjParams(name, Name + 'Fast', 'AlphaFP',
|
decoder_output = BasicConstructor.subst(iop)
|
||||||
CodeBlock(code), opt_args)
|
exec_output = FloatingPointExecute.subst(iop)
|
||||||
header_output = BasicDeclare.subst(fast_iop)
|
|
||||||
decoder_output = BasicConstructor.subst(fast_iop)
|
|
||||||
exec_output = BasicExecute.subst(fast_iop)
|
|
||||||
|
|
||||||
gen_code_prefix = r'''
|
|
||||||
fesetround(getC99RoundingMode(xc->readFpcr()));
|
|
||||||
'''
|
|
||||||
|
|
||||||
gen_code_suffix = r'''
|
|
||||||
fesetround(FE_TONEAREST);
|
|
||||||
'''
|
|
||||||
|
|
||||||
gen_iop = InstObjParams(name, Name + 'General', 'AlphaFP',
|
|
||||||
CodeBlock(gen_code_prefix + code + gen_code_suffix), opt_args)
|
|
||||||
header_output += BasicDeclare.subst(gen_iop)
|
|
||||||
decoder_output += BasicConstructor.subst(gen_iop)
|
|
||||||
exec_output += BasicExecute.subst(gen_iop)
|
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
// Special format for cvttq where rounding mode is pre-decoded
|
||||||
|
def format FPFixedRounding(code, class_suffix, *opt_args) {{
|
||||||
|
Name += class_suffix  # append the suffix so each variant decoded with this format gets its own class name
|
||||||
|
iop = InstObjParams(name, Name, 'AlphaFP', CodeBlock(code), opt_args)
|
||||||
|
decode_block = FloatingPointDecode.subst(iop)
|
||||||
|
header_output = BasicDeclare.subst(iop)
|
||||||
|
decoder_output = BasicConstructor.subst(iop)
|
||||||
|
exec_output = FPFixedRoundingExecute.subst(iop)  # execute template that does no dynamic rounding-mode switching
|
||||||
|
}};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
@ -2193,30 +2256,30 @@ decode OPCODE default Unknown::unknown() {
|
||||||
0x1c: decode INTFUNC {
|
0x1c: decode INTFUNC {
|
||||||
0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); }
|
0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); }
|
||||||
0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); }
|
0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); }
|
||||||
0x32: ctlz({{
|
0x32: ctlz({{
|
||||||
uint64_t count = 0;
|
uint64_t count = 0;
|
||||||
uint64_t temp = Rb;
|
uint64_t temp = Rb;
|
||||||
if (temp<63:32>) temp >>= 32; else count += 32;
|
if (temp<63:32>) temp >>= 32; else count += 32;
|
||||||
if (temp<31:16>) temp >>= 16; else count += 16;
|
if (temp<31:16>) temp >>= 16; else count += 16;
|
||||||
if (temp<15:8>) temp >>= 8; else count += 8;
|
if (temp<15:8>) temp >>= 8; else count += 8;
|
||||||
if (temp<7:4>) temp >>= 4; else count += 4;
|
if (temp<7:4>) temp >>= 4; else count += 4;
|
||||||
if (temp<3:2>) temp >>= 2; else count += 2;
|
if (temp<3:2>) temp >>= 2; else count += 2;
|
||||||
if (temp<1:1>) temp >>= 1; else count += 1;
|
if (temp<1:1>) temp >>= 1; else count += 1;
|
||||||
if ((temp<0:0>) != 0x1) count += 1;
|
if ((temp<0:0>) != 0x1) count += 1;
|
||||||
Rc = count;
|
Rc = count;
|
||||||
}}, IntAluOp);
|
}}, IntAluOp);
|
||||||
|
|
||||||
0x33: cttz({{
|
0x33: cttz({{
|
||||||
uint64_t count = 0;
|
uint64_t count = 0;
|
||||||
uint64_t temp = Rb;
|
uint64_t temp = Rb;
|
||||||
if (!(temp<31:0>)) { temp >>= 32; count += 32; }
|
if (!(temp<31:0>)) { temp >>= 32; count += 32; }
|
||||||
if (!(temp<15:0>)) { temp >>= 16; count += 16; }
|
if (!(temp<15:0>)) { temp >>= 16; count += 16; }
|
||||||
if (!(temp<7:0>)) { temp >>= 8; count += 8; }
|
if (!(temp<7:0>)) { temp >>= 8; count += 8; }
|
||||||
if (!(temp<3:0>)) { temp >>= 4; count += 4; }
|
if (!(temp<3:0>)) { temp >>= 4; count += 4; }
|
||||||
if (!(temp<1:0>)) { temp >>= 2; count += 2; }
|
if (!(temp<1:0>)) { temp >>= 2; count += 2; }
|
||||||
if (!(temp<0:0> & ULL(0x1))) count += 1;
|
if (!(temp<0:0> & ULL(0x1))) count += 1;
|
||||||
Rc = count;
|
Rc = count;
|
||||||
}}, IntAluOp);
|
}}, IntAluOp);
|
||||||
|
|
||||||
format FailUnimpl {
|
format FailUnimpl {
|
||||||
0x30: ctpop();
|
0x30: ctpop();
|
||||||
|
@ -2282,7 +2345,7 @@ decode OPCODE default Unknown::unknown() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// IEEE floating point
|
// Square root and integer-to-FP moves
|
||||||
0x14: decode FP_SHORTFUNC {
|
0x14: decode FP_SHORTFUNC {
|
||||||
// Integer to FP register moves must have RB == 31
|
// Integer to FP register moves must have RB == 31
|
||||||
0x4: decode RB {
|
0x4: decode RB {
|
||||||
|
@ -2327,35 +2390,40 @@ decode OPCODE default Unknown::unknown() {
|
||||||
|
|
||||||
// IEEE floating point
|
// IEEE floating point
|
||||||
0x16: decode FP_SHORTFUNC_TOP2 {
|
0x16: decode FP_SHORTFUNC_TOP2 {
|
||||||
// The top two bits of the short function code break this space
|
// The top two bits of the short function code break this
|
||||||
// into four groups: binary ops, compares, reserved, and conversions.
|
// space into four groups: binary ops, compares, reserved, and
|
||||||
// See Table 4-12 of AHB.
|
// conversions. See Table 4-12 of AHB. There are different
|
||||||
|
// special cases in these different groups, so we decode on
|
||||||
|
// these top two bits first just to select a decode strategy.
|
||||||
// Most of these instructions may have various trapping and
|
// Most of these instructions may have various trapping and
|
||||||
// rounding mode flags set; these are decoded in the
|
// rounding mode flags set; these are decoded in the
|
||||||
// FloatingPointDecode template used by the
|
// FloatingPointDecode template used by the
|
||||||
// FloatingPointOperate format.
|
// FloatingPointOperate format.
|
||||||
|
|
||||||
// add/sub/mul/div: just decode on the short function code
|
// add/sub/mul/div: just decode on the short function code
|
||||||
// and source type.
|
// and source type. All valid trapping and rounding modes apply.
|
||||||
0: decode FP_TYPEFUNC {
|
0: decode FP_TRAPMODE {
|
||||||
format FloatingPointOperate {
|
// check for valid trapping modes here
|
||||||
|
0,1,5,7: decode FP_TYPEFUNC {
|
||||||
|
format FloatingPointOperate {
|
||||||
#if SS_COMPATIBLE_FP
|
#if SS_COMPATIBLE_FP
|
||||||
0x00: adds({{ Fc = Fa + Fb; }});
|
0x00: adds({{ Fc = Fa + Fb; }});
|
||||||
0x01: subs({{ Fc = Fa - Fb; }});
|
0x01: subs({{ Fc = Fa - Fb; }});
|
||||||
0x02: muls({{ Fc = Fa * Fb; }}, FloatMultOp);
|
0x02: muls({{ Fc = Fa * Fb; }}, FloatMultOp);
|
||||||
0x03: divs({{ Fc = Fa / Fb; }}, FloatDivOp);
|
0x03: divs({{ Fc = Fa / Fb; }}, FloatDivOp);
|
||||||
#else
|
#else
|
||||||
0x00: adds({{ Fc.sf = Fa.sf + Fb.sf; }});
|
0x00: adds({{ Fc.sf = Fa.sf + Fb.sf; }});
|
||||||
0x01: subs({{ Fc.sf = Fa.sf - Fb.sf; }});
|
0x01: subs({{ Fc.sf = Fa.sf - Fb.sf; }});
|
||||||
0x02: muls({{ Fc.sf = Fa.sf * Fb.sf; }}, FloatMultOp);
|
0x02: muls({{ Fc.sf = Fa.sf * Fb.sf; }}, FloatMultOp);
|
||||||
0x03: divs({{ Fc.sf = Fa.sf / Fb.sf; }}, FloatDivOp);
|
0x03: divs({{ Fc.sf = Fa.sf / Fb.sf; }}, FloatDivOp);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
0x20: addt({{ Fc = Fa + Fb; }});
|
0x20: addt({{ Fc = Fa + Fb; }});
|
||||||
0x21: subt({{ Fc = Fa - Fb; }});
|
0x21: subt({{ Fc = Fa - Fb; }});
|
||||||
0x22: mult({{ Fc = Fa * Fb; }}, FloatMultOp);
|
0x22: mult({{ Fc = Fa * Fb; }}, FloatMultOp);
|
||||||
0x23: divt({{ Fc = Fa / Fb; }}, FloatDivOp);
|
0x23: divt({{ Fc = Fa / Fb; }}, FloatDivOp);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Floating-point compare instructions must have the default
|
// Floating-point compare instructions must have the default
|
||||||
|
@ -2384,7 +2452,17 @@ decode OPCODE default Unknown::unknown() {
|
||||||
3: decode FA {
|
3: decode FA {
|
||||||
31: decode FP_TYPEFUNC {
|
31: decode FP_TYPEFUNC {
|
||||||
format FloatingPointOperate {
|
format FloatingPointOperate {
|
||||||
0x2f: cvttq({{ Fc.sq = (int64_t)rint(Fb); }});
|
0x2f: decode FP_ROUNDMODE {
|
||||||
|
format FPFixedRounding {
|
||||||
|
// "chopped" i.e. round toward zero
|
||||||
|
0: cvttq({{ Fc.sq = (int64_t)trunc(Fb); }},
|
||||||
|
Chopped);
|
||||||
|
// round to minus infinity
|
||||||
|
1: cvttq({{ Fc.sq = (int64_t)floor(Fb); }},
|
||||||
|
MinusInfinity);
|
||||||
|
}
|
||||||
|
default: cvttq({{ Fc.sq = (int64_t)nearbyint(Fb); }});
|
||||||
|
}
|
||||||
|
|
||||||
// The cvtts opcode is overloaded to be cvtst if the trap
|
// The cvtts opcode is overloaded to be cvtst if the trap
|
||||||
// mode is 2 or 6 (which are not valid otherwise)
|
// mode is 2 or 6 (which are not valid otherwise)
|
||||||
|
|
|
@ -256,14 +256,19 @@ def p_def_or_output(t):
|
||||||
# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
|
# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
|
||||||
# directly to the appropriate output section.
|
# directly to the appropriate output section.
|
||||||
|
|
||||||
|
|
||||||
|
# Protect any non-dict-substitution '%'s in a format string
|
||||||
|
# (i.e. those not followed by '(')
|
||||||
|
def protect_non_subst_percents(s):
|
||||||
|
return re.sub(r'%(?!\()', '%%', s)  # (?!\() is a negative lookahead: only a '%' NOT followed by '(' is doubled, so '%(name)s' substitutions are left alone
|
||||||
|
|
||||||
# Massage output block by substituting in template definitions and bit
|
# Massage output block by substituting in template definitions and bit
|
||||||
# operators. We handle '%'s embedded in the string that don't
|
# operators. We handle '%'s embedded in the string that don't
|
||||||
# indicate template substitutions (or CPU-specific symbols, which get
|
# indicate template substitutions (or CPU-specific symbols, which get
|
||||||
# handled in GenCode) by doubling them first so that the format
|
# handled in GenCode) by doubling them first so that the format
|
||||||
# operation will reduce them back to single '%'s.
|
# operation will reduce them back to single '%'s.
|
||||||
def process_output(s):
|
def process_output(s):
|
||||||
# protect any non-substitution '%'s (not followed by '(')
|
s = protect_non_subst_percents(s)
|
||||||
s = re.sub(r'%(?!\()', '%%', s)
|
|
||||||
# protects cpu-specific symbols too
|
# protects cpu-specific symbols too
|
||||||
s = protect_cpu_symbols(s)
|
s = protect_cpu_symbols(s)
|
||||||
return substBitOps(s % templateMap)
|
return substBitOps(s % templateMap)
|
||||||
|
@ -921,8 +926,12 @@ class Template:
|
||||||
myDict.update(d.__dict__)
|
myDict.update(d.__dict__)
|
||||||
else:
|
else:
|
||||||
raise TypeError, "Template.subst() arg must be or have dictionary"
|
raise TypeError, "Template.subst() arg must be or have dictionary"
|
||||||
|
# Protect non-Python-dict substitutions (e.g. if there's a printf
|
||||||
|
# in the templated C++ code)
|
||||||
|
template = protect_non_subst_percents(self.template)
|
||||||
# CPU-model-specific substitutions are handled later (in GenCode).
|
# CPU-model-specific substitutions are handled later (in GenCode).
|
||||||
return protect_cpu_symbols(self.template) % myDict
|
template = protect_cpu_symbols(template)
|
||||||
|
return template % myDict
|
||||||
|
|
||||||
# Convert to string. This handles the case when a template with a
|
# Convert to string. This handles the case when a template with a
|
||||||
# CPU-specific term gets interpolated into another template or into
|
# CPU-specific term gets interpolated into another template or into
|
||||||
|
|
Loading…
Reference in a new issue