X86: Total overhaul of the division instructions and microops.

--HG--
extra : convert_revision : 303ea45f69f7805361ad877fe6bb43fbc3dfd7a6
This commit is contained in:
Gabe Black 2007-09-13 16:34:46 -07:00
parent 0f57b407a3
commit f7b6230d99
4 changed files with 650 additions and 39 deletions

View file

@ -234,20 +234,70 @@ def macroop IMUL_R_P_I
# Unsigned divide, one byte operand size, divisor in a register.
# div1 seeds the division state with rsi as the upper part of the dividend
# and reg as the divisor. The div2 steps then shift in the 8 bits of rax,
# most significant first. Each div2 leaves the number of bits still to be
# shifted in in t1; the flagged div2 sets EZF once that count is zero and
# bri repeats the loop while EZF is clear.
# The quotient is unloaded into rax and the remainder into rsi.
# NOTE(review): the IDIV_B comments call the remainder register "ah";
# confirm that rsi at dataSize=1 is what names ah here.
def macroop DIV_B_R
{
    # Do the initial part of the division
    div1 rsi, reg, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, 8, dataSize=1
    div2 t1, rax, t1, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t1, rax, t1, dataSize=1
    div2 t1, rax, t1, flags=(EZF,), dataSize=1
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax, dataSize=1
    divr rsi, dataSize=1
};
# Unsigned divide, one byte operand size, divisor loaded from memory
# (seg, sib, disp) into t2.
# t1 is reserved for the count of dividend bits still to be shifted in, so
# the divisor must not live there. div1 seeds the state with rsi as the
# upper part of the dividend; the div2 steps shift in the 8 bits of rax.
# The flagged div2 sets EZF once the count in t1 is zero and bri repeats
# the loop while EZF is clear.
# The quotient is unloaded into rax and the remainder into rsi.
def macroop DIV_B_M
{
    ld t2, seg, sib, disp

    # Do the initial part of the division
    div1 rsi, t2, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, 8, dataSize=1
    div2 t1, rax, t1, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t1, rax, t1, dataSize=1
    div2 t1, rax, t1, flags=(EZF,), dataSize=1
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax, dataSize=1
    divr rsi, dataSize=1
};
# Unsigned divide, one byte operand size, divisor loaded from memory with
# riprel addressing (rdip t7 precedes the load) into t2.
# t1 is reserved for the count of dividend bits still to be shifted in, so
# the divisor must not live there. div1 seeds the state with rsi as the
# upper part of the dividend; the div2 steps shift in the 8 bits of rax.
# The flagged div2 sets EZF once the count in t1 is zero and bri repeats
# the loop while EZF is clear.
# The quotient is unloaded into rax and the remainder into rsi.
def macroop DIV_B_P
{
    rdip t7
    ld t2, seg, riprel, disp

    # Do the initial part of the division
    div1 rsi, t2, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, 8, dataSize=1
    div2 t1, rax, t1, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t1, rax, t1, dataSize=1
    div2 t1, rax, t1, flags=(EZF,), dataSize=1
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax, dataSize=1
    divr rsi, dataSize=1
};
#
@ -256,24 +306,301 @@ def macroop DIV_B_P
# Unsigned divide at the instruction's operand size, divisor in a register.
# div1 seeds the division state with rdx as the upper part of the dividend
# and reg as the divisor. The div2 steps then shift in the
# "env.dataSize * 8" bits of rax, most significant first. Each div2 leaves
# the number of bits still to be shifted in in t1; the flagged div2 sets
# EZF once that count is zero and bri repeats the loop while EZF is clear.
# The quotient is unloaded into rax and the remainder into rdx.
def macroop DIV_R
{
    # Do the initial part of the division
    div1 rdx, reg

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, "env.dataSize * 8"
    div2 t1, rax, t1

    #Loop until we're out of bits to shift in
    #The amount of unrolling here could stand some tuning
divLoopTop:
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1, flags=(EZF,)
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax
    divr rdx
};
# Unsigned divide at the instruction's operand size, divisor loaded from
# memory (seg, sib, disp) into t2.
# t1 is reserved for the count of dividend bits still to be shifted in, so
# the divisor must not live there. div1 seeds the state with rdx as the
# upper part of the dividend; the div2 steps shift in the
# "env.dataSize * 8" bits of rax. The flagged div2 sets EZF once the count
# in t1 is zero and bri repeats the loop while EZF is clear.
# The quotient is unloaded into rax and the remainder into rdx.
def macroop DIV_M
{
    ld t2, seg, sib, disp

    # Do the initial part of the division
    div1 rdx, t2

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, "env.dataSize * 8"
    div2 t1, rax, t1

    #Loop until we're out of bits to shift in
    #The amount of unrolling here could stand some tuning
divLoopTop:
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1, flags=(EZF,)
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax
    divr rdx
};
# Unsigned divide at the instruction's operand size, divisor loaded from
# memory with riprel addressing (rdip t7 precedes the load) into t2.
# t1 is reserved for the count of dividend bits still to be shifted in, so
# the divisor must not live there. div1 seeds the state with rdx as the
# upper part of the dividend; the div2 steps shift in the
# "env.dataSize * 8" bits of rax. The flagged div2 sets EZF once the count
# in t1 is zero and bri repeats the loop while EZF is clear.
# The quotient is unloaded into rax and the remainder into rdx.
def macroop DIV_P
{
    rdip t7
    ld t2, seg, riprel, disp

    # Do the initial part of the division
    div1 rdx, t2

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, "env.dataSize * 8"
    div2 t1, rax, t1

    #Loop until we're out of bits to shift in
    #The amount of unrolling here could stand some tuning
divLoopTop:
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1, flags=(EZF,)
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax
    divr rdx
};
#
# One byte version of signed division
#
# Signed divide, one byte operand size, divisor in a register.
# Approach: form the absolute values of the dividend (upper part rsi, lower
# part rax) and of the divisor, run the unsigned div1/div2 sequence on
# them, then restore the signs. The remainder takes the dividend's sign and
# the quotient is negated once for each negative input.
# Temporaries: t1/t2 = |dividend| low/high, t3 = |divisor|, t4 = scratch and
# remaining-bit count, t5 = quotient, t6 = remainder, t7 = divisor exactly
# as supplied.
def macroop IDIV_B_R
{
    # Keep an untouched copy of the divisor. t3 is reduced to the absolute
    # value below, so the sign has to be read from this copy after the
    # division. Reading the copy rather than reg also stays correct if reg
    # names a register that is written during the sign fix-up.
    mov t7, t7, reg, dataSize=1

    # Negate dividend
    sub t1, t0, rax, flags=(ECF,), dataSize=1
    # NOTE(review): presumably fetches the borrow left in ECF by the sub
    # above so it can be carried into the upper part -- confirm flag index 3.
    ruflag t4, 3
    sub t2, t0, rsi, dataSize=1
    sub t2, t2, t4

    #Find the sign of the divisor
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, reg, 1, flags=(ECF,), dataSize=1

    # Negate divisor
    sub t3, t0, reg, dataSize=1
    # Put the divisor's absolute value into t3
    mov t3, t3, reg, flags=(nCECF,), dataSize=1

    #Find the sign of the dividend
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, rsi, 1, flags=(ECF,), dataSize=1

    # Put the dividend's absolute value into t1 and t2
    mov t1, t1, rax, flags=(nCECF,), dataSize=1
    mov t2, t2, rsi, flags=(nCECF,), dataSize=1

    # Do the initial part of the division
    div1 t2, t3, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, 8, dataSize=1
    div2 t4, t1, t4, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4, dataSize=1
    div2 t4, t1, t4, flags=(EZF,), dataSize=1
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5, dataSize=1
    divr t6, dataSize=1

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6, dataSize=1
    # If the dividend was negative, put the negated remainder in rsi.
    mov rsi, rsi, t4, (CECF,), dataSize=1
    # Otherwise put the regular remainder in rsi.
    mov rsi, rsi, t6, (nCECF,), dataSize=1

    # Negate the quotient.
    sub t4, t0, t5, dataSize=1
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,), dataSize=1

    # Check the sign of the divisor. This must look at the original value
    # in t7: t3 holds the absolute value, whose sign bit says nothing about
    # the divisor's sign.
    slli t0, t7, 1, flags=(ECF,), dataSize=1

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5, dataSize=1
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,), dataSize=1
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,), dataSize=1
};
# Signed divide, one byte operand size, divisor loaded from memory
# (seg, sib, disp).
# Approach: form the absolute values of the dividend (upper part rsi, lower
# part rax) and of the divisor, run the unsigned div1/div2 sequence on
# them, then restore the signs. The remainder takes the dividend's sign and
# the quotient is negated once for each negative input.
# Temporaries: t1/t2 = |dividend| low/high, t3 = |divisor|, t4 = scratch and
# remaining-bit count, t5 = quotient, t6 = remainder, t7 = divisor exactly
# as loaded.
def macroop IDIV_B_M
{
    # Negate dividend
    sub t1, t0, rax, flags=(ECF,), dataSize=1
    # NOTE(review): presumably fetches the borrow left in ECF by the sub
    # above so it can be carried into the upper part -- confirm flag index 3.
    ruflag t4, 3
    sub t2, t0, rsi, dataSize=1
    sub t2, t2, t4

    ld t3, seg, sib, disp
    # Keep an untouched copy of the divisor. t3 is reduced to the absolute
    # value below, so the sign has to be read from this copy after the
    # division.
    mov t7, t7, t3, dataSize=1

    #Find the sign of the divisor
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, t3, 1, flags=(ECF,), dataSize=1

    # Negate divisor
    sub t4, t0, t3, dataSize=1
    # Put the divisor's absolute value into t3
    mov t3, t3, t4, flags=(CECF,), dataSize=1

    #Find the sign of the dividend
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, rsi, 1, flags=(ECF,), dataSize=1

    # Put the dividend's absolute value into t1 and t2
    mov t1, t1, rax, flags=(nCECF,), dataSize=1
    mov t2, t2, rsi, flags=(nCECF,), dataSize=1

    # Do the initial part of the division
    div1 t2, t3, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, 8, dataSize=1
    div2 t4, t1, t4, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4, dataSize=1
    div2 t4, t1, t4, flags=(EZF,), dataSize=1
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5, dataSize=1
    divr t6, dataSize=1

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6, dataSize=1
    # If the dividend was negative, put the negated remainder in rsi.
    mov rsi, rsi, t4, (CECF,), dataSize=1
    # Otherwise put the regular remainder in rsi.
    mov rsi, rsi, t6, (nCECF,), dataSize=1

    # Negate the quotient.
    sub t4, t0, t5, dataSize=1
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,), dataSize=1

    # Check the sign of the divisor. This must look at the original value
    # in t7: t3 holds the absolute value, whose sign bit says nothing about
    # the divisor's sign.
    slli t0, t7, 1, flags=(ECF,), dataSize=1

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5, dataSize=1
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,), dataSize=1
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,), dataSize=1
};
# Signed divide, one byte operand size, divisor loaded from memory with
# riprel addressing (rdip t7 precedes the load).
# Approach: form the absolute values of the dividend (upper part rsi, lower
# part rax) and of the divisor, run the unsigned div1/div2 sequence on
# them, then restore the signs. The remainder takes the dividend's sign and
# the quotient is negated once for each negative input.
# Temporaries: t1/t2 = |dividend| low/high, t3 = |divisor|, t4 = scratch and
# remaining-bit count, t5 = quotient, t6 = remainder. t7 first feeds the
# riprel load and afterwards holds the divisor exactly as loaded.
def macroop IDIV_B_P
{
    # Negate dividend
    sub t1, t0, rax, flags=(ECF,), dataSize=1
    # NOTE(review): presumably fetches the borrow left in ECF by the sub
    # above so it can be carried into the upper part -- confirm flag index 3.
    ruflag t4, 3
    sub t2, t0, rsi, dataSize=1
    sub t2, t2, t4

    rdip t7
    ld t3, seg, riprel, disp
    # The load is done, so t7 is free again. Keep an untouched copy of the
    # divisor in it: t3 is reduced to the absolute value below, so the sign
    # has to be read from this copy after the division.
    mov t7, t7, t3, dataSize=1

    #Find the sign of the divisor
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, t3, 1, flags=(ECF,), dataSize=1

    # Negate divisor
    sub t4, t0, t3, dataSize=1
    # Put the divisor's absolute value into t3
    mov t3, t3, t4, flags=(CECF,), dataSize=1

    #Find the sign of the dividend
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, rsi, 1, flags=(ECF,), dataSize=1

    # Put the dividend's absolute value into t1 and t2
    mov t1, t1, rax, flags=(nCECF,), dataSize=1
    mov t2, t2, rsi, flags=(nCECF,), dataSize=1

    # Do the initial part of the division
    div1 t2, t3, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, 8, dataSize=1
    div2 t4, t1, t4, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4, dataSize=1
    div2 t4, t1, t4, flags=(EZF,), dataSize=1
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5, dataSize=1
    divr t6, dataSize=1

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6, dataSize=1
    # If the dividend was negative, put the negated remainder in rsi.
    mov rsi, rsi, t4, (CECF,), dataSize=1
    # Otherwise put the regular remainder in rsi.
    mov rsi, rsi, t6, (nCECF,), dataSize=1

    # Negate the quotient.
    sub t4, t0, t5, dataSize=1
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,), dataSize=1

    # Check the sign of the divisor. This must look at the original value
    # in t7: t3 holds the absolute value, whose sign bit says nothing about
    # the divisor's sign.
    slli t0, t7, 1, flags=(ECF,), dataSize=1

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5, dataSize=1
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,), dataSize=1
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,), dataSize=1
};
#
@ -282,27 +609,225 @@ def macroop DIV_P
# Signed divide at the instruction's operand size, divisor in a register.
# Approach: form the absolute values of the dividend (upper part rdx, lower
# part rax) and of the divisor, run the unsigned div1/div2 sequence on
# them, then restore the signs. The remainder takes the dividend's sign and
# the quotient is negated once for each negative input.
# Temporaries: t1/t2 = |dividend| low/high, t3 = |divisor|, t4 = scratch and
# remaining-bit count, t5 = quotient, t6 = remainder, t7 = divisor exactly
# as supplied.
def macroop IDIV_R
{
    # Keep an untouched copy of the divisor. t3 is reduced to the absolute
    # value below, so the sign has to be read from this copy after the
    # division. Reading the copy rather than reg also stays correct if reg
    # names a register that is written during the sign fix-up.
    mov t7, t7, reg

    # Negate dividend
    sub t1, t0, rax, flags=(ECF,)
    # NOTE(review): presumably fetches the borrow left in ECF by the sub
    # above so it can be carried into the upper part -- confirm flag index 3.
    ruflag t4, 3
    sub t2, t0, rdx
    sub t2, t2, t4

    #Find the sign of the divisor
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, reg, 1, flags=(ECF,)

    # Negate divisor
    sub t3, t0, reg
    # Put the divisor's absolute value into t3
    mov t3, t3, reg, flags=(nCECF,)

    #Find the sign of the dividend
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, rdx, 1, flags=(ECF,)

    # Put the dividend's absolute value into t1 and t2
    mov t1, t1, rax, flags=(nCECF,)
    mov t2, t2, rdx, flags=(nCECF,)

    # Do the initial part of the division
    div1 t2, t3

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, "env.dataSize * 8"
    div2 t4, t1, t4

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4, flags=(EZF,)
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5
    divr t6

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6
    # If the dividend was negative, put the negated remainder in rdx.
    mov rdx, rdx, t4, (CECF,)
    # Otherwise put the regular remainder in rdx.
    mov rdx, rdx, t6, (nCECF,)

    # Negate the quotient.
    sub t4, t0, t5
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,)

    # Check the sign of the divisor. This must look at the original value
    # in t7: t3 holds the absolute value, whose sign bit says nothing about
    # the divisor's sign.
    slli t0, t7, 1, flags=(ECF,)

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,)
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,)
};
# Signed divide at the instruction's operand size, divisor loaded from
# memory (seg, sib, disp).
# Approach: form the absolute values of the dividend (upper part rdx, lower
# part rax) and of the divisor, run the unsigned div1/div2 sequence on
# them, then restore the signs. The remainder takes the dividend's sign and
# the quotient is negated once for each negative input.
# Temporaries: t1/t2 = |dividend| low/high, t3 = |divisor|, t4 = scratch and
# remaining-bit count, t5 = quotient, t6 = remainder, t7 = divisor exactly
# as loaded.
def macroop IDIV_M
{
    # Negate dividend
    sub t1, t0, rax, flags=(ECF,)
    # NOTE(review): presumably fetches the borrow left in ECF by the sub
    # above so it can be carried into the upper part -- confirm flag index 3.
    ruflag t4, 3
    sub t2, t0, rdx
    sub t2, t2, t4

    ld t3, seg, sib, disp
    # Keep an untouched copy of the divisor. t3 is reduced to the absolute
    # value below, so the sign has to be read from this copy after the
    # division.
    mov t7, t7, t3

    #Find the sign of the divisor
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, t3, 1, flags=(ECF,)

    # Negate divisor
    sub t4, t0, t3
    # Put the divisor's absolute value into t3
    mov t3, t3, t4, flags=(CECF,)

    #Find the sign of the dividend
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, rdx, 1, flags=(ECF,)

    # Put the dividend's absolute value into t1 and t2
    mov t1, t1, rax, flags=(nCECF,)
    mov t2, t2, rdx, flags=(nCECF,)

    # Do the initial part of the division
    div1 t2, t3

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, "env.dataSize * 8"
    div2 t4, t1, t4

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4, flags=(EZF,)
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5
    divr t6

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6
    # If the dividend was negative, put the negated remainder in rdx.
    mov rdx, rdx, t4, (CECF,)
    # Otherwise put the regular remainder in rdx.
    mov rdx, rdx, t6, (nCECF,)

    # Negate the quotient.
    sub t4, t0, t5
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,)

    # Check the sign of the divisor. This must look at the original value
    # in t7: t3 holds the absolute value, whose sign bit says nothing about
    # the divisor's sign.
    slli t0, t7, 1, flags=(ECF,)

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,)
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,)
};
# Signed divide at the instruction's operand size, divisor loaded from
# memory with riprel addressing (rdip t7 precedes the load).
# Approach: form the absolute values of the dividend (upper part rdx, lower
# part rax) and of the divisor, run the unsigned div1/div2 sequence on
# them, then restore the signs. The remainder takes the dividend's sign and
# the quotient is negated once for each negative input.
# Temporaries: t1/t2 = |dividend| low/high, t3 = |divisor|, t4 = scratch and
# remaining-bit count, t5 = quotient, t6 = remainder. t7 first feeds the
# riprel load and afterwards holds the divisor exactly as loaded.
def macroop IDIV_P
{
    # Negate dividend
    sub t1, t0, rax, flags=(ECF,)
    # NOTE(review): presumably fetches the borrow left in ECF by the sub
    # above so it can be carried into the upper part -- confirm flag index 3.
    ruflag t4, 3
    sub t2, t0, rdx
    sub t2, t2, t4

    rdip t7
    ld t3, seg, riprel, disp
    # The load is done, so t7 is free again. Keep an untouched copy of the
    # divisor in it: t3 is reduced to the absolute value below, so the sign
    # has to be read from this copy after the division.
    mov t7, t7, t3

    #Find the sign of the divisor
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, t3, 1, flags=(ECF,)

    # Negate divisor
    sub t4, t0, t3
    # Put the divisor's absolute value into t3
    mov t3, t3, t4, flags=(CECF,)

    #Find the sign of the dividend
    #FIXME!!! This depends on shifts setting the carry flag correctly.
    slli t0, rdx, 1, flags=(ECF,)

    # Put the dividend's absolute value into t1 and t2
    mov t1, t1, rax, flags=(nCECF,)
    mov t2, t2, rdx, flags=(nCECF,)

    # Do the initial part of the division
    div1 t2, t3

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, "env.dataSize * 8"
    div2 t4, t1, t4

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4, flags=(EZF,)
    bri t0, label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5
    divr t6

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6
    # If the dividend was negative, put the negated remainder in rdx.
    mov rdx, rdx, t4, (CECF,)
    # Otherwise put the regular remainder in rdx.
    mov rdx, rdx, t6, (nCECF,)

    # Negate the quotient.
    sub t4, t0, t5
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,)

    # Check the sign of the divisor. This must look at the original value
    # in t7: t3 holds the absolute value, whose sign bit says nothing about
    # the divisor's sign.
    slli t0, t7, 1, flags=(ECF,)

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,)
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,)
};
'''
#let {{
# class IDIV(Inst):
# "GenFault ${new UnimpInstFault}"
#}};

View file

@ -227,6 +227,45 @@ def template MicroRegOpImmConstructor {{
}
}};
output header {{
// Declaration of the single division step shared by the Div1 and Div2
// micro-op code. quotient and remainder are passed by reference and are
// updated in place; the definition is emitted in the decoder output.
void
divide(uint64_t dividend, uint64_t divisor,
uint64_t &quotient, uint64_t &remainder);
}};
output decoder {{
    // Performs one step of "one or two bit" division.
    //
    // When the divisor fits into the dividend, the divisor is shifted left
    // until its most significant set bit lines up with the dividend's. If
    // that overshoots it is moved back one position. The aligned divisor is
    // subtracted from `remainder` and the matching power of two is added to
    // `quotient`. When the divisor is larger than the dividend, neither
    // output is touched.
    //
    // A zero divisor is reported through panic().
    void
    divide(uint64_t dividend, uint64_t divisor,
            uint64_t &quotient, uint64_t &remainder)
    {
        if (divisor == 0)
            panic("Divide by zero!\\n");

        // Nothing can be subtracted yet; leave the outputs alone.
        if (divisor > dividend)
            return;

        // Distance between the top set bits of the two operands.
        int shift = findMsbSet(dividend) - findMsbSet(divisor);

        // Divisor aligned with the dividend, and the quotient bit that
        // corresponds to that alignment.
        uint64_t alignedDivisor = divisor << shift;
        uint64_t quotientBit = 1;
        quotientBit <<= shift;

        // Lining up the top bits can make the divisor too large. Step
        // both values back by one bit position in that case.
        if (alignedDivisor > dividend) {
            alignedDivisor >>= 1;
            quotientBit >>= 1;
        }

        // Account for this step in both results.
        remainder -= alignedDivisor;
        quotient += quotientBit;
    }
}};
let {{
# Make these empty strings so that concatenating onto
# them will always work.
@ -507,20 +546,65 @@ let {{
ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
'''
# One or two bit divide.
# Starts a division: psrc1 is taken as the initial remainder (the upper
# part of the dividend), op2 as the divisor, and the quotient starts at
# zero. One divide() step is applied and the resulting state is stored in
# the implicit Remainder, Quotient and Divisor registers, where the Div2,
# Divq and Divr micro-ops pick it up.
class Div1(WrRegOp):
    code = '''
        //These are temporaries so that modifying them later won't make
        //the ISA parser think they're also sources.
        uint64_t quotient = 0;
        uint64_t remainder = psrc1;
        //Similarly, this is a temporary so changing it doesn't make it
        //a source.
        uint64_t divisor = op2;
        //This is a temporary just for consistency and clarity.
        uint64_t dividend = remainder;
        //Do the division.
        divide(dividend, divisor, quotient, remainder);
        //Record the final results.
        Remainder = remainder;
        Quotient = quotient;
        Divisor = divisor;
    '''
# Step divide.
# Continues a division from the state held in the implicit Remainder,
# Quotient and Divisor registers. op2 is the number of dividend bits still
# to be shifted in and SrcReg1 supplies those bits, highest remaining bit
# first. While the running dividend is smaller than the divisor and bits
# remain, one bit is shifted in and the quotient is shifted left; then one
# divide() step is applied. DestReg receives the updated bit count.
# flag_code sets EZF (when selected through ext) if that count is zero and
# clears it otherwise, which is what the microcode loops branch on.
class Div2(RegOp):
    code = '''
        uint64_t dividend = Remainder;
        uint64_t divisor = Divisor;
        uint64_t quotient = Quotient;
        uint64_t remainder = dividend;
        int remaining = op2;
        //If we overshot, do nothing. This lets us unrool division loops a
        //little.
        if (remaining) {
            //Shift in bits from the low order portion of the dividend
            while(dividend < divisor && remaining) {
                dividend = (dividend << 1) | bits(SrcReg1, remaining - 1);
                quotient <<= 1;
                remaining--;
            }
            remainder = dividend;
            //Do the division.
            divide(dividend, divisor, quotient, remainder);
        }
        //Keep track of how many bits there are still to pull in.
        DestReg = merge(DestReg, remaining, dataSize);
        //Record the final results
        Remainder = remainder;
        Quotient = quotient;
    '''
    flag_code = '''
        if (DestReg == 0)
            ccFlagBits = ccFlagBits | (ext & EZFBit);
        else
            ccFlagBits = ccFlagBits & ~(ext & EZFBit);
    '''

# Unload the quotient: DestReg is built from SrcReg1 and the implicit
# Quotient register via merge() at dataSize.
class Divq(RdRegOp):
    code = 'DestReg = merge(SrcReg1, Quotient, dataSize);'

# Unload the remainder: DestReg is built from SrcReg1 and the implicit
# Remainder register via merge() at dataSize.
class Divr(RdRegOp):
    code = 'DestReg = merge(SrcReg1, Remainder, dataSize);'
# Register move micro-op: assigns DestReg the result of
# merge(SrcReg1, op2, dataSize).
# NOTE(review): presumably CondRegOp makes this a no-op when the flag
# condition given in the microcode is false, and merge() overlays the low
# dataSize bytes of op2 on SrcReg1 -- confirm against their definitions.
class Mov(CondRegOp):
code = 'DestReg = merge(SrcReg1, op2, dataSize)'

View file

@ -105,7 +105,8 @@ def operands {{
'ProdHi': ('IntReg', 'uqw', 'INTREG_IMPLICIT(1)', 'IsInteger', 8),
'Quotient': ('IntReg', 'uqw', 'INTREG_IMPLICIT(2)', 'IsInteger', 9),
'Remainder': ('IntReg', 'uqw', 'INTREG_IMPLICIT(3)', 'IsInteger', 10),
'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 11),
'Divisor': ('IntReg', 'uqw', 'INTREG_IMPLICIT(4)', 'IsInteger', 11),
'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 12),
'FpSrcReg1': ('FloatReg', 'df', 'src1', 'IsFloating', 20),
'FpSrcReg2': ('FloatReg', 'df', 'src2', 'IsFloating', 21),
'FpDestReg': ('FloatReg', 'df', 'dest', 'IsFloating', 22),

View file

@ -64,11 +64,12 @@ namespace X86ISA
const int NumPseudoIntRegs = 1;
//1. The condition code bits of the rflags register.
const int NumImplicitIntRegs = 5;
//1. The lower part of the result of multiplication.
//2. The upper part of the result of multiplication.
//3. The quotient from division
//4. The remainder from division
//5. The divisor for division
const int NumMMXRegs = 8;
const int NumXMMRegs = 16;