X86: Total overhaul of the division instructions and microops.

--HG-- extra : convert_revision : 303ea45f69f7805361ad877fe6bb43fbc3dfd7a6
2007-09-13 16:34:46 -07:00 · 2007-09-13 16:34:46 -07:00 · f7b6230d99
parent 0f57b407a3
commit f7b6230d99
4 changed files with 650 additions and 39 deletions
--- a/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/arithmetic/multiply_and_divide.py
@ -234,20 +234,70 @@ def macroop IMUL_R_P_I

 def macroop DIV_B_R
 {
-    div1 rax, rax, reg
+    # Do the initial part of the division
+    div1 rsi, reg, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, 8, dataSize=1
+    div2 t1, rax, t1, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t1, rax, t1, dataSize=1
+    div2 t1, rax, t1, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax, dataSize=1
+    divr rsi, dataSize=1
 };

 def macroop DIV_B_M
 {
-    ld t1, seg, sib, disp
-    div1 rax, rax, t1
+    ld t2, seg, sib, disp
+
+    # Do the initial part of the division
+    div1 rsi, t2, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, 8, dataSize=1
+    div2 t1, rax, t1, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t1, rax, t1, dataSize=1
+    div2 t1, rax, t1, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax, dataSize=1
+    divr rsi, dataSize=1
 };

 def macroop DIV_B_P
 {
    rdip t7
-    ld t1, seg, riprel, disp
-    div1 rax, rax, t1
+    ld t2, seg, riprel, disp
+
+    # Do the initial part of the division
+    div1 rsi, t2, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, 8, dataSize=1
+    div2 t1, rax, t1, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t1, rax, t1, dataSize=1
+    div2 t1, rax, t1, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax, dataSize=1
+    divr rsi, dataSize=1
 };

 #
@ -256,24 +306,301 @@ def macroop DIV_B_P

 def macroop DIV_R
 {
-    divr t1, rax, reg
-    divq rax, rax, reg
-    mov rdx, rdx, t1
+    # Do the initial part of the division
+    div1 rdx, reg
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, "env.dataSize * 8"
+    div2 t1, rax, t1
+
+    #Loop until we're out of bits to shift in
+    #The amount of unrolling here could stand some tuning
+divLoopTop:
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax
+    divr rdx
 };

 def macroop DIV_M
 {
-    ld t1, seg, sib, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    ld t2, seg, sib, disp
+
+    # Do the initial part of the division
+    div1 rdx, t2
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, "env.dataSize * 8"
+    div2 t1, rax, t1
+
+    #Loop until we're out of bits to shift in
+    #The amount of unrolling here could stand some tuning
+divLoopTop:
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax
+    divr rdx
 };

 def macroop DIV_P
 {
    rdip t7
-    ld t1, seg, riprel, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    ld t2, seg, riprel, disp
+
+    # Do the initial part of the division
+    div1 rdx, t2
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t1, rax, "env.dataSize * 8"
+    div2 t1, rax, t1
+
+    #Loop until we're out of bits to shift in
+    #The amount of unrolling here could stand some tuning
+divLoopTop:
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1
+    div2 t1, rax, t1, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq rax
+    divr rdx
+};
+
+#
+# One byte version of signed division
+#
+
+def macroop IDIV_B_R
+{
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,), dataSize=1
+    ruflag t4, 3
+    sub t2, t0, rsi, dataSize=1
+    sub t2, t2, t4
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, reg, 1, flags=(ECF,), dataSize=1
+
+    # Negate divisor
+    sub t3, t0, reg, dataSize=1
+    # Put the divisor's absolute value into t3
+    mov t3, t3, reg, flags=(nCECF,), dataSize=1
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,), dataSize=1
+    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+
+    # Do the initial part of the division
+    div1 t2, t3, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, 8, dataSize=1
+    div2 t4, t1, t4, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4, dataSize=1
+    div2 t4, t1, t4, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5, dataSize=1
+    divr t6, dataSize=1
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6, dataSize=1
+    # If the dividend was negative, put the negated remainder in rsi.
+    mov rsi, rsi, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in rsi.
+    mov rsi, rsi, t6, (nCECF,), dataSize=1
+
+    # Negate the quotient.
+    sub t4, t0, t5, dataSize=1
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,), dataSize=1
+
+    # Check the sign of the divisor
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5, dataSize=1
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,), dataSize=1
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,), dataSize=1
+};
+
+def macroop IDIV_B_M
+{
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,), dataSize=1
+    ruflag t4, 3
+    sub t2, t0, rsi, dataSize=1
+    sub t2, t2, t4
+
+    ld t3, seg, sib, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate divisor
+    sub t4, t0, t3, dataSize=1
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,), dataSize=1
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,), dataSize=1
+    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+
+    # Do the initial part of the division
+    div1 t2, t3, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, 8, dataSize=1
+    div2 t4, t1, t4, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4, dataSize=1
+    div2 t4, t1, t4, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5, dataSize=1
+    divr t6, dataSize=1
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6, dataSize=1
+    # If the dividend was negative, put the negated remainder in rsi.
+    mov rsi, rsi, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in rsi.
+    mov rsi, rsi, t6, (nCECF,), dataSize=1
+
+    # Negate the quotient.
+    sub t4, t0, t5, dataSize=1
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,), dataSize=1
+
+    # Check the sign of the divisor
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5, dataSize=1
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,), dataSize=1
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,), dataSize=1
+};
+
+def macroop IDIV_B_P
+{
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,), dataSize=1
+    ruflag t4, 3
+    sub t2, t0, rsi, dataSize=1
+    sub t2, t2, t4
+
+    rdip t7
+    ld t3, seg, riprel, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate divisor
+    sub t4, t0, t3, dataSize=1
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,), dataSize=1
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rsi, 1, flags=(ECF,), dataSize=1
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,), dataSize=1
+    mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+
+    # Do the initial part of the division
+    div1 t2, t3, dataSize=1
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, 8, dataSize=1
+    div2 t4, t1, t4, dataSize=1
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4, dataSize=1
+    div2 t4, t1, t4, flags=(EZF,), dataSize=1
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5, dataSize=1
+    divr t6, dataSize=1
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6, dataSize=1
+    # If the dividend was negative, put the negated remainder in rsi.
+    mov rsi, rsi, t4, (CECF,), dataSize=1
+    # Otherwise put the regular remainder in rsi.
+    mov rsi, rsi, t6, (nCECF,), dataSize=1
+
+    # Negate the quotient.
+    sub t4, t0, t5, dataSize=1
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,), dataSize=1
+
+    # Check the sign of the divisor
+    slli t0, t3, 1, flags=(ECF,), dataSize=1
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5, dataSize=1
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,), dataSize=1
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,), dataSize=1
 };

 #
@ -282,27 +609,225 @@ def macroop DIV_P

 def macroop IDIV_R
 {
-    divr t1, rax, reg
-    divq rax, rax, reg
-    mov rdx, rdx, t1
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,)
+    ruflag t4, 3
+    sub t2, t0, rdx
+    sub t2, t2, t4
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, reg, 1, flags=(ECF,)
+
+    # Negate divisor
+    sub t3, t0, reg
+    # Put the divisor's absolute value into t3
+    mov t3, t3, reg, flags=(nCECF,)
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rdx, 1, flags=(ECF,)
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,)
+    mov t2, t2, rdx, flags=(nCECF,)
+
+    # Do the initial part of the division
+    div1 t2, t3
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, "env.dataSize * 8"
+    div2 t4, t1, t4
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5
+    divr t6
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6
+    # If the dividend was negative, put the negated remainder in rdx.
+    mov rdx, rdx, t4, (CECF,)
+    # Otherwise put the regular remainder in rdx.
+    mov rdx, rdx, t6, (nCECF,)
+
+    # Negate the quotient.
+    sub t4, t0, t5
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,)
+
+    # Check the sign of the divisor
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,)
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,)
 };

 def macroop IDIV_M
 {
-    ld t1, seg, sib, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,)
+    ruflag t4, 3
+    sub t2, t0, rdx
+    sub t2, t2, t4
+
+    ld t3, seg, sib, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate divisor
+    sub t4, t0, t3
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,)
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rdx, 1, flags=(ECF,)
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,)
+    mov t2, t2, rdx, flags=(nCECF,)
+
+    # Do the initial part of the division
+    div1 t2, t3
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, "env.dataSize * 8"
+    div2 t4, t1, t4
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5
+    divr t6
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6
+    # If the dividend was negative, put the negated remainder in rdx.
+    mov rdx, rdx, t4, (CECF,)
+    # Otherwise put the regular remainder in rdx.
+    mov rdx, rdx, t6, (nCECF,)
+
+    # Negate the quotient.
+    sub t4, t0, t5
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,)
+
+    # Check the sign of the divisor
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,)
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,)
 };

 def macroop IDIV_P
 {
+    # Negate dividend
+    sub t1, t0, rax, flags=(ECF,)
+    ruflag t4, 3
+    sub t2, t0, rdx
+    sub t2, t2, t4
+
    rdip t7
-    ld t1, seg, riprel, disp
-    divr rdx, rax, t1
-    divq rax, rax, t1
+    ld t3, seg, riprel, disp
+
+    #Find the sign of the divisor
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate divisor
+    sub t4, t0, t3
+    # Put the divisor's absolute value into t3
+    mov t3, t3, t4, flags=(CECF,)
+
+    #Find the sign of the dividend
+    #FIXME!!! This depends on shifts setting the carry flag correctly.
+    slli t0, rdx, 1, flags=(ECF,)
+
+    # Put the dividend's absolute value into t1 and t2
+    mov t1, t1, rax, flags=(nCECF,)
+    mov t2, t2, rdx, flags=(nCECF,)
+
+    # Do the initial part of the division
+    div1 t2, t3
+
+    #These are split out so we can initialize the number of bits in the
+    #second register
+    div2i t4, t1, "env.dataSize * 8"
+    div2 t4, t1, t4
+
+    #Loop until we're out of bits to shift in
+divLoopTop:
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4
+    div2 t4, t1, t4, flags=(EZF,)
+    bri t0, label("divLoopTop"), flags=(nCEZF,)
+
+    #Unload the answer
+    divq t5
+    divr t6
+
+    # Fix up signs. The sign of the dividend is still lying around in ECF.
+    # The sign of the remainder, ah, is the same as the dividend. The sign
+    # of the quotient is negated if the signs of the divisor and dividend
+    # were different.
+
+    # Negate the remainder
+    sub t4, t0, t6
+    # If the dividend was negative, put the negated remainder in rdx.
+    mov rdx, rdx, t4, (CECF,)
+    # Otherwise put the regular remainder in rdx.
+    mov rdx, rdx, t6, (nCECF,)
+
+    # Negate the quotient.
+    sub t4, t0, t5
+    # If the dividend was negative, start using the negated quotient
+    mov t5, t5, t4, (CECF,)
+
+    # Check the sign of the divisor
+    slli t0, t3, 1, flags=(ECF,)
+
+    # Negate the (possibly already negated) quotient
+    sub t4, t0, t5
+    # If the divisor was negative, put the negated quotient in rax.
+    mov rax, rax, t4, (CECF,)
+    # Otherwise put the one that wasn't negated (at least here) in rax.
+    mov rax, rax, t5, (nCECF,)
 };
 '''
-#let {{
-#    class IDIV(Inst):
-#	"GenFault ${new UnimpInstFault}"
-#}};
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@ -227,6 +227,45 @@ def template MicroRegOpImmConstructor {{
    }
 }};

+output header {{
+    void
+    divide(uint64_t dividend, uint64_t divisor,
+            uint64_t &quotient, uint64_t &remainder);
+}};
+
+output decoder {{
+    void
+    divide(uint64_t dividend, uint64_t divisor,
+            uint64_t &quotient, uint64_t &remainder)
+    {
+        //Check for divide by zero.
+        if (divisor == 0)
+            panic("Divide by zero!\\n");
+        //If the divisor is bigger than the dividend, don't do anything.
+        if (divisor <= dividend) {
+            //Shift the divisor so its msb lines up with the dividend.
+            int dividendMsb = findMsbSet(dividend);
+            int divisorMsb = findMsbSet(divisor);
+            int shift = dividendMsb - divisorMsb;
+            divisor <<= shift;
+            //Compute what we'll add to the quotient if the divisor isn't
+            //now larger than the dividend.
+            uint64_t quotientBit = 1;
+            quotientBit <<= shift;
+            //If we need to step back a bit (no pun intended) because the
+            //divisor got too large, do that here. This is the "or two"
+            //part of one or two bit division.
+            if (divisor > dividend) {
+                quotientBit >>= 1;
+                divisor >>= 1;
+            }
+            //Decrement the remainder and increment the quotient.
+            quotient += quotientBit;
+            remainder -= divisor;
+        }
+    }
+}};
+
 let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
@ -507,20 +546,65 @@ let {{
                ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
        '''

-    class Div1(FlagRegOp):
+    # One or two bit divide
+    class Div1(WrRegOp):
        code = '''
-            int halfSize = (dataSize * 8) / 2;
-            IntReg quotient = (psrc1 / op2) & mask(halfSize);
-            IntReg remainder = (psrc1 % op2) & mask(halfSize);
-            IntReg result = quotient | (remainder << halfSize);
-            DestReg = merge(DestReg, result, dataSize);
+            //These are temporaries so that modifying them later won't make
+            //the ISA parser think they're also sources.
+            uint64_t quotient = 0;
+            uint64_t remainder = psrc1;
+            //Similarly, this is a temporary so changing it doesn't make it
+            //a source.
+            uint64_t divisor = op2;
+            //This is a temporary just for consistency and clarity.
+            uint64_t dividend = remainder;
+            //Do the division.
+            divide(dividend, divisor, quotient, remainder);
+            //Record the final results.
+            Remainder = remainder;
+            Quotient = quotient;
+            Divisor = divisor;
            '''

-    class Divq(FlagRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 / op2, dataSize);'
+    # Step divide
+    class Div2(RegOp):
+        code = '''
+            uint64_t dividend = Remainder;
+            uint64_t divisor = Divisor;
+            uint64_t quotient = Quotient;
+            uint64_t remainder = dividend;
+            int remaining = op2;
+            //If we overshot, do nothing. This lets us unroll division loops a
+            //little.
+            if (remaining) {
+                //Shift in bits from the low order portion of the dividend
+                while(dividend < divisor && remaining) {
+                    dividend = (dividend << 1) | bits(SrcReg1, remaining - 1);
+                    quotient <<= 1;
+                    remaining--;
+                }
+                remainder = dividend;
+                //Do the division.
+                divide(dividend, divisor, quotient, remainder);
+            }
+            //Keep track of how many bits there are still to pull in.
+            DestReg = merge(DestReg, remaining, dataSize);
+            //Record the final results
+            Remainder = remainder;
+            Quotient = quotient;
+        '''
+        flag_code = '''
+            if (DestReg == 0)
+                ccFlagBits = ccFlagBits | (ext & EZFBit);
+            else
+                ccFlagBits = ccFlagBits & ~(ext & EZFBit);
+        '''

-    class Divr(FlagRegOp):
-        code = 'DestReg = merge(DestReg, psrc1 % op2, dataSize);'
+    class Divq(RdRegOp):
+        code = 'DestReg = merge(SrcReg1, Quotient, dataSize);'
+
+    class Divr(RdRegOp):
+        code = 'DestReg = merge(SrcReg1, Remainder, dataSize);'

    class Mov(CondRegOp):
        code = 'DestReg = merge(SrcReg1, op2, dataSize)'
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@ -105,7 +105,8 @@ def operands {{
        'ProdHi':        ('IntReg', 'uqw', 'INTREG_IMPLICIT(1)', 'IsInteger', 8),
        'Quotient':      ('IntReg', 'uqw', 'INTREG_IMPLICIT(2)', 'IsInteger', 9),
        'Remainder':     ('IntReg', 'uqw', 'INTREG_IMPLICIT(3)', 'IsInteger', 10),
-        'rax':           ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 11),
+        'Divisor':       ('IntReg', 'uqw', 'INTREG_IMPLICIT(4)', 'IsInteger', 11),
+        'rax':           ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 12),
        'FpSrcReg1':     ('FloatReg', 'df', 'src1', 'IsFloating', 20),
        'FpSrcReg2':     ('FloatReg', 'df', 'src2', 'IsFloating', 21),
        'FpDestReg':     ('FloatReg', 'df', 'dest', 'IsFloating', 22),
--- a/src/arch/x86/x86_traits.hh
+++ b/src/arch/x86/x86_traits.hh
@ -64,11 +64,12 @@ namespace X86ISA

    const int NumPseudoIntRegs = 1;
    //1. The condition code bits of the rflags register.
-    const int NumImplicitIntRegs = 4;
+    const int NumImplicitIntRegs = 5;
    //1. The lower part of the result of multiplication.
    //2. The upper part of the result of multiplication.
    //3. The quotient from division
    //4. The remainder from division
+    //5. The divisor for division

    const int NumMMXRegs = 8;
    const int NumXMMRegs = 16;