From c08b7802a9caa7823e75a71839bac14a65a3102b Mon Sep 17 00:00:00 2001
From: Gabe Black <gblack@eecs.umich.edu>
Date: Sat, 12 Jan 2008 06:41:32 -0500
Subject: [PATCH] X86: Redo the bit test instructions.

--HG--
extra : convert_revision : 433c2a9f3675ed02f3be5ce759a440f2686d2ccd
---
 .../compare_and_test/bit_test.py              | 226 +++++++++---------
 src/arch/x86/isa/microasm.isa                 |   9 +
 2 files changed, 125 insertions(+), 110 deletions(-)

diff --git a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py
index 9b99b331e..e06add896 100644
--- a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py
+++ b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py
@@ -1,4 +1,32 @@
-# Copyright (c) 2007 The Hewlett-Packard Development Company
+# Copyright (c) 2008 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+# Copyright (c) 2007-2008 The Hewlett-Packard Development Company
 # All rights reserved.
 #
 # Redistribution and use of this software in source and binary forms,
@@ -63,19 +91,14 @@ def macroop BT_M_I {
     # This fudges just a tiny bit, but it's reasonable to expect the
     # microcode generation logic to have the log of the various sizes
     # floating around as well.
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    ld t1, seg, [scale, index, t2], disp
+    ld t1, seg, sib, disp
     sexti t0, t1, imm, flags=(CF,)
 };
 
 def macroop BT_P_I {
     rdip t7
     limm t1, imm, dataSize=asz
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    ld t1, seg, [1, t2, t7]
+    ld t1, seg, riprel, disp, dataSize=asz
     sexti t0, t1, imm, flags=(CF,)
 };
 
@@ -84,18 +107,19 @@ def macroop BT_R_R {
 };
 
 def macroop BT_M_R {
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    ld t1, seg, [scale, index, t2], disp
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    ld t1, seg, [scale, index, t3], disp
     sext t0, t1, reg, flags=(CF,)
 };
 
 def macroop BT_P_R {
     rdip t7
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    ld t1, seg, [1, t2, t7]
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    ld t1, seg, [1, t3, t7], disp
     sext t0, t1, reg, flags=(CF,)
 };
 
@@ -111,28 +135,23 @@ def macroop BTC_M_I {
     # This fudges just a tiny bit, but it's reasonable to expect the
     # microcode generation logic to have the log of the various sizes
     # floating around as well.
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    limm t3, 1
-    roli t3, t3, imm
-    ldst t1, seg, [scale, index, t2], disp
+    limm t4, 1
+    roli t4, t4, imm
+    ldst t1, seg, sib, disp
     sexti t0, t1, imm, flags=(CF,)
-    xor t1, t1, t3
-    st t1, seg, [scale, index, t2], disp
+    xor t1, t1, t4
+    st t1, seg, sib, disp
 };
 
 def macroop BTC_P_I {
     rdip t7, dataSize=asz
     limm t1, imm, dataSize=asz
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    limm t3, 1
-    roli t3, t3, imm
-    ldst t1, seg, [1, t2, t7]
+    limm t4, 1
+    roli t4, t4, imm
+    ldst t1, seg, riprel, disp
     sexti t0, t1, imm, flags=(CF,)
-    xor t1, t1, t3
-    st t1, seg, [1, t2, t7], disp
+    xor t1, t1, t4
+    st t1, seg, riprel, disp
 };
 
 def macroop BTC_R_R {
@@ -143,26 +162,27 @@ def macroop BTC_R_R {
 };
 
 def macroop BTC_M_R {
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    limm t3, 1
-    rol t3, t3, reg
-    ldst t1, seg, [scale, index, t2], disp
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    limm t4, 1
+    rol t4, t4, reg
+    ldst t1, seg, [scale, index, t3], disp
     sext t0, t1, reg, flags=(CF,)
-    xor t1, t1, t3
-    st t1, seg, [scale, index, t2], disp
+    xor t1, t1, t4
+    st t1, seg, [scale, index, t3], disp
 };
 
 def macroop BTC_P_R {
     rdip t7, dataSize=asz
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    limm t3, 1
-    rol t3, t3, reg
-    ldst t1, seg, [1, t2, t7]
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    limm t4, 1
+    rol t4, t4, reg
+    ldst t1, seg, [1, t3, t7], disp
     sext t0, t1, reg, flags=(CF,)
-    xor t1, t1, t3
-    st t1, seg, [1, t2, t7], disp
+    xor t1, t1, t4
+    st t1, seg, [1, t3, t7], disp
 };
 
@@ -175,31 +195,23 @@ def macroop BTR_R_I {
 
 def macroop BTR_M_I {
     limm t1, imm, dataSize=asz
-    # This fudges just a tiny bit, but it's reasonable to expect the
-    # microcode generation logic to have the log of the various sizes
-    # floating around as well.
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    limm t3, "(uint64_t(-(2ULL)))"
-    roli t3, t3, imm
-    ldst t1, seg, [scale, index, t2], disp
+    limm t4, "(uint64_t(-(2ULL)))"
+    roli t4, t4, imm
+    ldst t1, seg, sib, disp
     sexti t0, t1, imm, flags=(CF,)
-    and t1, t1, t3
-    st t1, seg, [scale, index, t2], disp
+    and t1, t1, t4
+    st t1, seg, sib, disp
 };
 
 def macroop BTR_P_I {
     rdip t7, dataSize=asz
     limm t1, imm, dataSize=asz
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    limm t3, "(uint64_t(-(2ULL)))"
-    roli t3, t3, imm
-    ldst t1, seg, [1, t2, t7]
+    limm t4, "(uint64_t(-(2ULL)))"
+    roli t4, t4, imm
+    ldst t1, seg, riprel, disp
     sexti t0, t1, imm, flags=(CF,)
-    and t1, t1, t3
-    st t1, seg, [1, t2, t7], disp
+    and t1, t1, t4
+    st t1, seg, riprel, disp
 };
 
 def macroop BTR_R_R {
@@ -210,27 +222,28 @@ def macroop BTR_R_R {
 };
 
 def macroop BTR_M_R {
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    limm t3, "(uint64_t(-(2ULL)))"
-    rol t3, t3, reg
-    ldst t1, seg, [scale, index, t2], disp
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    limm t4, "(uint64_t(-(2ULL)))"
+    rol t4, t4, reg
+    ldst t1, seg, [scale, index, t3], disp
     sext t0, t1, reg, flags=(CF,)
-    and t1, t1, t3
-    st t1, seg, [scale, index, t2], disp
+    and t1, t1, t4
+    st t1, seg, [scale, index, t3], disp
 };
 
 def macroop BTR_P_R {
     rdip t7, dataSize=asz
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    limm t3, "(uint64_t(-(2ULL)))"
-    rol t3, t3, reg
-    ldst t1, seg, [1, t2, t7]
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    limm t4, "(uint64_t(-(2ULL)))"
+    rol t4, t4, reg
+    ldst t1, seg, [1, t3, t7], disp
     sext t0, t1, reg, flags=(CF,)
-    and t1, t1, t3
-    st t1, seg, [1, t2, t7], disp
+    and t1, t1, t4
+    st t1, seg, [1, t3, t7], disp
 };
 
 def macroop BTS_R_I {
@@ -242,31 +255,23 @@ def macroop BTS_R_I {
 
 def macroop BTS_M_I {
     limm t1, imm, dataSize=asz
-    # This fudges just a tiny bit, but it's reasonable to expect the
-    # microcode generation logic to have the log of the various sizes
-    # floating around as well.
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    limm t3, 1
-    roli t3, t3, imm
-    ldst t1, seg, [scale, index, t2], disp
+    limm t4, 1
+    roli t4, t4, imm
+    ldst t1, seg, sib, disp
     sexti t0, t1, imm, flags=(CF,)
-    or t1, t1, t3
-    st t1, seg, [scale, index, t2], disp
+    or t1, t1, t4
+    st t1, seg, sib, disp
 };
 
 def macroop BTS_P_I {
     rdip t7, dataSize=asz
     limm t1, imm, dataSize=asz
-    srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    limm t3, 1
-    roli t3, t3, imm
-    ldst t1, seg, [1, t2, t7]
+    limm t4, 1
+    roli t4, t4, imm
+    ldst t1, seg, riprel, disp
     sexti t0, t1, imm, flags=(CF,)
-    or t1, t1, t3
-    st t1, seg, [1, t2, t7], disp
+    or t1, t1, t4
+    st t1, seg, riprel, disp
 };
 
 def macroop BTS_R_R {
@@ -277,26 +282,27 @@ def macroop BTS_R_R {
 };
 
 def macroop BTS_M_R {
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    add t2, t2, base, dataSize=asz
-    limm t3, 1
-    rol t3, t3, reg
-    ldst t1, seg, [scale, index, t2], disp
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    limm t4, 1
+    rol t4, t4, reg
+    ldst t1, seg, [scale, index, t3], disp
     sext t0, t1, reg, flags=(CF,)
-    or t1, t1, t3
-    st t1, seg, [scale, index, t2], disp
+    or t1, t1, t4
+    st t1, seg, [scale, index, t3], disp
 };
 
 def macroop BTS_P_R {
     rdip t7, dataSize=asz
-    srai t2, reg, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)", \
-            dataSize=asz
-    limm t3, 1
-    rol t3, t3, reg
-    ldst t1, seg, [1, t2, t7]
+    srai t2, reg, 3, dataSize=asz
+    srai t3, t2, ldsz, dataSize=asz
+    lea t3, flatseg, [dsz, t3, base], dataSize=asz
+    limm t4, 1
+    rol t4, t4, reg
+    ldst t1, seg, [1, t3, t7], disp
     sext t0, t1, reg, flags=(CF,)
-    or t1, t1, t3
-    st t1, seg, [1, t2, t7], disp
+    or t1, t1, t4
+    st t1, seg, [1, t3, t7], disp
 };
 '''
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 9e8b65c4e..78ae34f52 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -107,6 +107,15 @@ let {{
     }
     assembler.symbols.update(symbols)
 
+    assembler.symbols["ldsz"] = \
+        "((env.dataSize == 8) ? 3 : (env.dataSize == 4) ? 2 : 1)"
+
+    assembler.symbols["lasz"] = \
+        "((env.addressSize == 8) ? 3 : (env.addressSize == 4) ? 2 : 1)"
+
+    assembler.symbols["lssz"] = \
+        "((env.stackSize == 8) ? 3 : (env.stackSize == 4) ? 2 : 1)"
+
     # Short hand for common scale-index-base combinations.
     assembler.symbols["sib"] = \
         [symbols["scale"], symbols["index"], symbols["base"]]