From 1bb293d1e7a27e306ca584a3922f2fd13481e248 Mon Sep 17 00:00:00 2001
From: Yasuko Eckert <yasuko.eckert@amd.com>
Date: Tue, 15 Oct 2013 14:22:44 -0400
Subject: [PATCH] arch/x86: add support for explicit CC register file

Convert condition code registers from being specialized
("pseudo") integer registers to using the recently
added CC register class.

Nilay Vaish also contributed to this patch.
---
 src/arch/x86/isa/operands.isa | 24 ++++++++------
 src/arch/x86/registers.hh     | 12 ++++---
 src/arch/x86/regs/ccr.hh      | 59 +++++++++++++++++++++++++++++++++++
 src/arch/x86/regs/int.hh      |  9 +-----
 src/arch/x86/utility.cc       | 21 +++++++------
 src/arch/x86/x86_traits.hh    |  2 --
 src/cpu/o3/O3CPU.py           | 13 +++++++-
 7 files changed, 104 insertions(+), 36 deletions(-)
 create mode 100644 src/arch/x86/regs/ccr.hh

diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index 79b59dbc3..59adada13 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -11,6 +11,8 @@
 // modified or unmodified, in source code or in binary form.
 //
 // Copyright (c) 2007 The Regents of The University of Michigan
+// Copyright (c) 2012 Mark D. Hill and David A. Wood
+// Copyright (c) 2012-2013 Advanced Micro Devices, Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -61,6 +63,8 @@ let {{
         return ('IntReg', 'uqw', 'INTREG_IMPLICIT(%s)' % idx, 'IsInteger', id)
     def floatReg(idx, id):
         return ('FloatReg', 'df', idx, 'IsFloating', id)
+    def ccReg(idx, id):
+        return ('CCReg', 'uqw', idx, 'IsCC', id)
     def controlReg(idx, id, ctype = 'uqw'):
         return ('ControlReg', ctype, idx,
                 (None, None, ['IsSerializeAfter',
@@ -118,11 +122,11 @@ def operands {{
                           (None, None, 'IsControl'), 50),
         # These registers hold the condition code portion of the flag
         # register. The nccFlagBits version holds the rest.
-        'ccFlagBits':    intReg('INTREG_PSEUDO(0)', 60),
-        'cfofBits':      intReg('INTREG_PSEUDO(1)', 61),
-        'dfBit':         intReg('INTREG_PSEUDO(2)', 62),
-        'ecfBit':        intReg('INTREG_PSEUDO(3)', 63),
-        'ezfBit':        intReg('INTREG_PSEUDO(4)', 64),
+        'ccFlagBits':    ccReg('(CCREG_ZAPS)', 60),
+        'cfofBits':      ccReg('(CCREG_CFOF)', 61),
+        'dfBit':         ccReg('(CCREG_DF)', 62),
+        'ecfBit':        ccReg('(CCREG_ECF)', 63),
+        'ezfBit':        ccReg('(CCREG_EZF)', 64),
 
         # These Pred registers are to be used where reading the portions of
         # condition code registers is possibly optional, depending on how the
@@ -139,20 +143,20 @@ def operands {{
         # would be retained, the write predicate checks if any of the bits
         # are being written.
 
-        'PredccFlagBits': ('IntReg', 'uqw', 'INTREG_PSEUDO(0)', 'IsInteger',
+        'PredccFlagBits': ('CCReg', 'uqw', '(CCREG_ZAPS)', 'IsCC',
                 60, None, None, '''(((ext & (PFBit | AFBit | ZFBit | SFBit
                 )) != (PFBit | AFBit | ZFBit | SFBit )) &&
                 ((ext & (PFBit | AFBit | ZFBit | SFBit )) != 0))''',
                 '((ext & (PFBit | AFBit | ZFBit | SFBit )) != 0)'),
-        'PredcfofBits':   ('IntReg', 'uqw', 'INTREG_PSEUDO(1)', 'IsInteger',
+        'PredcfofBits':   ('CCReg', 'uqw', '(CCREG_CFOF)', 'IsCC',
                 61, None, None, '''(((ext & CFBit) == 0 ||
                 (ext & OFBit) == 0) && ((ext & (CFBit | OFBit)) != 0))''',
                 '((ext & (CFBit | OFBit)) != 0)'),
-        'PreddfBit':   ('IntReg', 'uqw', 'INTREG_PSEUDO(2)', 'IsInteger',
+        'PreddfBit':   ('CCReg', 'uqw', '(CCREG_DF)', 'IsCC',
                 62, None, None, '(false)', '((ext & DFBit) != 0)'),
-        'PredecfBit':   ('IntReg', 'uqw', 'INTREG_PSEUDO(3)', 'IsInteger',
+        'PredecfBit':   ('CCReg', 'uqw', '(CCREG_ECF)', 'IsCC',
                 63, None, None, '(false)', '((ext & ECFBit) != 0)'),
-        'PredezfBit':   ('IntReg', 'uqw', 'INTREG_PSEUDO(4)', 'IsInteger',
+        'PredezfBit':   ('CCReg', 'uqw', '(CCREG_EZF)', 'IsCC',
                 64, None, None, '(false)', '((ext & EZFBit) != 0)'),
 
         # These register should needs to be more protected so that later
diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh
index d62992dcd..ebd88136e 100644
--- a/src/arch/x86/registers.hh
+++ b/src/arch/x86/registers.hh
@@ -43,6 +43,7 @@
 
 #include "arch/x86/generated/max_inst_regs.hh"
 #include "arch/x86/regs/int.hh"
+#include "arch/x86/regs/ccr.hh"
 #include "arch/x86/regs/misc.hh"
 #include "arch/x86/x86_traits.hh"
 
@@ -54,10 +55,10 @@ using X86ISAInst::MaxMiscDestRegs;
 const int NumMiscRegs = NUM_MISCREGS;
 
 const int NumIntArchRegs = NUM_INTREGS;
-const int NumIntRegs =
-    NumIntArchRegs + NumMicroIntRegs +
-    NumPseudoIntRegs + NumImplicitIntRegs;
-const int NumCCRegs = 0;
+const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs + NumImplicitIntRegs;
+const int NumCCRegs = NUM_CCREGS;
+
+#define ISA_HAS_CC_REGS
 
 // Each 128 bit xmm register is broken into two effective 64 bit registers.
 // Add 8 for the indices that are mapped over the fp stack
@@ -71,7 +72,7 @@ enum DependenceTags {
     // we just start at (1 << 7) == 128.
     FP_Reg_Base = 128,
     CC_Reg_Base = FP_Reg_Base + NumFloatRegs,
-    Misc_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0
+    Misc_Reg_Base = CC_Reg_Base + NumCCRegs,
     Max_Reg_Index = Misc_Reg_Base + NumMiscRegs
 };
 
@@ -102,6 +103,7 @@ typedef union
 {
     IntReg intReg;
     FloatReg fpReg;
+    CCReg ccReg;
     MiscReg ctrlReg;
 } AnyReg;
 
diff --git a/src/arch/x86/regs/ccr.hh b/src/arch/x86/regs/ccr.hh
new file mode 100644
index 000000000..697660d29
--- /dev/null
+++ b/src/arch/x86/regs/ccr.hh
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2012 Mark D. Hill and David A. Wood
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nilay Vaish
+ */
+
+#ifndef __ARCH_X86_CCREGS_HH__
+#define __ARCH_X86_CCREGS_HH__
+
+#include "arch/x86/x86_traits.hh"
+
+namespace X86ISA
+{
+    enum CCRegIndex
+    {
+        CCREG_ZAPS,
+        CCREG_CFOF,
+        CCREG_DF,
+        CCREG_ECF,
+        CCREG_EZF,
+
+        NUM_CCREGS
+    };
+}
+
+#endif // __ARCH_X86_CCREGS_HH__
diff --git a/src/arch/x86/regs/int.hh b/src/arch/x86/regs/int.hh
index 0a682ef54..60aa6d0cc 100644
--- a/src/arch/x86/regs/int.hh
+++ b/src/arch/x86/regs/int.hh
@@ -158,17 +158,10 @@ namespace X86ISA
         return (IntRegIndex)(NUM_INTREGS + index);
     }
 
-    inline static IntRegIndex
-    INTREG_PSEUDO(int index)
-    {
-        return (IntRegIndex)(NUM_INTREGS + NumMicroIntRegs + index);
-    }
-
     inline static IntRegIndex
     INTREG_IMPLICIT(int index)
     {
-        return (IntRegIndex)(NUM_INTREGS + NumMicroIntRegs +
-                             NumPseudoIntRegs + index);
+        return (IntRegIndex)(NUM_INTREGS + NumMicroIntRegs + index);
     }
 
     inline static IntRegIndex
diff --git a/src/arch/x86/utility.cc b/src/arch/x86/utility.cc
index df7d3935d..f7358341b 100644
--- a/src/arch/x86/utility.cc
+++ b/src/arch/x86/utility.cc
@@ -244,8 +244,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest)
     //copy float regs
     for (int i = 0; i < NumFloatRegs; ++i)
          dest->setFloatRegBits(i, src->readFloatRegBits(i));
-    // Will need to add condition-code regs when implemented
-    assert(NumCCRegs == 0);
+    //copy condition-code regs
+    for (int i = 0; i < NumCCRegs; ++i)
+         dest->setCCReg(i, src->readCCReg(i));
     copyMiscRegs(src, dest);
     dest->pcState(src->pcState());
 }
@@ -260,9 +261,9 @@ uint64_t
 getRFlags(ThreadContext *tc)
 {
     const uint64_t ncc_flags(tc->readMiscRegNoEffect(MISCREG_RFLAGS));
-    const uint64_t cc_flags(tc->readIntReg(X86ISA::INTREG_PSEUDO(0)));
-    const uint64_t cfof_bits(tc->readIntReg(X86ISA::INTREG_PSEUDO(1)));
-    const uint64_t df_bit(tc->readIntReg(X86ISA::INTREG_PSEUDO(2)));
+    const uint64_t cc_flags(tc->readCCReg(X86ISA::CCREG_ZAPS));
+    const uint64_t cfof_bits(tc->readCCReg(X86ISA::CCREG_CFOF));
+    const uint64_t df_bit(tc->readCCReg(X86ISA::CCREG_DF));
     // ecf (PSEUDO(3)) & ezf (PSEUDO(4)) are only visible to
     // microcode, so we can safely ignore them.
 
@@ -275,13 +276,13 @@ getRFlags(ThreadContext *tc)
 void
 setRFlags(ThreadContext *tc, uint64_t val)
 {
-    tc->setIntReg(X86ISA::INTREG_PSEUDO(0), val & ccFlagMask);
-    tc->setIntReg(X86ISA::INTREG_PSEUDO(1), val & cfofMask);
-    tc->setIntReg(X86ISA::INTREG_PSEUDO(2), val & DFBit);
+    tc->setCCReg(X86ISA::CCREG_ZAPS, val & ccFlagMask);
+    tc->setCCReg(X86ISA::CCREG_CFOF, val & cfofMask);
+    tc->setCCReg(X86ISA::CCREG_DF, val & DFBit);
 
     // Internal microcode registers (ECF & EZF)
-    tc->setIntReg(X86ISA::INTREG_PSEUDO(3), 0);
-    tc->setIntReg(X86ISA::INTREG_PSEUDO(4), 0);
+    tc->setCCReg(X86ISA::CCREG_ECF, 0);
+    tc->setCCReg(X86ISA::CCREG_EZF, 0);
 
     // Update the RFLAGS misc reg with whatever didn't go into the
     // magic registers.
diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh
index 408fda106..6f1c9ae36 100644
--- a/src/arch/x86/x86_traits.hh
+++ b/src/arch/x86/x86_traits.hh
@@ -46,8 +46,6 @@ namespace X86ISA
 {
     const int NumMicroIntRegs = 16;
 
-    const int NumPseudoIntRegs = 5;
-    //1. The condition code bits of the rflags register.
     const int NumImplicitIntRegs = 6;
     //1. The lower part of the result of multiplication.
     //2. The upper part of the result of multiplication.
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index 4b94f3581..044ee9d59 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -112,7 +112,18 @@ class DerivO3CPU(BaseCPU):
     numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
     numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
                                       "registers")
-    numPhysCCRegs = Param.Unsigned(0, "Number of physical cc registers")
+    # most ISAs don't use condition-code regs, so default is 0
+    _defaultNumPhysCCRegs = 0
+    if buildEnv['TARGET_ISA'] == 'x86':
+        # For x86, each CC reg is used to hold only a subset of the
+        # flags, so we need 4-5 times the number of CC regs as
+        # physical integer regs to be sure we don't run out.  In
+        # typical real machines, CC regs are not explicitly renamed
+        # (it's a side effect of int reg renaming), so they should
+        # never be the bottleneck here.
+        _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5
+    numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs,
+                                   "Number of physical cc registers")
     numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
     numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries")