From 43335495754abac71377bbd6df0c668b60b22822 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Wed, 29 Apr 2015 22:35:22 -0500 Subject: [PATCH] cpu: o3: replace issueLatency with bool pipelined Currently, each op class has a parameter issueLat that denotes the cycles after which another op of the same class can be issued. As of now, this latency can either be one cycle (fully pipelined) or same as execution latency of the op (not at all pipelined). The fact that issueLat is a parameter of type Cycles makes one believe that it can be set to any value. To avoid the confusion, the parameter is being renamed as 'pipelined' with type boolean. If set to true, the op would execute in a fully pipelined fashion. Otherwise, it would execute in an unpipelined fashion. --- configs/common/O3_ARM_v7a.py | 10 +++++----- src/cpu/FuncUnit.py | 3 ++- src/cpu/func_unit.cc | 14 +++++++------- src/cpu/func_unit.hh | 10 +++++----- src/cpu/o3/FuncUnitConfig.py | 9 ++++----- src/cpu/o3/fu_pool.cc | 8 ++++---- src/cpu/o3/fu_pool.hh | 8 ++++---- src/cpu/o3/inst_queue_impl.hh | 5 ++--- 8 files changed, 33 insertions(+), 34 deletions(-) diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py index 7d4987d7f..1bb2b4a5e 100644 --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -36,9 +36,9 @@ class O3_ARM_v7a_Simple_Int(FUDesc): # Complex ALU instructions have a variable latencies class O3_ARM_v7a_Complex_Int(FUDesc): - opList = [ OpDesc(opClass='IntMult', opLat=3, issueLat=1), - OpDesc(opClass='IntDiv', opLat=12, issueLat=12), - OpDesc(opClass='IprAccess', opLat=3, issueLat=1) ] + opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True), + OpDesc(opClass='IntDiv', opLat=12, pipelined=False), + OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ] count = 1 @@ -67,8 +67,8 @@ class O3_ARM_v7a_FP(FUDesc): OpDesc(opClass='FloatAdd', opLat=5), OpDesc(opClass='FloatCmp', opLat=5), OpDesc(opClass='FloatCvt', opLat=5), - OpDesc(opClass='FloatDiv', opLat=9, issueLat=9), - OpDesc(opClass='FloatSqrt', opLat=33, issueLat=33), + OpDesc(opClass='FloatDiv', opLat=9, pipelined=False), + OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False), OpDesc(opClass='FloatMult', opLat=4) ] count = 2 diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py index 0bb23e876..d4493ecf2 100644 --- a/src/cpu/FuncUnit.py +++ b/src/cpu/FuncUnit.py @@ -54,9 +54,10 @@ class OpClass(Enum): class OpDesc(SimObject): type = 'OpDesc' cxx_header = "cpu/func_unit.hh" - issueLat = Param.Cycles(1, "cycles until another can be issued") opClass = Param.OpClass("type of operation") opLat = Param.Cycles(1, "cycles until result is available") + pipelined = Param.Bool(True, "set to true when the functional unit for" + "this op is fully pipelined. False means not pipelined at all.") class FUDesc(SimObject): type = 'FUDesc' diff --git a/src/cpu/func_unit.cc b/src/cpu/func_unit.cc index bb7427da5..6d009a440 100644 --- a/src/cpu/func_unit.cc +++ b/src/cpu/func_unit.cc @@ -52,7 +52,7 @@ FuncUnit::FuncUnit(const FuncUnit &fu) for (int i = 0; i < Num_OpClasses; ++i) { opLatencies[i] = fu.opLatencies[i]; - issueLatencies[i] = fu.issueLatencies[i]; + pipelined[i] = fu.pipelined[i]; } capabilityList = fu.capabilityList; @@ -60,15 +60,15 @@ FuncUnit::FuncUnit(const FuncUnit &fu) void -FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat) +FuncUnit::addCapability(OpClass cap, unsigned oplat, bool pipeline) { - if (issuelat == 0 || oplat == 0) + if (oplat == 0) panic("FuncUnit: you don't really want a zero-cycle latency do you?"); capabilityList.set(cap); opLatencies[cap] = oplat; - issueLatencies[cap] = issuelat; + pipelined[cap] = pipeline; } bool @@ -89,10 +89,10 @@ FuncUnit::opLatency(OpClass cap) return opLatencies[cap]; } -unsigned -FuncUnit::issueLatency(OpClass capability) +bool +FuncUnit::isPipelined(OpClass capability) { - return issueLatencies[capability]; + return pipelined[capability]; } //////////////////////////////////////////////////////////////////////////// diff --git a/src/cpu/func_unit.hh b/src/cpu/func_unit.hh index 51e2011f8..721a69df1 100644 --- a/src/cpu/func_unit.hh +++ b/src/cpu/func_unit.hh @@ -52,11 +52,11 @@ class OpDesc : public SimObject public: OpClass opClass; Cycles opLat; - Cycles issueLat; + bool pipelined; OpDesc(const OpDescParams *p) : SimObject(p), opClass(p->opClass), opLat(p->opLat), - issueLat(p->issueLat) {}; + pipelined(p->pipelined) {}; }; class FUDesc : public SimObject @@ -85,7 +85,7 @@ class FuncUnit { private: unsigned opLatencies[Num_OpClasses]; - unsigned issueLatencies[Num_OpClasses]; + bool pipelined[Num_OpClasses]; std::bitset capabilityList; public: @@ -94,13 +94,13 @@ class FuncUnit std::string name; - void addCapability(OpClass cap, unsigned oplat, unsigned issuelat); + void addCapability(OpClass cap, unsigned oplat, bool pipelined); bool provides(OpClass capability); std::bitset capabilities(); unsigned &opLatency(OpClass capability); - unsigned issueLatency(OpClass capability); + bool isPipelined(OpClass capability); }; #endif // __FU_POOL_HH__ diff --git a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py index 0f5efb776..b8be400b5 100644 --- a/src/cpu/o3/FuncUnitConfig.py +++ b/src/cpu/o3/FuncUnitConfig.py @@ -49,7 +49,7 @@ class IntALU(FUDesc): class IntMultDiv(FUDesc): opList = [ OpDesc(opClass='IntMult', opLat=3), - OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ] + OpDesc(opClass='IntDiv', opLat=20, pipelined=False) ] # DIV and IDIV instructions in x86 are implemented using a loop which # issues division microops. The latency of these microops should really be @@ -57,7 +57,6 @@ class IntMultDiv(FUDesc): # of the quotient. if buildEnv['TARGET_ISA'] in ('x86'): opList[1].opLat=1 - opList[1].issueLat=1 count=2 @@ -69,8 +68,8 @@ class FP_ALU(FUDesc): class FP_MultDiv(FUDesc): opList = [ OpDesc(opClass='FloatMult', opLat=4), - OpDesc(opClass='FloatDiv', opLat=12, issueLat=12), - OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ] + OpDesc(opClass='FloatDiv', opLat=12, pipelined=False), + OpDesc(opClass='FloatSqrt', opLat=24, pipelined=False) ] count = 2 class SIMD_Unit(FUDesc): @@ -109,6 +108,6 @@ class RdWrPort(FUDesc): count = 4 class IprPort(FUDesc): - opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ] + opList = [ OpDesc(opClass='IprAccess', opLat = 3, pipelined = False) ] count = 1 diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc index 016b171bc..dab7dbed2 100644 --- a/src/cpu/o3/fu_pool.cc +++ b/src/cpu/o3/fu_pool.cc @@ -89,7 +89,7 @@ FUPool::FUPool(const Params *p) for (int i = 0; i < Num_OpClasses; ++i) { maxOpLatencies[i] = Cycles(0); - maxIssueLatencies[i] = Cycles(0); + pipelined[i] = true; } // @@ -123,13 +123,13 @@ FUPool::FUPool(const Params *p) fuPerCapList[(*j)->opClass].addFU(numFU + k); // indicate that this FU has the capability - fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat); + fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->pipelined); if ((*j)->opLat > maxOpLatencies[(*j)->opClass]) maxOpLatencies[(*j)->opClass] = (*j)->opLat; - if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass]) - maxIssueLatencies[(*j)->opClass] = (*j)->issueLat; + if (!(*j)->pipelined) + pipelined[(*j)->opClass] = false; } numFU++; diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh index 2e1b71dad..8b501fc81 100644 --- a/src/cpu/o3/fu_pool.hh +++ b/src/cpu/o3/fu_pool.hh @@ -72,8 +72,8 @@ class FUPool : public SimObject private: /** Maximum op execution latencies, per op class. */ Cycles maxOpLatencies[Num_OpClasses]; - /** Maximum issue latencies, per op class. */ - Cycles maxIssueLatencies[Num_OpClasses]; + /** Whether op is pipelined or not. */ + bool pipelined[Num_OpClasses]; /** Bitvector listing capabilities of this FU pool. */ std::bitset capabilityList; @@ -160,8 +160,8 @@ class FUPool : public SimObject } /** Returns the issue latency of the given capability. */ - Cycles getIssueLatency(OpClass capability) { - return maxIssueLatencies[capability]; + bool isPipelined(OpClass capability) { + return pipelined[capability]; } /** Have all the FUs drained? */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index fa621ffbf..7d359b992 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -825,7 +825,7 @@ InstructionQueue::scheduleReadyInsts() if (idx >= 0) fuPool->freeUnitNextCycle(idx); } else { - Cycles issue_latency = fuPool->getIssueLatency(op_class); + bool pipelined = fuPool->isPipelined(op_class); // Generate completion event for the FU ++wbOutstanding; FUCompletion *execution = new FUCompletion(issuing_inst, @@ -834,8 +834,7 @@ InstructionQueue::scheduleReadyInsts() cpu->schedule(execution, cpu->clockEdge(Cycles(op_latency - 1))); - // @todo: Enforce that issue_latency == 1 or op_latency - if (issue_latency > Cycles(1)) { + if (!pipelined) { // If FU isn't pipelined, then it must be freed // upon the execution completing. execution->setFreeFU();