cpu: replace the per-op issueLat parameter with a boolean pipelined flag

OpDesc.issueLat, FuncUnit::issueLatency() and FUPool::getIssueLatency() are
removed; OpDesc.pipelined, FuncUnit::isPipelined() and FUPool::isPipelined()
take their place. FUPool starts every op class as pipelined and clears the
flag as soon as one OpDesc of that class sets pipelined=False. For such a
class the O3 instruction queue calls setFreeFU() on the completion event
instead of comparing an issue latency against one cycle.

Review notes on this revision of the patch:
  * FuncUnit.py: added the missing space between the two adjacent string
    literals of the help text (they previously joined as "forthis").
  * fu_pool.hh: the doc comment above isPipelined() no longer claims that
    the method returns the issue latency.
  * func_unit.hh: addCapability() now names its last parameter `pipeline`,
    matching the definition in func_unit.cc.
  * NOTE(review): line breaks, blank context lines and indentation were
    reconstructed from a whitespace-collapsed copy using the hunk line
    counts. Angle-bracketed template arguments look stripped in that copy
    (e.g. "std::bitset capabilityList"). The blob hashes on the "index"
    lines are unchanged and may no longer match the edited files. Confirm
    all three against the tree before applying.

diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py
index 7d4987d7f..1bb2b4a5e 100644
--- a/configs/common/O3_ARM_v7a.py
+++ b/configs/common/O3_ARM_v7a.py
@@ -36,9 +36,9 @@ class O3_ARM_v7a_Simple_Int(FUDesc):
 
 # Complex ALU instructions have a variable latencies
 class O3_ARM_v7a_Complex_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntMult', opLat=3, issueLat=1),
-               OpDesc(opClass='IntDiv', opLat=12, issueLat=12),
-               OpDesc(opClass='IprAccess', opLat=3, issueLat=1) ]
+    opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True),
+               OpDesc(opClass='IntDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
     count = 1
 
 
@@ -67,8 +67,8 @@ class O3_ARM_v7a_FP(FUDesc):
                OpDesc(opClass='FloatAdd', opLat=5),
                OpDesc(opClass='FloatCmp', opLat=5),
                OpDesc(opClass='FloatCvt', opLat=5),
-               OpDesc(opClass='FloatDiv', opLat=9, issueLat=9),
-               OpDesc(opClass='FloatSqrt', opLat=33, issueLat=33),
+               OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
                OpDesc(opClass='FloatMult', opLat=4) ]
     count = 2
 
diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py
index 0bb23e876..d4493ecf2 100644
--- a/src/cpu/FuncUnit.py
+++ b/src/cpu/FuncUnit.py
@@ -54,9 +54,10 @@ class OpClass(Enum):
 class OpDesc(SimObject):
     type = 'OpDesc'
     cxx_header = "cpu/func_unit.hh"
-    issueLat = Param.Cycles(1, "cycles until another can be issued")
     opClass = Param.OpClass("type of operation")
     opLat = Param.Cycles(1, "cycles until result is available")
+    pipelined = Param.Bool(True, "set to true when the functional unit for "
+        "this op is fully pipelined. False means not pipelined at all.")
 
 class FUDesc(SimObject):
     type = 'FUDesc'
diff --git a/src/cpu/func_unit.cc b/src/cpu/func_unit.cc
index bb7427da5..6d009a440 100644
--- a/src/cpu/func_unit.cc
+++ b/src/cpu/func_unit.cc
@@ -52,7 +52,7 @@ FuncUnit::FuncUnit(const FuncUnit &fu)
 
     for (int i = 0; i < Num_OpClasses; ++i) {
         opLatencies[i] = fu.opLatencies[i];
-        issueLatencies[i] = fu.issueLatencies[i];
+        pipelined[i] = fu.pipelined[i];
     }
 
     capabilityList = fu.capabilityList;
@@ -60,15 +60,15 @@ FuncUnit::FuncUnit(const FuncUnit &fu)
 
 
 void
-FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat)
+FuncUnit::addCapability(OpClass cap, unsigned oplat, bool pipeline)
 {
-    if (issuelat == 0 || oplat == 0)
+    if (oplat == 0)
         panic("FuncUnit: you don't really want a zero-cycle latency do you?");
 
     capabilityList.set(cap);
 
     opLatencies[cap] = oplat;
-    issueLatencies[cap] = issuelat;
+    pipelined[cap] = pipeline;
 }
 
 bool
@@ -89,10 +89,10 @@ FuncUnit::opLatency(OpClass cap)
     return opLatencies[cap];
 }
 
-unsigned
-FuncUnit::issueLatency(OpClass capability)
+bool
+FuncUnit::isPipelined(OpClass capability)
 {
-    return issueLatencies[capability];
+    return pipelined[capability];
 }
 
 ////////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/func_unit.hh b/src/cpu/func_unit.hh
index 51e2011f8..721a69df1 100644
--- a/src/cpu/func_unit.hh
+++ b/src/cpu/func_unit.hh
@@ -52,11 +52,11 @@ class OpDesc : public SimObject
   public:
     OpClass opClass;
     Cycles opLat;
-    Cycles issueLat;
+    bool pipelined;
 
     OpDesc(const OpDescParams *p)
         : SimObject(p), opClass(p->opClass), opLat(p->opLat),
-          issueLat(p->issueLat) {};
+          pipelined(p->pipelined) {};
 };
 
 class FUDesc : public SimObject
@@ -85,7 +85,7 @@ class FuncUnit
 {
   private:
     unsigned opLatencies[Num_OpClasses];
-    unsigned issueLatencies[Num_OpClasses];
+    bool pipelined[Num_OpClasses];
     std::bitset capabilityList;
 
   public:
@@ -94,13 +94,13 @@ class FuncUnit
 
     std::string name;
 
-    void addCapability(OpClass cap, unsigned oplat, unsigned issuelat);
+    void addCapability(OpClass cap, unsigned oplat, bool pipeline);
 
     bool provides(OpClass capability);
     std::bitset capabilities();
 
     unsigned &opLatency(OpClass capability);
-    unsigned issueLatency(OpClass capability);
+    bool isPipelined(OpClass capability);
 };
 
 #endif // __FU_POOL_HH__
diff --git a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py
index 0f5efb776..b8be400b5 100644
--- a/src/cpu/o3/FuncUnitConfig.py
+++ b/src/cpu/o3/FuncUnitConfig.py
@@ -49,7 +49,7 @@ class IntALU(FUDesc):
 
 class IntMultDiv(FUDesc):
     opList = [ OpDesc(opClass='IntMult', opLat=3),
-               OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ]
+               OpDesc(opClass='IntDiv', opLat=20, pipelined=False) ]
 
     # DIV and IDIV instructions in x86 are implemented using a loop which
     # issues division microops. The latency of these microops should really be
@@ -57,7 +57,6 @@ class IntMultDiv(FUDesc):
     # of the quotient.
     if buildEnv['TARGET_ISA'] in ('x86'):
         opList[1].opLat=1
-        opList[1].issueLat=1
 
     count=2
 
@@ -69,8 +68,8 @@ class FP_ALU(FUDesc):
 
 class FP_MultDiv(FUDesc):
     opList = [ OpDesc(opClass='FloatMult', opLat=4),
-               OpDesc(opClass='FloatDiv', opLat=12, issueLat=12),
-               OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ]
+               OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=24, pipelined=False) ]
     count = 2
 
 class SIMD_Unit(FUDesc):
@@ -109,6 +108,6 @@ class RdWrPort(FUDesc):
     count = 4
 
 class IprPort(FUDesc):
-    opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ]
+    opList = [ OpDesc(opClass='IprAccess', opLat = 3, pipelined = False) ]
     count = 1
 
diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc
index 016b171bc..dab7dbed2 100644
--- a/src/cpu/o3/fu_pool.cc
+++ b/src/cpu/o3/fu_pool.cc
@@ -89,7 +89,7 @@ FUPool::FUPool(const Params *p)
 
     for (int i = 0; i < Num_OpClasses; ++i) {
         maxOpLatencies[i] = Cycles(0);
-        maxIssueLatencies[i] = Cycles(0);
+        pipelined[i] = true;
     }
 
     //
@@ -123,13 +123,13 @@ FUPool::FUPool(const Params *p)
                     fuPerCapList[(*j)->opClass].addFU(numFU + k);
 
                 // indicate that this FU has the capability
-                fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+                fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->pipelined);
 
                 if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
                     maxOpLatencies[(*j)->opClass] = (*j)->opLat;
 
-                if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
-                    maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+                if (!(*j)->pipelined)
+                    pipelined[(*j)->opClass] = false;
             }
 
             numFU++;
diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh
index 2e1b71dad..8b501fc81 100644
--- a/src/cpu/o3/fu_pool.hh
+++ b/src/cpu/o3/fu_pool.hh
@@ -72,8 +72,8 @@ class FUPool : public SimObject
   private:
     /** Maximum op execution latencies, per op class. */
     Cycles maxOpLatencies[Num_OpClasses];
-    /** Maximum issue latencies, per op class. */
-    Cycles maxIssueLatencies[Num_OpClasses];
+    /** Whether op is pipelined or not. */
+    bool pipelined[Num_OpClasses];
 
     /** Bitvector listing capabilities of this FU pool. */
     std::bitset capabilityList;
@@ -160,8 +160,8 @@ class FUPool : public SimObject
     }
 
-    /** Returns the issue latency of the given capability. */
-    Cycles getIssueLatency(OpClass capability) {
-        return maxIssueLatencies[capability];
+    /** Returns whether the given capability is pipelined. */
+    bool isPipelined(OpClass capability) {
+        return pipelined[capability];
     }
 
     /** Have all the FUs drained? */
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index fa621ffbf..7d359b992 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -825,7 +825,7 @@ InstructionQueue::scheduleReadyInsts()
                 if (idx >= 0)
                     fuPool->freeUnitNextCycle(idx);
             } else {
-                Cycles issue_latency = fuPool->getIssueLatency(op_class);
+                bool pipelined = fuPool->isPipelined(op_class);
                 // Generate completion event for the FU
                 ++wbOutstanding;
                 FUCompletion *execution = new FUCompletion(issuing_inst,
@@ -834,8 +834,7 @@ InstructionQueue::scheduleReadyInsts()
                 cpu->schedule(execution,
                               cpu->clockEdge(Cycles(op_latency - 1)));
 
-                // @todo: Enforce that issue_latency == 1 or op_latency
-                if (issue_latency > Cycles(1)) {
+                if (!pipelined) {
                     // If FU isn't pipelined, then it must be freed
                     // upon the execution completing.
                     execution->setFreeFU();