From 0c6a679359fa84060b5bc745a737073890d2fb90 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 4 Feb 2011 00:08:18 -0500 Subject: [PATCH] inorder: stage width as a python parameter Allow the user to specify how many instructions a pipeline stage can process on any given cycle (stageWidth, i.e. bandwidth) by setting the parameter through the python interface rather than recompiling the code after changing the *.cc file. (We always had the parameter there, but still used the static 'ThePipeline::StageWidth' instead.) - Since StageWidth is now dynamically defined, change the interstage communication structure to use a vector and get rid of the array and the array handling index (toNextStageIndex), since we can just query the vector for the same information. --- src/cpu/inorder/InOrderCPU.py | 2 +- src/cpu/inorder/comm.hh | 11 +++---- src/cpu/inorder/cpu.cc | 1 + src/cpu/inorder/cpu.hh | 3 ++ src/cpu/inorder/pipeline_stage.cc | 51 ++++++++---------------------- src/cpu/inorder/pipeline_traits.hh | 14 -------- src/cpu/inorder/resource_pool.cc | 20 ++++++------ 7 files changed, 35 insertions(+), 67 deletions(-) diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py index 8e25891e7..5d24ae4fd 100644 --- a/src/cpu/inorder/InOrderCPU.py +++ b/src/cpu/inorder/InOrderCPU.py @@ -40,7 +40,7 @@ class InOrderCPU(BaseCPU): threadModel = Param.ThreadModel('SMT', "Multithreading model (SE-MODE only)") cachePorts = Param.Unsigned(2, "Cache Ports") - stageWidth = Param.Unsigned(1, "Stage width") + stageWidth = Param.Unsigned(4, "Stage width") fetchMemPort = Param.String("icache_port" , "Name of Memory Port to get instructions from") dataMemPort = Param.String("dcache_port" , "Name of Memory Port to get data from") diff --git a/src/cpu/inorder/comm.hh b/src/cpu/inorder/comm.hh index 386046d1c..ba9322079 100644 --- a/src/cpu/inorder/comm.hh +++ b/src/cpu/inorder/comm.hh @@ -44,8 +44,7 @@ /** Struct that defines the information passed from in between stages */ /** This 
information mainly goes forward through the pipeline. */ struct InterStageStruct { - int size; - ThePipeline::DynInstPtr insts[ThePipeline::StageWidth]; + std::vector<ThePipeline::DynInstPtr> insts; bool squash; bool branchMispredict; bool branchTaken; @@ -55,10 +54,10 @@ struct InterStageStruct { bool includeSquashInst; InterStageStruct() - :size(0), squash(false), - branchMispredict(false), branchTaken(false), - mispredPC(0), nextPC(0), - squashedSeqNum(0), includeSquashInst(false) + : squash(false), + branchMispredict(false), branchTaken(false), + mispredPC(0), nextPC(0), + squashedSeqNum(0), includeSquashInst(false) { } }; diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 5a742cb5d..ffdcae7df 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -174,6 +174,7 @@ InOrderCPU::InOrderCPU(Params *params) coreType("default"), _status(Idle), tickEvent(this), + stageWidth(params->stageWidth), timeBuffer(2 , 2), removeInstsThisCycle(false), activityRec(params->name, NumStages, 10, params->activity), diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index c3658373a..9ff0f12ce 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -268,6 +268,9 @@ class InOrderCPU : public BaseCPU /** The Pipeline Stages for the CPU */ PipelineStage *pipelineStage[ThePipeline::NumStages]; + /** Width (processing bandwidth) of each stage */ + int stageWidth; + /** Program Counters */ TheISA::PCState pc[ThePipeline::MaxThreads]; diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index b4b9e497f..e10ceb326 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -39,9 +39,9 @@ using namespace std; using namespace ThePipeline; PipelineStage::PipelineStage(Params *params, unsigned stage_num) - : stageNum(stage_num), stageWidth(ThePipeline::StageWidth), + : stageNum(stage_num), stageWidth(params->stageWidth), numThreads(ThePipeline::MaxThreads), _status(Inactive), - 
stageBufferMax(ThePipeline::interStageBuffSize[stage_num]), + stageBufferMax(params->stageWidth), prevStageValid(false), nextStageValid(false), idle(false) { switchedOutBuffer.resize(ThePipeline::MaxThreads); @@ -143,7 +143,6 @@ PipelineStage::setNextStageQueue(TimeBuffer *next_stage_ptr) // Setup wire to write information to proper place in stage queue. nextStage = nextStageQueue->getWire(0); - nextStage->size = 0; nextStageValid = true; } @@ -257,7 +256,7 @@ PipelineStage::removeStalls(ThreadID tid) inline bool PipelineStage::prevStageInstsValid() { - return prevStage->size > 0; + return prevStage->insts.size() > 0; } bool @@ -382,7 +381,8 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from " "incoming stage queue.\n", tid); - for (int i=0; i < prevStage->size; i++) { + int insts_from_prev_stage = prevStage->insts.size(); + for (int i=0; i < insts_from_prev_stage; i++) { if (prevStage->insts[i]->threadNumber == tid && prevStage->insts[i]->seqNum > squash_seq_num) { // Change Comment to Annulling previous instruction @@ -441,16 +441,8 @@ PipelineStage::stageBufferAvail() total += skidBuffer[i].size(); } - int incoming_insts = (prevStageValid) ? - cpu->pipelineStage[stageNum]->prevStage->size : - 0; - int avail = stageBufferMax - total; - - if (avail < 0) - fatal("stageNum %i:stageBufferAvail() < 0..." 
- "stBMax=%i,total=%i,incoming=%i=>%i", - stageNum, stageBufferMax, total, incoming_insts, avail); + assert(avail >= 0); return avail; } @@ -462,7 +454,7 @@ PipelineStage::canSendInstToStage(unsigned stage_num) if (cpu->pipelineStage[stage_num]->prevStageValid) { buffer_avail = cpu->pipelineStage[stage_num]->stageBufferAvail() - - cpu->pipelineStage[stage_num-1]->nextStage->size >= 1; + cpu->pipelineStage[stage_num-1]->nextStage->insts.size() >= 1; } if (!buffer_avail && nextStageQueueValid(stage_num)) { @@ -576,7 +568,9 @@ void PipelineStage::sortInsts() { if (prevStageValid) { - int insts_from_prev_stage = prevStage->size; + assert(prevStage->insts.size() <= stageWidth); + + int insts_from_prev_stage = prevStage->insts.size(); int insts_from_cur_stage = skidSize(); DPRINTF(InOrderStage, "%i insts available from stage buffer %i. Stage " "currently has %i insts from last cycle.\n", @@ -591,7 +585,6 @@ PipelineStage::sortInsts() "not inserting into stage buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); - prevStage->size--; continue; } @@ -619,12 +612,8 @@ PipelineStage::sortInsts() prevStage->insts[i] = cpu->dummyBufferInst; - prevStage->size--; - inserted_insts++; } - - assert(prevStage->size == 0); } } @@ -728,11 +717,6 @@ PipelineStage::tick() wroteToTimeBuffer = false; bool status_change = false; - - if (nextStageValid) - nextStage->size = 0; - - toNextStageIndex = 0; sortInsts(); @@ -807,7 +791,7 @@ PipelineStage::processStage(bool &status_change) if (nextStageValid) { DPRINTF(InOrderStage, "%i insts now available for stage %i.\n", - nextStage->size, stageNum + 1); + nextStage->insts.size(), stageNum + 1); } if (instsProcessed > 0) { @@ -1083,20 +1067,13 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: being placed into " "index %i of stage buffer %i queue.\n", - tid, inst->seqNum, toNextStageIndex, + tid, inst->seqNum, + cpu->pipelineStage[prev_stage]->nextStage->insts.size(), 
cpu->pipelineStage[prev_stage]->nextStageQueue->id()); - int next_stage_idx = - cpu->pipelineStage[prev_stage]->nextStage->size; - // Place instructions in inter-stage communication struct for next // pipeline stage to read next cycle - cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] - = inst; - - ++(cpu->pipelineStage[prev_stage]->nextStage->size); - - ++toNextStageIndex; + cpu->pipelineStage[prev_stage]->nextStage->insts.push_back(inst); success = true; diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 7abfc9a81..df964e254 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -52,7 +52,6 @@ namespace ThePipeline { // Pipeline Constants const unsigned NumStages = 5; const ThreadID MaxThreads = 8; - const unsigned StageWidth = 1; const unsigned BackEndStartStage = 2; // List of Resources The Pipeline Uses @@ -71,19 +70,6 @@ namespace ThePipeline { FetchBuff2 }; - // Expand this as necessary for your inter stage buffer sizes - static const unsigned interStageBuffSize[] = { - StageWidth, /* Stage 0 - 1 */ - StageWidth, /* Stage 1 - 2 */ - StageWidth, /* Stage 2 - 3 */ - StageWidth, /* Stage 3 - 4 */ - StageWidth, /* Stage 4 - 5 */ - StageWidth, /* Stage 5 - 6 */ - StageWidth, /* Stage 6 - 7 */ - StageWidth, /* Stage 7 - 8 */ - StageWidth /* Stage 8 - 9 */ - }; - typedef InOrderCPUParams Params; typedef RefCountingPtr DynInstPtr; diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index e8400405a..a0ec910f5 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -45,46 +45,48 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) //This will help in the auto-generation of this pipeline model. 
//ThePipeline::addResources(resources, memObjects); + int stage_width = cpu->stageWidth; + // Declare Resource Objects // name - id - bandwidth - latency - CPU - Parameters // -------------------------------------------------- resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, - StageWidth * 2, 0, _cpu, params)); + stage_width * 2, 0, _cpu, params)); memObjects.push_back(ICache); resources.push_back(new CacheUnit("icache_port", ICache, - StageWidth * MaxThreads, 0, _cpu, + stage_width * MaxThreads, 0, _cpu, params)); resources.push_back(new DecodeUnit("Decode-Unit", Decode, - StageWidth, 0, _cpu, params)); + stage_width, 0, _cpu, params)); resources.push_back(new BranchPredictor("Branch-Predictor", BPred, - StageWidth, 0, _cpu, params)); + stage_width, 0, _cpu, params)); resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params)); resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, - StageWidth * MaxThreads, 0, _cpu, + stage_width * MaxThreads, 0, _cpu, params)); resources.push_back(new AGENUnit("AGEN-Unit", AGEN, - StageWidth, 0, _cpu, params)); + stage_width, 0, _cpu, params)); resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, - StageWidth, 0, _cpu, params)); + stage_width, 0, _cpu, params)); resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params)); memObjects.push_back(DCache); resources.push_back(new CacheUnit("dcache_port", DCache, - StageWidth * MaxThreads, 0, _cpu, + stage_width * MaxThreads, 0, _cpu, params)); resources.push_back(new GraduationUnit("Graduation-Unit", Grad, - StageWidth * MaxThreads, 0, _cpu, + stage_width * MaxThreads, 0, _cpu, params)); resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4,