From eac5eac67ae8076e934d78063a24eeef08f25413 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:13 -0500 Subject: [PATCH] inorder: squash on memory stall add code to recognize memory stalls in resources and the pipeline as well as squash a thread if there is a stall and we are in the switch on cache miss model --- src/cpu/inorder/cpu.cc | 29 ++++ src/cpu/inorder/cpu.hh | 8 +- src/cpu/inorder/first_stage.cc | 44 +++--- src/cpu/inorder/first_stage.hh | 2 + src/cpu/inorder/pipeline_stage.cc | 35 +++-- src/cpu/inorder/pipeline_stage.hh | 2 +- src/cpu/inorder/resource.cc | 8 +- src/cpu/inorder/resource.hh | 12 +- src/cpu/inorder/resource_pool.cc | 185 ++++++++++++++++-------- src/cpu/inorder/resource_pool.hh | 8 +- src/cpu/inorder/resources/cache_unit.cc | 48 ++++-- src/cpu/inorder/resources/cache_unit.hh | 3 + 12 files changed, 278 insertions(+), 106 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 69aea0c57..035aa0571 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -140,6 +140,10 @@ InOrderCPU::CPUEvent::process() cpu->disableThreads(tid, vpe); break; + case SquashFromMemStall: + cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid); + break; + case Trap: cpu->trapCPU(fault, tid); break; @@ -579,6 +583,31 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid) fault->invoke(tcBase(tid)); } +void +InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay) +{ + scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay); +} + + +void +InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(InOrderCPU, "Squashing Pipeline Stages Due to Memory Stall...\n"); + + // Squash all instructions in each stage including + // instruction that caused the squash (seq_num - 1) + // NOTE: The stage bandwidth needs to be cleared so that's why + // the stalling instruction is squashed as well. 
The stalled + // instruction is previously placed in another intermediate buffer + // while its stall is being handled. + InstSeqNum squash_seq_num = seq_num - 1; + + for (int stNum=stage_num; stNum >= 0 ; stNum--) { + pipelineStage[stNum]->squashDueToMemStall(squash_seq_num, tid); + } +} + void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, ThreadID tid, DynInstPtr inst, diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 4c7b2710d..5d34de67a 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -183,7 +183,7 @@ class InOrderCPU : public BaseCPU EnableVPEs, Trap, InstGraduated, - SquashAll, + SquashFromMemStall, UpdatePCs, NumCPUEvents }; @@ -344,6 +344,12 @@ class InOrderCPU : public BaseCPU void trap(Fault fault, ThreadID tid, int delay = 0); void trapCPU(Fault fault, ThreadID tid); + /** squashFromMemStall() - sets up a squash event + * squashDueToMemStall() - squashes pipeline + */ + void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); + void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); + /** Setup CPU to insert a thread's context */ void insertThread(ThreadID tid); diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 8bd703c56..1427ca46a 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -67,11 +67,12 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) // Clear the instruction list and skid buffer in case they have any // insts in them. 
- DPRINTF(InOrderStage, "Removing instructions from stage instruction list.\n"); + DPRINTF(InOrderStage, "Removing instructions from stage instruction " + "list.\n"); while (!insts[tid].empty()) { if (insts[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because it's <= " - "squashing seqNum %i.\n", + DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because " + "it's <= squashing seqNum %i.\n", tid, insts[tid].front()->seqNum, squash_seq_num); @@ -82,8 +83,9 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) insts[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, insts[tid].front()->seqNum, insts[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + "PC %08p.\n", tid, insts[tid].front()->seqNum, + insts[tid].front()->PC); insts[tid].pop(); } @@ -93,6 +95,18 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) cpu->removeInstsUntil(squash_seq_num, tid); } +void +FirstStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + // Need to preserve the stalling instruction in first-stage + // since the squash() from first stage also removes + // the instruction from the CPU (removeInstsUntil). If that + // functionality gets changed then you can move this offset. 
+ // (stalling instruction = seq_num + 1) + squash(seq_num+1, tid); +} + + void FirstStage::processStage(bool &status_change) { @@ -106,6 +120,7 @@ FirstStage::processStage(bool &status_change) for (int threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { + ThreadID tid = getFetchingThread(fetchPolicy); if (tid >= 0) { @@ -117,14 +132,17 @@ FirstStage::processStage(bool &status_change) } } -//@TODO: Note in documentation, that when you make a pipeline stage change, then -//make sure you change the first stage too +//@TODO: Note in documentation, that when you make a pipeline stage change, +//then make sure you change the first stage too void FirstStage::processInsts(ThreadID tid) { bool all_reqs_completed = true; - for (int insts_fetched = 0; insts_fetched < stageWidth && canSendInstToStage(1); insts_fetched++) { + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { + DynInstPtr inst; bool new_inst = false; @@ -150,19 +168,9 @@ FirstStage::processInsts(ThreadID tid) inst->traceData = NULL; #endif // TRACING_ON - DPRINTF(RefCount, "creation: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Add instruction to the CPU's list of instructions. inst->setInstListIt(cpu->addInst(inst)); - DPRINTF(RefCount, "after add to CPU List: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Create Front-End Resource Schedule For Instruction ThePipeline::createFrontEndSchedule(inst); } diff --git a/src/cpu/inorder/first_stage.hh b/src/cpu/inorder/first_stage.hh index 2a69678e4..383b799f3 100644 --- a/src/cpu/inorder/first_stage.hh +++ b/src/cpu/inorder/first_stage.hh @@ -61,6 +61,8 @@ class FirstStage : public PipelineStage { /** Squash Instructions Above a Seq. 
Num */ void squash(InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); + /** There are no insts. coming from previous stages, so there is * no need to sort insts here */ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 8d14aae27..1fd7150da 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -339,9 +339,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) { if (cpu->squashSeqNum[tid] < inst->seqNum && cpu->lastSquashCycle[tid] == curTick){ - DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another " - "stage's squash signal for after [sn:%i].\n", inst->seqNum, - cpu->squashSeqNum[tid]); + DPRINTF(Resource, "Ignoring [sn:%i] branch squash signal due to " + "another stage's squash signal for after [sn:%i].\n", + inst->seqNum, cpu->squashSeqNum[tid]); } else { // Send back mispredict information. toPrevStages->stageInfo[stageNum][tid].branchMispredict = true; @@ -381,6 +381,12 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) } } +void +PipelineStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + squash(seq_num, tid); +} + void PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) { @@ -413,8 +419,9 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) while (!skidBuffer[tid].empty()) { if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer " - "instructions before delay slot [sn:%i]. %i insts" - "left.\n", tid, squash_seq_num, + "instructions (starting w/[sn:%i]) before delay slot " + "[sn:%i]. 
%i insts left.\n", tid, + skidBuffer[tid].front()->seqNum, squash_seq_num, skidBuffer[tid].size()); break; } @@ -775,7 +782,7 @@ void PipelineStage::processThread(bool &status_change, ThreadID tid) { // If status is Running or idle, - // call stageInsts() + // call processInsts() // If status is Unblocking, // buffer any instructions coming from fetch // continue trying to empty skid buffer @@ -787,7 +794,7 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) ;//++stageSquashCycles; } - // Stage should try to stage as many instructions as its bandwidth + // Stage should try to process as many instructions as its bandwidth // will allow, as long as it is not currently blocked. if (stageStatus[tid] == Running || stageStatus[tid] == Idle) { @@ -904,9 +911,7 @@ bool PipelineStage::processInstSchedule(DynInstPtr inst) { bool last_req_completed = true; -#if TRACING_ON ThreadID tid = inst->readTid(); -#endif if (inst->nextResStage() == stageNum) { int res_stage_num = inst->nextResStage(); @@ -937,6 +942,18 @@ PipelineStage::processInstSchedule(DynInstPtr inst) last_req_completed = false; + if (req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + // Save Stalling Instruction + switchedOutBuffer[tid] = inst; + switchedOutValid[tid] = true; + + // Remove Thread From Pipeline & Resource Pool + inst->squashingStage = stageNum; + inst->bdelaySeqNum = inst->seqNum; + cpu->squashFromMemStall(inst, tid); + } + break; } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 42a632560..f10906e4c 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -240,7 +240,7 @@ class PipelineStage */ virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid); - virtual void squashDueToMemStall(DynInstPtr &inst, ThreadID tid); + virtual void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); /** Squash instructions from stage buffer */ virtual void squashPrevStageInsts(InstSeqNum 
squash_seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 286332e08..47a9a4b9a 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -340,6 +340,12 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } } +void +Resource::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, + ThreadID tid) +{ + squash(inst, stage_num, squash_seq_num, tid); +} Tick Resource::ticks(int num_cycles) @@ -407,7 +413,7 @@ ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, unsigned _cmd) : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), waiting(false) + squashed(false), processing(false), memStall(false) { #ifdef DEBUG reqID = resReqID++; diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 2cf8e61eb..f7c4b8fcd 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -156,6 +156,9 @@ class Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + virtual void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** The number of instructions available that this resource can * can still process */ @@ -376,8 +379,8 @@ class ResourceRequest void setProcessing() { processing = true; } /** Get/Set IsWaiting variables */ - bool isWaiting() { return waiting; } - void setWaiting() { waiting = true; } + bool isMemStall() { return memStall; } + void setMemStall(bool stall = true) { memStall = stall; } protected: /** Resource Identification */ @@ -386,11 +389,12 @@ class ResourceRequest int resIdx; int slotNum; - /** Resource Status */ + /** Resource Request Status */ bool completed; bool squashed; bool processing; - bool waiting; + + bool memStall; }; #endif //__CPU_INORDER_RESOURCE_HH__ diff --git 
a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 0d78c232b..8822715c7 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -41,45 +41,62 @@ using namespace ThePipeline; ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) : cpu(_cpu) { - //@todo: use this function to instantiate the resources in resource pool. This will help in the - //auto-generation of this pipeline model. + //@todo: use this function to instantiate the resources in resource pool. + //This will help in the auto-generation of this pipeline model. //ThePipeline::addResources(resources, memObjects); // Declare Resource Objects // name - id - bandwidth - latency - CPU - Parameters // -------------------------------------------------- - resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params)); + resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, + StageWidth * 2, 0, _cpu, params)); memObjects.push_back(ICache); - resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("icache_port", ICache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new DecodeUnit("Decode-Unit", Decode, StageWidth, 0, _cpu, params)); + resources.push_back(new DecodeUnit("Decode-Unit", Decode, + StageWidth, 0, _cpu, params)); - resources.push_back(new BranchPredictor("Branch-Predictor", BPred, StageWidth, 0, _cpu, params)); + resources.push_back(new BranchPredictor("Branch-Predictor", BPred, + StageWidth, 0, _cpu, params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, + 0, _cpu, params)); - resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, + StageWidth * 
MaxThreads, 0, _cpu, + params)); - resources.push_back(new AGENUnit("AGEN-Unit", AGEN, StageWidth, 0, _cpu, params)); + resources.push_back(new AGENUnit("AGEN-Unit", AGEN, + StageWidth, 0, _cpu, params)); - resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, StageWidth, 0, _cpu, params)); + resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, + StageWidth, 0, _cpu, params)); - resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params)); + resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, + params)); memObjects.push_back(DCache); - resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("dcache_port", DCache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new GraduationUnit("Graduation-Unit", Grad, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new GraduationUnit("Graduation-Unit", Grad, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, + 0, _cpu, params)); } void ResourcePool::init() { for (int i=0; i < resources.size(); i++) { - DPRINTF(Resource, "Initializing resource: %s.\n", resources[i]->name()); + DPRINTF(Resource, "Initializing resource: %s.\n", + resources[i]->name()); resources[i]->init(); } @@ -113,8 +130,8 @@ ResourcePool::getPort(const std::string &if_name, int idx) int obj_idx = memObjects[i]; Port *port = resources[obj_idx]->getPort(if_name, idx); if (port != NULL) { - DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", if_name, - resources[obj_idx]->name(), obj_idx); + DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", + if_name, resources[obj_idx]->name(), obj_idx); return port; } } @@ -131,7 +148,8 @@ ResourcePool::getPortIdx(const std::string &port_name) unsigned obj_idx = 
memObjects[i]; Port *port = resources[obj_idx]->getPort(port_name, obj_idx); if (port != NULL) { - DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, port_name); + DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, + port_name); return obj_idx; } } @@ -167,7 +185,8 @@ void ResourcePool::squash(DynInstPtr inst, int res_idx, InstSeqNum done_seq_num, ThreadID tid) { - resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, tid); + resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, + tid); } int @@ -192,15 +211,17 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, { case InOrderCPU::ActivateThread: { - DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event " + "for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; @@ -208,49 +229,72 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, case InOrderCPU::DeallocateThread: { - DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - tid); + DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + tid); - mainEventQueue.schedule(res_pool_event, 
curTick + cpu->ticks(delay)); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::InstGraduated: { - DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->seqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::SquashAll: { - DPRINTF(Resource, "Scheduling Squash Resource Pool Event for tick %i.\n", + DPRINTF(Resource, "Scheduling Squash Resource Pool Event for " + "tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } + break; + + case InOrderCPU::SquashFromMemStall: + { + DPRINTF(Resource, "Scheduling Squash Due to Memory Stall Resource " + "Pool Event for tick %i.\n", curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum - 1, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; default: - DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); + DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", + 
InOrderCPU::eventNames[e_type]); ; // If Resource Pool doesnt recognize event, we ignore it. } } @@ -265,8 +309,8 @@ void ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above [sn:%i].\n", - stage_num, tid, done_seq_num); + DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " + "[sn:%i].\n", stage_num, tid, done_seq_num); int num_resources = resources.size(); @@ -275,11 +319,26 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num, } } +void +ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " + "[sn:%i].\n", stage_num, tid, done_seq_num); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->squashDueToMemStall(inst, stage_num, done_seq_num, + tid); + } +} + void ResourcePool::activateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -291,8 +350,8 @@ ResourcePool::activateAll(ThreadID tid) void ResourcePool::deactivateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -304,8 +363,8 @@ ResourcePool::deactivateAll(ThreadID tid) void ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all resources.\n", - tid, seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all " + "resources.\n", tid, seq_num); int num_resources = resources.size(); @@ -353,6 +412,10 @@ 
ResourcePool::ResPoolEvent::process() resPool->squashAll(inst, stageNum, seqNum, tid); break; + case InOrderCPU::SquashFromMemStall: + resPool->squashDueToMemStall(inst, stageNum, seqNum, tid); + break; + default: fatal("Unrecognized Event Type"); } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 016fae2bf..61e691f35 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -123,7 +123,7 @@ class ResourcePool { }; public: - ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); + ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); virtual ~ResourcePool() {} std::string name(); @@ -160,6 +160,12 @@ class ResourcePool { void squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid); + /** Squash Resources in Pool after a memory stall + * NOTE: Only use during Switch-On-Miss Thread model + */ + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid); + /** Activate Thread in all resources */ void activateAll(ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index eb66e10f8..570d27fbe 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -241,8 +241,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Updating the command for this instruction\n", - inst->readTid(), inst->seqNum); + "[tid:%i]: [sn:%i]: Updating the command for this " + "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; } else { @@ -416,6 +416,7 @@ CacheUnit::execute(int slot_num) tid, seq_num, inst->staticInst->disassemble(inst->PC)); delete cache_req->dataPkt; + //cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderCachePort, @@ -425,6 +426,7 @@ 
CacheUnit::execute(int slot_num) "STALL: [tid:%i]: Fetch miss from %08p\n", tid, cache_req->inst->readPC()); cache_req->setCompleted(false); + //cache_req->setMemStall(true); } break; @@ -437,11 +439,13 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", tid, cache_req->inst->getMemAddr()); cache_req->setCompleted(false); + cache_req->setMemStall(true); } break; @@ -510,7 +514,8 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (cache_req->pktCmd == MemCmd::WriteReq) { cache_req->pktCmd = cache_req->memReq->isSwap() ? MemCmd::SwapReq : - (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); + (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq + : MemCmd::WriteReq); } cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd, @@ -641,8 +646,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) ExtMachInst ext_inst; StaticInstPtr staticInst = NULL; Addr inst_pc = inst->readPC(); - MachInst mach_inst = TheISA::gtoh(*reinterpret_cast - (cache_pkt->getPtr())); + MachInst mach_inst = + TheISA::gtoh(*reinterpret_cast + (cache_pkt->getPtr())); predecoder.setTC(cpu->thread[tid]->getTC()); predecoder.moreBytes(inst_pc, inst_pc, mach_inst); @@ -755,7 +761,8 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqMap[slotIdx], tid); req_ptr->tlbStall = false; @@ -764,6 +771,23 @@ CacheUnitEvent::process() } } +void +CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid) +{ + // If squashing due to memory stall, then we do NOT want to + // squash the instruction that caused the stall so we + // increment the sequence 
number here to prevent that. + // + // NOTE: This is only for the SwitchOnCacheMiss Model + // NOTE: If you have multiple outstanding misses from the same + // thread then you need to reevaluate this code + // NOTE: squash should originate from + // pipeline_stage.cc:processInstSchedule + squash(inst, stage_num, squash_seq_num + 1, tid); +} + + void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) @@ -798,7 +822,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, int stall_stage = reqMap[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqMap[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { @@ -927,14 +952,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -942,7 +969,8 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_ template<> Fault -CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index c467e9771..a6b07ebd9 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -146,6 +146,9 @@ class 
CacheUnit : public Resource void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt);