From 7b3b362ba5d2690324abd58c883fd1d5fe4dc767 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 17:18:15 -0500 Subject: [PATCH 01/36] inorder: init internal debug cpu counters - cpuEventNum - resReqCount --- src/cpu/inorder/cpu.cc | 60 ++++++++++++++++++++++++++----------- src/cpu/inorder/cpu.hh | 32 +++++++++++++++----- src/cpu/inorder/resource.cc | 54 ++++++++++++++++++++++++++++----- src/cpu/inorder/resource.hh | 40 ++++++++----------------- 4 files changed, 128 insertions(+), 58 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 1e3fdc40e..38f6b4eed 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -115,7 +115,8 @@ InOrderCPU::CPUEvent::process() cpu->activateThread(tid); break; - //@TODO: Consider Implementing "Suspend Thread" as Separate from Deallocate + //@TODO: Consider Implementing "Suspend Thread" as Separate from + //Deallocate case SuspendThread: // Suspend & Deallocate are same for now. //cpu->suspendThread(tid); //break; @@ -145,11 +146,14 @@ InOrderCPU::CPUEvent::process() default: fatal("Unrecognized Event Type %d", cpuEventType); + } - + cpu->cpuEventRemoveList.push(this); } + + const char * InOrderCPU::CPUEvent::description() { @@ -185,6 +189,10 @@ InOrderCPU::InOrderCPU(Params *params) system(params->system), physmem(system->physmem), #endif // FULL_SYSTEM +#ifdef DEBUG + cpuEventNum(0), + resReqCount(0), +#endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), stageTracing(params->stageTracing), @@ -301,7 +309,7 @@ InOrderCPU::InOrderCPU(Params *params) // Define dummy instructions and resource requests to be used. DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0); - dummyReq = new ResourceRequest(NULL, NULL, 0, 0, 0, 0); + dummyReq = new ResourceRequest(resPool->getResource(0), NULL, 0, 0, 0, 0); // Reset CPU to reset state. 
#if FULL_SYSTEM @@ -322,6 +330,13 @@ InOrderCPU::regStats() /* Register the Resource Pool's stats here.*/ resPool->regStats(); +#ifdef DEBUG + maxResReqCount + .name(name() + ".maxResReqCount") + .desc("Maximum number of live resource requests in CPU") + .prereq(maxResReqCount); +#endif + /* Register any of the InOrderCPU's stats here.*/ timesIdled .name(name() + ".timesIdled") @@ -342,7 +357,7 @@ InOrderCPU::regStats() smtCycles .name(name() + ".smtCycles") - .desc("Total number of cycles that the CPU was simultaneous multithreading.(SMT)"); + .desc("Total number of cycles that the CPU was in SMT-mode"); committedInsts .init(numThreads) @@ -435,7 +450,8 @@ InOrderCPU::tick() //Tick next_tick = curTick + cycles(1); //tickEvent.schedule(next_tick); mainEventQueue.schedule(&tickEvent, nextCycle(curTick + 1)); - DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", nextCycle(curTick + 1)); + DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", + nextCycle(curTick + 1)); } } @@ -640,8 +656,8 @@ void InOrderCPU::addToCurrentThreads(ThreadID tid) { if (!isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU.\n", - tid); + DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU." 
+ "\n", tid); currentThreads.push_back(tid); } } @@ -1002,9 +1018,11 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid) tid = TheISA::getTargetThread(tcBase(tid)); } - if (reg_idx < FP_Base_DepTag) { // Integer Register File + if (reg_idx < FP_Base_DepTag) { + // Integer Register File return readIntReg(reg_idx, tid); - } else if (reg_idx < Ctrl_Base_DepTag) { // Float Register File + } else if (reg_idx < Ctrl_Base_DepTag) { + // Float Register File reg_idx -= FP_Base_DepTag; return readFloatRegBits(reg_idx, tid); } else { @@ -1070,9 +1088,12 @@ InOrderCPU::addInst(DynInstPtr &inst) void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { - // Set the CPU's PCs - This contributes to the precise state of the CPU which can be used - // when restoring a thread to the CPU after a fork or after an exception - // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if it's a branch or not + // Set the CPU's PCs - This contributes to the precise state of the CPU + // which can be used when restoring a thread to the CPU after a fork or + // after an exception + // ================= + // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if + // it's a branch or not setPC(inst->readPC(), tid); setNextPC(inst->readNextPC(), tid); setNextNPC(inst->readNextNPC(), tid); @@ -1112,7 +1133,8 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // Broadcast to other resources an instruction // has been completed - resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, tid); + resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, + tid); // Finally, remove instruction from CPU removeInst(inst); @@ -1380,7 +1402,8 @@ InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case // you want to run w/out caches? 
- CacheUnit *cache_res = dynamic_cast(resPool->getResource(dataPortIdx)); + CacheUnit *cache_res = + dynamic_cast(resPool->getResource(dataPortIdx)); return cache_res->read(inst, addr, data, flags); } @@ -1483,14 +1506,16 @@ InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -1498,7 +1523,8 @@ InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64 template<> Fault -InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 3320532ba..463ca5445 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -144,9 +144,11 @@ class InOrderCPU : public BaseCPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - mainEventQueue.reschedule(&tickEvent, nextCycle(curTick + ticks(delay))); + mainEventQueue.reschedule(&tickEvent, + nextCycle(curTick + ticks(delay))); else if (!tickEvent.scheduled()) - mainEventQueue.schedule(&tickEvent, nextCycle(curTick + ticks(delay))); + mainEventQueue.schedule(&tickEvent, + nextCycle(curTick + ticks(delay))); } /** Unschedule tick event, regardless of its current state. */ @@ -228,7 +230,8 @@ class InOrderCPU : public BaseCPU /** Interface between the CPU and CPU resources. 
*/ ResourcePool *resPool; - /** Instruction used to signify that there is no *real* instruction in buffer slot */ + /** Instruction used to signify that there is no *real* instruction in + buffer slot */ DynInstPtr dummyBufferInst; /** Used by resources to signify a denied access to a resource. */ @@ -420,7 +423,11 @@ class InOrderCPU : public BaseCPU /** Get & Update Next Event Number */ InstSeqNum getNextEventNum() { +#ifdef DEBUG return cpuEventNum++; +#else + return 0; +#endif } /** Register file accessors */ @@ -550,8 +557,8 @@ class InOrderCPU : public BaseCPU */ std::queue removeList; - /** List of all the resource requests that will be removed at the end of this - * cycle. + /** List of all the resource requests that will be removed at the end + * of this cycle. */ std::queue reqRemoveList; @@ -632,8 +639,12 @@ class InOrderCPU : public BaseCPU // LL/SC debug functionality unsigned stCondFails; - unsigned readStCondFailures() { return stCondFails; } - unsigned setStCondFailures(unsigned st_fails) { return stCondFails = st_fails; } + + unsigned readStCondFailures() + { return stCondFails; } + + unsigned setStCondFailures(unsigned st_fails) + { return stCondFails = st_fails; } /** Returns a pointer to a thread context. */ ThreadContext *tcBase(ThreadID tid = 0) @@ -663,9 +674,16 @@ class InOrderCPU : public BaseCPU /** The global sequence number counter. */ InstSeqNum globalSeqNum[ThePipeline::MaxThreads]; +#ifdef DEBUG /** The global event number counter. */ InstSeqNum cpuEventNum; + /** Number of resource requests active in CPU **/ + unsigned resReqCount; + + Stats::Scalar maxResReqCount; +#endif + /** Counter of how many stages have completed switching out. 
*/ int switchCount; diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index cb5681bc1..286332e08 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -80,7 +80,8 @@ Resource::regStats() { instReqsProcessed .name(name() + ".instReqsProcessed") - .desc("Number of Instructions Requests that completed in this resource."); + .desc("Number of Instructions Requests that completed in " + "this resource."); } int @@ -98,7 +99,8 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { - DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource [slot:%i].\n", + DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource " + "[slot:%i].\n", reqMap[slot_idx]->inst->readTid(), reqMap[slot_idx]->inst->seqNum, slot_idx); @@ -159,7 +161,8 @@ Resource::getSlot(DynInstPtr inst) while (map_it != map_end) { if ((*map_it).second) { - DPRINTF(Resource, "Currently Serving request from: [tid:%i] [sn:%i].\n", + DPRINTF(Resource, "Currently Serving request from: " + "[tid:%i] [sn:%i].\n", (*map_it).second->getInst()->readTid(), (*map_it).second->getInst()->seqNum); } @@ -202,10 +205,12 @@ Resource::request(DynInstPtr inst) inst_req = getRequest(inst, stage_num, id, slot_num, cmd); if (inst->staticInst) { - DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource.\n", + DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this " + "resource.\n", inst->readTid(), inst->seqNum); } else { - DPRINTF(Resource, "[tid:%i]: instruction requesting this resource.\n", + DPRINTF(Resource, "[tid:%i]: instruction requesting this " + "resource.\n", inst->readTid()); } @@ -232,7 +237,8 @@ Resource::requestAgain(DynInstPtr inst, bool &do_request) do_request = true; if (inst->staticInst) { - DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource again.\n", + DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource " + "again.\n", inst->readTid(), inst->seqNum); } else { DPRINTF(Resource, "[tid:%i]: requesting this resource 
again.\n", @@ -394,7 +400,41 @@ Resource::unscheduleEvent(DynInstPtr inst) int ResourceRequest::resReqID = 0; -int ResourceRequest::resReqCount = 0; +int ResourceRequest::maxReqCount = 0; + +ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, + int stage_num, int res_idx, int slot_num, + unsigned _cmd) + : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), + resIdx(res_idx), slotNum(slot_num), completed(false), + squashed(false), processing(false), waiting(false) +{ +#ifdef DEBUG + reqID = resReqID++; + res->cpu->resReqCount++; + DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, + res->cpu->resReqCount); + + if (res->cpu->resReqCount > 100) { + fatal("Too many undeleted resource requests. Memory leak?\n"); + } + + if (res->cpu->resReqCount > maxReqCount) { + maxReqCount = res->cpu->resReqCount; + res->cpu->maxResReqCount = maxReqCount; + } + +#endif +} + +ResourceRequest::~ResourceRequest() +{ +#ifdef DEBUG + res->cpu->resReqCount--; + DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, + res->cpu->resReqCount); +#endif +} void ResourceRequest::done(bool completed) diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 605b7f690..2cf8e61eb 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -70,7 +70,8 @@ class Resource { /** Define this function if resource, has a port to connect to an outside * simulation object. */ - virtual Port* getPort(const std::string &if_name, int idx) { return NULL; } + virtual Port* getPort(const std::string &if_name, int idx) + { return NULL; } /** Return ID for this resource */ int getId() { return id; } @@ -114,9 +115,9 @@ class Resource { /** Free a resource slot */ virtual void freeSlot(int slot_idx); - /** Request usage of a resource for this instruction. 
If this instruction already - * has made this request to this resource, and that request is uncompleted - * this function will just return that request + /** Request usage of a resource for this instruction. If this instruction + * already has made this request to this resource, and that request is + * uncompleted this function will just return that request */ virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, @@ -166,7 +167,8 @@ class Resource { /** Schedule resource event, regardless of its current state. */ void scheduleEvent(int slot_idx, int delay); - /** Find instruction in list, Schedule resource event, regardless of its current state. */ + /** Find instruction in list, Schedule resource event, regardless of its + * current state. */ bool scheduleEvent(DynInstPtr inst, int delay); /** Unschedule resource event, regardless of its current state. */ @@ -303,30 +305,14 @@ class ResourceRequest static int resReqID; - static int resReqCount; - + static int maxReqCount; + public: ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, - int res_idx, int slot_num, unsigned _cmd) - : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), - resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), waiting(false) - { - reqID = resReqID++; - resReqCount++; - DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, resReqCount); - - if (resReqCount > 100) { - fatal("Too many undeleted resource requests. Memory leak?\n"); - } - } - - virtual ~ResourceRequest() - { - resReqCount--; - DPRINTF(ResReqCount, "Res. Req %i deleted. 
resReqCount=%i.\n", reqID, resReqCount); - } - + int res_idx, int slot_num, unsigned _cmd); + + virtual ~ResourceRequest(); + int reqID; /** Acknowledge that this is a request is done and remove From 0e96798fe0a56936f8590dbd301f2b07a1850e22 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:25:13 -0500 Subject: [PATCH 02/36] configs/inorder: add options for switch-on-miss to inorder cpu --- src/cpu/inorder/InOrderCPU.py | 5 +++++ src/cpu/inorder/cpu.cc | 11 ++++++++++- src/cpu/inorder/cpu.hh | 10 +++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py index a0b0466a7..d6db346d4 100644 --- a/src/cpu/inorder/InOrderCPU.py +++ b/src/cpu/inorder/InOrderCPU.py @@ -30,10 +30,15 @@ from m5.params import * from m5.proxy import * from BaseCPU import BaseCPU +class ThreadModel(Enum): + vals = ['Single', 'SMT', 'SwitchOnCacheMiss'] + class InOrderCPU(BaseCPU): type = 'InOrderCPU' activity = Param.Unsigned(0, "Initial count") + threadModel = Param.ThreadModel('SMT', "Multithreading model (SE-MODE only)") + cachePorts = Param.Unsigned(2, "Cache Ports") stageWidth = Param.Unsigned(1, "Stage width") diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 38f6b4eed..a1e6c9c86 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -197,7 +197,7 @@ InOrderCPU::InOrderCPU(Params *params) deferRegistration(false/*params->deferRegistration*/), stageTracing(params->stageTracing), numVirtProcs(1) -{ +{ ThreadID active_threads; cpu_params = params; @@ -216,6 +216,15 @@ InOrderCPU::InOrderCPU(Params *params) "in your InOrder implementation or " "edit your workload size."); } + + if (active_threads > 1) { + threadModel = (InOrderCPU::ThreadModel) params->threadModel; + } else { + threadModel = Single; + } + + + #endif // Bind the fetch & data ports from the resource pool. 
diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 463ca5445..804054f8c 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -100,6 +100,15 @@ class InOrderCPU : public BaseCPU /** Type of core that this is */ std::string coreType; + // Only need for SE MODE + enum ThreadModel { + Single, + SMT, + SwitchOnCacheMiss + }; + + ThreadModel threadModel; + int readCpuId() { return cpu_id; } void setCpuId(int val) { cpu_id = val; } @@ -117,7 +126,6 @@ class InOrderCPU : public BaseCPU /** Overall CPU status. */ Status _status; - private: /** Define TickEvent for the CPU */ class TickEvent : public Event From a892af7b261e1c48b06ccbded5551e958c778414 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:25:27 -0500 Subject: [PATCH 03/36] inorder: dont allow early loads - loads were happening on same cycle as the address was generated which is slightly unrealistic. Instead, force address generation to be on separate cycle from load initiation - also, mark the stages in a more traditional way (F-D-X-M-W) --- src/cpu/inorder/pipeline_traits.cc | 45 +++++++++++++++++------------- src/cpu/inorder/pipeline_traits.hh | 3 +- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc index ed72ab1d0..8ff26dce2 100644 --- a/src/cpu/inorder/pipeline_traits.cc +++ b/src/cpu/inorder/pipeline_traits.cc @@ -65,16 +65,18 @@ int getNextPriority(DynInstPtr &inst, int stage_num) void createFrontEndSchedule(DynInstPtr &inst) { - InstStage *I = inst->addStage(); - InstStage *E = inst->addStage(); + InstStage *F = inst->addStage(); + InstStage *D = inst->addStage(); - I->needs(FetchSeq, FetchSeqUnit::AssignNextPC); - I->needs(ICache, CacheUnit::InitiateFetch); + // FETCH + F->needs(FetchSeq, FetchSeqUnit::AssignNextPC); + F->needs(ICache, CacheUnit::InitiateFetch); - E->needs(ICache, CacheUnit::CompleteFetch); - E->needs(Decode, DecodeUnit::DecodeInst); - E->needs(BPred, 
BranchPredictor::PredictBranch); - E->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); + // DECODE + D->needs(ICache, CacheUnit::CompleteFetch); + D->needs(Decode, DecodeUnit::DecodeInst); + D->needs(BPred, BranchPredictor::PredictBranch); + D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); } bool createBackEndSchedule(DynInstPtr &inst) @@ -83,45 +85,48 @@ bool createBackEndSchedule(DynInstPtr &inst) return false; } - InstStage *E = inst->currentStage(); + InstStage *X = inst->addStage(); InstStage *M = inst->addStage(); - InstStage *A = inst->addStage(); InstStage *W = inst->addStage(); + // EXECUTE for (int idx=0; idx < inst->numSrcRegs(); idx++) { if (!idx || !inst->isStore()) { - E->needs(RegManager, UseDefUnit::ReadSrcReg, idx); + X->needs(RegManager, UseDefUnit::ReadSrcReg, idx); } } - if ( inst->isNonSpeculative() ) { // skip execution of non speculative insts until later } else if ( inst->isMemRef() ) { if ( inst->isLoad() ) { - E->needs(AGEN, AGENUnit::GenerateAddr); - E->needs(DCache, CacheUnit::InitiateReadData); + X->needs(AGEN, AGENUnit::GenerateAddr); } } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - E->needs(MDU, MultDivUnit::StartMultDiv); + X->needs(MDU, MultDivUnit::StartMultDiv); } else { - E->needs(ExecUnit, ExecutionUnit::ExecuteInst); + X->needs(ExecUnit, ExecutionUnit::ExecuteInst); } if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - M->needs(MDU, MultDivUnit::EndMultDiv); + X->needs(MDU, MultDivUnit::EndMultDiv); } + // MEMORY if ( inst->isLoad() ) { - M->needs(DCache, CacheUnit::CompleteReadData); + M->needs(DCache, CacheUnit::InitiateReadData); } else if ( inst->isStore() ) { M->needs(RegManager, UseDefUnit::ReadSrcReg, 1); M->needs(AGEN, AGENUnit::GenerateAddr); M->needs(DCache, CacheUnit::InitiateWriteData); } - if ( inst->isStore() ) { - A->needs(DCache, CacheUnit::CompleteWriteData); + + // WRITEBACK + if ( inst->isLoad() ) { + W->needs(DCache, CacheUnit::CompleteReadData); + } else if ( 
inst->isStore() ) { + W->needs(DCache, CacheUnit::CompleteWriteData); } if ( inst->isNonSpeculative() ) { diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 3c28894e7..ddc8a3ad7 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -113,7 +113,8 @@ namespace ThePipeline { }; struct entryCompare { - bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) const + bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) + const { // Prioritize first by stage number that the resource is needed if (lhs->stageNum > rhs->stageNum) { From e8312ab6f700b31dfa357607ab51c9c05014572d Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:25:48 -0500 Subject: [PATCH 04/36] inorder: switch out buffer add buffer for instructions to switch out to in a pipeline stage can't squash the instruction and remove the pipeline so we kind of need to 'suspend' an instruction at the stage while the memory stall resolves for the switch on cache miss model --- src/cpu/inorder/pipeline_stage.cc | 137 +++++++++++++++++------------- src/cpu/inorder/pipeline_stage.hh | 19 ++++- 2 files changed, 95 insertions(+), 61 deletions(-) diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index dc0378bf3..8d14aae27 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -44,6 +44,9 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) stageBufferMax(ThePipeline::interStageBuffSize[stage_num]), prevStageValid(false), nextStageValid(false) { + switchedOutBuffer.resize(ThePipeline::MaxThreads); + switchedOutValid.resize(ThePipeline::MaxThreads); + init(params); } @@ -267,7 +270,8 @@ PipelineStage::isBlocked(ThreadID tid) bool PipelineStage::block(ThreadID tid) { - DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to previous stages.\n", tid); + DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block 
signal back to " + "previous stages.\n", tid); // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. @@ -296,7 +300,8 @@ PipelineStage::block(ThreadID tid) void PipelineStage::blockDueToBuffer(ThreadID tid) { - DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to next stage.\n", tid); + DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to " + "next stage.\n", tid); if (stageStatus[tid] != Blocked) { // Set the status to Blocked. @@ -334,8 +339,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) { if (cpu->squashSeqNum[tid] < inst->seqNum && cpu->lastSquashCycle[tid] == curTick){ - DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another stage's squash " - "signal for after [sn:%i].\n", inst->seqNum, cpu->squashSeqNum[tid]); + DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another " + "stage's squash signal for after [sn:%i].\n", inst->seqNum, + cpu->squashSeqNum[tid]); } else { // Send back mispredict information. 
toPrevStages->stageInfo[stageNum][tid].branchMispredict = true; @@ -346,20 +352,28 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) #if ISA_HAS_DELAY_SLOT - toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() != + toPrevStages->stageInfo[stageNum][tid].branchTaken = + inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); - toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum; + + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = + inst->bdelaySeqNum; + InstSeqNum squash_seq_num = inst->bdelaySeqNum; #else - toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() != + toPrevStages->stageInfo[stageNum][tid].branchTaken = + inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum; InstSeqNum squash_seq_num = inst->seqNum; #endif - DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg()); - DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] " - "branch.\n", tid, squash_seq_num, inst->seqNum); + DPRINTF(InOrderStage, "Target being re-set to %08p\n", + inst->readPredTarg()); + DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], " + "due to [sn:%i] branch.\n", tid, squash_seq_num, + inst->seqNum); // Save squash num for later stage use cpu->squashSeqNum[tid] = squash_seq_num; @@ -394,8 +408,8 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) squashPrevStageInsts(squash_seq_num, tid); - DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage skidbuffer.\n", - tid); + DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage" + " skidbuffer.\n", tid); while (!skidBuffer[tid].empty()) { if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer " @@ -404,8 +418,9 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) 
skidBuffer[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, skidBuffer[tid].front()->seqNum, skidBuffer[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + " PC %08p.\n", tid, skidBuffer[tid].front()->seqNum, + skidBuffer[tid].front()->PC); skidBuffer[tid].pop(); } @@ -427,7 +442,8 @@ PipelineStage::stageBufferAvail() int avail = stageBufferMax - total -0;// incoming_insts; if (avail < 0) - fatal("stageNum %i:stageBufferAvail() < 0...stBMax=%i,total=%i,incoming=%i=>%i", + fatal("stageNum %i:stageBufferAvail() < 0..." + "stBMax=%i,total=%i,incoming=%i=>%i", stageNum, stageBufferMax, total, incoming_insts, avail); return avail; @@ -443,7 +459,8 @@ PipelineStage::canSendInstToStage(unsigned stage_num) } if (!buffer_avail && nextStageQueueValid(stage_num)) { - DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", stageNum + 1); + DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", + stageNum + 1); } return buffer_avail; @@ -461,8 +478,9 @@ PipelineStage::skidInsert(ThreadID tid) assert(tid == inst->threadNumber); - DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", - tid, inst->seqNum, inst->readPC(), inst->threadNumber); + DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage " + "skidBuffer %i\n", tid, inst->seqNum, inst->readPC(), + inst->threadNumber); skidBuffer[tid].push(inst); } @@ -547,16 +565,16 @@ PipelineStage::sortInsts() for (int i = 0; i < insts_from_prev_stage; ++i) { if (prevStage->insts[i]->isSquashed()) { - DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], not inserting " - "into stage buffer.\n", + DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], " + "not inserting into stage buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); continue; } - DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage buffer.\n", - 
prevStage->insts[i]->readTid(), + DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage " + "buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); ThreadID tid = prevStage->insts[i]->threadNumber; @@ -611,8 +629,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) // Check for squash from later pipeline stages for (int stage_idx=stageNum; stage_idx < NumStages; stage_idx++) { if (fromNextStages->stageInfo[stage_idx][tid].squash) { - DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to squash " - "from stage %u.\n", tid, stage_idx); + DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to " + "squash from stage %u.\n", tid, stage_idx); InstSeqNum squash_seq_num = fromNextStages-> stageInfo[stage_idx][tid].bdelayDoneSeqNum; squash(squash_seq_num, tid); @@ -625,8 +643,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) } if (stageStatus[tid] == Blocked) { - DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to unblocking.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to " + "unblocking.\n", tid); stageStatus[tid] = Unblocking; @@ -637,15 +655,15 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) if (stageStatus[tid] == Squashing) { if (!skidBuffer[tid].empty()) { - DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to unblocking.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to " + "unblocking.\n", tid); stageStatus[tid] = Unblocking; } else { // Switch status to running if stage isn't being told to block or // squash this cycle. 
- DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to running.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to " + "running.\n", tid); stageStatus[tid] = Running; } @@ -717,13 +735,13 @@ PipelineStage::unsetResStall(ResReqPtr res_req, ThreadID tid) } if (stalls[tid].resources.size() == 0) { - DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource stalls.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource" + " stalls.\n", tid); } } -// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be handled -// one at a time, but instead first come first serve by instruction? +// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be +// handled one at a time, but instead first come first serve by instruction? // Questions are how should a pipeline stage handle thread-specific stalls & // pipeline squashes void @@ -749,8 +767,8 @@ PipelineStage::processStage(bool &status_change) DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(), stageNum); - DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", stageBufferAvail(), - stageNum); + DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", + stageBufferAvail(), stageNum); } void @@ -828,8 +846,8 @@ PipelineStage::processInsts(ThreadID tid) inst = insts_to_stage.front(); - DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] with " - "PC %#x\n", + DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] " + "with PC %#x\n", tid, inst->seqNum, inst->readPC()); if (inst->isSquashed()) { @@ -856,8 +874,8 @@ PipelineStage::processInsts(ThreadID tid) // Send to Next Stage or Break Loop if (nextStageValid && !sendInstToNextStage(inst)) { - DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage %i.\n", - tid, inst->seqNum,inst->nextStage); + DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage" + " %i.\n", tid, 
inst->seqNum,inst->nextStage); break; } @@ -897,14 +915,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst) int res_num = inst->nextResource(); - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s." + "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); ResReqPtr req = cpu->resPool->request(res_num, inst); if (req->isCompleted()) { - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s completed.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s " + "completed.\n", tid, inst->seqNum, + cpu->resPool->name(res_num)); if (req->fault == NoFault) { inst->popSchedEntry(); @@ -913,8 +932,8 @@ PipelineStage::processInstSchedule(DynInstPtr inst) curTick, req->fault->name()); } } else { - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." 
+ "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); last_req_completed = false; @@ -956,12 +975,12 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) assert(next_stage >= 1); assert(prev_stage >= 0); - DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to stage %u.\n", tid, - stageNum+1); + DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to " + "stage %u.\n", tid, stageNum+1); if (!canSendInstToStage(inst->nextStage)) { - DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to stage %u.\n", tid, - stageNum+1); + DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to " + "stage %u.\n", tid, stageNum+1); return false; } @@ -969,12 +988,14 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) if (nextStageQueueValid(inst->nextStage - 1)) { if (inst->seqNum > cpu->squashSeqNum[tid] && curTick == cpu->lastSquashCycle[tid]) { - DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping insertion " - "into stage %i queue.\n", tid, inst->seqNum, inst->nextStage); + DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping " + "insertion into stage %i queue.\n", tid, inst->seqNum, + inst->nextStage); } else { if (nextStageValid) { - DPRINTF(InOrderStage, "[tid:%u] %i slots available in next stage buffer.\n", - tid, cpu->pipelineStage[next_stage]->stageBufferAvail()); + DPRINTF(InOrderStage, "[tid:%u] %i slots available in next " + "stage buffer.\n", tid, + cpu->pipelineStage[next_stage]->stageBufferAvail()); } DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: being placed into " @@ -982,11 +1003,13 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) tid, inst->seqNum, toNextStageIndex, cpu->pipelineStage[prev_stage]->nextStageQueue->id()); - int next_stage_idx = cpu->pipelineStage[prev_stage]->nextStage->size; + int next_stage_idx = + cpu->pipelineStage[prev_stage]->nextStage->size; - // Place instructions in inter-stage communication struct for the next + // Place instructions in inter-stage communication struct for 
next // pipeline stage to read next cycle - cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] = inst; + cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] + = inst; ++(cpu->pipelineStage[prev_stage]->nextStage->size); diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 86ee98132..42a632560 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -240,6 +240,8 @@ class PipelineStage */ virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid); + virtual void squashDueToMemStall(DynInstPtr &inst, ThreadID tid); + /** Squash instructions from stage buffer */ virtual void squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid); @@ -259,19 +261,28 @@ class PipelineStage /** List of active thread ids */ std::list *activeThreads; + /** Buffer of instructions switched out to mem-stall. + * Only used when using SwitchOnCacheMiss threading model + * Used as 1-to-1 mapping between ThreadID and Entry. + */ + std::vector switchedOutBuffer; + std::vector switchedOutValid; + /** Queue of all instructions coming from previous stage on this cycle. */ std::queue insts[ThePipeline::MaxThreads]; - /** Queue of instructions that are finished processing and ready to go next stage. - * This is used to prevent from processing an instrution more than once on any - * stage. NOTE: It is up to the PROGRAMMER must manage this as a queue + /** Queue of instructions that are finished processing and ready to go + * next stage. This is used to prevent from processing an instrution more + * than once on any stage. NOTE: It is up to the PROGRAMMER must manage + * this as a queue */ std::list instsToNextStage; /** Skid buffer between previous stage and this one. 
*/ std::queue skidBuffer[ThePipeline::MaxThreads]; - /** Instruction used to signify that there is no *real* instruction in buffer slot */ + /** Instruction used to signify that there is no *real* instruction in + * buffer slot */ DynInstPtr dummyBufferInst; /** SeqNum of Squashing Branch Delay Instruction (used for MIPS) */ From d8e0935af2805bc2c4bdfbab7de2c63f7fde46f7 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:03 -0500 Subject: [PATCH 05/36] inorder: add insts to cpu event some events are going to need instruction data when they process, so just include the instruction in the event construction --- src/cpu/inorder/cpu.cc | 29 +++++++++++++++-------------- src/cpu/inorder/cpu.hh | 16 +++++++++------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index a1e6c9c86..69aea0c57 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -84,10 +84,10 @@ InOrderCPU::TickEvent::description() } InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, - Fault fault, ThreadID _tid, unsigned _vpe) + Fault fault, ThreadID _tid, DynInstPtr inst) : Event(CPU_Tick_Pri), cpu(_cpu) { - setEvent(e_type, fault, _tid, _vpe); + setEvent(e_type, fault, _tid, inst); } @@ -317,7 +317,7 @@ InOrderCPU::InOrderCPU(Params *params) contextSwitch = false; // Define dummy instructions and resource requests to be used. - DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0); + dummyInst = new InOrderDynInst(this, NULL, 0, 0); dummyReq = new ResourceRequest(resPool->getResource(0), NULL, 0, 0, 0, 0); // Reset CPU to reset state. 
@@ -570,7 +570,7 @@ void InOrderCPU::trap(Fault fault, ThreadID tid, int delay) { //@ Squash Pipeline during TRAP - scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(Trap, fault, tid, dummyInst, delay); } void @@ -581,9 +581,10 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid) void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, - ThreadID tid, unsigned vpe, unsigned delay) + ThreadID tid, DynInstPtr inst, + unsigned delay) { - CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe); + CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst); if (delay >= 0) { DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n", @@ -597,7 +598,7 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, // Broadcast event to the Resource Pool DynInstPtr dummy_inst = new InOrderDynInst(this, NULL, getNextEventNum(), tid); - resPool->scheduleEvent(c_event, dummy_inst, 0, 0, tid); + resPool->scheduleEvent(c_event, inst, 0, 0, tid); } inline bool @@ -699,7 +700,7 @@ InOrderCPU::enableVirtProcElement(unsigned vpe) "Enabling of concurrent virtual processor execution", vpe); - scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, vpe); + scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, dummyInst); } void @@ -725,7 +726,7 @@ InOrderCPU::disableVirtProcElement(ThreadID tid, unsigned vpe) "Disabling of concurrent virtual processor execution", vpe); - scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, vpe); + scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, dummyInst); } void @@ -759,7 +760,7 @@ InOrderCPU::enableMultiThreading(unsigned vpe) DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on " "virtual processor %i", vpe); - scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, vpe); + scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, dummyInst); } void @@ -786,7 +787,7 @@ InOrderCPU::disableMultiThreading(ThreadID tid, unsigned vpe) DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on " 
"virtual processor %i", tid, vpe); - scheduleCpuEvent(DisableThreads, NoFault, tid, vpe); + scheduleCpuEvent(DisableThreads, NoFault, tid, dummyInst); } void @@ -850,7 +851,7 @@ InOrderCPU::activateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid); - scheduleCpuEvent(ActivateThread, NoFault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst, delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -863,7 +864,7 @@ InOrderCPU::activateContext(ThreadID tid, int delay) void InOrderCPU::suspendContext(ThreadID tid, int delay) { - scheduleCpuEvent(SuspendThread, NoFault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst, delay); //_status = Idle; } @@ -877,7 +878,7 @@ InOrderCPU::suspendThread(ThreadID tid) void InOrderCPU::deallocateContext(ThreadID tid, int delay) { - scheduleCpuEvent(DeallocateThread, NoFault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); } void diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 804054f8c..4c7b2710d 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -199,22 +199,24 @@ class InOrderCPU : public BaseCPU public: CPUEventType cpuEventType; ThreadID tid; - unsigned vpe; + DynInstPtr inst; Fault fault; - + unsigned vpe; + public: /** Constructs a CPU event. */ CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault, - ThreadID _tid, unsigned _vpe); + ThreadID _tid, DynInstPtr inst); /** Set Type of Event To Be Scheduled */ void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid, - unsigned _vpe) + DynInstPtr _inst) { fault = _fault; cpuEventType = e_type; tid = _tid; - vpe = _vpe; + inst = _inst; + vpe = 0; } /** Processes a resource event. 
*/ @@ -232,7 +234,7 @@ class InOrderCPU : public BaseCPU /** Schedule a CPU Event */ void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid, - unsigned vpe, unsigned delay = 0); + DynInstPtr inst, unsigned delay = 0); public: /** Interface between the CPU and CPU resources. */ @@ -240,7 +242,7 @@ class InOrderCPU : public BaseCPU /** Instruction used to signify that there is no *real* instruction in buffer slot */ - DynInstPtr dummyBufferInst; + DynInstPtr dummyInst; /** Used by resources to signify a denied access to a resource. */ ResourceRequest *dummyReq; From eac5eac67ae8076e934d78063a24eeef08f25413 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:13 -0500 Subject: [PATCH 06/36] inorder: squash on memory stall add code to recognize memory stalls in resources and the pipeline as well as squash a thread if there is a stall and we are in the switch on cache miss model --- src/cpu/inorder/cpu.cc | 29 ++++ src/cpu/inorder/cpu.hh | 8 +- src/cpu/inorder/first_stage.cc | 44 +++--- src/cpu/inorder/first_stage.hh | 2 + src/cpu/inorder/pipeline_stage.cc | 35 +++-- src/cpu/inorder/pipeline_stage.hh | 2 +- src/cpu/inorder/resource.cc | 8 +- src/cpu/inorder/resource.hh | 12 +- src/cpu/inorder/resource_pool.cc | 185 ++++++++++++++++-------- src/cpu/inorder/resource_pool.hh | 8 +- src/cpu/inorder/resources/cache_unit.cc | 48 ++++-- src/cpu/inorder/resources/cache_unit.hh | 3 + 12 files changed, 278 insertions(+), 106 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 69aea0c57..035aa0571 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -140,6 +140,10 @@ InOrderCPU::CPUEvent::process() cpu->disableThreads(tid, vpe); break; + case SquashFromMemStall: + cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid); + break; + case Trap: cpu->trapCPU(fault, tid); break; @@ -579,6 +583,31 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid) fault->invoke(tcBase(tid)); } +void 
+InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay) +{ + scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay); +} + + +void +InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(InOrderCPU, "Squashing Pipeline Stages Due to Memory Stall...\n"); + + // Squash all instructions in each stage including + // instruction that caused the squash (seq_num - 1) + // NOTE: The stage bandwidth needs to be cleared so thats why + // the stalling instruction is squashed as well. The stalled + // instruction is previously placed in another intermediate buffer + // while it's stall is being handled. + InstSeqNum squash_seq_num = seq_num - 1; + + for (int stNum=stage_num; stNum >= 0 ; stNum--) { + pipelineStage[stNum]->squashDueToMemStall(squash_seq_num, tid); + } +} + void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, ThreadID tid, DynInstPtr inst, diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 4c7b2710d..5d34de67a 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -183,7 +183,7 @@ class InOrderCPU : public BaseCPU EnableVPEs, Trap, InstGraduated, - SquashAll, + SquashFromMemStall, UpdatePCs, NumCPUEvents }; @@ -344,6 +344,12 @@ class InOrderCPU : public BaseCPU void trap(Fault fault, ThreadID tid, int delay = 0); void trapCPU(Fault fault, ThreadID tid); + /** squashFromMemStall() - sets up a squash event + * squashDueToMemStall() - squashes pipeline + */ + void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); + void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); + /** Setup CPU to insert a thread's context */ void insertThread(ThreadID tid); diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 8bd703c56..1427ca46a 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -67,11 +67,12 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) // Clear the 
instruction list and skid buffer in case they have any // insts in them. - DPRINTF(InOrderStage, "Removing instructions from stage instruction list.\n"); + DPRINTF(InOrderStage, "Removing instructions from stage instruction " + "list.\n"); while (!insts[tid].empty()) { if (insts[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because it's <= " - "squashing seqNum %i.\n", + DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because " + "it's <= squashing seqNum %i.\n", tid, insts[tid].front()->seqNum, squash_seq_num); @@ -82,8 +83,9 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) insts[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, insts[tid].front()->seqNum, insts[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + "PC %08p.\n", tid, insts[tid].front()->seqNum, + insts[tid].front()->PC); insts[tid].pop(); } @@ -93,6 +95,18 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) cpu->removeInstsUntil(squash_seq_num, tid); } +void +FirstStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + // Need to preserve the stalling instruction in first-stage + // since the squash() from first stage also removes + // the instruction from the CPU (removeInstsUntil). If that + // functionality gets changed then you can move this offset. 
+ // (stalling instruction = seq_num + 1) + squash(seq_num+1, tid); +} + + void FirstStage::processStage(bool &status_change) { @@ -106,6 +120,7 @@ FirstStage::processStage(bool &status_change) for (int threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { + ThreadID tid = getFetchingThread(fetchPolicy); if (tid >= 0) { @@ -117,14 +132,17 @@ FirstStage::processStage(bool &status_change) } } -//@TODO: Note in documentation, that when you make a pipeline stage change, then -//make sure you change the first stage too +//@TODO: Note in documentation, that when you make a pipeline stage change, +//then make sure you change the first stage too void FirstStage::processInsts(ThreadID tid) { bool all_reqs_completed = true; - for (int insts_fetched = 0; insts_fetched < stageWidth && canSendInstToStage(1); insts_fetched++) { + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { + DynInstPtr inst; bool new_inst = false; @@ -150,19 +168,9 @@ FirstStage::processInsts(ThreadID tid) inst->traceData = NULL; #endif // TRACING_ON - DPRINTF(RefCount, "creation: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Add instruction to the CPU's list of instructions. inst->setInstListIt(cpu->addInst(inst)); - DPRINTF(RefCount, "after add to CPU List: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Create Front-End Resource Schedule For Instruction ThePipeline::createFrontEndSchedule(inst); } diff --git a/src/cpu/inorder/first_stage.hh b/src/cpu/inorder/first_stage.hh index 2a69678e4..383b799f3 100644 --- a/src/cpu/inorder/first_stage.hh +++ b/src/cpu/inorder/first_stage.hh @@ -61,6 +61,8 @@ class FirstStage : public PipelineStage { /** Squash Instructions Above a Seq. 
Num */ void squash(InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); + /** There are no insts. coming from previous stages, so there is * no need to sort insts here */ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 8d14aae27..1fd7150da 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -339,9 +339,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) { if (cpu->squashSeqNum[tid] < inst->seqNum && cpu->lastSquashCycle[tid] == curTick){ - DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another " - "stage's squash signal for after [sn:%i].\n", inst->seqNum, - cpu->squashSeqNum[tid]); + DPRINTF(Resource, "Ignoring [sn:%i] branch squash signal due to " + "another stage's squash signal for after [sn:%i].\n", + inst->seqNum, cpu->squashSeqNum[tid]); } else { // Send back mispredict information. toPrevStages->stageInfo[stageNum][tid].branchMispredict = true; @@ -381,6 +381,12 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) } } +void +PipelineStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + squash(seq_num, tid); +} + void PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) { @@ -413,8 +419,9 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) while (!skidBuffer[tid].empty()) { if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer " - "instructions before delay slot [sn:%i]. %i insts" - "left.\n", tid, squash_seq_num, + "instructions (starting w/[sn:%i]) before delay slot " + "[sn:%i]. 
%i insts left.\n", tid, + skidBuffer[tid].front()->seqNum, squash_seq_num, skidBuffer[tid].size()); break; } @@ -775,7 +782,7 @@ void PipelineStage::processThread(bool &status_change, ThreadID tid) { // If status is Running or idle, - // call stageInsts() + // call processInsts() // If status is Unblocking, // buffer any instructions coming from fetch // continue trying to empty skid buffer @@ -787,7 +794,7 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) ;//++stageSquashCycles; } - // Stage should try to stage as many instructions as its bandwidth + // Stage should try to process as many instructions as its bandwidth // will allow, as long as it is not currently blocked. if (stageStatus[tid] == Running || stageStatus[tid] == Idle) { @@ -904,9 +911,7 @@ bool PipelineStage::processInstSchedule(DynInstPtr inst) { bool last_req_completed = true; -#if TRACING_ON ThreadID tid = inst->readTid(); -#endif if (inst->nextResStage() == stageNum) { int res_stage_num = inst->nextResStage(); @@ -937,6 +942,18 @@ PipelineStage::processInstSchedule(DynInstPtr inst) last_req_completed = false; + if (req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + // Save Stalling Instruction + switchedOutBuffer[tid] = inst; + switchedOutValid[tid] = true; + + // Remove Thread From Pipeline & Resource Pool + inst->squashingStage = stageNum; + inst->bdelaySeqNum = inst->seqNum; + cpu->squashFromMemStall(inst, tid); + } + break; } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 42a632560..f10906e4c 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -240,7 +240,7 @@ class PipelineStage */ virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid); - virtual void squashDueToMemStall(DynInstPtr &inst, ThreadID tid); + virtual void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); /** Squash instructions from stage buffer */ virtual void squashPrevStageInsts(InstSeqNum 
squash_seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 286332e08..47a9a4b9a 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -340,6 +340,12 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } } +void +Resource::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, + ThreadID tid) +{ + squash(inst, stage_num, squash_seq_num, tid); +} Tick Resource::ticks(int num_cycles) @@ -407,7 +413,7 @@ ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, unsigned _cmd) : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), waiting(false) + squashed(false), processing(false), memStall(false) { #ifdef DEBUG reqID = resReqID++; diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 2cf8e61eb..f7c4b8fcd 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -156,6 +156,9 @@ class Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + virtual void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** The number of instructions available that this resource can * can still process */ @@ -376,8 +379,8 @@ class ResourceRequest void setProcessing() { processing = true; } /** Get/Set IsWaiting variables */ - bool isWaiting() { return waiting; } - void setWaiting() { waiting = true; } + bool isMemStall() { return memStall; } + void setMemStall(bool stall = true) { memStall = stall; } protected: /** Resource Identification */ @@ -386,11 +389,12 @@ class ResourceRequest int resIdx; int slotNum; - /** Resource Status */ + /** Resource Request Status */ bool completed; bool squashed; bool processing; - bool waiting; + + bool memStall; }; #endif //__CPU_INORDER_RESOURCE_HH__ diff --git 
a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 0d78c232b..8822715c7 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -41,45 +41,62 @@ using namespace ThePipeline; ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) : cpu(_cpu) { - //@todo: use this function to instantiate the resources in resource pool. This will help in the - //auto-generation of this pipeline model. + //@todo: use this function to instantiate the resources in resource pool. + //This will help in the auto-generation of this pipeline model. //ThePipeline::addResources(resources, memObjects); // Declare Resource Objects // name - id - bandwidth - latency - CPU - Parameters // -------------------------------------------------- - resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params)); + resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, + StageWidth * 2, 0, _cpu, params)); memObjects.push_back(ICache); - resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("icache_port", ICache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new DecodeUnit("Decode-Unit", Decode, StageWidth, 0, _cpu, params)); + resources.push_back(new DecodeUnit("Decode-Unit", Decode, + StageWidth, 0, _cpu, params)); - resources.push_back(new BranchPredictor("Branch-Predictor", BPred, StageWidth, 0, _cpu, params)); + resources.push_back(new BranchPredictor("Branch-Predictor", BPred, + StageWidth, 0, _cpu, params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, + 0, _cpu, params)); - resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, + StageWidth * 
MaxThreads, 0, _cpu, + params)); - resources.push_back(new AGENUnit("AGEN-Unit", AGEN, StageWidth, 0, _cpu, params)); + resources.push_back(new AGENUnit("AGEN-Unit", AGEN, + StageWidth, 0, _cpu, params)); - resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, StageWidth, 0, _cpu, params)); + resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, + StageWidth, 0, _cpu, params)); - resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params)); + resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, + params)); memObjects.push_back(DCache); - resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("dcache_port", DCache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new GraduationUnit("Graduation-Unit", Grad, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new GraduationUnit("Graduation-Unit", Grad, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, + 0, _cpu, params)); } void ResourcePool::init() { for (int i=0; i < resources.size(); i++) { - DPRINTF(Resource, "Initializing resource: %s.\n", resources[i]->name()); + DPRINTF(Resource, "Initializing resource: %s.\n", + resources[i]->name()); resources[i]->init(); } @@ -113,8 +130,8 @@ ResourcePool::getPort(const std::string &if_name, int idx) int obj_idx = memObjects[i]; Port *port = resources[obj_idx]->getPort(if_name, idx); if (port != NULL) { - DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", if_name, - resources[obj_idx]->name(), obj_idx); + DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", + if_name, resources[obj_idx]->name(), obj_idx); return port; } } @@ -131,7 +148,8 @@ ResourcePool::getPortIdx(const std::string &port_name) unsigned obj_idx = 
memObjects[i]; Port *port = resources[obj_idx]->getPort(port_name, obj_idx); if (port != NULL) { - DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, port_name); + DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, + port_name); return obj_idx; } } @@ -167,7 +185,8 @@ void ResourcePool::squash(DynInstPtr inst, int res_idx, InstSeqNum done_seq_num, ThreadID tid) { - resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, tid); + resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, + tid); } int @@ -192,15 +211,17 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, { case InOrderCPU::ActivateThread: { - DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event " + "for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; @@ -208,49 +229,72 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, case InOrderCPU::DeallocateThread: { - DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - tid); + DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + tid); - mainEventQueue.schedule(res_pool_event, 
curTick + cpu->ticks(delay)); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::InstGraduated: { - DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->seqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::SquashAll: { - DPRINTF(Resource, "Scheduling Squash Resource Pool Event for tick %i.\n", + DPRINTF(Resource, "Scheduling Squash Resource Pool Event for " + "tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } + break; + + case InOrderCPU::SquashFromMemStall: + { + DPRINTF(Resource, "Scheduling Squash Due to Memory Stall Resource " + "Pool Event for tick %i.\n", curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum - 1, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; default: - DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); + DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", + 
InOrderCPU::eventNames[e_type]); ; // If Resource Pool doesnt recognize event, we ignore it. } } @@ -265,8 +309,8 @@ void ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above [sn:%i].\n", - stage_num, tid, done_seq_num); + DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " + "[sn:%i].\n", stage_num, tid, done_seq_num); int num_resources = resources.size(); @@ -275,11 +319,26 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num, } } +void +ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " + "[sn:%i].\n", stage_num, tid, done_seq_num); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->squashDueToMemStall(inst, stage_num, done_seq_num, + tid); + } +} + void ResourcePool::activateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -291,8 +350,8 @@ ResourcePool::activateAll(ThreadID tid) void ResourcePool::deactivateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -304,8 +363,8 @@ ResourcePool::deactivateAll(ThreadID tid) void ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all resources.\n", - tid, seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all " + "resources.\n", tid, seq_num); int num_resources = resources.size(); @@ -353,6 +412,10 @@ 
ResourcePool::ResPoolEvent::process() resPool->squashAll(inst, stageNum, seqNum, tid); break; + case InOrderCPU::SquashFromMemStall: + resPool->squashDueToMemStall(inst, stageNum, seqNum, tid); + break; + default: fatal("Unrecognized Event Type"); } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 016fae2bf..61e691f35 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -123,7 +123,7 @@ class ResourcePool { }; public: - ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); + ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); virtual ~ResourcePool() {} std::string name(); @@ -160,6 +160,12 @@ class ResourcePool { void squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid); + /** Squash Resources in Pool after a memory stall + * NOTE: Only use during Switch-On-Miss Thread model + */ + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid); + /** Activate Thread in all resources */ void activateAll(ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index eb66e10f8..570d27fbe 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -241,8 +241,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Updating the command for this instruction\n", - inst->readTid(), inst->seqNum); + "[tid:%i]: [sn:%i]: Updating the command for this " + "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; } else { @@ -416,6 +416,7 @@ CacheUnit::execute(int slot_num) tid, seq_num, inst->staticInst->disassemble(inst->PC)); delete cache_req->dataPkt; + //cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderCachePort, @@ -425,6 +426,7 @@ 
CacheUnit::execute(int slot_num) "STALL: [tid:%i]: Fetch miss from %08p\n", tid, cache_req->inst->readPC()); cache_req->setCompleted(false); + //cache_req->setMemStall(true); } break; @@ -437,11 +439,13 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", tid, cache_req->inst->getMemAddr()); cache_req->setCompleted(false); + cache_req->setMemStall(true); } break; @@ -510,7 +514,8 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (cache_req->pktCmd == MemCmd::WriteReq) { cache_req->pktCmd = cache_req->memReq->isSwap() ? MemCmd::SwapReq : - (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); + (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq + : MemCmd::WriteReq); } cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd, @@ -641,8 +646,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) ExtMachInst ext_inst; StaticInstPtr staticInst = NULL; Addr inst_pc = inst->readPC(); - MachInst mach_inst = TheISA::gtoh(*reinterpret_cast - (cache_pkt->getPtr())); + MachInst mach_inst = + TheISA::gtoh(*reinterpret_cast + (cache_pkt->getPtr())); predecoder.setTC(cpu->thread[tid]->getTC()); predecoder.moreBytes(inst_pc, inst_pc, mach_inst); @@ -755,7 +761,8 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqMap[slotIdx], tid); req_ptr->tlbStall = false; @@ -764,6 +771,23 @@ CacheUnitEvent::process() } } +void +CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid) +{ + // If squashing due to memory stall, then we do NOT want to + // squash the instruction that caused the stall so we + // increment the sequence 
number here to prevent that. + // + // NOTE: This is only for the SwitchOnCacheMiss Model + // NOTE: If you have multiple outstanding misses from the same + // thread then you need to reevaluate this code + // NOTE: squash should originate from + // pipeline_stage.cc:processInstSchedule + squash(inst, stage_num, squash_seq_num + 1, tid); +} + + void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) @@ -798,7 +822,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, int stall_stage = reqMap[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqMap[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { @@ -927,14 +952,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -942,7 +969,8 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_ template<> Fault -CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index c467e9771..a6b07ebd9 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -146,6 +146,9 @@ class 
CacheUnit : public Resource void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); From 4a945aab1958d39fcfea4608715e77d5112809cf Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:26 -0500 Subject: [PATCH 07/36] inorder: add event priority offset allow for events to schedule themselves later if desired. this is important because of cases like where you need to activate a thread only after the previous thread has been deactivated. The ordering there has to be enforced --- src/cpu/inorder/cpu.cc | 15 +++++++++------ src/cpu/inorder/cpu.hh | 5 +++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 035aa0571..c0daad207 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -84,8 +84,10 @@ InOrderCPU::TickEvent::description() } InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, - Fault fault, ThreadID _tid, DynInstPtr inst) - : Event(CPU_Tick_Pri), cpu(_cpu) + Fault fault, ThreadID _tid, DynInstPtr inst, + unsigned event_pri_offset) + : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)), + cpu(_cpu) { setEvent(e_type, fault, _tid, inst); } @@ -611,13 +613,14 @@ InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid) void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, ThreadID tid, DynInstPtr inst, - unsigned delay) + unsigned delay, unsigned event_pri_offset) { - CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst); + CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst, + event_pri_offset); if (delay >= 0) { - DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n", - eventNames[c_event], curTick + delay); + DPRINTF(InOrderCPU, 
"Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n", + eventNames[c_event], curTick + delay, tid); mainEventQueue.schedule(cpu_event,curTick + delay); } else { cpu_event->process(); diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 5d34de67a..1c819638d 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -206,7 +206,7 @@ class InOrderCPU : public BaseCPU public: /** Constructs a CPU event. */ CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault, - ThreadID _tid, DynInstPtr inst); + ThreadID _tid, DynInstPtr inst, unsigned event_pri_offset); /** Set Type of Event To Be Scheduled */ void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid, @@ -234,7 +234,8 @@ class InOrderCPU : public BaseCPU /** Schedule a CPU Event */ void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid, - DynInstPtr inst, unsigned delay = 0); + DynInstPtr inst, unsigned delay = 0, + unsigned event_pri_offset = 0); public: /** Interface between the CPU and CPU resources. 
*/ From e1fcc6498017574735362636791f9ad73fb39b04 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:32 -0500 Subject: [PATCH 08/36] inorder: activate thread on cache miss -Support ability to activate next ready thread after a cache miss through the activateNextReadyContext/Thread() functions -To support this a "readyList" of thread ids is added -After a cache miss, thread will suspend and then call activitynextreadythread --- src/cpu/inorder/cpu.cc | 78 ++++++++++++++++++++++++++++--- src/cpu/inorder/cpu.hh | 23 ++++++++- src/cpu/inorder/pipeline_stage.cc | 10 +++- src/cpu/inorder/resource_pool.cc | 34 +++++++++----- src/cpu/inorder/thread_context.cc | 6 +-- 5 files changed, 128 insertions(+), 23 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index c0daad207..e52e5935a 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -96,6 +96,8 @@ InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, std::string InOrderCPU::eventNames[NumCPUEvents] = { "ActivateThread", + "ActivateNextReadyThread", + "DeactivateThread", "DeallocateThread", "SuspendThread", "DisableThreads", @@ -119,9 +121,18 @@ InOrderCPU::CPUEvent::process() //@TODO: Consider Implementing "Suspend Thread" as Separate from //Deallocate + case ActivateNextReadyThread: + cpu->activateNextReadyThread(); + break; + + case DeactivateThread: + cpu->deactivateThread(tid); + break; + case SuspendThread: // Suspend & Deallocate are same for now. 
- //cpu->suspendThread(tid); - //break; + cpu->suspendThread(tid); + break; + case DeallocateThread: cpu->deallocateThread(tid); break; @@ -225,6 +236,14 @@ InOrderCPU::InOrderCPU(Params *params) if (active_threads > 1) { threadModel = (InOrderCPU::ThreadModel) params->threadModel; + + if (threadModel == SMT) { + DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n"); + } else if (threadModel == SwitchOnCacheMiss) { + DPRINTF(InOrderCPU, "Setting Thread Model to " + "Switch On Cache Miss\n"); + } + } else { threadModel = Single; } @@ -628,8 +647,8 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, } // Broadcast event to the Resource Pool - DynInstPtr dummy_inst = - new InOrderDynInst(this, NULL, getNextEventNum(), tid); + // Need to reset tid just in case this is a dummy instruction + inst->setTid(tid); resPool->scheduleEvent(c_event, inst, 0, 0, tid); } @@ -643,10 +662,39 @@ InOrderCPU::isThreadActive(ThreadID tid) } +void +InOrderCPU::activateNextReadyThread() +{ + if (readyThreads.size() >= 1) { + ThreadID ready_tid = readyThreads.front(); + + // Activate in Pipeline + activateThread(ready_tid); + + // Activate in Resource Pool + resPool->activateAll(ready_tid); + + list::iterator ready_it = + std::find(readyThreads.begin(), readyThreads.end(), ready_tid); + readyThreads.erase(ready_it); + } else { + DPRINTF(InOrderCPU, + "No Ready Threads to Activate.\n"); + } +} + void InOrderCPU::activateThread(ThreadID tid) { - if (!isThreadActive(tid)) { + if (threadModel == SwitchOnCacheMiss && + numActiveThreads() == 1) { + DPRINTF(InOrderCPU, + "Ignoring Activation of [tid:%i]. 
Placing on " + "ready list\n", tid); + + readyThreads.push_back(tid); + + } else if (!isThreadActive(tid)) { DPRINTF(InOrderCPU, "Adding Thread %i to active threads list in CPU.\n", tid); activeThreads.push_back(tid); @@ -892,6 +940,23 @@ InOrderCPU::activateContext(ThreadID tid, int delay) _status = Running; } +void +InOrderCPU::activateNextReadyContext(int delay) +{ + DPRINTF(InOrderCPU,"Activating next ready thread\n"); + + // NOTE: Add 5 to the event priority so that we always activate + // threads after we've finished deactivating, squashing,etc. + // other threads + scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst, + delay, 5); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + + _status = Running; +} void InOrderCPU::suspendContext(ThreadID tid, int delay) @@ -903,8 +968,9 @@ InOrderCPU::suspendContext(ThreadID tid, int delay) void InOrderCPU::suspendThread(ThreadID tid) { - DPRINTF(InOrderCPU,"[tid: %i]: Suspended ...\n", tid); + DPRINTF(InOrderCPU, "[tid: %i]: Placing on Suspended Threads List...\n", tid); deactivateThread(tid); + suspendedThreads.push_back(tid); } void diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 1c819638d..854f5167c 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -89,7 +89,7 @@ class InOrderCPU : public BaseCPU typedef TimeBuffer StageQueue; friend class Resource; - + public: /** Constructs a CPU with the given parameters. */ InOrderCPU(Params *params); @@ -175,6 +175,8 @@ class InOrderCPU : public BaseCPU // pool event. enum CPUEventType { ActivateThread, + ActivateNextReadyThread, + DeactivateThread, DeallocateThread, SuspendThread, DisableThreads, @@ -361,6 +363,10 @@ class InOrderCPU : public BaseCPU void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); + /** Add Thread to Active Threads List. 
*/ + void activateNextReadyContext(int delay = 0); + void activateNextReadyThread(); + /** Remove Thread from Active Threads List */ void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); @@ -612,6 +618,9 @@ class InOrderCPU : public BaseCPU /** Current Threads List */ std::list currentThreads; + /** Ready Threads List */ + std::list readyThreads; + /** Suspended Threads List */ std::list suspendedThreads; @@ -633,6 +642,18 @@ class InOrderCPU : public BaseCPU /** Number of Active Threads in the CPU */ ThreadID numActiveThreads() { return activeThreads.size(); } + /** Thread id of active thread + * Only used for SwitchOnCacheMiss model. Assumes only 1 thread active + */ + ThreadID activeThreadId() + { + if (numActiveThreads() > 0) + return activeThreads.front(); + else + return -1; + } + + /** Records that there was time buffer activity this cycle. */ void activityThisCycle() { activityRec.activity(); } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 1fd7150da..30a3733b0 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -951,7 +951,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst) // Remove Thread From Pipeline & Resource Pool inst->squashingStage = stageNum; inst->bdelaySeqNum = inst->seqNum; - cpu->squashFromMemStall(inst, tid); + cpu->squashFromMemStall(inst, tid); + + // Switch On Cache Miss + //===================== + // Suspend Thread at end of cycle + cpu->suspendContext(tid); + + // Activate Next Ready Thread at end of cycle + cpu->activateNextReadyContext(); } break; diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 8822715c7..97ba4d087 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -212,7 +212,8 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, case InOrderCPU::ActivateThread: { DPRINTF(Resource, "Scheduling Activate Thread Resource 
Pool Event " - "for tick %i.\n", curTick + delay); + "for tick %i, [tid:%i].\n", curTick + delay, + inst->readTid()); ResPoolEvent *res_pool_event = new ResPoolEvent(this, e_type, @@ -295,7 +296,6 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, default: DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); - ; // If Resource Pool doesnt recognize event, we ignore it. } } @@ -310,7 +310,7 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " - "[sn:%i].\n", stage_num, tid, done_seq_num); + "[sn:%i].\n", tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -337,14 +337,24 @@ ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, void ResourcePool::activateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " - "resources.\n", tid); - - int num_resources = resources.size(); - - for (int idx = 0; idx < num_resources; idx++) { - resources[idx]->activateThread(tid); - } + bool do_activate = cpu->threadModel != InOrderCPU::SwitchOnCacheMiss || + cpu->numActiveThreads() < 1 || + cpu->activeThreadId() == tid; + + + if (do_activate) { + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " + "resources.\n", tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->activateThread(tid); + } + } else { + DPRINTF(Resource, "[tid:%i] Ignoring Thread Activation to all " + "resources.\n", tid); + } } void @@ -374,7 +384,7 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) } ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) - : Event(CPU_Tick_Pri), resPool(_resPool), + : Event((Event::Priority)((unsigned)CPU_Tick_Pri+5)), resPool(_resPool), eventType((InOrderCPU::CPUEventType) Default) { } diff --git 
a/src/cpu/inorder/thread_context.cc b/src/cpu/inorder/thread_context.cc index 41d16b633..d2f511b9d 100644 --- a/src/cpu/inorder/thread_context.cc +++ b/src/cpu/inorder/thread_context.cc @@ -242,21 +242,21 @@ InOrderThreadContext::setRegOtherThread(int misc_reg, const MiscReg &val, void InOrderThreadContext::setPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting PC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting PC to %08p\n", thread->readTid(), val); cpu->setPC(val, thread->readTid()); } void InOrderThreadContext::setNextPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting NPC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting NPC to %08p\n", thread->readTid(), val); cpu->setNextPC(val, thread->readTid()); } void InOrderThreadContext::setNextNPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting NNPC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting NNPC to %08p\n", thread->readTid(), val); cpu->setNextNPC(val, thread->readTid()); } From d9eaa2fe2149528e109b8b32a00dd4fa72d8ec4f Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:40 -0500 Subject: [PATCH 09/36] inorder-cleanup: remove unused thread functions --- src/cpu/inorder/cpu.cc | 290 ++++++---------------------- src/cpu/inorder/cpu.hh | 92 ++------- src/cpu/inorder/inorder_dyn_inst.cc | 24 --- src/cpu/inorder/inorder_dyn_inst.hh | 6 - 4 files changed, 80 insertions(+), 332 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index e52e5935a..954309a74 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -100,13 +100,9 @@ std::string InOrderCPU::eventNames[NumCPUEvents] = "DeactivateThread", "DeallocateThread", "SuspendThread", - "DisableThreads", - "EnableThreads", - "DisableVPEs", - "EnableVPEs", "Trap", "InstGraduated", - "SquashAll", + "SquashFromMemStall", "UpdatePCs" }; @@ -119,8 +115,6 @@ InOrderCPU::CPUEvent::process() cpu->activateThread(tid); break; - //@TODO: Consider Implementing "Suspend Thread" as Separate from - 
//Deallocate case ActivateNextReadyThread: cpu->activateNextReadyThread(); break; @@ -129,28 +123,12 @@ InOrderCPU::CPUEvent::process() cpu->deactivateThread(tid); break; - case SuspendThread: // Suspend & Deallocate are same for now. - cpu->suspendThread(tid); - break; - case DeallocateThread: cpu->deallocateThread(tid); break; - case EnableVPEs: - cpu->enableVPEs(vpe); - break; - - case DisableVPEs: - cpu->disableVPEs(tid, vpe); - break; - - case EnableThreads: - cpu->enableThreads(vpe); - break; - - case DisableThreads: - cpu->disableThreads(tid, vpe); + case SuspendThread: + cpu->suspendThread(tid); break; case SquashFromMemStall: @@ -212,8 +190,7 @@ InOrderCPU::InOrderCPU(Params *params) #endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), - stageTracing(params->stageTracing), - numVirtProcs(1) + stageTracing(params->stageTracing) { ThreadID active_threads; cpu_params = params; @@ -335,11 +312,10 @@ InOrderCPU::InOrderCPU(Params *params) memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid])); isa[tid].clear(); - isa[tid].expandForMultithreading(numThreads, numVirtProcs); + isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/); } lastRunningCycle = curTick; - contextSwitch = false; // Define dummy instructions and resource requests to be used. dummyInst = new InOrderDynInst(this, NULL, 0, 0); @@ -526,7 +502,7 @@ InOrderCPU::reset() { for (int i = 0; i < numThreads; i++) { isa[i].reset(coreType, numThreads, - numVirtProcs, dynamic_cast(this)); + 1/*numVirtProcs*/, dynamic_cast(this)); } } @@ -703,6 +679,20 @@ InOrderCPU::activateThread(ThreadID tid) } } +void +InOrderCPU::deactivateContext(ThreadID tid, int delay) +{ + DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid); + + scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst, delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. 
+ activityRec.activity(); + + _status = Running; +} + void InOrderCPU::deactivateThread(ThreadID tid) { @@ -722,6 +712,40 @@ InOrderCPU::deactivateThread(ThreadID tid) } } +void +InOrderCPU::deallocateContext(ThreadID tid, int delay) +{ + DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...\n", tid); + + scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + + _status = Running; +} + +void +InOrderCPU::deallocateThread(ThreadID tid) +{ + DPRINTF(InOrderCPU, "[tid:%i]: Calling deallocate thread.\n", tid); + + if (isThreadActive(tid)) { + DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n", + tid); + list::iterator thread_it = + std::find(activeThreads.begin(), activeThreads.end(), tid); + + removePipelineStalls(*thread_it); + + activeThreads.erase(thread_it); + } + + // TODO: "Un"Load/Unmap register file state + +} + void InOrderCPU::removePipelineStalls(ThreadID tid) { @@ -733,36 +757,6 @@ InOrderCPU::removePipelineStalls(ThreadID tid) } } -bool -InOrderCPU::isThreadInCPU(ThreadID tid) -{ - list::iterator isCurrent = - std::find(currentThreads.begin(), currentThreads.end(), tid); - - return (isCurrent != currentThreads.end()); -} - -void -InOrderCPU::addToCurrentThreads(ThreadID tid) -{ - if (!isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU." 
- "\n", tid); - currentThreads.push_back(tid); - } -} - -void -InOrderCPU::removeFromCurrentThreads(ThreadID tid) -{ - if (isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, - "Adding Thread %i to current threads list in CPU.\n", tid); - list::iterator isCurrent = - std::find(currentThreads.begin(), currentThreads.end(), tid); - currentThreads.erase(isCurrent); - } -} bool InOrderCPU::isThreadSuspended(ThreadID tid) @@ -773,125 +767,6 @@ InOrderCPU::isThreadSuspended(ThreadID tid) return (isSuspended!= suspendedThreads.end()); } -void -InOrderCPU::enableVirtProcElement(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling " - "Enabling of concurrent virtual processor execution", - vpe); - - scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, dummyInst); -} - -void -InOrderCPU::enableVPEs(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Concurrent Execution " - "virtual processors %i", vpe); - - list::iterator thread_it = currentThreads.begin(); - - while (thread_it != currentThreads.end()) { - if (!isThreadSuspended(*thread_it)) { - activateThread(*thread_it); - } - thread_it++; - } -} - -void -InOrderCPU::disableVirtProcElement(ThreadID tid, unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling " - "Disabling of concurrent virtual processor execution", - vpe); - - scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, dummyInst); -} - -void -InOrderCPU::disableVPEs(ThreadID tid, unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Disabling Concurrent Execution of " - "virtual processors %i", vpe); - - unsigned base_vpe = TheISA::getVirtProcNum(tcBase(tid)); - - list::iterator thread_it = activeThreads.begin(); - - vector::iterator> removeList; - - while (thread_it != activeThreads.end()) { - if (base_vpe != vpe) { - removeList.push_back(thread_it); - } - thread_it++; - } - - for (int i = 0; i < removeList.size(); i++) { - activeThreads.erase(removeList[i]); - } -} - -void -InOrderCPU::enableMultiThreading(unsigned vpe) -{ - // Schedule event to take place at 
end of cycle - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on " - "virtual processor %i", vpe); - - scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, dummyInst); -} - -void -InOrderCPU::enableThreads(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Multithreading on " - "virtual processor %i", vpe); - - list::iterator thread_it = currentThreads.begin(); - - while (thread_it != currentThreads.end()) { - if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) { - if (!isThreadSuspended(*thread_it)) { - activateThread(*thread_it); - } - } - thread_it++; - } -} -void -InOrderCPU::disableMultiThreading(ThreadID tid, unsigned vpe) -{ - // Schedule event to take place at end of cycle - DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on " - "virtual processor %i", tid, vpe); - - scheduleCpuEvent(DisableThreads, NoFault, tid, dummyInst); -} - -void -InOrderCPU::disableThreads(ThreadID tid, unsigned vpe) -{ - DPRINTF(InOrderCPU, "[tid:%i]: Disabling Multithreading on " - "virtual processor %i", tid, vpe); - - list::iterator thread_it = activeThreads.begin(); - - vector::iterator> removeList; - - while (thread_it != activeThreads.end()) { - if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) { - removeList.push_back(thread_it); - } - thread_it++; - } - - for (int i = 0; i < removeList.size(); i++) { - activeThreads.erase(removeList[i]); - } -} - void InOrderCPU::updateThreadPriority() { @@ -958,6 +833,12 @@ InOrderCPU::activateNextReadyContext(int delay) _status = Running; } +void +InOrderCPU::haltContext(ThreadID tid, int delay) +{ + suspendContext(tid, delay); +} + void InOrderCPU::suspendContext(ThreadID tid, int delay) { @@ -973,24 +854,6 @@ InOrderCPU::suspendThread(ThreadID tid) suspendedThreads.push_back(tid); } -void -InOrderCPU::deallocateContext(ThreadID tid, int delay) -{ - scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); -} - -void -InOrderCPU::deallocateThread(ThreadID tid) -{ - 
DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...", tid); - - removeFromCurrentThreads(tid); - - deactivateThread(tid); - - squashThreadInPipeline(tid); -} - void InOrderCPU::squashThreadInPipeline(ThreadID tid) { @@ -1000,45 +863,12 @@ InOrderCPU::squashThreadInPipeline(ThreadID tid) } } -void -InOrderCPU::haltContext(ThreadID tid, int delay) -{ - DPRINTF(InOrderCPU, "[tid:%i]: Halt context called.\n", tid); - - // Halt is same thing as deallocate for now - // @TODO: Differentiate between halt & deallocate in the CPU - // model - deallocateContext(tid, delay); -} - -void -InOrderCPU::insertThread(ThreadID tid) -{ - panic("Unimplemented Function\n."); -} - -void -InOrderCPU::removeThread(ThreadID tid) -{ - DPRINTF(InOrderCPU, "Removing Thread %i from CPU.\n", tid); - - /** Broadcast to CPU resources*/ -} - PipelineStage* InOrderCPU::getPipeStage(int stage_num) { return pipelineStage[stage_num]; } - -void -InOrderCPU::activateWhenReady(ThreadID tid) -{ - panic("Unimplemented Function\n."); -} - - uint64_t InOrderCPU::readPC(ThreadID tid) { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 854f5167c..c31481421 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -179,10 +179,6 @@ class InOrderCPU : public BaseCPU DeactivateThread, DeallocateThread, SuspendThread, - DisableThreads, - EnableThreads, - DisableVPEs, - EnableVPEs, Trap, InstGraduated, SquashFromMemStall, @@ -347,18 +343,6 @@ class InOrderCPU : public BaseCPU void trap(Fault fault, ThreadID tid, int delay = 0); void trapCPU(Fault fault, ThreadID tid); - /** squashFromMemStall() - sets up a squash event - * squashDueToMemStall() - squashes pipeline - */ - void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); - void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); - - /** Setup CPU to insert a thread's context */ - void insertThread(ThreadID tid); - - /** Remove all of a thread's context from CPU */ - void removeThread(ThreadID tid); - /** 
Add Thread to Active Threads List. */ void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); @@ -367,16 +351,28 @@ class InOrderCPU : public BaseCPU void activateNextReadyContext(int delay = 0); void activateNextReadyThread(); - /** Remove Thread from Active Threads List */ + /** Remove from Active Thread List */ + void deactivateContext(ThreadID tid, int delay = 0); + void deactivateThread(ThreadID tid); + + /** Suspend Thread, Remove from Active Threads List, Add to Suspend List */ + void haltContext(ThreadID tid, int delay = 0); void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); - /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. - */ + /** Remove Thread from Active Threads List, Remove Any Loaded Thread State */ void deallocateContext(ThreadID tid, int delay = 0); void deallocateThread(ThreadID tid); - void deactivateThread(ThreadID tid); + + /** squashFromMemStall() - sets up a squash event + * squashDueToMemStall() - squashes pipeline + */ + void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); + void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); + + void removePipelineStalls(ThreadID tid); + void squashThreadInPipeline(ThreadID tid); + void squashBehindMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); PipelineStage* getPipeStage(int stage_num); @@ -387,37 +383,6 @@ class InOrderCPU : public BaseCPU return 0; } - /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. - */ - void haltContext(ThreadID tid, int delay = 0); - - void removePipelineStalls(ThreadID tid); - - void squashThreadInPipeline(ThreadID tid); - - /// Notify the CPU to enable a virtual processor element. - virtual void enableVirtProcElement(unsigned vpe); - void enableVPEs(unsigned vpe); - - /// Notify the CPU to disable a virtual processor element. 
- virtual void disableVirtProcElement(ThreadID tid, unsigned vpe); - void disableVPEs(ThreadID tid, unsigned vpe); - - /// Notify the CPU that multithreading is enabled. - virtual void enableMultiThreading(unsigned vpe); - void enableThreads(unsigned vpe); - - /// Notify the CPU that multithreading is disabled. - virtual void disableMultiThreading(ThreadID tid, unsigned vpe); - void disableThreads(ThreadID tid, unsigned vpe); - - /** Activate a Thread When CPU Resources are Available. */ - void activateWhenReady(ThreadID tid); - - /** Add or Remove a Thread Context in the CPU. */ - void doContextSwitch(); - /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); @@ -615,21 +580,15 @@ class InOrderCPU : public BaseCPU /** Active Threads List */ std::list activeThreads; - /** Current Threads List */ - std::list currentThreads; - /** Ready Threads List */ std::list readyThreads; /** Suspended Threads List */ std::list suspendedThreads; - /** Thread Status Functions (Unused Currently) */ - bool isThreadInCPU(ThreadID tid); + /** Thread Status Functions */ bool isThreadActive(ThreadID tid); bool isThreadSuspended(ThreadID tid); - void addToCurrentThreads(ThreadID tid); - void removeFromCurrentThreads(ThreadID tid); private: /** The activity recorder; used to tell if the CPU has any @@ -643,7 +602,8 @@ class InOrderCPU : public BaseCPU ThreadID numActiveThreads() { return activeThreads.size(); } /** Thread id of active thread - * Only used for SwitchOnCacheMiss model. Assumes only 1 thread active + * Only used for SwitchOnCacheMiss model. + * Assumes only 1 thread active */ ThreadID activeThreadId() { @@ -672,9 +632,6 @@ class InOrderCPU : public BaseCPU virtual void wakeup(); #endif - /** Gets a free thread id. Use if thread ids change across system. 
*/ - ThreadID getFreeTid(); - // LL/SC debug functionality unsigned stCondFails; @@ -740,18 +697,9 @@ class InOrderCPU : public BaseCPU /** Per-Stage Instruction Tracing */ bool stageTracing; - /** Is there a context switch pending? */ - bool contextSwitch; - - /** Threads Scheduled to Enter CPU */ - std::list cpuWaitList; - /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; - /** Number of Virtual Processors the CPU can process */ - unsigned numVirtProcs; - /** Update Thread , used for statistic purposes*/ inline void tickThreadStats(); diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 5ab839615..79f8de05d 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -583,30 +583,6 @@ InOrderDynInst::deallocateContext(int thread_num) this->cpu->deallocateContext(thread_num); } -void -InOrderDynInst::enableVirtProcElement(unsigned vpe) -{ - this->cpu->enableVirtProcElement(vpe); -} - -void -InOrderDynInst::disableVirtProcElement(unsigned vpe) -{ - this->cpu->disableVirtProcElement(threadNumber, vpe); -} - -void -InOrderDynInst::enableMultiThreading(unsigned vpe) -{ - this->cpu->enableMultiThreading(vpe); -} - -void -InOrderDynInst::disableMultiThreading(unsigned vpe) -{ - this->cpu->disableMultiThreading(threadNumber, vpe); -} - template inline Fault InOrderDynInst::read(Addr addr, T &data, unsigned flags) diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 522b4e8d7..b573c1029 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -515,12 +515,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted //////////////////////////////////////////////////////////// virtual void deallocateContext(int thread_num); - virtual void enableVirtProcElement(unsigned vpe); - virtual void disableVirtProcElement(unsigned vpe); - - virtual void enableMultiThreading(unsigned vpe); - 
virtual void disableMultiThreading(unsigned vpe); - //////////////////////////////////////////////////////////// // // PROGRAM COUNTERS - PC/NPC/NPC From 96b493d3159f7e94b8e53edbe562e28076f2af95 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:47 -0500 Subject: [PATCH 10/36] inorder: ready/suspend status fns update/add in the use of isThreadReady & isThreadSuspended functions.Check in activateThread what list a thread is on so it can be managed accordingly. --- src/cpu/inorder/cpu.cc | 56 ++++++++++++++++++++++++++++++------------ src/cpu/inorder/cpu.hh | 1 + 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 954309a74..ec6bb21ee 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -628,7 +628,7 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, resPool->scheduleEvent(c_event, inst, 0, 0, tid); } -inline bool +bool InOrderCPU::isThreadActive(ThreadID tid) { list::iterator isActive = @@ -637,6 +637,23 @@ InOrderCPU::isThreadActive(ThreadID tid) return (isActive != activeThreads.end()); } +bool +InOrderCPU::isThreadReady(ThreadID tid) +{ + list::iterator isReady = + std::find(readyThreads.begin(), readyThreads.end(), tid); + + return (isReady != readyThreads.end()); +} + +bool +InOrderCPU::isThreadSuspended(ThreadID tid) +{ + list::iterator isSuspended = + std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); + + return (isSuspended != suspendedThreads.end()); +} void InOrderCPU::activateNextReadyThread() @@ -655,26 +672,40 @@ InOrderCPU::activateNextReadyThread() readyThreads.erase(ready_it); } else { DPRINTF(InOrderCPU, - "No Ready Threads to Activate.\n"); + "Attempting to activate new thread, but No Ready Threads to" + "activate.\n"); } } void InOrderCPU::activateThread(ThreadID tid) { + if (isThreadSuspended(tid)) { + DPRINTF(InOrderCPU, + "Removing [tid:%i] from suspended threads list.\n", tid); + + list::iterator susp_it = + 
std::find(suspendedThreads.begin(), suspendedThreads.end(), + tid); + suspendedThreads.erase(susp_it); + } + if (threadModel == SwitchOnCacheMiss && numActiveThreads() == 1) { DPRINTF(InOrderCPU, - "Ignoring Activation of [tid:%i]. Placing on " - "ready list\n", tid); + "Ignoring activation of [tid:%i], since [tid:%i] is " + "already running.\n", tid, activeThreadId()); + + DPRINTF(InOrderCPU,"Placing [tid:%i] ready threads list\n", + tid); readyThreads.push_back(tid); - } else if (!isThreadActive(tid)) { + } else if (!isThreadActive(tid)) { DPRINTF(InOrderCPU, - "Adding Thread %i to active threads list in CPU.\n", tid); + "Adding [tid:%i] to active threads list.\n", tid); activeThreads.push_back(tid); - + wakeCPU(); } } @@ -710,6 +741,8 @@ InOrderCPU::deactivateThread(ThreadID tid) activeThreads.erase(thread_it); } + + assert(!isThreadActive(tid)); } void @@ -758,15 +791,6 @@ InOrderCPU::removePipelineStalls(ThreadID tid) } -bool -InOrderCPU::isThreadSuspended(ThreadID tid) -{ - list::iterator isSuspended = - std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); - - return (isSuspended!= suspendedThreads.end()); -} - void InOrderCPU::updateThreadPriority() { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index c31481421..f4f7cb390 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -588,6 +588,7 @@ class InOrderCPU : public BaseCPU /** Thread Status Functions */ bool isThreadActive(ThreadID tid); + bool isThreadReady(ThreadID tid); bool isThreadSuspended(ThreadID tid); private: From 4ea296e29686154656c380982f987d7b6e1774f0 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:54 -0500 Subject: [PATCH 11/36] inorder: fetch thread bug dont check total # of threads but instead all active threads --- src/cpu/inorder/cpu.hh | 2 +- src/cpu/inorder/first_stage.cc | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 
f4f7cb390..7ac433723 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -611,7 +611,7 @@ class InOrderCPU : public BaseCPU if (numActiveThreads() > 0) return activeThreads.front(); else - return -1; + return InvalidThreadID; } diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 1427ca46a..75e13e559 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -205,11 +205,12 @@ FirstStage::processInsts(ThreadID tid) ThreadID FirstStage::getFetchingThread(FetchPriority &fetch_priority) { - if (numThreads > 1) { - switch (fetch_priority) { + ThreadID num_active_threads = cpu->numActiveThreads(); + if (num_active_threads > 1) { + switch (fetch_priority) { case SingleThread: - return 0; + return cpu->activeThreadId(); case RoundRobin: return roundRobin(); @@ -217,7 +218,7 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority) default: return InvalidThreadID; } - } else { + } else if (num_active_threads == 1) { ThreadID tid = *activeThreads->begin(); if (stageStatus[tid] == Running || @@ -226,8 +227,9 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority) } else { return InvalidThreadID; } - } - + } else { + return InvalidThreadID; + } } ThreadID From 4dbc2f17180d3d8c82d5414daa55b102de9755e5 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:02 -0500 Subject: [PATCH 12/36] inorder: suspend in respool give resources their own specific activity to do for a "suspend" event instead of defaulting to deactivating the thread for a suspend thread event. This really matters for the fetch sequence unit which wants to remove the thread from fetching while other units want to ignore a thread suspension. If you deactivate a thread in a resource then you may lose some of the allotted bandwidth that the thread is taking up... 
--- src/cpu/inorder/resource.hh | 4 ++ src/cpu/inorder/resource_pool.cc | 48 ++++++++++++++++++--- src/cpu/inorder/resource_pool.hh | 3 ++ src/cpu/inorder/resources/cache_unit.cc | 14 +++++- src/cpu/inorder/resources/fetch_seq_unit.cc | 6 +++ src/cpu/inorder/resources/fetch_seq_unit.hh | 1 + 6 files changed, 68 insertions(+), 8 deletions(-) diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index f7c4b8fcd..4ae4db818 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -93,6 +93,10 @@ class Resource { */ virtual void deactivateThread(ThreadID tid); + /** Resources that care about thread activation override this. */ + virtual void suspendThread(ThreadID tid) { } + + /** Resources that care when an instruction has been graduated * can override this */ diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 97ba4d087..45a4a9e60 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -226,7 +226,7 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; - case InOrderCPU::SuspendThread: + case InOrderCPU::DeactivateThread: case InOrderCPU::DeallocateThread: { @@ -246,6 +246,23 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; + case InOrderCPU::SuspendThread: + { + + DPRINTF(Resource, "Scheduling Suspend Thread Resource Pool Event for tick %i.\n", + curTick + delay); + ResPoolEvent *res_pool_event = new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + tid); + + mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + + } + break; + case ResourcePool::InstGraduated: { DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool " @@ -309,8 +326,9 @@ void ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " - "[sn:%i].\n", tid, 
stage_num, done_seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting Squash All Event " + " starting w/stage %i for all instructions above [sn:%i].\n", + tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -323,8 +341,9 @@ void ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " - "[sn:%i].\n", stage_num, tid, done_seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting SquashDueToMemStall Event" + " starting w/stage %i for all instructions above [sn:%i].\n", + tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -370,6 +389,19 @@ ResourcePool::deactivateAll(ThreadID tid) } } +void +ResourcePool::suspendAll(ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Suspension to all resources.\n", + tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->suspendThread(tid); + } +} + void ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) { @@ -409,11 +441,15 @@ ResourcePool::ResPoolEvent::process() resPool->activateAll(tid); break; - case InOrderCPU::SuspendThread: + case InOrderCPU::DeactivateThread: case InOrderCPU::DeallocateThread: resPool->deactivateAll(tid); break; + case InOrderCPU::SuspendThread: + resPool->suspendAll(tid); + break; + case ResourcePool::InstGraduated: resPool->instGraduated(seqNum, tid); break; diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 61e691f35..ae63c4c59 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -172,6 +172,9 @@ class ResourcePool { /** De-Activate Thread in all resources */ void deactivateAll(ThreadID tid); + /** De-Activate Thread in all resources */ + void suspendAll(ThreadID tid); + /** Broadcast graduation to all resources */ void instGraduated(InstSeqNum seq_num, ThreadID tid); diff --git 
a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 570d27fbe..8f92db3e4 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -158,9 +158,9 @@ CacheUnit::getSlot(DynInstPtr inst) return new_slot; } else { DPRINTF(InOrderCachePort, - "Denying request because there is an outstanding" + "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", - req_addr, addrMap[req_addr], inst->memTime); + inst->readTid(), req_addr, addrMap[req_addr], inst->memTime); return -1; } } @@ -702,6 +702,13 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); + if (cache_req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n"); + + cpu->activateContext(tid); + } + // Wake up the CPU (if it went to sleep and was waiting on this // completion event). 
cpu->wakeCPU(); @@ -784,6 +791,9 @@ CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, // thread then you need to reevaluate this code // NOTE: squash should originate from // pipeline_stage.cc:processInstSchedule + DPRINTF(InOrderCachePort, "Squashing above [sn:%u]\n", + squash_seq_num + 1); + squash(inst, stage_num, squash_seq_num + 1, tid); } diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 1d0b92075..e0b9ea1f9 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -336,3 +336,9 @@ FetchSeqUnit::deactivateThread(ThreadID tid) if (thread_it != cpu->fetchPriorityList.end()) cpu->fetchPriorityList.erase(thread_it); } + +void +FetchSeqUnit::suspendThread(ThreadID tid) +{ + deactivateThread(tid); +} diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index a4495564b..fdbc4521f 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -59,6 +59,7 @@ class FetchSeqUnit : public Resource { virtual void init(); virtual void activateThread(ThreadID tid); virtual void deactivateThread(ThreadID tid); + virtual void suspendThread(ThreadID tid); virtual void execute(int slot_num); /** Override default Resource squash sequence. This actually, From 611a8642c2d50989da15e1ddd9dc87c036e8ab99 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:12 -0500 Subject: [PATCH 13/36] inorder: mem. mgmt. 
update update address List and address Map to take into account multiple threads --- src/cpu/inorder/resources/cache_unit.cc | 18 +++++++++++------- src/cpu/inorder/resources/cache_unit.hh | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 8f92db3e4..3de5c518a 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -131,6 +131,8 @@ CacheUnit::init() int CacheUnit::getSlot(DynInstPtr inst) { + ThreadID tid = inst->readTid(); + if (tlbBlocked[inst->threadNumber]) { return -1; } @@ -142,7 +144,7 @@ CacheUnit::getSlot(DynInstPtr inst) Addr req_addr = inst->getMemAddr(); if (resName == "icache_port" || - find(addrList.begin(), addrList.end(), req_addr) == addrList.end()) { + find(addrList[tid].begin(), addrList[tid].end(), req_addr) == addrList[tid].end()) { int new_slot = Resource::getSlot(inst); @@ -150,8 +152,8 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; inst->memTime = curTick; - addrList.push_back(req_addr); - addrMap[req_addr] = inst->seqNum; + addrList[tid].push_back(req_addr); + addrMap[tid][req_addr] = inst->seqNum; DPRINTF(InOrderCachePort, "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", inst->readTid(), inst->seqNum, req_addr); @@ -160,7 +162,7 @@ CacheUnit::getSlot(DynInstPtr inst) DPRINTF(InOrderCachePort, "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. 
by [sn:%i] @ tick %i\n", - inst->readTid(), req_addr, addrMap[req_addr], inst->memTime); + inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime); return -1; } } @@ -168,15 +170,17 @@ CacheUnit::getSlot(DynInstPtr inst) void CacheUnit::freeSlot(int slot_num) { - vector::iterator vect_it = find(addrList.begin(), addrList.end(), + ThreadID tid = reqMap[slot_num]->inst->readTid(); + + vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), reqMap[slot_num]->inst->getMemAddr()); - assert(vect_it != addrList.end()); + assert(vect_it != addrList[tid].end()); DPRINTF(InOrderCachePort, "[tid:%i]: Address %08p removed from dependency list\n", reqMap[slot_num]->inst->readTid(), (*vect_it)); - addrList.erase(vect_it); + addrList[tid].erase(vect_it); Resource::freeSlot(slot_num); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index a6b07ebd9..26f6859ed 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -198,9 +198,9 @@ class CacheUnit : public Resource bool cacheBlocked; - std::vector addrList; + std::vector addrList[ThePipeline::MaxThreads]; - std::map addrMap; + std::map addrMap[ThePipeline::MaxThreads]; public: int cacheBlkSize; From 3eb04b4ad73cb66e86d09ffd5989a93d9f62b299 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:25 -0500 Subject: [PATCH 14/36] inorder: add threadmodel flag this prints out messages relative to what threading model is being used (smt, switch-on-miss, single, etc.) 
--- src/cpu/inorder/SConscript | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index 82a1028c2..afc6a29e4 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -52,12 +52,14 @@ if 'InOrderCPU' in env['CPU_MODELS']: TraceFlag('InOrderUseDef') TraceFlag('InOrderMDU') TraceFlag('InOrderGraduation') + TraceFlag('ThreadModel') TraceFlag('RefCount') CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU', 'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred', 'InOrderDecode', 'InOrderExecute', 'InOrderInstBuffer', 'InOrderUseDef', - 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource']) + 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource', + 'ThreadModel']) Source('pipeline_traits.cc') Source('inorder_dyn_inst.cc') From 90d3b45a566847fe15095b92238e32973ad9cc0e Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:38 -0500 Subject: [PATCH 15/36] inorder: ready thread wakeup allow a thread to wakeup and be activated after it has been in suspended state and another thread is switched out. Need to give pipeline stages an "activateThread" function so that they can get to their suspended instruction when the time is right.
--- src/cpu/inorder/cpu.cc | 14 +++++++++++- src/cpu/inorder/cpu.hh | 3 ++- src/cpu/inorder/pipeline_stage.cc | 30 +++++++++++++++++++++++++ src/cpu/inorder/pipeline_stage.hh | 2 ++ src/cpu/inorder/resources/cache_unit.cc | 5 ++++- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index ec6bb21ee..501150386 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -674,6 +674,8 @@ InOrderCPU::activateNextReadyThread() DPRINTF(InOrderCPU, "Attempting to activate new thread, but No Ready Threads to" "activate.\n"); + DPRINTF(InOrderCPU, + "Unable to switch to next active thread.\n"); } } @@ -696,7 +698,7 @@ InOrderCPU::activateThread(ThreadID tid) "Ignoring activation of [tid:%i], since [tid:%i] is " "already running.\n", tid, activeThreadId()); - DPRINTF(InOrderCPU,"Placing [tid:%i] ready threads list\n", + DPRINTF(InOrderCPU,"Placing [tid:%i] on ready threads list\n", tid); readyThreads.push_back(tid); @@ -706,10 +708,20 @@ InOrderCPU::activateThread(ThreadID tid) "Adding [tid:%i] to active threads list.\n", tid); activeThreads.push_back(tid); + activateThreadInPipeline(tid); + wakeCPU(); } } +void +InOrderCPU::activateThreadInPipeline(ThreadID tid) +{ + for (int stNum=0; stNum < NumStages; stNum++) { + pipelineStage[stNum]->activateThread(tid); + } +} + void InOrderCPU::deactivateContext(ThreadID tid, int delay) { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 7ac433723..1e514e1ed 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -346,7 +346,8 @@ class InOrderCPU : public BaseCPU /** Add Thread to Active Threads List. */ void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); - + void activateThreadInPipeline(ThreadID tid); + /** Add Thread to Active Threads List. 
*/ void activateNextReadyContext(int delay = 0); void activateNextReadyThread(); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 30a3733b0..ef91f206b 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -558,6 +558,28 @@ PipelineStage::updateStatus() } } +void +PipelineStage::activateThread(ThreadID tid) +{ + if (cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + if (!switchedOutValid[tid]) { + DPRINTF(InOrderStage, "[tid:%i] No instruction available in " + "switch out buffer.\n", tid); + } else { + DynInstPtr inst = switchedOutBuffer[tid]; + + DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", + tid, inst->seqNum, inst->readPC(), inst->threadNumber); + + skidBuffer[tid].push(inst); + + switchedOutBuffer[tid] = NULL; + + switchedOutValid[tid] = false; + } + } + +} void @@ -945,6 +967,11 @@ PipelineStage::processInstSchedule(DynInstPtr inst) if (req->isMemStall() && cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { // Save Stalling Instruction + DPRINTF(ThreadModel, "[tid:%i] Detected cache miss.\n", tid); + + DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n", + tid, inst->seqNum); + switchedOutBuffer[tid] = inst; switchedOutValid[tid] = true; @@ -956,9 +983,12 @@ PipelineStage::processInstSchedule(DynInstPtr inst) // Switch On Cache Miss //===================== // Suspend Thread at end of cycle + DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n", tid); cpu->suspendContext(tid); // Activate Next Ready Thread at end of cycle + DPRINTF(ThreadModel, "Attempting to activate next ready thread due to" + " cache miss.\n"); cpu->activateNextReadyContext(); } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index f10906e4c..dfe1ac7c3 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -235,6 +235,8 @@ class PipelineStage public: + virtual 
void activateThread(ThreadID tid); + /** Squashes if there is a PC-relative branch that was predicted * incorrectly. Sends squash information back to fetch. */ diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 3de5c518a..2cf6c3195 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -708,9 +708,12 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) if (cache_req->isMemStall() && cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { - DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n"); + DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n", tid); cpu->activateContext(tid); + + DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache" + "miss.\n", tid); } // Wake up the CPU (if it went to sleep and was waiting on this From aacc5cb205c17a91545a5d8209f5c4bda85543a9 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:49 -0500 Subject: [PATCH 16/36] inorder: add updatePC event to resPool this will be used for when a thread comes back from a cache miss, it needs to update the PCs because the inst might have been a branch or delayslot in which the next PC isn't always a straight addition --- src/cpu/inorder/pipeline_stage.cc | 7 ++++- src/cpu/inorder/resource.hh | 4 +++ src/cpu/inorder/resource_pool.cc | 34 +++++++++++++++++++++ src/cpu/inorder/resource_pool.hh | 4 +++ src/cpu/inorder/resources/cache_unit.cc | 6 ++-- src/cpu/inorder/resources/fetch_seq_unit.cc | 14 +++++++++ src/cpu/inorder/resources/fetch_seq_unit.hh | 2 ++ 7 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index ef91f206b..620951e34 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -571,10 +571,15 @@ PipelineStage::activateThread(ThreadID tid) DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n",
tid, inst->seqNum, inst->readPC(), inst->threadNumber); + // Make instruction available for pipeline processing skidBuffer[tid].push(inst); - switchedOutBuffer[tid] = NULL; + // Update PC so that we start fetching after this instruction to prevent + // "double"-execution of instructions + cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::UpdateAfterContextSwitch, inst, 0, 0, tid); + // Clear switchout buffer + switchedOutBuffer[tid] = NULL; switchedOutValid[tid] = false; } } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 4ae4db818..383340df2 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -96,6 +96,10 @@ class Resource { /** Resources that care about thread activation override this. */ virtual void suspendThread(ThreadID tid) { } + /** Will be called the cycle before a context switch. Any bookkeeping + * that needs to be kept for that, can be done here + */ + virtual void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { } /** Resources that care when an instruction has been graduated * can override this diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 45a4a9e60..20f112a66 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -201,6 +201,9 @@ ResourcePool::slotsInUse(int res_idx) return resources[res_idx]->slotsInUse(); } +//@todo: split this function and call this version schedulePoolEvent +// and use this scheduleEvent for scheduling a specific event on +// a resource void ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, int delay, int res_idx, ThreadID tid) @@ -310,6 +313,20 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; + case ResourcePool::UpdateAfterContextSwitch: + { + DPRINTF(Resource, "Scheduling UpdatePC Resource Pool Event for tick %i.\n", + curTick + delay); + ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, + inst, 
+ inst->squashingStage, + inst->seqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + + } + break; + default: DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); @@ -415,6 +432,19 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) } } +void +ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Broadcasting Update PC to all resources.\n", + tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->updateAfterContextSwitch(inst, tid); + } +} + ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) : Event((Event::Priority)((unsigned)CPU_Tick_Pri+5)), resPool(_resPool), eventType((InOrderCPU::CPUEventType) Default) @@ -462,6 +492,10 @@ ResourcePool::ResPoolEvent::process() resPool->squashDueToMemStall(inst, stageNum, seqNum, tid); break; + case ResourcePool::UpdateAfterContextSwitch: + resPool->updateAfterContextSwitch(inst, tid); + break; + default: fatal("Unrecognized Event Type"); } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index ae63c4c59..3f62d2caa 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -63,6 +63,7 @@ class ResourcePool { enum ResPoolEventType { InstGraduated = InOrderCPU::NumCPUEvents, SquashAll, + UpdateAfterContextSwitch, Default }; @@ -175,6 +176,9 @@ class ResourcePool { /** De-Activate Thread in all resources */ void suspendAll(ThreadID tid); + /** Broadcast Context Switch Update to all resources */ + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Broadcast graduation to all resources */ void instGraduated(InstSeqNum seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 2cf6c3195..4f9ed3eca 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ 
b/src/cpu/inorder/resources/cache_unit.cc @@ -49,14 +49,14 @@ using namespace ThePipeline; Tick CacheUnit::CachePort::recvAtomic(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvAtomic callback!"); + panic("CacheUnit::CachePort doesn't expect recvAtomic callback!"); return curTick; } void CacheUnit::CachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + panic("CacheUnit::CachePort doesn't expect recvFunctional callback!"); } void @@ -65,7 +65,7 @@ CacheUnit::CachePort::recvStatusChange(Status status) if (status == RangeChange) return; - panic("DefaultFetch doesn't expect recvStatusChange callback!"); + panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!"); } bool diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index e0b9ea1f9..c217f972e 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -342,3 +342,17 @@ FetchSeqUnit::suspendThread(ThreadID tid) { deactivateThread(tid); } + +void +FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + pcValid[tid] = true; + + PC[tid] = inst->readNextPC(); + nextPC[tid] = inst->readNextNPC(); + nextNPC[tid] = inst->readNextNPC() + instSize; + + + DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PC:%08p NPC:%08p NNPC:%08p.\n", + tid, PC[tid], nextPC[tid], nextNPC[tid]); +} diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index fdbc4521f..3283e0330 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -61,6 +61,8 @@ class FetchSeqUnit : public Resource { virtual void deactivateThread(ThreadID tid); virtual void suspendThread(ThreadID tid); virtual void execute(int slot_num); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Override default Resource squash sequence. 
This actually, * looks in the global communication buffer to get squash From 069b38c0d546708491d0da84668ba32f82ca7cb8 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:58 -0500 Subject: [PATCH 17/36] inorder: track last branch committed when threads are switching in/out the CPU, we need to keep track of special cases like branches. Add appropriate variables in ThreadState to track this and then use these variables when updating pc after context switch --- src/cpu/inorder/cpu.cc | 27 +++++++++++++-------- src/cpu/inorder/pipeline_stage.cc | 9 ++++--- src/cpu/inorder/resources/fetch_seq_unit.cc | 26 ++++++++++++++------ src/cpu/inorder/thread_state.hh | 13 +++++++--- 4 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 501150386..8d41a18b4 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -709,7 +709,9 @@ InOrderCPU::activateThread(ThreadID tid) activeThreads.push_back(tid); activateThreadInPipeline(tid); - + + thread[tid]->lastActivate = curTick; + wakeCPU(); } } @@ -888,6 +890,7 @@ InOrderCPU::suspendThread(ThreadID tid) DPRINTF(InOrderCPU, "[tid: %i]: Placing on Suspended Threads List...\n", tid); deactivateThread(tid); suspendedThreads.push_back(tid); + thread[tid]->lastSuspend = curTick; } void @@ -1063,15 +1066,22 @@ void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { // Set the CPU's PCs - This contributes to the precise state of the CPU - // which can be used when restoring a thread to the CPU after a fork or - // after an exception - // ================= - // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if - // it's a branch or not + // which can be used when restoring a thread to the CPU after after any + // type of context switching activity (fork, exception, etc.)
setPC(inst->readPC(), tid); setNextPC(inst->readNextPC(), tid); setNextNPC(inst->readNextNPC(), tid); + if (inst->isControl()) { + thread[tid]->lastGradIsBranch = true; + thread[tid]->lastBranchPC = inst->readPC(); + thread[tid]->lastBranchNextPC = inst->readNextPC(); + thread[tid]->lastBranchNextNPC = inst->readNextNPC(); + } else { + thread[tid]->lastGradIsBranch = false; + } + + // Finalize Trace Data For Instruction if (inst->traceData) { //inst->traceData->setCycle(curTick); @@ -1082,9 +1092,6 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) inst->traceData = NULL; } - // Set Last Graduated Instruction In Thread State - //thread[tid]->lastGradInst = inst; - // Increment thread-state's instruction count thread[tid]->numInst++; @@ -1108,7 +1115,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // Broadcast to other resources an instruction // has been completed resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, - tid); + 0, 0, tid); // Finally, remove instruction from CPU removeInst(inst); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 620951e34..55ee3ad12 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -568,15 +568,18 @@ PipelineStage::activateThread(ThreadID tid) } else { DynInstPtr inst = switchedOutBuffer[tid]; - DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", - tid, inst->seqNum, inst->readPC(), inst->threadNumber); + DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into " + "stage skidBuffer %i\n", tid, inst->seqNum, + inst->readPC(), inst->threadNumber); // Make instruction available for pipeline processing skidBuffer[tid].push(inst); // Update PC so that we start fetching after this instruction to prevent // "double"-execution of instructions - cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::UpdateAfterContextSwitch, inst, 0, 0, tid); + 
cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType) + ResourcePool::UpdateAfterContextSwitch, + inst, 0, 0, tid); // Clear switchout buffer switchedOutBuffer[tid] = NULL; diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index c217f972e..ba86a91f0 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -348,11 +348,23 @@ FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { pcValid[tid] = true; - PC[tid] = inst->readNextPC(); - nextPC[tid] = inst->readNextNPC(); - nextNPC[tid] = inst->readNextNPC() + instSize; - - - DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PC:%08p NPC:%08p NNPC:%08p.\n", - tid, PC[tid], nextPC[tid], nextNPC[tid]); + if (cpu->thread[tid]->lastGradIsBranch) { + /** This function assumes that the instruction causing the context + * switch was right after the branch. Thus, if it's not, then + * we are updating incorrectly here + */ + assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC()); + + PC[tid] = cpu->thread[tid]->lastBranchNextNPC; + nextPC[tid] = PC[tid] + instSize; + nextNPC[tid] = nextPC[tid] + instSize; + } else { + PC[tid] = inst->readNextPC(); + nextPC[tid] = inst->readNextNPC(); + nextNPC[tid] = inst->readNextNPC() + instSize; + } + + DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch." 
+ "Assigning PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid], + nextPC[tid], nextNPC[tid]); } diff --git a/src/cpu/inorder/thread_state.hh b/src/cpu/inorder/thread_state.hh index 422df30aa..0a171a99f 100644 --- a/src/cpu/inorder/thread_state.hh +++ b/src/cpu/inorder/thread_state.hh @@ -79,14 +79,14 @@ class InOrderThreadState : public ThreadState { #if FULL_SYSTEM InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num) : ThreadState(reinterpret_cast(_cpu), _thread_num), - cpu(_cpu), inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false) { } #else InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num, Process *_process) : ThreadState(reinterpret_cast(_cpu), _thread_num, _process), - cpu(_cpu), inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false) { } #endif @@ -105,10 +105,15 @@ class InOrderThreadState : public ThreadState { /** Returns a pointer to the TC of this thread. */ ThreadContext *getTC() { return tc; } + /** Return the thread id */ int readTid() { return threadId(); } - /** Pointer to the last graduated instruction in the thread */ - //DynInstPtr lastGradInst; + + /** Is last instruction graduated a branch? */ + bool lastGradIsBranch; + Addr lastBranchPC; + Addr lastBranchNextPC; + Addr lastBranchNextNPC; }; #endif // __CPU_INORDER_THREAD_STATE_HH__ From 5e0b8337ed9c8aa975cd44df5565c2c3dde0c267 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:05 -0500 Subject: [PATCH 18/36] inorder: add/remove halt/deallocate context respectively Halt is called from the exit() system call while deallocate is unused. So to clear up things, just use halt and remove deallocate. 
--- src/cpu/inorder/cpu.cc | 65 +++++++++++--------------------- src/cpu/inorder/cpu.hh | 15 +++++--- src/cpu/inorder/resource_pool.cc | 7 +++- 3 files changed, 38 insertions(+), 49 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 8d41a18b4..5db86b258 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -98,7 +98,7 @@ std::string InOrderCPU::eventNames[NumCPUEvents] = "ActivateThread", "ActivateNextReadyThread", "DeactivateThread", - "DeallocateThread", + "HaltThread", "SuspendThread", "Trap", "InstGraduated", @@ -123,8 +123,8 @@ InOrderCPU::CPUEvent::process() cpu->deactivateThread(tid); break; - case DeallocateThread: - cpu->deallocateThread(tid); + case HaltThread: + cpu->haltThread(tid); break; case SuspendThread: @@ -140,8 +140,7 @@ InOrderCPU::CPUEvent::process() break; default: - fatal("Unrecognized Event Type %d", cpuEventType); - + fatal("Unrecognized Event Type %s", eventNames[cpuEventType]); } cpu->cpuEventRemoveList.push(this); @@ -759,40 +758,6 @@ InOrderCPU::deactivateThread(ThreadID tid) assert(!isThreadActive(tid)); } -void -InOrderCPU::deallocateContext(ThreadID tid, int delay) -{ - DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...\n", tid); - - scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. 
- activityRec.activity(); - - _status = Running; -} - -void -InOrderCPU::deallocateThread(ThreadID tid) -{ - DPRINTF(InOrderCPU, "[tid:%i]: Calling deallocate thread.\n", tid); - - if (isThreadActive(tid)) { - DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n", - tid); - list::iterator thread_it = - std::find(activeThreads.begin(), activeThreads.end(), tid); - - removePipelineStalls(*thread_it); - - activeThreads.erase(thread_it); - } - - // TODO: "Un"Load/Unmap register file state - -} - void InOrderCPU::removePipelineStalls(ThreadID tid) { @@ -874,20 +839,36 @@ InOrderCPU::activateNextReadyContext(int delay) void InOrderCPU::haltContext(ThreadID tid, int delay) { - suspendContext(tid, delay); + DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid); + + scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst, delay); + + activityRec.activity(); +} + +void +InOrderCPU::haltThread(ThreadID tid) +{ + DPRINTF(InOrderCPU, "[tid:%i]: Placing on Halted Threads List...\n", tid); + deactivateThread(tid); + squashThreadInPipeline(tid); + haltedThreads.push_back(tid); + + if (threadModel == SwitchOnCacheMiss) { + activateNextReadyContext(); + } } void InOrderCPU::suspendContext(ThreadID tid, int delay) { scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst, delay); - //_status = Idle; } void InOrderCPU::suspendThread(ThreadID tid) { - DPRINTF(InOrderCPU, "[tid: %i]: Placing on Suspended Threads List...\n", tid); + DPRINTF(InOrderCPU, "[tid:%i]: Placing on Suspended Threads List...\n", tid); deactivateThread(tid); suspendedThreads.push_back(tid); thread[tid]->lastSuspend = curTick; diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 1e514e1ed..70013c0f5 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -177,7 +177,7 @@ class InOrderCPU : public BaseCPU ActivateThread, ActivateNextReadyThread, DeactivateThread, - DeallocateThread, + HaltThread, SuspendThread, Trap, InstGraduated, @@ -357,16 +357,18 @@ class InOrderCPU 
: public BaseCPU void deactivateThread(ThreadID tid); /** Suspend Thread, Remove from Active Threads List, Add to Suspend List */ - void haltContext(ThreadID tid, int delay = 0); void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); - /** Remove Thread from Active Threads List, Remove Any Loaded Thread State */ - void deallocateContext(ThreadID tid, int delay = 0); - void deallocateThread(ThreadID tid); + /** Halt Thread, Remove from Active Thread List, Place Thread on Halted + * Threads List + */ + void haltContext(ThreadID tid, int delay = 0); + void haltThread(ThreadID tid); /** squashFromMemStall() - sets up a squash event * squashDueToMemStall() - squashes pipeline + * @note: maybe squashContext/squashThread would be better? */ void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); @@ -587,6 +589,9 @@ class InOrderCPU : public BaseCPU /** Suspended Threads List */ std::list suspendedThreads; + /** Halted Threads List */ + std::list haltedThreads; + /** Thread Status Functions */ bool isThreadActive(ThreadID tid); bool isThreadReady(ThreadID tid); diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 20f112a66..3750d18d6 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -204,6 +204,9 @@ ResourcePool::slotsInUse(int res_idx) //@todo: split this function and call this version schedulePoolEvent // and use this scheduleEvent for scheduling a specific event on // a resource +//@todo: For arguments that arent being used in a ResPoolEvent, a dummyParam +// or some typedef can be used to signify what's important info +// to the event construction void ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, int delay, int res_idx, ThreadID tid) @@ -229,8 +232,8 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; + case 
InOrderCPU::HaltThread: case InOrderCPU::DeactivateThread: - case InOrderCPU::DeallocateThread: { DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool " @@ -472,7 +475,7 @@ ResourcePool::ResPoolEvent::process() break; case InOrderCPU::DeactivateThread: - case InOrderCPU::DeallocateThread: + case InOrderCPU::HaltThread: resPool->deactivateAll(tid); break; From b4e0ef78379dd5bab0ee6ec824bca3f51dd484c6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:12 -0500 Subject: [PATCH 19/36] inorder: set thread status' set Active/Suspended/Halted status for threads. useful for system when determining if/when to exit simulation --- src/cpu/inorder/cpu.cc | 12 ++++++++++-- src/cpu/inorder/thread_context.hh | 1 - 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 5db86b258..d8fea79d9 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -711,6 +711,8 @@ InOrderCPU::activateThread(ThreadID tid) thread[tid]->lastActivate = curTick; + tcBase(tid)->setStatus(ThreadContext::Active); + wakeCPU(); } } @@ -750,9 +752,11 @@ InOrderCPU::deactivateThread(ThreadID tid) removePipelineStalls(*thread_it); - //@TODO: change stage status' to Idle? 
- activeThreads.erase(thread_it); + + // Ideally, this should be triggered from the + // suspendContext/Thread functions + tcBase(tid)->setStatus(ThreadContext::Suspended); } assert(!isThreadActive(tid)); @@ -854,6 +858,8 @@ InOrderCPU::haltThread(ThreadID tid) squashThreadInPipeline(tid); haltedThreads.push_back(tid); + tcBase(tid)->setStatus(ThreadContext::Halted); + if (threadModel == SwitchOnCacheMiss) { activateNextReadyContext(); } @@ -872,6 +878,8 @@ InOrderCPU::suspendThread(ThreadID tid) deactivateThread(tid); suspendedThreads.push_back(tid); thread[tid]->lastSuspend = curTick; + + tcBase(tid)->setStatus(ThreadContext::Suspended); } void diff --git a/src/cpu/inorder/thread_context.hh b/src/cpu/inorder/thread_context.hh index 820f3077f..6dd5f192f 100644 --- a/src/cpu/inorder/thread_context.hh +++ b/src/cpu/inorder/thread_context.hh @@ -64,7 +64,6 @@ class InOrderThreadContext : public ThreadContext /** Pointer to the thread state that this TC corrseponds to. */ InOrderThreadState *thread; - /** Returns a pointer to the ITB. */ /** @TODO: PERF: Should we bind this to a pointer in constructor? 
*/ TheISA::TLB *getITBPtr() { return cpu->getITBPtr(); } From 4d749472e3cb97ff0421fbf5cbc53d9c89ecfa45 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:31 -0500 Subject: [PATCH 20/36] inorder: enforce stage bandwidth each stage keeps track of insts_processed on a per_thread basis but we should be keeping that on a total basis in order to enforce stage width limits --- src/cpu/inorder/first_stage.cc | 11 ++++++++--- src/cpu/inorder/pipeline_stage.cc | 22 ++++++++++++++-------- src/cpu/inorder/pipeline_stage.hh | 7 ++++++- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 75e13e559..27831469e 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -175,9 +175,14 @@ FirstStage::processInsts(ThreadID tid) ThePipeline::createFrontEndSchedule(inst); } - // Don't let instruction pass to next stage if it hasnt completed - // all of it's requests for this stage. - all_reqs_completed = processInstSchedule(inst); + int reqs_processed = 0; + all_reqs_completed = processInstSchedule(inst, reqs_processed); + + // If the instruction isnt squashed & we've completed one request + // Then we can officially count this instruction toward the stage's + // bandwidth count + if (reqs_processed > 0) + instsProcessed++; if (!all_reqs_completed) { if (new_inst) { diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 55ee3ad12..79f1ff915 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -726,9 +726,11 @@ PipelineStage::tick() nextStage->size = 0; toNextStageIndex = 0; - + sortInsts(); + instsProcessed = 0; + processStage(status_change); if (status_change) { @@ -873,10 +875,8 @@ PipelineStage::processInsts(ThreadID tid) DynInstPtr inst; bool last_req_completed = true; - int insts_processed = 0; - while (insts_available > 0 && - insts_processed < 
stageWidth && (!nextStageValid || canSendInstToStage(stageNum+1)) && last_req_completed) { assert(!insts_to_stage.empty()); @@ -901,8 +901,14 @@ PipelineStage::processInsts(ThreadID tid) continue; } + int reqs_processed = 0; + last_req_completed = processInstSchedule(inst, reqs_processed); - last_req_completed = processInstSchedule(inst); + // If the instruction isnt squashed & we've completed one request + // Then we can officially count this instruction toward the stage's + // bandwidth count + if (reqs_processed > 0) + instsProcessed++; // Don't let instruction pass to next stage if it hasnt completed // all of it's requests for this stage. @@ -916,8 +922,6 @@ PipelineStage::processInsts(ThreadID tid) break; } - insts_processed++; - insts_to_stage.pop(); //++stageProcessedInsts; @@ -938,7 +942,7 @@ PipelineStage::processInsts(ThreadID tid) } bool -PipelineStage::processInstSchedule(DynInstPtr inst) +PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) { bool last_req_completed = true; ThreadID tid = inst->readTid(); @@ -966,6 +970,8 @@ PipelineStage::processInstSchedule(DynInstPtr inst) panic("%i: encountered %s fault!\n", curTick, req->fault->name()); } + + reqs_processed++; } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index dfe1ac7c3..920734e6a 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -178,7 +178,7 @@ class PipelineStage virtual void processInsts(ThreadID tid); /** Process all resources on an instruction's resource schedule */ - virtual bool processInstSchedule(DynInstPtr inst); + virtual bool processInstSchedule(DynInstPtr inst, int &reqs_processed); /** Is there room in the next stage buffer for this instruction? 
*/ virtual bool canSendInstToStage(unsigned stage_num); @@ -270,6 +270,11 @@ class PipelineStage std::vector switchedOutBuffer; std::vector switchedOutValid; + /** Instructions that we've processed this tick + * NOTE: "Processed" means completed at least 1 instruction request + */ + unsigned instsProcessed; + /** Queue of all instructions coming from previous stage on this cycle. */ std::queue insts[ThePipeline::MaxThreads]; From ffa9ecb1fa71f1fe89a65975b2c558e312bbfbc8 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:51 -0500 Subject: [PATCH 21/36] inorder: pipeline stage stats add idle/run/utilization stats for each pipeline stage --- src/cpu/inorder/cpu.cc | 15 +++++++- src/cpu/inorder/first_stage.cc | 14 +++++-- src/cpu/inorder/pipeline_stage.cc | 61 +++++++++++-------------------- src/cpu/inorder/pipeline_stage.hh | 27 ++++++-------- 4 files changed, 55 insertions(+), 62 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index d8fea79d9..4cc9b9f22 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -346,6 +346,11 @@ InOrderCPU::regStats() .prereq(maxResReqCount); #endif + /* Register for each Pipeline Stage */ + for (int stage_num=0; stage_num < ThePipeline::NumStages; stage_num++) { + pipelineStage[stage_num]->regStats(); + } + /* Register any of the InOrderCPU's stats here.*/ timesIdled .name(name() + ".timesIdled") @@ -1289,8 +1294,14 @@ InOrderCPU::wakeCPU() DPRINTF(Activity, "Waking up CPU\n"); - //@todo: figure out how to count idleCycles correctly - //idleCycles += (curTick - 1) - lastRunningCycle; + Tick extra_cycles = tickToCycles((curTick - 1) - lastRunningCycle); + + idleCycles += extra_cycles; + for (int stage_num = 0; stage_num < NumStages; stage_num++) { + pipelineStage[stage_num]->idleCycles += extra_cycles; + } + + numCycles += extra_cycles; mainEventQueue.schedule(&tickEvent, curTick); } diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 
27831469e..c653d152b 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -118,9 +118,9 @@ FirstStage::processStage(bool &status_change) status_change = checkSignalsAndUpdate(tid) || status_change; } - for (int threadFetched = 0; threadFetched < numFetchingThreads; - threadFetched++) { - + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { ThreadID tid = getFetchingThread(fetchPolicy); if (tid >= 0) { @@ -130,6 +130,13 @@ FirstStage::processStage(bool &status_change) DPRINTF(InOrderStage, "No more threads to fetch from.\n"); } } + + if (instsProcessed > 0) { + ++runCycles; + } else { + ++idleCycles; + } + } //@TODO: Note in documentation, that when you make a pipeline stage change, @@ -197,7 +204,6 @@ FirstStage::processInsts(ThreadID tid) } sendInstToNextStage(inst); - //++stageProcessedInsts; } // Record that stage has written to the time buffer for activity diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 79f1ff915..e601edfcc 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -72,41 +72,27 @@ PipelineStage::init(Params *params) std::string PipelineStage::name() const { - return cpu->name() + ".stage-" + to_string(stageNum); + return cpu->name() + ".stage-" + to_string(stageNum); } void PipelineStage::regStats() { -/* stageIdleCycles - .name(name() + ".IdleCycles") - .desc("Number of cycles stage is idle") - .prereq(stageIdleCycles); - stageBlockedCycles - .name(name() + ".BlockedCycles") - .desc("Number of cycles stage is blocked") - .prereq(stageBlockedCycles); - stageRunCycles - .name(name() + ".RunCycles") - .desc("Number of cycles stage is running") - .prereq(stageRunCycles); - stageUnblockCycles - .name(name() + ".UnblockCycles") - .desc("Number of cycles stage is unblocking") - .prereq(stageUnblockCycles); - stageSquashCycles - .name(name() + ".SquashCycles") - .desc("Number of cycles stage is 
squashing") - .prereq(stageSquashCycles); - stageProcessedInsts - .name(name() + ".ProcessedInsts") - .desc("Number of instructions handled by stage") - .prereq(stageProcessedInsts); - stageSquashedInsts - .name(name() + ".SquashedInsts") - .desc("Number of squashed instructions handled by stage") - .prereq(stageSquashedInsts);*/ + idleCycles + .name(name() + ".idleCycles") + .desc("Number of cycles 0 instructions are processed."); + + runCycles + .name(name() + ".runCycles") + .desc("Number of cycles 1+ instructions are processed."); + + utilization + .name(name() + ".utilization") + .desc("Percentage of cycles stage was utilized (processing insts).") + .precision(6); + utilization = (runCycles / cpu->numCycles) * 100; + } @@ -803,6 +789,12 @@ PipelineStage::processStage(bool &status_change) nextStage->size, stageNum + 1); } + if (instsProcessed > 0) { + ++runCycles; + } else { + ++idleCycles; + } + DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(), stageNum); @@ -820,12 +812,6 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) // continue trying to empty skid buffer // check if stall conditions have passed - if (stageStatus[tid] == Blocked) { - ;//++stageBlockedCycles; - } else if (stageStatus[tid] == Squashing) { - ;//++stageSquashCycles; - } - // Stage should try to process as many instructions as its bandwidth // will allow, as long as it is not currently blocked. if (stageStatus[tid] == Running || @@ -867,8 +853,6 @@ PipelineStage::processInsts(ThreadID tid) if (insts_available == 0) { DPRINTF(InOrderStage, "[tid:%u]: Nothing to do, breaking out" " early.\n",tid); - // Should I change the status to idle? 
- //++stageIdleCycles; return; } @@ -892,8 +876,6 @@ PipelineStage::processInsts(ThreadID tid) "squashed, skipping.\n", tid, inst->seqNum, inst->readPC()); - //++stageSquashedInsts; - insts_to_stage.pop(); --insts_available; @@ -924,7 +906,6 @@ PipelineStage::processInsts(ThreadID tid) insts_to_stage.pop(); - //++stageProcessedInsts; --insts_available; } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 920734e6a..be3a1093c 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -353,24 +353,19 @@ class PipelineStage std::vector resources; }; - /** Tracks which stages are telling decode to stall. */ + /** Tracks stage/resource stalls */ Stalls stalls[ThePipeline::MaxThreads]; - //@TODO: Use Stats for the pipeline stages - /** Stat for total number of idle cycles. */ - //Stats::Scalar stageIdleCycles; - /** Stat for total number of blocked cycles. */ - //Stats::Scalar stageBlockedCycles; - /** Stat for total number of normal running cycles. */ - //Stats::Scalar stageRunCycles; - /** Stat for total number of unblocking cycles. */ - //Stats::Scalar stageUnblockCycles; - /** Stat for total number of squashing cycles. */ - //Stats::Scalar stageSquashCycles; - /** Stat for total number of staged instructions. */ - //Stats::Scalar stageProcessedInsts; - /** Stat for total number of squashed instructions. */ - //Stats::Scalar stageSquashedInsts; + /** Number of cycles 0 instruction(s) are processed. */ + Stats::Scalar idleCycles; + + /** Number of cycles 1+ instructions are processed. */ + Stats::Scalar runCycles; + + /** Percentage of cycles 1+ instructions are processed. 
*/ + Stats::Formula utilization; + + }; #endif From 0b29c2d057d2d6f4f8b9b7853da91bcb283e805c Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:59 -0500 Subject: [PATCH 22/36] inorder: ctxt switch stats - m5 line enforcement on use_def.cc,hh --- src/cpu/inorder/cpu.cc | 26 ++- src/cpu/inorder/cpu.hh | 5 + src/cpu/inorder/pipeline_stage.cc | 3 + src/cpu/inorder/resources/graduation_unit.hh | 2 - src/cpu/inorder/resources/use_def.cc | 212 +++++++++++++------ src/cpu/inorder/resources/use_def.hh | 12 +- 6 files changed, 194 insertions(+), 66 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 4cc9b9f22..b69fe2e3b 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -189,7 +189,8 @@ InOrderCPU::InOrderCPU(Params *params) #endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), - stageTracing(params->stageTracing) + stageTracing(params->stageTracing), + instsPerSwitch(0) { ThreadID active_threads; cpu_params = params; @@ -352,6 +353,15 @@ InOrderCPU::regStats() } /* Register any of the InOrderCPU's stats here.*/ + instsPerCtxtSwitch + .name(name() + ".instsPerContextSwitch") + .desc("Instructions Committed Per Context Switch") + .prereq(instsPerCtxtSwitch); + + numCtxtSwitches + .name(name() + ".contextSwitches") + .desc("Number of context switches"); + timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -719,6 +729,8 @@ InOrderCPU::activateThread(ThreadID tid) tcBase(tid)->setStatus(ThreadContext::Active); wakeCPU(); + + numCtxtSwitches++; } } @@ -1056,6 +1068,15 @@ InOrderCPU::addInst(DynInstPtr &inst) return --(instList[tid].end()); } +void +InOrderCPU::updateContextSwitchStats() +{ + // Set Average Stat Here, then reset to 0 + instsPerCtxtSwitch = instsPerSwitch; + instsPerSwitch = 0; +} + + void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { @@ -1086,6 +1107,9 @@ InOrderCPU::instDone(DynInstPtr inst, 
ThreadID tid) inst->traceData = NULL; } + // Increment active thread's instruction count + instsPerSwitch++; + // Increment thread-state's instruction count thread[tid]->numInst++; diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 70013c0f5..6f1f3ee3f 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -707,6 +707,11 @@ class InOrderCPU : public BaseCPU /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; + void updateContextSwitchStats(); + unsigned instsPerSwitch; + Stats::Average instsPerCtxtSwitch; + Stats::Scalar numCtxtSwitches; + /** Update Thread , used for statistic purposes*/ inline void tickThreadStats(); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index e601edfcc..550952947 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -570,6 +570,9 @@ PipelineStage::activateThread(ThreadID tid) // Clear switchout buffer switchedOutBuffer[tid] = NULL; switchedOutValid[tid] = false; + + // Update any CPU stats based off context switches + cpu->updateContextSwitchStats(); } } diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh index ad222b119..7f0db98d0 100644 --- a/src/cpu/inorder/resources/graduation_unit.hh +++ b/src/cpu/inorder/resources/graduation_unit.hh @@ -63,8 +63,6 @@ class GraduationUnit : public Resource { bool *nonSpecInstActive[ThePipeline::MaxThreads]; InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads]; - - /** @todo: Add Resource Stats Here */ }; #endif //__CPU_INORDER_GRAD_UNIT_HH__ diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 36392d054..a4f3a0d21 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -59,6 +59,17 @@ UseDefUnit::UseDefUnit(string res_name, int res_id, int res_width, } +void +UseDefUnit::regStats() +{ + uniqueRegsPerSwitch + .name(name() + 
".uniqueRegsPerSwitch") + .desc("Number of Unique Registers Needed Per Context Switch") + .prereq(uniqueRegsPerSwitch); + + Resource::regStats(); +} + ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -75,7 +86,8 @@ UseDefUnit::findRequest(DynInstPtr inst) map::iterator map_end = reqMap.end(); while (map_it != map_end) { - UseDefRequest* ud_req = dynamic_cast((*map_it).second); + UseDefRequest* ud_req = + dynamic_cast((*map_it).second); assert(ud_req); if (ud_req && @@ -107,9 +119,9 @@ UseDefUnit::execute(int slot_idx) // in the pipeline then stall instructions here if (*nonSpecInstActive[tid] == true && seq_num > *nonSpecSeqNum[tid]) { - DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because there is " - "non-speculative instruction [sn:%i] has not graduated.\n", - tid, seq_num, *nonSpecSeqNum[tid]); + DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because" + "there is non-speculative instruction [sn:%i] has not " + "graduated.\n", tid, seq_num, *nonSpecSeqNum[tid]); return; } else if (inst->isNonSpeculative()) { *nonSpecInstActive[tid] = true; @@ -121,89 +133,129 @@ UseDefUnit::execute(int slot_idx) case ReadSrcReg: { int reg_idx = inst->_srcRegIdx[ud_idx]; - - DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source register idx %i (reg #%i).\n", + + DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source " + "register idx %i (reg #%i).\n", tid, ud_idx, reg_idx); - // Ask register dependency map if it is OK to read from Arch. Reg. File + // Ask register dependency map if it is OK to read from Arch. + // Reg. 
File if (regDepMap[tid]->canRead(reg_idx, inst)) { + + uniqueRegMap[reg_idx] = true; + if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readIntReg(reg_idx,inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i" + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readIntReg(reg_idx,inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readIntReg(reg_idx,inst->readTid())); + cpu->readIntReg(reg_idx, + inst->readTid())); } else if (reg_idx < Ctrl_Base_DepTag) { reg_idx -= FP_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i from Register File:%x (%08f).\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i" + "from Register File:%x (%08f).\n", tid, reg_idx, - cpu->readFloatRegBits(reg_idx, inst->readTid()), - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatRegBits(reg_idx, + inst->readTid()), + cpu->readFloatReg(reg_idx, + inst->readTid())); inst->setFloatSrc(ud_idx, - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatReg(reg_idx, + inst->readTid())); } else { reg_idx -= Ctrl_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readMiscReg(reg_idx, inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i " + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readMiscReg(reg_idx, + inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readMiscReg(reg_idx, inst->readTid())); + cpu->readMiscReg(reg_idx, + inst->readTid())); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" - " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to write\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because " + "of [sn:%i] hasnt read it's registers yet.\n", 
+ tid, outReadSeqNum[tid]); + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to write\n", tid, outReadSeqNum[tid]); } } else { // Look for forwarding opportunities - DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, ud_idx, inst); + DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, + ud_idx, + inst); if (forward_inst) { if (inst->seqNum <= outReadSeqNum[tid]) { - int dest_reg_idx = forward_inst->getDestIdxNum(reg_idx); + int dest_reg_idx = + forward_inst->getDestIdxNum(reg_idx); if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } else if (reg_idx < Ctrl_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readFloatResult(dest_reg_idx) , + tid, + forward_inst->readFloatResult(dest_reg_idx), forward_inst->seqNum, inst->seqNum, ud_idx); inst->setFloatSrc(ud_idx, - forward_inst->readFloatResult(dest_reg_idx)); + forward_inst-> + readFloatResult(dest_reg_idx)); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." 
+ " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read " + "because of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to forward\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to forward\n", tid, outReadSeqNum[tid]); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i is not ready to read.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i" + "is not ready to read.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read " + "register (idx=%i)\n", tid, reg_idx); outReadSeqNum[tid] = inst->seqNum; } @@ -216,12 +268,14 @@ UseDefUnit::execute(int slot_idx) int reg_idx = inst->_destRegIdx[ud_idx]; if (regDepMap[tid]->canWrite(reg_idx, inst)) { - DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i & Attempting to write to Register File.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i &" + "Attempting to write to Register File.\n", tid, reg_idx); - + uniqueRegMap[reg_idx] = true; if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. 
Result " + "0x%x to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -236,33 +290,54 @@ UseDefUnit::execute(int slot_idx) reg_idx -= FP_Base_DepTag; - if (inst->resultType(ud_idx) == InOrderDynInst::Integer) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); + if (inst->resultType(ud_idx) == + InOrderDynInst::Integer) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); - cpu->setFloatRegBits(reg_idx, // Check for FloatRegBits Here + // Check for FloatRegBits Here + cpu->setFloatRegBits(reg_idx, inst->readIntResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Float) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Float) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Double) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Double) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); - cpu->setFloatReg(reg_idx, // Check for FloatRegBits Here + 
// Check for FloatRegBits Here + cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); } else { - panic("Result Type Not Set For [sn:%i] %s.\n", inst->seqNum, inst->instName()); + panic("Result Type Not Set For [sn:%i] %s.\n", + inst->seqNum, inst->instName()); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x " + "to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -279,15 +354,19 @@ UseDefUnit::execute(int slot_idx) ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because " + "of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to read\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to read\n", tid, outReadSeqNum); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is not ready to write.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Dest. 
register idx: %i is " + "not ready to write.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write " + "register (idx=%i)\n", tid, reg_idx); outWriteSeqNum[tid] = inst->seqNum; } @@ -343,18 +422,29 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } if (outReadSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", + tid); outReadSeqNum[tid] = maxSeqNum; } else if (outReadSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read " + "Seq Num %i\n", tid, outReadSeqNum[tid]); } if (outWriteSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", + tid); outWriteSeqNum[tid] = maxSeqNum; } else if (outWriteSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write " + "Seq Num %i\n", tid, outWriteSeqNum[tid]); } } + +void +UseDefUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + uniqueRegsPerSwitch = uniqueRegMap.size(); + uniqueRegMap.clear(); +} diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index 6c76d8ab5..41d758dd7 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -68,8 +68,12 @@ class UseDefUnit : public Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + const InstSeqNum maxSeqNum; + void regStats(); + protected: 
RegDepMap *regDepMap[ThePipeline::MaxThreads]; @@ -84,14 +88,18 @@ class UseDefUnit : public Resource { InstSeqNum floatRegSize[ThePipeline::MaxThreads]; + Stats::Average uniqueRegsPerSwitch; + std::map uniqueRegMap; + public: class UseDefRequest : public ResourceRequest { public: typedef ThePipeline::DynInstPtr DynInstPtr; public: - UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, int res_idx, - int slot_num, unsigned cmd, int use_def_idx) + UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, + int res_idx, int slot_num, unsigned cmd, + int use_def_idx) : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd), useDefIdx(use_def_idx) { } From 349d86c0e4afb02962c9899bd5a3887ff2c55626 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:06 -0500 Subject: [PATCH 23/36] inorder-stats: add prereq to basic stat only show requests processed when the resource is actually in use --- src/cpu/inorder/resource.cc | 3 ++- src/cpu/inorder/resources/inst_buffer.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 47a9a4b9a..1fd28c939 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -81,7 +81,8 @@ Resource::regStats() instReqsProcessed .name(name() + ".instReqsProcessed") .desc("Number of Instructions Requests that completed in " - "this resource."); + "this resource.") + .prereq(instReqsProcessed); } int diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc index bb308b0ea..17b308db0 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -52,7 +52,8 @@ InstBuffer::regStats() { instsBypassed .name(name() + ".instsBypassed") - .desc("Number of Instructions Bypassed."); + .desc("Number of Instructions Bypassed.") + .prereq(instsBypassed); Resource::regStats(); } From 82c5a754e684af6522f339ab30d2c661ee9c220c Mon Sep 17 00:00:00 
2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:18 -0500 Subject: [PATCH 24/36] inorder: recvRetry bug fix - on certain retry requests you can get an assertion failure - fix by allowing the request to literally "Retry" itself if it wasnt successful before, and then block any requests through cache port while waiting for the cache to be made available for access --- src/cpu/inorder/resources/cache_unit.cc | 53 +++++++------------------ src/cpu/inorder/resources/cache_unit.hh | 16 +------- 2 files changed, 16 insertions(+), 53 deletions(-) diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 4f9ed3eca..65782cb73 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -84,8 +84,7 @@ CacheUnit::CachePort::recvRetry() CacheUnit::CacheUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - retryPkt(NULL), retrySlot(-1), cacheBlocked(false), - predecoder(NULL) + cachePortBlocked(false), predecoder(NULL) { cachePort = new CachePort(this); @@ -351,8 +350,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, void CacheUnit::execute(int slot_num) { - if (cacheBlocked) { - DPRINTF(InOrderCachePort, "Cache Blocked. Cannot Access\n"); + if (cachePortBlocked) { + DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); return; } @@ -470,8 +469,7 @@ CacheUnit::prefetch(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -490,8 +488,7 @@ CacheUnit::writeHint(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. 
can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -555,28 +552,18 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (do_access) { if (!cachePort->sendTiming(cache_req->dataPkt)) { DPRINTF(InOrderCachePort, - "[tid:%i] [sn:%i] is waiting to retry request\n", - tid, inst->seqNum); - - retrySlot = cache_req->getSlot(); - retryReq = cache_req; - retryPkt = cache_req->dataPkt; - - cacheStatus = cacheWaitRetry; - - //cacheBlocked = true; - - DPRINTF(InOrderStall, "STALL: \n"); - + "[tid:%i] [sn:%i] cannot access cache, because port " + "is blocked. now waiting to retry request\n", tid, + inst->seqNum); cache_req->setCompleted(false); + cachePortBlocked = true; } else { DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] is now waiting for cache response\n", tid, inst->seqNum); cache_req->setCompleted(); cache_req->setMemAccPending(); - cacheStatus = cacheWaitResponse; - cacheBlocked = false; + cachePortBlocked = false; } } else if (!do_access && memReq->isLLSC()){ // Store-Conditional instructions complete even if they "failed" @@ -737,22 +724,12 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) void CacheUnit::recvRetry() { - DPRINTF(InOrderCachePort, "Retrying Request for [tid:%i] [sn:%i]\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); + DPRINTF(InOrderCachePort, "Unblocking Cache Port. 
\n"); + + assert(cachePortBlocked); - assert(retryPkt != NULL); - assert(cacheBlocked); - assert(cacheStatus == cacheWaitRetry); - - if (cachePort->sendTiming(retryPkt)) { - cacheStatus = cacheWaitResponse; - retryPkt = NULL; - cacheBlocked = false; - } else { - DPRINTF(InOrderCachePort, - "Retry Request for [tid:%i] [sn:%i] failed\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); - } + // Clear the cache port for use again + cachePortBlocked = false; } CacheUnitEvent::CacheUnitEvent() diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 26f6859ed..4162102c7 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -119,12 +119,6 @@ class CacheUnit : public Resource virtual void recvRetry(); }; - enum CachePortStatus { - cacheWaitResponse, - cacheWaitRetry, - cacheAccessComplete - }; - void init(); virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, @@ -188,15 +182,7 @@ class CacheUnit : public Resource /** Cache interface. 
*/ CachePort *cachePort; - CachePortStatus cacheStatus; - - CacheReqPtr retryReq; - - PacketPtr retryPkt; - - int retrySlot; - - bool cacheBlocked; + bool cachePortBlocked; std::vector addrList[ThePipeline::MaxThreads]; From 002f1b8b7e1d5292828e5157ff971965265140bc Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:49 -0500 Subject: [PATCH 25/36] inorder: add execution unit stats --- src/cpu/inorder/resources/execution_unit.cc | 17 +++++++++++++++++ src/cpu/inorder/resources/execution_unit.hh | 5 +++++ 2 files changed, 22 insertions(+) diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 6c44e2456..429291231 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -54,6 +54,17 @@ ExecutionUnit::regStats() .name(name() + ".predictedNotTakenIncorrect") .desc("Number of Branches Incorrectly Predicted As Not Taken)."); + lastExecuteCycle = curTick; + + cyclesExecuted + .name(name() + ".cyclesExecuted") + .desc("Number of Cycles Execution Unit was used."); + + utilization + .name(name() + ".utilization") + .desc("Utilization of Execution Unit (cycles / totalCycles)."); + utilization = cyclesExecuted / cpu->numCycles; + Resource::regStats(); } @@ -75,6 +86,12 @@ ExecutionUnit::execute(int slot_num) { case ExecuteInst: { + if (curTick != lastExecuteCycle) { + lastExecuteCycle = curTick; + cyclesExecuted++; + } + + if (inst->isMemRef()) { panic("%s not configured to handle memory ops.\n", resName); } else if (inst->isControl()) { diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index 46691bbf2..37651e873 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -71,6 +71,11 @@ class ExecutionUnit : public Resource { ///////////////////////////////////////////////////////////////// Stats::Scalar predictedTakenIncorrect; Stats::Scalar 
predictedNotTakenIncorrect; + + Stats::Scalar cyclesExecuted; + Tick lastExecuteCycle; + + Stats::Formula utilization; }; From 1a89e8f4cbab3b3a6fd144d3d08dfeaac203f945 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:59 -0500 Subject: [PATCH 26/36] inorder: user per-thread dummy insts/reqs --- src/cpu/inorder/cpu.cc | 35 +++++++++++++++++++++++--------- src/cpu/inorder/cpu.hh | 7 +++++-- src/cpu/inorder/resource_pool.cc | 5 ----- src/cpu/inorder/resource_pool.hh | 3 --- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index b69fe2e3b..472317362 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -211,6 +211,7 @@ InOrderCPU::InOrderCPU(Params *params) "edit your workload size."); } + if (active_threads > 1) { threadModel = (InOrderCPU::ThreadModel) params->threadModel; @@ -257,6 +258,9 @@ InOrderCPU::InOrderCPU(Params *params) Process* dummy_proc = params->workload[0]; thread[tid] = new Thread(this, tid, dummy_proc); } + + // Eventually set this with parameters... + asid[tid] = tid; #endif // Setup the TC that will serve as the interface to the threads/CPU. @@ -313,14 +317,24 @@ InOrderCPU::InOrderCPU(Params *params) isa[tid].clear(); isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/); + + // Define dummy instructions and resource requests to be used. + dummyInst[tid] = new InOrderDynInst(this, + thread[tid], + 0, + tid, + asid[tid]); + + dummyReq[tid] = new ResourceRequest(resPool->getResource(0), + dummyInst[tid], + 0, + 0, + 0, + 0); } lastRunningCycle = curTick; - // Define dummy instructions and resource requests to be used. - dummyInst = new InOrderDynInst(this, NULL, 0, 0); - dummyReq = new ResourceRequest(resPool->getResource(0), NULL, 0, 0, 0, 0); - // Reset CPU to reset state. 
#if FULL_SYSTEM Fault resetFault = new ResetFault(); @@ -585,7 +599,7 @@ void InOrderCPU::trap(Fault fault, ThreadID tid, int delay) { //@ Squash Pipeline during TRAP - scheduleCpuEvent(Trap, fault, tid, dummyInst, delay); + scheduleCpuEvent(Trap, fault, tid, dummyInst[tid], delay); } void @@ -747,7 +761,7 @@ InOrderCPU::deactivateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid); - scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst, delay); + scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst[tid], delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -830,7 +844,8 @@ InOrderCPU::activateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid); - scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst, delay); + + scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst[tid], delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -847,7 +862,7 @@ InOrderCPU::activateNextReadyContext(int delay) // NOTE: Add 5 to the event priority so that we always activate // threads after we've finished deactivating, squashing,etc. 
// other threads - scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst, + scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], delay, 5); // Be sure to signal that there's some activity so the CPU doesn't @@ -862,7 +877,7 @@ InOrderCPU::haltContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid); - scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst, delay); + scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay); activityRec.activity(); } @@ -885,7 +900,7 @@ InOrderCPU::haltThread(ThreadID tid) void InOrderCPU::suspendContext(ThreadID tid, int delay) { - scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst, delay); + scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay); } void diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 6f1f3ee3f..dc0164d8f 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -97,6 +97,9 @@ class InOrderCPU : public BaseCPU /** CPU ID */ int cpu_id; + // SE Mode ASIDs + ThreadID asid[ThePipeline::MaxThreads]; + /** Type of core that this is */ std::string coreType; @@ -241,10 +244,10 @@ class InOrderCPU : public BaseCPU /** Instruction used to signify that there is no *real* instruction in buffer slot */ - DynInstPtr dummyInst; + DynInstPtr dummyInst[ThePipeline::MaxThreads]; /** Used by resources to signify a denied access to a resource. */ - ResourceRequest *dummyReq; + ResourceRequest *dummyReq[ThePipeline::MaxThreads]; /** Identifies the resource id that identifies a fetch * access unit. 
diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 3750d18d6..dd51242a3 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -448,11 +448,6 @@ ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) } } -ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) - : Event((Event::Priority)((unsigned)CPU_Tick_Pri+5)), resPool(_resPool), - eventType((InOrderCPU::CPUEventType) Default) -{ } - ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool, InOrderCPU::CPUEventType e_type, DynInstPtr _inst, diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 3f62d2caa..f61fae4c8 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -85,9 +85,6 @@ class ResourcePool { ThreadID tid; public: - /** Constructs a resource event. */ - ResPoolEvent(ResourcePool *_resPool); - /** Constructs a resource event. */ ResPoolEvent(ResourcePool *_resPool, InOrderCPU::CPUEventType e_type, From f3bc2df663cccd7db7a4ba87acfc2d0137a5ca02 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:08 -0500 Subject: [PATCH 27/36] inorder: object cleanup in destructors --- src/cpu/inorder/cpu.cc | 5 +++++ src/cpu/inorder/cpu.hh | 4 +++- src/cpu/inorder/resource.cc | 1 + src/cpu/inorder/resource_pool.cc | 12 ++++++++++++ src/cpu/inorder/resource_pool.hh | 2 +- src/cpu/inorder/resources/cache_unit.hh | 1 - src/cpu/inorder/resources/execution_unit.hh | 1 - src/cpu/inorder/resources/fetch_seq_unit.cc | 5 +++++ src/cpu/inorder/resources/fetch_seq_unit.hh | 4 ++-- src/cpu/inorder/resources/mult_div_unit.hh | 1 - 10 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 472317362..a3b203559 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -347,6 +347,11 @@ InOrderCPU::InOrderCPU(Params *params) scheduleTickEvent(0); } +InOrderCPU::~InOrderCPU() +{ + 
delete resPool; +} + void InOrderCPU::regStats() diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index dc0164d8f..d8424397b 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -93,7 +93,9 @@ class InOrderCPU : public BaseCPU public: /** Constructs a CPU with the given parameters. */ InOrderCPU(Params *params); - + /* Destructor */ + ~InOrderCPU(); + /** CPU ID */ int cpu_id; diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 1fd28c939..e5fd4f70e 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -47,6 +47,7 @@ Resource::Resource(string res_name, int res_id, int res_width, Resource::~Resource() { delete [] resourceEvent; + delete deniedReq; } diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index dd51242a3..1f15a2c96 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -91,6 +91,18 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) 0, _cpu, params)); } +ResourcePool::~ResourcePool() +{ + cout << "Deleting resources ..." 
<< endl; + + for (int i=0; i < resources.size(); i++) { + DPRINTF(Resource, "Deleting resource: %s.\n", resources[i]->name()); + + delete resources[i]; + } +} + + void ResourcePool::init() { diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index f61fae4c8..ce7167b87 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -122,7 +122,7 @@ class ResourcePool { public: ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ResourcePool() {} + ~ResourcePool(); std::string name(); diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 4162102c7..50cb47519 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -62,7 +62,6 @@ class CacheUnit : public Resource public: CacheUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~CacheUnit() {} enum Command { InitiateFetch, diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index 37651e873..b9cf1d428 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -52,7 +52,6 @@ class ExecutionUnit : public Resource { public: ExecutionUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ExecutionUnit() {} public: virtual void regStats(); diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index ba86a91f0..03663881c 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -54,6 +54,11 @@ FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width, } } +FetchSeqUnit::~FetchSeqUnit() +{ + delete [] resourceEvent; +} + void FetchSeqUnit::init() { diff --git 
a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index 3283e0330..289e150aa 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -54,8 +54,8 @@ class FetchSeqUnit : public Resource { public: FetchSeqUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~FetchSeqUnit() {} - + virtual ~FetchSeqUnit(); + virtual void init(); virtual void activateThread(ThreadID tid); virtual void deactivateThread(ThreadID tid); diff --git a/src/cpu/inorder/resources/mult_div_unit.hh b/src/cpu/inorder/resources/mult_div_unit.hh index d3dd0260d..19688b09f 100644 --- a/src/cpu/inorder/resources/mult_div_unit.hh +++ b/src/cpu/inorder/resources/mult_div_unit.hh @@ -57,7 +57,6 @@ class MultDivUnit : public Resource { public: MultDivUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~MultDivUnit() {} public: /** Override default Resource getSlot(). 
Will only getSlot if From ea8909925fd0e7a33feabc9e17f83b85cd7c6039 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:24 -0500 Subject: [PATCH 28/36] inorder: add activity stats --- src/cpu/inorder/cpu.cc | 25 +++++++++++++++++++++---- src/cpu/inorder/cpu.hh | 8 +++++++- src/cpu/inorder/first_stage.cc | 4 +++- src/cpu/inorder/pipeline_stage.cc | 6 +++++- src/cpu/inorder/pipeline_stage.hh | 2 ++ src/cpu/inorder/resources/cache_unit.cc | 16 +++++++++++----- 6 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index a3b203559..e864c8c86 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -389,9 +389,17 @@ InOrderCPU::regStats() idleCycles .name(name() + ".idleCycles") - .desc("Total number of cycles that the CPU has spent unscheduled due " - "to idling") - .prereq(idleCycles); + .desc("Number of cycles cpu's stages were not processed"); + + runCycles + .name(name() + ".runCycles") + .desc("Number of cycles cpu stages are processed."); + + activity + .name(name() + ".activity") + .desc("Percentage of cycles cpu is active") + .precision(6); + activity = (runCycles / numCycles) * 100; threadCycles .init(numThreads) @@ -463,18 +471,27 @@ InOrderCPU::tick() ++numCycles; + bool pipes_idle = true; + //Tick each of the stages for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) { pipelineStage[stNum]->tick(); + + pipes_idle = pipes_idle && pipelineStage[stNum]->idle; } + if (pipes_idle) + idleCycles++; + else + runCycles++; + // Now advance the time buffers one tick timeBuffer.advance(); for (int sqNum=0; sqNum < NumStages - 1; sqNum++) { stageQueue[sqNum]->advance(); } activityRec.advance(); - + // Any squashed requests, events, or insts then remove them now cleanUpRemovedReqs(); cleanUpRemovedEvents(); diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index d8424397b..253b5b18f 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -729,9 +729,15 @@ class 
InOrderCPU : public BaseCPU /** Stat for total number of times the CPU is descheduled. */ Stats::Scalar timesIdled; - /** Stat for total number of cycles the CPU spends descheduled. */ + /** Stat for total number of cycles the CPU spends descheduled or no stages active. */ Stats::Scalar idleCycles; + /** Stat for total number of cycles the CPU is active. */ + Stats::Scalar runCycles; + + /** Percentage of cycles a stage was active */ + Stats::Formula activity; + /** Stat for the number of committed instructions per thread. */ Stats::Vector committedInsts; diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index c653d152b..658ce37d3 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -133,8 +133,10 @@ FirstStage::processStage(bool &status_change) if (instsProcessed > 0) { ++runCycles; + idle = false; } else { - ++idleCycles; + ++idleCycles; + idle = true; } } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 550952947..c991fe1bd 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -42,7 +42,7 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) : stageNum(stage_num), stageWidth(ThePipeline::StageWidth), numThreads(ThePipeline::MaxThreads), _status(Inactive), stageBufferMax(ThePipeline::interStageBuffSize[stage_num]), - prevStageValid(false), nextStageValid(false) + prevStageValid(false), nextStageValid(false), idle(false) { switchedOutBuffer.resize(ThePipeline::MaxThreads); switchedOutValid.resize(ThePipeline::MaxThreads); @@ -707,6 +707,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) void PipelineStage::tick() { + idle = false; + wroteToTimeBuffer = false; bool status_change = false; @@ -794,8 +796,10 @@ PipelineStage::processStage(bool &status_change) if (instsProcessed > 0) { ++runCycles; + idle = false; } else { ++idleCycles; + idle = true; } DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", 
skidSize(), diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index be3a1093c..6c9cf0d99 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -347,6 +347,8 @@ class PipelineStage /** Is Next Stage Valid? */ bool nextStageValid; + bool idle; + /** Source of possible stalls. */ struct Stalls { bool stage[ThePipeline::NumStages]; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 65782cb73..275d9a7e8 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -143,7 +143,8 @@ CacheUnit::getSlot(DynInstPtr inst) Addr req_addr = inst->getMemAddr(); if (resName == "icache_port" || - find(addrList[tid].begin(), addrList[tid].end(), req_addr) == addrList[tid].end()) { + find(addrList[tid].begin(), addrList[tid].end(), req_addr) == + addrList[tid].end()) { int new_slot = Resource::getSlot(inst); @@ -171,8 +172,9 @@ CacheUnit::freeSlot(int slot_num) { ThreadID tid = reqMap[slot_num]->inst->readTid(); - vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), - reqMap[slot_num]->inst->getMemAddr()); + vector::iterator vect_it = + find(addrList[tid].begin(), addrList[tid].end(), + reqMap[slot_num]->inst->getMemAddr()); assert(vect_it != addrList[tid].end()); DPRINTF(InOrderCachePort, @@ -533,8 +535,6 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) } } - cache_req->dataPkt->time = curTick; - bool do_access = true; // flag to suppress cache access Request *memReq = cache_req->dataPkt->req; @@ -590,6 +590,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) { // Cast to correct packet type CacheReqPacket* cache_pkt = dynamic_cast(pkt); + assert(cache_pkt); if (cache_pkt->cacheReq->isSquashed()) { @@ -600,6 +601,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_pkt->cacheReq->done(); delete cache_pkt; + + cpu->wakeCPU(); + return; } @@ -730,6 +734,8 @@ CacheUnit::recvRetry() 
// Clear the cache port for use again cachePortBlocked = false; + + cpu->wakeCPU(); } CacheUnitEvent::CacheUnitEvent() From 6939482c49b489ad8811364ec52ad10ae421fb44 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:35 -0500 Subject: [PATCH 29/36] inorder: implement split loads --- src/cpu/inorder/inorder_dyn_inst.cc | 10 +- src/cpu/inorder/inorder_dyn_inst.hh | 18 +- src/cpu/inorder/pipeline_traits.hh | 6 +- src/cpu/inorder/resource.cc | 13 +- src/cpu/inorder/resource_pool.cc | 19 ++ src/cpu/inorder/resource_pool.hh | 1 + src/cpu/inorder/resources/cache_unit.cc | 312 ++++++++++++++++++++++-- src/cpu/inorder/resources/cache_unit.hh | 36 ++- 8 files changed, 374 insertions(+), 41 deletions(-) diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 79f8de05d..9c0313721 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -111,7 +111,11 @@ InOrderDynInst::initVars() { fetchMemReq = NULL; dataMemReq = NULL; - + splitMemData = NULL; + split2ndAccess = false; + splitInst = false; + splitFinishCnt = 0; + effAddr = 0; physEffAddr = 0; @@ -187,6 +191,10 @@ InOrderDynInst::~InOrderDynInst() delete traceData; } + if (splitMemData) { + delete splitMemData; + } + fault = NoFault; --instcount; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index b573c1029..6f5b7c0e9 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -330,6 +330,19 @@ class InOrderDynInst : public FastAlloc, public RefCounted public: Tick memTime; + PacketDataPtr splitMemData; + RequestPtr splitMemReq; + int splitTotalSize; + int split2ndSize; + Addr split2ndAddr; + bool split2ndAccess; + uint8_t split2ndData; + PacketDataPtr split2ndDataPtr; + unsigned split2ndFlags; + bool splitInst; + int splitFinishCnt; + + //////////////////////////////////////////////////////////// // // BASE INSTRUCTION INFORMATION. 
@@ -468,7 +481,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted if (!resSched.empty()) { ThePipeline::ScheduleEntry* sked = resSched.top(); resSched.pop(); - delete sked; + if (sked != 0) { + delete sked; + + } } } diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index ddc8a3ad7..f039b9e5d 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -53,8 +53,8 @@ namespace ThePipeline { const unsigned StageWidth = 1; const unsigned BackEndStartStage = 2; - // Enumerated List of Resources The Pipeline Uses - enum ResourceList { + // List of Resources The Pipeline Uses + enum ResourceId { FetchSeq = 0, ICache, Decode, @@ -94,6 +94,7 @@ namespace ThePipeline { stageNum(stage_num), resNum(res_num), cmd(_cmd), idx(_idx), priority(_priority) { } + virtual ~ScheduleEntry(){} // Stage number to perform this service. @@ -159,7 +160,6 @@ namespace ThePipeline { stageNum, nextTaskPriority++, unit, request, param )); } - }; }; diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index e5fd4f70e..dcf5f3117 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -262,15 +262,22 @@ Resource::findRequest(DynInstPtr inst) map::iterator map_it = reqMap.begin(); map::iterator map_end = reqMap.end(); + bool found = false; + ResReqPtr req = NULL; + while (map_it != map_end) { if ((*map_it).second && - (*map_it).second->getInst() == inst) { - return (*map_it).second; + (*map_it).second->getInst() == inst) { + req = (*map_it).second; + //return (*map_it).second; + assert(found == false); + found = true; } map_it++; } - return NULL; + return req; + //return NULL; } void diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 1f15a2c96..74bf4f03b 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -181,6 +181,25 @@ ResourcePool::getResIdx(const std::string &res_name) return idx; } + panic("Can't find 
resource idx for: %s\n", res_name); + return 0; +} + +unsigned +ResourcePool::getResIdx(const ThePipeline::ResourceId &res_id) +{ + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + if (resources[idx]->getId() == res_id) + return idx; + } + + // todo: change return value to int and return a -1 here + // maybe even have enumerated type + // panic for now... + panic("Can't find resource idx for: %i\n", res_id); + return 0; } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index ce7167b87..60d35ab61 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -141,6 +141,7 @@ class ResourcePool { /** Returns a specific resource. */ unsigned getResIdx(const std::string &res_name); + unsigned getResIdx(const ThePipeline::ResourceId &res_id); /** Returns a pointer to a resource */ Resource* getResource(int res_idx) { return resources[res_idx]; } diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 275d9a7e8..85ef18a55 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -40,6 +40,7 @@ #include "cpu/inorder/resources/cache_unit.hh" #include "cpu/inorder/pipeline_traits.hh" #include "cpu/inorder/cpu.hh" +#include "cpu/inorder/resource_pool.hh" #include "mem/request.hh" using namespace std; @@ -136,7 +137,9 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; } - if (!inst->validMemAddr()) { + // For a Split-Load, the instruction would have processed once already + // causing the address to be unset. + if (!inst->validMemAddr() && !inst->splitInst) { panic("Mem. Addr. 
must be set before requesting cache access\n"); } @@ -159,12 +162,24 @@ CacheUnit::getSlot(DynInstPtr inst) inst->readTid(), inst->seqNum, req_addr); return new_slot; } else { - DPRINTF(InOrderCachePort, + // Allow same instruction multiple accesses to same address + if (addrMap[tid][req_addr] == inst->seqNum) { + int new_slot = Resource::getSlot(inst); + + if (new_slot == -1) + return -1; + + return new_slot; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime); - return -1; + return -1; + } } + + return -1; } void @@ -175,17 +190,69 @@ CacheUnit::freeSlot(int slot_num) vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), reqMap[slot_num]->inst->getMemAddr()); - assert(vect_it != addrList[tid].end()); + + assert(vect_it != addrList[tid].end() || + reqMap[slot_num]->inst->splitInst); DPRINTF(InOrderCachePort, "[tid:%i]: Address %08p removed from dependency list\n", reqMap[slot_num]->inst->readTid(), (*vect_it)); - addrList[tid].erase(vect_it); + if (vect_it != addrList[tid].end()) { + + DPRINTF(InOrderCachePort, + "[tid:%i]: Address %08p removed from dependency list\n", + reqMap[slot_num]->inst->readTid(), (*vect_it)); + + addrList[tid].erase(vect_it); + } Resource::freeSlot(slot_num); } +ResReqPtr +CacheUnit::findRequest(DynInstPtr inst) +{ + map::iterator map_it = reqMap.begin(); + map::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast((*map_it).second); + assert(cache_req); + + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == inst->resSched.top()->idx) { + return cache_req; + } + map_it++; + } + + return NULL; +} + +ResReqPtr +CacheUnit::findSplitRequest(DynInstPtr inst, int idx) +{ + map::iterator map_it = reqMap.begin(); + map::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + 
CacheRequest* cache_req = dynamic_cast((*map_it).second); + assert(cache_req); + + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == idx) { + return cache_req; + } + map_it++; + } + + return NULL; +} + + ResReqPtr CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -200,6 +267,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, switch (sched_entry->cmd) { + case InitSecondSplitRead: + pkt_cmd = MemCmd::ReadReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Read request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateReadData: pkt_cmd = MemCmd::ReadReq; @@ -231,7 +306,8 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, return new CacheRequest(this, inst, stage_num, id, slot_num, sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId()); + 0/*flags*/, this->cpu->readCpuId(), + inst->resSched.top()->idx); } void @@ -242,7 +318,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // Check to see if this instruction is requesting the same command // or a different one - if (cache_req->cmd != inst->resSched.top()->cmd) { + if (cache_req->cmd != inst->resSched.top()->cmd && + cache_req->instIdx == inst->resSched.top()->idx) { // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, @@ -250,7 +327,7 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else { + } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -276,12 +353,25 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->fetchMemReq; 
} else { - inst->dataMemReq = new Request(inst->readTid(), aligned_addr, + if (!cache_req->is2ndSplit()) { + inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr, acc_size, flags, inst->readPC(), cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->dataMemReq; + } else { + assert(inst->splitInst); + + inst->splitMemReq = new Request(cpu->asid[tid], + inst->split2ndAddr, + acc_size, + flags, + inst->readPC(), + cpu->readCpuId(), + tid); + cache_req->memReq = inst->splitMemReq; + } } - + cache_req->fault = _tlb->translateAtomic(cache_req->memReq, @@ -318,14 +408,94 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); assert(cache_req); - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read); + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. 
+ Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + + if (secondAddr > addr && !inst->split2ndAccess) { + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + addr, secondAddr); + + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitMemData = new uint8_t[dataSize]; + inst->splitTotalSize = dataSize; + + + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitRead, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitRead, 1) + ); + + + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndDataPtr = inst->splitMemData + dataSize; + inst->split2ndFlags = flags; + } + + //cout << "h1" << endl; + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); + + //cout << "h2" << endl; if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, NULL); + if (!cache_req->splitAccess) { + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, NULL); + } else { + if (!inst->split2ndAccess) { + cache_req->reqData = inst->splitMemData; + } else { + cache_req->reqData = inst->split2ndDataPtr; + } + + doCacheAccess(inst, NULL, cache_req); + } } + //cout << "h3" << endl; + return cache_req->fault; } @@ -337,6 +507,20 @@ CacheUnit::write(DynInstPtr inst, T data, 
Addr addr, unsigned flags, CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); assert(cache_req); + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + if (secondAddr > addr) { + assert(0 && "Need Split Write Code!"); + } + int acc_size = sizeof(T); doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write); @@ -364,6 +548,8 @@ CacheUnit::execute(int slot_num) #if TRACING_ON ThreadID tid = inst->readTid(); int seq_num = inst->seqNum; + std::string acc_type = "write"; + #endif cache_req->fault = NoFault; @@ -395,10 +581,14 @@ CacheUnit::execute(int slot_num) } case InitiateReadData: +#if TRACING_ON + acc_type = "read"; +#endif case InitiateWriteData: + DPRINTF(InOrderCachePort, - "[tid:%u]: Initiating data access to %s for addr. %08p\n", - tid, name(), cache_req->inst->getMemAddr()); + "[tid:%u]: [sn:%i] Initiating data %s access to %s for addr. %08p\n", + tid, inst->seqNum, acc_type, name(), cache_req->inst->getMemAddr()); inst->setCurResSlot(slot_num); @@ -406,10 +596,31 @@ CacheUnit::execute(int slot_num) inst->execute(); } else { inst->initiateAcc(); + //if (inst->splitAccess) { + // assert(0 && " Marked as spill inst"); + //} } - + break; + case InitSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->split2ndAddr); + inst->split2ndAccess = true; + read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + break; + + case InitSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. 
%08p\n", + tid, inst->seqNum, name(), cache_req->inst->getMemAddr()); + assert(0); + inst->split2ndAccess = true; + //write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + break; + + case CompleteFetch: if (cache_req->isMemAccComplete()) { DPRINTF(InOrderCachePort, @@ -425,7 +636,7 @@ CacheUnit::execute(int slot_num) cache_req->done(); } else { DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", + "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", tid, inst->seqNum); DPRINTF(InOrderStall, "STALL: [tid:%i]: Fetch miss from %08p\n", @@ -454,6 +665,24 @@ CacheUnit::execute(int slot_num) } break; + case CompleteSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Read Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; + default: fatal("Unrecognized command to %s", resName); } @@ -498,15 +727,21 @@ CacheUnit::writeHint(DynInstPtr inst) // @TODO: Split into doCacheRead() and doCacheWrite() Fault -CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) +CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split_req) { Fault fault = NoFault; #if TRACING_ON ThreadID tid = inst->readTid(); #endif - CacheReqPtr cache_req - = dynamic_cast(reqMap[inst->getCurResSlot()]); + CacheReqPtr cache_req; + + if (split_req == NULL) { + cache_req = dynamic_cast(reqMap[inst->getCurResSlot()]); + } else{ + cache_req = split_req; + } + assert(cache_req); // Check for LL/SC and if so change command @@ -522,7 +757,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) } cache_req->dataPkt = new CacheReqPacket(cache_req, 
cache_req->pktCmd, - Packet::Broadcast); + Packet::Broadcast, cache_req->instIdx); if (cache_req->dataPkt->isRead()) { cache_req->dataPkt->dataStatic(cache_req->reqData); @@ -615,7 +850,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) // Cast to correct request type CacheRequest *cache_req = dynamic_cast( - findRequest(cache_pkt->cacheReq->getInst())); + findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx)); + + if (!cache_req) { + warn( + "[tid:%u]: [sn:%i]: Can't find slot for cache access to addr. %08p\n", + cache_pkt->cacheReq->getInst()->readTid(), + cache_pkt->cacheReq->getInst()->seqNum, + cache_pkt->cacheReq->getInst()->getMemAddr()); + } + assert(cache_req); @@ -661,9 +905,27 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i]: Processing cache access\n", tid, inst->seqNum); + + if (inst->splitInst) { + inst->splitFinishCnt++; + + if (inst->splitFinishCnt == 2) { - inst->completeAcc(pkt); - + cache_req->memReq->setVirt(0/*inst->tid*/, + inst->getMemAddr(), + inst->splitTotalSize, + 0, + 0); + + Packet split_pkt(cache_req->memReq, cache_req->pktCmd, + Packet::Broadcast); + split_pkt.dataStatic(inst->splitMemData); + inst->completeAcc(&split_pkt); + } + } else { + inst->completeAcc(pkt); + } + if (inst->isLoad()) { assert(cache_pkt->isRead()); diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 50cb47519..715ebd878 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -72,7 +72,10 @@ class CacheUnit : public Resource CompleteWriteData, Fetch, ReadData, - WriteData + WriteData, + InitSecondSplitRead, + InitSecondSplitWrite, + CompleteSecondSplitRead }; public: @@ -124,6 +127,9 @@ class CacheUnit : public Resource int res_idx, int slot_num, unsigned cmd); + ResReqPtr findRequest(DynInstPtr inst); + ResReqPtr findSplitRequest(DynInstPtr inst, int idx); + void requestAgain(DynInstPtr inst, bool 
&try_request); int getSlot(DynInstPtr inst); @@ -155,7 +161,7 @@ class CacheUnit : public Resource /** Returns a specific port. */ Port *getPort(const std::string &if_name, int idx); - + template Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags); @@ -169,7 +175,7 @@ class CacheUnit : public Resource /** Read/Write on behalf of an instruction. * curResSlot needs to be a valid value in instruction. */ - Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL); + Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL, CacheReqPtr split_req=NULL); void prefetch(DynInstPtr inst); @@ -237,17 +243,18 @@ class CacheRequest : public ResourceRequest public: CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd, int req_size, - MemCmd::Command pkt_cmd, unsigned flags, int cpu_id) + MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx) : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd), pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), retryPkt(NULL), memAccComplete(false), memAccPending(false), - tlbStall(false) + tlbStall(false), splitAccess(false), splitAccessNum(-1), + split2ndAccess(false), instIdx(idx) { } virtual ~CacheRequest() { - if (reqData) { + if (reqData && !splitAccess) { delete [] reqData; } } @@ -261,6 +268,11 @@ class CacheRequest : public ResourceRequest memAccComplete = completed; } + bool is2ndSplit() + { + return split2ndAccess; + } + bool isMemAccComplete() { return memAccComplete; } void setMemAccPending(bool pending = true) { memAccPending = pending; } @@ -276,19 +288,27 @@ class CacheRequest : public ResourceRequest bool memAccComplete; bool memAccPending; bool tlbStall; + + bool splitAccess; + int splitAccessNum; + bool split2ndAccess; + int instIdx; + }; class CacheReqPacket : public Packet { public: CacheReqPacket(CacheRequest *_req, - Command _cmd, short _dest) - : Packet(_req->memReq, _cmd, _dest), cacheReq(_req) + Command _cmd, short 
_dest, int _idx = 0) + : Packet(_req->memReq, _cmd, _dest), cacheReq(_req), instIdx(_idx) { } CacheRequest *cacheReq; + int instIdx; + }; #endif //__CPU_CACHE_UNIT_HH__ From be6724f7e7a1c1d2f305c814cf3aa23d54a676e2 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:43 -0500 Subject: [PATCH 30/36] inorder: implement split stores --- src/cpu/inorder/inorder_dyn_inst.cc | 3 +- src/cpu/inorder/inorder_dyn_inst.hh | 2 +- src/cpu/inorder/resources/cache_unit.cc | 140 +++++++++++++++++++----- src/cpu/inorder/resources/cache_unit.hh | 3 +- 4 files changed, 117 insertions(+), 31 deletions(-) diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 9c0313721..c0e5aa69b 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -112,6 +112,7 @@ InOrderDynInst::initVars() fetchMemReq = NULL; dataMemReq = NULL; splitMemData = NULL; + split2ndAddr = 0; split2ndAccess = false; splitInst = false; splitFinishCnt = 0; @@ -192,7 +193,7 @@ InOrderDynInst::~InOrderDynInst() } if (splitMemData) { - delete splitMemData; + delete [] splitMemData; } fault = NoFault; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 6f5b7c0e9..ad4da9aab 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -341,7 +341,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted unsigned split2ndFlags; bool splitInst; int splitFinishCnt; - + uint64_t *split2ndStoreDataPtr; //////////////////////////////////////////////////////////// // diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 85ef18a55..3fa1ed180 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -283,6 +283,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, inst->readTid(), inst->seqNum, inst->getMemAddr()); break; + case InitSecondSplitWrite: + 
pkt_cmd = MemCmd::WriteReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Write request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateWriteData: pkt_cmd = MemCmd::WriteReq; @@ -327,7 +335,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead) { + } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead && + inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -406,7 +415,7 @@ Fault CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); - assert(cache_req); + assert(cache_req && "Can't Find Instruction for Read!"); // The block size of our peer unsigned blockSize = this->cachePort->peerBlockSize(); @@ -456,7 +465,8 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) inst->resSched.push(new ScheduleEntry(stage_num + 1, 1/*stage_pri*/, cpu->resPool->getResIdx(DCache), - CacheUnit::CompleteSecondSplitRead, 1) + CacheUnit::CompleteSecondSplitRead, + 1) ); @@ -473,12 +483,8 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) inst->split2ndFlags = flags; } - //cout << "h1" << endl; - doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); - //cout << "h2" << endl; - if (cache_req->fault == NoFault) { if (!cache_req->splitAccess) { cache_req->reqData = new uint8_t[dataSize]; @@ -494,8 +500,6 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) } } - //cout << "h3" << endl; - return cache_req->fault; } @@ -505,7 +509,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, uint64_t *write_res) { CacheReqPtr cache_req = 
dynamic_cast(findRequest(inst)); - assert(cache_req); + assert(cache_req && "Can't Find Instruction for Write!"); // The block size of our peer unsigned blockSize = this->cachePort->peerBlockSize(); @@ -513,22 +517,75 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + //The address of the second part of this access if it needs to be split //across a cache line boundary. Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); - if (secondAddr > addr) { - assert(0 && "Need Split Write Code!"); - } + if (secondAddr > addr && !inst->split2ndAccess) { + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + addr, secondAddr); - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write); + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitTotalSize = dataSize; + + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitWrite, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitWrite, + 1) + ); + + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access 
+ // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndStoreDataPtr = &cache_req->inst->storeData; + inst->split2ndStoreDataPtr += dataSize; + inst->split2ndFlags = flags; + } + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, write_res); + if (!cache_req->splitAccess) { + // Remove this line since storeData is saved in INST? + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, write_res); + } else { + doCacheAccess(inst, write_res, cache_req); + } + } - + return cache_req->fault; } @@ -596,9 +653,6 @@ CacheUnit::execute(int slot_num) inst->execute(); } else { inst->initiateAcc(); - //if (inst->splitAccess) { - // assert(0 && " Marked as spill inst"); - //} } break; @@ -608,6 +662,7 @@ CacheUnit::execute(int slot_num) "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n", tid, inst->seqNum, name(), cache_req->inst->split2ndAddr); inst->split2ndAccess = true; + assert(inst->split2ndAddr != 0); read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); break; @@ -615,9 +670,10 @@ CacheUnit::execute(int slot_num) DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. 
%08p\n", tid, inst->seqNum, name(), cache_req->inst->getMemAddr()); - assert(0); + inst->split2ndAccess = true; - //write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + assert(inst->split2ndAddr != 0); + write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags, NULL); break; @@ -682,6 +738,24 @@ CacheUnit::execute(int slot_num) cache_req->setMemStall(true); } break; + + case CompleteSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Write Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; default: fatal("Unrecognized command to %s", resName); @@ -761,9 +835,13 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split if (cache_req->dataPkt->isRead()) { cache_req->dataPkt->dataStatic(cache_req->reqData); - } else if (cache_req->dataPkt->isWrite()) { - cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); - + } else if (cache_req->dataPkt->isWrite()) { + if (inst->split2ndAccess) { + cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr); + } else { + cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); + } + if (cache_req->memReq->isCondSwap()) { assert(write_res); cache_req->memReq->setExtraData(*write_res); @@ -910,7 +988,6 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) inst->splitFinishCnt++; if (inst->splitFinishCnt == 2) { - cache_req->memReq->setVirt(0/*inst->tid*/, inst->getMemAddr(), inst->splitTotalSize, @@ -919,7 +996,14 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) Packet split_pkt(cache_req->memReq, cache_req->pktCmd, Packet::Broadcast); - split_pkt.dataStatic(inst->splitMemData); + + + if 
(inst->isLoad()) { + split_pkt.dataStatic(inst->splitMemData); + } else { + split_pkt.dataStatic(&inst->storeData); + } + inst->completeAcc(&split_pkt); } } else { diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 715ebd878..8200ace87 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -75,7 +75,8 @@ class CacheUnit : public Resource WriteData, InitSecondSplitRead, InitSecondSplitWrite, - CompleteSecondSplitRead + CompleteSecondSplitRead, + CompleteSecondSplitWrite }; public: From 9357e353fc976a409fb0cb3a875b402f452577f7 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:48 -0500 Subject: [PATCH 31/36] inorder: inst count mgmt --- src/cpu/inorder/SConscript | 2 + src/cpu/inorder/cpu.cc | 38 ++++--- src/cpu/inorder/cpu.hh | 2 + src/cpu/inorder/inorder_dyn_inst.cc | 8 +- src/cpu/inorder/inorder_dyn_inst.hh | 5 +- src/cpu/inorder/pipeline_stage.cc | 26 ++++- src/cpu/inorder/reg_dep_map.cc | 24 +++++ src/cpu/inorder/reg_dep_map.hh | 2 + src/cpu/inorder/resource.cc | 34 +++--- src/cpu/inorder/resource.hh | 11 +- src/cpu/inorder/resources/cache_unit.cc | 105 ++++++++++++++----- src/cpu/inorder/resources/cache_unit.hh | 5 +- src/cpu/inorder/resources/graduation_unit.cc | 2 - src/cpu/inorder/resources/use_def.cc | 15 ++- 14 files changed, 211 insertions(+), 68 deletions(-) diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index afc6a29e4..f222350af 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -54,6 +54,8 @@ if 'InOrderCPU' in env['CPU_MODELS']: TraceFlag('InOrderGraduation') TraceFlag('ThreadModel') TraceFlag('RefCount') + TraceFlag('AddrDep') + CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU', 'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred', diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index e864c8c86..e28af9e7a 100644 --- 
a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -333,6 +333,12 @@ InOrderCPU::InOrderCPU(Params *params) 0); } + dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyReqInst->setSquashed(); + + dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyBufferInst->setSquashed(); + lastRunningCycle = curTick; // Reset CPU to reset state. @@ -343,6 +349,8 @@ InOrderCPU::InOrderCPU(Params *params) reset(); #endif + dummyBufferInst->resetInstCount(); + // Schedule First Tick Event, CPU will reschedule itself from here on out. scheduleTickEvent(0); } @@ -1176,6 +1184,8 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) removeInst(inst); } +// currently unused function, but substitute repetitive code w/this function +// call void InOrderCPU::addToRemoveList(DynInstPtr &inst) { @@ -1194,6 +1204,10 @@ InOrderCPU::removeInst(DynInstPtr &inst) removeInstsThisCycle = true; // Remove the instruction. + + DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); removeList.push(inst->getInstListIt()); } @@ -1208,7 +1222,7 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) inst_iter--; - DPRINTF(InOrderCPU, "Deleting instructions from CPU instruction " + DPRINTF(InOrderCPU, "Squashing instructions from CPU instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1238,6 +1252,9 @@ InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid) (*instIt)->setSquashed(); + DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), (*instIt)->seqNum); removeList.push(instIt); } } @@ -1251,7 +1268,7 @@ InOrderCPU::cleanUpRemovedInsts() "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, - (*removeList.front())->readPC()); + (*removeList.front())->readPC()); DynInstPtr 
inst = *removeList.front(); ThreadID tid = inst->threadNumber; @@ -1279,11 +1296,6 @@ InOrderCPU::cleanUpRemovedInsts() instList[tid].erase(removeList.front()); removeList.pop(); - - DPRINTF(RefCount, "pop from remove list: [sn:%i]: Refcount = %i.\n", - inst->seqNum, - 0/*inst->curCount()*/); - } removeInstsThisCycle = false; @@ -1295,22 +1307,18 @@ InOrderCPU::cleanUpRemovedReqs() while (!reqRemoveList.empty()) { ResourceRequest *res_req = reqRemoveList.front(); - DPRINTF(RefCount, "[tid:%i]: Removing Request, " - "[sn:%lli] [slot:%i] [stage_num:%i] [res:%s] [refcount:%i].\n", + DPRINTF(InOrderCPU, "[tid:%i] [sn:%lli]: Removing Request " + "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n", res_req->inst->threadNumber, res_req->inst->seqNum, - res_req->getSlot(), res_req->getStageNum(), res_req->res->name(), - 0/*res_req->inst->curCount()*/); + (res_req->isCompleted()) ? res_req->getComplSlot() : res_req->getSlot(), + res_req->isCompleted()); reqRemoveList.pop(); delete res_req; - - DPRINTF(RefCount, "after remove request: [sn:%i]: Refcount = %i.\n", - res_req->inst->seqNum, - 0/*res_req->inst->curCount()*/); } } diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 253b5b18f..0c42f349e 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -247,6 +247,8 @@ class InOrderCPU : public BaseCPU /** Instruction used to signify that there is no *real* instruction in buffer slot */ DynInstPtr dummyInst[ThePipeline::MaxThreads]; + DynInstPtr dummyBufferInst; + DynInstPtr dummyReqInst; /** Used by resources to signify a denied access to a resource. 
*/ ResourceRequest *dummyReq[ThePipeline::MaxThreads]; diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index c0e5aa69b..75e1c570f 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -164,7 +164,7 @@ InOrderDynInst::initVars() // Update Instruction Count for this instruction ++instcount; - if (instcount > 500) { + if (instcount > 100) { fatal("Number of Active Instructions in CPU is too high. " "(Not Dereferencing Ptrs. Correctly?)\n"); } @@ -175,6 +175,12 @@ InOrderDynInst::initVars() threadNumber, seqNum, instcount); } +void +InOrderDynInst::resetInstCount() +{ + instcount = 0; +} + InOrderDynInst::~InOrderDynInst() { diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index ad4da9aab..8a5f9cf25 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -1032,14 +1032,15 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Count of total number of dynamic instructions. */ static int instcount; + void resetInstCount(); + /** Dumps out contents of this BaseDynInst. */ void dump(); /** Dumps out contents of this BaseDynInst into given string. 
*/ void dump(std::string &outstring); - - //inline int curCount() { return curCount(); } + //inline int curCount() { return curCount(); } }; diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index c991fe1bd..571cf10bb 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -101,8 +101,6 @@ PipelineStage::setCPU(InOrderCPU *cpu_ptr) { cpu = cpu_ptr; - dummyBufferInst = new InOrderDynInst(cpu_ptr, NULL, 0, 0, 0); - DPRINTF(InOrderStage, "Set CPU pointer.\n"); tracer = dynamic_cast(cpu->getTracer()); @@ -388,6 +386,8 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) prevStage->insts[i]->seqNum, prevStage->insts[i]->readPC()); prevStage->insts[i]->setSquashed(); + + prevStage->insts[i] = cpu->dummyBufferInst; } } } @@ -609,7 +609,7 @@ PipelineStage::sortInsts() skidBuffer[tid].push(prevStage->insts[i]); - prevStage->insts[i] = dummyBufferInst; + prevStage->insts[i] = cpu->dummyBufferInst; } } @@ -816,7 +816,7 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) // call processInsts() // If status is Unblocking, // buffer any instructions coming from fetch - // continue trying to empty skid buffer + // continue trying to empty skid buffer // check if stall conditions have passed // Stage should try to process as many instructions as its bandwidth @@ -960,6 +960,8 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) } reqs_processed++; + + req->stagePasses++; } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." 
"\n", tid, inst->seqNum, cpu->resPool->name(res_num)); @@ -969,7 +971,7 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) if (req->isMemStall() && cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { // Save Stalling Instruction - DPRINTF(ThreadModel, "[tid:%i] Detected cache miss.\n", tid); + DPRINTF(ThreadModel, "[tid:%i] [sn:%i] Detected cache miss.\n", tid, inst->seqNum); DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n", tid, inst->seqNum); @@ -994,6 +996,20 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) cpu->activateNextReadyContext(); } + // Mark request for deletion + // if it isnt currently being used by a resource + if (!req->hasSlot()) { + DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no slot in resource.\n", + inst->seqNum); + + cpu->reqRemoveList.push(req); + } else { + DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, in resource [slot:%i].\n", + inst->seqNum, req->getSlot()); + //req = cpu->dummyReq[tid]; + } + + break; } diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc index 51782a588..7fac0a905 100644 --- a/src/cpu/inorder/reg_dep_map.cc +++ b/src/cpu/inorder/reg_dep_map.cc @@ -235,3 +235,27 @@ RegDepMap::findBypassInst(unsigned idx) return NULL; } + +void +RegDepMap::dump() +{ + + for (int idx=0; idx < regMap.size(); idx++) { + + if (regMap[idx].size() > 0) { + cprintf("Reg #%i (size:%i): ", idx, regMap[idx].size()); + + std::list::iterator list_it = regMap[idx].begin(); + std::list::iterator list_end = regMap[idx].end(); + + while (list_it != list_end) { + cprintf("[sn:%i] ", (*list_it)->seqNum); + + list_it++; + } + + cprintf("\n"); + } + + } +} diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh index b78e211bb..cb9d35bf4 100644 --- a/src/cpu/inorder/reg_dep_map.hh +++ b/src/cpu/inorder/reg_dep_map.hh @@ -88,6 +88,8 @@ class RegDepMap /** Size of Dependency of Map */ int depSize(unsigned idx); + void 
dump(); + protected: // Eventually make this a map of lists for // efficiency sake! diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index dcf5f3117..e63925fe8 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -101,12 +101,6 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { - DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource " - "[slot:%i].\n", - reqMap[slot_idx]->inst->readTid(), - reqMap[slot_idx]->inst->seqNum, - slot_idx); - // Put slot number on this resource's free list availSlots.push_back(slot_idx); @@ -181,7 +175,7 @@ Resource::request(DynInstPtr inst) // See if the resource is already serving this instruction. // If so, use that request; bool try_request = false; - int slot_num; + int slot_num = -1; int stage_num; ResReqPtr inst_req = findRequest(inst); @@ -440,6 +434,10 @@ ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, } #endif + + stagePasses = 0; + complSlotNum = -1; + } ResourceRequest::~ResourceRequest() @@ -454,17 +452,29 @@ ResourceRequest::~ResourceRequest() void ResourceRequest::done(bool completed) { - DPRINTF(Resource, "%s done with request from [sn:%i] [tid:%i].\n", - res->name(), inst->seqNum, inst->readTid()); + DPRINTF(Resource, "%s [slot:%i] done with request from [sn:%i] [tid:%i].\n", + res->name(), slotNum, inst->seqNum, inst->readTid()); setCompleted(completed); - // Add to remove list - res->cpu->reqRemoveList.push(res->reqMap[slotNum]); - + // Used for debugging purposes + if (completed) { + complSlotNum = slotNum; + + // Would like to start a convention such as all requests deleted in resources/pipeline + // but a little more complex then it seems... + // For now, all COMPLETED requests deleted in resource.. 
+ // all FAILED requests deleted in pipeline stage + // *all SQUASHED requests deleted in resource + res->cpu->reqRemoveList.push(res->reqMap[slotNum]); + } + // Free Slot So Another Instruction Can Use This Resource res->freeSlot(slotNum); + // change slot # to -1, since we check slotNum to see if request is still valid + slotNum = -1; + res->instReqsProcessed++; } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 383340df2..b9650df18 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -331,6 +331,8 @@ class ResourceRequest */ void done(bool completed = true); + short stagePasses; + ///////////////////////////////////////////// // // GET RESOURCE REQUEST IDENTIFICATION / INFO @@ -339,8 +341,11 @@ class ResourceRequest /** Get Resource Index */ int getResIdx() { return resIdx; } + /** Get Slot Number */ int getSlot() { return slotNum; } + int getComplSlot() { return complSlotNum; } + bool hasSlot() { return slotNum >= 0; } /** Get Stage Number */ int getStageNum() { return stageNum; } @@ -363,6 +368,9 @@ class ResourceRequest /** Instruction being used */ DynInstPtr inst; + /** Not guaranteed to be set, used for debugging */ + InstSeqNum seqNum; + /** Fault Associated With This Resource Request */ Fault fault; @@ -396,7 +404,8 @@ class ResourceRequest int stageNum; int resIdx; int slotNum; - + int complSlotNum; + /** Resource Request Status */ bool completed; bool squashed; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 3fa1ed180..00058163f 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -155,14 +155,11 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; inst->memTime = curTick; - addrList[tid].push_back(req_addr); - addrMap[tid][req_addr] = inst->seqNum; - DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", - inst->readTid(), inst->seqNum, req_addr); + 
setAddrDependency(inst); return new_slot; } else { // Allow same instruction multiple accesses to same address + // should only happen maybe after a squashed inst. needs to replay if (addrMap[tid][req_addr] == inst->seqNum) { int new_slot = Resource::getSlot(inst); @@ -183,31 +180,45 @@ CacheUnit::getSlot(DynInstPtr inst) } void -CacheUnit::freeSlot(int slot_num) +CacheUnit::setAddrDependency(DynInstPtr inst) { - ThreadID tid = reqMap[slot_num]->inst->readTid(); - - vector::iterator vect_it = - find(addrList[tid].begin(), addrList[tid].end(), - reqMap[slot_num]->inst->getMemAddr()); - - assert(vect_it != addrList[tid].end() || - reqMap[slot_num]->inst->splitInst); + Addr req_addr = inst->getMemAddr(); + ThreadID tid = inst->readTid(); + addrList[tid].push_back(req_addr); + addrMap[tid][req_addr] = inst->seqNum; DPRINTF(InOrderCachePort, - "[tid:%i]: Address %08p removed from dependency list\n", - reqMap[slot_num]->inst->readTid(), (*vect_it)); + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); +} + +void +CacheUnit::removeAddrDependency(DynInstPtr inst) +{ + ThreadID tid = inst->readTid(); + + Addr mem_addr = inst->getMemAddr(); + + // Erase from Address List + vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), + mem_addr); + assert(vect_it != addrList[tid].end() || inst->splitInst); if (vect_it != addrList[tid].end()) { - - DPRINTF(InOrderCachePort, - "[tid:%i]: Address %08p removed from dependency list\n", - reqMap[slot_num]->inst->readTid(), (*vect_it)); - - addrList[tid].erase(vect_it); - } + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i] Address %08p removed from dependency list\n", + inst->readTid(), inst->seqNum, (*vect_it)); + + addrList[tid].erase(vect_it); + + // Erase From Address Map (Used for Debugging) + addrMap[tid].erase(addrMap[tid].find(mem_addr)); + 
} + - Resource::freeSlot(slot_num); } ResReqPtr @@ -687,8 +698,14 @@ CacheUnit::execute(int slot_num) DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n", tid, seq_num, inst->staticInst->disassemble(inst->PC)); + removeAddrDependency(inst); + delete cache_req->dataPkt; - //cache_req->setMemStall(false); + + // Do not stall and switch threads for fetch... for now.. + // TODO: We need to detect cache misses for latencies > 1 + // cache_req->setMemStall(false); + cache_req->done(); } else { DPRINTF(InOrderCachePort, @@ -711,6 +728,7 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); cache_req->setMemStall(false); cache_req->done(); } else { @@ -729,6 +747,7 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); cache_req->setMemStall(false); cache_req->done(); } else { @@ -747,6 +766,7 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); cache_req->setMemStall(false); cache_req->done(); } else { @@ -911,6 +931,10 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", cache_pkt->cacheReq->getInst()->readTid(), cache_pkt->cacheReq->getInst()->seqNum); + DPRINTF(RefCount, + "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", + cache_pkt->cacheReq->getTid(), + cache_pkt->cacheReq->seqNum); cache_pkt->cacheReq->done(); delete cache_pkt; @@ -1154,6 +1178,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, "[tid:%i] Squashing request from [sn:%i]\n", req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + if (req_ptr->isSquashed()) { + DPRINTF(AddrDep, "Request for [tid:%i] [sn:%i] already squashed, ignoring squash process.\n", + req_ptr->getInst()->readTid(), + 
req_ptr->getInst()->seqNum); + map_it++; + continue; + } + req_ptr->setSquashed(); req_ptr->getInst()->setSquashed(); @@ -1178,7 +1210,29 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); + + DPRINTF(InOrderCachePort, + "[tid:%i] Squashing request from [sn:%i]\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + } else { + DPRINTF(InOrderCachePort, + "[tid:%i] Request from [sn:%i] squashed, but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + DPRINTF(RefCount, + "[tid:%i] Request from [sn:%i] squashed (split:%i), but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum, + req_ptr->getInst()->splitInst); } + + if (req_ptr->getInst()->validMemAddr()) { + DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to remove addr. %08p dependencies.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum, + req_ptr->getInst()->getMemAddr()); + + removeAddrDependency(req_ptr->getInst()); + } + } map_it++; @@ -1320,3 +1374,4 @@ CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, { return write(inst, (uint32_t)data, addr, flags, res); } + diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 8200ace87..9004f3b93 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -135,8 +135,6 @@ class CacheUnit : public Resource int getSlot(DynInstPtr inst); - void freeSlot(int slot_num); - /** Execute the function of this resource. The Default is action * is to do nothing. More specific models will derive from this * class and define their own execute function. @@ -184,6 +182,9 @@ class CacheUnit : public Resource uint64_t getMemData(Packet *packet); + void setAddrDependency(DynInstPtr inst); + void removeAddrDependency(DynInstPtr inst); + protected: /** Cache interface. 
*/ CachePort *cachePort; diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 2d7cd5c8c..2dad9889a 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -79,8 +79,6 @@ GraduationUnit::execute(int slot_num) "[tid:%i] Graduating instruction [sn:%i].\n", tid, inst->seqNum); - DPRINTF(RefCount, "Refcount = %i.\n", 0/*inst->curCount()*/); - // Release Non-Speculative "Block" on instructions that could not execute // because there was a non-speculative inst. active. // @TODO: Fix this functionality. Probably too conservative. diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index a4f3a0d21..5fd6a4724 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -191,6 +191,7 @@ UseDefUnit::execute(int slot_idx) DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " "[sn:%i] to write\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { @@ -249,6 +250,7 @@ UseDefUnit::execute(int slot_idx) DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " "[sn:%i] to forward\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i" @@ -258,6 +260,7 @@ UseDefUnit::execute(int slot_idx) "register (idx=%i)\n", tid, reg_idx); outReadSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } } @@ -360,6 +363,7 @@ UseDefUnit::execute(int slot_idx) DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " "[sn:%i] to read\n", tid, outReadSeqNum); + ud_req->done(false); } } else { DPRINTF(InOrderUseDef, "[tid:%i]: Dest. 
register idx: %i is " @@ -369,6 +373,7 @@ UseDefUnit::execute(int slot_idx) "register (idx=%i)\n", tid, reg_idx); outWriteSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } break; @@ -402,12 +407,16 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); - regDepMap[tid]->remove(req_ptr->getInst()); - int req_slot_num = req_ptr->getSlot(); - if (latency > 0) + if (latency > 0) { + assert(0); + unscheduleEvent(req_slot_num); + } + + // Mark request for later removal + cpu->reqRemoveList.push(req_ptr); // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); From c7f6e2661c958d996479ae9fe8c8cf2c8a9482f6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:59 -0500 Subject: [PATCH 32/36] inorder: double delete inst bug Make sure that instructions are not dereferenced/deleted twice by marking when they are on the remove list --- src/cpu/inorder/cpu.cc | 48 ++++++++++--- src/cpu/inorder/inorder_dyn_inst.cc | 1 + src/cpu/inorder/inorder_dyn_inst.hh | 10 ++- src/cpu/inorder/pipeline_stage.cc | 1 + src/cpu/inorder/resources/cache_unit.cc | 91 ++++++++++++++----------- 5 files changed, 102 insertions(+), 49 deletions(-) diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index e28af9e7a..7342f9bc5 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -1190,8 +1190,18 @@ void InOrderCPU::addToRemoveList(DynInstPtr &inst) { removeInstsThisCycle = true; - - removeList.push(inst->getInstListIt()); + if (!inst->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + inst->setRemoveList(); + removeList.push(inst->getInstListIt()); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + } + } void @@ 
-1204,11 +1214,18 @@ InOrderCPU::removeInst(DynInstPtr &inst) removeInstsThisCycle = true; // Remove the instruction. + if (!inst->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + inst->setRemoveList(); + removeList.push(inst->getInstListIt()); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + } - DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " - "[sn:%lli] to remove list\n", - inst->threadNumber, inst->readPC(), inst->seqNum); - removeList.push(inst->getInstListIt()); } void @@ -1252,11 +1269,22 @@ InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid) (*instIt)->setSquashed(); - DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " - "[sn:%lli] to remove list\n", - (*instIt)->threadNumber, (*instIt)->readPC(), (*instIt)->seqNum); - removeList.push(instIt); + if (!(*instIt)->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), + (*instIt)->seqNum); + (*instIt)->setRemoveList(); + removeList.push(instIt); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), + (*instIt)->seqNum); + } + } + } diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 75e1c570f..1b55c90e0 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -115,6 +115,7 @@ InOrderDynInst::initVars() split2ndAddr = 0; split2ndAccess = false; splitInst = false; + splitInstSked = false; splitFinishCnt = 0; effAddr = 0; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 8a5f9cf25..8c9cd69e0 100644 --- 
a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -164,6 +164,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted /// instructions ahead of it SerializeAfter, /// Needs to serialize instructions behind it SerializeHandled, /// Serialization has been handled + RemoveList, /// Is Instruction on Remove List? NumStatus }; @@ -342,7 +343,8 @@ class InOrderDynInst : public FastAlloc, public RefCounted bool splitInst; int splitFinishCnt; uint64_t *split2ndStoreDataPtr; - + bool splitInstSked; + //////////////////////////////////////////////////////////// // // BASE INSTRUCTION INFORMATION. @@ -915,6 +917,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Returns whether or not the entry is on the CPU Reg Dep Map */ bool isRegDepEntry() const { return status[RegDepMapEntry]; } + /** Marks this instruction as being on the CPU Remove List */ + void setRemoveList() { status.set(RemoveList); } + + /** Returns whether or not the instruction is on the CPU Remove List */ + bool isRemoveList() const { return status[RemoveList]; } + /** Sets this instruction as completed. 
*/ void setCompleted() { status.set(Completed); } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 571cf10bb..dcf4d81bf 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -380,6 +380,7 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) for (int i=0; i < prevStage->size; i++) { if (prevStage->insts[i]->threadNumber == tid && prevStage->insts[i]->seqNum > squash_seq_num) { + // Change Comment to Annulling previous instruction DPRINTF(InOrderStage, "[tid:%i]: Squashing instruction, " "[sn:%i] PC %08p.\n", tid, diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 00058163f..cb1861ea9 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -140,7 +140,8 @@ CacheUnit::getSlot(DynInstPtr inst) // For a Split-Load, the instruction would have processed once already // causing the address to be unset. if (!inst->validMemAddr() && !inst->splitInst) { - panic("Mem. Addr. must be set before requesting cache access\n"); + panic("[tid:%i][sn:%i] Mem. Addr. 
must be set before requesting cache access\n", + inst->readTid(), inst->seqNum); } Addr req_addr = inst->getMemAddr(); @@ -439,7 +440,7 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) cache_req->splitAccess = true; cache_req->split2ndAccess = true; - DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + DPRINTF(InOrderCachePort, "[sn:%i] Split Read Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, inst->getMemAddr(), inst->split2ndAddr); } @@ -459,27 +460,31 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) inst->splitMemData = new uint8_t[dataSize]; inst->splitTotalSize = dataSize; - - // Schedule Split Read/Complete for Instruction - // ============================== - int stage_num = cache_req->getStageNum(); + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); - int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); - inst->resSched.push(new ScheduleEntry(stage_num, - stage_pri, - cpu->resPool->getResIdx(DCache), - CacheUnit::InitSecondSplitRead, - 1) - ); - - inst->resSched.push(new ScheduleEntry(stage_num + 1, - 1/*stage_pri*/, - cpu->resPool->getResIdx(DCache), - CacheUnit::CompleteSecondSplitRead, - 1) - ); + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitRead, + 1) + ); + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitRead, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } // Split Information for First Access // ============================== @@ -533,7 
+538,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, cache_req->splitAccess = true; cache_req->split2ndAccess = true; - DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, inst->getMemAddr(), inst->split2ndAddr); } @@ -542,7 +547,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); if (secondAddr > addr && !inst->split2ndAccess) { - DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (1 of 2) for (%#x, %#x).\n", inst->seqNum, addr, secondAddr); // Save All "Total" Split Information @@ -550,25 +556,33 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, inst->splitInst = true; inst->splitTotalSize = dataSize; - // Schedule Split Read/Complete for Instruction - // ============================== - int stage_num = cache_req->getStageNum(); + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); - int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); - inst->resSched.push(new ScheduleEntry(stage_num, - stage_pri, - cpu->resPool->getResIdx(DCache), - CacheUnit::InitSecondSplitWrite, - 1) - ); + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitWrite, + 1) + ); - inst->resSched.push(new ScheduleEntry(stage_num + 1, - 1/*stage_pri*/, - cpu->resPool->getResIdx(DCache), - CacheUnit::CompleteSecondSplitWrite, - 1) - ); + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + 
CacheUnit::CompleteSecondSplitWrite, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } + + // Split Information for First Access // ============================== @@ -582,6 +596,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, inst->split2ndStoreDataPtr = &cache_req->inst->storeData; inst->split2ndStoreDataPtr += dataSize; inst->split2ndFlags = flags; + inst->splitInstSked = true; } doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); From 7712740b5fc219f5610b79e3a06903785fc2d772 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:31:09 -0500 Subject: [PATCH 33/36] inorder: update hello world alpha --- .../ref/alpha/linux/inorder-timing/config.ini | 6 +- .../ref/alpha/linux/inorder-timing/simout | 8 +- .../ref/alpha/linux/inorder-timing/stats.txt | 207 ++++++++++-------- 3 files changed, 118 insertions(+), 103 deletions(-) diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini index b30560264..5ab5381fc 100644 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini @@ -63,6 +63,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side @@ -78,7 +79,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -113,7 +113,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -148,7 +147,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 
-prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 @@ -190,7 +188,7 @@ egid=100 env= errout=cerr euid=100 -executable=tests/test-progs/hello/bin/alpha/linux/hello +executable=/dist/m5/regression/test-progs/hello/bin/alpha/linux/hello gid=100 input=cin max_stack_size=67108864 diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout index 18efdaa9e..4ad6292c5 100755 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 12 2009 11:18:39 -M5 revision 21550d38f156 6195 default qtip tip inorder-hello-regress -M5 started May 12 2009 11:18:40 +M5 compiled Jan 29 2010 09:13:03 +M5 revision 23ae96d82d21+ 6704+ default qtip tip inorder_hello_alpha +M5 started Jan 29 2010 09:13:04 M5 executing on zooks command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello world! 
-Exiting @ tick 31646000 because target called exit() +Exiting @ tick 31286000 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt index a88b80594..b9a12afbb 100644 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt @@ -1,53 +1,53 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 23793 # Simulator instruction rate (inst/s) -host_mem_usage 152032 # Number of bytes of host memory used -host_seconds 0.27 # Real time elapsed on the host -host_tick_rate 117464960 # Simulator tick rate (ticks/s) +host_inst_rate 23048 # Simulator instruction rate (inst/s) +host_mem_usage 153228 # Number of bytes of host memory used +host_seconds 0.28 # Real time elapsed on the host +host_tick_rate 112412599 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 6404 # Number of instructions simulated -sim_seconds 0.000032 # Number of seconds simulated -sim_ticks 31646000 # Number of ticks simulated +sim_seconds 0.000031 # Number of seconds simulated +sim_ticks 31286000 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 2050 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.instReqsProcessed 6405 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 909 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 142 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 6405 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 6581 # Number of Instructions Requests that completed in this resource. 
+system.cpu.Branch-Predictor.predictedNotTaken 924 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 143 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 6581 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 4340 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 4354 # Number of Instructions Requests that completed in this resource. -system.cpu.Execution-Unit.predictedNotTakenIncorrect 607 # Number of Branches Incorrectly Predicted As Not Taken). -system.cpu.Execution-Unit.predictedTakenIncorrect 124 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 13560 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.predictedNotTakenIncorrect 608 # Number of Branches Incorrectly Predicted As Not Taken). +system.cpu.Execution-Unit.predictedTakenIncorrect 123 # Number of Branches Incorrectly Predicted As Taken. +system.cpu.Execution-Unit.utilization 0.069359 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 13858 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 6404 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed. 
system.cpu.Mult-Div-Unit.instReqsProcessed 2 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 1 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 12884 # Number of Instructions Requests that completed in this resource. +system.cpu.RegFile-Manager.instReqsProcessed 19961 # Number of Instructions Requests that completed in this resource. +system.cpu.activity 22.407428 # Percentage of cycles cpu is active system.cpu.committedInsts 6404 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 6404 # Number of Instructions Simulated (Total) -system.cpu.cpi 9.883354 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 9.883354 # CPI: Total CPI of All Threads +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 9.770924 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 9.770924 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 1185 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56352.631579 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53352.631579 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 56347.368421 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53347.368421 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 1090 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 5353500 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 5353000 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.080169 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 95 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 5068500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 5068000 # number of ReadReq MSHR miss cycles 
system.cpu.dcache.ReadReq_mshr_miss_rate 0.080169 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 95 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 865 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56419.540230 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53419.540230 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56074.712644 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53074.712644 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 778 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 4908500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 4878500 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.100578 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 87 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 4647500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 4617500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.100578 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 87 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -59,29 +59,29 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 # system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 2050 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56384.615385 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53384.615385 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 56217.032967 # 
average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53217.032967 # average overall mshr miss latency system.cpu.dcache.demand_hits 1868 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 10262000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 10231500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.088780 # miss rate for demand accesses system.cpu.dcache.demand_misses 182 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 9716000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 9685500 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.088780 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 182 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 2050 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56384.615385 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53384.615385 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 56217.032967 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53217.032967 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 1868 # number of overall hits -system.cpu.dcache.overall_miss_latency 10262000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 10231500 # number of overall miss cycles 
system.cpu.dcache.overall_miss_rate 0.088780 # miss rate for overall accesses system.cpu.dcache.overall_misses 182 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 9716000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 9685500 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.088780 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 182 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -89,7 +89,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 168 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 104.325446 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 103.689640 # Cycle average of tags in use system.cpu.dcache.total_refs 1882 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.dcache.writebacks 0 # number of writebacks @@ -110,70 +110,71 @@ system.cpu.dtb.write_accesses 868 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 865 # DTB write hits system.cpu.dtb.write_misses 3 # DTB write misses -system.cpu.icache.ReadReq_accesses 7155 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55763.605442 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 52949.122807 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 6861 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 16394500 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.041090 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 294 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 9 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 15090500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.039832 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 7277 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 55521.594684 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 52863.157895 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 6976 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 16712000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.041363 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 301 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 16 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 15066000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.039164 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 285 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs 
no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 24.158451 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 24.563380 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 7155 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55763.605442 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 52949.122807 # average overall mshr miss latency -system.cpu.icache.demand_hits 6861 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 16394500 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.041090 # miss rate for demand accesses -system.cpu.icache.demand_misses 294 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 9 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 15090500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.039832 # mshr miss rate for demand accesses +system.cpu.icache.demand_accesses 7277 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 55521.594684 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 52863.157895 # average overall mshr miss latency +system.cpu.icache.demand_hits 6976 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 16712000 # number of demand 
(read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.041363 # miss rate for demand accesses +system.cpu.icache.demand_misses 301 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 16 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 15066000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.039164 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 285 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 7155 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55763.605442 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 52949.122807 # average overall mshr miss latency +system.cpu.icache.overall_accesses 7277 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 55521.594684 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 52863.157895 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 6861 # number of overall hits -system.cpu.icache.overall_miss_latency 16394500 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.041090 # miss rate for overall accesses -system.cpu.icache.overall_misses 294 # number of overall misses -system.cpu.icache.overall_mshr_hits 9 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 15090500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.039832 # mshr miss rate for overall accesses +system.cpu.icache.overall_hits 6976 # number of overall hits 
+system.cpu.icache.overall_miss_latency 16712000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.041363 # miss rate for overall accesses +system.cpu.icache.overall_misses 301 # number of overall misses +system.cpu.icache.overall_mshr_hits 16 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 15066000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.039164 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 285 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 284 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 131.383181 # Cycle average of tags in use -system.cpu.icache.total_refs 6861 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 130.373495 # Cycle average of tags in use +system.cpu.icache.total_refs 6976 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 7153 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.101180 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.101180 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 7275 # Number of Instructions Requests that completed in this resource. 
+system.cpu.idleCycles 48552 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.102344 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.102344 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 7172 # ITB accesses +system.cpu.itb.fetch_accesses 7294 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 7155 # ITB hits +system.cpu.itb.fetch_hits 7277 # ITB hits system.cpu.itb.fetch_misses 17 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -184,28 +185,28 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 73 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52424.657534 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_miss_latency 52075.342466 # average ReadExReq miss latency system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40013.698630 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 3827000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 3801500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 73 # number of ReadExReq misses system.cpu.l2cache.ReadExReq_mshr_miss_latency 2921000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 73 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 380 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52118.733509 # 
average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39944.591029 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52068.601583 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39945.910290 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 19753000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 19734000 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.997368 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 379 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 15139000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 15139500 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.997368 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 379 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52357.142857 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52071.428571 # average UpgradeReq miss latency system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 733000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_latency 729000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses system.cpu.l2cache.UpgradeReq_mshr_miss_latency 560000 # number of UpgradeReq MSHR miss cycles @@ -220,29 +221,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked 
system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 453 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52168.141593 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 39955.752212 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52069.690265 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 39956.858407 # average overall mshr miss latency system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 23580000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 23535500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.997792 # miss rate for demand accesses system.cpu.l2cache.demand_misses 452 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 18060000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 18060500 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.997792 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 452 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 453 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52168.141593 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 39955.752212 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52069.690265 # average overall miss latency 
+system.cpu.l2cache.overall_avg_mshr_miss_latency 39956.858407 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 1 # number of overall hits -system.cpu.l2cache.overall_miss_latency 23580000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 23535500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.997792 # miss rate for overall accesses system.cpu.l2cache.overall_misses 452 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 18060000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 18060500 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.997792 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 452 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -250,16 +251,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 364 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 182.840902 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 181.532273 # Cycle average of tags in use system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 63293 # number of cpu cycles simulated +system.cpu.numCycles 62573 # number of cpu cycles simulated +system.cpu.runCycles 14021 # Number of cycles cpu stages are processed. 
system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 63293 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 55279 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 7294 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 11.656785 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 55992 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 6581 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 10.517316 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 56103 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 6470 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 10.339923 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 60520 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 2053 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 3.280968 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 56169 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 6404 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 10.234446 # Percentage of cycles stage was utilized (processing insts). 
+system.cpu.threadCycles 62573 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 17 # Number of system calls ---------- End Simulation Statistics ---------- From 81c9fdad243dc04ec18def93e3be328c9cbcf539 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:31:14 -0500 Subject: [PATCH 34/36] inorder: twolf alpha regression --- .../ref/alpha/tru64/inorder-timing/config.ini | 4 +- .../ref/alpha/tru64/inorder-timing/simout | 8 +- .../ref/alpha/tru64/inorder-timing/stats.txt | 270 ++++++++++-------- 3 files changed, 148 insertions(+), 134 deletions(-) diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini index f04bd741b..ca2d0ba7e 100644 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini @@ -63,6 +63,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side @@ -78,7 +79,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -113,7 +113,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -148,7 +147,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout index 4a762fa1c..309f6bf40 100755 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout @@ -5,10 +5,10 @@ The Regents of The 
University of Michigan All Rights Reserved -M5 compiled Jul 4 2009 20:43:52 -M5 revision 20167772fb15 6281 default tip -M5 started Jul 4 2009 20:43:52 -M5 executing on tater +M5 compiled Jan 29 2010 09:29:58 +M5 revision a196f8cf520a 6706 default qtip tip inorder_twolf_alpha +M5 started Jan 29 2010 09:31:14 +M5 executing on zooks command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sav Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sv2 diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt index c58b2a060..0453fd079 100644 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt @@ -1,88 +1,87 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 69440 # Simulator instruction rate (inst/s) -host_mem_usage 210892 # Number of bytes of host memory used -host_seconds 1323.48 # Real time elapsed on the host -host_tick_rate 76516395 # Simulator tick rate (ticks/s) +host_inst_rate 55182 # Simulator instruction rate (inst/s) +host_mem_usage 156168 # Number of bytes of host memory used +host_seconds 1665.47 # Real time elapsed on the host +host_tick_rate 59164617 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 91903056 # Number of instructions simulated -sim_seconds 0.101268 # Number of seconds simulated -sim_ticks 101268061000 # Number of ticks simulated +sim_seconds 0.098537 # Number of seconds simulated +sim_ticks 98536744000 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 26537108 # Number of Instructions Requests that completed in this resource. 
-system.cpu.Branch-Predictor.instReqsProcessed 91903057 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 8198984 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 2041701 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 91903057 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 92657148 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.predictedNotTaken 8232810 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 2041716 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 92657148 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 64907308 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 64907696 # Number of Instructions Requests that completed in this resource. system.cpu.Execution-Unit.predictedNotTakenIncorrect 3739118 # Number of Branches Incorrectly Predicted As Not Taken). system.cpu.Execution-Unit.predictedTakenIncorrect 1029596 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 189586934 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.utilization 0.329356 # Utilization of Execution Unit (cycles / totalCycles). 
+system.cpu.Fetch-Seq-Unit.instReqsProcessed 191370621 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 91903056 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 916504 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 458252 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 188816950 # Number of Instructions Requests that completed in this resource. +system.cpu.RegFile-Manager.instReqsProcessed 196152134 # Number of Instructions Requests that completed in this resource. +system.cpu.activity 96.743392 # Percentage of cycles cpu is active system.cpu.committedInsts 91903056 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 91903056 # Number of Instructions Simulated (Total) -system.cpu.cpi 2.203802 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 2.203802 # CPI: Total CPI of All Threads +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 2.144363 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 2.144363 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 19996198 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 51623.700624 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48550.526316 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 19995717 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 24831000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_avg_miss_latency 51569.473684 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48547.368421 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 19995723 # number 
of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 24495500 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.000024 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 481 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 6 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 23061500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_misses 475 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 23060000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.000024 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 475 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 6501103 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56415.277031 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53415.277031 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56295.857988 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53295.857988 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 6499244 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 104876000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 104654000 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.000286 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 1859 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 99299000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 99077000 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.000286 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 1859 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of 
cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 11918.612686 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 11918.613585 # Average number of references to valid blocks. system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 26497301 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 55430.341880 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 52425.235647 # average overall mshr miss latency -system.cpu.dcache.demand_hits 26494961 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 129707000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_avg_miss_latency 55333.976007 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 52329.477292 # average overall mshr miss latency +system.cpu.dcache.demand_hits 26494967 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 129149500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.000088 # miss rate for demand accesses -system.cpu.dcache.demand_misses 2340 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 6 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 122360500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_misses 2334 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits 
+system.cpu.dcache.demand_mshr_miss_latency 122137000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.000088 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 2334 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 26497301 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 55430.341880 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 52425.235647 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 55333.976007 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 52329.477292 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 26494961 # number of overall hits -system.cpu.dcache.overall_miss_latency 129707000 # number of overall miss cycles +system.cpu.dcache.overall_hits 26494967 # number of overall hits +system.cpu.dcache.overall_miss_latency 129149500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.000088 # miss rate for overall accesses -system.cpu.dcache.overall_misses 2340 # number of overall misses -system.cpu.dcache.overall_mshr_hits 6 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 122360500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_misses 2334 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 122137000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.000088 # mshr miss rate for overall accesses 
system.cpu.dcache.overall_mshr_misses 2334 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -90,8 +89,8 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 157 # number of replacements system.cpu.dcache.sampled_refs 2223 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 1441.819572 # Cycle average of tags in use -system.cpu.dcache.total_refs 26495076 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 1441.684134 # Cycle average of tags in use +system.cpu.dcache.total_refs 26495078 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 104 # number of writebacks system.cpu.dcache_port.instReqsProcessed 26537108 # Number of Instructions Requests that completed in this resource. 
@@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses 6501126 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 6501103 # DTB write hits system.cpu.dtb.write_misses 23 # DTB write misses -system.cpu.icache.ReadReq_accesses 97683877 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 27282.787360 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 24026.266636 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 97675238 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 235696000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_accesses 98713473 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 27258.057090 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 23994.339402 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 98704785 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 236818000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.000088 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 8639 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 73 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 205809000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.000088 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 8566 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_misses 8688 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 120 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 205583500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.000087 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 8568 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average 
number of cycles each access was blocked -system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 11402.666122 # Average number of references to valid blocks. +system.cpu.icache.avg_blocked_cycles::no_targets 1000 # average number of cycles each access was blocked +system.cpu.icache.avg_refs 11520.166317 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked::no_targets 1 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked -system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles::no_targets 1000 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 97683877 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 27282.787360 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 24026.266636 # average overall mshr miss latency -system.cpu.icache.demand_hits 97675238 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 235696000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_accesses 98713473 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 27258.057090 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 23994.339402 # average overall mshr miss latency +system.cpu.icache.demand_hits 98704785 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 236818000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.000088 # miss rate for demand accesses -system.cpu.icache.demand_misses 8639 # number of 
demand (read+write) misses -system.cpu.icache.demand_mshr_hits 73 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 205809000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.000088 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 8566 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_misses 8688 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 120 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 205583500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.000087 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 8568 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 97683877 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 27282.787360 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 24026.266636 # average overall mshr miss latency +system.cpu.icache.overall_accesses 98713473 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 27258.057090 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 23994.339402 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 97675238 # number of overall hits -system.cpu.icache.overall_miss_latency 235696000 # number of overall miss cycles +system.cpu.icache.overall_hits 98704785 # number of overall hits +system.cpu.icache.overall_miss_latency 236818000 # number of overall miss cycles 
system.cpu.icache.overall_miss_rate 0.000088 # miss rate for overall accesses -system.cpu.icache.overall_misses 8639 # number of overall misses -system.cpu.icache.overall_mshr_hits 73 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 205809000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.000088 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 8566 # number of overall MSHR misses +system.cpu.icache.overall_misses 8688 # number of overall misses +system.cpu.icache.overall_mshr_hits 120 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 205583500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.000087 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 8568 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses -system.cpu.icache.replacements 6732 # number of replacements -system.cpu.icache.sampled_refs 8566 # Sample count of references to valid blocks. +system.cpu.icache.replacements 6734 # number of replacements +system.cpu.icache.sampled_refs 8568 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 1428.614683 # Cycle average of tags in use -system.cpu.icache.total_refs 97675238 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 1428.229557 # Cycle average of tags in use +system.cpu.icache.total_refs 98704785 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 97683876 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.453761 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.453761 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 98713472 # Number of Instructions Requests that completed in this resource. +system.cpu.idleCycles 6417911 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.466339 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.466339 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 97683924 # ITB accesses +system.cpu.itb.fetch_accesses 98713520 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 97683877 # ITB hits +system.cpu.itb.fetch_hits 98713473 # ITB hits system.cpu.itb.fetch_misses 47 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -185,84 +185,100 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 1748 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52413.043478 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40003.432494 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 91618000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_avg_miss_latency 52296.624714 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40005.720824 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 91414500 # number of ReadExReq miss 
cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 1748 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 69926000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_latency 69930000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 1748 # number of ReadExReq MSHR misses -system.cpu.l2cache.ReadReq_accesses 9041 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52240.613777 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40013.548808 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_hits 5978 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 160013000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 0.338790 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_accesses 9043 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 52161.443030 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40020.078355 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 5980 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 159770500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.338715 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 3063 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 122561500 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.338790 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_miss_latency 122581500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.338715 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 3063 # number of 
ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 111 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52414.414414 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 5818000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52216.216216 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40009.009009 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 5796000 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 111 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 4440000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 4441000 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_mshr_misses 111 # number of UpgradeReq MSHR misses system.cpu.l2cache.Writeback_accesses 104 # number of Writeback accesses(hits+misses) system.cpu.l2cache.Writeback_hits 104 # number of Writeback hits system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 1.968317 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 1.968977 # Average number of references to valid blocks. 
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 10789 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52303.263355 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40009.873207 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 5978 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 251631000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 0.445917 # miss rate for demand accesses +system.cpu.l2cache.demand_accesses 10791 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 52210.559135 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40014.861775 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 5980 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 251185000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.445834 # miss rate for demand accesses system.cpu.l2cache.demand_misses 4811 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 192487500 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 0.445917 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_latency 192511500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.445834 # mshr miss rate for demand accesses 
system.cpu.l2cache.demand_mshr_misses 4811 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 10789 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52303.263355 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40009.873207 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 10791 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 52210.559135 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40014.861775 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 5978 # number of overall hits -system.cpu.l2cache.overall_miss_latency 251631000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 0.445917 # miss rate for overall accesses +system.cpu.l2cache.overall_hits 5980 # number of overall hits +system.cpu.l2cache.overall_miss_latency 251185000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.445834 # miss rate for overall accesses system.cpu.l2cache.overall_misses 4811 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 192487500 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 0.445917 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_latency 192511500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.445834 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 4811 # number of overall MSHR misses 
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 3030 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 2039.371088 # Cycle average of tags in use -system.cpu.l2cache.total_refs 5964 # Total number of references to valid blocks. +system.cpu.l2cache.tagsinuse 2038.814805 # Cycle average of tags in use +system.cpu.l2cache.total_refs 5966 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 202536123 # number of cpu cycles simulated +system.cpu.numCycles 197073489 # number of cpu cycles simulated +system.cpu.runCycles 190655578 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 202536123 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 98359969 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 98713520 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 50.089700 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 104416341 # Number of cycles 0 instructions are processed. 
+system.cpu.stage-1.runCycles 92657148 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 47.016546 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 103581004 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 93492485 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 47.440417 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 170536358 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 26537131 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 13.465602 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 105170433 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 91903056 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 46.633901 # Percentage of cycles stage was utilized (processing insts). 
+system.cpu.threadCycles 197073489 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 389 # Number of system calls ---------- End Simulation Statistics ---------- From a3c635f777f92536cf82d79baf26ef8ff50a0e4f Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:31:20 -0500 Subject: [PATCH 35/36] inorder: vortex alpha regression --- .../ref/alpha/tru64/inorder-timing/config.ini | 4 +- .../ref/alpha/tru64/inorder-timing/simout | 8 +- .../ref/alpha/tru64/inorder-timing/stats.txt | 234 ++++++++++-------- 3 files changed, 130 insertions(+), 116 deletions(-) diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini index ca54a43c1..95f95a25b 100644 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini @@ -63,6 +63,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side @@ -78,7 +79,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -113,7 +113,6 @@ hash_delay=1 latency=1000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=10000 @@ -148,7 +147,6 @@ hash_delay=1 latency=10000 max_miss_count=0 mshrs=10 -prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 prefetch_latency=100000 diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout index b0f68db24..2c2b59190 100755 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout @@ -5,10 +5,10 @@ The Regents 
of The University of Michigan All Rights Reserved -M5 compiled Jul 4 2009 20:43:52 -M5 revision 20167772fb15 6281 default tip -M5 started Jul 4 2009 20:43:52 -M5 executing on tater +M5 compiled Jan 30 2010 14:58:44 +M5 revision 4b602939e245 6707 default inorder_vortex_alpha qtip tip +M5 started Jan 30 2010 14:58:45 +M5 executing on zooks command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt index 2791e3ab6..f03c66752 100644 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt @@ -1,88 +1,87 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 66323 # Simulator instruction rate (inst/s) -host_mem_usage 296324 # Number of bytes of host memory used -host_seconds 1331.98 # Real time elapsed on the host -host_tick_rate 81990812 # Simulator tick rate (ticks/s) +host_inst_rate 51950 # Simulator instruction rate (inst/s) +host_mem_usage 166756 # Number of bytes of host memory used +host_seconds 1700.48 # Real time elapsed on the host +host_tick_rate 63220517 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 88340673 # Number of instructions simulated -sim_seconds 0.109210 # Number of seconds simulated -sim_ticks 109210014500 # Number of ticks simulated +sim_seconds 0.107505 # Number of seconds simulated +sim_ticks 107505320500 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 35224018 # Number of Instructions Requests that completed in this resource. 
-system.cpu.Branch-Predictor.instReqsProcessed 88340674 # Number of Instructions Requests that completed in this resource. -system.cpu.Branch-Predictor.predictedNotTaken 10443271 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 3311206 # Number of Branches Predicted As Taken (True). -system.cpu.Decode-Unit.instReqsProcessed 88340674 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.instReqsProcessed 88523379 # Number of Instructions Requests that completed in this resource. +system.cpu.Branch-Predictor.predictedNotTaken 10466150 # Number of Branches Predicted As Not Taken (False). +system.cpu.Branch-Predictor.predictedTaken 3314731 # Number of Branches Predicted As Taken (True). +system.cpu.Decode-Unit.instReqsProcessed 88523379 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 53070972 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 53075554 # Number of Instructions Requests that completed in this resource. -system.cpu.Execution-Unit.predictedNotTakenIncorrect 4515835 # Number of Branches Incorrectly Predicted As Not Taken). -system.cpu.Execution-Unit.predictedTakenIncorrect 1659774 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 184507615 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.predictedNotTakenIncorrect 4515839 # Number of Branches Incorrectly Predicted As Not Taken). 
+system.cpu.Execution-Unit.predictedTakenIncorrect 1659770 # Number of Branches Incorrectly Predicted As Taken. +system.cpu.Execution-Unit.utilization 0.246830 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 186350086 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 88340673 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 82202 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 41101 # Number of Multiply Requests Processed. -system.cpu.RegFile-Manager.instReqsProcessed 158796488 # Number of Instructions Requests that completed in this resource. +system.cpu.RegFile-Manager.instReqsProcessed 165783241 # Number of Instructions Requests that completed in this resource. 
+system.cpu.activity 86.931340 # Percentage of cycles cpu is active system.cpu.committedInsts 88340673 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 88340673 # Number of Instructions Simulated (Total) -system.cpu.cpi 2.472474 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 2.472474 # CPI: Total CPI of All Threads +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 2.433881 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 2.433881 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 20276638 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 38181.240129 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 35069.166968 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 20215854 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 2320808500 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.002998 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 60784 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 18 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 2131013000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_avg_miss_latency 38009.956226 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 34917.034197 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 20215872 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 2309713000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.002997 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 60766 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 2121768500 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.002997 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 60766 # 
number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 14613377 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56049.825426 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53049.825426 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56040.926479 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53040.926479 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 14463584 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 8395871500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 8394538500 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.010250 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 149793 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 7946492500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 7945159500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.010250 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 149793 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 169.741509 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 169.741568 # Average number of references to valid blocks. 
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 34890015 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 50891.977756 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 47860.720748 # average overall mshr miss latency -system.cpu.dcache.demand_hits 34679438 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 10716680000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_avg_miss_latency 50837.302134 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 47810.485422 # average overall mshr miss latency +system.cpu.dcache.demand_hits 34679456 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 10704251500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.006035 # miss rate for demand accesses -system.cpu.dcache.demand_misses 210577 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 18 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 10077505500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_misses 210559 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 10066928000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.006035 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 210559 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # 
number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 34890015 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 50891.977756 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 47860.720748 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 50837.302134 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 47810.485422 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 34679438 # number of overall hits -system.cpu.dcache.overall_miss_latency 10716680000 # number of overall miss cycles +system.cpu.dcache.overall_hits 34679456 # number of overall hits +system.cpu.dcache.overall_miss_latency 10704251500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.006035 # miss rate for overall accesses -system.cpu.dcache.overall_misses 210577 # number of overall misses -system.cpu.dcache.overall_mshr_hits 18 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 10077505500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_misses 210559 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 10066928000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.006035 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 210559 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -90,9 +89,9 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 200248 # number of replacements 
system.cpu.dcache.sampled_refs 204344 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 4077.182458 # Cycle average of tags in use -system.cpu.dcache.total_refs 34685659 # Total number of references to valid blocks. -system.cpu.dcache.warmup_cycle 848449000 # Cycle when the warmup percentage was hit. +system.cpu.dcache.tagsinuse 4076.864414 # Cycle average of tags in use +system.cpu.dcache.total_refs 34685671 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 848885000 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 147714 # number of writebacks system.cpu.dcache_port.instReqsProcessed 35224018 # Number of Instructions Requests that completed in this resource. system.cpu.dtb.data_accesses 34987415 # DTB accesses @@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses 14620629 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 14613377 # DTB write hits system.cpu.dtb.write_misses 7252 # DTB write misses -system.cpu.icache.ReadReq_accesses 96166938 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 19084.949617 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 15849.033723 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 96087744 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1511413500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_accesses 97826463 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 19024.038820 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 15840.795350 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 97745885 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1532919000 # number of ReadReq miss cycles 
system.cpu.icache.ReadReq_miss_rate 0.000824 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 79194 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 1266 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1235083500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.000810 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 80578 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 2650 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 1234441500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.000797 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 77928 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 1233.032338 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 1254.310197 # Average number of references to valid blocks. 
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 96166938 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 19084.949617 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 15849.033723 # average overall mshr miss latency -system.cpu.icache.demand_hits 96087744 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1511413500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_accesses 97826463 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 19024.038820 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 15840.795350 # average overall mshr miss latency +system.cpu.icache.demand_hits 97745885 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1532919000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.000824 # miss rate for demand accesses -system.cpu.icache.demand_misses 79194 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 1266 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1235083500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.000810 # mshr miss rate for demand accesses +system.cpu.icache.demand_misses 80578 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 2650 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 1234441500 # number of demand (read+write) MSHR miss cycles 
+system.cpu.icache.demand_mshr_miss_rate 0.000797 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 77928 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 96166938 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 19084.949617 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 15849.033723 # average overall mshr miss latency +system.cpu.icache.overall_accesses 97826463 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 19024.038820 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 15840.795350 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 96087744 # number of overall hits -system.cpu.icache.overall_miss_latency 1511413500 # number of overall miss cycles +system.cpu.icache.overall_hits 97745885 # number of overall hits +system.cpu.icache.overall_miss_latency 1532919000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.000824 # miss rate for overall accesses -system.cpu.icache.overall_misses 79194 # number of overall misses -system.cpu.icache.overall_mshr_hits 1266 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1235083500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.000810 # mshr miss rate for overall accesses +system.cpu.icache.overall_misses 80578 # number of overall misses +system.cpu.icache.overall_mshr_hits 2650 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 1234441500 # number of overall MSHR miss cycles 
+system.cpu.icache.overall_mshr_miss_rate 0.000797 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 77928 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 75882 # number of replacements system.cpu.icache.sampled_refs 77928 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 1874.320715 # Cycle average of tags in use -system.cpu.icache.total_refs 96087744 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 1873.747475 # Cycle average of tags in use +system.cpu.icache.total_refs 97745885 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 96166940 # Number of Instructions Requests that completed in this resource. -system.cpu.ipc 0.404453 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.404453 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 97826706 # Number of Instructions Requests that completed in this resource. 
+system.cpu.idleCycles 28099010 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.410867 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.410867 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 96170872 # ITB accesses +system.cpu.itb.fetch_accesses 97830397 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 96166938 # ITB hits +system.cpu.itb.fetch_hits 97826463 # ITB hits system.cpu.itb.fetch_misses 3934 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -185,31 +185,31 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 143578 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52038.849963 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.083578 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 7471634000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_avg_miss_latency 52034.768558 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.222875 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 7471048000 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 143578 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 5743132000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_latency 5743152000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for 
ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 143578 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 138694 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52316.057051 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40003.485162 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52087.681159 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40004.623879 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 95224 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 2274179000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 2264251500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.313424 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 43470 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1738951500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 1739001000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.313424 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 43470 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 6215 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 51993.805310 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000.884956 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 323141500 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_avg_miss_latency 51862.831858 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40002.815768 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 322327500 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 
1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 6215 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 248605500 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 248617500 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_mshr_misses 6215 # number of UpgradeReq MSHR misses system.cpu.l2cache.Writeback_accesses 147714 # number of Writeback accesses(hits+misses) @@ -223,29 +223,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 282272 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52103.272957 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40000.874107 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52047.065459 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40001.245670 # average overall mshr miss latency system.cpu.l2cache.demand_hits 95224 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 9745813000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 9735299500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.662652 # miss rate for demand accesses system.cpu.l2cache.demand_misses 187048 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 7482083500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 7482153000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 
0.662652 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 187048 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 282272 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52103.272957 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40000.874107 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52047.065459 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40001.245670 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 95224 # number of overall hits -system.cpu.l2cache.overall_miss_latency 9745813000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 9735299500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.662652 # miss rate for overall accesses system.cpu.l2cache.overall_misses 187048 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 7482083500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 7482153000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.662652 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 187048 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -253,16 +253,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 147733 # number of replacements 
system.cpu.l2cache.sampled_refs 172939 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 18262.944082 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 18257.402494 # Cycle average of tags in use system.cpu.l2cache.total_refs 110306 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 120636 # number of writebacks -system.cpu.numCycles 218420030 # number of cpu cycles simulated +system.cpu.numCycles 215010642 # number of cpu cycles simulated +system.cpu.runCycles 186911632 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 218420030 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 117180245 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 97830397 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 45.500258 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 126487263 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 88523379 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 41.171627 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 125185318 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 89825324 # Number of cycles 1+ instructions are processed. 
+system.cpu.stage-2.utilization 41.777153 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 179779372 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 35231270 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 16.385826 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 126669969 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 88340673 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 41.086651 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 215010642 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 4583 # Number of system calls ---------- End Simulation Statistics ---------- From 04466ab4ca04a4e1e195a6f68423792b2553dadb Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:31:28 -0500 Subject: [PATCH 36/36] inorder: update hello world mips --- .../ref/mips/linux/inorder-timing/config.ini | 1 + .../ref/mips/linux/inorder-timing/simout | 12 +- .../ref/mips/linux/inorder-timing/stats.txt | 209 ++++++++++-------- 3 files changed, 120 insertions(+), 102 deletions(-) diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini index 78a86bf82..8d2a24508 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini @@ -117,6 +117,7 @@ progress_interval=0 stageTracing=false stageWidth=1 system=system +threadModel=SMT tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout index 581c531f6..ce217f494 100755 --- 
a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jan 2 2010 07:01:31 -M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch -M5 started Jan 2 2010 07:03:09 -M5 executing on fajita -command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing +M5 compiled Jan 31 2010 17:08:14 +M5 revision 01508015f86b 6964 default qtip tip inorder_hello_mips +M5 started Jan 31 2010 17:08:15 +M5 executing on zooks +command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! 
-Exiting @ tick 29940500 because target called exit() +Exiting @ tick 29206500 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt index d55c721ca..df2d539f4 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt @@ -1,96 +1,96 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 10400 # Simulator instruction rate (inst/s) -host_mem_usage 205896 # Number of bytes of host memory used -host_seconds 0.56 # Real time elapsed on the host -host_tick_rate 53415864 # Simulator tick rate (ticks/s) +host_inst_rate 19644 # Simulator instruction rate (inst/s) +host_mem_usage 155856 # Number of bytes of host memory used +host_seconds 0.30 # Real time elapsed on the host +host_tick_rate 98307932 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5827 # Number of instructions simulated -sim_seconds 0.000030 # Number of seconds simulated -sim_ticks 29940500 # Number of ticks simulated +sim_seconds 0.000029 # Number of seconds simulated +sim_ticks 29206500 # Number of ticks simulated system.cpu.AGEN-Unit.instReqsProcessed 2090 # Number of Instructions Requests that completed in this resource. system.cpu.Branch-Predictor.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. system.cpu.Branch-Predictor.predictedNotTaken 826 # Number of Branches Predicted As Not Taken (False). system.cpu.Branch-Predictor.predictedTaken 90 # Number of Branches Predicted As Taken (True). system.cpu.Decode-Unit.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.cyclesExecuted 3725 # Number of Cycles Execution Unit was used. system.cpu.Execution-Unit.instReqsProcessed 3734 # Number of Instructions Requests that completed in this resource. 
system.cpu.Execution-Unit.predictedNotTakenIncorrect 541 # Number of Branches Incorrectly Predicted As Not Taken). system.cpu.Execution-Unit.predictedTakenIncorrect 35 # Number of Branches Incorrectly Predicted As Taken. -system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource. -system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed. -system.cpu.Fetch-Seq-Unit.instReqsProcessed 11657 # Number of Instructions Requests that completed in this resource. +system.cpu.Execution-Unit.utilization 0.063769 # Utilization of Execution Unit (cycles / totalCycles). +system.cpu.Fetch-Seq-Unit.instReqsProcessed 11702 # Number of Instructions Requests that completed in this resource. system.cpu.Graduation-Unit.instReqsProcessed 5827 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.divInstReqsProcessed 1 # Number of Divide Requests Processed. system.cpu.Mult-Div-Unit.instReqsProcessed 8 # Number of Instructions Requests that completed in this resource. system.cpu.Mult-Div-Unit.multInstReqsProcessed 3 # Number of Multiply Requests Processed. system.cpu.RegFile-Manager.instReqsProcessed 10713 # Number of Instructions Requests that completed in this resource. 
+system.cpu.activity 20.277673 # Percentage of cycles cpu is active system.cpu.committedInsts 5827 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 5827 # Number of Instructions Simulated (Total) -system.cpu.cpi 10.276643 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 10.276643 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1165 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56201.149425 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53201.149425 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1078 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 4889500 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.074678 # miss rate for ReadReq accesses +system.cpu.contextSwitches 1 # Number of context switches +system.cpu.cpi 10.024713 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 10.024713 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1164 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 56229.885057 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53229.885057 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1077 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 4892000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.074742 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 87 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 4628500 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.074678 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_latency 4631000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.074742 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 87 # 
number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 56554.687500 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53554.687500 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 56265.625000 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53265.625000 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 861 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 3619500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 3601000 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.069189 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 64 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 3427500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 3409000 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.069189 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 14.144928 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 14.137681 # Average number of references to valid blocks. 
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2090 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56350.993377 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1939 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 8509000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.072249 # miss rate for demand accesses +system.cpu.dcache.demand_accesses 2089 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 56245.033113 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53245.033113 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1938 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 8493000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.072283 # miss rate for demand accesses system.cpu.dcache.demand_misses 151 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 8056000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.072249 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_latency 8040000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.072283 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 151 # number of demand 
(read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2090 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56350.993377 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2089 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 56245.033113 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53245.033113 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1939 # number of overall hits -system.cpu.dcache.overall_miss_latency 8509000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.072249 # miss rate for overall accesses +system.cpu.dcache.overall_hits 1938 # number of overall hits +system.cpu.dcache.overall_miss_latency 8493000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.072283 # miss rate for overall accesses system.cpu.dcache.overall_misses 151 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 8056000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.072249 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_latency 8040000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.072283 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 151 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles 
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 88.212490 # Cycle average of tags in use -system.cpu.dcache.total_refs 1952 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 88.491296 # Cycle average of tags in use +system.cpu.dcache.total_refs 1951 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.dcache_port.instReqsProcessed 2089 # Number of Instructions Requests that completed in this resource. @@ -103,62 +103,63 @@ system.cpu.dtb.read_misses 0 # DT system.cpu.dtb.write_accesses 0 # DTB write accesses system.cpu.dtb.write_hits 0 # DTB write hits system.cpu.dtb.write_misses 0 # DTB write misses -system.cpu.icache.ReadReq_accesses 5829 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55765.676568 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 52765.676568 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 5526 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 16897000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.051981 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 5874 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 55801.980198 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 52801.980198 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 5571 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 16908000 # number of ReadReq miss cycles 
+system.cpu.icache.ReadReq_miss_rate 0.051583 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 303 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 15988000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.051981 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_miss_latency 15999000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.051583 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 303 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked -system.cpu.icache.avg_refs 18.237624 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 18.386139 # Average number of references to valid blocks. system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 5829 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55765.676568 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency -system.cpu.icache.demand_hits 5526 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 16897000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.051981 # miss rate for demand accesses +system.cpu.icache.demand_accesses 5874 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 55801.980198 
# average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 52801.980198 # average overall mshr miss latency +system.cpu.icache.demand_hits 5571 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 16908000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.051583 # miss rate for demand accesses system.cpu.icache.demand_misses 303 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 15988000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.051981 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_latency 15999000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.051583 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 303 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 5829 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55765.676568 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency +system.cpu.icache.overall_accesses 5874 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 55801.980198 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 52801.980198 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 5526 # number of overall hits -system.cpu.icache.overall_miss_latency 16897000 # number of overall miss cycles 
-system.cpu.icache.overall_miss_rate 0.051981 # miss rate for overall accesses +system.cpu.icache.overall_hits 5571 # number of overall hits +system.cpu.icache.overall_miss_latency 16908000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.051583 # miss rate for overall accesses system.cpu.icache.overall_misses 303 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 15988000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.051981 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_latency 15999000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.051583 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 303 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.replacements 13 # number of replacements system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 134.267603 # Cycle average of tags in use -system.cpu.icache.total_refs 5526 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 135.362853 # Cycle average of tags in use +system.cpu.icache.total_refs 5571 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.icache_port.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource. 
-system.cpu.ipc 0.097308 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.097308 # IPC: Total IPC of All Threads +system.cpu.icache_port.instReqsProcessed 5873 # Number of Instructions Requests that completed in this resource. +system.cpu.idleCycles 46569 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.099753 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.099753 # IPC: Total IPC of All Threads system.cpu.itb.accesses 0 # DTB accesses system.cpu.itb.hits 0 # DTB hits system.cpu.itb.misses 0 # DTB misses @@ -169,31 +170,31 @@ system.cpu.itb.write_accesses 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 51 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52500 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_miss_latency 52264.705882 # average ReadExReq miss latency system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40098.039216 # average ReadExReq mshr miss latency -system.cpu.l2cache.ReadExReq_miss_latency 2677500 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 2665500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 51 # number of ReadExReq misses system.cpu.l2cache.ReadExReq_mshr_miss_latency 2045000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 51 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 390 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52052.835052 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40023.195876 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52091.494845 # average 
ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40048.969072 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 20196500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 20211500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.994872 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 388 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 15529000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 15539000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994872 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 388 # number of ReadReq MSHR misses system.cpu.l2cache.UpgradeReq_accesses 13 # number of UpgradeReq accesses(hits+misses) -system.cpu.l2cache.UpgradeReq_avg_miss_latency 52538.461538 # average UpgradeReq miss latency -system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40076.923077 # average UpgradeReq mshr miss latency -system.cpu.l2cache.UpgradeReq_miss_latency 683000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_avg_miss_latency 52192.307692 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40153.846154 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 678500 # number of UpgradeReq miss cycles system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_misses 13 # number of UpgradeReq misses -system.cpu.l2cache.UpgradeReq_mshr_miss_latency 521000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 522000 # number of UpgradeReq MSHR miss cycles system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses system.cpu.l2cache.UpgradeReq_mshr_misses 13 # number of UpgradeReq MSHR 
misses system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -205,29 +206,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 441 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52104.783599 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52111.617312 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40054.669704 # average overall mshr miss latency system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 22874000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 22877000 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.995465 # miss rate for demand accesses system.cpu.l2cache.demand_misses 439 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 17574000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 17584000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.995465 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 439 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 441 # number of overall (read+write) accesses 
-system.cpu.l2cache.overall_avg_miss_latency 52104.783599 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52111.617312 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40054.669704 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 2 # number of overall hits -system.cpu.l2cache.overall_miss_latency 22874000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 22877000 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.995465 # miss rate for overall accesses system.cpu.l2cache.overall_misses 439 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 17574000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 17584000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.995465 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 439 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -235,16 +236,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 375 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 185.807591 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 187.032260 # Cycle average of tags in use system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 59882 # number of cpu cycles simulated +system.cpu.numCycles 58414 # number of cpu cycles simulated +system.cpu.runCycles 11845 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) -system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT) +system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.threadCycles 59882 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-0.idleCycles 52540 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 5874 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 10.055809 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 52586 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 5828 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 9.977060 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 52582 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 5832 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 9.983908 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 56324 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.runCycles 2090 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 3.577909 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 52587 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 5827 # Number of cycles 1+ instructions are processed. 
+system.cpu.stage-4.utilization 9.975348 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 58414 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.workload.PROG:num_syscalls 8 # Number of system calls ---------- End Simulation Statistics ----------