diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index 93b74ebb0..f374b8fb7 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -369,6 +369,8 @@ class DefaultCommit /** Rename map interface. */ RenameMap *renameMap[Impl::MaxThreads]; + void updateComInstStats(DynInstPtr &inst); + /** Stat for the total number of committed instructions. */ Stats::Scalar<> commitCommittedInsts; /** Stat for the total number of squashed instructions discarded by commit. @@ -383,15 +385,26 @@ class DefaultCommit */ Stats::Scalar<> commitNonSpecStalls; /** Stat for the total number of committed branches. */ - Stats::Scalar<> commitCommittedBranches; +// Stats::Scalar<> commitCommittedBranches; /** Stat for the total number of committed loads. */ - Stats::Scalar<> commitCommittedLoads; +// Stats::Scalar<> commitCommittedLoads; /** Stat for the total number of committed memory references. */ - Stats::Scalar<> commitCommittedMemRefs; +// Stats::Scalar<> commitCommittedMemRefs; /** Stat for the total number of branch mispredicts that caused a squash. */ Stats::Scalar<> branchMispredicts; /** Distribution of the number of committed instructions each cycle. 
*/ Stats::Distribution<> numCommittedDist; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; }; #endif // __CPU_O3_COMMIT_HH__ diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index ef1ba9282..157e688c7 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -133,6 +133,7 @@ template void DefaultCommit::regStats() { + using namespace Stats; commitCommittedInsts .name(name() + ".commitCommittedInsts") .desc("The number of committed instructions") @@ -150,6 +151,7 @@ DefaultCommit::regStats() .desc("The number of times commit has been forced to stall to " "communicate backwards") .prereq(commitNonSpecStalls); +/* commitCommittedBranches .name(name() + ".commitCommittedBranches") .desc("The number of committed branches") @@ -162,6 +164,7 @@ DefaultCommit::regStats() .name(name() + ".commitCommittedMemRefs") .desc("The number of committed memory references") .prereq(commitCommittedMemRefs); +*/ branchMispredicts .name(name() + ".branchMispredicts") .desc("The number of times a branch was mispredicted") @@ -172,6 +175,73 @@ DefaultCommit::regStats() .desc("Number of insts commited each cycle") .flags(Stats::pdf) ; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + 
.flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; } template @@ -1060,9 +1130,7 @@ head_inst->isWriteBarrier())*/ return false; } - if (head_inst->isControl()) { - ++commitCommittedBranches; - } + updateComInstStats(head_inst); // Now that the instruction is going to be committed, finalize its // trace data. 
@@ -1186,6 +1254,55 @@ DefaultCommit::robDoneSquashing()
     return true;
 }
 
+/**
+ * Updates the per-thread commit statistics for an instruction that is
+ * being committed.
+ * Every counter touched here is a Stats::Vector indexed by the thread
+ * number of the instruction. When TARGET_ALPHA is defined, data
+ * prefetches are counted in stat_com_swp instead of stat_com_inst.
+ * @param inst The instruction being committed.
+ */
+template <class Impl>
+void
+DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+    unsigned thread = inst->threadNumber;
+
+    //
+    //  Pick off the software prefetches
+    //
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch()) {
+        stat_com_swp[thread]++;
+    } else {
+        stat_com_inst[thread]++;
+    }
+#else
+    stat_com_inst[thread]++;
+#endif
+
+    //
+    //  Control Instructions
+    //
+    if (inst->isControl())
+        stat_com_branches[thread]++;
+
+    //
+    //  Memory references
+    //
+    if (inst->isMemRef()) {
+        stat_com_refs[thread]++;
+
+        if (inst->isLoad()) {
+            stat_com_loads[thread]++;
+        }
+    }
+
+    if (inst->isMemBarrier()) {
+        stat_com_membars[thread]++;
+    }
+}
+
 //////////////////////////////////////// // // // SMT COMMIT POLICY MAITAINED HERE //
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index f0f3f2745..f0b15cb86 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -370,6 +370,7 @@ class DefaultFetch Stats::Scalar<> icacheStallCycles; /** Stat for total number of fetched instructions. */ Stats::Scalar<> fetchedInsts; + Stats::Scalar<> fetchedBranches; /** Stat for total number of predicted branches. */ Stats::Scalar<> predictedBranches; /** Stat for total number of cycles spent fetching. */ @@ -383,6 +384,8 @@ class DefaultFetch Stats::Scalar<> fetchBlockedCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar<> fetchedCacheLines; + + Stats::Scalar<> fetchIcacheSquashes; /** Distribution of number of instructions fetched each cycle. 
*/ Stats::Distribution<> fetchNisnDist; Stats::Formula idleRate; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index 7abc5733f..563a767df 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -178,6 +178,11 @@ DefaultFetch::regStats() .desc("Number of instructions fetch has processed") .prereq(fetchedInsts); + fetchedBranches + .name(name() + ".fetchedBranches") + .desc("Number of branches that fetch encountered") + .prereq(fetchedBranches); + predictedBranches .name(name() + ".predictedBranches") .desc("Number of branches that fetch has predicted taken") @@ -209,6 +214,11 @@ DefaultFetch::regStats() .desc("Number of cache lines fetched") .prereq(fetchedCacheLines); + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + .prereq(fetchIcacheSquashes); + fetchNisnDist .init(/* base value */ 0, /* last value */ fetchWidth, @@ -322,8 +332,10 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) // Can keep track of how many cache accesses go unused due to // misspeculation here. if (fetchStatus[tid] != IcacheMissStall || - req != memReq[tid]) + req != memReq[tid]) { + ++fetchIcacheSquashes; return; + } // Wake up the CPU (if it went to sleep and was waiting on this completion // event). @@ -400,6 +412,8 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); + ++fetchedBranches; + if (predict_taken) { ++predictedBranches; } @@ -457,6 +471,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // If translation was successful, attempt to read the first // instruction. 
if (fault == NoFault) { +#if FULL_SYSTEM if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) { DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", @@ -464,6 +479,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid ret_fault = TheISA::genMachineCheckFault(); return false; } +#endif DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); fault = cpu->mem->read(memReq[tid], cacheData[tid]); @@ -480,6 +496,8 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid MemAccessResult result = icacheInterface->access(memReq[tid]); + fetchedCacheLines++; + // If the cache missed, then schedule an event to wake // up this stage once the cache miss completes. // @todo: Possibly allow for longer than 1 cycle cache hits. @@ -499,8 +517,6 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid "read.\n", tid); // memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); - - fetchedCacheLines++; } } else { DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); @@ -889,10 +905,14 @@ DefaultFetch::fetch(bool &status_change) if (!fetch_success) return; } else { - if (fetchStatus[tid] == Blocked) { + if (fetchStatus[tid] == Idle) { + ++fetchIdleCycles; + } else if (fetchStatus[tid] == Blocked) { ++fetchBlockedCycles; } else if (fetchStatus[tid] == Squashing) { ++fetchSquashCycles; + } else if (fetchStatus[tid] == IcacheMissStall) { + ++icacheStallCycles; } // Status is Idle, Squashing, Blocked, or IcacheMissStall, so @@ -904,6 +924,7 @@ DefaultFetch::fetch(bool &status_change) // If we had a stall due to an icache miss, then return. 
if (fetchStatus[tid] == IcacheMissStall) { + ++icacheStallCycles; status_change = true; return; } diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index e55837812..58cd68b21 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -278,6 +278,8 @@ class DefaultIEW void tick(); private: + void updateExeInstStats(DynInstPtr &inst); + /** Pointer to main time buffer used for backwards communication. */ TimeBuffer *timeBuffer; @@ -443,9 +445,9 @@ class DefaultIEW /** Stat for total number of executed instructions. */ Stats::Scalar<> iewExecutedInsts; /** Stat for total number of executed load instructions. */ - Stats::Scalar<> iewExecLoadInsts; + Stats::Vector<> iewExecLoadInsts; /** Stat for total number of executed store instructions. */ - Stats::Scalar<> iewExecStoreInsts; +// Stats::Scalar<> iewExecStoreInsts; /** Stat for total number of squashed instructions skipped at execute. */ Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ @@ -456,6 +458,33 @@ class DefaultIEW Stats::Scalar<> predictedNotTakenIncorrect; /** Stat for total number of mispredicted branches detected at execute. 
*/ Stats::Formula branchMispredicts; + + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_branches; + +// Stats::Vector<> issued_ops; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; +*/ + Stats::Formula issue_rate; + Stats::Formula iewExecStoreInsts; +// Stats::Formula issue_op_rate; +// Stats::Formula fu_busy_rate; + + Stats::Vector<> iewInstsToCommit; + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; }; #endif // __CPU_O3_IEW_HH__ diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index 21eb7dcf8..2ae2e1361 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -140,6 +140,8 @@ template void DefaultIEW::regStats() { + using namespace Stats; + instQueue.regStats(); //ldstQueue.regStats(); @@ -195,13 +197,15 @@ DefaultIEW::regStats() .desc("Number of executed instructions"); iewExecLoadInsts + .init(cpu->number_of_threads) .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed"); - + .desc("Number of load instructions executed") + .flags(total); +/* iewExecStoreInsts .name(name() + ".iewExecStoreInsts") .desc("Number of store instructions executed"); - +*/ iewExecSquashedInsts .name(name() + ".iewExecSquashedInsts") .desc("Number of squashed instructions skipped in execute"); @@ -223,6 +227,116 @@ DefaultIEW::regStats() .desc("Number of branch mispredicts detected at execute"); branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".EXEC:swp") + .desc("number of swp insts executed") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".EXEC:nop") + .desc("number of nop insts executed") + .flags(total) + ; 
+ + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".EXEC:refs") + .desc("number of memory reference insts executed") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".EXEC:branches") + .desc("Number of branches executed") + .flags(total) + ; + + issue_rate + .name(name() + ".EXEC:rate") + .desc("Inst execution rate") + .flags(total) + ; + issue_rate = iewExecutedInsts / cpu->numCycles; + + iewExecStoreInsts + .name(name() + ".EXEC:stores") + .desc("Number of stores executed") + .flags(total) + ; + iewExecStoreInsts = exe_refs - iewExecLoadInsts; +/* + for (int i=0; inumber_of_threads) + .name(name() + ".WB:sent") + .desc("cumulative count of insts sent to commit") + .flags(total) + ; + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; } template @@ -990,6 +1104,8 @@ DefaultIEW::dispatchInsts(unsigned tid) instQueue.advanceTail(inst); + 
exe_nop[tid]++; + add_to_iq = false; } else if (inst->isExecuted()) { assert(0 && "Instruction shouldn't be executed.\n"); @@ -1124,11 +1240,11 @@ DefaultIEW::executeInsts() // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); - ++iewExecLoadInsts; +// ++iewExecLoadInsts; } else if (inst->isStore()) { ldstQueue.executeStore(inst); - ++iewExecStoreInsts; +// ++iewExecStoreInsts; // If the store had a fault then it may not have a mem req if (inst->req && !(inst->req->flags & LOCKED)) { @@ -1146,13 +1262,13 @@ DefaultIEW::executeInsts() } else { inst->execute(); - ++iewExecutedInsts; - inst->setExecuted(); instToCommit(inst); } + updateExeInstStats(inst); + // Check if branch was correct. This check happens after the // instruction is added to the queue because even if the branch // is mispredicted, the branch instruction itself is still valid. @@ -1243,17 +1359,20 @@ DefaultIEW::writebackInsts() for (int inst_num = 0; inst_num < issueWidth && toCommit->insts[inst_num]; inst_num++) { DynInstPtr inst = toCommit->insts[inst_num]; + int tid = inst->threadNumber; DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n", inst->readPC()); + iewInstsToCommit[tid]++; + // Some instructions will be sent to commit without having // executed because they need commit to handle them. // E.g. Uncached loads have not actually executed when they // are first sent to commit. Instead commit must tell the LSQ // when it's ready to execute the uncached load. 
if (!inst->isSquashed() && inst->isExecuted()) { - instQueue.wakeDependents(inst); + int dependents = instQueue.wakeDependents(inst); for (int i = 0; i < inst->numDestRegs(); i++) { //mark as Ready @@ -1261,6 +1380,10 @@ DefaultIEW::writebackInsts() inst->renamedDestRegIdx(i)); scoreboard->setReg(inst->renamedDestRegIdx(i)); } + + producer_inst[tid]++; + consumer_inst[tid]+= dependents; + writeback_count[tid]++; } } }
@@ -1390,3 +1513,48 @@ DefaultIEW::tick()
         cpu->activityThisCycle();
     }
 }
+
+/**
+ * Updates the execute-stage statistics for an instruction handled by
+ * executeInsts().
+ * iewExecutedInsts is a Stats::Scalar, so it is incremented without a
+ * thread index; the other counters are per-thread Stats::Vector objects.
+ * When TARGET_ALPHA is defined, data prefetches are counted in exe_swp
+ * instead of iewExecutedInsts.
+ * @param inst The instruction whose execution is being recorded.
+ */
+template <class Impl>
+void
+DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+    int thread_number = inst->threadNumber;
+
+    //
+    //  Pick off the software prefetches
+    //
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch())
+        exe_swp[thread_number]++;
+    else
+        iewExecutedInsts++;
+#else
+    iewExecutedInsts++;
+#endif
+
+    //
+    //  Control operations
+    //
+    if (inst->isControl())
+        exe_branches[thread_number]++;
+
+    //
+    //  Memory operations
+    //
+    if (inst->isMemRef()) {
+        exe_refs[thread_number]++;
+
+        if (inst->isLoad()) {
+            iewExecLoadInsts[thread_number]++;
+        }
+    }
+}
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 283bbdc22..06d9937f2 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -185,7 +185,7 @@ class InstructionQueue void commit(const InstSeqNum &inst, unsigned tid = 0); /** Wakes all dependents of a completed instruction. */ - void wakeDependents(DynInstPtr &completed_inst); + int wakeDependents(DynInstPtr &completed_inst); /** Adds a ready memory instruction to the ready list. */ void addReadyMemInst(DynInstPtr &ready_inst); @@ -479,6 +479,7 @@ class InstructionQueue /** Stat for number of non-speculative instructions added. */ Stats::Scalar<> iqNonSpecInstsAdded; // Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqInstsIssued; /** Stat for number of integer instructions issued. 
*/ Stats::Scalar<> iqIntInstsIssued; // Stats::Scalar<> iqFloatInstsAdded; @@ -505,6 +506,20 @@ class InstructionQueue */ Stats::Scalar<> iqSquashedNonSpecRemoved; + Stats::VectorDistribution<> queue_res_dist; + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::Vector<> stat_fu_busy; +// Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula issue_rate; +// Stats::Formula issue_stores; +// Stats::Formula issue_op_rate; + Stats::Vector<> fu_busy; //cumulative fu busy + + Stats::Formula fu_busy_rate; }; #endif //__CPU_O3_INST_QUEUE_HH__ diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index cfdd25cd5..804bc2472 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -224,6 +224,7 @@ template void InstructionQueue::regStats() { + using namespace Stats; iqInstsAdded .name(name() + ".iqInstsAdded") .desc("Number of instructions added to the IQ (excludes non-spec)") @@ -236,6 +237,11 @@ InstructionQueue::regStats() // iqIntInstsAdded; + iqInstsIssued + .name(name() + ".iqInstsIssued") + .desc("Number of instructions issued") + .prereq(iqInstsIssued); + iqIntInstsIssued .name(name() + ".iqIntInstsIssued") .desc("Number of integer instructions issued") @@ -291,6 +297,103 @@ InstructionQueue::regStats() .desc("Number of squashed non-spec instructions that were removed") .prereq(iqSquashedNonSpecRemoved); + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + n_issued_dist + .init(totalWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; +/* + dist_unissued + .init(Num_OpClasses+2) + .name(name() + ".ISSUE:unissued_cause") + .desc("Reason ready instruction not issued") + .flags(pdf | 
dist) + ; + for (int i=0; i < (Num_OpClasses + 2); ++i) { + dist_unissued.subname(i, unissued_names[i]); + } +*/ + stat_issued_inst_type + .init(numThreads,Num_OpClasses) + .name(name() + ".ISSUE:FU_type") + .desc("Type of FU issued") + .flags(total | pdf | dist) + ; + stat_issued_inst_type.ysubnames(opClassStrings); + + // + // How long did instructions for a particular FU type wait prior to issue + // + + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + for (int i=0; inumCycles; +/* + issue_stores + .name(name() + ".ISSUE:stores") + .desc("Number of stores issued") + .flags(total) + ; + issue_stores = exe_refs - exe_loads; +*/ +/* + issue_op_rate + .name(name() + ".ISSUE:op_rate") + .desc("Operation issue rate") + .flags(total) + ; + issue_op_rate = issued_ops / numCycles; +*/ + stat_fu_busy + .init(Num_OpClasses) + .name(name() + ".ISSUE:fu_full") + .desc("attempts to use FU when none available") + .flags(pdf | dist) + ; + for (int i=0; i < Num_OpClasses; ++i) { + stat_fu_busy.subname(i, opClassStrings[i]); + } + + fu_busy + .init(numThreads) + .name(name() + ".ISSUE:fu_busy_cnt") + .desc("FU busy when requested") + .flags(total) + ; + + fu_busy_rate + .name(name() + ".ISSUE:fu_busy_rate") + .desc("FU busy rate (busy events/executed inst)") + .flags(total) + ; + fu_busy_rate = fu_busy / iqInstsIssued; + for ( int i=0; i < numThreads; i++) { // Tell mem dependence unit to reg stats as well. 
memDepUnit[i].regStats(); @@ -658,6 +761,8 @@ InstructionQueue::scheduleReadyInsts() int idx = fuPool->getUnit(op_class); + int tid = issuing_inst->threadNumber; + if (idx == -2) { assert(op_class == No_OpClass); @@ -666,7 +771,7 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU" " %#x [sn:%lli]\n", - issuing_inst->threadNumber, issuing_inst->readPC(), + tid, issuing_inst->readPC(), issuing_inst->seqNum); readyInsts[op_class].pop(); @@ -685,14 +790,15 @@ InstructionQueue::scheduleReadyInsts() // Memory instructions can not be freed from the IQ until they // complete. ++freeEntries; - count[issuing_inst->threadNumber]--; + count[tid]--; issuing_inst->removeInIQ(); } else { - memDepUnit[issuing_inst->threadNumber].issue(issuing_inst); + memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); + stat_issued_inst_type[tid][op_class]++; } else if (idx != -1) { int op_latency = fuPool->getOpLatency(op_class); @@ -722,7 +828,7 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x " "[sn:%lli]\n", - issuing_inst->threadNumber, issuing_inst->readPC(), + tid, issuing_inst->readPC(), issuing_inst->seqNum); readyInsts[op_class].pop(); @@ -741,14 +847,17 @@ InstructionQueue::scheduleReadyInsts() // Memory instructions can not be freed from the IQ until they // complete. 
++freeEntries; - count[issuing_inst->threadNumber]--; + count[tid]--; issuing_inst->removeInIQ(); } else { - memDepUnit[issuing_inst->threadNumber].issue(issuing_inst); + memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); + stat_issued_inst_type[tid][op_class]++; } else { + stat_fu_busy[op_class]++; + fu_busy[tid]++; ++order_it; } } @@ -808,9 +917,11 @@ InstructionQueue::commit(const InstSeqNum &inst, unsigned tid) } template -void +int InstructionQueue::wakeDependents(DynInstPtr &completed_inst) { + int dependents = 0; + DPRINTF(IQ, "Waking dependents of completed instruction.\n"); assert(!completed_inst->isSquashed()); @@ -875,6 +986,8 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) curr = prev->next; prev->inst = NULL; + ++dependents; + delete prev; } @@ -886,6 +999,7 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) // Mark the scoreboard as having that register ready. regScoreboard[dest_reg] = true; } + return dependents; } template diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index d5beccde9..c6f8f97aa 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -90,7 +90,7 @@ class DefaultRename Squashing, Blocked, Unblocking, - BarrierStall + SerializeStall }; private: @@ -359,8 +359,8 @@ class DefaultRename /** Tracks which stages are telling decode to stall. */ Stalls stalls[Impl::MaxThreads]; - /** The barrier instruction that rename has stalled on. */ - DynInstPtr barrierInst[Impl::MaxThreads]; + /** The serialize instruction that rename has stalled on. */ + DynInstPtr serializeInst[Impl::MaxThreads]; /** Records if rename needs to serialize on the next instruction for any * thread. @@ -419,8 +419,8 @@ class DefaultRename Stats::Scalar<> renameIdleCycles; /** Stat for total number of cycles spent blocking. */ Stats::Scalar<> renameBlockCycles; - /** Stat for total number of cycles spent stalling for a barrier. 
*/ - Stats::Scalar<> renameBarrierCycles; + /** Stat for total number of cycles spent stalling for a serializing inst. */ + Stats::Scalar<> renameSerializeStallCycles; /** Stat for total number of cycles spent running normally. */ Stats::Scalar<> renameRunCycles; /** Stat for total number of cycles spent unblocking. */ @@ -446,6 +446,8 @@ class DefaultRename Stats::Scalar<> renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. */ Stats::Scalar<> renameUndoneMaps; + Stats::Scalar<> renamedSerializing; + Stats::Scalar<> renamedTempSerializing; }; #endif // __CPU_O3_RENAME_HH__ diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index 441118ef1..e29211921 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -53,7 +53,7 @@ DefaultRename::DefaultRename(Params *params) stalls[i].iew = false; stalls[i].commit = false; - barrierInst[i] = NULL; + serializeInst[i] = NULL; instsInProgress[i] = 0; @@ -78,69 +78,79 @@ void DefaultRename::regStats() { renameSquashCycles - .name(name() + ".renameSquashCycles") + .name(name() + ".RENAME:SquashCycles") .desc("Number of cycles rename is squashing") .prereq(renameSquashCycles); renameIdleCycles - .name(name() + ".renameIdleCycles") + .name(name() + ".RENAME:IdleCycles") .desc("Number of cycles rename is idle") .prereq(renameIdleCycles); renameBlockCycles - .name(name() + ".renameBlockCycles") + .name(name() + ".RENAME:BlockCycles") .desc("Number of cycles rename is blocking") .prereq(renameBlockCycles); - renameBarrierCycles - .name(name() + ".renameBarrierCycles") - .desc("Number of cycles rename is blocking due to a barrier stall") - .prereq(renameBarrierCycles); + renameSerializeStallCycles + .name(name() + ".RENAME:serializeStallCycles") + .desc("count of cycles rename stalled for serializing inst") + .flags(Stats::total); renameRunCycles - .name(name() + ".renameRunCycles") + .name(name() + ".RENAME:RunCycles") .desc("Number of cycles rename is running") 
.prereq(renameIdleCycles); renameUnblockCycles - .name(name() + ".renameUnblockCycles") + .name(name() + ".RENAME:UnblockCycles") .desc("Number of cycles rename is unblocking") .prereq(renameUnblockCycles); renameRenamedInsts - .name(name() + ".renameRenamedInsts") + .name(name() + ".RENAME:RenamedInsts") .desc("Number of instructions processed by rename") .prereq(renameRenamedInsts); renameSquashedInsts - .name(name() + ".renameSquashedInsts") + .name(name() + ".RENAME:SquashedInsts") .desc("Number of squashed instructions processed by rename") .prereq(renameSquashedInsts); renameROBFullEvents - .name(name() + ".renameROBFullEvents") + .name(name() + ".RENAME:ROBFullEvents") .desc("Number of times rename has blocked due to ROB full") .prereq(renameROBFullEvents); renameIQFullEvents - .name(name() + ".renameIQFullEvents") + .name(name() + ".RENAME:IQFullEvents") .desc("Number of times rename has blocked due to IQ full") .prereq(renameIQFullEvents); renameLSQFullEvents - .name(name() + ".renameLSQFullEvents") + .name(name() + ".RENAME:LSQFullEvents") .desc("Number of times rename has blocked due to LSQ full") .prereq(renameLSQFullEvents); renameFullRegistersEvents - .name(name() + ".renameFullRegisterEvents") + .name(name() + ".RENAME:FullRegisterEvents") .desc("Number of times there has been no free registers") .prereq(renameFullRegistersEvents); renameRenamedOperands - .name(name() + ".renameRenamedOperands") + .name(name() + ".RENAME:RenamedOperands") .desc("Number of destination operands rename has renamed") .prereq(renameRenamedOperands); renameRenameLookups - .name(name() + ".renameRenameLookups") + .name(name() + ".RENAME:RenameLookups") .desc("Number of register rename lookups that rename has made") .prereq(renameRenameLookups); renameCommittedMaps - .name(name() + ".renameCommittedMaps") + .name(name() + ".RENAME:CommittedMaps") .desc("Number of HB maps that are committed") .prereq(renameCommittedMaps); renameUndoneMaps - .name(name() + ".renameUndoneMaps") 
+ .name(name() + ".RENAME:UndoneMaps") .desc("Number of HB maps that are undone due to squashing") .prereq(renameUndoneMaps); + renamedSerializing + .name(name() + ".RENAME:serializingInsts") + .desc("count of serializing insts renamed") + .flags(Stats::total) + ; + renamedTempSerializing + .name(name() + ".RENAME:tempSerializingInsts") + .desc("count of temporary serializing insts renamed") + .flags(Stats::total) + ; } template @@ -254,7 +264,7 @@ DefaultRename::squash(unsigned tid) // cycle and there should be space to hold everything due to the squash. if (renameStatus[tid] == Blocked || renameStatus[tid] == Unblocking || - renameStatus[tid] == BarrierStall) { + renameStatus[tid] == SerializeStall) { #if !FULL_SYSTEM // In syscall emulation, we can have both a block and a squash due // to a syscall in the same cycle. This would cause both signals to @@ -267,7 +277,7 @@ DefaultRename::squash(unsigned tid) #else toDecode->renameUnblock[tid] = 1; #endif - barrierInst[tid] = NULL; + serializeInst[tid] = NULL; } // Set the status to Squashing. @@ -370,8 +380,8 @@ DefaultRename::rename(bool &status_change, unsigned tid) ++renameBlockCycles; } else if (renameStatus[tid] == Squashing) { ++renameSquashCycles; - } else if (renameStatus[tid] == BarrierStall) { - ++renameBarrierCycles; + } else if (renameStatus[tid] == SerializeStall) { + ++renameSerializeStallCycles; } if (renameStatus[tid] == Running || @@ -535,14 +545,18 @@ DefaultRename::renameInsts(unsigned tid) if (inst->isSerializeBefore() && !inst->isSerializeHandled()) { DPRINTF(Rename, "Serialize before instruction encountered.\n"); - if (!inst->isTempSerializeBefore()) + if (!inst->isTempSerializeBefore()) { + renamedSerializing++; inst->setSerializeHandled(); + } else { + renamedTempSerializing++; + } - // Change status over to BarrierStall so that other stages know + // Change status over to SerializeStall so that other stages know // what this is blocked on. 
- renameStatus[tid] = BarrierStall; + renameStatus[tid] = SerializeStall; - barrierInst[tid] = inst; + serializeInst[tid] = inst; blockThisCycle = true; @@ -716,9 +730,9 @@ DefaultRename::block(unsigned tid) wroteToTimeBuffer = true; } - // Rename can not go from BarrierStall to Blocked, otherwise it would - // not know to complete the barrier stall. - if (renameStatus[tid] != BarrierStall) { + // Rename can not go from SerializeStall to Blocked, otherwise it would + // not know to complete the serialize stall. + if (renameStatus[tid] != SerializeStall) { // Set status to Blocked. renameStatus[tid] = Blocked; return true; @@ -735,7 +749,7 @@ DefaultRename::unblock(unsigned tid) DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid); // Rename is done unblocking if the skid buffer is empty. - if (skidBuffer[tid].empty() && renameStatus[tid] != BarrierStall) { + if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) { DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid); @@ -1008,9 +1022,9 @@ DefaultRename::checkStall(unsigned tid) } else if (renameMap[tid]->numFreeEntries() <= 0) { DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid); ret_val = true; - } else if (renameStatus[tid] == BarrierStall && + } else if (renameStatus[tid] == SerializeStall && (!emptyROB[tid] || instsInProgress[tid])) { - DPRINTF(Rename,"[tid:%i]: Stall: Barrier stall and ROB is not " + DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not " "empty.\n", tid); ret_val = true; @@ -1064,7 +1078,7 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) // if so then go to unblocking // If status was Squashing // check if squashing is not high. Switch to running this cycle. 
- // If status was barrier stall + // If status was serialize stall // check if ROB is empty and no insts are in flight to the ROB readFreeEntries(tid); @@ -1113,12 +1127,12 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) return false; } - if (renameStatus[tid] == BarrierStall) { + if (renameStatus[tid] == SerializeStall) { // Stall ends once the ROB is free. - DPRINTF(Rename, "[tid:%u]: Done with barrier stall, switching to " + DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to " "unblocking.\n", tid); - DynInstPtr barr_inst = barrierInst[tid]; + DynInstPtr serial_inst = serializeInst[tid]; renameStatus[tid] = Unblocking; @@ -1126,21 +1140,21 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with " "PC %#x.\n", - tid, barr_inst->seqNum, barr_inst->readPC()); + tid, serial_inst->seqNum, serial_inst->readPC()); // Put instruction into queue here. - barr_inst->clearSerializeBefore(); + serial_inst->clearSerializeBefore(); if (!skidBuffer[tid].empty()) { - skidBuffer[tid].push_front(barr_inst); + skidBuffer[tid].push_front(serial_inst); } else { - insts[tid].push_front(barr_inst); + insts[tid].push_front(serial_inst); } DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." " Adding to front of list.", tid); - barrierInst[tid] = NULL; + serializeInst[tid] = NULL; return true; }