#include "encumbered/cpu/full/op_class.hh"
#include "cpu/ozone/back_end.hh"

/**
 * Builds the instruction queue. Capacity and issue width are taken from
 * the supplied parameters; the queue starts out with no instructions.
 */
template <class Impl>
BackEnd<Impl>::InstQueue::InstQueue(Params *params)
    : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
{
}

/** Returns the name of the owning back end with ".iq" appended. */
template <class Impl>
std::string
BackEnd<Impl>::InstQueue::name() const
{
    return be->name() + ".iq";
}

/**
 * Registers the instruction queue statistics and the formulas derived
 * from them. Stat names are formed as name() + "." + <stat>.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::regStats()
{
    using namespace Stats;

    occ_dist
        .init(1, 0, size, 2)
        .name(name() + ".occ_dist")
        .desc("IQ Occupancy per cycle")
        .flags(total | cdf);

    inst_count
        .init(1)
        .name(name() + ".cum_num_insts")
        .desc("Total occupancy")
        .flags(total);

    peak_inst_count
        .init(1)
        .name(name() + ".peak_occupancy")
        .desc("Peak IQ occupancy")
        .flags(total);

    current_count
        .name(name() + ".current_count")
        .desc("Occupancy this cycle");

    empty_count
        .name(name() + ".empty_count")
        .desc("Number of empty cycles");

    fullCount
        .name(name() + ".full_count")
        .desc("Number of full cycles");

    occ_rate
        .name(name() + ".occ_rate")
        .desc("Average occupancy")
        .flags(total);
    occ_rate = inst_count / be->cpu->numCycles;

    avg_residency
        .name(name() + ".avg_residency")
        .desc("Average IQ residency")
        .flags(total);
    avg_residency = occ_rate / be->cpu->numCycles;

    empty_rate
        .name(name() + ".empty_rate")
        .desc("Fraction of cycles empty");
    empty_rate = 100 * empty_count / be->cpu->numCycles;

    full_rate
        .name(name() + ".full_rate")
        .desc("Fraction of cycles full");
    full_rate = 100 * fullCount / be->cpu->numCycles;
}

/**
 * Records the issue-to-execute time buffer and grabs its wire at offset 0,
 * which this queue uses to report how many instructions it issued.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
{
    i2e = i2e_queue;
    numIssued = i2e->getWire(0);
}

/**
 * Inserts an instruction into the queue.
 *
 * Non-speculative instructions go on the nonSpec list. Other instructions
 * go on toBeScheduled when they are already ready to issue and on the
 * main iq list otherwise. In every case the instruction remembers its
 * position through iqIt / iqItValid.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
{
    numInsts++;
    inst_count[0]++;

    if (inst->isNonSpeculative()) {
        nonSpec.push_front(inst);
        inst->iqIt = nonSpec.begin();
    } else if (inst->readyToIssue()) {
        toBeScheduled.push_front(inst);
        inst->iqIt = toBeScheduled.begin();
    } else {
        iq.push_front(inst);
        inst->iqIt = iq.begin();
    }
    inst->iqItValid = true;
}

/**
 * Moves instructions from the back of toBeScheduled onto the ready
 * list/queue until the list is empty or the issue width is reached.
 *
 * The count starts from the number already recorded on the issue wire, so
 * the final value is assigned (not added) to avoid counting those
 * instructions twice.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::scheduleReadyInsts()
{
    int scheduled = numIssued->size;

    // Take from the back of the list; since insertion is at the front
    // this handles the instructions in insertion order.
    while (!toBeScheduled.empty() && scheduled < width) {
        DynInstPtr ready_inst = toBeScheduled.back();

        DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
                ready_inst->seqNum, ready_inst->readPC());

        readyQueue.push(ready_inst);
        readyList.push_front(ready_inst);
        ready_inst->iqIt = readyList.begin();
        toBeScheduled.pop_back();
        ++scheduled;
    }

    numIssued->size = scheduled;
}

/**
 * Makes the instruction at the back of the nonSpec list ready to issue.
 * The instruction there must carry sequence number @p sn.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
{
    assert(!nonSpec.empty());

    DynInstPtr inst = nonSpec.back();
    assert(inst->seqNum == sn);
    assert(find(NonSpec, inst->iqIt));

    nonSpec.erase(inst->iqIt);
    readyList.push_front(inst);
    inst->iqIt = readyList.begin();
    readyQueue.push(inst);
    numIssued->size++;
}

/**
 * Removes and returns the instruction at the top of the ready queue.
 * The ready list must not be empty.
 */
template <class Impl>
typename Impl::DynInstPtr
BackEnd<Impl>::InstQueue::getReadyInst()
{
    assert(!readyList.empty());

    DynInstPtr inst = readyQueue.top();
    readyQueue.pop();

    assert(find(ReadyList, inst->iqIt));
    readyList.erase(inst->iqIt);
    inst->iqItValid = false;

    --numInsts;
    return inst;
}

/**
 * Drops every instruction with a sequence number greater than @p sn from
 * the iq, nonSpec and replay lists. The iq and nonSpec scans stop at the
 * first instruction that is not younger than @p sn; the replay list is
 * scanned in full. Instructions on the ready list are not touched here.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
{
    InstListIt cur = iq.begin();
    InstListIt last = iq.end();
    while (cur != last && (*cur)->seqNum > sn) {
        (*cur)->iqItValid = false;
        iq.erase(cur++);
        --numInsts;
    }

    cur = nonSpec.begin();
    last = nonSpec.end();
    while (cur != last && (*cur)->seqNum > sn) {
        (*cur)->iqItValid = false;
        nonSpec.erase(cur++);
        --numInsts;
    }

    cur = replayList.begin();
    last = replayList.end();
    while (cur != last) {
        if ((*cur)->seqNum > sn) {
            (*cur)->iqItValid = false;
            replayList.erase(cur++);
            --numInsts;
        } else {
            ++cur;
        }
    }

    assert(numInsts >= 0);
}

/**
 * Marks one source register ready on each dependent of @p inst and moves
 * dependents that became ready (and still hold a valid queue position)
 * onto toBeScheduled.
 *
 * @return The number of dependents of @p inst.
 */
template <class Impl>
int
BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
{
    assert(!inst->isSquashed());

    std::vector<DynInstPtr> &dependents = inst->getDependents();
    int num_outputs = dependents.size();

    for (int i = 0; i < num_outputs; i++) {
        DynInstPtr dep_inst = dependents[i];
        dep_inst->markSrcRegReady();

        if (!dep_inst->readyToIssue() || !dep_inst->iqItValid)
            continue;

        if (dep_inst->isNonSpeculative()) {
            assert(find(NonSpec, dep_inst->iqIt));
            nonSpec.erase(dep_inst->iqIt);
        } else {
            assert(find(IQ, dep_inst->iqIt));
            iq.erase(dep_inst->iqIt);
        }

        toBeScheduled.push_front(dep_inst);
        dep_inst->iqIt = toBeScheduled.begin();
    }

    return num_outputs;
}

/**
 * Puts a memory instruction that currently has no queue position onto the
 * replay list.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
{
    assert(!inst->iqItValid);

    replayList.push_front(inst);
    inst->iqIt = replayList.begin();
    inst->iqItValid = true;
    ++numInsts;
}

/**
 * Moves every instruction on the replay list, back to front, onto
 * toBeScheduled. @p inst must itself be on the replay list.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
{
    assert(find(ReplayList, inst->iqIt));

    while (!replayList.empty()) {
        DynInstPtr rescheduled_inst = replayList.back();
        replayList.pop_back();

        toBeScheduled.push_front(rescheduled_inst);
        rescheduled_inst->iqIt = toBeScheduled.begin();
    }
}

/** Not implemented; always panics. */
template <class Impl>
void
BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
{
    panic("Not implemented.");
}

/**
 * Linear search of the list selected by @p q for the position @p it.
 *
 * @return true if the scan reaches @p it (this includes the case where
 *         @p it is that list's end()), false otherwise or when @p q names
 *         no known list.
 */
template <class Impl>
bool
BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
{
    InstListIt iq_it, iq_end_it;

    switch (q) {
      case NonSpec:
        iq_it = nonSpec.begin();
        iq_end_it = nonSpec.end();
        break;
      case IQ:
        iq_it = iq.begin();
        iq_end_it = iq.end();
        break;
      case ToBeScheduled:
        iq_it = toBeScheduled.begin();
        iq_end_it = toBeScheduled.end();
        break;
      case ReadyList:
        iq_it = readyList.begin();
        iq_end_it = readyList.end();
        break;
      case ReplayList:
        iq_it = replayList.begin();
        iq_end_it = replayList.end();
        break;
      default:
        // Unknown selector: there is no list to search.
        return false;
    }

    while (iq_it != it && iq_it != iq_end_it) {
        ++iq_it;
    }

    return iq_it == it;
}

/**
 * Helper for InstQueue::dumpInsts(): prints every instruction of
 * @p insts from the back of the list to the front.
 *
 * @param insts     List of instruction pointers to print.
 * @param num       Running index of printed instructions; incremented
 *                  once per instruction.
 * @param valid_num Running count of instructions that are not squashed
 *                  and are either not issued or are memory references
 *                  whose memOpDone flag is not set.
 */
template <class InstList>
static void
dumpInstListFromBack(InstList &insts, int &num, int &valid_num)
{
    typedef typename InstList::reverse_iterator RevIt;

    for (RevIt it = insts.rbegin(); it != insts.rend(); ++it) {
        cprintf("Instruction:%i\n", num);

        if (!(*it)->isSquashed()) {
            if (!(*it)->isIssued()) {
                ++valid_num;
                cprintf("Count:%i\n", valid_num);
            } else if ((*it)->isMemRef() && !(*it)->memOpDone) {
                // Loads that have not been marked as executed still
                // count towards the total instructions.
                ++valid_num;
                cprintf("Count:%i\n", valid_num);
            }
        }

        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
                "Issued:%i\nSquashed:%i\n",
                (*it)->readPC(),
                (*it)->seqNum,
                (*it)->threadNumber,
                (*it)->isIssued(),
                (*it)->isSquashed());

        if ((*it)->isMemRef()) {
            cprintf("MemOpDone:%i\n", (*it)->memOpDone);
        }

        cprintf("\n");
        ++num;
    }
}

/**
 * Prints the contents of the iq, nonSpec, toBeScheduled and ready lists,
 * each from back to front, sharing one running instruction index and one
 * running valid-instruction count across all four lists.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::dumpInsts()
{
    int num = 0;
    int valid_num = 0;

    cprintf("IQ size: %i\n", iq.size());
    dumpInstListFromBack(iq, num, valid_num);

    cprintf("nonSpec size: %i\n", nonSpec.size());
    dumpInstListFromBack(nonSpec, num, valid_num);

    cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
    dumpInstListFromBack(toBeScheduled, num, valid_num);

    cprintf("readyList size: %i\n", readyList.size());
    dumpInstListFromBack(readyList, num, valid_num);
}

/**
 * Creates a load writeback event on the main event queue for @p _inst.
 * The event is flagged AutoDelete.
 */
template <class Impl>
BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
                                                  BackEnd<Impl> *_be)
    : Event(&mainEventQueue), inst(_inst), be(_be)
{
    this->setFlags(Event::AutoDelete);
}

/**
 * Completes the load unless it was squashed: marks it executed (running
 * completeAcc() if it was not executed yet) and hands it to the back end
 * for commit. The instruction reference is released in either case.
 */
template <class Impl>
void
BackEnd<Impl>::LdWritebackEvent::process()
{
    DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);

    if (inst->isSquashed()) {
        inst = NULL;
        return;
    }

    if (!inst->isExecuted()) {
        inst->setExecuted();

        // Execute again to copy data to proper place.
        inst->completeAcc();
    }

    // Need to insert instruction into queue to commit
    be->instToCommit(inst);

    inst = NULL;
}

/** Returns a short description of this event. */
template <class Impl>
const char *
BackEnd<Impl>::LdWritebackEvent::description()
{
    return "Load writeback event";
}

/** Creates a cache completion event with CPU_Tick_Pri priority. */
template <class Impl>
BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(
    BackEnd<Impl> *_be)
    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
{
}

/** Does nothing. */
template <class Impl>
void
BackEnd<Impl>::DCacheCompletionEvent::process()
{
}

/** Returns a short description of this event. */
template <class Impl>
const char *
BackEnd<Impl>::DCacheCompletionEvent::description()
{
    return "Cache completion event";
}

/**
 * Builds the back end: sizes the time buffers, wires up the IQ and LSQ,
 * and derives the per-stage widths. A stage width of 0 in the parameters
 * falls back to the overall back-end width.
 */
template <class Impl>
BackEnd<Impl>::BackEnd(Params *params)
    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
      xcSquash(false), IQ(params),
      cacheCompletionEvent(this), width(params->backEndWidth),
      exactFullStall(true)
{
    numROBEntries = params->numROBEntries;
    numInsts = 0;
    numDispatchEntries = 32;
    // dispatchInsts() compares and updates this counter, so it must
    // start from a known value.
    dispatchSize = 0;

    IQ.setBE(this);
    LSQ.setBE(this);

    // Setup IQ and LSQ with their parameters here.
    instsToDispatch = d2i.getWire(-1);
    instsToExecute = i2e.getWire(-1);

    IQ.setIssueExecQueue(&i2e);

    dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
    issueWidth = params->issueWidth ? params->issueWidth : width;
    wbWidth = params->wbWidth ? params->wbWidth : width;
    commitWidth = params->commitWidth ? params->commitWidth : width;

    LSQ.init(params, params->LQEntries, params->SQEntries, 0);

    dispatchStatus = Running;
}

/** Returns the CPU's name with ".backend" appended. */
template <class Impl>
std::string
BackEnd<Impl>::name() const
{
    return cpu->name() + ".backend";
}

/**
 * Registers the back end's statistics (ROB, IQ, issue, LSQ, writeback and
 * commit groups) and then the instruction queue's own statistics.
 */
template <class Impl>
void
BackEnd<Impl>::regStats()
{
    using namespace Stats;

    rob_cap_events
        .init(cpu->number_of_threads)
        .name(name() + ".ROB:cap_events")
        .desc("number of cycles where ROB cap was active")
        .flags(total);

    rob_cap_inst_count
        .init(cpu->number_of_threads)
        .name(name() + ".ROB:cap_inst")
        .desc("number of instructions held up by ROB cap")
        .flags(total);

    iq_cap_events
        .init(cpu->number_of_threads)
        .name(name() +".IQ:cap_events" )
        .desc("number of cycles where IQ cap was active")
        .flags(total);

    iq_cap_inst_count
        .init(cpu->number_of_threads)
        .name(name() + ".IQ:cap_inst")
        .desc("number of instructions held up by IQ cap")
        .flags(total);

    exe_inst
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:count")
        .desc("number of insts issued")
        .flags(total);

    exe_swp
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:swp")
        .desc("number of swp insts issued")
        .flags(total);

    exe_nop
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:nop")
        .desc("number of nop insts issued")
        .flags(total);

    exe_refs
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:refs")
        .desc("number of memory reference insts issued")
        .flags(total);

    exe_loads
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:loads")
        .desc("number of load insts issued")
        .flags(total);

    exe_branches
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:branches")
        .desc("Number of branches issued")
        .flags(total);

    issued_ops
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:op_count")
        .desc("number of insts issued")
        .flags(total);

    // NOTE(review): the text between issued_ops and the ".LSQ:forw_loads"
    // registration was damaged in this copy of the file (it held a
    // commented-out loop over the op classes). The stat object below is
    // presumed to be lsq_forw_loads -- confirm against the class
    // declaration.
    lsq_forw_loads
        .init(cpu->number_of_threads)
        .name(name() + ".LSQ:forw_loads")
        .desc("number of loads forwarded via LSQ")
        .flags(total);

    inv_addr_loads
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:addr_loads")
        .desc("number of invalid-address loads")
        .flags(total);

    inv_addr_swpfs
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:addr_swpfs")
        .desc("number of invalid-address SW prefetches")
        .flags(total);

    lsq_blocked_loads
        .init(cpu->number_of_threads)
        .name(name() + ".LSQ:blocked_loads")
        .desc("number of ready loads not issued due to memory disambiguation")
        .flags(total);

    lsqInversion
        .name(name() + ".ISSUE:lsq_invert")
        .desc("Number of times LSQ instruction issued early");

    n_issued_dist
        .init(issueWidth + 1)
        .name(name() + ".ISSUE:issued_per_cycle")
        .desc("Number of insts issued each cycle")
        .flags(total | pdf | dist);

    issue_delay_dist
        .init(Num_OpClasses,0,99,2)
        .name(name() + ".ISSUE:")
        .desc("cycles from operands ready to issue")
        .flags(pdf | cdf);

    queue_res_dist
        .init(Num_OpClasses, 0, 99, 2)
        .name(name() + ".IQ:residence:")
        .desc("cycles from dispatch to issue")
        .flags(total | pdf | cdf );

    for (int i = 0; i < Num_OpClasses; ++i) {
        queue_res_dist.subname(i, opClassStrings[i]);
    }

    writeback_count
        .init(cpu->number_of_threads)
        .name(name() + ".WB:count")
        .desc("cumulative count of insts written-back")
        .flags(total);

    producer_inst
        .init(cpu->number_of_threads)
        .name(name() + ".WB:producers")
        .desc("num instructions producing a value")
        .flags(total);

    consumer_inst
        .init(cpu->number_of_threads)
        .name(name() + ".WB:consumers")
        .desc("num instructions consuming a value")
        .flags(total);

    wb_penalized
        .init(cpu->number_of_threads)
        .name(name() + ".WB:penalized")
        .desc("number of instrctions required to write to 'other' IQ")
        .flags(total);

    wb_penalized_rate
        .name(name() + ".WB:penalized_rate")
        .desc ("fraction of instructions written-back that wrote to 'other' IQ")
        .flags(total);
    wb_penalized_rate = wb_penalized / writeback_count;

    // NOTE(review): this is producers per consumer; a "fanout" would
    // normally be consumers per producer. Left as is -- confirm intent.
    wb_fanout
        .name(name() + ".WB:fanout")
        .desc("average fanout of values written-back")
        .flags(total);
    wb_fanout = producer_inst / consumer_inst;

    wb_rate
        .name(name() + ".WB:rate")
        .desc("insts written-back per cycle")
        .flags(total);
    wb_rate = writeback_count / cpu->numCycles;

    stat_com_inst
        .init(cpu->number_of_threads)
        .name(name() + ".COM:count")
        .desc("Number of instructions committed")
        .flags(total);

    stat_com_swp
        .init(cpu->number_of_threads)
        .name(name() + ".COM:swp_count")
        .desc("Number of s/w prefetches committed")
        .flags(total);

    stat_com_refs
        .init(cpu->number_of_threads)
        .name(name() + ".COM:refs")
        .desc("Number of memory references committed")
        .flags(total);

    stat_com_loads
        .init(cpu->number_of_threads)
        .name(name() + ".COM:loads")
        .desc("Number of loads committed")
        .flags(total);

    stat_com_membars
        .init(cpu->number_of_threads)
        .name(name() + ".COM:membars")
        .desc("Number of memory barriers committed")
        .flags(total);

    stat_com_branches
        .init(cpu->number_of_threads)
        .name(name() + ".COM:branches")
        .desc("Number of branches committed")
        .flags(total);

    n_committed_dist
        .init(0,commitWidth,1)
        .name(name() + ".COM:committed_per_cycle")
        .desc("Number of insts commited each cycle")
        .flags(pdf);

    //
    //  Commit-Eligible instructions...
    //
    //  -> The number of instructions eligible to commit in those
    //  cycles where we reached our commit BW limit (less the number
    //  actually committed)
    //
    //  -> The average value is computed over ALL CYCLES... not just
    //  the BW limited cycles
    //
    //  -> The standard deviation is computed only over cycles where
    //  we reached the BW limit
    //
    commit_eligible
        .init(cpu->number_of_threads)
        .name(name() + ".COM:bw_limited")
        .desc("number of insts not committed due to BW limits")
        .flags(total);

    commit_eligible_samples
        .name(name() + ".COM:bw_lim_events")
        .desc("number cycles where commit BW limit reached");

    ROB_fcount
        .name(name() + ".ROB:full_count")
        .desc("number of cycles where ROB was full");

    ROB_count
        .init(cpu->number_of_threads)
        .name(name() + ".ROB:occupancy")
        .desc(name() + ".ROB occupancy (cumulative)")
        .flags(total);

    ROB_full_rate
        .name(name() + ".ROB:full_rate")
        .desc("ROB full per cycle");
    ROB_full_rate = ROB_fcount / cpu->numCycles;

    ROB_occ_rate
        .name(name() + ".ROB:occ_rate")
        .desc("ROB occupancy rate")
        .flags(total);
    ROB_occ_rate = ROB_count / cpu->numCycles;

    ROB_occ_dist
        .init(cpu->number_of_threads,0,numROBEntries,2)
        .name(name() + ".ROB:occ_dist")
        .desc("ROB Occupancy per cycle")
        .flags(total | cdf);

    IQ.regStats();
}

/**
 * Records the communication time buffer and takes two wires from it:
 * offset 0 as toIEW and offset -1 as fromCommit.
 */
template <class Impl>
void
BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
{
    comm = _comm;
    toIEW = comm->getWire(0);
    fromCommit = comm->getWire(-1);
}

/**
 * Runs one cycle of the back end: structure updates, dispatch (or a
 * re-check of the blocked state), scheduling, execution, writeback and
 * commit, advancing the inter-stage time buffers in between.
 */
template <class Impl>
void
BackEnd<Impl>::tick()
{
    DPRINTF(BE, "Ticking back end\n");

    ROB_count[0]+= numInsts;

    wbCycle = 0;

    if (xcSquash) {
        squashFromXC();
    }

    // Read in any done instruction information and update the IQ or LSQ.
    updateStructures();

    if (dispatchStatus != Blocked) {
        d2i.advance();
        dispatchInsts();
    } else {
        checkDispatchStatus();
    }

    i2e.advance();
    scheduleReadyInsts();

    e2c.advance();
    executeInsts();

    numInstsToWB.advance();
    writebackInsts();

    commitInsts();

    assert(numInsts == instList.size());
}

/**
 * Applies the information sent back from commit: retires entries up to
 * doneSeqNum in the IQ and LSQ, and starts the pending non-speculative
 * instruction (through the LSQ for an uncached load, otherwise through
 * the IQ).
 */
template <class Impl>
void
BackEnd<Impl>::updateStructures()
{
    if (fromCommit->doneSeqNum) {
        IQ.commit(fromCommit->doneSeqNum);
        LSQ.commitLoads(fromCommit->doneSeqNum);
        LSQ.commitStores(fromCommit->doneSeqNum);
    }

    if (fromCommit->nonSpecSeqNum) {
        if (fromCommit->uncached) {
            LSQ.executeLoad(fromCommit->lqIdx);
        } else {
            IQ.scheduleNonSpec(fromCommit->nonSpecSeqNum);
        }
    }
}

/** Inserts @p inst into the instruction queue. */
template <class Impl>
void
BackEnd<Impl>::addToIQ(DynInstPtr &inst)
{
    // Do anything IQ specific here?
    IQ.insert(inst);
}

/** Inserts @p inst into the load/store queue. */
template <class Impl>
void
BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
{
    // Do anything LSQ specific here?
    LSQ.insert(inst);
}

/**
 * Pulls new instructions from the front end into the dispatch list, then
 * dispatches the instructions announced on the dispatch wire into the
 * ROB list, the IQ and (for memory references) the LSQ.
 *
 * When the IQ, LSQ or ROB fills up the stage stalls. With exactFullStall
 * set this is checked after every dispatched instruction and the wire
 * count is reduced by the number already consumed.
 */
template <class Impl>
void
BackEnd<Impl>::dispatchInsts()
{
    DPRINTF(BE, "Trying to dispatch instructions.\n");

    // Pull instructions out of the front end.
    int disp_width = dispatchWidth ? dispatchWidth : width;

    // Could model dispatching time, but in general 1 cycle is probably
    // good enough.

    if (dispatchSize < numDispatchEntries) {
        for (int i = 0; i < disp_width; i++) {
            // Get instructions
            DynInstPtr inst = frontEnd->getInst();

            if (!inst) {
                // No more instructions to get
                break;
            }

            DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
                    inst->seqNum, inst->readPC());

            for (int reg = 0; reg < inst->numDestRegs(); ++reg)
                renameTable[inst->destRegIdx(reg)] = inst;

            // Add to queue to be dispatched.
            dispatch.push_back(inst);

            d2i[0].size++;
            ++dispatchSize;
        }
    }

    assert(dispatch.size() < 64);

    for (int i = 0; i < instsToDispatch->size; ++i) {
        assert(!dispatch.empty());
        // Get instruction from front of time buffer
        DynInstPtr inst = dispatch.front();
        dispatch.pop_front();

        // The instruction has left the dispatch list whether or not it
        // was squashed, so the occupancy count drops in both cases.
        --dispatchSize;

        if (inst->isSquashed())
            continue;

        ++numInsts;
        instList.push_back(inst);

        DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
                inst->seqNum, inst->readPC());

        addToIQ(inst);

        if (inst->isMemRef()) {
            addToLSQ(inst);
        }

        if (inst->isNonSpeculative()) {
            inst->setCanCommit();
        }

        // Check if IQ or LSQ is full.  If so we'll need to break and stop
        // removing instructions.  Also update the number of insts to remove
        // from the queue.
        if (exactFullStall) {
            bool stall = false;
            if (IQ.isFull()) {
                DPRINTF(BE, "IQ is full!\n");
                stall = true;
            } else if (LSQ.isFull()) {
                DPRINTF(BE, "LSQ is full!\n");
                stall = true;
            } else if (isFull()) {
                DPRINTF(BE, "ROB is full!\n");
                stall = true;
                ROB_fcount++;
            }
            if (stall) {
                instsToDispatch->size-= i+1;
                dispatchStall();
                return;
            }
        }
    }

    // Check if IQ or LSQ is full.  If so we'll need to break and stop
    // removing instructions.  Also update the number of insts to remove
    // from the queue.  Check here if we don't care about exact stall
    // conditions.

    bool stall = false;
    if (IQ.isFull()) {
        DPRINTF(BE, "IQ is full!\n");
        stall = true;
    } else if (LSQ.isFull()) {
        DPRINTF(BE, "LSQ is full!\n");
        stall = true;
    } else if (isFull()) {
        DPRINTF(BE, "ROB is full!\n");
        stall = true;
        ROB_fcount++;
    }

    if (stall) {
        d2i.advance();
        dispatchStall();
        return;
    }
}

/** Marks dispatch as blocked. */
template <class Impl>
void
BackEnd<Impl>::dispatchStall()
{
    dispatchStatus = Blocked;
    if (!cpu->decoupledFrontEnd) {
        // Tell front end to stall here through a timebuffer, or just tell
        // it directly.
    }
}

/**
 * Called while dispatch is blocked: resumes dispatching as soon as none
 * of the IQ, LSQ and ROB is full.
 */
template <class Impl>
void
BackEnd<Impl>::checkDispatchStatus()
{
    assert(dispatchStatus == Blocked);

    if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
        DPRINTF(BE, "Dispatch no longer blocked\n");
        dispatchStatus = Running;
        dispatchInsts();
    }
}

/** Asks the instruction queue to schedule its ready instructions. */
template <class Impl>
void
BackEnd<Impl>::scheduleReadyInsts()
{
    // Tell IQ to put any ready instructions into the instruction list.
    // Probably want to have a list of DynInstPtrs returned here.  Then I
    // can choose to either put them into a time buffer to simulate
    // IQ scheduling time, or hand them directly off to the next stage.
    // Do you ever want to directly hand it off to the next stage?
    DPRINTF(BE, "Trying to schedule ready instructions\n");
    IQ.scheduleReadyInsts();
}

/**
 * Executes the number of instructions announced on the execute wire.
 *
 * Squashed instructions are only marked executed and committable. Memory
 * references (other than prefetches) go through the LSQ; everything else
 * is executed directly and sent to commit. After each instruction, at
 * most one fetch redirect per thread is raised for a branch mispredict,
 * an LSQ violation or a blocked load.
 */
template <class Impl>
void
BackEnd<Impl>::executeInsts()
{
    int insts_to_execute = instsToExecute->size;

    issued_ops[0]+= insts_to_execute;
    n_issued_dist[insts_to_execute]++;

    DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);

    fetchRedirect[0] = false;

    while (insts_to_execute > 0) {
        // Get ready instruction from the IQ (or queue coming out of IQ)
        // Execute the ready instruction.
        // Wakeup any dependents if it's done.
        DynInstPtr inst = IQ.getReadyInst();

        DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
                inst->seqNum, inst->readPC());

        ++funcExeInst;

        // Check if the instruction is squashed; if so then skip it
        // and don't count it towards the FU usage.
        if (inst->isSquashed()) {
            DPRINTF(BE, "Execute: Instruction was squashed.\n");

            // Not sure how to handle this plus the method of sending # of
            // instructions to use.  Probably will just have to count it
            // towards the bandwidth usage, but not the FU usage.
            --insts_to_execute;

            // Consider this instruction executed so that commit can go
            // ahead and retire the instruction.
            inst->setExecuted();

            // Not sure if I should set this here or just let commit try to
            // commit any squashed instructions.  I like the latter a bit more.
            inst->setCanCommit();

            continue;
        }

        Fault fault = NoFault;

        // Execute instruction.
        // Note that if the instruction faults, it will be handled
        // at the commit stage.
        if (inst->isMemRef() &&
            (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
            DPRINTF(BE, "Execute: Initiating access for memory "
                    "reference.\n");

            // Tell the LDSTQ to execute this instruction (if it is a load).
            if (inst->isLoad()) {
                // Loads will mark themselves as executed, and their writeback
                // event adds the instruction to the queue to commit
                fault = LSQ.executeLoad(inst);
            } else if (inst->isStore()) {
                LSQ.executeStore(inst);

                if (!(inst->req->flags & LOCKED)) {
                    inst->setExecuted();

                    instToCommit(inst);
                }
                // Store conditionals will mark themselves as executed, and
                // their writeback event will add the instruction to the queue
                // to commit.
            } else {
                panic("Unexpected memory type!\n");
            }
        } else {
            inst->execute();

            inst->setExecuted();

            instToCommit(inst);
        }

        updateExeInstStats(inst);

        // Probably should have some sort of function for this.
        // More general question of how to handle squashes?  Have some sort of
        // squash unit that controls it?  Probably...
        // Check if branch was correct.  This check happens after the
        // instruction is added to the queue because even if the branch
        // is mispredicted, the branch instruction itself is still valid.
        // Only handle this if there hasn't already been something that
        // redirects fetch in this group of instructions.

        // This probably needs to prioritize the redirects if a different
        // scheduler is used.  Currently the scheduler schedules the oldest
        // instruction first, so the branch resolution order will be correct.
        unsigned tid = inst->threadNumber;

        if (!fetchRedirect[tid]) {

            if (inst->mispredicted()) {
                fetchRedirect[tid] = true;

                DPRINTF(BE, "Execute: Branch mispredict detected.\n");
                DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
                        inst->nextPC);

                // If incorrect, then signal the ROB that it must be squashed.
                squashDueToBranch(inst);

                if (inst->predTaken()) {
                    // predictedTakenIncorrect++;
                } else {
                    // predictedNotTakenIncorrect++;
                }
            } else if (LSQ.violation()) {
                fetchRedirect[tid] = true;

                // Get the DynInst that caused the violation.  Note that this
                // clears the violation signal.
                DynInstPtr violator;
                violator = LSQ.getMemDepViolator();

                DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
                        "%#x, inst PC: %#x. Addr is: %#x.\n",
                        violator->readPC(), inst->readPC(), inst->physEffAddr);

                // Squash.
                squashDueToBranch(inst);
            } else if (LSQ.loadBlocked()) {
                fetchRedirect[tid] = true;

                DPRINTF(BE, "Load operation couldn't execute because the "
                        "memory system is blocked. PC: %#x [sn:%lli]\n",
                        inst->readPC(), inst->seqNum);

                squashDueToMemBlocked(inst);
            }
        }

        --insts_to_execute;

        // keep an instruction count
        thread->numInst++;
        thread->numInsts++;
    }

    assert(insts_to_execute >= 0);
}

/**
 * Queues a finished instruction for writeback in the first writeback
 * cycle, starting from the current wbCycle, that still has a free slot.
 * Instructions pushed past cycle 0 are counted as penalized.
 */
template <class Impl>
void
BackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
    int wb_width = wbWidth;
    // First check the time slot that this instruction will write
    // to.  If there are free write ports at the time, then go ahead
    // and write the instruction to that time.  If there are not,
    // keep looking back to see where's the first time there's a
    // free slot.  What happens if you run out of free spaces?
    // For now naively assume that all instructions take one cycle.
    // Otherwise would have to look into the time buffer based on the
    // latency of the instruction.

    DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
            inst->seqNum, inst->readPC());

    while (numInstsToWB[wbCycle].size >= wb_width) {
        ++wbCycle;

        assert(wbCycle < 5);
    }

    // Add finished instruction to queue to commit.
    writeback.push_back(inst);
    numInstsToWB[wbCycle].size++;

    if (wbCycle)
        wb_penalized[0]++;
}

/**
 * Writes back up to wbWidth instructions from the front of the writeback
 * list: non-squashed instructions become committable and completed, and
 * executed ones wake their dependents. Stores are then written back
 * through the LSQ and the writeback statistics are updated.
 */
template <class Impl>
void
BackEnd<Impl>::writebackInsts()
{
    int wb_width = wbWidth;
    // Using this method I'm not quite sure how to prevent an
    // instruction from waking its own dependents multiple times,
    // without the guarantee that commit always has enough bandwidth
    // to accept all instructions being written back.  This guarantee
    // might not be too unrealistic.
    InstListIt wb_inst_it = writeback.begin();
    InstListIt wb_end_it = writeback.end();
    int inst_num = 0;
    int consumer_insts = 0;

    for (; inst_num < wb_width && wb_inst_it != wb_end_it; inst_num++) {
        DynInstPtr inst = (*wb_inst_it);

        // Some instructions will be sent to commit without having
        // executed because they need commit to handle them.
        // E.g. Uncached loads have not actually executed when they
        // are first sent to commit.  Instead commit must tell the LSQ
        // when it's ready to execute the uncached load.
        if (!inst->isSquashed()) {
            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
                    inst->seqNum, inst->readPC());

            inst->setCanCommit();
            inst->setCompleted();

            if (inst->isExecuted()) {
                int dependents = IQ.wakeDependents(inst);
                if (dependents) {
                    producer_inst[0]++;
                    consumer_insts+= dependents;
                }
            }
        }

        writeback.erase(wb_inst_it++);
    }
    LSQ.writebackStores();
    consumer_inst[0]+= consumer_insts;
    writeback_count[0]+= inst_num;
}

/**
 * Tries to commit the instruction at the head of the ROB list.
 *
 * @param inst_num Number of instructions already committed this cycle.
 * @return true if the instruction was committed and removed from the
 *         list; false if it is not ready, still has to be executed, or
 *         raised a fault that was invoked.
 */
template <class Impl>
bool
BackEnd<Impl>::commitInst(int inst_num)
{
    // Read instruction from the head of the ROB
    DynInstPtr inst = instList.front();

    // Make sure instruction is valid
    assert(inst);

    if (!inst->readyToCommit())
        return false;

    DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
            inst->seqNum, inst->readPC());

    // If the instruction is not executed yet, then it is a non-speculative
    // or store inst.  Signal backwards that it should be executed.
    if (!inst->isExecuted()) {
        // Keep this number correct.  We have not yet actually executed
        // and committed this instruction.
        if (inst->isNonSpeculative()) {
#if !FULL_SYSTEM
            // Hack to make sure syscalls aren't executed until all stores
            // write back their data.  This direct communication shouldn't
            // be used for anything other than this.
            if (inst_num > 0 || LSQ.hasStoresToWB()) {
                DPRINTF(BE, "Waiting for all stores to writeback.\n");
                return false;
            }
#endif

            DPRINTF(BE, "Encountered a store or non-speculative "
                    "instruction at the head of the ROB, PC %#x.\n",
                    inst->readPC());

            // Send back the non-speculative instruction's sequence number.
            toIEW->nonSpecSeqNum = inst->seqNum;

            // Change the instruction so it won't try to commit again until
            // it is executed.
            inst->clearCanCommit();

            return false;
        } else if (inst->isLoad()) {
            DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
                    inst->seqNum, inst->readPC());

            // Send back the non-speculative instruction's sequence
            // number.  Maybe just tell the lsq to re-execute the load.
            toIEW->nonSpecSeqNum = inst->seqNum;
            toIEW->uncached = true;
            toIEW->lqIdx = inst->lqIdx;

            inst->clearCanCommit();

            return false;
        } else {
            panic("Trying to commit un-executed instruction "
                  "of unknown type!\n");
        }
    }

    // Now check if it's one of the special trap or barrier or
    // serializing instructions.
    if (inst->isThreadSync())
    {
        // Not handled for now.
        panic("Barrier instructions are not handled yet.\n");
    }

    // Check if the instruction caused a fault.  If so, trap.
    Fault inst_fault = inst->getFault();

    if (inst_fault != NoFault) {
        if (!inst->isNop()) {
#if FULL_SYSTEM
            DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
                    inst->seqNum, inst->readPC());

            // Consider holding onto the trap and waiting until the trap event
            // happens for this to be executed.
            inst_fault->invoke(thread->getXCProxy());

            return false;
#else // !FULL_SYSTEM
            panic("fault (%d) detected @ PC %08p", inst_fault,
                  inst->PC);
#endif // FULL_SYSTEM
        }
    }

    if (inst->isControl()) {
        // ++commitCommittedBranches;
    }

    int freed_regs = 0;

    for (int i = 0; i < inst->numDestRegs(); ++i) {
        DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
                (int)inst->destRegIdx(i), inst->seqNum);
        thread->renameTable[inst->destRegIdx(i)] = inst;
        ++freed_regs;
    }

    if (inst->traceData) {
        inst->traceData->finalize();
        inst->traceData = NULL;
    }

    inst->clearDependents();

    frontEnd->addFreeRegs(freed_regs);

    instList.pop_front();

    --numInsts;
    cpu->numInst++;
    thread->numInsts++;
    ++thread->funcExeInst;
    thread->PC = inst->readNextPC();
    updateComInstStats(inst);

    // Write the done sequence number here.
    toIEW->doneSeqNum = inst->seqNum;

    return true;
}

/**
 * Commits instructions from the head of the ROB list until the list is
 * empty, the commit width is reached, or an instruction cannot commit.
 * The number actually committed is sampled into n_committed_dist.
 */
template <class Impl>
void
BackEnd<Impl>::commitInsts()
{
    int commit_width = commitWidth ? commitWidth : width;

    // Not sure this should be a loop or not.
    int inst_num = 0;
    while (!instList.empty() && inst_num < commit_width) {
        if (instList.front()->isSquashed()) {
            panic("No squashed insts should still be on the list!");
            instList.front()->clearDependents();
            instList.pop_front();
            continue;
        }

        // Only count the instruction once it has really committed, so
        // the sample below is not inflated by a failed attempt.
        if (!commitInst(inst_num)) {
            break;
        }
        ++inst_num;
    }
    n_committed_dist.sample(inst_num);
}

/**
 * Squashes every instruction younger than @p sn.
 *
 * The IQ and LSQ are squashed first. Instructions still on the dispatch
 * list are marked squashed but stay on the list; instructions on the ROB
 * list are marked and removed. For each squashed instruction the rename
 * table entries of its destination registers are restored to the previous
 * producer, and the number of freed registers is reported to the front
 * end.
 */
template <class Impl>
void
BackEnd<Impl>::squash(const InstSeqNum &sn)
{
    IQ.squash(sn);
    LSQ.squash(sn);

    int freed_regs = 0;

    // Walk the dispatch list from its youngest (back) entry towards the
    // front, stopping at the first instruction that is not younger than
    // sn.
    InstListIt insts_it = dispatch.end();
    while (insts_it != dispatch.begin()) {
        --insts_it;

        if (!((*insts_it)->seqNum > sn))
            break;

        DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
                (*insts_it)->readPC(),
                (*insts_it)->seqNum);

        // Mark the instruction as squashed, and ready to commit so that
        // it can drain out of the pipeline.
        (*insts_it)->setSquashed();

        (*insts_it)->setCanCommit();

        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
            renameTable[(*insts_it)->destRegIdx(i)] =
                (*insts_it)->getPrevDestInst(i);
            ++freed_regs;
        }

        (*insts_it)->clearDependents();
    }

    // Remove squashed instructions from the back of the ROB list.
    while (!instList.empty() && instList.back()->seqNum > sn) {
        DynInstPtr squashed_inst = instList.back();

        DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
                squashed_inst->readPC(),
                squashed_inst->seqNum);

        // Mark the instruction as squashed, and ready to commit so that
        // it can drain out of the pipeline.
        squashed_inst->setSquashed();

        squashed_inst->setCanCommit();

        for (int i = 0; i < squashed_inst->numDestRegs(); ++i) {
            renameTable[squashed_inst->destRegIdx(i)] =
                squashed_inst->getPrevDestInst(i);
            ++freed_regs;
        }

        squashed_inst->clearDependents();

        instList.pop_back();
        --numInsts;
    }

    frontEnd->addFreeRegs(freed_regs);
}

/**
 * Sets the xcSquash flag.
 * NOTE(review): tick() calls this only when the flag is already set, and
 * nothing visible here clears it -- confirm the intended behaviour.
 */
template <class Impl>
void
BackEnd<Impl>::squashFromXC()
{
    xcSquash = true;
}

/**
 * Squashes everything younger than @p inst in the back end and redirects
 * the front end to the instruction's next PC.
 */
template <class Impl>
void
BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
{
    // Update the branch predictor state I guess
    squash(inst->seqNum);
    frontEnd->squash(inst->seqNum, inst->readNextPC(),
                     true, inst->mispredicted());
}

/**
 * Squashes @p inst itself and everything younger, and redirects the front
 * end back to the instruction's own PC.
 */
template <class Impl>
void
BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
{
    DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
            "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);

    squash(inst->seqNum - 1);
    frontEnd->squash(inst->seqNum - 1, inst->readPC());
}

/** Does nothing. */
template <class Impl>
void
BackEnd<Impl>::fetchFault(Fault &fault)
{
}

/**
 * Updates the per-thread issue statistics (instruction, software
 * prefetch, branch, memory reference and load counts) for @p inst.
 */
template <class Impl>
void
BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
{
    int thread_number = inst->threadNumber;

    //
    //  Pick off the software prefetches
    //
#ifdef TARGET_ALPHA
    if (inst->isDataPrefetch())
        exe_swp[thread_number]++;
    else
        exe_inst[thread_number]++;
#else
    exe_inst[thread_number]++;
#endif

    //
    //  Control operations
    //
    if (inst->isControl())
        exe_branches[thread_number]++;

    //
    //  Memory operations
    //
    if (inst->isMemRef()) {
        exe_refs[thread_number]++;

        if (inst->isLoad())
            exe_loads[thread_number]++;
    }
}

template <class Impl>
void
BackEnd::updateComInstStats(DynInstPtr &inst) { unsigned thread = inst->threadNumber; // // Pick off the software prefetches // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { stat_com_swp[thread]++; } else { stat_com_inst[thread]++; } #else stat_com_inst[thread]++; #endif // // Control Instructions // if (inst->isControl()) stat_com_branches[thread]++; // // Memory references // if (inst->isMemRef()) { stat_com_refs[thread]++; if (inst->isLoad()) { stat_com_loads[thread]++; } } if (inst->isMemBarrier()) { stat_com_membars[thread]++; } } template void BackEnd::dumpInsts() { int num = 0; int valid_num = 0; InstListIt inst_list_it = instList.begin(); cprintf("Inst list size: %i\n", instList.size()); while (inst_list_it != instList.end()) { cprintf("Instruction:%i\n", num); if (!(*inst_list_it)->isSquashed()) { if (!(*inst_list_it)->isIssued()) { ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { // Loads that have not been marked as executed still count // towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } } cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" "Issued:%i\nSquashed:%i\n", (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, (*inst_list_it)->threadNumber, (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); } cprintf("\n"); inst_list_it++; ++num; } cprintf("Dispatch list size: %i\n", dispatch.size()); inst_list_it = dispatch.begin(); while (inst_list_it != dispatch.end()) { cprintf("Instruction:%i\n", num); if (!(*inst_list_it)->isSquashed()) { if (!(*inst_list_it)->isIssued()) { ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { // Loads that have not been marked as executed still count // towards the total instructions. 
++valid_num; cprintf("Count:%i\n", valid_num); } } cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" "Issued:%i\nSquashed:%i\n", (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, (*inst_list_it)->threadNumber, (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); } cprintf("\n"); inst_list_it++; ++num; } cprintf("Writeback list size: %i\n", writeback.size()); inst_list_it = writeback.begin(); while (inst_list_it != writeback.end()) { cprintf("Instruction:%i\n", num); if (!(*inst_list_it)->isSquashed()) { if (!(*inst_list_it)->isIssued()) { ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && !(*inst_list_it)->memOpDone) { // Loads that have not been marked as executed still count // towards the total instructions. ++valid_num; cprintf("Count:%i\n", valid_num); } } cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" "Issued:%i\nSquashed:%i\n", (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, (*inst_list_it)->threadNumber, (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); } cprintf("\n"); inst_list_it++; ++num; } }