gem5/cpu/ozone/back_end_impl.hh

1905 lines
51 KiB
C++
Raw Normal View History

#include "encumbered/cpu/full/op_class.hh"
#include "cpu/ozone/back_end.hh"
/**
 * Builds an empty instruction queue.
 *
 * The capacity is read from params->numIQEntries and the per-cycle
 * scheduling limit from params->issueWidth; the occupancy counter starts
 * at zero.
 *
 * NOTE(review): BackEnd's constructor substitutes backEndWidth when
 * issueWidth is 0, but no such fallback happens here -- confirm that a zero
 * issueWidth is never handed to the IQ.
 */
template <class Impl>
BackEnd<Impl>::InstQueue::InstQueue(Params *params)
    : size(params->numIQEntries),
      numInsts(0),
      width(params->issueWidth)
{
}
/** Returns the owning back end's name with ".iq" appended. */
template <class Impl>
std::string
BackEnd<Impl>::InstQueue::name() const
{
    std::string iq_name = be->name();
    iq_name += ".iq";
    return iq_name;
}
/**
 * Registers the instruction queue's statistics and binds the derived
 * (formula) statistics.
 *
 * Every statistic is named "<IQ name>.<stat>".  The original code
 * concatenated name() and the stat name without a separator, which produced
 * identifiers such as "...iqocc_dist"; a "." is now inserted so the names
 * follow the same dotted scheme BackEnd::regStats() uses.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::regStats()
{
    using namespace Stats;

    // Common "<owner>.iq." prefix shared by every statistic below.
    const std::string prefix = name() + ".";

    occ_dist
        .init(1, 0, size, 2)
        .name(prefix + "occ_dist")
        .desc("IQ Occupancy per cycle")
        .flags(total | cdf)
        ;

    inst_count
        .init(1)
        .name(prefix + "cum_num_insts")
        .desc("Total occupancy")
        .flags(total)
        ;

    peak_inst_count
        .init(1)
        .name(prefix + "peak_occupancy")
        .desc("Peak IQ occupancy")
        .flags(total)
        ;

    current_count
        .name(prefix + "current_count")
        .desc("Occupancy this cycle")
        ;

    empty_count
        .name(prefix + "empty_count")
        .desc("Number of empty cycles")
        ;

    fullCount
        .name(prefix + "full_count")
        .desc("Number of full cycles")
        ;

    occ_rate
        .name(prefix + "occ_rate")
        .desc("Average occupancy")
        .flags(total)
        ;
    occ_rate = inst_count / be->cpu->numCycles;

    avg_residency
        .name(prefix + "avg_residency")
        .desc("Average IQ residency")
        .flags(total)
        ;
    // NOTE(review): this divides occ_rate (already per-cycle) by numCycles
    // a second time -- confirm this is the intended residency formula.
    avg_residency = occ_rate / be->cpu->numCycles;

    empty_rate
        .name(prefix + "empty_rate")
        .desc("Fraction of cycles empty")
        ;
    // Scaled by 100, so the reported value is a percentage.
    empty_rate = 100 * empty_count / be->cpu->numCycles;

    full_rate
        .name(prefix + "full_rate")
        .desc("Fraction of cycles full")
        ;
    // Scaled by 100, so the reported value is a percentage.
    full_rate = 100 * fullCount / be->cpu->numCycles;
}
/**
 * Attaches the issue-to-execute time buffer.
 *
 * Stores the buffer pointer and caches its wire at offset 0, which the
 * scheduling functions use to report how many instructions were issued.
 *
 * @param i2e_queue Time buffer connecting issue to execute.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
{
    numIssued = i2e_queue->getWire(0);
    i2e = i2e_queue;
}
/**
 * Adds a newly dispatched instruction to the IQ.
 *
 * Non-speculative instructions go onto nonSpec.  Speculative instructions
 * go onto toBeScheduled when they are already ready to issue, otherwise
 * onto iq.  In every case the instruction records an iterator to its list
 * position and is flagged as holding a valid IQ iterator.
 *
 * @param inst Instruction to insert.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
{
    numInsts++;
    inst_count[0]++;

    if (inst->isNonSpeculative()) {
        DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n",
                inst->seqNum);
        nonSpec.push_front(inst);
        inst->iqIt = nonSpec.begin();
    } else {
        DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
        if (inst->readyToIssue()) {
            toBeScheduled.push_front(inst);
            inst->iqIt = toBeScheduled.begin();
        } else {
            iq.push_front(inst);
            inst->iqIt = iq.begin();
        }
    }

    // Whichever list was chosen, the stored iterator is now usable.
    inst->iqItValid = true;
}
/**
 * Moves instructions from toBeScheduled to the ready structures, taking
 * them from the back of the list first, until the list is empty or the
 * issue width is reached.
 *
 * Each scheduled instruction is pushed onto readyQueue and readyList and
 * its iqIt is repointed at its readyList entry.
 *
 * Fixes relative to the previous version:
 *  - `scheduled` starts at the count already recorded in numIssued->size,
 *    so the final count must be assigned, not added; `+=` counted the
 *    starting value twice.
 *  - The loop no longer decrements end() of a possibly empty list, nor
 *    decrements an iterator past begin(); both are undefined behaviour for
 *    std::list.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::scheduleReadyInsts()
{
    int scheduled = numIssued->size;

    while (!toBeScheduled.empty() && scheduled < width) {
        // Hold our own reference so the instruction outlives the pop below.
        DynInstPtr inst = toBeScheduled.back();

        DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
                inst->seqNum, inst->readPC());

        readyQueue.push(inst);
        readyList.push_front(inst);
        inst->iqIt = readyList.begin();

        toBeScheduled.pop_back();
        ++scheduled;
    }

    numIssued->size = scheduled;
}
/**
 * Makes the instruction at the back of nonSpec ready to execute.
 *
 * The instruction is moved from nonSpec to readyList/readyQueue and the
 * issued count on the numIssued wire is incremented.  The instruction at
 * the back of nonSpec must carry sequence number @p sn (asserted).
 *
 * The list is now asserted to be non-empty before back() is used; calling
 * back() on an empty list is undefined behaviour.
 *
 * @param sn Sequence number of the non-speculative instruction to schedule.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
{
    assert(!nonSpec.empty());

    DynInstPtr inst = nonSpec.back();

    DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n",
            inst->seqNum);

    assert(inst->seqNum == sn);
    assert(find(NonSpec, inst->iqIt));

    nonSpec.erase(inst->iqIt);

    readyList.push_front(inst);
    inst->iqIt = readyList.begin();
    readyQueue.push(inst);

    numIssued->size++;
}
/**
 * Removes and returns the instruction at the top of readyQueue.
 *
 * The instruction is also erased from readyList, its IQ iterator is marked
 * invalid, and the IQ occupancy count is decremented.
 *
 * @return The instruction taken from the ready queue.
 */
template <class Impl>
typename Impl::DynInstPtr
BackEnd<Impl>::InstQueue::getReadyInst()
{
    assert(!readyList.empty());

    DynInstPtr ready_inst = readyQueue.top();
    readyQueue.pop();

    assert(find(ReadyList, ready_inst->iqIt));
    readyList.erase(ready_inst->iqIt);
    ready_inst->iqItValid = false;

    numInsts--;

    return ready_inst;
}
/**
 * Removes instructions younger than @p sn (sequence number greater than
 * @p sn) from the IQ.
 *
 * iq and nonSpec are trimmed from the front and the walk stops at the first
 * entry that is not younger than @p sn.  replayList is scanned in full.
 * toBeScheduled and readyList are not touched by this function.
 *
 * @param sn Sequence number to squash back to.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
{
    // Waiting (not yet ready) instructions.
    InstListIt it = iq.begin();
    InstListIt end_it = iq.end();
    while (it != end_it && (*it)->seqNum > sn) {
        DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n",
                (*it)->seqNum);
        (*it)->iqItValid = false;
        it = iq.erase(it);
        --numInsts;
    }

    // Non-speculative instructions.
    it = nonSpec.begin();
    end_it = nonSpec.end();
    while (it != end_it && (*it)->seqNum > sn) {
        DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n",
                (*it)->seqNum);
        (*it)->iqItValid = false;
        it = nonSpec.erase(it);
        --numInsts;
    }

    // Replayed memory instructions: every entry is examined.
    it = replayList.begin();
    end_it = replayList.end();
    while (it != end_it) {
        if ((*it)->seqNum > sn) {
            DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n",
                    (*it)->seqNum);
            (*it)->iqItValid = false;
            it = replayList.erase(it);
            --numInsts;
        } else {
            ++it;
        }
    }

    assert(numInsts >= 0);
}
/**
 * Marks one source register ready on every dependent of @p inst and moves
 * dependents that have become ready to issue onto toBeScheduled.
 *
 * A dependent is moved only when it is ready to issue and still holds a
 * valid IQ iterator; it is erased from nonSpec or iq according to
 * isNonSpeculative().
 *
 * @param inst Completed, non-squashed producer instruction.
 * @return Number of dependents recorded on @p inst.
 */
template <class Impl>
int
BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
{
    assert(!inst->isSquashed());

    std::vector<DynInstPtr> &dependents = inst->getDependents();
    int num_outputs = dependents.size();

    DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n",
            inst->seqNum);

    for (int idx = 0; idx < num_outputs; ++idx) {
        DynInstPtr consumer = dependents[idx];

        consumer->markSrcRegReady();
        DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n",
                consumer->seqNum);

        // Nothing to move unless it can issue and is still tracked here.
        if (!consumer->readyToIssue() || !consumer->iqItValid) {
            continue;
        }

        if (consumer->isNonSpeculative()) {
            assert(find(NonSpec, consumer->iqIt));
            nonSpec.erase(consumer->iqIt);
        } else {
            assert(find(IQ, consumer->iqIt));
            iq.erase(consumer->iqIt);
        }

        toBeScheduled.push_front(consumer);
        consumer->iqIt = toBeScheduled.begin();
    }

    return num_outputs;
}
/**
 * Puts a memory instruction that is no longer tracked by the IQ onto
 * replayList and counts it as an IQ entry again.
 *
 * @param inst Memory instruction to hold for replay; its IQ iterator must
 *             currently be invalid (asserted).
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
{
    DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);

    assert(!inst->iqItValid);

    ++numInsts;
    replayList.push_front(inst);
    inst->iqIt = replayList.begin();
    inst->iqItValid = true;
}
/**
 * Releases every instruction held on replayList so it can be scheduled
 * again.
 *
 * @p inst must be on replayList (asserted).  All entries, taken from the
 * back of the list first, are moved to the front of toBeScheduled and their
 * iqIt is repointed there.
 *
 * Fixes relative to the previous version:
 *  - The per-entry trace message now prints the sequence number of the
 *    entry being moved; it used to print @p inst's number every time.
 *  - The loop no longer decrements end() of a possibly empty list, nor
 *    decrements an iterator past begin().
 *
 * @param inst Memory instruction that triggered the replay.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
{
    DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);

    assert(find(ReplayList, inst->iqIt));

    while (!replayList.empty()) {
        // Keep a reference alive across the pop below.
        DynInstPtr rescheduled_inst = replayList.back();

        DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n",
                rescheduled_inst->seqNum);

        replayList.pop_back();
        toBeScheduled.push_front(rescheduled_inst);
        rescheduled_inst->iqIt = toBeScheduled.begin();
    }
}
/**
 * Placeholder: completing a memory instruction in the IQ is not
 * implemented, so any call panics.
 *
 * @param inst Unused.
 */
template <class Impl>
void
BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
{
    panic("Not implemented.");
}
/**
 * Checks whether @p it refers to an element of the list selected by @p q.
 *
 * The selected list is walked linearly.  This function is used by the
 * asserts that guard erase() calls elsewhere in the IQ.
 *
 * Fixes relative to the previous version:
 *  - An unrecognised selector returns false instead of comparing iterators
 *    that were never assigned.
 *  - An iterator equal to the list's end() is reported as not found; it
 *    used to be reported as found, which would let erase(end()) through the
 *    callers' asserts.
 *
 * @param q  Which internal list to search.
 * @param it Iterator to look for.
 * @return true if @p it designates an element of the selected list.
 */
template <class Impl>
bool
BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
{
    InstListIt iq_it, iq_end_it;

    switch (q) {
      case NonSpec:
        iq_it = nonSpec.begin();
        iq_end_it = nonSpec.end();
        break;
      case IQ:
        iq_it = iq.begin();
        iq_end_it = iq.end();
        break;
      case ToBeScheduled:
        iq_it = toBeScheduled.begin();
        iq_end_it = toBeScheduled.end();
        break;
      case ReadyList:
        iq_it = readyList.begin();
        iq_end_it = readyList.end();
        break;
      case ReplayList:
        iq_it = replayList.begin();
        iq_end_it = replayList.end();
        break;
      default:
        // Unknown selector: there is no list to search.
        return false;
    }

    while (iq_it != iq_end_it && iq_it != it) {
        ++iq_it;
    }

    // Stopping before the end means the walk stopped on `it` itself.
    return iq_it != iq_end_it;
}
template <class Impl>
void
BackEnd<Impl>::InstQueue::dumpInsts()
{
cprintf("IQ size: %i\n", iq.size());
InstListIt inst_list_it = --iq.end();
int num = 0;
int valid_num = 0;
while (inst_list_it != iq.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it--;
++num;
}
cprintf("nonSpec size: %i\n", nonSpec.size());
inst_list_it = --nonSpec.end();
while (inst_list_it != nonSpec.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it--;
++num;
}
cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
inst_list_it = --toBeScheduled.end();
while (inst_list_it != toBeScheduled.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it--;
++num;
}
cprintf("readyList size: %i\n", readyList.size());
inst_list_it = --readyList.end();
while (inst_list_it != readyList.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it--;
++num;
}
}
/**
 * Creates a load writeback event on the main event queue.
 *
 * The event is flagged AutoDelete.
 *
 * @param _inst Load whose writeback this event performs.
 * @param _be   Back end that receives the instruction on completion.
 */
template<class Impl>
BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
                                                  BackEnd<Impl> *_be)
    : Event(&mainEventQueue),
      inst(_inst),
      be(_be)
{
    this->setFlags(Event::AutoDelete);
}
/**
 * Finishes a load's writeback.
 *
 * A squashed load is simply dropped.  Otherwise the load is marked executed
 * and completeAcc() is called if that has not happened yet, and the
 * instruction is handed to the back end's commit path.  The event releases
 * its reference to the instruction in every case.
 */
template<class Impl>
void
BackEnd<Impl>::LdWritebackEvent::process()
{
    DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);

    if (!inst->isSquashed()) {
        if (!inst->isExecuted()) {
            inst->setExecuted();

            // Execute again to copy data to proper place.
            inst->completeAcc();
        }

        // Need to insert instruction into queue to commit
        be->instToCommit(inst);
    }

    // Drop this event's reference to the instruction.
    inst = NULL;
}
/** Returns the fixed description string for this event type. */
template<class Impl>
const char *
BackEnd<Impl>::LdWritebackEvent::description()
{
    const char *desc = "Load writeback event";
    return desc;
}
/**
 * Creates a data-cache completion event on the main event queue with
 * priority CPU_Tick_Pri.
 *
 * @param _be Back end that owns this event.
 */
template <class Impl>
BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
    : Event(&mainEventQueue, CPU_Tick_Pri),
      be(_be)
{
}
/** Intentionally does nothing when the event fires. */
template <class Impl>
void
BackEnd<Impl>::DCacheCompletionEvent::process()
{
    // No work is performed on cache completion.
}
/** Returns the fixed description string for this event type. */
template <class Impl>
const char *
BackEnd<Impl>::DCacheCompletionEvent::description()
{
    const char *desc = "Cache completion event";
    return desc;
}
/**
 * Constructs the back end from its parameters.
 *
 * Creates the inter-stage time buffers, wires the IQ and LSQ back to this
 * object, derives the per-stage widths (each falls back to backEndWidth
 * when its own parameter is 0), initialises the LSQ and leaves dispatch in
 * the Running state.
 *
 * Fix relative to the previous version: dispatchSize and wbCycle are now
 * zeroed here.  dispatchSize is read, incremented and decremented by
 * dispatchInsts() but was never given an initial value; wbCycle was only
 * assigned inside tick(), although instToCommit() reads it.
 *
 * @param params Configuration parameters for the back end.
 */
template <class Impl>
BackEnd<Impl>::BackEnd(Params *params)
    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
      xcSquash(false), IQ(params),
      cacheCompletionEvent(this), width(params->backEndWidth),
      exactFullStall(true)
{
    numROBEntries = params->numROBEntries;
    numInsts = 0;
    numDispatchEntries = 32;

    // Counters that are read before any other function assigns them.
    dispatchSize = 0;
    wbCycle = 0;

    IQ.setBE(this);
    LSQ.setBE(this);

    // Setup IQ and LSQ with their parameters here.
    instsToDispatch = d2i.getWire(-1);

    instsToExecute = i2e.getWire(-1);

    IQ.setIssueExecQueue(&i2e);

    // A width of 0 means "use the overall back-end width".
    dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
    issueWidth = params->issueWidth ? params->issueWidth : width;
    wbWidth = params->wbWidth ? params->wbWidth : width;
    commitWidth = params->commitWidth ? params->commitWidth : width;

    LSQ.init(params, params->LQEntries, params->SQEntries, 0);

    dispatchStatus = Running;
}
/** Returns the owning CPU's name with ".backend" appended. */
template <class Impl>
std::string
BackEnd<Impl>::name() const
{
    std::string be_name = cpu->name();
    be_name += ".backend";
    return be_name;
}
// Registers all back-end statistics (ROB/IQ cap events, issue, LSQ,
// writeback, commit and ROB occupancy), binds the derived formula stats,
// and finally registers the instruction queue's own statistics.
// Most per-thread vectors are sized with cpu->number_of_threads.
template <class Impl>
void
BackEnd<Impl>::regStats()
{
using namespace Stats;
rob_cap_events
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("number of cycles where ROB cap was active")
.flags(total)
;
rob_cap_inst_count
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_inst")
.desc("number of instructions held up by ROB cap")
.flags(total)
;
iq_cap_events
.init(cpu->number_of_threads)
.name(name() +".IQ:cap_events" )
.desc("number of cycles where IQ cap was active")
.flags(total)
;
iq_cap_inst_count
.init(cpu->number_of_threads)
.name(name() + ".IQ:cap_inst")
.desc("number of instructions held up by IQ cap")
.flags(total)
;
exe_inst
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:count")
.desc("number of insts issued")
.flags(total)
;
exe_swp
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:swp")
.desc("number of swp insts issued")
.flags(total)
;
exe_nop
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:nop")
.desc("number of nop insts issued")
.flags(total)
;
exe_refs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:refs")
.desc("number of memory reference insts issued")
.flags(total)
;
exe_loads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:loads")
.desc("number of load insts issued")
.flags(total)
;
exe_branches
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:branches")
.desc("Number of branches issued")
.flags(total)
;
issued_ops
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:op_count")
.desc("number of insts issued")
.flags(total)
;
/*
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
subname << opClassStrings[i] << "_delay";
issue_delay_dist.subname(i, subname.str());
}
*/
//
// Other stats
//
lsq_forw_loads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:forw_loads")
.desc("number of loads forwarded via LSQ")
.flags(total)
;
inv_addr_loads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_loads")
.desc("number of invalid-address loads")
.flags(total)
;
inv_addr_swpfs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_swpfs")
.desc("number of invalid-address SW prefetches")
.flags(total)
;
lsq_blocked_loads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:blocked_loads")
.desc("number of ready loads not issued due to memory disambiguation")
.flags(total)
;
lsqInversion
.name(name() + ".ISSUE:lsq_invert")
.desc("Number of times LSQ instruction issued early")
;
// One bucket per possible issue count, 0 through issueWidth inclusive.
n_issued_dist
.init(issueWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
// No subnames are set for this distribution (the loop that did so is
// commented out above), unlike queue_res_dist below.
issue_delay_dist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
queue_res_dist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
// Label each op-class row of the residency distribution.
for (int i = 0; i < Num_OpClasses; ++i) {
queue_res_dist.subname(i, opClassStrings[i]);
}
writeback_count
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
producer_inst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
consumer_inst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
wb_penalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
.flags(total)
;
wb_penalized_rate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
wb_penalized_rate = wb_penalized / writeback_count;
wb_fanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
// NOTE(review): this is producers divided by consumers; a "fanout" would
// normally be consumers per producer -- confirm the intended direction.
wb_fanout = producer_inst / consumer_inst;
wb_rate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
wb_rate = writeback_count / cpu->numCycles;
stat_com_inst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
stat_com_swp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
stat_com_refs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
stat_com_loads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
stat_com_membars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
stat_com_branches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
n_committed_dist
.init(0,commitWidth,1)
.name(name() + ".COM:committed_per_cycle")
.desc("Number of insts commited each cycle")
.flags(pdf)
;
//
// Commit-Eligible instructions...
//
// -> The number of instructions eligible to commit in those
// cycles where we reached our commit BW limit (less the number
// actually committed)
//
// -> The average value is computed over ALL CYCLES... not just
// the BW limited cycles
//
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
commit_eligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
commit_eligible_samples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
ROB_fcount
.name(name() + ".ROB:full_count")
.desc("number of cycles where ROB was full")
;
// NOTE(review): unlike the other stats, this description is built with
// name() as a prefix -- confirm that is intentional.
ROB_count
.init(cpu->number_of_threads)
.name(name() + ".ROB:occupancy")
.desc(name() + ".ROB occupancy (cumulative)")
.flags(total)
;
ROB_full_rate
.name(name() + ".ROB:full_rate")
.desc("ROB full per cycle")
;
ROB_full_rate = ROB_fcount / cpu->numCycles;
ROB_occ_rate
.name(name() + ".ROB:occ_rate")
.desc("ROB occupancy rate")
.flags(total)
;
ROB_occ_rate = ROB_count / cpu->numCycles;
ROB_occ_dist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
.flags(total | cdf)
;
// The instruction queue registers its own statistics.
IQ.regStats();
}
/**
 * Attaches the communication time buffer and caches the two wires used by
 * the back end: offset 0 for values it writes (toIEW) and offset -1 for
 * values it reads (fromCommit).
 *
 * @param _comm Communication time buffer.
 */
template <class Impl>
void
BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
{
    comm = _comm;
    fromCommit = _comm->getWire(-1);
    toIEW = _comm->getWire(0);
}
/**
 * Advances the back end by one cycle.
 *
 * In order: accumulate ROB occupancy, reset the writeback cycle offset,
 * perform a pending XC squash, apply commit feedback to the IQ/LSQ, then
 * run dispatch (or re-check a blocked dispatch), scheduling, execute,
 * writeback and commit, advancing each stage's time buffer just before the
 * stage runs.
 */
template <class Impl>
void
BackEnd<Impl>::tick()
{
    DPRINTF(BE, "Ticking back end\n");

    ROB_count[0]+= numInsts;

    wbCycle = 0;

    if (xcSquash) {
        squashFromXC();
    }

    // Read in any done instruction information and update the IQ or LSQ.
    updateStructures();

    // Dispatch: a blocked dispatch stage only re-checks whether it may
    // resume.
    if (dispatchStatus == Blocked) {
        checkDispatchStatus();
    } else {
        d2i.advance();
        dispatchInsts();
    }

    // Schedule.
    i2e.advance();
    scheduleReadyInsts();

    // Execute.
    e2c.advance();
    executeInsts();

    // Writeback, then commit.
    numInstsToWB.advance();
    writebackInsts();
    commitInsts();

    DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
            IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());

    assert(numInsts == instList.size());
}
/**
 * Applies the feedback read from commit to the IQ and LSQ.
 *
 * A non-zero doneSeqNum commits IQ entries, loads and stores up to that
 * sequence number.  A non-zero nonSpecSeqNum either tells the LSQ to
 * execute the indicated load (when flagged uncached) or schedules the
 * non-speculative instruction in the IQ.
 */
template <class Impl>
void
BackEnd<Impl>::updateStructures()
{
    if (fromCommit->doneSeqNum) {
        IQ.commit(fromCommit->doneSeqNum);
        LSQ.commitLoads(fromCommit->doneSeqNum);
        LSQ.commitStores(fromCommit->doneSeqNum);
    }

    // No non-speculative request this cycle.
    if (!fromCommit->nonSpecSeqNum) {
        return;
    }

    if (fromCommit->uncached) {
        LSQ.executeLoad(fromCommit->lqIdx);
    } else {
        IQ.scheduleNonSpec(fromCommit->nonSpecSeqNum);
    }
}
/**
 * Inserts an instruction into the instruction queue.
 *
 * @param inst Instruction being dispatched.
 */
template <class Impl>
void
BackEnd<Impl>::addToIQ(DynInstPtr &inst)
{
    // Thin forwarding wrapper; no IQ-specific handling is done here yet.
    IQ.insert(inst);
}
/**
 * Inserts an instruction into the load/store queue.
 *
 * @param inst Memory instruction being dispatched.
 */
template <class Impl>
void
BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
{
    // Thin forwarding wrapper; no LSQ-specific handling is done here yet.
    LSQ.insert(inst);
}
/**
 * Pulls instructions from the front end and dispatches buffered ones.
 *
 * Phase 1: while the dispatch buffer has room, fetch up to the dispatch
 * width from the front end, record each instruction as the latest writer of
 * its destination registers in renameTable, and append it to the dispatch
 * buffer.
 *
 * Phase 2: take instsToDispatch->size instructions off the dispatch buffer;
 * unsquashed ones are added to the instruction list, the IQ and (for memory
 * references) the LSQ.  With exactFullStall set, dispatch stalls as soon as
 * the IQ, LSQ or ROB is full.
 *
 * Phase 3: a final fullness check that stalls dispatch after advancing d2i.
 */
template <class Impl>
void
BackEnd<Impl>::dispatchInsts()
{
    DPRINTF(BE, "Trying to dispatch instructions.\n");

    // Pull instructions out of the front end.
    int fetch_limit = dispatchWidth ? dispatchWidth : width;

    // Could model dispatching time, but in general 1 cycle is probably
    // good enough.
    if (dispatchSize < numDispatchEntries) {
        int fetched = 0;
        while (fetched < fetch_limit) {
            // Get instructions
            DynInstPtr inst = frontEnd->getInst();

            if (!inst) {
                // No more instructions to get
                break;
            }

            DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
                    inst->seqNum, inst->readPC());

            // This instruction becomes the latest producer of each of its
            // destination registers.
            for (int dest = 0; dest < inst->numDestRegs(); ++dest) {
                renameTable[inst->destRegIdx(dest)] = inst;
            }

            // Add to queue to be dispatched.
            dispatch.push_back(inst);

            d2i[0].size++;
            ++dispatchSize;
            ++fetched;
        }
    }

    assert(dispatch.size() < 64);

    for (int slot = 0; slot < instsToDispatch->size; ++slot) {
        assert(!dispatch.empty());

        // Get instruction from front of time buffer
        DynInstPtr inst = dispatch.front();
        dispatch.pop_front();
        --dispatchSize;

        if (inst->isSquashed()) {
            continue;
        }

        ++numInsts;
        instList.push_back(inst);

        DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
                inst->seqNum, inst->readPC());

        addToIQ(inst);

        if (inst->isMemRef()) {
            addToLSQ(inst);
        }

        if (inst->isNonSpeculative()) {
            inst->setCanCommit();
        }

        if (!exactFullStall) {
            continue;
        }

        // Check if IQ or LSQ is full.  If so we'll need to break and stop
        // removing instructions.  Also update the number of insts to remove
        // from the queue.
        bool stall_now = true;
        if (IQ.isFull()) {
            DPRINTF(BE, "IQ is full!\n");
        } else if (LSQ.isFull()) {
            DPRINTF(BE, "LSQ is full!\n");
        } else if (isFull()) {
            DPRINTF(BE, "ROB is full!\n");
            ROB_fcount++;
        } else {
            stall_now = false;
        }

        if (stall_now) {
            instsToDispatch->size-= slot+1;
            dispatchStall();
            return;
        }
    }

    // Check if IQ or LSQ is full.  If so we'll need to break and stop
    // removing instructions.  Also update the number of insts to remove
    // from the queue.  Check here if we don't care about exact stall
    // conditions.
    bool stall_after = true;
    if (IQ.isFull()) {
        DPRINTF(BE, "IQ is full!\n");
    } else if (LSQ.isFull()) {
        DPRINTF(BE, "LSQ is full!\n");
    } else if (isFull()) {
        DPRINTF(BE, "ROB is full!\n");
        ROB_fcount++;
    } else {
        stall_after = false;
    }

    if (stall_after) {
        d2i.advance();
        dispatchStall();
    }
}
/**
 * Marks dispatch as blocked.
 *
 * Nothing is currently sent to the front end when it is not decoupled; the
 * branch below is an empty placeholder.
 */
template <class Impl>
void
BackEnd<Impl>::dispatchStall()
{
    dispatchStatus = Blocked;

    if (!cpu->decoupledFrontEnd) {
        // Tell front end to stall here through a timebuffer, or just tell
        // it directly.
    }
}
/**
 * Re-evaluates a blocked dispatch stage.
 *
 * Dispatch must currently be Blocked (asserted).  If the IQ, LSQ and ROB
 * all have room, dispatch returns to Running and dispatchInsts() is run
 * immediately.
 */
template <class Impl>
void
BackEnd<Impl>::checkDispatchStatus()
{
    DPRINTF(BE, "Checking dispatch status\n");
    assert(dispatchStatus == Blocked);

    // Still blocked while any of the three structures is full.
    if (IQ.isFull() || LSQ.isFull() || isFull()) {
        return;
    }

    DPRINTF(BE, "Dispatch no longer blocked\n");
    dispatchStatus = Running;
    dispatchInsts();
}
/**
 * Asks the IQ to move its ready instructions into the ready structures.
 *
 * Open design question kept from the original author: the IQ could instead
 * return a list of DynInstPtrs, which would let this stage either place
 * them in a time buffer to model scheduling latency or hand them directly
 * to the next stage.
 */
template <class Impl>
void
BackEnd<Impl>::scheduleReadyInsts()
{
    DPRINTF(BE, "Trying to schedule ready instructions\n");

    IQ.scheduleReadyInsts();
}
// Executes the instructions that were issued for this cycle.
//
// instsToExecute->size instructions are taken from the IQ's ready queue.
// Squashed instructions are marked executed and committable and are
// otherwise skipped.  Non-prefetch memory references are handed to the
// LSQ; everything else is executed directly, marked executed and sent to
// commit.  After each instruction the function looks for a branch
// mispredict, an LSQ memory-order violation or a blocked load, and starts
// the matching squash for the first such event seen per thread.
//
// NOTE(review): only fetchRedirect[0] is reset at the top, while the flag
// is later indexed by inst->threadNumber -- confirm that a single thread
// is assumed.
template <class Impl>
void
BackEnd<Impl>::executeInsts()
{
int insts_to_execute = instsToExecute->size;
issued_ops[0]+= insts_to_execute;
n_issued_dist[insts_to_execute]++;
DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
fetchRedirect[0] = false;
while (insts_to_execute > 0) {
// Get ready instruction from the IQ (or queue coming out of IQ)
// Execute the ready instruction.
// Wakeup any dependents if it's done.
DynInstPtr inst = IQ.getReadyInst();
DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
inst->seqNum, inst->readPC());
++funcExeInst;
// Check if the instruction is squashed; if so then skip it
// and don't count it towards the FU usage.
if (inst->isSquashed()) {
DPRINTF(BE, "Execute: Instruction was squashed.\n");
// Not sure how to handle this plus the method of sending # of
// instructions to use. Probably will just have to count it
// towards the bandwidth usage, but not the FU usage.
--insts_to_execute;
// Consider this instruction executed so that commit can go
// ahead and retire the instruction.
inst->setExecuted();
// Not sure if I should set this here or just let commit try to
// commit any squashed instructions. I like the latter a bit more.
inst->setCanCommit();
// ++iewExecSquashedInsts;
continue;
}
// The fault is captured from LSQ.executeLoad() below but is not read
// again inside this function.
Fault fault = NoFault;
// Execute instruction.
// Note that if the instruction faults, it will be handled
// at the commit stage.
if (inst->isMemRef() &&
(!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
DPRINTF(BE, "Execute: Initiating access for memory "
"reference.\n");
// Tell the LDSTQ to execute this instruction (if it is a load).
if (inst->isLoad()) {
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
fault = LSQ.executeLoad(inst);
// ++iewExecLoadInsts;
} else if (inst->isStore()) {
LSQ.executeStore(inst);
// ++iewExecStoreInsts;
// Stores without the LOCKED request flag complete here.
if (!(inst->req->flags & LOCKED)) {
inst->setExecuted();
instToCommit(inst);
}
// Store conditionals will mark themselves as executed, and
// their writeback event will add the instruction to the queue
// to commit.
} else {
panic("Unexpected memory type!\n");
}
} else {
inst->execute();
// ++iewExecutedInsts;
inst->setExecuted();
instToCommit(inst);
}
updateExeInstStats(inst);
// Probably should have some sort of function for this.
// More general question of how to handle squashes? Have some sort of
// squash unit that controls it? Probably...
// Check if branch was correct. This check happens after the
// instruction is added to the queue because even if the branch
// is mispredicted, the branch instruction itself is still valid.
// Only handle this if there hasn't already been something that
// redirects fetch in this group of instructions.
// This probably needs to prioritize the redirects if a different
// scheduler is used. Currently the scheduler schedules the oldest
// instruction first, so the branch resolution order will be correct.
unsigned tid = inst->threadNumber;
if (!fetchRedirect[tid]) {
if (inst->mispredicted()) {
fetchRedirect[tid] = true;
DPRINTF(BE, "Execute: Branch mispredict detected.\n");
DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
inst->nextPC);
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst);
if (inst->predTaken()) {
// predictedTakenIncorrect++;
} else {
// predictedNotTakenIncorrect++;
}
} else if (LSQ.violation()) {
fetchRedirect[tid] = true;
// Get the DynInst that caused the violation. Note that this
// clears the violation signal.
DynInstPtr violator;
violator = LSQ.getMemDepViolator();
DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
"%#x, inst PC: %#x. Addr is: %#x.\n",
violator->readPC(), inst->readPC(), inst->physEffAddr);
// Tell the instruction queue that a violation has occured.
// IQ.violation(inst, violator);
// Squash.
// squashDueToMemOrder(inst,tid);
// The violation is currently handled with the branch squash path.
squashDueToBranch(inst);
// ++memOrderViolationEvents;
} else if (LSQ.loadBlocked()) {
fetchRedirect[tid] = true;
DPRINTF(BE, "Load operation couldn't execute because the "
"memory system is blocked. PC: %#x [sn:%lli]\n",
inst->readPC(), inst->seqNum);
squashDueToMemBlocked(inst);
}
}
// instList.pop_front();
--insts_to_execute;
// keep an instruction count
// Both thread->numInst and thread->numInsts are incremented here.
thread->numInst++;
thread->numInsts++;
}
assert(insts_to_execute >= 0);
}
/**
 * Queues a finished instruction for writeback.
 *
 * Starting at the current wbCycle offset, the first slot of numInstsToWB
 * with fewer than wbWidth entries is used; wbCycle is left pointing at that
 * slot.  The instruction is appended to the writeback list, and it is
 * counted as penalized when the chosen slot is not offset 0.
 *
 * For now this naively assumes every instruction takes one cycle; modelling
 * real latencies would mean indexing the time buffer by instruction
 * latency.
 *
 * @param inst Instruction that has finished executing.
 */
template<class Impl>
void
BackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
    int ports_per_cycle = wbWidth;

    DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
            inst->seqNum, inst->readPC());

    // Skip over cycles whose write ports are all taken.  The assert bounds
    // the search to the time buffer's five slots.
    while (numInstsToWB[wbCycle].size >= ports_per_cycle) {
        ++wbCycle;

        assert(wbCycle < 5);
    }

    // Add finished instruction to queue to commit.
    writeback.push_back(inst);
    numInstsToWB[wbCycle].size++;

    // Writing back in any cycle other than the current one is a penalty.
    if (wbCycle != 0) {
        wb_penalized[0]++;
    }
}
/**
 * Writes back up to wbWidth instructions from the front of the writeback
 * list.
 *
 * Each unsquashed instruction is marked committable and completed; if it
 * has executed, its dependents are woken in the IQ.  Every processed entry
 * is removed from the list, squashed or not.  Stores are then written back
 * through the LSQ and the writeback statistics are updated.
 *
 * Open question kept from the original author: with this scheme there is
 * no obvious way to stop an instruction waking its dependents more than
 * once unless commit is guaranteed to accept everything written back.
 */
template <class Impl>
void
BackEnd<Impl>::writebackInsts()
{
    int wb_limit = wbWidth;

    int inst_num = 0;
    int consumer_insts = 0;

    InstListIt wb_it = writeback.begin();
    InstListIt wb_end = writeback.end();

    while (inst_num < wb_limit && wb_it != wb_end) {
        DynInstPtr inst = *wb_it;

        // Some instructions will be sent to commit without having
        // executed because they need commit to handle them.
        // E.g. Uncached loads have not actually executed when they
        // are first sent to commit.  Instead commit must tell the LSQ
        // when it's ready to execute the uncached load.
        if (!inst->isSquashed()) {
            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
                    inst->seqNum, inst->readPC());

            inst->setCanCommit();
            inst->setCompleted();

            if (inst->isExecuted()) {
                int woken = IQ.wakeDependents(inst);

                if (woken) {
                    producer_inst[0]++;
                    consumer_insts+= woken;
                }
            }
        }

        wb_it = writeback.erase(wb_it);
        ++inst_num;
    }

    LSQ.writebackStores();

    consumer_inst[0]+= consumer_insts;
    writeback_count[0]+= inst_num;
}
// Tries to commit the single instruction at the front of instList.
//
// inst_num is the value passed by the caller; in this function it is only
// consulted in non-FULL_SYSTEM builds, where a non-zero value makes a
// not-yet-executed non-speculative instruction wait.
//
// Returns true when the instruction was retired and the caller may try the
// next one. Returns false when:
//  - the head instruction is not ready to commit;
//  - the head instruction has not executed yet (a request to execute it is
//    written to toIEW and its can-commit flag is cleared);
//  - (FULL_SYSTEM) the instruction faulted and the fault was invoked;
//  - (FULL_SYSTEM) servicing PC events changed the thread's PC. Note that in
//    this last case the instruction HAS already been removed from instList
//    and counted before false is returned.
// Panics on thread-sync instructions, on un-executed instructions that are
// neither non-speculative nor loads, and (non-FULL_SYSTEM) on any fault
// raised by a non-nop instruction.
template <class Impl>
bool
BackEnd<Impl>::commitInst(int inst_num)
{
// Read instruction from the head of the ROB
DynInstPtr inst = instList.front();
// Make sure instruction is valid
assert(inst);
// Nothing to do yet if the head has not been marked ready to commit.
if (!inst->readyToCommit())
return false;
DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
inst->seqNum, inst->readPC());
// If the instruction is not executed yet, then it is a non-speculative
// or store inst. Signal backwards that it should be executed.
if (!inst->isExecuted()) {
// Keep this number correct. We have not yet actually executed
// and committed this instruction.
// thread->funcExeInst--;
if (inst->isNonSpeculative()) {
#if !FULL_SYSTEM
// Hack to make sure syscalls aren't executed until all stores
// write back their data. This direct communication shouldn't
// be used for anything other than this.
// The wait also applies whenever inst_num is non-zero, i.e. the caller
// has already passed at least one earlier slot in this commit pass.
if (inst_num > 0 || LSQ.hasStoresToWB()) {
DPRINTF(BE, "Waiting for all stores to writeback.\n");
return false;
}
#endif
DPRINTF(BE, "Encountered a store or non-speculative "
"instruction at the head of the ROB, PC %#x.\n",
inst->readPC());
// Send back the non-speculative instruction's sequence number.
toIEW->nonSpecSeqNum = inst->seqNum;
// Change the instruction so it won't try to commit again until
// it is executed.
inst->clearCanCommit();
// ++commitNonSpecStalls;
return false;
} else if (inst->isLoad()) {
DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
inst->seqNum, inst->readPC());
// Send back the non-speculative instruction's sequence
// number. Maybe just tell the lsq to re-execute the load.
toIEW->nonSpecSeqNum = inst->seqNum;
// Additionally flag the request as uncached and pass along the load's
// lqIdx.
toIEW->uncached = true;
toIEW->lqIdx = inst->lqIdx;
inst->clearCanCommit();
return false;
} else {
panic("Trying to commit un-executed instruction "
"of unknown type!\n");
}
}
// Now check if it's one of the special trap or barrier or
// serializing instructions.
if (inst->isThreadSync())
{
// Not handled for now.
panic("Barrier instructions are not handled yet.\n");
}
// Check if the instruction caused a fault. If so, trap.
// A faulting instruction for which isNop() is true skips the trap handling
// below and is committed like any other instruction.
Fault inst_fault = inst->getFault();
if (inst_fault != NoFault) {
if (!inst->isNop()) {
#if FULL_SYSTEM
DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
inst->seqNum, inst->readPC());
// assert(!thread->inSyscall);
// thread->inSyscall = true;
// Consider holding onto the trap and waiting until the trap event
// happens for this to be executed.
inst_fault->invoke(thread->getXCProxy());
// Exit state update mode to avoid accidental updating.
// thread->inSyscall = false;
// commitStatus = TrapPending;
// Generate trap squash event.
// generateTrapEvent();
// The faulting instruction stays at the head of instList; only the fault
// has been invoked.
return false;
#else // !FULL_SYSTEM
panic("fault (%d) detected @ PC %08p", inst_fault,
inst->PC);
#endif // FULL_SYSTEM
}
}
if (inst->isControl()) {
// ++commitCommittedBranches;
}
// Point the thread's rename table at this instruction for every destination
// register it writes, counting one freed register per destination.
int freed_regs = 0;
for (int i = 0; i < inst->numDestRegs(); ++i) {
DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
(int)inst->destRegIdx(i), inst->seqNum);
thread->renameTable[inst->destRegIdx(i)] = inst;
++freed_regs;
}
// Finalize and drop any trace record attached to the instruction.
if (inst->traceData) {
inst->traceData->finalize();
inst->traceData = NULL;
}
inst->clearDependents();
// Report the freed registers to the front end, then retire the instruction:
// remove it from instList and update the instruction counters and thread PC.
frontEnd->addFreeRegs(freed_regs);
instList.pop_front();
--numInsts;
cpu->numInst++;
thread->numInsts++;
++thread->funcExeInst;
thread->PC = inst->readNextPC();
updateComInstStats(inst);
// Write the done sequence number here.
toIEW->doneSeqNum = inst->seqNum;
#if FULL_SYSTEM
// Service PC events repeatedly until a pass leaves the thread's PC
// unchanged. count is the number of passes made.
int count = 0;
Addr oldpc;
do {
if (count == 0)
assert(!thread->inSyscall && !thread->trapPending);
oldpc = thread->readPC();
cpu->system->pcEventQueue.service(
thread->getXCProxy());
count++;
} while (oldpc != thread->readPC());
// More than one pass means a PC event changed the PC; report false so the
// caller stops, even though this instruction was already committed above.
if (count > 1) {
DPRINTF(BE, "PC skip function event, stopping commit\n");
// completed_last_inst = false;
// squashPending = true;
return false;
}
#endif
return true;
}
// Commits instructions from the front of instList, in order, until either
// the list is empty, the per-call limit is reached, or commitInst() reports
// that it could not continue. The number of successful commitInst() calls is
// then sampled into n_committed_dist.
//
// The limit is commitWidth, or width when commitWidth is zero.
//
// Previously the counter was post-incremented inside the commitInst() call,
// so an attempt that returned false was still counted and the distribution
// was inflated by one on every call that stopped on a non-committable head.
// The counter is now advanced only after commitInst() returns true;
// commitInst() still receives the same argument value as before.
//
// NOTE(review): commitInst() can also return false *after* retiring an
// instruction (FULL_SYSTEM PC-event case); that instruction is not counted
// here. Fixing that needs a richer return value from commitInst().
template <class Impl>
void
BackEnd<Impl>::commitInsts()
{
    int commit_width = commitWidth ? commitWidth : width;

    // Not sure this should be a loop or not.
    int inst_num = 0;
    while (!instList.empty() && inst_num < commit_width) {
        if (instList.front()->isSquashed()) {
            panic("No squashed insts should still be on the list!");
            // Fallback cleanup, only reached if panic() ever returns.
            instList.front()->clearDependents();
            instList.pop_front();
            continue;
        }

        if (!commitInst(inst_num)) {
            break;
        }

        // Count only instructions that commitInst() reported as committed.
        ++inst_num;
    }

    n_committed_dist.sample(inst_num);
}
// Squashes every instruction whose sequence number is greater than sn.
//
// The squash is forwarded to IQ and LSQ first. Then the dispatch list and
// instList are each walked from the back towards the front, stopping at the
// first instruction whose sequence number is not greater than sn.
// Instructions already marked squashed are skipped. Every other instruction
// is marked squashed and can-commit, the renameTable entry of each of its
// destination registers is restored to the previous producer
// (getPrevDestInst), and its dependents are cleared. Instructions on
// instList are additionally erased from the list (decrementing numInsts);
// instructions on the dispatch list are left in place. Finally the number of
// restored registers is handed to the front end.
//
// The walks never step an iterator before begin() or dereference end():
// the earlier form decremented end() of a possibly empty list and relied on
// decrementing begin(), both of which are undefined for list iterators.
template <class Impl>
void
BackEnd<Impl>::squash(const InstSeqNum &sn)
{
    IQ.squash(sn);
    LSQ.squash(sn);

    int freed_regs = 0;

    // Dispatch list: mark younger instructions, but keep them on the list.
    InstListIt insts_it = dispatch.end();
    while (insts_it != dispatch.begin()) {
        --insts_it;

        // Reached an instruction that is not younger than sn; done.
        if (!((*insts_it)->seqNum > sn))
            break;

        if ((*insts_it)->isSquashed())
            continue;

        DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
                (*insts_it)->readPC(),
                (*insts_it)->seqNum);

        // Mark the instruction as squashed, and ready to commit so that
        // it can drain out of the pipeline.
        (*insts_it)->setSquashed();
        (*insts_it)->setCanCommit();

        // Be careful with IPRs and such here
        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
            DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
            // NOTE(review): prev_dest (an instruction pointer) is passed
            // for the %lli field; a sequence number was probably intended.
            DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
                    (int)(*insts_it)->destRegIdx(i), prev_dest);
            renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
            ++freed_regs;
        }

        (*insts_it)->clearDependents();
    }

    // Instruction list: younger, not-yet-squashed instructions are removed.
    insts_it = instList.end();
    while (insts_it != instList.begin()) {
        --insts_it;

        // Reached an instruction that is not younger than sn; done.
        if (!((*insts_it)->seqNum > sn))
            break;

        if ((*insts_it)->isSquashed())
            continue;

        DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
                (*insts_it)->readPC(),
                (*insts_it)->seqNum);

        // Mark the instruction as squashed, and ready to commit so that
        // it can drain out of the pipeline.
        (*insts_it)->setSquashed();
        (*insts_it)->setCanCommit();

        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
            DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
            // NOTE(review): same %lli / pointer mismatch as above.
            DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
                    (int)(*insts_it)->destRegIdx(i), prev_dest);
            renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
            ++freed_regs;
        }

        (*insts_it)->clearDependents();

        // erase() hands back the element after the erased one; the next
        // pass of the loop steps from there to the element before it.
        insts_it = instList.erase(insts_it);
        --numInsts;
    }

    frontEnd->addFreeRegs(freed_regs);
}
// Raises the xcSquash flag. No squashing is performed here; the flag is
// only recorded.
template <class Impl>
void
BackEnd<Impl>::squashFromXC()
{
    xcSquash = true;
}
// Squashes everything younger than the given branch instruction, first in
// the back end and then in the front end. The front end is additionally
// given the instruction's next PC, the constant true, and whether the
// instruction was mispredicted.
template <class Impl>
void
BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
{
    // The branch itself survives: both squashes use its own sequence number.
    const InstSeqNum branch_sn = inst->seqNum;

    // Update the branch predictor state I guess
    squash(branch_sn);

    frontEnd->squash(branch_sn, inst->readNextPC(),
                     true, inst->mispredicted());
}
// Squashes the given instruction together with everything younger than it,
// in the back end and then in the front end. The front end is also handed
// the instruction's own PC.
template <class Impl>
void
BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
{
    DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
            "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);

    // Squashing from one below the instruction's sequence number makes the
    // instruction itself part of the squashed range.
    const InstSeqNum last_kept_sn = inst->seqNum - 1;

    squash(last_kept_sn);
    frontEnd->squash(last_kept_sn, inst->readPC());
}
// Stores the supplied fault in faultFromFetch. Nothing else happens here;
// the fault is only recorded.
template <class Impl>
void
BackEnd<Impl>::fetchFault(Fault &fault)
{
    faultFromFetch = fault;
}
// Bumps the per-thread execute-stage counters for one instruction:
//  - exe_inst (or, when TARGET_ALPHA is defined and the instruction is a
//    data prefetch, exe_swp instead);
//  - exe_branches for control instructions;
//  - exe_refs for memory references, plus exe_loads when it is a load.
// All counters are indexed by the instruction's threadNumber.
template <class Impl>
void
BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
{
    const int tid = inst->threadNumber;

    // Software prefetches are tallied separately on Alpha targets only.
#ifdef TARGET_ALPHA
    if (!inst->isDataPrefetch())
        exe_inst[tid]++;
    else
        exe_swp[tid]++;
#else
    exe_inst[tid]++;
#endif

    // Control operations.
    if (inst->isControl())
        exe_branches[tid]++;

    // Memory operations; nothing further to record for anything else.
    if (!inst->isMemRef())
        return;

    exe_refs[tid]++;
    if (inst->isLoad())
        exe_loads[tid]++;
}
// Bumps the per-thread commit-stage counters for one instruction:
//  - stat_com_inst (or, when TARGET_ALPHA is defined and the instruction is
//    a data prefetch, stat_com_swp instead);
//  - stat_com_branches for control instructions;
//  - stat_com_refs for memory references, plus stat_com_loads for loads;
//  - stat_com_membars for memory barriers.
// All counters are indexed by the instruction's threadNumber.
template <class Impl>
void
BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
{
    const unsigned tid = inst->threadNumber;

    // Software prefetches are tallied separately on Alpha targets only.
#ifdef TARGET_ALPHA
    if (!inst->isDataPrefetch()) {
        stat_com_inst[tid]++;
    } else {
        stat_com_swp[tid]++;
    }
#else
    stat_com_inst[tid]++;
#endif

    // Control instructions.
    if (inst->isControl())
        stat_com_branches[tid]++;

    // Memory references; loads are a subset of these.
    const bool mem_ref = inst->isMemRef();
    if (mem_ref)
        stat_com_refs[tid]++;
    if (mem_ref && inst->isLoad())
        stat_com_loads[tid]++;

    // Memory barriers.
    if (inst->isMemBarrier())
        stat_com_membars[tid]++;
}
template <class Impl>
void
BackEnd<Impl>::dumpInsts()
{
int num = 0;
int valid_num = 0;
InstListIt inst_list_it = instList.begin();
cprintf("Inst list size: %i\n", instList.size());
while (inst_list_it != instList.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it++;
++num;
}
cprintf("Dispatch list size: %i\n", dispatch.size());
inst_list_it = dispatch.begin();
while (inst_list_it != dispatch.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it++;
++num;
}
cprintf("Writeback list size: %i\n", writeback.size());
inst_list_it = writeback.begin();
while (inst_list_it != writeback.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it++;
++num;
}
}