Ozone updates.
cpu/ozone/front_end.hh:
cpu/ozone/front_end_impl.hh:
cpu/ozone/lw_back_end.hh:
    Support latency for Ozone FE and BE.
cpu/ozone/lw_back_end_impl.hh:
    Support latency for Ozone FE and BE. Also fixes for switching out, profiling.
cpu/ozone/lw_lsq.hh:
cpu/ozone/lw_lsq_impl.hh:
    Fixes for switching out.
cpu/ozone/simple_params.hh:
    Updated parameters.
--HG--
extra : convert_revision : 21d4846a59a2239bfdf8fe92e47fd0972debe4f5
This commit is contained in:
parent
ad2fa1e1c9
commit
4ec5e90c8f
7 changed files with 236 additions and 67 deletions
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include <deque>
|
||||
|
||||
#include "base/timebuf.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/bpred_unit.hh"
|
||||
#include "cpu/ozone/rename_table.hh"
|
||||
|
@ -210,15 +211,21 @@ class FrontEnd
|
|||
void dumpInsts();
|
||||
|
||||
private:
|
||||
TimeBuffer<int> numInstsReady;
|
||||
|
||||
typedef typename std::deque<DynInstPtr> InstBuff;
|
||||
typedef typename InstBuff::iterator InstBuffIt;
|
||||
|
||||
InstBuff feBuffer;
|
||||
|
||||
InstBuff instBuffer;
|
||||
|
||||
int instBufferSize;
|
||||
|
||||
int maxInstBufferSize;
|
||||
|
||||
int latency;
|
||||
|
||||
int width;
|
||||
|
||||
int freeRegs;
|
||||
|
|
|
@ -41,8 +41,10 @@ template <class Impl>
|
|||
FrontEnd<Impl>::FrontEnd(Params *params)
|
||||
: branchPred(params),
|
||||
icacheInterface(params->icacheInterface),
|
||||
numInstsReady(params->frontEndLatency, 0),
|
||||
instBufferSize(0),
|
||||
maxInstBufferSize(params->maxInstBufferSize),
|
||||
latency(params->frontEndLatency),
|
||||
width(params->frontEndWidth),
|
||||
freeRegs(params->numPhysicalRegs),
|
||||
numPhysRegs(params->numPhysicalRegs),
|
||||
|
@ -261,6 +263,18 @@ FrontEnd<Impl>::tick()
|
|||
if (switchedOut)
|
||||
return;
|
||||
|
||||
for (int insts_to_queue = numInstsReady[-latency];
|
||||
!instBuffer.empty() && insts_to_queue;
|
||||
--insts_to_queue)
|
||||
{
|
||||
DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
|
||||
instBuffer.front()->seqNum);
|
||||
feBuffer.push_back(instBuffer.front());
|
||||
instBuffer.pop_front();
|
||||
}
|
||||
|
||||
numInstsReady.advance();
|
||||
|
||||
// @todo: Maybe I want to just have direct communication...
|
||||
if (fromCommit->doneSeqNum) {
|
||||
branchPred.update(fromCommit->doneSeqNum, 0);
|
||||
|
@ -349,6 +363,7 @@ FrontEnd<Impl>::tick()
|
|||
// latency
|
||||
instBuffer.push_back(inst);
|
||||
++instBufferSize;
|
||||
numInstsReady[0]++;
|
||||
++num_inst;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
|
@ -570,6 +585,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
|
|||
instruction->fault = fault;
|
||||
instruction->setCanIssue();
|
||||
instBuffer.push_back(instruction);
|
||||
numInstsReady[0]++;
|
||||
++instBufferSize;
|
||||
}
|
||||
|
||||
|
@ -599,6 +615,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
|
|||
freeRegs+= inst->numDestRegs();
|
||||
}
|
||||
|
||||
while (!feBuffer.empty() &&
|
||||
feBuffer.back()->seqNum > squash_num) {
|
||||
DynInstPtr inst = feBuffer.back();
|
||||
|
||||
DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
inst->clearDependents();
|
||||
|
||||
feBuffer.pop_back();
|
||||
--instBufferSize;
|
||||
|
||||
freeRegs+= inst->numDestRegs();
|
||||
}
|
||||
|
||||
// Copy over rename table from the back end.
|
||||
renameTable.copyFrom(backEnd->renameTable);
|
||||
|
||||
|
@ -633,13 +664,13 @@ template <class Impl>
|
|||
typename Impl::DynInstPtr
|
||||
FrontEnd<Impl>::getInst()
|
||||
{
|
||||
if (instBufferSize == 0) {
|
||||
if (feBuffer.empty()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DynInstPtr inst = instBuffer.front();
|
||||
DynInstPtr inst = feBuffer.front();
|
||||
|
||||
instBuffer.pop_front();
|
||||
feBuffer.pop_front();
|
||||
|
||||
--instBufferSize;
|
||||
|
||||
|
@ -857,6 +888,7 @@ FrontEnd<Impl>::doSwitchOut()
|
|||
squash(0, 0);
|
||||
instBuffer.clear();
|
||||
instBufferSize = 0;
|
||||
feBuffer.clear();
|
||||
status = Idle;
|
||||
}
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ class LWBackEnd
|
|||
TimeBuffer<IssueToExec> i2e;
|
||||
typename TimeBuffer<IssueToExec>::wire instsToExecute;
|
||||
TimeBuffer<ExecToCommit> e2c;
|
||||
TimeBuffer<Writeback> numInstsToWB;
|
||||
TimeBuffer<int> numInstsToWB;
|
||||
|
||||
TimeBuffer<CommStruct> *comm;
|
||||
typename TimeBuffer<CommStruct>::wire toIEW;
|
||||
|
@ -157,7 +157,7 @@ class LWBackEnd
|
|||
|
||||
Tick lastCommitCycle;
|
||||
|
||||
bool robEmpty() { return instList.empty(); }
|
||||
bool robEmpty() { return numInsts == 0; }
|
||||
|
||||
bool isFull() { return numInsts >= numROBEntries; }
|
||||
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
|
||||
|
@ -212,6 +212,7 @@ class LWBackEnd
|
|||
}
|
||||
|
||||
void instToCommit(DynInstPtr &inst);
|
||||
void readyInstsForCommit();
|
||||
|
||||
void switchOut();
|
||||
void doSwitchOut();
|
||||
|
@ -293,12 +294,13 @@ class LWBackEnd
|
|||
|
||||
MemReqPtr memReq;
|
||||
|
||||
int latency;
|
||||
|
||||
// General back end width. Used if the more specific isn't given.
|
||||
int width;
|
||||
|
||||
// Dispatch width.
|
||||
int dispatchWidth;
|
||||
int numDispatchEntries;
|
||||
int dispatchSize;
|
||||
|
||||
int waitingInsts;
|
||||
|
@ -323,6 +325,7 @@ class LWBackEnd
|
|||
|
||||
int numROBEntries;
|
||||
int numInsts;
|
||||
bool lsqLimits;
|
||||
|
||||
std::set<InstSeqNum> waitingMemOps;
|
||||
typedef std::set<InstSeqNum>::iterator MemIt;
|
||||
|
@ -333,9 +336,6 @@ class LWBackEnd
|
|||
InstSeqNum squashSeqNum;
|
||||
Addr squashNextPC;
|
||||
|
||||
Fault faultFromFetch;
|
||||
bool fetchHasFault;
|
||||
|
||||
bool switchedOut;
|
||||
bool switchPending;
|
||||
|
||||
|
@ -359,8 +359,6 @@ class LWBackEnd
|
|||
std::list<DynInstPtr> replayList;
|
||||
std::list<DynInstPtr> writeback;
|
||||
|
||||
int latency;
|
||||
|
||||
int squashLatency;
|
||||
|
||||
bool exactFullStall;
|
||||
|
@ -397,9 +395,11 @@ class LWBackEnd
|
|||
Stats::Scalar<> lsqInversion;
|
||||
|
||||
Stats::Vector<> nIssuedDist;
|
||||
/*
|
||||
Stats::VectorDistribution<> issueDelayDist;
|
||||
|
||||
Stats::VectorDistribution<> queueResDist;
|
||||
*/
|
||||
/*
|
||||
Stats::Vector<> stat_fu_busy;
|
||||
Stats::Vector2d<> stat_fuBusy;
|
||||
|
@ -447,7 +447,7 @@ class LWBackEnd
|
|||
|
||||
Stats::Vector<> ROBCount; // cumulative ROB occupancy
|
||||
Stats::Formula ROBOccRate;
|
||||
Stats::VectorDistribution<> ROBOccDist;
|
||||
// Stats::VectorDistribution<> ROBOccDist;
|
||||
public:
|
||||
void dumpInsts();
|
||||
|
||||
|
|
|
@ -151,8 +151,10 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
|
|||
|
||||
// iewStage->wakeCPU();
|
||||
|
||||
if (be->isSwitchedOut())
|
||||
return;
|
||||
assert(inst->isSquashed() || !be->isSwitchedOut());
|
||||
|
||||
// if (be->isSwitchedOut() && inst->isLoad())
|
||||
// return;
|
||||
|
||||
if (dcacheMiss) {
|
||||
be->removeDcacheMiss(inst);
|
||||
|
@ -208,14 +210,14 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
|
|||
|
||||
template <class Impl>
|
||||
LWBackEnd<Impl>::LWBackEnd(Params *params)
|
||||
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
|
||||
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
|
||||
trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
|
||||
dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
|
||||
dcacheInterface(params->dcacheInterface), latency(params->backEndLatency),
|
||||
width(params->backEndWidth), lsqLimits(params->lsqLimits),
|
||||
exactFullStall(true)
|
||||
{
|
||||
numROBEntries = params->numROBEntries;
|
||||
numInsts = 0;
|
||||
numDispatchEntries = 32;
|
||||
maxOutstandingMemOps = params->maxOutstandingMemOps;
|
||||
numWaitingMemOps = 0;
|
||||
waitingInsts = 0;
|
||||
|
@ -251,6 +253,8 @@ void
|
|||
LWBackEnd<Impl>::regStats()
|
||||
{
|
||||
using namespace Stats;
|
||||
LSQ.regStats();
|
||||
|
||||
robCapEvents
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ROB:cap_events")
|
||||
|
@ -377,6 +381,7 @@ LWBackEnd<Impl>::regStats()
|
|||
.desc("Number of insts issued each cycle")
|
||||
.flags(total | pdf | dist)
|
||||
;
|
||||
/*
|
||||
issueDelayDist
|
||||
.init(Num_OpClasses,0,99,2)
|
||||
.name(name() + ".ISSUE:")
|
||||
|
@ -393,7 +398,7 @@ LWBackEnd<Impl>::regStats()
|
|||
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||
queueResDist.subname(i, opClassStrings[i]);
|
||||
}
|
||||
|
||||
*/
|
||||
writebackCount
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".WB:count")
|
||||
|
@ -555,13 +560,14 @@ LWBackEnd<Impl>::regStats()
|
|||
.flags(total)
|
||||
;
|
||||
ROBOccRate = ROBCount / cpu->numCycles;
|
||||
|
||||
/*
|
||||
ROBOccDist
|
||||
.init(cpu->number_of_threads,0,numROBEntries,2)
|
||||
.name(name() + ".ROB:occ_dist")
|
||||
.desc("ROB Occupancy per cycle")
|
||||
.flags(total | cdf)
|
||||
;
|
||||
*/
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -654,18 +660,22 @@ LWBackEnd<Impl>::tick()
|
|||
{
|
||||
DPRINTF(BE, "Ticking back end\n");
|
||||
|
||||
// Read in any done instruction information and update the IQ or LSQ.
|
||||
updateStructures();
|
||||
|
||||
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
|
||||
cpu->signalSwitched();
|
||||
return;
|
||||
}
|
||||
|
||||
readyInstsForCommit();
|
||||
|
||||
numInstsToWB.advance();
|
||||
|
||||
ROBCount[0]+= numInsts;
|
||||
|
||||
wbCycle = 0;
|
||||
|
||||
// Read in any done instruction information and update the IQ or LSQ.
|
||||
updateStructures();
|
||||
|
||||
#if FULL_SYSTEM
|
||||
checkInterrupts();
|
||||
|
||||
|
@ -740,6 +750,10 @@ LWBackEnd<Impl>::dispatchInsts()
|
|||
while (numInsts < numROBEntries &&
|
||||
numWaitingMemOps < maxOutstandingMemOps) {
|
||||
// Get instruction from front of time buffer
|
||||
if (lsqLimits && LSQ.isFull()) {
|
||||
break;
|
||||
}
|
||||
|
||||
DynInstPtr inst = frontEnd->getInst();
|
||||
if (!inst) {
|
||||
break;
|
||||
|
@ -798,6 +812,7 @@ LWBackEnd<Impl>::dispatchInsts()
|
|||
inst->setIssued();
|
||||
inst->setExecuted();
|
||||
inst->setCanCommit();
|
||||
numInstsToWB[0]++;
|
||||
} else {
|
||||
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
|
||||
"exeList.\n",
|
||||
|
@ -987,16 +1002,10 @@ template<class Impl>
|
|||
void
|
||||
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
|
||||
{
|
||||
|
||||
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
if (!inst->isSquashed()) {
|
||||
DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
inst->setCanCommit();
|
||||
|
||||
if (inst->isExecuted()) {
|
||||
inst->setResultReady();
|
||||
int dependents = wakeDependents(inst);
|
||||
|
@ -1007,8 +1016,32 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
|
|||
}
|
||||
}
|
||||
|
||||
writeback.push_back(inst);
|
||||
|
||||
numInstsToWB[0]++;
|
||||
|
||||
writebackCount[0]++;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::readyInstsForCommit()
|
||||
{
|
||||
for (int i = numInstsToWB[-latency];
|
||||
!writeback.empty() && i;
|
||||
--i)
|
||||
{
|
||||
DynInstPtr inst = writeback.front();
|
||||
writeback.pop_front();
|
||||
if (!inst->isSquashed()) {
|
||||
DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
inst->setCanCommit();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
template <class Impl>
|
||||
void
|
||||
|
@ -1221,6 +1254,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
|
|||
++freed_regs;
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (thread->profile) {
|
||||
// bool usermode =
|
||||
// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
|
||||
// thread->profilePC = usermode ? 1 : inst->readPC();
|
||||
thread->profilePC = inst->readPC();
|
||||
ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
|
||||
inst->staticInst);
|
||||
|
||||
if (node)
|
||||
thread->profileNode = node;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (inst->traceData) {
|
||||
inst->traceData->setFetchSeq(inst->seqNum);
|
||||
inst->traceData->setCPSeq(thread->numInst);
|
||||
|
@ -1280,9 +1327,9 @@ LWBackEnd<Impl>::commitInsts()
|
|||
while (!instList.empty() && inst_num < commitWidth) {
|
||||
if (instList.back()->isSquashed()) {
|
||||
instList.back()->clearDependents();
|
||||
ROBSquashedInsts[instList.back()->threadNumber]++;
|
||||
instList.pop_back();
|
||||
--numInsts;
|
||||
ROBSquashedInsts[instList.back()->threadNumber]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1304,10 +1351,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
|||
LSQ.squash(sn);
|
||||
|
||||
int freed_regs = 0;
|
||||
InstListIt waiting_list_end = waitingList.end();
|
||||
InstListIt insts_end_it = waitingList.end();
|
||||
InstListIt insts_it = waitingList.begin();
|
||||
|
||||
while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
|
||||
while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
|
||||
{
|
||||
if ((*insts_it)->isSquashed()) {
|
||||
++insts_it;
|
||||
|
@ -1333,6 +1380,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
|||
while (!instList.empty() && (*insts_it)->seqNum > sn)
|
||||
{
|
||||
if ((*insts_it)->isSquashed()) {
|
||||
panic("Instruction should not be already squashed and on list!");
|
||||
++insts_it;
|
||||
continue;
|
||||
}
|
||||
|
@ -1364,18 +1412,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
|||
--numInsts;
|
||||
}
|
||||
|
||||
insts_it = waitingList.begin();
|
||||
while (!waitingList.empty() && insts_it != waitingList.end()) {
|
||||
if ((*insts_it)->seqNum < sn) {
|
||||
++insts_it;
|
||||
continue;
|
||||
}
|
||||
assert((*insts_it)->isSquashed());
|
||||
|
||||
waitingList.erase(insts_it++);
|
||||
waitingInsts--;
|
||||
}
|
||||
|
||||
while (memBarrier && memBarrier->seqNum > sn) {
|
||||
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
|
||||
"squashed)\n", memBarrier->seqNum);
|
||||
|
@ -1393,6 +1429,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
|||
}
|
||||
}
|
||||
|
||||
insts_it = replayList.begin();
|
||||
insts_end_it = replayList.end();
|
||||
while (!replayList.empty() && insts_it != insts_end_it) {
|
||||
if ((*insts_it)->seqNum < sn) {
|
||||
++insts_it;
|
||||
continue;
|
||||
}
|
||||
assert((*insts_it)->isSquashed());
|
||||
|
||||
replayList.erase(insts_it++);
|
||||
}
|
||||
|
||||
frontEnd->addFreeRegs(freed_regs);
|
||||
}
|
||||
|
||||
|
@ -1463,14 +1511,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
|
|||
frontEnd->squash(inst->seqNum - 1, inst->readPC());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::fetchFault(Fault &fault)
|
||||
{
|
||||
faultFromFetch = fault;
|
||||
fetchHasFault = true;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::switchOut()
|
||||
|
@ -1489,16 +1529,25 @@ LWBackEnd<Impl>::doSwitchOut()
|
|||
// yet written back.
|
||||
assert(robEmpty());
|
||||
assert(!LSQ.hasStoresToWB());
|
||||
writeback.clear();
|
||||
for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
|
||||
numInstsToWB.advance();
|
||||
|
||||
// squash(0);
|
||||
assert(waitingList.empty());
|
||||
assert(instList.empty());
|
||||
assert(replayList.empty());
|
||||
assert(writeback.empty());
|
||||
LSQ.switchOut();
|
||||
|
||||
squash(0);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
|
||||
{
|
||||
assert(!squashPending);
|
||||
squashSeqNum = 0;
|
||||
squashNextPC = 0;
|
||||
xcSquash = false;
|
||||
trapSquash = false;
|
||||
|
||||
|
@ -1641,6 +1690,45 @@ LWBackEnd<Impl>::dumpInsts()
|
|||
++num;
|
||||
}
|
||||
|
||||
inst_list_it = --(writeback.end());
|
||||
|
||||
cprintf("Writeback list size: %i\n", writeback.size());
|
||||
|
||||
while (inst_list_it != writeback.end())
|
||||
{
|
||||
cprintf("Instruction:%i\n",
|
||||
num);
|
||||
if (!(*inst_list_it)->isSquashed()) {
|
||||
if (!(*inst_list_it)->isIssued()) {
|
||||
++valid_num;
|
||||
cprintf("Count:%i\n", valid_num);
|
||||
} else if ((*inst_list_it)->isMemRef() &&
|
||||
!(*inst_list_it)->memOpDone) {
|
||||
// Loads that have not been marked as executed still count
|
||||
// towards the total instructions.
|
||||
++valid_num;
|
||||
cprintf("Count:%i\n", valid_num);
|
||||
}
|
||||
}
|
||||
|
||||
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
|
||||
"Issued:%i\nSquashed:%i\n",
|
||||
(*inst_list_it)->readPC(),
|
||||
(*inst_list_it)->seqNum,
|
||||
(*inst_list_it)->threadNumber,
|
||||
(*inst_list_it)->isIssued(),
|
||||
(*inst_list_it)->isSquashed());
|
||||
|
||||
if ((*inst_list_it)->isMemRef()) {
|
||||
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
|
||||
}
|
||||
|
||||
cprintf("\n");
|
||||
|
||||
inst_list_it--;
|
||||
++num;
|
||||
}
|
||||
|
||||
cprintf("Waiting list size: %i\n", waitingList.size());
|
||||
|
||||
inst_list_it = --(waitingList.end());
|
||||
|
|
|
@ -110,6 +110,8 @@ class OzoneLWLSQ {
|
|||
/** Returns the name of the LSQ unit. */
|
||||
std::string name() const;
|
||||
|
||||
void regStats();
|
||||
|
||||
/** Sets the CPU pointer. */
|
||||
void setCPU(FullCPU *cpu_ptr)
|
||||
{ cpu = cpu_ptr; }
|
||||
|
@ -203,7 +205,7 @@ class OzoneLWLSQ {
|
|||
int numLoads() { return loads; }
|
||||
|
||||
/** Returns the number of stores in the SQ. */
|
||||
int numStores() { return stores; }
|
||||
int numStores() { return stores + storesInFlight; }
|
||||
|
||||
/** Returns if either the LQ or SQ is full. */
|
||||
bool isFull() { return lqFull() || sqFull(); }
|
||||
|
@ -212,7 +214,7 @@ class OzoneLWLSQ {
|
|||
bool lqFull() { return loads >= (LQEntries - 1); }
|
||||
|
||||
/** Returns if the SQ is full. */
|
||||
bool sqFull() { return stores >= (SQEntries - 1); }
|
||||
bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
|
||||
|
||||
/** Debugging function to dump instructions in the LSQ. */
|
||||
void dumpInsts();
|
||||
|
@ -241,7 +243,9 @@ class OzoneLWLSQ {
|
|||
|
||||
private:
|
||||
/** Completes the store at the specified index. */
|
||||
void completeStore(int store_idx);
|
||||
void completeStore(DynInstPtr &inst);
|
||||
|
||||
void removeStore(int store_idx);
|
||||
|
||||
private:
|
||||
/** Pointer to the CPU. */
|
||||
|
@ -342,6 +346,10 @@ class OzoneLWLSQ {
|
|||
|
||||
int storesToWB;
|
||||
|
||||
public:
|
||||
int storesInFlight;
|
||||
|
||||
private:
|
||||
/// @todo Consider moving to a more advanced model with write vs read ports
|
||||
/** The number of cache ports available each cycle. */
|
||||
int cachePorts;
|
||||
|
@ -351,6 +359,9 @@ class OzoneLWLSQ {
|
|||
|
||||
//list<InstSeqNum> mshrSeqNums;
|
||||
|
||||
/** Total number of memory ordering violations. */
|
||||
Stats::Scalar<> lsqMemOrderViolation;
|
||||
|
||||
//Stats::Scalar<> dcacheStallCycles;
|
||||
Counter lastDcacheStall;
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
|
|||
|
||||
// lsqPtr->cpu->wakeCPU();
|
||||
if (lsqPtr->isSwitchedOut()) {
|
||||
panic("Should not be switched out!");
|
||||
if (wbEvent)
|
||||
delete wbEvent;
|
||||
|
||||
|
@ -68,7 +69,11 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
|
|||
delete wbEvent;
|
||||
}
|
||||
|
||||
lsqPtr->completeStore(inst->sqIdx);
|
||||
lsqPtr->completeStore(inst);
|
||||
lsqPtr->removeStore(inst->sqIdx);
|
||||
--(lsqPtr->storesInFlight);
|
||||
|
||||
DPRINTF(OzoneLSQ, "StoresInFlight: %i\n", lsqPtr->storesInFlight);
|
||||
if (miss)
|
||||
be->removeDcacheMiss(inst);
|
||||
}
|
||||
|
@ -82,7 +87,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
|
|||
|
||||
template <class Impl>
|
||||
OzoneLWLSQ<Impl>::OzoneLWLSQ()
|
||||
: loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
|
||||
: loads(0), stores(0), storesToWB(0), storesInFlight(0), stalled(false), isLoadBlocked(false),
|
||||
loadBlockedHandled(false)
|
||||
{
|
||||
}
|
||||
|
@ -121,6 +126,15 @@ OzoneLWLSQ<Impl>::name() const
|
|||
return "lsqunit";
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::regStats()
|
||||
{
|
||||
lsqMemOrderViolation
|
||||
.name(name() + ".memOrderViolation")
|
||||
.desc("Number of memory ordering violations");
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::clearLQ()
|
||||
|
@ -257,7 +271,7 @@ unsigned
|
|||
OzoneLWLSQ<Impl>::numFreeEntries()
|
||||
{
|
||||
unsigned free_lq_entries = LQEntries - loads;
|
||||
unsigned free_sq_entries = SQEntries - stores;
|
||||
unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
|
||||
|
||||
// Both the LQ and SQ entries have an extra dummy entry to differentiate
|
||||
// empty/full conditions. Subtract 1 from the free entries.
|
||||
|
@ -397,6 +411,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
|
|||
// A load incorrectly passed this store. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
memDepViolator = (*lq_it);
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return TheISA::genMachineCheckFault();
|
||||
}
|
||||
|
@ -483,8 +498,8 @@ OzoneLWLSQ<Impl>::writebackStores()
|
|||
|
||||
if ((*sq_it).size == 0 && !(*sq_it).completed) {
|
||||
sq_it--;
|
||||
completeStore(inst->sqIdx);
|
||||
|
||||
removeStore(inst->sqIdx);
|
||||
completeStore(inst);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -540,6 +555,8 @@ OzoneLWLSQ<Impl>::writebackStores()
|
|||
inst->sqIdx,inst->readPC(),
|
||||
req->paddr, *(req->data),
|
||||
inst->seqNum);
|
||||
DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
|
||||
storesInFlight + 1);
|
||||
|
||||
if (dcacheInterface) {
|
||||
assert(!req->completionEvent);
|
||||
|
@ -601,6 +618,8 @@ OzoneLWLSQ<Impl>::writebackStores()
|
|||
}
|
||||
sq_it--;
|
||||
}
|
||||
++storesInFlight;
|
||||
// removeStore(inst->sqIdx);
|
||||
} else {
|
||||
panic("Must HAVE DCACHE!!!!!\n");
|
||||
}
|
||||
|
@ -617,7 +636,7 @@ void
|
|||
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
|
||||
{
|
||||
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
|
||||
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
|
||||
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
|
||||
|
||||
|
||||
LQIt lq_it = loadQueue.begin();
|
||||
|
@ -732,7 +751,7 @@ OzoneLWLSQ<Impl>::dumpInsts()
|
|||
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::completeStore(int store_idx)
|
||||
OzoneLWLSQ<Impl>::removeStore(int store_idx)
|
||||
{
|
||||
SQHashIt sq_hash_it = SQItHash.find(store_idx);
|
||||
assert(sq_hash_it != SQItHash.end());
|
||||
|
@ -742,8 +761,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
|
|||
(*sq_it).completed = true;
|
||||
DynInstPtr inst = (*sq_it).inst;
|
||||
|
||||
--storesToWB;
|
||||
|
||||
if (isStalled() &&
|
||||
inst->seqNum == stallingStoreIsn) {
|
||||
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
|
||||
|
@ -761,6 +778,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
|
|||
SQItHash.erase(sq_hash_it);
|
||||
SQIndices.push(inst->sqIdx);
|
||||
storeQueue.erase(sq_it);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
|
||||
{
|
||||
--storesToWB;
|
||||
--stores;
|
||||
|
||||
inst->setCompleted();
|
||||
|
@ -839,9 +863,14 @@ OzoneLWLSQ<Impl>::switchOut()
|
|||
}
|
||||
|
||||
// Clear the queue to free up resources
|
||||
assert(stores == 0);
|
||||
assert(storeQueue.empty());
|
||||
assert(loads == 0);
|
||||
assert(loadQueue.empty());
|
||||
assert(storesInFlight == 0);
|
||||
storeQueue.clear();
|
||||
loadQueue.clear();
|
||||
loads = stores = storesToWB = 0;
|
||||
loads = stores = storesToWB = storesInFlight = 0;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
|
|
@ -70,10 +70,11 @@ class SimpleParams : public BaseCPU::Params
|
|||
|
||||
unsigned cachePorts;
|
||||
unsigned width;
|
||||
unsigned frontEndLatency;
|
||||
unsigned frontEndWidth;
|
||||
unsigned backEndLatency;
|
||||
unsigned backEndWidth;
|
||||
unsigned backEndSquashLatency;
|
||||
unsigned backEndLatency;
|
||||
unsigned maxInstBufferSize;
|
||||
unsigned numPhysicalRegs;
|
||||
unsigned maxOutstandingMemOps;
|
||||
|
@ -149,6 +150,7 @@ class SimpleParams : public BaseCPU::Params
|
|||
//
|
||||
unsigned LQEntries;
|
||||
unsigned SQEntries;
|
||||
bool lsqLimits;
|
||||
|
||||
//
|
||||
// Memory dependence
|
||||
|
|
Loading…
Reference in a new issue