Ozone CPU updates: front-end/back-end latency support and switch-out fixes.

cpu/ozone/front_end.hh:
cpu/ozone/front_end_impl.hh:
cpu/ozone/lw_back_end.hh:
    Support latency for Ozone FE and BE.
cpu/ozone/lw_back_end_impl.hh:
    Support latency for Ozone FE and BE.

    Also includes fixes for switching out and for profiling.
cpu/ozone/lw_lsq.hh:
cpu/ozone/lw_lsq_impl.hh:
    Fixes for switching out.
cpu/ozone/simple_params.hh:
    Updated parameters.

--HG--
extra : convert_revision : 21d4846a59a2239bfdf8fe92e47fd0972debe4f5
This commit is contained in:
Kevin Lim 2006-08-24 17:45:04 -04:00
parent ad2fa1e1c9
commit 4ec5e90c8f
7 changed files with 236 additions and 67 deletions

View file

@ -31,6 +31,7 @@
#include <deque> #include <deque>
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh" #include "cpu/inst_seq.hh"
#include "cpu/o3/bpred_unit.hh" #include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/rename_table.hh" #include "cpu/ozone/rename_table.hh"
@ -210,15 +211,21 @@ class FrontEnd
void dumpInsts(); void dumpInsts();
private: private:
TimeBuffer<int> numInstsReady;
typedef typename std::deque<DynInstPtr> InstBuff; typedef typename std::deque<DynInstPtr> InstBuff;
typedef typename InstBuff::iterator InstBuffIt; typedef typename InstBuff::iterator InstBuffIt;
InstBuff feBuffer;
InstBuff instBuffer; InstBuff instBuffer;
int instBufferSize; int instBufferSize;
int maxInstBufferSize; int maxInstBufferSize;
int latency;
int width; int width;
int freeRegs; int freeRegs;

View file

@ -41,8 +41,10 @@ template <class Impl>
FrontEnd<Impl>::FrontEnd(Params *params) FrontEnd<Impl>::FrontEnd(Params *params)
: branchPred(params), : branchPred(params),
icacheInterface(params->icacheInterface), icacheInterface(params->icacheInterface),
numInstsReady(params->frontEndLatency, 0),
instBufferSize(0), instBufferSize(0),
maxInstBufferSize(params->maxInstBufferSize), maxInstBufferSize(params->maxInstBufferSize),
latency(params->frontEndLatency),
width(params->frontEndWidth), width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs), freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs), numPhysRegs(params->numPhysicalRegs),
@ -261,6 +263,18 @@ FrontEnd<Impl>::tick()
if (switchedOut) if (switchedOut)
return; return;
for (int insts_to_queue = numInstsReady[-latency];
!instBuffer.empty() && insts_to_queue;
--insts_to_queue)
{
DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
instBuffer.front()->seqNum);
feBuffer.push_back(instBuffer.front());
instBuffer.pop_front();
}
numInstsReady.advance();
// @todo: Maybe I want to just have direct communication... // @todo: Maybe I want to just have direct communication...
if (fromCommit->doneSeqNum) { if (fromCommit->doneSeqNum) {
branchPred.update(fromCommit->doneSeqNum, 0); branchPred.update(fromCommit->doneSeqNum, 0);
@ -349,6 +363,7 @@ FrontEnd<Impl>::tick()
// latency // latency
instBuffer.push_back(inst); instBuffer.push_back(inst);
++instBufferSize; ++instBufferSize;
numInstsReady[0]++;
++num_inst; ++num_inst;
#if FULL_SYSTEM #if FULL_SYSTEM
@ -570,6 +585,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
instruction->fault = fault; instruction->fault = fault;
instruction->setCanIssue(); instruction->setCanIssue();
instBuffer.push_back(instruction); instBuffer.push_back(instruction);
numInstsReady[0]++;
++instBufferSize; ++instBufferSize;
} }
@ -599,6 +615,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
freeRegs+= inst->numDestRegs(); freeRegs+= inst->numDestRegs();
} }
while (!feBuffer.empty() &&
feBuffer.back()->seqNum > squash_num) {
DynInstPtr inst = feBuffer.back();
DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
inst->seqNum, inst->readPC());
inst->clearDependents();
feBuffer.pop_back();
--instBufferSize;
freeRegs+= inst->numDestRegs();
}
// Copy over rename table from the back end. // Copy over rename table from the back end.
renameTable.copyFrom(backEnd->renameTable); renameTable.copyFrom(backEnd->renameTable);
@ -633,13 +664,13 @@ template <class Impl>
typename Impl::DynInstPtr typename Impl::DynInstPtr
FrontEnd<Impl>::getInst() FrontEnd<Impl>::getInst()
{ {
if (instBufferSize == 0) { if (feBuffer.empty()) {
return NULL; return NULL;
} }
DynInstPtr inst = instBuffer.front(); DynInstPtr inst = feBuffer.front();
instBuffer.pop_front(); feBuffer.pop_front();
--instBufferSize; --instBufferSize;
@ -857,6 +888,7 @@ FrontEnd<Impl>::doSwitchOut()
squash(0, 0); squash(0, 0);
instBuffer.clear(); instBuffer.clear();
instBufferSize = 0; instBufferSize = 0;
feBuffer.clear();
status = Idle; status = Idle;
} }

View file

@ -78,7 +78,7 @@ class LWBackEnd
TimeBuffer<IssueToExec> i2e; TimeBuffer<IssueToExec> i2e;
typename TimeBuffer<IssueToExec>::wire instsToExecute; typename TimeBuffer<IssueToExec>::wire instsToExecute;
TimeBuffer<ExecToCommit> e2c; TimeBuffer<ExecToCommit> e2c;
TimeBuffer<Writeback> numInstsToWB; TimeBuffer<int> numInstsToWB;
TimeBuffer<CommStruct> *comm; TimeBuffer<CommStruct> *comm;
typename TimeBuffer<CommStruct>::wire toIEW; typename TimeBuffer<CommStruct>::wire toIEW;
@ -157,7 +157,7 @@ class LWBackEnd
Tick lastCommitCycle; Tick lastCommitCycle;
bool robEmpty() { return instList.empty(); } bool robEmpty() { return numInsts == 0; }
bool isFull() { return numInsts >= numROBEntries; } bool isFull() { return numInsts >= numROBEntries; }
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
@ -212,6 +212,7 @@ class LWBackEnd
} }
void instToCommit(DynInstPtr &inst); void instToCommit(DynInstPtr &inst);
void readyInstsForCommit();
void switchOut(); void switchOut();
void doSwitchOut(); void doSwitchOut();
@ -293,12 +294,13 @@ class LWBackEnd
MemReqPtr memReq; MemReqPtr memReq;
int latency;
// General back end width. Used if the more specific isn't given. // General back end width. Used if the more specific isn't given.
int width; int width;
// Dispatch width. // Dispatch width.
int dispatchWidth; int dispatchWidth;
int numDispatchEntries;
int dispatchSize; int dispatchSize;
int waitingInsts; int waitingInsts;
@ -323,6 +325,7 @@ class LWBackEnd
int numROBEntries; int numROBEntries;
int numInsts; int numInsts;
bool lsqLimits;
std::set<InstSeqNum> waitingMemOps; std::set<InstSeqNum> waitingMemOps;
typedef std::set<InstSeqNum>::iterator MemIt; typedef std::set<InstSeqNum>::iterator MemIt;
@ -333,9 +336,6 @@ class LWBackEnd
InstSeqNum squashSeqNum; InstSeqNum squashSeqNum;
Addr squashNextPC; Addr squashNextPC;
Fault faultFromFetch;
bool fetchHasFault;
bool switchedOut; bool switchedOut;
bool switchPending; bool switchPending;
@ -359,8 +359,6 @@ class LWBackEnd
std::list<DynInstPtr> replayList; std::list<DynInstPtr> replayList;
std::list<DynInstPtr> writeback; std::list<DynInstPtr> writeback;
int latency;
int squashLatency; int squashLatency;
bool exactFullStall; bool exactFullStall;
@ -397,9 +395,11 @@ class LWBackEnd
Stats::Scalar<> lsqInversion; Stats::Scalar<> lsqInversion;
Stats::Vector<> nIssuedDist; Stats::Vector<> nIssuedDist;
/*
Stats::VectorDistribution<> issueDelayDist; Stats::VectorDistribution<> issueDelayDist;
Stats::VectorDistribution<> queueResDist; Stats::VectorDistribution<> queueResDist;
*/
/* /*
Stats::Vector<> stat_fu_busy; Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy; Stats::Vector2d<> stat_fuBusy;
@ -447,7 +447,7 @@ class LWBackEnd
Stats::Vector<> ROBCount; // cumulative ROB occupancy Stats::Vector<> ROBCount; // cumulative ROB occupancy
Stats::Formula ROBOccRate; Stats::Formula ROBOccRate;
Stats::VectorDistribution<> ROBOccDist; // Stats::VectorDistribution<> ROBOccDist;
public: public:
void dumpInsts(); void dumpInsts();

View file

@ -151,8 +151,10 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
// iewStage->wakeCPU(); // iewStage->wakeCPU();
if (be->isSwitchedOut()) assert(inst->isSquashed() || !be->isSwitchedOut());
return;
// if (be->isSwitchedOut() && inst->isLoad())
// return;
if (dcacheMiss) { if (dcacheMiss) {
be->removeDcacheMiss(inst); be->removeDcacheMiss(inst);
@ -208,14 +210,14 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
template <class Impl> template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params) LWBackEnd<Impl>::LWBackEnd(Params *params)
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
trapSquash(false), xcSquash(false), cacheCompletionEvent(this), trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
dcacheInterface(params->dcacheInterface), width(params->backEndWidth), dcacheInterface(params->dcacheInterface), latency(params->backEndLatency),
width(params->backEndWidth), lsqLimits(params->lsqLimits),
exactFullStall(true) exactFullStall(true)
{ {
numROBEntries = params->numROBEntries; numROBEntries = params->numROBEntries;
numInsts = 0; numInsts = 0;
numDispatchEntries = 32;
maxOutstandingMemOps = params->maxOutstandingMemOps; maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0; numWaitingMemOps = 0;
waitingInsts = 0; waitingInsts = 0;
@ -251,6 +253,8 @@ void
LWBackEnd<Impl>::regStats() LWBackEnd<Impl>::regStats()
{ {
using namespace Stats; using namespace Stats;
LSQ.regStats();
robCapEvents robCapEvents
.init(cpu->number_of_threads) .init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events") .name(name() + ".ROB:cap_events")
@ -377,6 +381,7 @@ LWBackEnd<Impl>::regStats()
.desc("Number of insts issued each cycle") .desc("Number of insts issued each cycle")
.flags(total | pdf | dist) .flags(total | pdf | dist)
; ;
/*
issueDelayDist issueDelayDist
.init(Num_OpClasses,0,99,2) .init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:") .name(name() + ".ISSUE:")
@ -393,7 +398,7 @@ LWBackEnd<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) { for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]); queueResDist.subname(i, opClassStrings[i]);
} }
*/
writebackCount writebackCount
.init(cpu->number_of_threads) .init(cpu->number_of_threads)
.name(name() + ".WB:count") .name(name() + ".WB:count")
@ -555,13 +560,14 @@ LWBackEnd<Impl>::regStats()
.flags(total) .flags(total)
; ;
ROBOccRate = ROBCount / cpu->numCycles; ROBOccRate = ROBCount / cpu->numCycles;
/*
ROBOccDist ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2) .init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist") .name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle") .desc("ROB Occupancy per cycle")
.flags(total | cdf) .flags(total | cdf)
; ;
*/
} }
template <class Impl> template <class Impl>
@ -654,18 +660,22 @@ LWBackEnd<Impl>::tick()
{ {
DPRINTF(BE, "Ticking back end\n"); DPRINTF(BE, "Ticking back end\n");
// Read in any done instruction information and update the IQ or LSQ.
updateStructures();
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
cpu->signalSwitched(); cpu->signalSwitched();
return; return;
} }
readyInstsForCommit();
numInstsToWB.advance();
ROBCount[0]+= numInsts; ROBCount[0]+= numInsts;
wbCycle = 0; wbCycle = 0;
// Read in any done instruction information and update the IQ or LSQ.
updateStructures();
#if FULL_SYSTEM #if FULL_SYSTEM
checkInterrupts(); checkInterrupts();
@ -740,6 +750,10 @@ LWBackEnd<Impl>::dispatchInsts()
while (numInsts < numROBEntries && while (numInsts < numROBEntries &&
numWaitingMemOps < maxOutstandingMemOps) { numWaitingMemOps < maxOutstandingMemOps) {
// Get instruction from front of time buffer // Get instruction from front of time buffer
if (lsqLimits && LSQ.isFull()) {
break;
}
DynInstPtr inst = frontEnd->getInst(); DynInstPtr inst = frontEnd->getInst();
if (!inst) { if (!inst) {
break; break;
@ -798,6 +812,7 @@ LWBackEnd<Impl>::dispatchInsts()
inst->setIssued(); inst->setIssued();
inst->setExecuted(); inst->setExecuted();
inst->setCanCommit(); inst->setCanCommit();
numInstsToWB[0]++;
} else { } else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
"exeList.\n", "exeList.\n",
@ -987,16 +1002,10 @@ template<class Impl>
void void
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst) LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
{ {
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC()); inst->seqNum, inst->readPC());
if (!inst->isSquashed()) { if (!inst->isSquashed()) {
DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
inst->setCanCommit();
if (inst->isExecuted()) { if (inst->isExecuted()) {
inst->setResultReady(); inst->setResultReady();
int dependents = wakeDependents(inst); int dependents = wakeDependents(inst);
@ -1007,8 +1016,32 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
} }
} }
writeback.push_back(inst);
numInstsToWB[0]++;
writebackCount[0]++; writebackCount[0]++;
} }
/**
 * Marks written-back instructions as ready to commit.
 *
 * Reads the instruction count recorded in numInstsToWB at offset -latency
 * and pops up to that many instructions off the front of the writeback
 * list, stopping early if the list empties. Each popped instruction that
 * has not been squashed is flagged via setCanCommit(); squashed ones are
 * simply dropped from the list.
 */
template <class Impl>
void
LWBackEnd<Impl>::readyInstsForCommit()
{
    // Number of instructions still to be taken off the writeback list.
    int remaining = numInstsToWB[-latency];

    while (!writeback.empty() && remaining != 0) {
        --remaining;

        DynInstPtr inst = writeback.front();
        writeback.pop_front();

        // Squashed instructions are removed but never become committable.
        if (inst->isSquashed()) {
            continue;
        }

        DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
                inst->seqNum, inst->readPC());
        inst->setCanCommit();
    }
}
#if 0 #if 0
template <class Impl> template <class Impl>
void void
@ -1221,6 +1254,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
++freed_regs; ++freed_regs;
} }
#if FULL_SYSTEM
if (thread->profile) {
// bool usermode =
// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
// thread->profilePC = usermode ? 1 : inst->readPC();
thread->profilePC = inst->readPC();
ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
inst->staticInst);
if (node)
thread->profileNode = node;
}
#endif
if (inst->traceData) { if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum); inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst); inst->traceData->setCPSeq(thread->numInst);
@ -1280,9 +1327,9 @@ LWBackEnd<Impl>::commitInsts()
while (!instList.empty() && inst_num < commitWidth) { while (!instList.empty() && inst_num < commitWidth) {
if (instList.back()->isSquashed()) { if (instList.back()->isSquashed()) {
instList.back()->clearDependents(); instList.back()->clearDependents();
ROBSquashedInsts[instList.back()->threadNumber]++;
instList.pop_back(); instList.pop_back();
--numInsts; --numInsts;
ROBSquashedInsts[instList.back()->threadNumber]++;
continue; continue;
} }
@ -1304,10 +1351,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
LSQ.squash(sn); LSQ.squash(sn);
int freed_regs = 0; int freed_regs = 0;
InstListIt waiting_list_end = waitingList.end(); InstListIt insts_end_it = waitingList.end();
InstListIt insts_it = waitingList.begin(); InstListIt insts_it = waitingList.begin();
while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn) while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
{ {
if ((*insts_it)->isSquashed()) { if ((*insts_it)->isSquashed()) {
++insts_it; ++insts_it;
@ -1333,6 +1380,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
while (!instList.empty() && (*insts_it)->seqNum > sn) while (!instList.empty() && (*insts_it)->seqNum > sn)
{ {
if ((*insts_it)->isSquashed()) { if ((*insts_it)->isSquashed()) {
panic("Instruction should not be already squashed and on list!");
++insts_it; ++insts_it;
continue; continue;
} }
@ -1364,18 +1412,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
--numInsts; --numInsts;
} }
insts_it = waitingList.begin();
while (!waitingList.empty() && insts_it != waitingList.end()) {
if ((*insts_it)->seqNum < sn) {
++insts_it;
continue;
}
assert((*insts_it)->isSquashed());
waitingList.erase(insts_it++);
waitingInsts--;
}
while (memBarrier && memBarrier->seqNum > sn) { while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously " DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
"squashed)\n", memBarrier->seqNum); "squashed)\n", memBarrier->seqNum);
@ -1393,6 +1429,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
} }
} }
insts_it = replayList.begin();
insts_end_it = replayList.end();
while (!replayList.empty() && insts_it != insts_end_it) {
if ((*insts_it)->seqNum < sn) {
++insts_it;
continue;
}
assert((*insts_it)->isSquashed());
replayList.erase(insts_it++);
}
frontEnd->addFreeRegs(freed_regs); frontEnd->addFreeRegs(freed_regs);
} }
@ -1463,14 +1511,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
frontEnd->squash(inst->seqNum - 1, inst->readPC()); frontEnd->squash(inst->seqNum - 1, inst->readPC());
} }
template <class Impl>
void
LWBackEnd<Impl>::fetchFault(Fault &fault)
{
faultFromFetch = fault;
fetchHasFault = true;
}
template <class Impl> template <class Impl>
void void
LWBackEnd<Impl>::switchOut() LWBackEnd<Impl>::switchOut()
@ -1489,16 +1529,25 @@ LWBackEnd<Impl>::doSwitchOut()
// yet written back. // yet written back.
assert(robEmpty()); assert(robEmpty());
assert(!LSQ.hasStoresToWB()); assert(!LSQ.hasStoresToWB());
writeback.clear();
for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
numInstsToWB.advance();
// squash(0);
assert(waitingList.empty());
assert(instList.empty());
assert(replayList.empty());
assert(writeback.empty());
LSQ.switchOut(); LSQ.switchOut();
squash(0);
} }
template <class Impl> template <class Impl>
void void
LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc) LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
{ {
assert(!squashPending);
squashSeqNum = 0;
squashNextPC = 0;
xcSquash = false; xcSquash = false;
trapSquash = false; trapSquash = false;
@ -1641,6 +1690,45 @@ LWBackEnd<Impl>::dumpInsts()
++num; ++num;
} }
inst_list_it = --(writeback.end());
cprintf("Writeback list size: %i\n", writeback.size());
while (inst_list_it != writeback.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it--;
++num;
}
cprintf("Waiting list size: %i\n", waitingList.size()); cprintf("Waiting list size: %i\n", waitingList.size());
inst_list_it = --(waitingList.end()); inst_list_it = --(waitingList.end());

View file

@ -110,6 +110,8 @@ class OzoneLWLSQ {
/** Returns the name of the LSQ unit. */ /** Returns the name of the LSQ unit. */
std::string name() const; std::string name() const;
void regStats();
/** Sets the CPU pointer. */ /** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr) void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; } { cpu = cpu_ptr; }
@ -203,7 +205,7 @@ class OzoneLWLSQ {
int numLoads() { return loads; } int numLoads() { return loads; }
/** Returns the number of stores in the SQ. */ /** Returns the number of stores in the SQ. */
int numStores() { return stores; } int numStores() { return stores + storesInFlight; }
/** Returns if either the LQ or SQ is full. */ /** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); } bool isFull() { return lqFull() || sqFull(); }
@ -212,7 +214,7 @@ class OzoneLWLSQ {
bool lqFull() { return loads >= (LQEntries - 1); } bool lqFull() { return loads >= (LQEntries - 1); }
/** Returns if the SQ is full. */ /** Returns if the SQ is full. */
bool sqFull() { return stores >= (SQEntries - 1); } bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */ /** Debugging function to dump instructions in the LSQ. */
void dumpInsts(); void dumpInsts();
@ -241,7 +243,9 @@ class OzoneLWLSQ {
private: private:
/** Completes the store at the specified index. */ /** Completes the store at the specified index. */
void completeStore(int store_idx); void completeStore(DynInstPtr &inst);
void removeStore(int store_idx);
private: private:
/** Pointer to the CPU. */ /** Pointer to the CPU. */
@ -342,6 +346,10 @@ class OzoneLWLSQ {
int storesToWB; int storesToWB;
public:
int storesInFlight;
private:
/// @todo Consider moving to a more advanced model with write vs read ports /// @todo Consider moving to a more advanced model with write vs read ports
/** The number of cache ports available each cycle. */ /** The number of cache ports available each cycle. */
int cachePorts; int cachePorts;
@ -351,6 +359,9 @@ class OzoneLWLSQ {
//list<InstSeqNum> mshrSeqNums; //list<InstSeqNum> mshrSeqNums;
/** Total number of memory ordering violations. */
Stats::Scalar<> lsqMemOrderViolation;
//Stats::Scalar<> dcacheStallCycles; //Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall; Counter lastDcacheStall;

View file

@ -57,6 +57,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
// lsqPtr->cpu->wakeCPU(); // lsqPtr->cpu->wakeCPU();
if (lsqPtr->isSwitchedOut()) { if (lsqPtr->isSwitchedOut()) {
panic("Should not be switched out!");
if (wbEvent) if (wbEvent)
delete wbEvent; delete wbEvent;
@ -68,7 +69,11 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
delete wbEvent; delete wbEvent;
} }
lsqPtr->completeStore(inst->sqIdx); lsqPtr->completeStore(inst);
lsqPtr->removeStore(inst->sqIdx);
--(lsqPtr->storesInFlight);
DPRINTF(OzoneLSQ, "StoresInFlight: %i\n", lsqPtr->storesInFlight);
if (miss) if (miss)
be->removeDcacheMiss(inst); be->removeDcacheMiss(inst);
} }
@ -82,7 +87,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
template <class Impl> template <class Impl>
OzoneLWLSQ<Impl>::OzoneLWLSQ() OzoneLWLSQ<Impl>::OzoneLWLSQ()
: loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), : loads(0), stores(0), storesToWB(0), storesInFlight(0), stalled(false), isLoadBlocked(false),
loadBlockedHandled(false) loadBlockedHandled(false)
{ {
} }
@ -121,6 +126,15 @@ OzoneLWLSQ<Impl>::name() const
return "lsqunit"; return "lsqunit";
} }
/**
 * Registers the LSQ's statistics.
 *
 * Gives the memory-ordering-violation counter a name prefixed with this
 * unit's name() and attaches its description.
 */
template<class Impl>
void
OzoneLWLSQ<Impl>::regStats()
{
    // Build the fully qualified stat name once, then configure the stat.
    const std::string violation_stat_name = name() + ".memOrderViolation";

    lsqMemOrderViolation.name(violation_stat_name);
    lsqMemOrderViolation.desc("Number of memory ordering violations");
}
template<class Impl> template<class Impl>
void void
OzoneLWLSQ<Impl>::clearLQ() OzoneLWLSQ<Impl>::clearLQ()
@ -257,7 +271,7 @@ unsigned
OzoneLWLSQ<Impl>::numFreeEntries() OzoneLWLSQ<Impl>::numFreeEntries()
{ {
unsigned free_lq_entries = LQEntries - loads; unsigned free_lq_entries = LQEntries - loads;
unsigned free_sq_entries = SQEntries - stores; unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
// Both the LQ and SQ entries have an extra dummy entry to differentiate // Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries. // empty/full conditions. Subtract 1 from the free entries.
@ -397,6 +411,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch. // A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful. // For now return a fault to show that it was unsuccessful.
memDepViolator = (*lq_it); memDepViolator = (*lq_it);
++lsqMemOrderViolation;
return TheISA::genMachineCheckFault(); return TheISA::genMachineCheckFault();
} }
@ -483,8 +498,8 @@ OzoneLWLSQ<Impl>::writebackStores()
if ((*sq_it).size == 0 && !(*sq_it).completed) { if ((*sq_it).size == 0 && !(*sq_it).completed) {
sq_it--; sq_it--;
completeStore(inst->sqIdx); removeStore(inst->sqIdx);
completeStore(inst);
continue; continue;
} }
@ -540,6 +555,8 @@ OzoneLWLSQ<Impl>::writebackStores()
inst->sqIdx,inst->readPC(), inst->sqIdx,inst->readPC(),
req->paddr, *(req->data), req->paddr, *(req->data),
inst->seqNum); inst->seqNum);
DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
storesInFlight + 1);
if (dcacheInterface) { if (dcacheInterface) {
assert(!req->completionEvent); assert(!req->completionEvent);
@ -601,6 +618,8 @@ OzoneLWLSQ<Impl>::writebackStores()
} }
sq_it--; sq_it--;
} }
++storesInFlight;
// removeStore(inst->sqIdx);
} else { } else {
panic("Must HAVE DCACHE!!!!!\n"); panic("Must HAVE DCACHE!!!!!\n");
} }
@ -617,7 +636,7 @@ void
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num) OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
{ {
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores); "(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
LQIt lq_it = loadQueue.begin(); LQIt lq_it = loadQueue.begin();
@ -732,7 +751,7 @@ OzoneLWLSQ<Impl>::dumpInsts()
template <class Impl> template <class Impl>
void void
OzoneLWLSQ<Impl>::completeStore(int store_idx) OzoneLWLSQ<Impl>::removeStore(int store_idx)
{ {
SQHashIt sq_hash_it = SQItHash.find(store_idx); SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end()); assert(sq_hash_it != SQItHash.end());
@ -742,8 +761,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
(*sq_it).completed = true; (*sq_it).completed = true;
DynInstPtr inst = (*sq_it).inst; DynInstPtr inst = (*sq_it).inst;
--storesToWB;
if (isStalled() && if (isStalled() &&
inst->seqNum == stallingStoreIsn) { inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
@ -761,6 +778,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
SQItHash.erase(sq_hash_it); SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx); SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it); storeQueue.erase(sq_it);
}
template <class Impl>
void
OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
{
--storesToWB;
--stores; --stores;
inst->setCompleted(); inst->setCompleted();
@ -839,9 +863,14 @@ OzoneLWLSQ<Impl>::switchOut()
} }
// Clear the queue to free up resources // Clear the queue to free up resources
assert(stores == 0);
assert(storeQueue.empty());
assert(loads == 0);
assert(loadQueue.empty());
assert(storesInFlight == 0);
storeQueue.clear(); storeQueue.clear();
loadQueue.clear(); loadQueue.clear();
loads = stores = storesToWB = 0; loads = stores = storesToWB = storesInFlight = 0;
} }
template <class Impl> template <class Impl>

View file

@ -70,10 +70,11 @@ class SimpleParams : public BaseCPU::Params
unsigned cachePorts; unsigned cachePorts;
unsigned width; unsigned width;
unsigned frontEndLatency;
unsigned frontEndWidth; unsigned frontEndWidth;
unsigned backEndLatency;
unsigned backEndWidth; unsigned backEndWidth;
unsigned backEndSquashLatency; unsigned backEndSquashLatency;
unsigned backEndLatency;
unsigned maxInstBufferSize; unsigned maxInstBufferSize;
unsigned numPhysicalRegs; unsigned numPhysicalRegs;
unsigned maxOutstandingMemOps; unsigned maxOutstandingMemOps;
@ -149,6 +150,7 @@ class SimpleParams : public BaseCPU::Params
// //
unsigned LQEntries; unsigned LQEntries;
unsigned SQEntries; unsigned SQEntries;
bool lsqLimits;
// //
// Memory dependence // Memory dependence