21df09cf7a
cpu/base_dyn_inst.hh: Remove snoop function (did not mean to commit it). cpu/ozone/back_end_impl.hh: Set instruction as having its result ready, not completed. cpu/ozone/cpu.hh: Fixes for store conditionals. Use an additional lock addr list to make sure that the access is valid. I don't know if this is fully necessary, but it gives me a peace of mind (at some performance cost). Make sure to schedule for cycles(1) and not just 1 cycle in the future as tick = 1ps. Also support the new Checker. cpu/ozone/cpu_builder.cc: Add parameter for maxOutstandingMemOps so it can be set through the config. Also add in the checker. Right now it's a BaseCPU simobject, but that may change in the future. cpu/ozone/cpu_impl.hh: Add support for the checker. For now there's a dynamic cast to convert the simobject passed back from the builder to the proper Checker type. It's ugly, but only happens at startup, and is probably a justified use of dynamic cast. Support switching out/taking over from other CPUs. Correct indexing problem for float registers. cpu/ozone/dyn_inst.hh: Add ability for instructions to wait on memory instructions in addition to source register instructions. This is needed for memory dependence predictors and memory barriers. cpu/ozone/dyn_inst_impl.hh: Support waiting on memory operations. Use "resultReady" to differentiate an instruction having its registers produced vs being totally completed. cpu/ozone/front_end.hh: Support switching out. Also record if an interrupt is pending. cpu/ozone/front_end_impl.hh: Support switching out. Also support stalling the front end if an interrupt is pending. cpu/ozone/lw_back_end.hh: Add checker in. Support switching out. Support memory barriers. cpu/ozone/lw_back_end_impl.hh: Lots of changes to get things to work right. Faults, traps, interrupts all wait until all stores have written back (important). Memory barriers are supported, as is the general ability for instructions to be dependent on other memory instructions. 
cpu/ozone/lw_lsq.hh: Support switching out. Also use store writeback events in all cases, not just dcache misses. cpu/ozone/lw_lsq_impl.hh: Support switching out. Also use store writeback events in all cases, not just dcache misses. Support the checker CPU. Marks instructions as completed once the functional access is done (which has to be done for the checker to be able to verify results). cpu/ozone/simple_params.hh: Add max outstanding mem ops parameter. python/m5/objects/OzoneCPU.py: Add max outstanding mem ops, checker. --HG-- extra : convert_revision : f4d408e1bb1f25836a097b6abe3856111e950c59
521 lines
14 KiB
C++
521 lines
14 KiB
C++
|
|
#ifndef __CPU_OZONE_LW_BACK_END_HH__
|
|
#define __CPU_OZONE_LW_BACK_END_HH__
|
|
|
|
#include <list>
|
|
#include <queue>
|
|
#include <set>
|
|
#include <string>
|
|
|
|
#include "arch/faults.hh"
|
|
#include "base/timebuf.hh"
|
|
#include "cpu/inst_seq.hh"
|
|
#include "cpu/ozone/rename_table.hh"
|
|
#include "cpu/ozone/thread_state.hh"
|
|
#include "mem/functional/functional.hh"
|
|
#include "mem/mem_interface.hh"
|
|
#include "mem/mem_req.hh"
|
|
#include "sim/eventq.hh"
|
|
|
|
template <class>
|
|
class Checker;
|
|
class ExecContext;
|
|
|
|
template <class Impl>
|
|
class OzoneThreadState;
|
|
|
|
template <class Impl>
|
|
class LWBackEnd
|
|
{
|
|
public:
|
|
typedef OzoneThreadState<Impl> Thread;
|
|
|
|
typedef typename Impl::Params Params;
|
|
typedef typename Impl::DynInst DynInst;
|
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
|
typedef typename Impl::FullCPU FullCPU;
|
|
typedef typename Impl::FrontEnd FrontEnd;
|
|
typedef typename Impl::FullCPU::CommStruct CommStruct;
|
|
|
|
/**
 * Entry type shared by the back end's internal time buffers
 * (DispatchToIssue, IssueToExec, ExecToCommit and Writeback are all
 * aliases of it). It carries a single integer size field.
 */
struct SizeStruct
{
    int size;
};
|
|
|
|
typedef SizeStruct DispatchToIssue;
|
|
typedef SizeStruct IssueToExec;
|
|
typedef SizeStruct ExecToCommit;
|
|
typedef SizeStruct Writeback;
|
|
|
|
TimeBuffer<DispatchToIssue> d2i;
|
|
typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
|
|
TimeBuffer<IssueToExec> i2e;
|
|
typename TimeBuffer<IssueToExec>::wire instsToExecute;
|
|
TimeBuffer<ExecToCommit> e2c;
|
|
TimeBuffer<Writeback> numInstsToWB;
|
|
|
|
TimeBuffer<CommStruct> *comm;
|
|
typename TimeBuffer<CommStruct>::wire toIEW;
|
|
typename TimeBuffer<CommStruct>::wire fromCommit;
|
|
|
|
class TrapEvent : public Event {
|
|
private:
|
|
LWBackEnd<Impl> *be;
|
|
|
|
public:
|
|
TrapEvent(LWBackEnd<Impl> *_be);
|
|
|
|
void process();
|
|
const char *description();
|
|
};
|
|
|
|
/** LdWriteback event for a load completion. */
|
|
class LdWritebackEvent : public Event {
|
|
private:
|
|
/** Instruction that is writing back data to the register file. */
|
|
DynInstPtr inst;
|
|
/** Pointer to IEW stage. */
|
|
LWBackEnd *be;
|
|
|
|
bool dcacheMiss;
|
|
|
|
public:
|
|
/** Constructs a load writeback event. */
|
|
LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
|
|
|
|
/** Processes writeback event. */
|
|
virtual void process();
|
|
/** Returns the description of the writeback event. */
|
|
virtual const char *description();
|
|
|
|
void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
|
|
};
|
|
|
|
LWBackEnd(Params *params);
|
|
|
|
std::string name() const;
|
|
|
|
void regStats();
|
|
|
|
void setCPU(FullCPU *cpu_ptr);
|
|
|
|
/** Stores the given front-end pointer in frontEnd. */
void
setFrontEnd(FrontEnd *front_end_ptr)
{
    frontEnd = front_end_ptr;
}
|
|
|
|
/** Stores the given execution-context pointer in xc. */
void
setXC(ExecContext *xc_ptr)
{
    xc = xc_ptr;
}
|
|
|
|
/** Stores the given thread-state pointer in thread. */
void
setThreadState(Thread *thread_ptr)
{
    thread = thread_ptr;
}
|
|
|
|
void setCommBuffer(TimeBuffer<CommStruct> *_comm);
|
|
|
|
void tick();
|
|
void squash();
|
|
/** Raises the xcSquash flag. */
void
generateXCEvent()
{
    xcSquash = true;
}
|
|
void squashFromXC();
|
|
void squashFromTrap();
|
|
void checkInterrupts();
|
|
bool trapSquash;
|
|
bool xcSquash;
|
|
|
|
template <class T>
|
|
Fault read(MemReqPtr &req, T &data, int load_idx);
|
|
|
|
template <class T>
|
|
Fault write(MemReqPtr &req, T &data, int store_idx);
|
|
|
|
/** Returns the current value of commitPC. */
Addr
readCommitPC()
{
    return commitPC;
}
|
|
|
|
Addr commitPC;
|
|
|
|
Tick lastCommitCycle;
|
|
|
|
/** Returns true when instList holds no instructions. */
bool
robEmpty()
{
    return instList.empty();
}
|
|
|
|
/** Returns true once numInsts has reached (or passed) numROBEntries. */
bool
isFull()
{
    const bool at_capacity = numInsts >= numROBEntries;
    return at_capacity;
}
|
|
/** Returns true if either status or dispatchStatus is Blocked. */
bool
isBlocked()
{
    if (status == Blocked)
        return true;

    return dispatchStatus == Blocked;
}
|
|
|
|
void fetchFault(Fault &fault);
|
|
|
|
int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
|
|
|
|
/** Tells memory dependence unit that a memory instruction needs to be
|
|
* rescheduled. It will re-execute once replayMemInst() is called.
|
|
*/
|
|
void rescheduleMemInst(DynInstPtr &inst);
|
|
|
|
/** Re-executes all rescheduled memory instructions. */
|
|
void replayMemInst(DynInstPtr &inst);
|
|
|
|
/** Completes memory instruction. */
|
|
void completeMemInst(DynInstPtr &inst) { }
|
|
|
|
/**
 * Records an instruction's sequence number in waitingMemOps and updates
 * numWaitingMemOps. Called from LdWritebackEvent::setDcacheMiss().
 *
 * @param inst Instruction whose seqNum is tracked.
 */
void
addDcacheMiss(DynInstPtr &inst)
{
    // std::set::insert() is a no-op when the key is already present, so
    // only bump the counter when an element was really added. Otherwise
    // numWaitingMemOps drifts away from waitingMemOps.size().
    if (waitingMemOps.insert(inst->seqNum).second)
        numWaitingMemOps++;

    DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
            inst->seqNum, numWaitingMemOps);
}
|
|
|
|
/**
 * Removes an instruction's sequence number from waitingMemOps and
 * updates numWaitingMemOps. The instruction is expected to be tracked;
 * this is checked with an assert.
 *
 * @param inst Instruction whose seqNum is dropped from the set.
 */
void
removeDcacheMiss(DynInstPtr &inst)
{
    // erase(key) reports how many elements were removed, so one lookup
    // serves both the sanity check and the bookkeeping.
    const bool erased = waitingMemOps.erase(inst->seqNum) != 0;
    assert(erased);

    // When assert() is compiled out, an untracked instruction must not
    // decrement the counter, or it would no longer match the set.
    if (erased)
        numWaitingMemOps--;

    DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
            inst->seqNum, numWaitingMemOps);
}
|
|
|
|
/**
 * Records an instruction's sequence number in waitingMemOps and updates
 * numWaitingMemOps.
 *
 * @param inst Instruction whose seqNum is tracked.
 */
void
addWaitingMemOp(DynInstPtr &inst)
{
    // std::set::insert() is a no-op when the key is already present, so
    // only bump the counter when an element was really added. Otherwise
    // numWaitingMemOps drifts away from waitingMemOps.size().
    if (waitingMemOps.insert(inst->seqNum).second)
        numWaitingMemOps++;

    DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
            inst->seqNum, numWaitingMemOps);
}
|
|
|
|
/**
 * Removes an instruction's sequence number from waitingMemOps and
 * updates numWaitingMemOps. The instruction is expected to be tracked;
 * this is checked with an assert.
 *
 * @param inst Instruction whose seqNum is dropped from the set.
 */
void
removeWaitingMemOp(DynInstPtr &inst)
{
    // erase(key) reports how many elements were removed, so one lookup
    // serves both the sanity check and the bookkeeping.
    const bool erased = waitingMemOps.erase(inst->seqNum) != 0;
    assert(erased);

    // When assert() is compiled out, an untracked instruction must not
    // decrement the counter, or it would no longer match the set.
    if (erased)
        numWaitingMemOps--;

    DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
            inst->seqNum, numWaitingMemOps);
}
|
|
|
|
void instToCommit(DynInstPtr &inst);
|
|
|
|
void switchOut();
|
|
|
|
void takeOverFrom(ExecContext *old_xc = NULL);
|
|
|
|
/** Returns the switchedOut flag. */
bool
isSwitchedOut()
{
    return switchedOut;
}
|
|
|
|
private:
|
|
void generateTrapEvent(Tick latency = 0);
|
|
void handleFault(Fault &fault, Tick latency = 0);
|
|
void updateStructures();
|
|
void dispatchInsts();
|
|
void dispatchStall();
|
|
void checkDispatchStatus();
|
|
void executeInsts();
|
|
void commitInsts();
|
|
void addToLSQ(DynInstPtr &inst);
|
|
void writebackInsts();
|
|
bool commitInst(int inst_num);
|
|
void squash(const InstSeqNum &sn);
|
|
void squashDueToBranch(DynInstPtr &inst);
|
|
void squashDueToMemViolation(DynInstPtr &inst);
|
|
void squashDueToMemBlocked(DynInstPtr &inst);
|
|
void updateExeInstStats(DynInstPtr &inst);
|
|
void updateComInstStats(DynInstPtr &inst);
|
|
|
|
public:
|
|
FullCPU *cpu;
|
|
|
|
FrontEnd *frontEnd;
|
|
|
|
ExecContext *xc;
|
|
|
|
Thread *thread;
|
|
|
|
/**
 * State values used by the back end. The members status, dispatchStatus
 * and commitStatus each hold one of these; enumerators take the default
 * values 0 through 5 in the order listed.
 */
enum Status {
    Running,
    Idle,
    DcacheMissStall,
    DcacheMissComplete,
    Blocked,
    TrapPending
};
|
|
|
|
Status status;
|
|
|
|
Status dispatchStatus;
|
|
|
|
Status commitStatus;
|
|
|
|
Counter funcExeInst;
|
|
|
|
private:
|
|
// typedef typename Impl::InstQueue InstQueue;
|
|
|
|
// InstQueue IQ;
|
|
|
|
typedef typename Impl::LdstQueue LdstQueue;
|
|
|
|
LdstQueue LSQ;
|
|
public:
|
|
RenameTable<Impl> commitRenameTable;
|
|
|
|
RenameTable<Impl> renameTable;
|
|
private:
|
|
class DCacheCompletionEvent : public Event
|
|
{
|
|
private:
|
|
LWBackEnd *be;
|
|
|
|
public:
|
|
DCacheCompletionEvent(LWBackEnd *_be);
|
|
|
|
virtual void process();
|
|
virtual const char *description();
|
|
};
|
|
|
|
friend class DCacheCompletionEvent;
|
|
|
|
DCacheCompletionEvent cacheCompletionEvent;
|
|
|
|
MemInterface *dcacheInterface;
|
|
|
|
MemReqPtr memReq;
|
|
|
|
// General back end width. Used if the more specific isn't given.
|
|
int width;
|
|
|
|
// Dispatch width.
|
|
int dispatchWidth;
|
|
int numDispatchEntries;
|
|
int dispatchSize;
|
|
|
|
int waitingInsts;
|
|
|
|
int issueWidth;
|
|
|
|
// Writeback width
|
|
int wbWidth;
|
|
|
|
// Commit width
|
|
int commitWidth;
|
|
|
|
/** Index into queue of instructions being written back. */
|
|
unsigned wbNumInst;
|
|
|
|
/** Cycle number within the queue of instructions being written
|
|
* back. Used in case there are too many instructions writing
|
|
* back at the current cycle and writebacks need to be scheduled
|
|
* for the future. See comments in instToCommit().
|
|
*/
|
|
unsigned wbCycle;
|
|
|
|
int numROBEntries;
|
|
int numInsts;
|
|
|
|
std::set<InstSeqNum> waitingMemOps;
|
|
typedef std::set<InstSeqNum>::iterator MemIt;
|
|
int numWaitingMemOps;
|
|
unsigned maxOutstandingMemOps;
|
|
|
|
bool squashPending;
|
|
InstSeqNum squashSeqNum;
|
|
Addr squashNextPC;
|
|
|
|
Fault faultFromFetch;
|
|
bool fetchHasFault;
|
|
|
|
bool switchedOut;
|
|
|
|
DynInstPtr memBarrier;
|
|
|
|
private:
|
|
struct pqCompare {
|
|
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
|
{
|
|
return lhs->seqNum > rhs->seqNum;
|
|
}
|
|
};
|
|
|
|
typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
|
|
ReadyInstQueue exeList;
|
|
|
|
typedef typename std::list<DynInstPtr>::iterator InstListIt;
|
|
|
|
std::list<DynInstPtr> instList;
|
|
std::list<DynInstPtr> waitingList;
|
|
std::list<DynInstPtr> replayList;
|
|
std::list<DynInstPtr> writeback;
|
|
|
|
int latency;
|
|
|
|
int squashLatency;
|
|
|
|
bool exactFullStall;
|
|
|
|
// bool fetchRedirect[Impl::MaxThreads];
|
|
|
|
// number of cycles stalled for D-cache misses
|
|
/* Stats::Scalar<> dcacheStallCycles;
|
|
Counter lastDcacheStall;
|
|
*/
|
|
Stats::Vector<> rob_cap_events;
|
|
Stats::Vector<> rob_cap_inst_count;
|
|
Stats::Vector<> iq_cap_events;
|
|
Stats::Vector<> iq_cap_inst_count;
|
|
// total number of instructions executed
|
|
Stats::Vector<> exe_inst;
|
|
Stats::Vector<> exe_swp;
|
|
Stats::Vector<> exe_nop;
|
|
Stats::Vector<> exe_refs;
|
|
Stats::Vector<> exe_loads;
|
|
Stats::Vector<> exe_branches;
|
|
|
|
Stats::Vector<> issued_ops;
|
|
|
|
// total number of loads forwarded from LSQ stores
|
|
Stats::Vector<> lsq_forw_loads;
|
|
|
|
// total number of loads ignored due to invalid addresses
|
|
Stats::Vector<> inv_addr_loads;
|
|
|
|
// total number of software prefetches ignored due to invalid addresses
|
|
Stats::Vector<> inv_addr_swpfs;
|
|
// ready loads blocked due to memory disambiguation
|
|
Stats::Vector<> lsq_blocked_loads;
|
|
|
|
Stats::Scalar<> lsqInversion;
|
|
|
|
Stats::Vector<> n_issued_dist;
|
|
Stats::VectorDistribution<> issue_delay_dist;
|
|
|
|
Stats::VectorDistribution<> queue_res_dist;
|
|
/*
|
|
Stats::Vector<> stat_fu_busy;
|
|
Stats::Vector2d<> stat_fuBusy;
|
|
Stats::Vector<> dist_unissued;
|
|
Stats::Vector2d<> stat_issued_inst_type;
|
|
|
|
Stats::Formula misspec_cnt;
|
|
Stats::Formula misspec_ipc;
|
|
Stats::Formula issue_rate;
|
|
Stats::Formula issue_stores;
|
|
Stats::Formula issue_op_rate;
|
|
Stats::Formula fu_busy_rate;
|
|
Stats::Formula commit_stores;
|
|
Stats::Formula commit_ipc;
|
|
Stats::Formula commit_ipb;
|
|
Stats::Formula lsq_inv_rate;
|
|
*/
|
|
Stats::Vector<> writeback_count;
|
|
Stats::Vector<> producer_inst;
|
|
Stats::Vector<> consumer_inst;
|
|
Stats::Vector<> wb_penalized;
|
|
|
|
Stats::Formula wb_rate;
|
|
Stats::Formula wb_fanout;
|
|
Stats::Formula wb_penalized_rate;
|
|
|
|
// total number of instructions committed
|
|
Stats::Vector<> stat_com_inst;
|
|
Stats::Vector<> stat_com_swp;
|
|
Stats::Vector<> stat_com_refs;
|
|
Stats::Vector<> stat_com_loads;
|
|
Stats::Vector<> stat_com_membars;
|
|
Stats::Vector<> stat_com_branches;
|
|
|
|
Stats::Distribution<> n_committed_dist;
|
|
|
|
Stats::Scalar<> commit_eligible_samples;
|
|
Stats::Vector<> commit_eligible;
|
|
|
|
Stats::Vector<> squashedInsts;
|
|
Stats::Vector<> ROBSquashedInsts;
|
|
|
|
Stats::Scalar<> ROB_fcount;
|
|
Stats::Formula ROB_full_rate;
|
|
|
|
Stats::Vector<> ROB_count; // cumulative ROB occupancy
|
|
Stats::Formula ROB_occ_rate;
|
|
Stats::VectorDistribution<> ROB_occ_dist;
|
|
public:
|
|
void dumpInsts();
|
|
|
|
Checker<DynInstPtr> *checker;
|
|
};
|
|
|
|
template <class Impl>
|
|
template <class T>
|
|
Fault
|
|
LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
|
|
{
|
|
/* memReq->reset(addr, sizeof(T), flags);
|
|
|
|
// translate to physical address
|
|
Fault fault = cpu->translateDataReadReq(memReq);
|
|
|
|
// if we have a cache, do cache access too
|
|
if (fault == NoFault && dcacheInterface) {
|
|
memReq->cmd = Read;
|
|
memReq->completionEvent = NULL;
|
|
memReq->time = curTick;
|
|
memReq->flags &= ~INST_READ;
|
|
MemAccessResult result = dcacheInterface->access(memReq);
|
|
|
|
// Ugly hack to get an event scheduled *only* if the access is
|
|
// a miss. We really should add first-class support for this
|
|
// at some point.
|
|
if (result != MA_HIT && dcacheInterface->doEvents()) {
|
|
// Fix this hack for keeping funcExeInst correct with loads that
|
|
// are executed twice.
|
|
--funcExeInst;
|
|
|
|
memReq->completionEvent = &cacheCompletionEvent;
|
|
lastDcacheStall = curTick;
|
|
// unscheduleTickEvent();
|
|
// status = DcacheMissStall;
|
|
DPRINTF(OzoneCPU, "Dcache miss stall!\n");
|
|
} else {
|
|
// do functional access
|
|
fault = thread->mem->read(memReq, data);
|
|
|
|
}
|
|
}
|
|
*/
|
|
/*
|
|
if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
|
|
recordEvent("Uncached Read");
|
|
*/
|
|
return LSQ.read(req, data, load_idx);
|
|
}
|
|
|
|
template <class Impl>
|
|
template <class T>
|
|
Fault
|
|
LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
|
|
{
|
|
/*
|
|
memReq->reset(addr, sizeof(T), flags);
|
|
|
|
// translate to physical address
|
|
Fault fault = cpu->translateDataWriteReq(memReq);
|
|
|
|
if (fault == NoFault && dcacheInterface) {
|
|
memReq->cmd = Write;
|
|
memcpy(memReq->data,(uint8_t *)&data,memReq->size);
|
|
memReq->completionEvent = NULL;
|
|
memReq->time = curTick;
|
|
memReq->flags &= ~INST_READ;
|
|
MemAccessResult result = dcacheInterface->access(memReq);
|
|
|
|
// Ugly hack to get an event scheduled *only* if the access is
|
|
// a miss. We really should add first-class support for this
|
|
// at some point.
|
|
if (result != MA_HIT && dcacheInterface->doEvents()) {
|
|
memReq->completionEvent = &cacheCompletionEvent;
|
|
lastDcacheStall = curTick;
|
|
// unscheduleTickEvent();
|
|
// status = DcacheMissStall;
|
|
DPRINTF(OzoneCPU, "Dcache miss stall!\n");
|
|
}
|
|
}
|
|
|
|
if (res && (fault == NoFault))
|
|
*res = memReq->result;
|
|
*/
|
|
/*
|
|
if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
|
|
recordEvent("Uncached Write");
|
|
*/
|
|
return LSQ.write(req, data, store_idx);
|
|
}
|
|
|
|
#endif // __CPU_OZONE_LW_BACK_END_HH__
|