Updates to O3 CPU. It should now work in FS mode, although sampling still has a bug.
src/cpu/o3/commit_impl.hh:
    Fixes for compile and sampling.
src/cpu/o3/cpu.cc:
    Deallocate and activate threads properly. Also hopefully fix being able to use caches while switching over.
src/cpu/o3/cpu.hh:
    Fixes for deallocating and activating threads.
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/lsq_unit.hh:
    Handle getting back a BadAddress result from the access.
src/cpu/o3/iew_impl.hh:
    More debug output.
src/cpu/o3/lsq_unit_impl.hh:
    Fixup store conditional handling (still a bit of a hack, but works now). Also handle getting back a BadAddress result from the access.
src/cpu/o3/thread_context_impl.hh:
    Deallocate context now records if the context should be fully removed.
--HG--
extra : convert_revision : 55f81660602d0e25367ce1f5b0b9cfc62abe7bf9
This commit is contained in:
parent
c0e53b6d4c
commit
d48ea81ba2
8 changed files with 96 additions and 36 deletions
|
@ -342,12 +342,6 @@ DefaultCommit<Impl>::drain()
|
|||
{
|
||||
drainPending = true;
|
||||
|
||||
// If it's already drained, return true.
|
||||
if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
|
||||
cpu->signalDrained();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1218,16 +1212,16 @@ DefaultCommit<Impl>::skidInsert()
|
|||
|
||||
for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) {
|
||||
DynInstPtr inst = fromRename->insts[inst_num];
|
||||
int tid = inst->threadNumber;
|
||||
|
||||
if (!inst->isSquashed()) {
|
||||
DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
|
||||
"skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
|
||||
"skidBuffer.\n", inst->readPC(), inst->seqNum,
|
||||
inst->threadNumber);
|
||||
skidBuffer.push(inst);
|
||||
} else {
|
||||
DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
|
||||
"squashed, skipping.\n",
|
||||
inst->readPC(), inst->seqNum, tid);
|
||||
inst->readPC(), inst->seqNum, inst->threadNumber);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ FullO3CPU<Impl>::TickEvent::description()
|
|||
|
||||
template <class Impl>
|
||||
FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent()
|
||||
: Event(&mainEventQueue, CPU_Tick_Pri)
|
||||
: Event(&mainEventQueue, CPU_Switch_Pri)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -135,6 +135,7 @@ void
|
|||
FullO3CPU<Impl>::DeallocateContextEvent::process()
|
||||
{
|
||||
cpu->deactivateThread(tid);
|
||||
if (remove)
|
||||
cpu->removeThread(tid);
|
||||
}
|
||||
|
||||
|
@ -191,7 +192,11 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
|
|||
deferRegistration(params->deferRegistration),
|
||||
numThreads(number_of_threads)
|
||||
{
|
||||
if (!deferRegistration) {
|
||||
_status = Running;
|
||||
} else {
|
||||
_status = Idle;
|
||||
}
|
||||
|
||||
checker = NULL;
|
||||
|
||||
|
@ -304,6 +309,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
|
|||
|
||||
tid,
|
||||
bindRegs);
|
||||
|
||||
activateThreadEvent[tid].init(tid, this);
|
||||
deallocateContextEvent[tid].init(tid, this);
|
||||
}
|
||||
|
||||
rename.setRenameMap(renameMap);
|
||||
|
@ -449,7 +457,7 @@ FullO3CPU<Impl>::tick()
|
|||
getState() == SimObject::Drained) {
|
||||
// increment stat
|
||||
lastRunningCycle = curTick;
|
||||
} else if (!activityRec.active()) {
|
||||
} else if (!activityRec.active() || _status == Idle) {
|
||||
lastRunningCycle = curTick;
|
||||
timesIdled++;
|
||||
} else {
|
||||
|
@ -563,17 +571,20 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
|
|||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::deallocateContext(int tid, int delay)
|
||||
bool
|
||||
FullO3CPU<Impl>::deallocateContext(int tid, bool remove, int delay)
|
||||
{
|
||||
// Schedule removal of thread data from CPU
|
||||
if (delay){
|
||||
DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
|
||||
"on cycle %d\n", tid, curTick + cycles(delay));
|
||||
scheduleDeallocateContextEvent(tid, delay);
|
||||
scheduleDeallocateContextEvent(tid, remove, delay);
|
||||
return false;
|
||||
} else {
|
||||
deactivateThread(tid);
|
||||
if (remove)
|
||||
removeThread(tid);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -582,8 +593,9 @@ void
|
|||
FullO3CPU<Impl>::suspendContext(int tid)
|
||||
{
|
||||
DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
|
||||
deactivateThread(tid);
|
||||
if (activeThreads.size() == 0)
|
||||
bool deallocated = deallocateContext(tid, false, 1);
|
||||
// If this was the last thread then unschedule the tick event.
|
||||
if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0)
|
||||
unscheduleTickEvent();
|
||||
_status = Idle;
|
||||
}
|
||||
|
@ -594,7 +606,7 @@ FullO3CPU<Impl>::haltContext(int tid)
|
|||
{
|
||||
//For now, this is the same as deallocate
|
||||
DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
|
||||
deallocateContext(tid, 1);
|
||||
deallocateContext(tid, true, 1);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -935,6 +947,25 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
|||
}
|
||||
if (!tickEvent.scheduled())
|
||||
tickEvent.schedule(curTick);
|
||||
|
||||
Port *peer;
|
||||
Port *icachePort = fetch.getIcachePort();
|
||||
if (icachePort->getPeer() == NULL) {
|
||||
peer = oldCPU->getPort("icachePort")->getPeer();
|
||||
icachePort->setPeer(peer);
|
||||
} else {
|
||||
peer = icachePort->getPeer();
|
||||
}
|
||||
peer->setPeer(icachePort);
|
||||
|
||||
Port *dcachePort = iew.getDcachePort();
|
||||
if (dcachePort->getPeer() == NULL) {
|
||||
Port *peer = oldCPU->getPort("dcachePort")->getPeer();
|
||||
dcachePort->setPeer(peer);
|
||||
} else {
|
||||
peer = dcachePort->getPeer();
|
||||
}
|
||||
peer->setPeer(dcachePort);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
|
|
@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU
|
|||
class DeallocateContextEvent : public Event
|
||||
{
|
||||
private:
|
||||
/** Number of Thread to Activate */
|
||||
/** Number of Thread to deactivate */
|
||||
int tid;
|
||||
|
||||
/** Should the thread be removed from the CPU? */
|
||||
bool remove;
|
||||
|
||||
/** Pointer to the CPU. */
|
||||
FullO3CPU<Impl> *cpu;
|
||||
|
||||
|
@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU
|
|||
/** Processes the event, calling deactivateThread() on the CPU, and removeThread() as well if removal was requested. */
|
||||
void process();
|
||||
|
||||
/** Sets whether the thread should also be removed from the CPU. */
|
||||
void setRemove(bool _remove) { remove = _remove; }
|
||||
|
||||
/** Returns the description of the event. */
|
||||
const char *description();
|
||||
};
|
||||
|
||||
/** Schedule cpu to deallocate thread context.*/
|
||||
void scheduleDeallocateContextEvent(int tid, int delay)
|
||||
void scheduleDeallocateContextEvent(int tid, bool remove, int delay)
|
||||
{
|
||||
// Schedule thread to deallocate, regardless of its current state.
|
||||
if (deallocateContextEvent[tid].squashed())
|
||||
|
@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU
|
|||
void suspendContext(int tid);
|
||||
|
||||
/** Remove Thread from Active Threads List &&
|
||||
* Remove Thread Context from CPU.
|
||||
* Possibly Remove Thread Context from CPU.
|
||||
*/
|
||||
void deallocateContext(int tid, int delay = 1);
|
||||
bool deallocateContext(int tid, bool remove, int delay = 1);
|
||||
|
||||
/** Remove Thread from Active Threads List &&
|
||||
* Remove Thread Context from CPU.
|
||||
|
@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU
|
|||
/** Pointers to all of the threads in the CPU. */
|
||||
std::vector<Thread *> thread;
|
||||
|
||||
/** Pointer to the icache interface. */
|
||||
MemInterface *icacheInterface;
|
||||
/** Pointer to the dcache interface. */
|
||||
MemInterface *dcacheInterface;
|
||||
|
||||
/** Whether or not the CPU should defer its registration. */
|
||||
bool deferRegistration;
|
||||
|
||||
|
|
|
@ -623,6 +623,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
|||
// Now do the timing access to see whether or not the instruction
|
||||
// exists within the cache.
|
||||
if (!icachePort->sendTiming(data_pkt)) {
|
||||
if (data_pkt->result == Packet::BadAddress) {
|
||||
fault = TheISA::genMachineCheckFault();
|
||||
delete mem_req;
|
||||
memReq[tid] = NULL;
|
||||
}
|
||||
assert(retryPkt == NULL);
|
||||
assert(retryTid == -1);
|
||||
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
|
||||
|
|
|
@ -600,6 +600,11 @@ template<class Impl>
|
|||
void
|
||||
DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
|
||||
{
|
||||
// This function should not be called after writebackInsts in a
|
||||
// single cycle. That will cause problems with an instruction
|
||||
// being added to the queue to commit without being processed by
|
||||
// writebackInsts prior to being sent to commit.
|
||||
|
||||
// First check the time slot that this instruction will write
|
||||
// to. If there are free write ports at the time, then go ahead
|
||||
// and write the instruction to that time. If there are not,
|
||||
|
@ -1286,6 +1291,7 @@ DefaultIEW<Impl>::executeInsts()
|
|||
} else if (fault != NoFault) {
|
||||
// If the instruction faulted, then we need to send it along to commit
|
||||
// without the instruction completing.
|
||||
DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum);
|
||||
|
||||
// Send this instruction to commit, also make sure iew stage
|
||||
// realizes there is activity.
|
||||
|
|
|
@ -638,6 +638,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
|
|||
// if the cache is not blocked, do cache access
|
||||
if (!lsq->cacheBlocked()) {
|
||||
if (!dcachePort->sendTiming(data_pkt)) {
|
||||
if (data_pkt->result == Packet::BadAddress) {
|
||||
delete data_pkt;
|
||||
return TheISA::genMachineCheckFault();
|
||||
}
|
||||
|
||||
// If the access didn't succeed, tell the LSQ by setting
|
||||
// the retry thread id.
|
||||
lsq->setRetryTid(lsqID);
|
||||
|
|
|
@ -608,9 +608,9 @@ LSQUnit<Impl>::writebackStores()
|
|||
|
||||
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
|
||||
"to Addr:%#x, data:%#x [sn:%lli]\n",
|
||||
storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
|
||||
storeWBIdx, inst->readPC(),
|
||||
req->getPaddr(), *(inst->memData),
|
||||
storeQueue[storeWBIdx].inst->seqNum);
|
||||
inst->seqNum);
|
||||
|
||||
// @todo: Remove this SC hack once the memory system handles it.
|
||||
if (req->getFlags() & LOCKED) {
|
||||
|
@ -619,10 +619,19 @@ LSQUnit<Impl>::writebackStores()
|
|||
} else {
|
||||
if (cpu->lockFlag) {
|
||||
req->setScResult(1);
|
||||
DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.",
|
||||
inst->seqNum);
|
||||
} else {
|
||||
req->setScResult(0);
|
||||
// Hack: Instantly complete this store.
|
||||
completeDataAccess(data_pkt);
|
||||
// completeDataAccess(data_pkt);
|
||||
DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
|
||||
"Instantly completing it.\n",
|
||||
inst->seqNum);
|
||||
WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
|
||||
wb->schedule(curTick + 1);
|
||||
delete state;
|
||||
completeStore(storeWBIdx);
|
||||
incrStIdx(storeWBIdx);
|
||||
continue;
|
||||
}
|
||||
|
@ -633,7 +642,13 @@ LSQUnit<Impl>::writebackStores()
|
|||
}
|
||||
|
||||
if (!dcachePort->sendTiming(data_pkt)) {
|
||||
if (data_pkt->result == Packet::BadAddress) {
|
||||
panic("LSQ sent out a bad address for a completed store!");
|
||||
}
|
||||
// Need to handle becoming blocked on a store.
|
||||
DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
|
||||
"retry later\n",
|
||||
inst->seqNum);
|
||||
isStoreBlocked = true;
|
||||
++lsqCacheBlocked;
|
||||
assert(retryPkt == NULL);
|
||||
|
@ -880,6 +895,9 @@ LSQUnit<Impl>::recvRetry()
|
|||
assert(retryPkt != NULL);
|
||||
|
||||
if (dcachePort->sendTiming(retryPkt)) {
|
||||
if (retryPkt->result == Packet::BadAddress) {
|
||||
panic("LSQ sent out a bad address for a completed store!");
|
||||
}
|
||||
storePostSend(retryPkt);
|
||||
retryPkt = NULL;
|
||||
isStoreBlocked = false;
|
||||
|
|
|
@ -165,14 +165,14 @@ template <class Impl>
|
|||
void
|
||||
O3ThreadContext<Impl>::deallocate(int delay)
|
||||
{
|
||||
DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n",
|
||||
getThreadNum());
|
||||
DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n",
|
||||
getThreadNum(), delay);
|
||||
|
||||
if (thread->status() == ThreadContext::Unallocated)
|
||||
return;
|
||||
|
||||
thread->setStatus(ThreadContext::Unallocated);
|
||||
cpu->deallocateContext(thread->readTid(), delay);
|
||||
cpu->deallocateContext(thread->readTid(), true, delay);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
|
Loading…
Reference in a new issue