From d48ea81ba2c4034936ada75bd4bac28640c53174 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sun, 8 Oct 2006 00:53:41 -0400 Subject: [PATCH] Updates to O3 CPU. It should now work in FS mode, although sampling still has a bug. src/cpu/o3/commit_impl.hh: Fixes for compile and sampling. src/cpu/o3/cpu.cc: Deallocate and activate threads properly. Also hopefully fix being able to use caches while switching over. src/cpu/o3/cpu.hh: Fixes for deallocating and activating threads. src/cpu/o3/fetch_impl.hh: src/cpu/o3/lsq_unit.hh: Handle getting back a BadAddress result from the access. src/cpu/o3/iew_impl.hh: More debug output. src/cpu/o3/lsq_unit_impl.hh: Fixup store conditional handling (still a bit of a hack, but works now). Also handle getting back a BadAddress result from the access. src/cpu/o3/thread_context_impl.hh: Deallocate context now records if the context should be fully removed. --HG-- extra : convert_revision : 55f81660602d0e25367ce1f5b0b9cfc62abe7bf9 --- src/cpu/o3/commit_impl.hh | 12 ++----- src/cpu/o3/cpu.cc | 55 ++++++++++++++++++++++++------- src/cpu/o3/cpu.hh | 19 ++++++----- src/cpu/o3/fetch_impl.hh | 5 +++ src/cpu/o3/iew_impl.hh | 6 ++++ src/cpu/o3/lsq_unit.hh | 5 +++ src/cpu/o3/lsq_unit_impl.hh | 24 ++++++++++++-- src/cpu/o3/thread_context_impl.hh | 6 ++-- 8 files changed, 96 insertions(+), 36 deletions(-) diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c80e4d8c1..ecf6ed632 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -342,12 +342,6 @@ DefaultCommit::drain() { drainPending = true; - // If it's already drained, return true. - if (rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalDrained(); - return true; - } - return false; } @@ -1218,16 +1212,16 @@ DefaultCommit::skidInsert() for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) { DynInstPtr inst = fromRename->insts[inst_num]; - int tid = inst->threadNumber; if (!inst->isSquashed()) { DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", - "skidBuffer.\n", inst->readPC(), inst->seqNum, tid); + "skidBuffer.\n", inst->readPC(), inst->seqNum, + inst->threadNumber); skidBuffer.push(inst); } else { DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " "squashed, skipping.\n", - inst->readPC(), inst->seqNum, tid); + inst->readPC(), inst->seqNum, inst->threadNumber); } } } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7386dfadd..787ae2d14 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -88,7 +88,7 @@ FullO3CPU::TickEvent::description() template FullO3CPU::ActivateThreadEvent::ActivateThreadEvent() - : Event(&mainEventQueue, CPU_Tick_Pri) + : Event(&mainEventQueue, CPU_Switch_Pri) { } @@ -135,7 +135,8 @@ void FullO3CPU::DeallocateContextEvent::process() { cpu->deactivateThread(tid); - cpu->removeThread(tid); + if (remove) + cpu->removeThread(tid); } template @@ -191,7 +192,11 @@ FullO3CPU::FullO3CPU(Params *params) deferRegistration(params->deferRegistration), numThreads(number_of_threads) { - _status = Idle; + if (!deferRegistration) { + _status = Running; + } else { + _status = Idle; + } checker = NULL; @@ -304,6 +309,9 @@ FullO3CPU::FullO3CPU(Params *params) tid, bindRegs); + + activateThreadEvent[tid].init(tid, this); + deallocateContextEvent[tid].init(tid, this); } rename.setRenameMap(renameMap); @@ -449,7 +457,7 @@ FullO3CPU::tick() getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; - } else if (!activityRec.active()) { + } else if (!activityRec.active() || _status == Idle) { lastRunningCycle = curTick; timesIdled++; } else { @@ -548,7 +556,7 @@ FullO3CPU::activateContext(int tid, int delay) activateThread(tid); } - if(lastActivatedCycle < curTick) { + if (lastActivatedCycle < curTick) { scheduleTickEvent(delay); // Be sure to signal that there's some activity so the CPU doesn't @@ -563,17 +571,20 @@ FullO3CPU::activateContext(int tid, int delay) } template -void -FullO3CPU::deallocateContext(int tid, int delay) +bool +FullO3CPU::deallocateContext(int tid, bool remove, int delay) { // Schedule removal of thread data from CPU if (delay){ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " "on cycle %d\n", tid, curTick + cycles(delay)); - scheduleDeallocateContextEvent(tid, delay); + scheduleDeallocateContextEvent(tid, remove, delay); + return false; } else { deactivateThread(tid); - removeThread(tid); + if (remove) + removeThread(tid); + return true; } } @@ -582,8 +593,9 @@ void FullO3CPU::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - deactivateThread(tid); - if (activeThreads.size() == 0) + bool deallocated = deallocateContext(tid, false, 1); + // If this was the last thread then unschedule the tick event. + if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0) unscheduleTickEvent(); _status = Idle; } @@ -594,7 +606,7 @@ FullO3CPU::haltContext(int tid) { //For now, this is the same as deallocate DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); - deallocateContext(tid, 1); + deallocateContext(tid, true, 1); } template @@ -935,6 +947,25 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) } if (!tickEvent.scheduled()) tickEvent.schedule(curTick); + + Port *peer; + Port *icachePort = fetch.getIcachePort(); + if (icachePort->getPeer() == NULL) { + peer = oldCPU->getPort("icachePort")->getPeer(); + icachePort->setPeer(peer); + } else { + peer = icachePort->getPeer(); + } + peer->setPeer(icachePort); + + Port *dcachePort = iew.getDcachePort(); + if (dcachePort->getPeer() == NULL) { + Port *peer = oldCPU->getPort("dcachePort")->getPeer(); + dcachePort->setPeer(peer); + } else { + peer = dcachePort->getPeer(); + } + peer->setPeer(dcachePort); } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index dcdcd1fe6..fe510519c 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU class DeallocateContextEvent : public Event { private: - /** Number of Thread to Activate */ + /** Number of Thread to deactivate */ int tid; + /** Should the thread be removed from the CPU? */ + bool remove; + /** Pointer to the CPU. */ FullO3CPU *cpu; @@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU /** Processes the event, calling activateThread() on the CPU. */ void process(); + /** Sets whether the thread should also be removed from the CPU. */ + void setRemove(bool _remove) { remove = _remove; } + /** Returns the description of the event. */ const char *description(); }; /** Schedule cpu to deallocate thread context.*/ - void scheduleDeallocateContextEvent(int tid, int delay) + void scheduleDeallocateContextEvent(int tid, bool remove, int delay) { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) @@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU void suspendContext(int tid); /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. + * Possibly Remove Thread Context from CPU. */ - void deallocateContext(int tid, int delay = 1); + bool deallocateContext(int tid, bool remove, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU /** Pointers to all of the threads in the CPU. */ std::vector thread; - /** Pointer to the icache interface. */ - MemInterface *icacheInterface; - /** Pointer to the dcache interface. */ - MemInterface *dcacheInterface; - /** Whether or not the CPU should defer its registration. */ bool deferRegistration; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 2d447bfe5..a01784f8f 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -623,6 +623,11 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Now do the timing access to see whether or not the instruction // exists within the cache. if (!icachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + fault = TheISA::genMachineCheckFault(); + delete mem_req; + memReq[tid] = NULL; + } assert(retryPkt == NULL); assert(retryTid == -1); DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b2baae296..ba5260fe2 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -600,6 +600,11 @@ template void DefaultIEW::instToCommit(DynInstPtr &inst) { + // This function should not be called after writebackInsts in a + // single cycle. That will cause problems with an instruction + // being added to the queue to commit without being processed by + // writebackInsts prior to being sent to commit. + // First check the time slot that this instruction will write // to. If there are free write ports at the time, then go ahead // and write the instruction to that time. If there are not, @@ -1286,6 +1291,7 @@ DefaultIEW::executeInsts() } else if (fault != NoFault) { // If the instruction faulted, then we need to send it along to commit // without the instruction completing. + DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum); // Send this instruction to commit, also make sure iew stage // realizes there is activity. diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 90d1a3d53..512124bb4 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -638,6 +638,11 @@ LSQUnit::read(Request *req, T &data, int load_idx) // if we the cache is not blocked, do cache access if (!lsq->cacheBlocked()) { if (!dcachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + delete data_pkt; + return TheISA::genMachineCheckFault(); + } + // If the access didn't succeed, tell the LSQ by setting // the retry thread id. lsq->setRetryTid(lsqID); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 98bea74fb..dc1a99d87 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -608,9 +608,9 @@ LSQUnit::writebackStores() DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), + storeWBIdx, inst->readPC(), req->getPaddr(), *(inst->memData), - storeQueue[storeWBIdx].inst->seqNum); + inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. if (req->getFlags() & LOCKED) { @@ -619,10 +619,19 @@ LSQUnit::writebackStores() } else { if (cpu->lockFlag) { req->setScResult(1); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.", + inst->seqNum); } else { req->setScResult(0); // Hack: Instantly complete this store. - completeDataAccess(data_pkt); +// completeDataAccess(data_pkt); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " + "Instantly completing it.\n", + inst->seqNum); + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); + wb->schedule(curTick + 1); + delete state; + completeStore(storeWBIdx); incrStIdx(storeWBIdx); continue; } @@ -633,7 +642,13 @@ LSQUnit::writebackStores() } if (!dcachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } // Need to handle becoming blocked on a store. + DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + "retry later\n", + inst->seqNum); isStoreBlocked = true; ++lsqCacheBlocked; assert(retryPkt == NULL); @@ -880,6 +895,9 @@ LSQUnit::recvRetry() assert(retryPkt != NULL); if (dcachePort->sendTiming(retryPkt)) { + if (retryPkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } storePostSend(retryPkt); retryPkt = NULL; isStoreBlocked = false; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index 25e1db21c..2bc194d53 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -165,14 +165,14 @@ template void O3ThreadContext::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", - getThreadNum()); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n", + getThreadNum(), delay); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid(), delay); + cpu->deallocateContext(thread->readTid(), true, delay); } template