From 090496bf2d4c0f55f7f5869a374b4ec3826bccbc Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 5 Jun 2006 18:14:39 -0400 Subject: [PATCH] Fixes to get new CPU model working for simple test case. The CPU does not yet support retrying accesses. src/cpu/base_dyn_inst.cc: Delete the allocated data in destructor. src/cpu/base_dyn_inst.hh: Only copy the addresses if the translation succeeded. src/cpu/o3/alpha_cpu.hh: Return actual translating port. Don't panic on setNextNPC() as it's always called, regardless of the architecture, when the process initializes. src/cpu/o3/alpha_cpu_impl.hh: Pass in memobject to the thread state in SE mode. src/cpu/o3/commit_impl.hh: Initialize all variables. src/cpu/o3/decode_impl.hh: Handle early resolution of branches properly. src/cpu/o3/fetch.hh: Switch structure back to requests. src/cpu/o3/fetch_impl.hh: Initialize all variables, create/delete requests properly. src/cpu/o3/lsq_unit.hh: Include sender state along with the packet. Also include a more generic writeback event that's only used for stores forwarding data to loads. src/cpu/o3/lsq_unit_impl.hh: Redo writeback code to support the response path of the memory system. src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/mem_dep_unit_impl.hh: Wrap the memdep_* counter variables in #ifdef DEBUG. src/cpu/o3/store_set.cc: Include base/misc.hh to get panic() function. src/cpu/o3/thread_state.hh: Create with MemObject as well. src/cpu/thread_state.hh: Have a translating port in the thread state object. src/python/m5/objects/AlphaFullCPU.py: Mem parameter no longer needed. 
--HG-- extra : convert_revision : a99381fb25cb183322882ce20935a6f3d1f2b64d --- src/cpu/base_dyn_inst.cc | 8 +- src/cpu/base_dyn_inst.hh | 15 ++- src/cpu/o3/alpha_cpu.hh | 4 +- src/cpu/o3/alpha_cpu_impl.hh | 5 +- src/cpu/o3/commit_impl.hh | 2 + src/cpu/o3/decode_impl.hh | 5 +- src/cpu/o3/fetch.hh | 4 +- src/cpu/o3/fetch_impl.hh | 30 +++-- src/cpu/o3/lsq_unit.hh | 61 ++++++++-- src/cpu/o3/lsq_unit_impl.hh | 168 ++++++++++++++------------ src/cpu/o3/mem_dep_unit.cc | 2 + src/cpu/o3/mem_dep_unit_impl.hh | 14 +++ src/cpu/o3/store_set.cc | 1 + src/cpu/o3/thread_state.hh | 11 +- src/cpu/thread_state.hh | 26 +++- src/python/m5/objects/AlphaFullCPU.py | 3 - 16 files changed, 231 insertions(+), 128 deletions(-) diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc index 66e425d5c..a62930560 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@ -96,12 +96,14 @@ void BaseDynInst::initVars() { req = NULL; + memData = NULL; effAddr = 0; physEffAddr = 0; storeSize = 0; readyRegs = 0; + // May want to turn this into a bit vector or something. 
completed = false; resultReady = false; canIssue = false; @@ -161,7 +163,11 @@ template BaseDynInst::~BaseDynInst() { if (req) { - req = NULL; + delete req; + } + + if (memData) { + delete [] memData; } if (traceData) { diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index a250427ce..1f2b44e02 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -660,11 +660,11 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) fault = cpu->translateDataReadReq(req); - effAddr = req->getVaddr(); - physEffAddr = req->getPaddr(); - memReqFlags = req->getFlags(); - if (fault == NoFault) { + effAddr = req->getVaddr(); + physEffAddr = req->getPaddr(); + memReqFlags = req->getFlags(); + #if FULL_SYSTEM if (cpu->system->memctrl->badaddr(physEffAddr)) { fault = TheISA::genMachineCheckFault(); @@ -715,11 +715,10 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) fault = cpu->translateDataWriteReq(req); - effAddr = req->getVaddr(); - physEffAddr = req->getPaddr(); - memReqFlags = req->getFlags(); - if (fault == NoFault) { + effAddr = req->getVaddr(); + physEffAddr = req->getPaddr(); + memReqFlags = req->getFlags(); #if FULL_SYSTEM if (cpu->system->memctrl->badaddr(physEffAddr)) { fault = TheISA::genMachineCheckFault(); diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh index 2e5c856a8..3c16c3b2e 100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@ -96,7 +96,7 @@ class AlphaFullCPU : public FullO3CPU /** Reads this CPU's ID. */ virtual int readCpuId() { return cpu->cpu_id; } - virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; } + virtual TranslatingPort *getMemPort() { return thread->port; } #if FULL_SYSTEM /** Returns a pointer to the system. */ @@ -226,7 +226,7 @@ class AlphaFullCPU : public FullO3CPU } virtual void setNextNPC(uint64_t val) - { panic("Alpha has no NextNPC!"); } + { } /** Reads a miscellaneous register. 
*/ virtual MiscReg readMiscReg(int misc_reg) diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh index ad4401f7e..7c136638d 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -73,7 +73,8 @@ AlphaFullCPU::AlphaFullCPU(Params *params) if (i < params->workload.size()) { DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", i, this->thread[i]); - this->thread[i] = new Thread(this, i, params->workload[i], i); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); this->thread[i]->setStatus(ExecContext::Suspended); //usedTids[i] = true; @@ -83,7 +84,7 @@ AlphaFullCPU::AlphaFullCPU(Params *params) //when scheduling threads to CPU Process* dummy_proc = NULL; - this->thread[i] = new Thread(this, i, dummy_proc, i); + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); //usedTids[i] = false; } #endif // !FULL_SYSTEM diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index f8a252b87..629acb310 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -75,6 +75,7 @@ DefaultCommit::DefaultCommit(Params *params) iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), + switchPending(false), switchedOut(false), trapLatency(params->trapLatency), fetchTrapLatency(params->fetchTrapLatency) @@ -115,6 +116,7 @@ DefaultCommit::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; xcSquash[i] = false; + PC[i] = nextPC[i] = 0; } fetchFaultTick = 0; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 64b04bc3d..8a6ea6626 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -280,7 +280,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; toFetch->decodeInfo[tid].squash = true; - toFetch->decodeInfo[tid].nextPC = inst->readNextPC(); + 
toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); @@ -723,9 +723,8 @@ DefaultDecode::decodeInsts(unsigned tid) // Go ahead and compute any PC-relative branches. if (inst->isDirectCtrl() && inst->isUncondCtrl()) { ++decodeBranchResolved; - inst->setNextPC(inst->branchTarget()); - if (inst->mispredicted()) { + if (inst->branchTarget() != inst->readPredTarg()) { ++decodeBranchMispred; // Might want to set some sort of boolean and just do diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 23328c534..9e8aeb8fb 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -323,8 +323,8 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; - /** Memory packet used to access cache. */ - PacketPtr memPkt[Impl::MaxThreads]; + /** Memory request used to access cache. */ + RequestPtr memReq[Impl::MaxThreads]; /** Variable that tracks if fetch has written to the time buffer this * cycle. Used to tell CPU if there is activity this cycle. 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 69c43a6a2..84f2c3b7e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -105,7 +105,8 @@ DefaultFetch::IcachePort::recvRetry() template DefaultFetch::DefaultFetch(Params *params) - : branchPred(params), + : mem(params->mem), + branchPred(params), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -113,7 +114,8 @@ DefaultFetch::DefaultFetch(Params *params) fetchWidth(params->fetchWidth), numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), - interruptPending(false) + interruptPending(false), + switchedOut(false) { if (numThreads > Impl::MaxThreads) fatal("numThreads is not a valid value\n"); @@ -161,7 +163,7 @@ DefaultFetch::DefaultFetch(Params *params) priorityList.push_back(tid); - memPkt[tid] = NULL; + memReq[tid] = NULL; // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; @@ -283,6 +285,10 @@ DefaultFetch::setCPU(FullCPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); + Port *mem_dport = mem->getPort(""); + icachePort->setPeer(mem_dport); + mem_dport->setPeer(icachePort); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -355,10 +361,12 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) // Only change the status if it's still waiting on the icache access // to return. if (fetchStatus[tid] != IcacheWaitResponse || - pkt != memPkt[tid] || + pkt->req != memReq[tid] || isSwitchedOut()) { ++fetchIcacheSquashes; + delete pkt->req; delete pkt; + memReq[tid] = NULL; return; } @@ -383,7 +391,7 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) // Reset the mem req to NULL. 
delete pkt->req; delete pkt; - memPkt[tid] = NULL; + memReq[tid] = NULL; } template @@ -514,7 +522,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags, fetch_PC, cpu->readCpuId(), tid); - memPkt[tid] = NULL; + memReq[tid] = mem_req; // Translate the instruction request. //#if FULL_SYSTEM @@ -565,6 +573,9 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid "response.\n", tid); fetchStatus[tid] = IcacheWaitResponse; + } else { + delete mem_req; + memReq[tid] = NULL; } ret_fault = fault; @@ -585,8 +596,9 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) if (fetchStatus[tid] == IcacheWaitResponse) { DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", tid); - delete memPkt[tid]; - memPkt[tid] = NULL; + // Should I delete this here or when it comes back from the cache? +// delete memReq[tid]; + memReq[tid] = NULL; } fetchStatus[tid] = Squashing; @@ -1083,7 +1095,7 @@ DefaultFetch::fetch(bool &status_change) warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC %08p", fault, PC[tid]); + warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #endif // FULL_SYSTEM } } diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 393d8947d..414309679 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -130,8 +130,6 @@ class LSQUnit { void completeDataAccess(PacketPtr pkt); - void completeStoreDataAccess(DynInstPtr &inst); - // @todo: Include stats in the LSQ unit. //void regStats(); @@ -206,10 +204,12 @@ class LSQUnit { /** Returns if the LSQ unit will writeback on this cycle. 
*/ bool willWB() { return storeQueue[storeWBIdx].canWB && - !storeQueue[storeWBIdx].completed/* && - !dcacheInterface->isBlocked()*/; } + !storeQueue[storeWBIdx].completed && + !isStoreBlocked; } private: + void writeback(DynInstPtr &inst, PacketPtr pkt); + /** Completes the store at the specified index. */ void completeStore(int store_idx); @@ -265,9 +265,43 @@ class LSQUnit { /** Pointer to the D-cache. */ DcachePort *dcachePort; + class LSQSenderState : public Packet::SenderState + { + public: + LSQSenderState() + : noWB(false) + { } + +// protected: + DynInstPtr inst; + bool isLoad; + int idx; + bool noWB; + }; + /** Pointer to the page table. */ // PageTable *pTable; + class WritebackEvent : public Event { + public: + /** Constructs a writeback event. */ + WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr); + + /** Processes the writeback event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + DynInstPtr inst; + + PacketPtr pkt; + + /** The pointer to the LSQ unit that issued the store. */ + LSQUnit *lsqPtr; + }; + public: struct SQEntry { /** Constructs an empty store queue entry. */ @@ -362,6 +396,8 @@ class LSQUnit { /** The index of the above store. */ int stallingLoadIdx; + bool isStoreBlocked; + /** Whether or not a load is blocked due to the memory system. */ bool isLoadBlocked; @@ -521,16 +557,17 @@ LSQUnit::read(Request *req, T &data, int load_idx) DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " "addr %#x, data %#x\n", store_idx, req->getVaddr(), *(load_inst->memData)); -/* - typename LdWritebackEvent *wb = - new typename LdWritebackEvent(load_inst, - iewStage); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(load_inst->memData); + + WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); // We'll say this has a 1 cycle load-store forwarding latency // for now. 
// @todo: Need to make this a parameter. wb->schedule(curTick); -*/ + // Should keep track of stat for forwarded data return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || @@ -585,6 +622,12 @@ LSQUnit::read(Request *req, T &data, int load_idx) PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = load_inst; + data_pkt->senderState = state; + // if we have a cache, do cache access too if (!dcachePort->sendTiming(data_pkt)) { // There's an older load that's already going to squash. diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 1ad561dc0..5398426e2 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -32,65 +32,57 @@ #include "mem/request.hh" template -void -LSQUnit::completeDataAccess(PacketPtr pkt) +LSQUnit::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, + LSQUnit *lsq_ptr) + : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) { -/* - DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum); - DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - - if (iewStage->isSwitchedOut()) { - inst = NULL; - return; - } else if (inst->isSquashed()) { - iewStage->wakeCPU(); - inst = NULL; - return; - } - - iewStage->wakeCPU(); - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Complete access to copy data to proper place. 
- inst->completeAcc(); - } - - // Need to insert instruction into queue to commit - iewStage->instToCommit(inst); - - iewStage->activityThisCycle(); - - inst = NULL; -*/ + this->setFlags(Event::AutoDelete); } template void -LSQUnit::completeStoreDataAccess(DynInstPtr &inst) +LSQUnit::WritebackEvent::process() { -/* - DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx); - DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx); + if (!lsqPtr->isSwitchedOut()) { + lsqPtr->writeback(inst, pkt); + } + delete pkt; +} - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); +template +const char * +LSQUnit::WritebackEvent::description() +{ + return "Store writeback event"; +} - if (lsqPtr->isSwitchedOut()) { - if (wbEvent) - delete wbEvent; +template +void +LSQUnit::completeDataAccess(PacketPtr pkt) +{ + LSQSenderState *state = dynamic_cast(pkt->senderState); + DynInstPtr inst = state->inst; + DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum); +// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + + if (isSwitchedOut() || inst->isSquashed()) { + delete state; + delete pkt; return; + } else { + if (!state->noWB) { + writeback(inst, pkt); + } + + if (inst->isStore()) { + completeStore(state->idx); + } } - lsqPtr->cpu->wakeCPU(); - - if (wb) - lsqPtr->completeDataAccess(storeIdx); - lsqPtr->completeStore(storeIdx); -*/ + delete state; + delete pkt; } template @@ -146,7 +138,8 @@ LSQUnit::DcachePort::recvRetry() template LSQUnit::LSQUnit() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + : loads(0), stores(0), storesToWB(0), stalled(false), + isStoreBlocked(false), isLoadBlocked(false), loadBlockedHandled(false) { } @@ -176,9 +169,7 @@ LSQUnit::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - Port *mem_dport = params->mem->getPort(""); - 
dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); + mem = params->mem; memDepViolator = NULL; @@ -191,6 +182,10 @@ LSQUnit::setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; dcachePort = new DcachePort(cpu, this); + + Port *mem_dport = mem->getPort(""); + dcachePort->setPeer(mem_dport); + mem_dport->setPeer(dcachePort); } template @@ -446,7 +441,6 @@ LSQUnit::executeStore(DynInstPtr &store_inst) int load_idx = store_inst->lqIdx; Fault store_fault = store_inst->initiateAcc(); -// Fault store_fault = store_inst->execute(); if (storeQueue[store_idx].size == 0) { DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", @@ -562,6 +556,12 @@ LSQUnit::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { + if (isStoreBlocked) { + DPRINTF(LSQUnit, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + // Store didn't write any data so no need to write it back to // memory. if (storeQueue[storeWBIdx].size == 0) { @@ -571,13 +571,7 @@ LSQUnit::writebackStores() continue; } -/* - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(LSQUnit, "Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } -*/ + ++usedPorts; if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { @@ -596,11 +590,18 @@ LSQUnit::writebackStores() assert(!inst->memData); inst->memData = new uint8_t[64]; - memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize()); + memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, + req->getSize()); PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); data_pkt->dataStatic(inst->memData); + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = storeWBIdx; + state->inst = inst; + data_pkt->senderState = state; + DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), @@ -609,11 +610,8 @@ 
LSQUnit::writebackStores() if (!dcachePort->sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. + isStoreBlocked = true; } else { - /* - StoreCompletionEvent *store_event = new - StoreCompletionEvent(storeWBIdx, NULL, this); - */ if (isStalled() && storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " @@ -623,18 +621,13 @@ LSQUnit::writebackStores() stallingStoreIsn = 0; iewStage->replayMemInst(loadQueue[stallingLoadIdx]); } -/* - typename LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { - // Stx_C should not generate a system port transaction - // if it misses in the cache, but that might be hard - // to accomplish without explicit cache support. - wb = new typename - LdWritebackEvent(storeQueue[storeWBIdx].inst, - iewStage); - store_event->wbEvent = wb; + + if (!(req->getFlags() & LOCKED)) { + assert(!storeQueue[storeWBIdx].inst->isStoreConditional()); + // Non-store conditionals do not need a writeback. + state->noWB = true; } -*/ + if (data_pkt->result != Packet::Success) { DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", storeWBIdx); @@ -759,6 +752,31 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } } +template +void +LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) +{ + iewStage->wakeCPU(); + + // Squashed instructions do not need to complete their access. + if (inst->isSquashed()) { + assert(!inst->isStore()); + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Complete access to copy data to proper place. + inst->completeAcc(pkt); + } + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); + + iewStage->activityThisCycle(); +} + template void LSQUnit::completeStore(int store_idx) diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index 1284361cc..a95103266 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -37,6 +37,7 @@ // AlphaSimpleImpl. 
template class MemDepUnit; +#ifdef DEBUG template <> int MemDepUnit::MemDepEntry::memdep_count = 0; @@ -46,3 +47,4 @@ MemDepUnit::MemDepEntry::memdep_insert = 0; template <> int MemDepUnit::MemDepEntry::memdep_erase = 0; +#endif diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index 50ad1e2c8..16f67a4e0 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -61,7 +61,9 @@ MemDepUnit::~MemDepUnit() } } +#ifdef DEBUG assert(MemDepEntry::memdep_count == 0); +#endif } template @@ -143,7 +145,9 @@ MemDepUnit::insert(DynInstPtr &inst) // Add the MemDepEntry to the hash. memDepHash.insert( std::pair(inst->seqNum, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif instList[tid].push_back(inst); @@ -229,7 +233,9 @@ MemDepUnit::insertNonSpec(DynInstPtr &inst) // Insert the MemDepEntry into the hash. memDepHash.insert( std::pair(inst->seqNum, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif // Add the instruction to the list. instList[tid].push_back(inst); @@ -277,7 +283,9 @@ MemDepUnit::insertBarrier(DynInstPtr &barr_inst) // Add the MemDepEntry to the hash. memDepHash.insert( std::pair(barr_sn, inst_entry)); +#ifdef DEBUG MemDepEntry::memdep_insert++; +#endif // Add the instruction to the instruction list. 
instList[tid].push_back(barr_inst); @@ -377,7 +385,9 @@ MemDepUnit::completed(DynInstPtr &inst) (*hash_it).second = NULL; memDepHash.erase(hash_it); +#ifdef DEBUG MemDepEntry::memdep_erase++; +#endif } template @@ -472,7 +482,9 @@ MemDepUnit::squash(const InstSeqNum &squashed_num, (*hash_it).second = NULL; memDepHash.erase(hash_it); +#ifdef DEBUG MemDepEntry::memdep_erase++; +#endif instList[tid].erase(squash_it--); } @@ -553,5 +565,7 @@ MemDepUnit::dumpLists() cprintf("Memory dependence hash size: %i\n", memDepHash.size()); +#ifdef DEBUG cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); +#endif } diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc index 0023cee36..2d28b617f 100644 --- a/src/cpu/o3/store_set.cc +++ b/src/cpu/o3/store_set.cc @@ -29,6 +29,7 @@ */ #include "base/intmath.hh" +#include "base/misc.hh" #include "base/trace.hh" #include "cpu/o3/store_set.hh" diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index dfb1530d0..3fa60f093 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -86,14 +86,9 @@ struct O3ThreadState : public ThreadState { inSyscall(0), trapPending(0) { } #else - O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, NULL, _process, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { } - - O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, - int _asid) - : ThreadState(-1, _thread_num, _mem, NULL, _asid), + O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid, + MemObject *mem) + : ThreadState(-1, _thread_num, mem, _process, _asid), cpu(_cpu), inSyscall(0), trapPending(0) { } #endif diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index e09cb12fd..a96884d5b 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -31,6 +31,10 @@ #include "cpu/exec_context.hh" +#if !FULL_SYSTEM +#include "mem/translating_port.hh" +#endif + #if FULL_SYSTEM 
class EndQuiesceEvent; class FunctionProfile; @@ -51,17 +55,27 @@ class Process; */ struct ThreadState { #if FULL_SYSTEM - ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem) - : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0), + ThreadState(int _cpuId, int _tid) + : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0), profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL) #else - ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem, + ThreadState(int _cpuId, int _tid, MemObject *mem, Process *_process, short _asid) - : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid) + : cpuId(_cpuId), tid(_tid), process(_process), asid(_asid) #endif { funcExeInst = 0; storeCondFailures = 0; +#if !FULL_SYSTEM + /* Use this port for syscall emulation writes to memory. */ + Port *mem_port; + port = new TranslatingPort(csprintf("%d-funcport", + tid), + process->pTable, false); + mem_port = mem->getPort("functional"); + mem_port->setPeer(port); + port->setPeer(mem_port); +#endif } ExecContext::Status status; @@ -79,8 +93,6 @@ struct ThreadState { Counter numLoad; Counter startNumLoad; - FunctionalMemory *mem; // functional storage for process address space - #if FULL_SYSTEM Tick lastActivate; Tick lastSuspend; @@ -93,6 +105,8 @@ struct ThreadState { Kernel::Statistics *kernelStats; #else + TranslatingPort *port; + Process *process; // Address space ID. Note that this is used for TIMING cache diff --git a/src/python/m5/objects/AlphaFullCPU.py b/src/python/m5/objects/AlphaFullCPU.py index 043c3c08f..7c772d3f2 100644 --- a/src/python/m5/objects/AlphaFullCPU.py +++ b/src/python/m5/objects/AlphaFullCPU.py @@ -6,9 +6,6 @@ class DerivAlphaFullCPU(BaseCPU): activity = Param.Unsigned("Initial count") numThreads = Param.Unsigned("number of HW thread contexts") - if not build_env['FULL_SYSTEM']: - mem = Param.FunctionalMemory(NULL, "memory") - checker = Param.BaseCPU(NULL, "checker") cachePorts = Param.Unsigned("Cache Ports")