diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index b62372f66..af1a91a62 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -728,8 +728,10 @@ decode OPCODE default Unknown::unknown() { 0: OpcdecFault::hw_st_quad(); 1: decode HW_LDST_QUAD { format HwLoad { - 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L); - 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q); + 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, + L, IsSerializing, IsSerializeBefore); + 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, + Q, IsSerializing, IsSerializeBefore); } } } @@ -740,9 +742,9 @@ decode OPCODE default Unknown::unknown() { 1: decode HW_LDST_COND { 0: decode HW_LDST_QUAD { 0: hw_st({{ EA = (Rb + disp) & ~3; }}, - {{ Mem.ul = Ra<31:0>; }}, L); + {{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore); 1: hw_st({{ EA = (Rb + disp) & ~7; }}, - {{ Mem.uq = Ra.uq; }}, Q); + {{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore); } 1: FailUnimpl::hw_st_cond(); diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 3e0be6ad8..4dccee0d3 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -226,7 +226,8 @@ BaseCPU::startup() #endif if (params->progress_interval) { - new CPUProgressEvent(&mainEventQueue, params->progress_interval, + new CPUProgressEvent(&mainEventQueue, + cycles(params->progress_interval), this); } } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9ccdcdccc..6c6d90076 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The kind of fault this instruction has generated. */ Fault fault; - /** The memory request. */ - Request *req; - /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ Addr effAddr; + /** Is the effective virtual address valid. */ + bool effAddrValid; + /** The effective physical address. */ Addr physEffAddr; @@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether or not this instruction is ready to issue. */ bool readyToIssue() const { return status[CanIssue]; } + /** Clears this instruction being able to issue. */ + void clearCanIssue() { status.reset(CanIssue); } + /** Sets this instruction as issued from the IQ. */ void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ bool isIssued() const { return status[Issued]; } + /** Clears this instruction as being issued. */ + void clearIssued() { status.reset(Issued); } + /** Sets this instruction as executed. */ void setExecuted() { status.set(Executed); } @@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted */ bool eaCalcDone; + /** Is this instruction's memory access uncacheable. */ + bool isUncacheable; + + /** Has this instruction generated a memory request. */ + bool reqMade; + public: /** Sets the effective address. */ void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } @@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Whether or not the memory operation is done. */ bool memOpDone; + /** Is this instruction's memory access uncacheable. */ + bool uncacheable() { return isUncacheable; } + + /** Has this instruction generated a memory request. */ + bool hasRequest() { return reqMade; } + public: /** Load queue index. 
*/ int16_t lqIdx; @@ -776,25 +794,25 @@ template inline Fault BaseDynInst::read(Addr addr, T &data, unsigned flags) { - // Sometimes reads will get retried, so they may come through here - // twice. - if (!req) { - req = new Request(); - req->setVirt(asid, addr, sizeof(T), flags, this->PC); - req->setThreadContext(thread->readCpuId(), threadNumber); - } else { - assert(addr == req->getVaddr()); - } + reqMade = true; + Request *req = new Request(); + req->setVirt(asid, addr, sizeof(T), flags, this->PC); + req->setThreadContext(thread->readCpuId(), threadNumber); if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { + delete req; return TheISA::genAlignmentFault(); } fault = cpu->translateDataReadReq(req, thread); + if (req->isUncacheable()) + isUncacheable = true; + if (fault == NoFault) { effAddr = req->getVaddr(); + effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); @@ -817,6 +835,7 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) // Commit will have to clean up whatever happened. Set this // instruction as executed. this->setExecuted(); + delete req; } if (traceData) { @@ -837,21 +856,25 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) traceData->setData(data); } - assert(req == NULL); - - req = new Request(); + reqMade = true; + Request *req = new Request(); req->setVirt(asid, addr, sizeof(T), flags, this->PC); req->setThreadContext(thread->readCpuId(), threadNumber); if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { + delete req; return TheISA::genAlignmentFault(); } fault = cpu->translateDataWriteReq(req, thread); + if (req->isUncacheable()) + isUncacheable = true; + if (fault == NoFault) { effAddr = req->getVaddr(); + effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); #if 0 @@ -863,12 +886,8 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) #else fault = cpu->write(req, data, sqIdx); #endif - } - - if (res) { - // always return some result to keep misspeculated paths - // (which will ignore faults) deterministic - *res = (fault == NoFault) ? 
req->getExtraData() : 0; + } else { + delete req; } return fault; diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index c3d71e428..a1c866336 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -92,11 +92,13 @@ template void BaseDynInst::initVars() { - req = NULL; memData = NULL; effAddr = 0; + effAddrValid = false; physEffAddr = 0; + isUncacheable = false; + reqMade = false; readyRegs = 0; instResult.integer = 0; @@ -140,10 +142,6 @@ BaseDynInst::initVars() template BaseDynInst::~BaseDynInst() { - if (req) { - delete req; - } - if (memData) { delete [] memData; } @@ -271,7 +269,7 @@ void BaseDynInst::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - status.set(CanIssue); + setCanIssue(); } } diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 5a375a4b8..34754d3c5 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param clock; Param phase; Param numThreads; +Param cpu_id; Param activity; #if FULL_SYSTEM SimObjectParam system; -Param cpu_id; SimObjectParam itb; SimObjectParam dtb; Param profile; @@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), #if FULL_SYSTEM INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(itb, "Instruction translation buffer"), INIT_PARAM(dtb, "Data translation buffer"), INIT_PARAM(profile, ""), @@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU) AlphaSimpleParams *params = new AlphaSimpleParams; params->clock = clock; + params->phase = phase; params->name = getInstanceName(); params->numberOfThreads = actual_num_threads; + params->cpu_id = cpu_id; params->activity = activity; #if FULL_SYSTEM params->system = system; - params->cpu_id = cpu_id; params->itb = itb; params->dtb = dtb; params->profile = profile; diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh index b91972704..304ee6c38 100644 --- a/src/cpu/o3/alpha/cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -114,6 +114,7 @@ AlphaO3CPU::AlphaO3CPU(Params *params) : FullO3CPU(params) #endif // Give the thread the TC. this->thread[i]->tc = tc; + this->thread[i]->setCpuId(params->cpu_id); // Add the TC to the CPU's list of TC's. this->threadContexts.push_back(tc); diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 0d7d82529..e2ad23954 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -247,6 +247,11 @@ class DefaultCommit /** Handles squashing due to an TC write. */ void squashFromTC(unsigned tid); +#if FULL_SYSTEM + /** Handles processing an interrupt. */ + void handleInterrupt(); +#endif // FULL_SYSTEM + /** Commits as many instructions as possible. */ void commitInsts(); @@ -409,6 +414,16 @@ class DefaultCommit /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; + /** Records if there is a trap currently in flight. */ + bool trapInFlight[Impl::MaxThreads]; + + /** Records if there were any stores committed this cycle. */ + bool committedStores[Impl::MaxThreads]; + + /** Records if commit should check if the ROB is truly empty (see + commit_impl.hh). */ + bool checkEmptyROB[Impl::MaxThreads]; + /** Pointer to the list of active threads. 
*/ std::list *activeThreads; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 18fb2aaa3..c17c8836d 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -118,6 +118,9 @@ DefaultCommit::DefaultCommit(Params *params) for (int i=0; i < numThreads; i++) { commitStatus[i] = Idle; changedROBNumEntries[i] = false; + checkEmptyROB[i] = false; + trapInFlight[i] = false; + committedStores[i] = false; trapSquash[i] = false; tcSquash[i] = false; PC[i] = nextPC[i] = nextNPC[i] = 0; @@ -335,6 +338,7 @@ DefaultCommit::initStage() for (int i=0; i < numThreads; i++) { toIEW->commitInfo[i].usedROB = true; toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i); + toIEW->commitInfo[i].emptyROB = true; } cpu->activityThisCycle(); @@ -473,14 +477,14 @@ DefaultCommit::generateTrapEvent(unsigned tid) TrapEvent *trap = new TrapEvent(this, tid); trap->schedule(curTick + trapLatency); - - thread[tid]->trapPending = true; + trapInFlight[tid] = true; } template void DefaultCommit::generateTCEvent(unsigned tid) { + assert(!trapInFlight[tid]); DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid); tcSquash[tid] = true; @@ -495,7 +499,7 @@ DefaultCommit::squashAll(unsigned tid) // Hopefully this doesn't mess things up. Basically I want to squash // all instructions of this thread. InstSeqNum squashed_inst = rob->isEmpty() ? - 0 : rob->readHeadInst(tid)->seqNum - 1;; + 0 : rob->readHeadInst(tid)->seqNum - 1; // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB (0 in this case. @@ -532,6 +536,7 @@ DefaultCommit::squashFromTrap(unsigned tid) thread[tid]->trapPending = false; thread[tid]->inSyscall = false; + trapInFlight[tid] = false; trapSquash[tid] = false; @@ -580,6 +585,10 @@ DefaultCommit::tick() while (threads != end) { unsigned tid = *threads++; + // Clear the bit saying if the thread has committed stores + // this cycle. + committedStores[tid] = false; + if (commitStatus[tid] == ROBSquashing) { if (rob->isDoneSquashing(tid)) { @@ -635,16 +644,11 @@ DefaultCommit::tick() updateStatus(); } +#if FULL_SYSTEM template void -DefaultCommit::commit() +DefaultCommit::handleInterrupt() { - - ////////////////////////////////////// - // Check for interrupts - ////////////////////////////////////// - -#if FULL_SYSTEM if (interrupt != NoFault) { // Wait until the ROB is empty and all stores have drained in // order to enter the interrupt. @@ -653,6 +657,12 @@ DefaultCommit::commit() // an interrupt needed to be handled. DPRINTF(Commit, "Interrupt detected.\n"); + Fault new_interrupt = cpu->getInterrupts(); + assert(new_interrupt == interrupt); + + // Clear the interrupt now that it's going to be handled + toIEW->commitInfo[0].clearInterrupt = true; + assert(!thread[0]->inSyscall); thread[0]->inSyscall = true; @@ -666,16 +676,14 @@ DefaultCommit::commit() // Generate trap squash event. generateTrapEvent(0); - // Clear the interrupt now that it's been handled - toIEW->commitInfo[0].clearInterrupt = true; interrupt = NoFault; } else { DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n"); } - } else if (cpu->check_interrupts(cpu->tcBase(0)) && - commitStatus[0] != TrapPending && - !trapSquash[0] && - !tcSquash[0]) { + } else if (commitStatus[0] != TrapPending && + cpu->check_interrupts(cpu->tcBase(0)) && + !trapSquash[0] && + !tcSquash[0]) { // Process interrupts if interrupts are enabled, not in PAL // mode, and no other traps or external squashes are currently // pending. 
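(Aside, not part of the patch: a minimal, self-contained sketch of the interrupt gating that the commit_impl.hh hunks around this point implement. The names CommitState and tryHandleInterrupt are invented for illustration and do not exist in gem5; the real code lives in DefaultCommit::handleInterrupt() and works on the ROB, IEW and trap-event machinery rather than plain booleans.)

#include <cstdio>

// Illustrative stand-ins for the real commit/IEW state; names are invented.
struct CommitState {
    bool interruptPending = false;   // an interrupt has been latched
    bool robEmpty = false;           // ROB has drained
    bool storesToWB = false;         // IEW still holds stores to write back
    bool trapInFlight = false;       // a trap squash event is outstanding
};

// Mirrors the shape of the refactored handleInterrupt(): only enter the
// interrupt once the machine has drained; otherwise keep waiting.
bool tryHandleInterrupt(CommitState &c)
{
    if (!c.interruptPending || c.trapInFlight)
        return false;

    if (!c.robEmpty || c.storesToWB) {
        std::puts("Interrupt pending, waiting for ROB to empty.");
        return false;
    }

    // The real code squashes the pipeline, schedules a trap event and
    // clears the latched interrupt; here we just mark the trap in flight.
    std::puts("Interrupt detected, generating trap squash event.");
    c.trapInFlight = true;
    c.interruptPending = false;
    return true;
}

int main()
{
    CommitState c;
    c.interruptPending = true;
    c.storesToWB = true;
    tryHandleInterrupt(c);                   // waits: stores still outstanding
    c.storesToWB = false;
    c.robEmpty = true;
    return tryHandleInterrupt(c) ? 0 : 1;    // now the interrupt is taken
}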
@@ -691,7 +699,21 @@ DefaultCommit::commit() toIEW->commitInfo[0].interruptPending = true; } } +} +#endif // FULL_SYSTEM +template +void +DefaultCommit::commit() +{ + +#if FULL_SYSTEM + // Check for any interrupt, and start processing it. Or if we + // have an outstanding interrupt and are at a point when it is + // valid to take an interrupt, process it. + if (cpu->check_interrupts(cpu->tcBase(0))) { + handleInterrupt(); + } #endif // FULL_SYSTEM //////////////////////////////////// @@ -709,6 +731,7 @@ DefaultCommit::commit() assert(!tcSquash[tid]); squashFromTrap(tid); } else if (tcSquash[tid] == true) { + assert(commitStatus[tid] != TrapPending); squashFromTC(tid); } @@ -753,6 +776,7 @@ DefaultCommit::commit() bdelay_done_seq_num--; #endif } + // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; @@ -817,13 +841,29 @@ DefaultCommit::commit() toIEW->commitInfo[tid].usedROB = true; toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); - if (rob->isEmpty(tid)) { - toIEW->commitInfo[tid].emptyROB = true; - } - wroteToTimeBuffer = true; changedROBNumEntries[tid] = false; + if (rob->isEmpty(tid)) + checkEmptyROB[tid] = true; } + + // ROB is only considered "empty" for previous stages if: a) + // ROB is empty, b) there are no outstanding stores, c) IEW + // stage has received any information regarding stores that + // committed. + // c) is checked by making sure to not consider the ROB empty + // on the same cycle as when stores have been committed. + // @todo: Make this handle multi-cycle communication between + // commit and IEW. + if (checkEmptyROB[tid] && rob->isEmpty(tid) && + !iewStage->hasStoresToWB() && !committedStores[tid]) { + checkEmptyROB[tid] = false; + toIEW->commitInfo[tid].usedROB = true; + toIEW->commitInfo[tid].emptyROB = true; + toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); + wroteToTimeBuffer = true; + } + } } @@ -966,8 +1006,6 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->setAtCommit(); - if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || head_inst->isMemBarrier() || @@ -977,19 +1015,9 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) "instruction [sn:%lli] at the head of the ROB, PC %#x.\n", head_inst->seqNum, head_inst->readPC()); - // Hack to make sure syscalls/memory barriers/quiesces - // aren't executed until all stores write back their data. - // This direct communication shouldn't be used for - // anything other than this. 
- if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() || - head_inst->isQuiesce()) && - iewStage->hasStoresToWB()) - { + if (inst_num > 0 || iewStage->hasStoresToWB()) { DPRINTF(Commit, "Waiting for all stores to writeback.\n"); return false; - } else if (inst_num > 0 || iewStage->hasStoresToWB()) { - DPRINTF(Commit, "Waiting to become head of commit.\n"); - return false; } toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; @@ -1002,6 +1030,12 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } else if (head_inst->isLoad()) { + if (inst_num > 0 || iewStage->hasStoresToWB()) { + DPRINTF(Commit, "Waiting for all stores to writeback.\n"); + return false; + } + + assert(head_inst->uncacheable()); DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n", head_inst->seqNum, head_inst->readPC()); @@ -1025,8 +1059,11 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) panic("Thread sync instructions are not handled yet.\n"); } + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = head_inst->getFault(); + // Stores mark themselves as completed. - if (!head_inst->isStore()) { + if (!head_inst->isStore() && inst_fault == NoFault) { head_inst->setCompleted(); } @@ -1038,9 +1075,6 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) } #endif - // Check if the instruction caused a fault. If so, trap. - Fault inst_fault = head_inst->getFault(); - // DTB will sometimes need the machine instruction for when // faults happen. So we will set it here, prior to the DTB // possibly needing it for its fault. @@ -1048,7 +1082,6 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) static_cast(head_inst->staticInst->machInst)); if (inst_fault != NoFault) { - head_inst->setCompleted(); DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1057,6 +1090,8 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } + head_inst->setCompleted(); + #if USE_CHECKER if (cpu->checker && head_inst->isStore()) { cpu->checker->verify(head_inst); @@ -1082,6 +1117,13 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) commitStatus[tid] = TrapPending; + if (head_inst->traceData) { + head_inst->traceData->setFetchSeq(head_inst->seqNum); + head_inst->traceData->setCPSeq(thread[tid]->numInst); + head_inst->traceData->finalize(); + head_inst->traceData = NULL; + } + // Generate trap squash event. generateTrapEvent(tid); // warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); @@ -1123,6 +1165,10 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Finally clear the head ROB entry. rob->retireHead(tid); + // If this was a store, record it for this cycle. + if (head_inst->isStore()) + committedStores[tid] = true; + // Return true to indicate that we have committed an instruction. 
return true; } @@ -1167,7 +1213,8 @@ DefaultCommit::getInsts() int tid = inst->threadNumber; if (!inst->isSquashed() && - commitStatus[tid] != ROBSquashing) { + commitStatus[tid] != ROBSquashing && + commitStatus[tid] != TrapPending) { changedROBNumEntries[tid] = true; DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n", diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 38e6a0b5b..354e3c490 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -466,7 +466,7 @@ FullO3CPU::tick() lastRunningCycle = curTick; timesIdled++; } else { - tickEvent.schedule(curTick + cycles(1)); + tickEvent.schedule(nextCycle(curTick + cycles(1))); DPRINTF(O3CPU, "Scheduling next tick!\n"); } } @@ -886,7 +886,7 @@ FullO3CPU::resume() #endif if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); _status = Running; } @@ -979,11 +979,11 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) ThreadContext *tc = threadContexts[i]; if (tc->status() == ThreadContext::Active && _status != Running) { _status = Running; - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } } if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } template @@ -1393,7 +1393,7 @@ FullO3CPU::wakeCPU() idleCycles += (curTick - 1) - lastRunningCycle; - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ea374dd57..0ab20ba2a 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - tickEvent.reschedule(curTick + cycles(delay)); + tickEvent.reschedule(nextCycle(curTick + cycles(delay))); else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + cycles(delay)); + tickEvent.schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule tick event, regardless of its current state. */ @@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU { // Schedule thread to activate, regardless of its current state. if (activateThreadEvent[tid].squashed()) - activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + activateThreadEvent[tid]. + reschedule(nextCycle(curTick + cycles(delay))); else if (!activateThreadEvent[tid].scheduled()) - activateThreadEvent[tid].schedule(curTick + cycles(delay)); + activateThreadEvent[tid]. + schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule actiavte thread event, regardless of its current state. */ @@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) - deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + deallocateContextEvent[tid]. + reschedule(nextCycle(curTick + cycles(delay))); else if (!deallocateContextEvent[tid].scheduled()) - deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + deallocateContextEvent[tid]. 
+ schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule thread deallocation in CPU */ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 1256dd233..663cd3142 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -620,6 +620,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid fault = TheISA::genMachineCheckFault(); delete mem_req; memReq[tid] = NULL; + warn("Bad address!\n"); } assert(retryPkt == NULL); assert(retryTid == -1); @@ -670,11 +671,12 @@ DefaultFetch::doSquash(const Addr &new_PC, // Get rid of the retrying packet if it was from this thread. if (retryTid == tid) { assert(cacheBlocked); - cacheBlocked = false; - retryTid = -1; - delete retryPkt->req; - delete retryPkt; + if (retryPkt) { + delete retryPkt->req; + delete retryPkt; + } retryPkt = NULL; + retryTid = -1; } fetchStatus[tid] = Squashing; @@ -1150,7 +1152,7 @@ DefaultFetch::fetch(bool &status_change) ///FIXME This needs to be more robust in dealing with delay slots #if !ISA_HAS_DELAY_SLOT - predicted_branch |= +// predicted_branch |= #endif lookupAndUpdateNextPC(instruction, next_PC, next_NPC); predicted_branch |= (next_PC != fetch_NPC); @@ -1221,7 +1223,7 @@ DefaultFetch::fetch(bool &status_change) // until commit handles the fault. The only other way it can // wake up is if a squash comes along and changes the PC. #if FULL_SYSTEM - assert(numInst != fetchWidth); + assert(numInst < fetchWidth); // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); // We will use a nop in order to carry the fault. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index f24eaf2c4..4883e5a5c 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1152,19 +1152,6 @@ DefaultIEW::dispatchInsts(unsigned tid) // Same as non-speculative stores. inst->setCanCommit(); instQueue.insertBarrier(inst); - add_to_iq = false; - } else if (inst->isNonSpeculative()) { - DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " - "encountered, skipping.\n", tid); - - // Same as non-speculative stores. - inst->setCanCommit(); - - // Specifically insert it as nonspeculative. - instQueue.insertNonSpec(inst); - - ++iewDispNonSpecInsts; - add_to_iq = false; } else if (inst->isNop()) { DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, " @@ -1193,6 +1180,20 @@ DefaultIEW::dispatchInsts(unsigned tid) } else { add_to_iq = true; } + if (inst->isNonSpeculative()) { + DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " + "encountered, skipping.\n", tid); + + // Same as non-speculative stores. + inst->setCanCommit(); + + // Specifically insert it as nonspeculative. + instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + add_to_iq = false; + } // If the instruction queue is not full, then add the // instruction. @@ -1379,6 +1380,7 @@ DefaultIEW::executeInsts() predictedNotTakenIncorrect++; } } else if (ldstQueue.violation(tid)) { + assert(inst->isMemRef()); // If there was an ordering violation, then get the // DynInst that caused the violation. Note that this // clears the violation signal. @@ -1391,10 +1393,10 @@ DefaultIEW::executeInsts() // Ensure the violating instruction is older than // current squash - if (fetchRedirect[tid] && - violator->seqNum >= toCommit->squashedSeqNum[tid]) +/* if (fetchRedirect[tid] && + violator->seqNum >= toCommit->squashedSeqNum[tid] + 1) continue; - +*/ fetchRedirect[tid] = true; // Tell the instruction queue that a violation has occured. 
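(Aside, not part of the patch: a simplified sketch of the memory-order violation scan that the iew_impl.hh hunks here react to and that the lsq_unit_impl.hh hunks below add to executeLoad()/executeStore(). The LoadEntry type and findOldestViolator function are invented for illustration; the real scan walks the load queue with incrLdIdx and records the result in memDepViolator.)

#include <cstdint>
#include <vector>
#include <cassert>

// Simplified stand-in for an in-flight load; not the real DynInst interface.
struct LoadEntry {
    uint64_t seqNum;
    uint64_t effAddr;
    bool     effAddrValid;
};

// Compare effective addresses at a coarse granularity (the patch shifts by 8,
// flagged in the code as a magic number) and remember the *oldest* younger
// access that incorrectly went ahead of the current one.
const LoadEntry *findOldestViolator(const std::vector<LoadEntry> &youngerLoads,
                                    uint64_t effAddr)
{
    const LoadEntry *violator = nullptr;
    for (const LoadEntry &ld : youngerLoads) {
        if (!ld.effAddrValid)
            continue;                        // load has not executed yet
        if ((ld.effAddr >> 8) != (effAddr >> 8))
            continue;                        // different block, no conflict
        if (!violator || ld.seqNum < violator->seqNum)
            violator = &ld;                  // keep the oldest violator
    }
    return violator;
}

int main()
{
    std::vector<LoadEntry> loads = {
        {42, 0x1000, true}, {40, 0x1008, true}, {41, 0x2000, true}};
    const LoadEntry *v = findOldestViolator(loads, 0x1010);
    assert(v && v->seqNum == 40);            // oldest conflicting load wins
    return 0;
}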
@@ -1414,6 +1416,33 @@ DefaultIEW::executeInsts() squashDueToMemBlocked(inst, tid); } + } else { + // Reset any state associated with redirects that will not + // be used. + if (ldstQueue.violation(tid)) { + assert(inst->isMemRef()); + + DynInstPtr violator = ldstQueue.getMemDepViolator(tid); + + DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + DPRINTF(IEW, "Violation will not be handled because " + "already squashing\n"); + + ++memOrderViolationEvents; + } + if (ldstQueue.loadBlocked(tid) && + !ldstQueue.isLoadBlockedHandled(tid)) { + DPRINTF(IEW, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + DPRINTF(IEW, "Blocked load will not be handled because " + "already squashing\n"); + + ldstQueue.setLoadBlockedHandled(tid); + } + } } @@ -1563,6 +1592,7 @@ DefaultIEW::tick() //DPRINTF(IEW,"NonspecInst from thread %i",tid); if (fromCommit->commitInfo[tid].uncached) { instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad); + fromCommit->commitInfo[tid].uncachedLoad->setAtCommit(); } else { instQueue.scheduleNonSpec( fromCommit->commitInfo[tid].nonSpecSeqNum); diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d5781d89d..79e03d4bf 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -829,6 +829,8 @@ InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) unsigned tid = (*inst_it).second->threadNumber; + (*inst_it).second->setAtCommit(); + (*inst_it).second->setCanIssue(); if (!(*inst_it).second->isMemRef()) { @@ -960,6 +962,8 @@ template void InstructionQueue::rescheduleMemInst(DynInstPtr &resched_inst) { + DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); + resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); } @@ -984,7 +988,6 @@ InstructionQueue::completeMemInst(DynInstPtr &completed_inst) completed_inst->memOpDone = true; memDepUnit[tid].completed(completed_inst); - count[tid]--; } @@ -1084,16 +1087,21 @@ InstructionQueue::doSquash(unsigned tid) ++iqSquashedOperandsExamined; } - } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) { + } else if (!squashed_inst->isStoreConditional() || + !squashed_inst->isCompleted()) { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); assert(ns_inst_it != nonSpecInsts.end()); + if (ns_inst_it == nonSpecInsts.end()) { + assert(squashed_inst->getFault() != NoFault); + } else { - (*ns_inst_it).second = NULL; + (*ns_inst_it).second = NULL; - nonSpecInsts.erase(ns_inst_it); + nonSpecInsts.erase(ns_inst_it); - ++iqSquashedNonSpecRemoved; + ++iqSquashedNonSpecRemoved; + } } // Might want to also clear out the head of the dependency graph. diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 2419afe29..1b10843f5 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -497,6 +497,11 @@ LSQUnit::read(Request *req, T &data, int load_idx) (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; + + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. 
+ delete req; return TheISA::genMachineCheckFault(); } @@ -534,6 +539,10 @@ LSQUnit::read(Request *req, T &data, int load_idx) if (store_size == 0) continue; + else if (storeQueue[store_idx].inst->uncacheable()) + continue; + + assert(storeQueue[store_idx].inst->effAddrValid); // Check if the store data is within the lower and upper bounds of // addresses that the request needs. @@ -550,7 +559,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) storeQueue[store_idx].inst->effAddr; // If the store's data has all of the data needed, we can forward. - if (store_has_lower_limit && store_has_upper_limit) { + if ((store_has_lower_limit && store_has_upper_limit)) { // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); // @todo: Magic number, assumes byte addressing @@ -596,6 +605,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) // If it's already been written back, then don't worry about // stalling on it. if (storeQueue[store_idx].completed) { + panic("Should not check one of these"); continue; } @@ -614,6 +624,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) // rescheduled eventually iewStage->rescheduleMemInst(load_inst); iewStage->decrWb(load_inst->seqNum); + load_inst->clearIssued(); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not @@ -622,7 +633,11 @@ LSQUnit::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); - ++lsqBlockedLoads; + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the + // proper place to really handle request deletes. + delete req; + return NoFault; } } @@ -654,8 +669,11 @@ LSQUnit::read(Request *req, T &data, int load_idx) // Delete state and data packet because a load retry // initiates a pipeline restart; it does not retry. delete state; + delete data_pkt->req; delete data_pkt; + req = NULL; + if (result == Packet::BadAddress) { return TheISA::genMachineCheckFault(); } @@ -669,6 +687,9 @@ LSQUnit::read(Request *req, T &data, int load_idx) // If the cache was blocked, or has become blocked due to the access, // handle it. if (lsq->cacheBlocked()) { + if (req) + delete req; + ++lsqCacheBlocked; iewStage->decrWb(load_inst->seqNum); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 3ba22a530..e70c960b3 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -81,6 +81,7 @@ LSQUnit::completeDataAccess(PacketPtr pkt) if (isSwitchedOut() || inst->isSquashed()) { iewStage->decrWb(inst->seqNum); delete state; + delete pkt->req; delete pkt; return; } else { @@ -94,6 +95,7 @@ LSQUnit::completeDataAccess(PacketPtr pkt) } delete state; + delete pkt->req; delete pkt; } @@ -403,12 +405,15 @@ template Fault LSQUnit::executeLoad(DynInstPtr &inst) { + using namespace TheISA; // Execute a specific load. Fault load_fault = NoFault; DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", inst->readPC(),inst->seqNum); + assert(!inst->isSquashed()); + load_fault = inst->initiateAcc(); // If the instruction faulted, then we need to send it along to commit @@ -418,12 +423,44 @@ LSQUnit::executeLoad(DynInstPtr &inst) // realizes there is activity. // Mark it as executed unless it is an uncached load that // needs to hit the head of commit. 
- if (!(inst->req && inst->req->isUncacheable()) || + if (!(inst->hasRequest() && inst->uncacheable()) || inst->isAtCommit()) { inst->setExecuted(); } iewStage->instToCommit(inst); iewStage->activityThisCycle(); + } else if (!loadBlocked()) { + assert(inst->effAddrValid); + int load_idx = inst->lqIdx; + incrLdIdx(load_idx); + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (inst->effAddr >> 8)) { + // A load incorrectly passed this load. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; + } + + ++lsqMemOrderViolation; + + return genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } } return load_fault; @@ -442,6 +479,8 @@ LSQUnit::executeStore(DynInstPtr &store_inst) DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", store_inst->readPC(), store_inst->seqNum); + assert(!store_inst->isSquashed()); + // Check the recently completed loads to see if any match this store's // address. If so, then we have a memory ordering violation. int load_idx = store_inst->lqIdx; @@ -465,32 +504,36 @@ LSQUnit::executeStore(DynInstPtr &store_inst) ++storesToWB; } - if (!memDepViolator) { - while (load_idx != loadTail) { - // Really only need to check loads that have actually executed - // It's safe to check all loads because effAddr is set to - // InvalAddr when the dyn inst is created. + assert(store_inst->effAddrValid); + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. - // @todo: For now this is extra conservative, detecting a - // violation if the addresses match assuming all accesses - // are quad word accesses. + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. - // @todo: Fix this, magic number being used here - if ((loadQueue[load_idx]->effAddr >> 8) == - (store_inst->effAddr >> 8)) { - // A load incorrectly passed this store. Squash and refetch. - // For now return a fault to show that it was unsuccessful. - memDepViolator = loadQueue[load_idx]; - ++lsqMemOrderViolation; - - return genMachineCheckFault(); + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; } - incrLdIdx(load_idx); + ++lsqMemOrderViolation; + + return genMachineCheckFault(); } - // If we've reached this point, there was no violation. - memDepViolator = NULL; + incrLdIdx(load_idx); } return store_fault; @@ -660,7 +703,7 @@ LSQUnit::writebackStores() panic("LSQ sent out a bad address for a completed store!"); } // Need to handle becoming blocked on a store. 
- DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" "retry later\n", inst->seqNum); isStoreBlocked = true; @@ -735,6 +778,10 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } } + if (memDepViolator && squashed_num < memDepViolator->seqNum) { + memDepViolator = NULL; + } + int store_idx = storeTail; decrStIdx(store_idx); @@ -764,6 +811,11 @@ LSQUnit::squash(const InstSeqNum &squashed_num) storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + delete storeQueue[store_idx].req; + storeQueue[store_idx].req = NULL; --stores; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index f19980fd5..64558efaa 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -214,6 +214,9 @@ MemDepUnit::insert(DynInstPtr &inst) inst_entry->regsReady = true; } + // Clear the bit saying this instruction can issue. + inst->clearCanIssue(); + // Add this instruction to the list of dependents. store_entry->dependInsts.push_back(inst_entry); @@ -357,7 +360,6 @@ void MemDepUnit::replay(DynInstPtr &inst) { DynInstPtr temp_inst; - bool found_inst = false; // For now this replay function replays all waiting memory ops. while (!instsToReplay.empty()) { @@ -371,14 +373,8 @@ MemDepUnit::replay(DynInstPtr &inst) moveToReady(inst_entry); - if (temp_inst == inst) { - found_inst = true; - } - instsToReplay.pop_front(); } - - assert(found_inst); } template diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 620daf691..b436ec1c3 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg) // known that the prev reg was outside the range of normal registers // so the free list can avoid adding it. prev_reg = renamed_reg; - - assert(renamed_reg < numPhysicalRegs + numMiscRegs); } DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n", diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 4988df3c5..6e6ba2380 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -171,8 +171,12 @@ Bus::recvTiming(PacketPtr pkt) } short dest = pkt->getDest(); + + // Make sure to clear the snoop commit flag so it doesn't think an + // access has been handled twice. if (dest == Packet::Broadcast) { port = findPort(pkt->getAddr(), pkt->getSrc()); + pkt->flags &= ~SNOOP_COMMIT; if (timingSnoop(pkt, port ? 
port : interfaces[pkt->getSrc()])) { bool success; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 5c6ab0950..fc4660269 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -545,8 +545,13 @@ Cache::access(PacketPtr &pkt) //We are determining prefetches on access stream, call prefetcher prefetcher->handleMiss(pkt, curTick); } + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + if (!pkt->req->isUncacheable()) { - blk = handleAccess(pkt, lat, writebacks); + if (!missQueue->findMSHR(blk_addr)) { + blk = handleAccess(pkt, lat, writebacks); + } } else { size = pkt->getSize(); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 25b8fcbeb..24ca9cfa2 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -599,6 +599,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time) MemCmd cmd = mshr->getTarget()->cmd; mshr->pkt->setDest(Packet::Broadcast); mshr->pkt->result = Packet::Unknown; + mshr->pkt->req = mshr->getTarget()->req; mq.markPending(mshr, cmd); mshr->order = order++; cache->setMasterRequest(Request_MSHR, time); diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py index a66cd436e..5600d9f22 100644 --- a/tests/configs/o3-timing.py +++ b/tests/configs/o3-timing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2006 The Regents of The University of Michigan +# Copyright (c) 2006-2007 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -37,7 +37,7 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 -cpu = DerivO3CPU() +cpu = DerivO3CPU(cpu_id=0) cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), MyCache(size = '2MB'))
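(Aside, not part of the patch: a minimal sketch of the store-to-load forwarding test in LSQUnit::read() that one of the lsq_unit.hh hunks above extends with an uncacheable() check. PendingStore, LoadReq and canForward are invented names; the real code operates on storeQueue entries and Request objects and computes the same lower/upper bound conditions.)

#include <cstdint>
#include <cassert>

// Invented, simplified descriptors of a queued store and an incoming load.
struct PendingStore {
    uint64_t addr;
    unsigned size;
    bool     uncacheable;
};

struct LoadReq {
    uint64_t addr;
    unsigned size;
};

// The store can forward only if it fully covers the bytes the load needs,
// and (per the added uncacheable() check) uncacheable stores are skipped
// in the forwarding scan altogether.
bool canForward(const PendingStore &st, const LoadReq &ld)
{
    if (st.size == 0 || st.uncacheable)
        return false;

    bool hasLowerLimit = ld.addr >= st.addr;
    bool hasUpperLimit = ld.addr + ld.size <= st.addr + st.size;
    return hasLowerLimit && hasUpperLimit;
}

int main()
{
    PendingStore st{0x2000, 8, false};
    assert(canForward(st, {0x2000, 4}));      // fully covered: forward
    assert(!canForward(st, {0x2006, 4}));     // spills past the store: no
    st.uncacheable = true;
    assert(!canForward(st, {0x2000, 4}));     // uncacheable: skipped
    return 0;
}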