6dd996aabb
This patch fixes two problems with the O3 CPU model. The first is an issue with an instruction fetch causing a fault on the next address while the current macro-op is being issued. This happens when the micro-ops exceed the fetch bandwidth: on the next cycle the fetch stage attempts to issue a request to the next line while it still has micro-ops to issue, and if the next line faults, the fault is attached to a micro-op in the currently executing macro-op rather than to a "nop" from the next instruction block. This leads to an instruction incorrectly faulting on fetch when it had no reason to fault. A similar problem occurs with interrupts. When an interrupt occurs, the fetch stage nominally stops issuing instructions immediately. This is incorrect in the case of a macro-op, as the current location might not be interruptible.
542 lines
17 KiB
C++
542 lines
17 KiB
C++
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */
|
|
|
|
#ifndef __CPU_O3_FETCH_HH__
|
|
#define __CPU_O3_FETCH_HH__
|
|
|
|
#include <list>
#include <string>

#include "arch/predecoder.hh"
#include "arch/utility.hh"
#include "base/statistics.hh"
#include "config/the_isa.hh"
#include "cpu/pc_event.hh"
#include "cpu/timebuf.hh"
#include "cpu/translation.hh"
#include "mem/packet.hh"
#include "mem/port.hh"
#include "sim/eventq.hh"
|
|
|
|
class DerivO3CPUParams;
|
|
|
|
/**
|
|
* DefaultFetch class handles both single threaded and SMT fetch. Its
|
|
* width is specified by the parameters; each cycle it tries to fetch
|
|
* that many instructions. It supports using a branch predictor to
|
|
* predict direction and targets.
|
|
* It supports the idling functionality of the CPU by indicating to
|
|
* the CPU when it is active and inactive.
|
|
*/
|
|
template <class Impl>
|
|
class DefaultFetch
|
|
{
|
|
public:
|
|
/** Typedefs from Impl. */
|
|
typedef typename Impl::CPUPol CPUPol;
|
|
typedef typename Impl::DynInst DynInst;
|
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
|
typedef typename Impl::O3CPU O3CPU;
|
|
|
|
/** Typedefs from the CPU policy. */
|
|
typedef typename CPUPol::BPredUnit BPredUnit;
|
|
typedef typename CPUPol::FetchStruct FetchStruct;
|
|
typedef typename CPUPol::TimeStruct TimeStruct;
|
|
|
|
/** Typedefs from ISA. */
|
|
typedef TheISA::MachInst MachInst;
|
|
typedef TheISA::ExtMachInst ExtMachInst;
|
|
|
|
/** IcachePort class for DefaultFetch. Handles doing the
|
|
* communication with the cache/memory.
|
|
*/
|
|
class IcachePort : public Port
|
|
{
|
|
protected:
|
|
/** Pointer to fetch. */
|
|
DefaultFetch<Impl> *fetch;
|
|
|
|
public:
|
|
/** Default constructor. */
|
|
IcachePort(DefaultFetch<Impl> *_fetch)
|
|
: Port(_fetch->name() + "-iport", _fetch->cpu), fetch(_fetch)
|
|
{ }
|
|
|
|
bool snoopRangeSent;
|
|
|
|
virtual void setPeer(Port *port);
|
|
|
|
protected:
|
|
/** Atomic version of receive. Panics. */
|
|
virtual Tick recvAtomic(PacketPtr pkt);
|
|
|
|
/** Functional version of receive. Panics. */
|
|
virtual void recvFunctional(PacketPtr pkt);
|
|
|
|
/** Receives status change. Other than range changing, panics. */
|
|
virtual void recvStatusChange(Status status);
|
|
|
|
/** Returns the address ranges of this device. */
|
|
virtual void getDeviceAddressRanges(AddrRangeList &resp,
|
|
bool &snoop)
|
|
{ resp.clear(); snoop = true; }
|
|
|
|
/** Timing version of receive. Handles setting fetch to the
|
|
* proper status to start fetching. */
|
|
virtual bool recvTiming(PacketPtr pkt);
|
|
|
|
/** Handles doing a retry of a failed fetch. */
|
|
virtual void recvRetry();
|
|
};
|
|
|
|
class FetchTranslation : public BaseTLB::Translation
|
|
{
|
|
protected:
|
|
DefaultFetch<Impl> *fetch;
|
|
|
|
public:
|
|
FetchTranslation(DefaultFetch<Impl> *_fetch)
|
|
: fetch(_fetch)
|
|
{}
|
|
|
|
void
|
|
markDelayed()
|
|
{}
|
|
|
|
void
|
|
finish(Fault fault, RequestPtr req, ThreadContext *tc,
|
|
BaseTLB::Mode mode)
|
|
{
|
|
assert(mode == BaseTLB::Execute);
|
|
fetch->finishTranslation(fault, req);
|
|
delete this;
|
|
}
|
|
};
|
|
|
|
public:
|
|
/** Overall fetch status. Used to determine if the CPU can
|
|
* deschedule itsef due to a lack of activity.
|
|
*/
|
|
enum FetchStatus {
|
|
Active,
|
|
Inactive
|
|
};
|
|
|
|
/** Individual thread status. */
|
|
enum ThreadStatus {
|
|
Running,
|
|
Idle,
|
|
Squashing,
|
|
Blocked,
|
|
Fetching,
|
|
TrapPending,
|
|
QuiescePending,
|
|
SwitchOut,
|
|
ItlbWait,
|
|
IcacheWaitResponse,
|
|
IcacheWaitRetry,
|
|
IcacheAccessComplete
|
|
};
|
|
|
|
/** Fetching Policy, Add new policies here.*/
|
|
enum FetchPriority {
|
|
SingleThread,
|
|
RoundRobin,
|
|
Branch,
|
|
IQ,
|
|
LSQ
|
|
};
|
|
|
|
private:
|
|
/** Fetch status. */
|
|
FetchStatus _status;
|
|
|
|
/** Per-thread status. */
|
|
ThreadStatus fetchStatus[Impl::MaxThreads];
|
|
|
|
/** Fetch policy. */
|
|
FetchPriority fetchPolicy;
|
|
|
|
/** List that has the threads organized by priority. */
|
|
std::list<ThreadID> priorityList;
|
|
|
|
public:
|
|
/** DefaultFetch constructor. */
|
|
DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params);
|
|
|
|
/** Returns the name of fetch. */
|
|
std::string name() const;
|
|
|
|
/** Registers statistics. */
|
|
void regStats();
|
|
|
|
/** Returns the icache port. */
|
|
Port *getIcachePort() { return icachePort; }
|
|
|
|
/** Sets the main backwards communication time buffer pointer. */
|
|
void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
|
|
|
|
/** Sets pointer to list of active threads. */
|
|
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
|
|
|
/** Sets pointer to time buffer used to communicate to the next stage. */
|
|
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
|
|
|
|
/** Initialize stage. */
|
|
void initStage();
|
|
|
|
/** Tells the fetch stage that the Icache is set. */
|
|
void setIcache();
|
|
|
|
/** Processes cache completion event. */
|
|
void processCacheCompletion(PacketPtr pkt);
|
|
|
|
/** Begins the drain of the fetch stage. */
|
|
bool drain();
|
|
|
|
/** Resumes execution after a drain. */
|
|
void resume();
|
|
|
|
/** Tells fetch stage to prepare to be switched out. */
|
|
void switchOut();
|
|
|
|
/** Takes over from another CPU's thread. */
|
|
void takeOverFrom();
|
|
|
|
/** Checks if the fetch stage is switched out. */
|
|
bool isSwitchedOut() { return switchedOut; }
|
|
|
|
/** Tells fetch to wake up from a quiesce instruction. */
|
|
void wakeFromQuiesce();
|
|
|
|
private:
|
|
/** Changes the status of this stage to active, and indicates this
|
|
* to the CPU.
|
|
*/
|
|
inline void switchToActive();
|
|
|
|
/** Changes the status of this stage to inactive, and indicates
|
|
* this to the CPU.
|
|
*/
|
|
inline void switchToInactive();
|
|
|
|
/**
|
|
* Looks up in the branch predictor to see if the next PC should be
|
|
* either next PC+=MachInst or a branch target.
|
|
* @param next_PC Next PC variable passed in by reference. It is
|
|
* expected to be set to the current PC; it will be updated with what
|
|
* the next PC will be.
|
|
* @param next_NPC Used for ISAs which use delay slots.
|
|
* @return Whether or not a branch was predicted as taken.
|
|
*/
|
|
bool lookupAndUpdateNextPC(DynInstPtr &inst, TheISA::PCState &pc);
|
|
|
|
/**
|
|
* Fetches the cache line that contains fetch_PC. Returns any
|
|
* fault that happened. Puts the data into the class variable
|
|
* cacheData.
|
|
* @param vaddr The memory address that is being fetched from.
|
|
* @param ret_fault The fault reference that will be set to the result of
|
|
* the icache access.
|
|
* @param tid Thread id.
|
|
* @param pc The actual PC of the current instruction.
|
|
* @return Any fault that occured.
|
|
*/
|
|
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc);
|
|
void finishTranslation(Fault fault, RequestPtr mem_req);
|
|
|
|
|
|
/** Check if an interrupt is pending and that we need to handle
|
|
*/
|
|
bool
|
|
checkInterrupt(Addr pc)
|
|
{
|
|
return (interruptPending && (THE_ISA != ALPHA_ISA || !(pc & 0x3)));
|
|
}
|
|
|
|
/** Squashes a specific thread and resets the PC. */
|
|
inline void doSquash(const TheISA::PCState &newPC, ThreadID tid);
|
|
|
|
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
|
* remove any instructions between fetch and decode that should be sqaushed.
|
|
*/
|
|
void squashFromDecode(const TheISA::PCState &newPC,
|
|
const InstSeqNum &seq_num, ThreadID tid);
|
|
|
|
/** Checks if a thread is stalled. */
|
|
bool checkStall(ThreadID tid) const;
|
|
|
|
/** Updates overall fetch stage status; to be called at the end of each
|
|
* cycle. */
|
|
FetchStatus updateFetchStatus();
|
|
|
|
public:
|
|
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
|
* remove any instructions that are not in the ROB. The source of this
|
|
* squash should be the commit stage.
|
|
*/
|
|
void squash(const TheISA::PCState &newPC, const InstSeqNum &seq_num,
|
|
DynInstPtr &squashInst, ThreadID tid);
|
|
|
|
/** Ticks the fetch stage, processing all inputs signals and fetching
|
|
* as many instructions as possible.
|
|
*/
|
|
void tick();
|
|
|
|
/** Checks all input signals and updates the status as necessary.
|
|
* @return: Returns if the status has changed due to input signals.
|
|
*/
|
|
bool checkSignalsAndUpdate(ThreadID tid);
|
|
|
|
/** Does the actual fetching of instructions and passing them on to the
|
|
* next stage.
|
|
* @param status_change fetch() sets this variable if there was a status
|
|
* change (ie switching to IcacheMissStall).
|
|
*/
|
|
void fetch(bool &status_change);
|
|
|
|
/** Align a PC to the start of an I-cache block. */
|
|
Addr icacheBlockAlignPC(Addr addr)
|
|
{
|
|
return (addr & ~(cacheBlkMask));
|
|
}
|
|
|
|
private:
|
|
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
|
|
StaticInstPtr curMacroop, TheISA::PCState thisPC,
|
|
TheISA::PCState nextPC, bool trace);
|
|
|
|
/** Handles retrying the fetch access. */
|
|
void recvRetry();
|
|
|
|
/** Returns the appropriate thread to fetch, given the fetch policy. */
|
|
ThreadID getFetchingThread(FetchPriority &fetch_priority);
|
|
|
|
/** Returns the appropriate thread to fetch using a round robin policy. */
|
|
ThreadID roundRobin();
|
|
|
|
/** Returns the appropriate thread to fetch using the IQ count policy. */
|
|
ThreadID iqCount();
|
|
|
|
/** Returns the appropriate thread to fetch using the LSQ count policy. */
|
|
ThreadID lsqCount();
|
|
|
|
/** Returns the appropriate thread to fetch using the branch count
|
|
* policy. */
|
|
ThreadID branchCount();
|
|
|
|
private:
|
|
/** Pointer to the O3CPU. */
|
|
O3CPU *cpu;
|
|
|
|
/** Time buffer interface. */
|
|
TimeBuffer<TimeStruct> *timeBuffer;
|
|
|
|
/** Wire to get decode's information from backwards time buffer. */
|
|
typename TimeBuffer<TimeStruct>::wire fromDecode;
|
|
|
|
/** Wire to get rename's information from backwards time buffer. */
|
|
typename TimeBuffer<TimeStruct>::wire fromRename;
|
|
|
|
/** Wire to get iew's information from backwards time buffer. */
|
|
typename TimeBuffer<TimeStruct>::wire fromIEW;
|
|
|
|
/** Wire to get commit's information from backwards time buffer. */
|
|
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
|
|
|
/** Internal fetch instruction queue. */
|
|
TimeBuffer<FetchStruct> *fetchQueue;
|
|
|
|
//Might be annoying how this name is different than the queue.
|
|
/** Wire used to write any information heading to decode. */
|
|
typename TimeBuffer<FetchStruct>::wire toDecode;
|
|
|
|
/** Icache interface. */
|
|
IcachePort *icachePort;
|
|
|
|
/** BPredUnit. */
|
|
BPredUnit branchPred;
|
|
|
|
/** Predecoder. */
|
|
TheISA::Predecoder predecoder;
|
|
|
|
TheISA::PCState pc[Impl::MaxThreads];
|
|
|
|
Addr fetchOffset[Impl::MaxThreads];
|
|
|
|
StaticInstPtr macroop[Impl::MaxThreads];
|
|
|
|
/** Can the fetch stage redirect from an interrupt on this instruction? */
|
|
bool delayedCommit[Impl::MaxThreads];
|
|
|
|
/** Memory request used to access cache. */
|
|
RequestPtr memReq[Impl::MaxThreads];
|
|
|
|
/** Variable that tracks if fetch has written to the time buffer this
|
|
* cycle. Used to tell CPU if there is activity this cycle.
|
|
*/
|
|
bool wroteToTimeBuffer;
|
|
|
|
/** Tracks how many instructions has been fetched this cycle. */
|
|
int numInst;
|
|
|
|
/** Source of possible stalls. */
|
|
struct Stalls {
|
|
bool decode;
|
|
bool rename;
|
|
bool iew;
|
|
bool commit;
|
|
};
|
|
|
|
/** Tracks which stages are telling fetch to stall. */
|
|
Stalls stalls[Impl::MaxThreads];
|
|
|
|
/** Decode to fetch delay, in ticks. */
|
|
unsigned decodeToFetchDelay;
|
|
|
|
/** Rename to fetch delay, in ticks. */
|
|
unsigned renameToFetchDelay;
|
|
|
|
/** IEW to fetch delay, in ticks. */
|
|
unsigned iewToFetchDelay;
|
|
|
|
/** Commit to fetch delay, in ticks. */
|
|
unsigned commitToFetchDelay;
|
|
|
|
/** The width of fetch in instructions. */
|
|
unsigned fetchWidth;
|
|
|
|
/** Is the cache blocked? If so no threads can access it. */
|
|
bool cacheBlocked;
|
|
|
|
/** The packet that is waiting to be retried. */
|
|
PacketPtr retryPkt;
|
|
|
|
/** The thread that is waiting on the cache to tell fetch to retry. */
|
|
ThreadID retryTid;
|
|
|
|
/** Cache block size. */
|
|
int cacheBlkSize;
|
|
|
|
/** Mask to get a cache block's address. */
|
|
Addr cacheBlkMask;
|
|
|
|
/** The cache line being fetched. */
|
|
uint8_t *cacheData[Impl::MaxThreads];
|
|
|
|
/** The PC of the cacheline that has been loaded. */
|
|
Addr cacheDataPC[Impl::MaxThreads];
|
|
|
|
/** Whether or not the cache data is valid. */
|
|
bool cacheDataValid[Impl::MaxThreads];
|
|
|
|
/** Size of instructions. */
|
|
int instSize;
|
|
|
|
/** Icache stall statistics. */
|
|
Counter lastIcacheStall[Impl::MaxThreads];
|
|
|
|
/** List of Active Threads */
|
|
std::list<ThreadID> *activeThreads;
|
|
|
|
/** Number of threads. */
|
|
ThreadID numThreads;
|
|
|
|
/** Number of threads that are actively fetching. */
|
|
ThreadID numFetchingThreads;
|
|
|
|
/** Thread ID being fetched. */
|
|
ThreadID threadFetched;
|
|
|
|
/** Checks if there is an interrupt pending. If there is, fetch
|
|
* must stop once it is not fetching PAL instructions.
|
|
*/
|
|
bool interruptPending;
|
|
|
|
/** Is there a drain pending. */
|
|
bool drainPending;
|
|
|
|
/** Records if fetch is switched out. */
|
|
bool switchedOut;
|
|
|
|
// @todo: Consider making these vectors and tracking on a per thread basis.
|
|
/** Stat for total number of cycles stalled due to an icache miss. */
|
|
Stats::Scalar icacheStallCycles;
|
|
/** Stat for total number of fetched instructions. */
|
|
Stats::Scalar fetchedInsts;
|
|
/** Total number of fetched branches. */
|
|
Stats::Scalar fetchedBranches;
|
|
/** Stat for total number of predicted branches. */
|
|
Stats::Scalar predictedBranches;
|
|
/** Stat for total number of cycles spent fetching. */
|
|
Stats::Scalar fetchCycles;
|
|
/** Stat for total number of cycles spent squashing. */
|
|
Stats::Scalar fetchSquashCycles;
|
|
/** Stat for total number of cycles spent waiting for translation */
|
|
Stats::Scalar fetchTlbCycles;
|
|
/** Stat for total number of cycles spent blocked due to other stages in
|
|
* the pipeline.
|
|
*/
|
|
Stats::Scalar fetchIdleCycles;
|
|
/** Total number of cycles spent blocked. */
|
|
Stats::Scalar fetchBlockedCycles;
|
|
/** Total number of cycles spent in any other state. */
|
|
Stats::Scalar fetchMiscStallCycles;
|
|
/** Stat for total number of fetched cache lines. */
|
|
Stats::Scalar fetchedCacheLines;
|
|
/** Total number of outstanding icache accesses that were dropped
|
|
* due to a squash.
|
|
*/
|
|
Stats::Scalar fetchIcacheSquashes;
|
|
/** Total number of outstanding tlb accesses that were dropped
|
|
* due to a squash.
|
|
*/
|
|
Stats::Scalar fetchTlbSquashes;
|
|
/** Distribution of number of instructions fetched each cycle. */
|
|
Stats::Distribution fetchNisnDist;
|
|
/** Rate of how often fetch was idle. */
|
|
Stats::Formula idleRate;
|
|
/** Number of branch fetches per cycle. */
|
|
Stats::Formula branchRate;
|
|
/** Number of instruction fetched per cycle. */
|
|
Stats::Formula fetchRate;
|
|
};
|
|
|
|
#endif //__CPU_O3_FETCH_HH__
|