gem5/cpu/beta_cpu/fetch_impl.hh
Kevin Lim 90d4436351 Slight fixes, add in commit trace flag.
base/traceflags.py:
    Add new commit rate trace flag.
build/SConstruct:
    Add extra option for efence.
cpu/beta_cpu/alpha_full_cpu_impl.hh:
    Use function calls instead of direct indexing (avoids confusion).
cpu/beta_cpu/commit_impl.hh:
    Add commit rate trace output (might not be worthwhile in the future).
cpu/beta_cpu/decode_impl.hh:
    Remove some older hacks.  Fix it so that the isntruction properly sets its next
    PC to the one calculated by the branch.
cpu/beta_cpu/fetch_impl.hh:
    Remove old commented code.
cpu/beta_cpu/iew_impl.hh:
    Add extra check to ensure that the instruction is valid.
cpu/beta_cpu/regfile.hh:
    Include trace file.

--HG--
extra : convert_revision : 4ee1dc88f8a5ed9b65486c6c111a3718a8040e42
2005-01-11 18:52:29 -05:00

599 lines
18 KiB
C++

// Remove this later; used only for debugging.
#define OPCODE(X) (X >> 26) & 0x3f
#include "arch/alpha/byte_swap.hh"
#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "cpu/beta_cpu/fetch.hh"
#include "sim/universe.hh"
template<class Impl>
SimpleFetch<Impl>::CacheCompletionEvent
::CacheCompletionEvent(SimpleFetch *_fetch)
: Event(&mainEventQueue),
fetch(_fetch)
{
}
template<class Impl>
void
SimpleFetch<Impl>::CacheCompletionEvent::process()
{
fetch->processCacheCompletion();
}
template<class Impl>
const char *
SimpleFetch<Impl>::CacheCompletionEvent::description()
{
return "SimpleFetch cache completion event";
}
template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
: //cacheCompletionEvent(this),
icacheInterface(params.icacheInterface),
branchPred(params),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
iewToFetchDelay(params.iewToFetchDelay),
commitToFetchDelay(params.commitToFetchDelay),
fetchWidth(params.fetchWidth)
{
// Set status to idle.
_status = Idle;
// Create a new memory request.
memReq = new MemReq();
// Not sure of this parameter. I think it should be based on the
// thread number.
#ifndef FULL_SYSTEM
memReq->asid = params.asid;
#else
memReq->asid = 0;
#endif // FULL_SYSTEM
memReq->data = new uint8_t[64];
// Size of cache block.
cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
// Create mask to get rid of offset bits.
cacheBlkMask = (cacheBlkSize - 1);
// Get the size of an instruction.
instSize = sizeof(MachInst);
// Create space to store a cache line.
cacheData = new uint8_t[cacheBlkSize];
}
template <class Impl>
void
SimpleFetch<Impl>::regStats()
{
icacheStallCycles
.name(name() + ".icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles);
fetchedInsts
.name(name() + ".fetchedInsts")
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
predictedBranches
.name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches);
fetchCycles
.name(name() + ".fetchCycles")
.desc("Number of cycles fetch has run and was not squashing or"
" blocked")
.prereq(fetchCycles);
fetchSquashCycles
.name(name() + ".fetchSquashCycles")
.desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles);
fetchBlockedCycles
.name(name() + ".fetchBlockedCycles")
.desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles);
fetchedCacheLines
.name(name() + ".fetchedCacheLines")
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetch_nisn_dist
.init(/* base value */ 0,
/* last value */ fetchWidth,
/* bucket size */ 1)
.name(name() + ".FETCH:rate_dist")
.desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf)
;
branchPred.regStats();
}
template<class Impl>
void
SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
{
DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
cpu = cpu_ptr;
// This line will be removed eventually.
memReq->xc = cpu->xcBase();
}
template<class Impl>
void
SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
timeBuffer = time_buffer;
// Create wires to get information from proper places in time buffer.
fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
fromRename = timeBuffer->getWire(-renameToFetchDelay);
fromIEW = timeBuffer->getWire(-iewToFetchDelay);
fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}
template<class Impl>
void
SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
fetchQueue = fq_ptr;
// Create wire to write information to proper place in fetch queue.
toDecode = fetchQueue->getWire(0);
}
template<class Impl>
void
SimpleFetch<Impl>::processCacheCompletion()
{
DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");
// Only change the status if it's still waiting on the icache access
// to return.
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
// How to handle an outstanding miss which gets cancelled due to squash,
// then a new icache miss gets scheduled?
if (_status == IcacheMissStall)
_status = IcacheMissComplete;
}
#if 0
template <class Impl>
inline void
SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst)
{
inst->setGlobalHist(branchPred.BPReadGlobalHist());
}
#endif
template <class Impl>
bool
SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
// this function updates it.
bool predict_taken;
if (!inst->isControl()) {
next_PC = next_PC + instSize;
inst->setPredTarg(next_PC);
return false;
}
predict_taken = branchPred.predict(inst, next_PC);
if (predict_taken) {
++predictedBranches;
}
return predict_taken;
}
template <class Impl>
Fault
SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
{
// Check if the instruction exists within the cache.
// If it does, then proceed on to read the instruction and the rest
// of the instructions in the cache line until either the end of the
// cache line or a predicted taken branch is encountered.
#ifdef FULL_SYSTEM
// Flag to say whether or not address is physical addr.
unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
unsigned flags = 0;
#endif // FULL_SYSTEM
Fault fault = No_Fault;
// Align the fetch PC so it's at the start of a cache block.
fetch_PC = icacheBlockAlignPC(fetch_PC);
// Setup the memReq to do a read of the first isntruction's address.
// Set the appropriate read size and flags as well.
memReq->cmd = Read;
memReq->reset(fetch_PC, cacheBlkSize, flags);
// Translate the instruction request.
// Should this function be
// in the CPU class ? Probably...ITB/DTB should exist within the
// CPU.
fault = cpu->translateInstReq(memReq);
// In the case of faults, the fetch stage may need to stall and wait
// on what caused the fetch (ITB or Icache miss).
// If translation was successful, attempt to read the first
// instruction.
if (fault == No_Fault) {
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
fault = cpu->mem->read(memReq, cacheData);
// This read may change when the mem interface changes.
fetchedCacheLines++;
}
// Now do the timing access to see whether or not the instruction
// exists within the cache.
if (icacheInterface && fault == No_Fault) {
DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
memReq->completionEvent = NULL;
memReq->time = curTick;
MemAccessResult result = icacheInterface->access(memReq);
// If the cache missed (in this model functional and timing
// memories are different), then schedule an event to wake
// up this stage once the cache miss completes.
if (result != MA_HIT && icacheInterface->doEvents()) {
memReq->completionEvent = new CacheCompletionEvent(this);
// lastIcacheStall = curTick;
// How does current model work as far as individual
// stages scheduling/unscheduling?
// Perhaps have only the main CPU scheduled/unscheduled,
// and have it choose what stages to run appropriately.
DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
_status = IcacheMissStall;
}
}
return fault;
}
template <class Impl>
inline void
SimpleFetch<Impl>::doSquash(const Addr &new_PC)
{
DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
cpu->setNextPC(new_PC + instSize);
cpu->setPC(new_PC);
// Clear the icache miss if it's outstanding.
if (_status == IcacheMissStall && icacheInterface) {
DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
// @todo: Use an actual thread number here.
icacheInterface->squash(0);
}
_status = Squashing;
++fetchSquashCycles;
}
template<class Impl>
void
SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
const InstSeqNum &seq_num)
{
DPRINTF(Fetch, "Fetch: Squashing from decode.\n");
doSquash(new_PC);
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
cpu->removeInstsUntil(seq_num);
}
template <class Impl>
void
SimpleFetch<Impl>::squash(const Addr &new_PC)
{
DPRINTF(Fetch, "Fetch: Squash from commit.\n");
doSquash(new_PC);
// Tell the CPU to remove any instructions that are not in the ROB.
cpu->removeInstsNotInROB();
}
template<class Impl>
void
SimpleFetch<Impl>::tick()
{
// Check squash signals from commit.
if (fromCommit->commitInfo.squash) {
DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
"from commit.\n");
// In any case, squash.
squash(fromCommit->commitInfo.nextPC);
// Also check if there's a mispredict that happened.
if (fromCommit->commitInfo.branchMispredict) {
branchPred.squash(fromCommit->commitInfo.doneSeqNum,
fromCommit->commitInfo.nextPC,
fromCommit->commitInfo.branchTaken);
} else {
branchPred.squash(fromCommit->commitInfo.doneSeqNum);
}
return;
} else if (fromCommit->commitInfo.doneSeqNum) {
// Update the branch predictor if it wasn't a squashed instruction
// that was braodcasted.
branchPred.update(fromCommit->commitInfo.doneSeqNum);
}
// Check ROB squash signals from commit.
if (fromCommit->commitInfo.robSquashing) {
DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
// Continue to squash.
_status = Squashing;
++fetchSquashCycles;
return;
}
// Check squash signals from decode.
if (fromDecode->decodeInfo.squash) {
DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
"from decode.\n");
// Update the branch predictor.
if (fromDecode->decodeInfo.branchMispredict) {
branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
fromDecode->decodeInfo.nextPC,
fromDecode->decodeInfo.branchTaken);
} else {
branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
}
if (_status != Squashing) {
// Squash unless we're already squashing?
squashFromDecode(fromDecode->decodeInfo.nextPC,
fromDecode->decodeInfo.doneSeqNum);
return;
}
}
// Check if any of the stall signals are high.
if (fromDecode->decodeInfo.stall ||
fromRename->renameInfo.stall ||
fromIEW->iewInfo.stall ||
fromCommit->commitInfo.stall)
{
// Block stage, regardless of current status.
DPRINTF(Fetch, "Fetch: Stalling stage.\n");
DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
"Commit: %i\n",
fromDecode->decodeInfo.stall,
fromRename->renameInfo.stall,
fromIEW->iewInfo.stall,
fromCommit->commitInfo.stall);
_status = Blocked;
++fetchBlockedCycles;
return;
} else if (_status == Blocked) {
// Unblock stage if status is currently blocked and none of the
// stall signals are being held high.
_status = Running;
++fetchBlockedCycles;
return;
}
// If fetch has reached this point, then there are no squash signals
// still being held high. Check if fetch is in the squashing state;
// if so, fetch can switch to running.
// Similarly, there are no blocked signals still being held high.
// Check if fetch is in the blocked state; if so, fetch can switch to
// running.
if (_status == Squashing) {
DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");
// Switch status to running
_status = Running;
++fetchSquashCycles;
} else if (_status != IcacheMissStall) {
DPRINTF(Fetch, "Fetch: Running stage.\n");
++fetchCycles;
fetch();
}
}
template<class Impl>
void
SimpleFetch<Impl>::fetch()
{
//////////////////////////////////////////
// Start actual fetch
//////////////////////////////////////////
// The current PC.
Addr fetch_PC = cpu->readPC();
// Fault code for memory access.
Fault fault = No_Fault;
// If returning from the delay of a cache miss, then update the status
// to running, otherwise do the cache access. Possibly move this up
// to tick() function.
if (_status == IcacheMissComplete) {
DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");
// Reset the completion event to NULL.
memReq->completionEvent = NULL;
_status = Running;
} else {
DPRINTF(Fetch, "Fetch: Attempting to translate and read "
"instruction, starting at PC %08p.\n",
fetch_PC);
fault = fetchCacheLine(fetch_PC);
}
// If we had a stall due to an icache miss, then return. It'd
// be nicer if this were handled through the kind of fault that
// is returned by the function.
if (_status == IcacheMissStall) {
return;
}
// As far as timing goes, the CPU will need to send an event through
// the MemReq in order to be woken up once the memory access completes.
// Probably have a status on a per thread basis so each thread can
// block independently and be woken up independently.
Addr next_PC = fetch_PC;
InstSeqNum inst_seq;
MachInst inst;
unsigned offset = fetch_PC & cacheBlkMask;
unsigned fetched;
if (fault == No_Fault) {
// If the read of the first instruction was successful, then grab the
// instructions from the rest of the cache line and put them into the
// queue heading to decode.
DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
//////////////////////////
// Fetch first instruction
//////////////////////////
// Need to keep track of whether or not a predicted branch
// ended this fetch block.
bool predicted_branch = false;
for (fetched = 0;
offset < cacheBlkSize &&
fetched < fetchWidth &&
!predicted_branch;
++fetched)
{
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
// Make sure this is a valid index.
assert(offset <= cacheBlkSize - instSize);
// Get the instruction from the array of the cache line.
inst = htoa(*reinterpret_cast<MachInst *>
(&cacheData[offset]));
// Create a new DynInst from the instruction fetched.
DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
inst_seq, cpu);
DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
inst_seq, instruction->readPC());
DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
OPCODE(inst));
instruction->traceData =
Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
instruction->staticInst,
instruction->readPC(), 0);
predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
// Add instruction to the CPU's list of instructions.
cpu->addInst(instruction);
// Write the instruction to the first slot in the queue
// that heads to decode.
toDecode->insts[fetched] = instruction;
toDecode->size++;
// Increment stat of fetched instructions.
++fetchedInsts;
// Move to the next instruction, unless we have a branch.
fetch_PC = next_PC;
offset+= instSize;
}
fetch_nisn_dist.sample(fetched);
}
// Now that fetching is completed, update the PC to signify what the next
// cycle will be. Might want to move this to the beginning of this
// function so that the PC updates at the beginning of everything.
// Or might want to leave setting the PC to the main CPU, with fetch
// only changing the nextPC (will require correct determination of
// next PC).
if (fault == No_Fault) {
DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
cpu->setPC(next_PC);
cpu->setNextPC(next_PC + instSize);
} else {
// If the issue was an icache miss, then we can just return and
// wait until it is handled.
if (_status == IcacheMissStall) {
return;
}
// Handle the fault.
// This stage will not be able to continue until all the ROB
// slots are empty, at which point the fault can be handled.
// The only other way it can wake up is if a squash comes along
// and changes the PC. Not sure how to handle that case...perhaps
// have it handled by the upper level CPU class which peeks into the
// time buffer and sees if a squash comes along, in which case it
// changes the status.
DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");
_status = Blocked;
#ifdef FULL_SYSTEM
// cpu->trap(fault);
// Send a signal to the ROB indicating that there's a trap from the
// fetch stage that needs to be handled. Need to indicate that
// there's a fault, and the fault type.
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
}
}