O3 code update/cleanup.

cpu/o3/commit_impl.hh:
    O3 code update/cleanup.  Fetch fault code no longer needed (see previous checkin).

--HG--
extra : convert_revision : f602e7f978e19b8900dce482f38f9c7a195e94da
Kevin Lim 2006-05-19 15:53:17 -04:00
parent 1a6f21b8d2
commit e3d5588ca7
28 changed files with 381 additions and 907 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -87,7 +87,8 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual Status status() const { return thread->status(); }
virtual void setStatus(Status new_status) { thread->setStatus(new_status); }
virtual void setStatus(Status new_status)
{ thread->setStatus(new_status); }
/// Set the status to Active. Optional delay indicates number of
/// cycles to wait before beginning execution.
@ -168,12 +169,15 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
// @todo: Figure out where these store cond failures should go.
virtual unsigned readStCondFailures() { return thread->storeCondFailures; }
virtual unsigned readStCondFailures()
{ return thread->storeCondFailures; }
virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; }
virtual void setStCondFailures(unsigned sc_failures)
{ thread->storeCondFailures = sc_failures; }
#if FULL_SYSTEM
virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
virtual bool inPalMode()
{ return TheISA::PcPAL(cpu->readPC(thread->tid)); }
#endif
// Only really makes sense for old CPU model. Lots of code
@ -194,10 +198,6 @@ class AlphaFullCPU : public FullO3CPU<Impl>
#endif
};
// friend class AlphaXC;
// std::vector<ExecContext *> xcProxies;
#if FULL_SYSTEM
/** ITB pointer. */
AlphaITB *itb;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View file

@ -43,12 +43,7 @@
/**
* Basically a wrapper class to hold both the branch predictor
* and the BTB. Right now I'm unsure of the implementation; it would
* be nicer to have something closer to the CPUPolicy or the Impl where
* this is just typedefs, but it forces the upper level stages to be
* aware of the constructors of the BP and the BTB. The nicer thing
* to do is have this templated on the Impl, accept the usual Params
* object, and be able to call the constructors on the BP and BTB.
* and the BTB.
*/
template<class Impl>
class TwobitBPredUnit
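In sketch form, the wrapper just owns the two predictors and delegates to them. A minimal illustration (the lookup/valid names and the DefaultBP/DefaultBTB types are assumptions for this sketch, not the actual m5 interface):

template<class Impl>
class BPredUnitSketch
{
  public:
    // Predict direction with the 2-bit counters; consult the BTB for
    // a target only when the branch is predicted taken.
    bool predict(Addr inst_PC, Addr &target)
    {
        bool taken = BP.lookup(inst_PC);
        if (taken && BTB.valid(inst_PC))
            target = BTB.lookup(inst_PC);
        return taken;
    }

  private:
    DefaultBP BP;    // 2-bit saturating-counter direction predictor
    DefaultBTB BTB;  // branch target buffer
};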

View file

@ -26,13 +26,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <list>
#include <vector>
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/o3/bpred_unit.hh"
#include <vector>
#include <list>
using namespace std;
template<class Impl>

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -169,10 +169,6 @@ struct TimeBufStruct {
bool commitInsts;
InstSeqNum squashSeqNum;
// Extra bit of information so that the LDSTQ only updates when it
// needs to.
bool commitIsLoad;
// Communication specifically to the IQ to tell the IQ that it can
// schedule a non-speculative instruction.
InstSeqNum nonSpecSeqNum;

View file

@ -30,10 +30,10 @@
#define __CPU_O3_COMMIT_HH__
#include "arch/faults.hh"
#include "cpu/inst_seq.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/exetrace.hh"
#include "cpu/inst_seq.hh"
#include "mem/memory_interface.hh"
template <class>
@ -59,8 +59,7 @@ class O3ThreadState;
* squashing instruction's sequence number, and only broadcasting a
* redirect if it corresponds to an older instruction. Commit also
* supports multiple cycle squashing, to model a ROB that can only
* remove a certain number of instructions per cycle. Eventually traps
* and interrupts will most likely be handled here as well.
* remove a certain number of instructions per cycle.
*/
template<class Impl>
class DefaultCommit
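The "only broadcast a redirect if it corresponds to an older instruction" rule reduces to a sequence-number comparison; a minimal sketch with hypothetical variable names:

// Each incoming squash carries a sequence number; commit keeps only
// the oldest pending squash and ignores younger ones.
if (!squashPending || squash_seq_num < pendingSquashSeqNum) {
    pendingSquashSeqNum = squash_seq_num;
    squashPending = true;   // redirect the front end for this squash
}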

View file

@ -27,12 +27,7 @@
*/
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <stdio.h>
#include <string.h>
#include <string>
#include "base/loader/symtab.hh"
#include "base/timebuf.hh"
@ -835,58 +830,6 @@ DefaultCommit<Impl>::commitInsts()
unsigned num_committed = 0;
DynInstPtr head_inst;
#if FULL_SYSTEM
// Not the best way to check if the front end is empty, but it should
// work.
// @todo: Try to avoid directly accessing fetch.
if (commitStatus[0] == FetchTrapPending && rob->isEmpty()) {
DPRINTF(Commit, "Fault from fetch is pending.\n");
fetchTrapWait++;
if (fetchTrapWait > 10000000) {
panic("Fetch trap has been pending for a long time!");
}
if (fetchFaultTick > curTick) {
DPRINTF(Commit, "Not enough cycles since fault, fault will "
"happen on %lli\n",
fetchFaultTick);
cpu->activityThisCycle();
return;
} else if (iewStage->hasStoresToWB()) {
DPRINTF(Commit, "IEW still has stores to WB. Waiting until "
"they are completed. fetchTrapWait:%i\n",
fetchTrapWait);
cpu->activityThisCycle();
return;
} else if (cpu->inPalMode(readPC())) {
DPRINTF(Commit, "In pal mode right now. fetchTrapWait:%i\n",
fetchTrapWait);
return;
} else if (fetchStage->getYoungestSN() > youngestSeqNum[0]) {
DPRINTF(Commit, "Waiting for front end to drain. fetchTrapWait:%i\n",
fetchTrapWait);
return;
}
fetchTrapWait = 0;
DPRINTF(Commit, "ROB is empty, handling fetch trap.\n");
assert(!thread[0]->inSyscall);
thread[0]->inSyscall = true;
// Consider holding onto the trap and waiting until the trap event
// happens for this to be executed.
cpu->trap(fetchFault, 0);
// Exit state update mode to avoid accidental updating.
thread[0]->inSyscall = false;
commitStatus[0] = TrapPending;
// Set it up so that we squash next cycle
trapSquash[0] = true;
return;
}
#endif
// Commit as many instructions as possible until the commit bandwidth
// limit is reached, or it becomes impossible to commit any more.

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -35,11 +35,11 @@
#include "base/timebuf.hh"
/**
* DefaultDecode class handles both single threaded and SMT decode. Its width is
* specified by the parameters; each cycle it tries to decode that many
* instructions. Because instructions are actually decoded when the StaticInst
* is created, this stage does not do much other than check any PC-relative
* branches.
* DefaultDecode class handles both single threaded and SMT
* decode. Its width is specified by the parameters; each cycle it
* tries to decode that many instructions. Because instructions are
* actually decoded when the StaticInst is created, this stage does
* not do much other than check any PC-relative branches.
*/
template<class Impl>
class DefaultDecode
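The PC-relative branch check mentioned above amounts to comparing the target predicted at fetch against the target computable at decode; a sketch, with assumed accessor names:

// Direct (PC-relative) branches have a known target once decoded, so
// a wrong predicted target can be corrected here rather than waiting
// for execute.
if (inst->isDirectCtrl() && inst->readPredTaken()) {
    if (inst->readPredTarg() != inst->branchTarget())
        squashDueToBranch(inst, tid);   // redirect fetch early
}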

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -39,7 +39,6 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
decodeWidth(params->decodeWidth),
numThreads(params->numberOfThreads)
{
DPRINTF(Decode, "decodeWidth=%i.\n", decodeWidth);
_status = Inactive;
for (int i = 0; i < numThreads; ++i) {
@ -249,8 +248,6 @@ template<class Impl>
bool
DefaultDecode<Impl>::unblock(unsigned tid)
{
DPRINTF(Decode, "[tid:%u]: Trying to unblock.\n", tid);
// Decode is done unblocking only if the skid buffer is empty.
if (skidBuffer[tid].empty()) {
DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
@ -261,6 +258,8 @@ DefaultDecode<Impl>::unblock(unsigned tid)
return true;
}
DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
return false;
}
@ -318,6 +317,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals to
// be high. This shouldn't happen in full system.
// @todo: Determine if this still happens.
if (toFetch->decodeBlock[tid]) {
toFetch->decodeBlock[tid] = 0;
} else {
@ -372,7 +372,7 @@ DefaultDecode<Impl>::skidInsert(unsigned tid)
skidBuffer[tid].push(inst);
}
// Eventually need to enforce this by not letting a thread
// @todo: Eventually need to enforce this by not letting a thread
// fetch past its skidbuffer
assert(skidBuffer[tid].size() <= skidBufferMax);
}
@ -436,10 +436,10 @@ void
DefaultDecode<Impl>::sortInsts()
{
int insts_from_fetch = fromFetch->size;
#ifdef DEBUG
for (int i=0; i < numThreads; i++)
assert(insts[i].empty());
#endif
for (int i = 0; i < insts_from_fetch; ++i) {
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -38,12 +38,12 @@
class Sampler;
/**
* DefaultFetch class handles both single threaded and SMT fetch. Its width is
* specified by the parameters; each cycle it tries to fetch that many
* instructions. It supports using a branch predictor to predict direction and
* targets.
* It supports the idling functionality of the CPU by indicating to the CPU
* when it is active and inactive.
* DefaultFetch class handles both single threaded and SMT fetch. Its
* width is specified by the parameters; each cycle it tries to fetch
* that many instructions. It supports using a branch predictor to
* predict direction and targets.
* It supports the idling functionality of the CPU by indicating to
* the CPU when it is active and inactive.
*/
template <class Impl>
class DefaultFetch
@ -66,8 +66,8 @@ class DefaultFetch
typedef TheISA::ExtMachInst ExtMachInst;
public:
/** Overall fetch status. Used to determine if the CPU can deschedule itself
* due to a lack of activity.
/** Overall fetch status. Used to determine if the CPU can
* deschedule itself due to a lack of activity.
*/
enum FetchStatus {
Active,
@ -174,13 +174,13 @@ class DefaultFetch
void wakeFromQuiesce();
private:
/** Changes the status of this stage to active, and indicates this to the
* CPU.
/** Changes the status of this stage to active, and indicates this
* to the CPU.
*/
inline void switchToActive();
/** Changes the status of this stage to inactive, and indicates this to the
* CPU.
/** Changes the status of this stage to inactive, and indicates
* this to the CPU.
*/
inline void switchToInactive();
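A plausible body for these helpers, given the comment (assumed here, not taken from this patch):

template <class Impl>
inline void
DefaultFetch<Impl>::switchToActive()
{
    if (_status == Inactive) {
        DPRINTF(Activity, "Activating stage.\n");
        cpu->activateStage(FullCPU::FetchIdx);  // keep the CPU scheduled
        _status = Active;
    }
}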
@ -373,11 +373,6 @@ class DefaultFetch
bool switchedOut;
public:
InstSeqNum &getYoungestSN() { return youngestSN; }
private:
InstSeqNum youngestSN;
#if !FULL_SYSTEM
/** Page table pointer. */
// PageTable *pTable;

View file

@ -938,10 +938,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
"decode.\n",tid);
//////////////////////////
// Fetch first instruction
//////////////////////////
// Need to keep track of whether or not a predicted branch
// ended this fetch block.
bool predicted_branch = false;
@ -1004,7 +1000,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
warn("%lli: Quiesce instruction encountered, halting fetch!",
curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@ -1022,24 +1019,20 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Now that fetching is completed, update the PC to signify what the next
// cycle will be.
if (fault == NoFault) {
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
PC[tid] = next_PC;
nextPC[tid] = next_PC + instSize;
} else {
// If the issue was an icache miss, then we can just return and
// wait until it is handled.
// We shouldn't be in an icache miss and also have a fault (an ITB
// miss)
if (fetchStatus[tid] == IcacheMissStall) {
panic("Fetch should have exited prior to this!");
}
// Handle the fault.
// This stage will not be able to continue until all the ROB
// slots are empty, at which point the fault can be handled.
// The only other way it can wake up is if a squash comes along
// and changes the PC.
// Send the fault to commit. This thread will not do anything
// until commit handles the fault. The only other way it can
// wake up is if a squash comes along and changes the PC.
#if FULL_SYSTEM
assert(numInst != fetchWidth);
// Get a sequence number.
@ -1067,20 +1060,12 @@ DefaultFetch<Impl>::fetch(bool &status_change)
toDecode->insts[numInst] = instruction;
toDecode->size++;
// Tell the commit stage the fault we had.
// toDecode->fetchFault = fault;
// toDecode->fetchFaultSN = cpu->globalSeqNum;
DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
fetchStatus[tid] = TrapPending;
status_change = true;
warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
// cpu->trap(fault);
// Send a signal to the ROB indicating that there's a trap from the
// fetch stage that needs to be handled. Need to indicate that
// there's a fault, and the fault type.
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
#endif // FULL_SYSTEM

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,10 +32,9 @@
#include <map>
#include <queue>
#include "base/hashmap.hh"
#include "config/full_system.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/cpu_policy.hh"
//#include "cpu/o3/cpu_policy.hh"
#include "cpu/o3/lsq_unit.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
@ -85,7 +84,8 @@ class LSQ {
/** Ticks the LSQ. */
void tick();
/** Ticks a specific LSQ Unit. */
void tick(unsigned tid);
void tick(unsigned tid)
{ thread[tid].tick(); }
/** Inserts a load into the LSQ. */
void insertLoad(DynInstPtr &load_inst);
@ -95,18 +95,23 @@ class LSQ {
/** Executes a load. */
Fault executeLoad(DynInstPtr &inst);
Fault executeLoad(int lq_idx, unsigned tid);
Fault executeLoad(int lq_idx, unsigned tid)
{ return thread[tid].executeLoad(lq_idx); }
/** Executes a store. */
Fault executeStore(DynInstPtr &inst);
/**
* Commits loads up until the given sequence number for a specific thread.
*/
void commitLoads(InstSeqNum &youngest_inst, unsigned tid);
void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
{ thread[tid].commitLoads(youngest_inst); }
/**
* Commits stores up until the given sequence number for a specific thread.
*/
void commitStores(InstSeqNum &youngest_inst, unsigned tid);
void commitStores(InstSeqNum &youngest_inst, unsigned tid)
{ thread[tid].commitStores(youngest_inst); }
/**
* Attempts to write back stores until all cache ports are used or the
@ -119,7 +124,8 @@ class LSQ {
/**
* Squash instructions from a thread until the specified sequence number.
*/
void squash(const InstSeqNum &squashed_num, unsigned tid);
void squash(const InstSeqNum &squashed_num, unsigned tid)
{ thread[tid].squash(squashed_num); }
/** Returns whether or not there was a memory ordering violation. */
bool violation();
@ -127,12 +133,14 @@ class LSQ {
* Returns whether or not there was a memory ordering violation for a
* specific thread.
*/
bool violation(unsigned tid);
bool violation(unsigned tid)
{ return thread[tid].violation(); }
/** Returns if a load is blocked due to the memory system for a specific
* thread.
*/
bool loadBlocked(unsigned tid);
bool loadBlocked(unsigned tid)
{ return thread[tid].loadBlocked(); }
bool isLoadBlockedHandled(unsigned tid)
{ return thread[tid].isLoadBlockedHandled(); }
@ -141,10 +149,13 @@ class LSQ {
{ thread[tid].setLoadBlockedHandled(); }
/** Gets the instruction that caused the memory ordering violation. */
DynInstPtr getMemDepViolator(unsigned tid);
DynInstPtr getMemDepViolator(unsigned tid)
{ return thread[tid].getMemDepViolator(); }
/** Returns the head index of the load queue for a specific thread. */
int getLoadHead(unsigned tid);
int getLoadHead(unsigned tid)
{ return thread[tid].getLoadHead(); }
/** Returns the sequence number of the head of the load queue. */
InstSeqNum getLoadHeadSeqNum(unsigned tid)
{
@ -152,7 +163,9 @@ class LSQ {
}
/** Returns the head index of the store queue. */
int getStoreHead(unsigned tid);
int getStoreHead(unsigned tid)
{ return thread[tid].getStoreHead(); }
/** Returns the sequence number of the head of the store queue. */
InstSeqNum getStoreHeadSeqNum(unsigned tid)
{
@ -162,22 +175,26 @@ class LSQ {
/** Returns the number of instructions in all of the queues. */
int getCount();
/** Returns the number of instructions in the queues of one thread. */
int getCount(unsigned tid);
int getCount(unsigned tid)
{ return thread[tid].getCount(); }
/** Returns the total number of loads in the load queue. */
int numLoads();
/** Returns the total number of loads for a single thread. */
int numLoads(unsigned tid);
int numLoads(unsigned tid)
{ return thread[tid].numLoads(); }
/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
int numStores(unsigned tid);
int numStores(unsigned tid)
{ return thread[tid].numStores(); }
/** Returns the total number of loads that are ready. */
int numLoadsReady();
/** Returns the number of loads that are ready for a single thread. */
int numLoadsReady(unsigned tid);
int numLoadsReady(unsigned tid)
{ return thread[tid].numLoadsReady(); }
/** Returns the number of free entries. */
unsigned numFreeEntries();
@ -215,24 +232,30 @@ class LSQ {
/** Returns whether or not there are any stores to write back to memory. */
bool hasStoresToWB();
/** Returns whether or not a specific thread has any stores to write back
* to memory.
*/
bool hasStoresToWB(unsigned tid);
bool hasStoresToWB(unsigned tid)
{ return thread[tid].hasStoresToWB(); }
/** Returns the number of stores a specific thread has to write back. */
int numStoresToWB(unsigned tid);
int numStoresToWB(unsigned tid)
{ return thread[tid].numStoresToWB(); }
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
* cycle.
*/
bool willWB(unsigned tid);
bool willWB(unsigned tid)
{ return thread[tid].willWB(); }
/** Debugging function to print out all instructions. */
void dumpInsts();
/** Debugging function to print out instructions from a specific thread. */
void dumpInsts(unsigned tid);
void dumpInsts(unsigned tid)
{ thread[tid].dumpInsts(); }
/** Executes a read operation, using the load specified at the load index. */
template <class T>

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -26,6 +26,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include <string>
#include "cpu/o3/lsq.hh"
using namespace std;
@ -89,7 +92,7 @@ LSQ<Impl>::LSQ(Params *params)
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
thread[tid].init(params, maxLQEntries+1, maxSQEntries+1, tid);
thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
}
}
@ -226,13 +229,6 @@ LSQ<Impl>::tick()
}
}
template<class Impl>
void
LSQ<Impl>::tick(unsigned tid)
{
thread[tid].tick();
}
template<class Impl>
void
LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
@ -260,13 +256,6 @@ LSQ<Impl>::executeLoad(DynInstPtr &inst)
return thread[tid].executeLoad(inst);
}
template<class Impl>
Fault
LSQ<Impl>::executeLoad(int lq_idx, unsigned tid)
{
return thread[tid].executeLoad(lq_idx);
}
template<class Impl>
Fault
LSQ<Impl>::executeStore(DynInstPtr &inst)
@ -276,20 +265,6 @@ LSQ<Impl>::executeStore(DynInstPtr &inst)
return thread[tid].executeStore(inst);
}
template<class Impl>
void
LSQ<Impl>::commitLoads(InstSeqNum &youngest_inst,unsigned tid)
{
thread[tid].commitLoads(youngest_inst);
}
template<class Impl>
void
LSQ<Impl>::commitStores(InstSeqNum &youngest_inst,unsigned tid)
{
thread[tid].commitStores(youngest_inst);
}
template<class Impl>
void
LSQ<Impl>::writebackStores()
@ -300,28 +275,14 @@ LSQ<Impl>::writebackStores()
unsigned tid = *active_threads++;
if (numStoresToWB(tid) > 0) {
DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores available"
" for Writeback.\n", tid, numStoresToWB(tid));
DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
"available for Writeback.\n", tid, numStoresToWB(tid));
}
thread[tid].writebackStores();
}
}
template<class Impl>
int
LSQ<Impl>::numStoresToWB(unsigned tid)
{
return thread[tid].numStoresToWB();
}
template<class Impl>
void
LSQ<Impl>::squash(const InstSeqNum &squashed_num, unsigned tid)
{
thread[tid].squash(squashed_num);
}
template<class Impl>
bool
LSQ<Impl>::violation()
@ -338,41 +299,6 @@ LSQ<Impl>::violation()
return false;
}
template<class Impl>
bool
LSQ<Impl>::violation(unsigned tid)
{
return thread[tid].violation();
}
template<class Impl>
bool
LSQ<Impl>::loadBlocked(unsigned tid)
{
return thread[tid].loadBlocked();
}
template<class Impl>
typename Impl::DynInstPtr
LSQ<Impl>::getMemDepViolator(unsigned tid)
{
return thread[tid].getMemDepViolator();
}
template<class Impl>
int
LSQ<Impl>::getLoadHead(unsigned tid)
{
return thread[tid].getLoadHead();
}
template<class Impl>
int
LSQ<Impl>::getStoreHead(unsigned tid)
{
return thread[tid].getStoreHead();
}
template<class Impl>
int
LSQ<Impl>::getCount()
@ -389,13 +315,6 @@ LSQ<Impl>::getCount()
return total;
}
template<class Impl>
int
LSQ<Impl>::getCount(unsigned tid)
{
return thread[tid].getCount();
}
template<class Impl>
int
LSQ<Impl>::numLoads()
@ -412,13 +331,6 @@ LSQ<Impl>::numLoads()
return total;
}
template<class Impl>
int
LSQ<Impl>::numLoads(unsigned tid)
{
return thread[tid].numLoads();
}
template<class Impl>
int
LSQ<Impl>::numStores()
@ -435,13 +347,6 @@ LSQ<Impl>::numStores()
return total;
}
template<class Impl>
int
LSQ<Impl>::numStores(unsigned tid)
{
return thread[tid].numStores();
}
template<class Impl>
int
LSQ<Impl>::numLoadsReady()
@ -458,13 +363,6 @@ LSQ<Impl>::numLoadsReady()
return total;
}
template<class Impl>
int
LSQ<Impl>::numLoadsReady(unsigned tid)
{
return thread[tid].numLoadsReady();
}
template<class Impl>
unsigned
LSQ<Impl>::numFreeEntries()
@ -612,14 +510,6 @@ LSQ<Impl>::hasStoresToWB()
return true;
}
template<class Impl>
bool
LSQ<Impl>::hasStoresToWB(unsigned tid)
{
return thread[tid].hasStoresToWB();
}
template<class Impl>
bool
LSQ<Impl>::willWB()
@ -635,13 +525,6 @@ LSQ<Impl>::willWB()
return true;
}
template<class Impl>
bool
LSQ<Impl>::willWB(unsigned tid)
{
return thread[tid].willWB();
}
template<class Impl>
void
LSQ<Impl>::dumpInsts()
@ -653,10 +536,3 @@ LSQ<Impl>::dumpInsts()
thread[tid].dumpInsts();
}
}
template<class Impl>
void
LSQ<Impl>::dumpInsts(unsigned tid)
{
thread[tid].dumpInsts();
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -29,29 +29,30 @@
#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__
#include <algorithm>
#include <map>
#include <queue>
#include <algorithm>
#include "arch/faults.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
#include "sim/debug.hh"
#include "sim/sim_object.hh"
#include "arch/faults.hh"
//#include "sim/debug.hh"
//#include "sim/sim_object.hh"
/**
* Class that implements the actual LQ and SQ for each specific thread.
* Both are circular queues; load entries are freed upon committing, while
* store entries are freed once they writeback. The LSQUnit tracks if there
* are memory ordering violations, and also detects partial load to store
* forwarding cases (a store only has part of a load's data) that require
* the load to wait until the store writes back. In the former case it
* holds onto the instruction until the dependence unit looks at it, and
* in the latter it stalls the LSQ until the store writes back. At that
* point the load is replayed.
* Class that implements the actual LQ and SQ for each specific
* thread. Both are circular queues; load entries are freed upon
* committing, while store entries are freed once they writeback. The
* LSQUnit tracks if there are memory ordering violations, and also
* detects partial load to store forwarding cases (a store only has
* part of a load's data) that require the load to wait until the
* store writes back. In the former case it holds onto the instruction
* until the dependence unit looks at it, and in the latter it stalls
* the LSQ until the store writes back. At that point the load is
* replayed.
*/
template <class Impl>
class LSQUnit {
@ -76,21 +77,19 @@ class LSQUnit {
/** Returns the description of this event. */
const char *description();
private:
/** The store index of the store being written back. */
int storeIdx;
/** The writeback event for the store. Needed for store
* conditionals.
*/
public:
Event *wbEvent;
private:
/** The store index of the store being written back. */
int storeIdx;
private:
/** The pointer to the LSQ unit that issued the store. */
LSQUnit<Impl> *lsqPtr;
};
friend class StoreCompletionEvent;
public:
/** Constructs an LSQ unit. init() must be called prior to use. */
LSQUnit();
@ -136,14 +135,12 @@ class LSQUnit {
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
Fault executeLoad(int lq_idx);
Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
/** Executes a store instruction. */
Fault executeStore(DynInstPtr &inst);
/** Commits the head load. */
void commitLoad();
/** Commits a specific load, given by the sequence number. */
void commitLoad(InstSeqNum &inst);
/** Commits loads older than a specific sequence number. */
void commitLoads(InstSeqNum &youngest_inst);
@ -179,9 +176,7 @@ class LSQUnit {
/** Returns the memory ordering violator. */
DynInstPtr getMemDepViolator();
/** Returns if a load became blocked due to the memory system. It clears
* the bool's value upon this being called.
*/
/** Returns if a load became blocked due to the memory system. */
bool loadBlocked()
{ return isLoadBlocked; }
@ -215,9 +210,6 @@ class LSQUnit {
/** Returns if the SQ is full. */
bool sqFull() { return stores >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
/** Returns the number of instructions in the LSQ. */
unsigned getCount() { return loads + stores; }
@ -245,6 +237,10 @@ class LSQUnit {
/** Decrements the given load index (circular queue). */
inline void decrLdIdx(int &load_idx);
public:
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
private:
/** Pointer to the CPU. */
FullCPU *cpu;
@ -287,38 +283,29 @@ class LSQUnit {
/** Whether or not the store is completed. */
bool completed;
};
/*
enum Status {
Running,
Idle,
DcacheMissStall,
DcacheMissSwitch
};
*/
private:
/** The LSQUnit thread id. */
unsigned lsqID;
/** The status of the LSQ unit. */
// Status _status;
/** The store queue. */
std::vector<SQEntry> storeQueue;
/** The load queue. */
std::vector<DynInstPtr> loadQueue;
// Consider making these 16 bits
/** The number of LQ entries. */
/** The number of LQ entries, plus a sentinel entry (circular queue).
* @todo: Consider having var that records the true number of LQ entries.
*/
unsigned LQEntries;
/** The number of SQ entries. */
/** The number of SQ entries, plus a sentinel entry (circular queue).
* @todo: Consider having var that records the true number of SQ entries.
*/
unsigned SQEntries;
/** The number of load instructions in the LQ. */
int loads;
/** The number of store instructions in the SQ (excludes those waiting to
* writeback).
*/
/** The number of store instructions in the SQ. */
int stores;
/** The number of store instructions in the SQ waiting to writeback. */
int storesToWB;
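With the sentinel slot, a full queue always leaves one entry unused, so full and empty states stay distinguishable. The load-queue counterpart of the sqFull() check shown earlier would be (a sketch mirroring that style):

// N+1 slots hold at most N real entries; >= N means full.
bool lqFull() { return loads >= (LQEntries - 1); }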
@ -330,8 +317,8 @@ class LSQUnit {
/** The index of the head instruction in the SQ. */
int storeHead;
/** The index of the first instruction that is ready to be written back,
* and has not yet been written back.
/** The index of the first instruction that may be ready to be
* written back, and has not yet been written back.
*/
int storeWBIdx;
/** The index of the tail instruction in the SQ. */
@ -348,13 +335,9 @@ class LSQUnit {
//list<InstSeqNum> mshrSeqNums;
//Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
/** Wire to read information from the issue stage time queue. */
typename TimeBuffer<IssueStruct>::wire fromIssue;
// Make these per thread?
/** Whether or not the LSQ is stalled. */
bool stalled;
/** The store that causes the stall due to partial store to load
@ -364,20 +347,13 @@ class LSQUnit {
/** The index of the above store. */
int stallingLoadIdx;
/** Whether or not a load is blocked due to the memory system. It is
* cleared when this value is checked via loadBlocked().
*/
/** Whether or not a load is blocked due to the memory system. */
bool isLoadBlocked;
bool loadBlockedHandled;
InstSeqNum blockedLoadSeqNum;
/** The oldest faulting load instruction. */
DynInstPtr loadFaultInst;
/** The oldest faulting store instruction. */
DynInstPtr storeFaultInst;
/** The oldest load that caused a memory ordering violation. */
DynInstPtr memDepViolator;
@ -447,23 +423,14 @@ template <class T>
Fault
LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
{
//Depending on issue2execute delay a squashed load could
//execute if it is found to be squashed in the same
//cycle it is scheduled to execute
assert(loadQueue[load_idx]);
if (loadQueue[load_idx]->isExecuted()) {
panic("Should not reach this point with split ops!");
memcpy(&data,req->data,req->size);
return NoFault;
}
assert(!loadQueue[load_idx]->isExecuted());
// Make sure this isn't an uncacheable access
// A bit of a hackish way to get uncached accesses to work only if they're
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
// @todo: Fix uncached accesses.
if (req->flags & UNCACHEABLE &&
(load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
iewStage->rescheduleMemInst(loadQueue[load_idx]);
@ -479,12 +446,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
"storeHead: %i addr: %#x\n",
load_idx, store_idx, storeHead, req->paddr);
#ifdef FULL_SYSTEM
#if 0
if (req->flags & LOCKED) {
cpu->lockAddr = req->paddr;
cpu->lockFlag = true;
}
#endif
req->cmd = Read;
assert(!req->completionEvent);
req->completionEvent = NULL;
req->time = curTick;
while (store_idx != -1) {
// End once we've reached the top of the LSQ
@ -518,18 +489,14 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// If the store's data has all of the data needed, we can forward.
if (store_has_lower_limit && store_has_upper_limit) {
// Get shift amount for offset into the store's data.
int shift_amt = req->vaddr & (store_size - 1);
// Assumes byte addressing
// @todo: Magic number, assumes byte addressing
shift_amt = shift_amt << 3;
// Cast this to type T?
data = storeQueue[store_idx].data >> shift_amt;
req->cmd = Read;
assert(!req->completionEvent);
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
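As a concrete, made-up example of the forwarding shift: an 8-byte store at vaddr 0x1000 forwarding to a 4-byte load at vaddr 0x1004:

int shift_amt = 0x1004 & (8 - 1);  // = 4, byte offset into store data
shift_amt = shift_amt << 3;        // = 32, offset in bits
// storeQueue[store_idx].data >> 32 selects the upper word of the
// store's data, which is exactly what the load should observe.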
@ -579,7 +546,6 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// Do not generate a writeback event as this instruction is not
// complete.
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
"Store idx %i to load addr %#x\n",
store_idx, req->vaddr);
@ -588,16 +554,13 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
}
}
// If there's no forwarding case, then go access memory
DynInstPtr inst = loadQueue[load_idx];
DPRINTF(LSQUnit, "Doing functional access for inst PC %#x\n",
loadQueue[load_idx]->readPC());
DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC());
assert(!req->data);
req->cmd = Read;
req->completionEvent = NULL;
req->time = curTick;
req->data = new uint8_t[64];
Fault fault = cpu->read(req, data);
memcpy(req->data, &data, sizeof(T));
@ -611,20 +574,19 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
return NoFault;
// Record that the load was blocked due to memory. This
// load will squash all instructions after it, be
// refetched, and re-executed.
isLoadBlocked = true;
loadBlockedHandled = false;
blockedLoadSeqNum = inst->seqNum;
// No fault occurred, even though the interface is blocked.
return NoFault;
}
DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
loadQueue[load_idx]->readPC());
/*
Addr debug_addr = ULL(0xfffffc0000be81a8);
if (req->vaddr == debug_addr) {
debug_break();
}
*/
assert(!req->completionEvent);
req->completionEvent =
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
@ -632,75 +594,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
assert(dcacheInterface->doEvents());
// Ugly hack to get an event scheduled *only* if the access is
// a miss. We really should add first-class support for this
// at some point.
if (result != MA_HIT) {
DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
inst->seqNum);
lastDcacheStall = curTick;
// _status = DcacheMissStall;
} else {
DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
inst->seqNum);
DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
}
}
#if 0
// if we have a cache, do cache access too
if (dcacheInterface) {
if (dcacheInterface->isBlocked()) {
isLoadBlocked = true;
// No fault occurred, even though the interface is blocked.
return NoFault;
}
DPRINTF(LSQUnit, "LSQUnit: D-cache: PC:%#x reading from paddr:%#x "
"vaddr:%#x flags:%i\n",
inst->readPC(), req->paddr, req->vaddr, req->flags);
// Setup MemReq pointer
req->cmd = Read;
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
assert(!req->completionEvent);
req->completionEvent =
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
// Do Cache Access
MemAccessResult result = dcacheInterface->access(req);
// Ugly hack to get an event scheduled *only* if the access is
// a miss. We really should add first-class support for this
// at some point.
// @todo: Probably should support having no events
if (result != MA_HIT) {
DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
inst->seqNum);
lastDcacheStall = curTick;
_status = DcacheMissStall;
} else {
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
inst->seqNum);
DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
}
} else {
fatal("Must use D-cache with new memory system");
}
#endif
return fault;
}
@ -716,24 +619,11 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
" | storeHead:%i [sn:%i]\n",
store_idx, req->paddr, data, storeHead,
storeQueue[store_idx].inst->seqNum);
/*
if (req->flags & LOCKED) {
if (req->flags & UNCACHEABLE) {
req->result = 2;
} else {
req->result = 1;
}
}
*/
storeQueue[store_idx].req = req;
storeQueue[store_idx].size = sizeof(T);
storeQueue[store_idx].data = data;
/*
Addr debug_addr = ULL(0xfffffc0000be81a8);
if (req->vaddr == debug_addr) {
debug_break();
}
*/
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;

View file

@ -35,8 +35,8 @@ LSQUnit<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx,
Event *wb_event,
LSQUnit<Impl> *lsq_ptr)
: Event(&mainEventQueue),
storeIdx(store_idx),
wbEvent(wb_event),
storeIdx(store_idx),
lsqPtr(lsq_ptr)
{
this->setFlags(Event::AutoDelete);
@ -86,15 +86,13 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
lsqID = id;
LQEntries = maxLQEntries;
SQEntries = maxSQEntries;
// Add 1 for the sentinel entry (they are circular queues).
LQEntries = maxLQEntries + 1;
SQEntries = maxSQEntries + 1;
loadQueue.resize(LQEntries);
storeQueue.resize(SQEntries);
// May want to initialize these entries to NULL
loadHead = loadTail = 0;
storeHead = storeWBIdx = storeTail = 0;
@ -104,7 +102,7 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
dcacheInterface = params->dcacheInterface;
loadFaultInst = storeFaultInst = memDepViolator = NULL;
memDepViolator = NULL;
blockedLoadSeqNum = 0;
}
@ -152,6 +150,8 @@ LSQUnit<Impl>::switchOut()
for (int i = 0; i < loadQueue.size(); ++i)
loadQueue[i] = NULL;
assert(storesToWB == 0);
while (storesToWB > 0 &&
storeWBIdx != storeTail &&
storeQueue[storeWBIdx].inst &&
@ -218,7 +218,7 @@ LSQUnit<Impl>::takeOverFrom()
usedPorts = 0;
loadFaultInst = storeFaultInst = memDepViolator = NULL;
memDepViolator = NULL;
blockedLoadSeqNum = 0;
@ -231,16 +231,17 @@ template<class Impl>
void
LSQUnit<Impl>::resizeLQ(unsigned size)
{
assert( size >= LQEntries);
unsigned size_plus_sentinel = size + 1;
assert(size_plus_sentinel >= LQEntries);
if (size > LQEntries) {
while (size > loadQueue.size()) {
if (size_plus_sentinel > LQEntries) {
while (size_plus_sentinel > loadQueue.size()) {
DynInstPtr dummy;
loadQueue.push_back(dummy);
LQEntries++;
}
} else {
LQEntries = size;
LQEntries = size_plus_sentinel;
}
}
@ -249,14 +250,15 @@ template<class Impl>
void
LSQUnit<Impl>::resizeSQ(unsigned size)
{
if (size > SQEntries) {
while (size > storeQueue.size()) {
unsigned size_plus_sentinel = size + 1;
if (size_plus_sentinel > SQEntries) {
while (size_plus_sentinel > storeQueue.size()) {
SQEntry dummy;
storeQueue.push_back(dummy);
SQEntries++;
}
} else {
SQEntries = size;
SQEntries = size_plus_sentinel;
}
}
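So a caller asking for N usable entries ends up with N + 1 slots; for example (illustrative, growing case):

resizeLQ(32);                     // request 32 usable LQ entries
assert(loadQueue.size() == 33);   // 32 entries + 1 sentinel slot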
@ -264,10 +266,8 @@ template <class Impl>
void
LSQUnit<Impl>::insert(DynInstPtr &inst)
{
// Make sure we really have a memory reference.
assert(inst->isMemRef());
// Make sure it's one of the two classes of memory references.
assert(inst->isLoad() || inst->isStore());
if (inst->isLoad()) {
@ -283,7 +283,8 @@ template <class Impl>
void
LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
{
assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
assert((loadTail + 1) % LQEntries != loadHead);
assert(loads < LQEntries);
DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
load_inst->readPC(), loadTail, load_inst->seqNum);
@ -322,7 +323,6 @@ LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
incrStIdx(storeTail);
++stores;
}
template <class Impl>
@ -370,39 +370,6 @@ LSQUnit<Impl>::numLoadsReady()
return retval;
}
#if 0
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad()
{
Fault load_fault = NoFault;
DynInstPtr load_inst;
assert(readyLoads.size() != 0);
// Execute a ready load.
LdMapIt ready_it = readyLoads.begin();
load_inst = (*ready_it).second;
// Execute the instruction, which is held in the data portion of the
// iterator.
load_fault = load_inst->execute();
// If it executed successfully, then switch it over to the executed
// loads list.
if (load_fault == NoFault) {
executedLoads[load_inst->seqNum] = load_inst;
readyLoads.erase(ready_it);
} else {
loadFaultInst = load_inst;
}
return load_fault;
}
#endif
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
@ -413,33 +380,14 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
inst->readPC(),inst->seqNum);
// Make sure it's really in the list.
// Normally it should always be in the list. However,
/* due to a syscall it may not be the list.
#ifdef DEBUG
int i = loadHead;
while (1) {
if (i == loadTail && !find(inst)) {
assert(0 && "Load not in the queue!");
} else if (loadQueue[i] == inst) {
break;
}
i = i + 1;
if (i >= LQEntries) {
i = 0;
}
}
#endif // DEBUG*/
// load_fault = inst->initiateAcc();
load_fault = inst->execute();
// If the instruction faulted, then we need to send it along to commit
// without the instruction completing.
if (load_fault != NoFault) {
// Maybe just set it as can commit here, although that might cause
// some other problems with sending traps to the ROB too quickly.
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
}
@ -447,20 +395,6 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
return load_fault;
}
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(int lq_idx)
{
// Very hackish. Not sure the best way to check that this
// instruction is at the head of the ROB. I should have some sort
// of extra information here so that I'm not overloading the
// canCommit signal for 15 different things.
loadQueue[lq_idx]->setCanCommit();
Fault ret_fault = executeLoad(loadQueue[lq_idx]);
loadQueue[lq_idx]->clearCanCommit();
return ret_fault;
}
template <class Impl>
Fault
LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
@ -481,11 +415,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
Fault store_fault = store_inst->initiateAcc();
// Fault store_fault = store_inst->execute();
// Store size should now be available. Use it to get proper offset for
// addr comparisons.
int size = storeQueue[store_idx].size;
if (size == 0) {
if (storeQueue[store_idx].size == 0) {
DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
store_inst->readPC(),store_inst->seqNum);
@ -494,30 +424,25 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
assert(store_fault == NoFault);
if (!storeFaultInst) {
if (store_fault != NoFault) {
panic("Fault in a store instruction!");
storeFaultInst = store_inst;
} else if (store_inst->isNonSpeculative()) {
// Nonspeculative accesses (namely store conditionals)
// need to set themselves as able to writeback if we
// haven't had a fault by here.
storeQueue[store_idx].canWB = true;
if (store_inst->isNonSpeculative()) {
// Nonspeculative accesses (namely store conditionals)
// need to set themselves as able to writeback if we
// haven't had a fault by here.
storeQueue[store_idx].canWB = true;
++storesToWB;
}
++storesToWB;
}
if (!memDepViolator) {
while (load_idx != loadTail) {
// Actually should only check loads that have actually executed
// Might be safe because effAddr is set to InvalAddr when the
// dyn inst is created.
// Really only need to check loads that have actually executed
// It's safe to check all loads because effAddr is set to
// InvalAddr when the dyn inst is created.
// @todo: For now this is extra conservative, detecting a
// violation if the addresses match assuming all accesses
// are quad word accesses.
// Must actually check all addrs in the proper size range
// Which is more correct than needs to be. What if for now we just
// assume all loads are quad-word loads, and do the addr based
// on that.
// @todo: Fix this, magic number being used here
if ((loadQueue[load_idx]->effAddr >> 8) ==
(store_inst->effAddr >> 8)) {
@ -555,32 +480,6 @@ LSQUnit<Impl>::commitLoad()
--loads;
}
template <class Impl>
void
LSQUnit<Impl>::commitLoad(InstSeqNum &inst)
{
// Hopefully I don't use this function too much
panic("Don't use this function!");
int i = loadHead;
while (1) {
if (i == loadTail) {
assert(0 && "Load not in the queue!");
} else if (loadQueue[i]->seqNum == inst) {
break;
}
++i;
if (i >= LQEntries) {
i = 0;
}
}
loadQueue[i]->removeInLSQ();
loadQueue[i] = NULL;
--loads;
}
template <class Impl>
void
LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
@ -602,6 +501,8 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
while (store_idx != storeTail) {
assert(storeQueue[store_idx].inst);
// Mark any stores that are now committed and have not yet
// been marked as able to write back.
if (!storeQueue[store_idx].canWB) {
if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
break;
@ -613,7 +514,6 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
storeQueue[store_idx].canWB = true;
// --stores;
++storesToWB;
}
@ -631,6 +531,8 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].canWB &&
usedPorts < cachePorts) {
// Store didn't write any data so no need to write it back to
// memory.
if (storeQueue[storeWBIdx].size == 0) {
completeStore(storeWBIdx);
@ -659,7 +561,6 @@ LSQUnit<Impl>::writebackStores()
MemReqPtr req = storeQueue[storeWBIdx].req;
storeQueue[storeWBIdx].committed = true;
// Fault fault = cpu->translateDataWriteReq(req);
req->cmd = Write;
req->completionEvent = NULL;
req->time = curTick;
@ -689,6 +590,12 @@ LSQUnit<Impl>::writebackStores()
default:
panic("Unexpected store size!\n");
}
// Stores other than store conditionals are completed at this
// time. Mark them as completed and, if we have a checker,
// tell it that the instruction is completed.
// @todo: Figure out what time I can say stores are complete in
// the timing memory.
if (!(req->flags & LOCKED)) {
storeQueue[storeWBIdx].inst->setCompleted();
if (cpu->checker) {
@ -714,57 +621,35 @@ LSQUnit<Impl>::writebackStores()
iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
}
if (result != MA_HIT && dcacheInterface->doEvents()) {
typename IEW::LdWritebackEvent *wb = NULL;
if (req->flags & LOCKED) {
// Stx_C should not generate a system port transaction,
// but that might be hard to accomplish.
wb = new typename
IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
typename IEW::LdWritebackEvent *wb = NULL;
if (req->flags & LOCKED) {
// Stx_C should not generate a system port transaction
// if it misses in the cache, but that might be hard
// to accomplish without explicit cache support.
wb = new typename
IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
iewStage);
store_event->wbEvent = wb;
}
store_event->wbEvent = wb;
}
DPRINTF(LSQUnit,"D-Cache Write Miss!\n");
if (result != MA_HIT && dcacheInterface->doEvents()) {
DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
storeWBIdx);
DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
storeQueue[storeWBIdx].inst->seqNum);
lastDcacheStall = curTick;
// _status = DcacheMissStall;
//mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
//DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
// Increment stat here or something
// @todo: Increment stat here.
} else {
DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
storeWBIdx);
DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
storeQueue[storeWBIdx].inst->seqNum);
if (req->flags & LOCKED) {
// Stx_C does not generate a system port transaction.
/*
if (req->flags & UNCACHEABLE) {
req->result = 2;
} else {
if (cpu->lockFlag && cpu->lockAddr == req->paddr) {
req->result=1;
} else {
req->result = 0;
}
}
*/
typename IEW::LdWritebackEvent *wb =
new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
iewStage);
store_event->wbEvent = wb;
}
}
incrStIdx(storeWBIdx);
@ -798,14 +683,12 @@ void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
"(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
int load_idx = loadTail;
decrLdIdx(load_idx);
while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
// Clear the smart pointer to make sure it is decremented.
DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
"[sn:%lli]\n",
loadQueue[load_idx]->readPC(),
@ -817,6 +700,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingLoadIdx = 0;
}
// Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->squashed = true;
loadQueue[load_idx] = NULL;
--loads;
@ -840,19 +724,18 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
while (stores != 0 &&
storeQueue[store_idx].inst->seqNum > squashed_num) {
// Instructions marked as can WB are already committed.
if (storeQueue[store_idx].canWB) {
break;
}
// Clear the smart pointer to make sure it is decremented.
DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
"idx:%i [sn:%lli]\n",
storeQueue[store_idx].inst->readPC(),
store_idx, storeQueue[store_idx].inst->seqNum);
// I don't think this can happen. It should have been cleared by the
// stalling load.
// I don't think this can happen. It should have been cleared
// by the stalling load.
if (isStalled() &&
storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
panic("Is stalled should have been cleared by stalling load!\n");
@ -860,13 +743,17 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingStoreIsn = 0;
}
// Clear the smart pointer to make sure it is decremented.
storeQueue[store_idx].inst->squashed = true;
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
if (storeQueue[store_idx].req) {
// There should not be a completion event if the store has
// not yet committed.
assert(!storeQueue[store_idx].req->completionEvent);
}
storeQueue[store_idx].req = NULL;
--stores;
@ -877,36 +764,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
}
template <class Impl>
void
LSQUnit<Impl>::dumpInsts()
{
cprintf("Load store queue: Dumping instructions.\n");
cprintf("Load queue size: %i\n", loads);
cprintf("Load queue: ");
int load_idx = loadHead;
while (load_idx != loadTail && loadQueue[load_idx]) {
cprintf("%#x ", loadQueue[load_idx]->readPC());
incrLdIdx(load_idx);
}
cprintf("Store queue size: %i\n", stores);
cprintf("Store queue: ");
int store_idx = storeHead;
while (store_idx != storeTail && storeQueue[store_idx].inst) {
cprintf("%#x ", storeQueue[store_idx].inst->readPC());
incrStIdx(store_idx);
}
cprintf("\n");
}
template <class Impl>
void
LSQUnit<Impl>::completeStore(int store_idx)
@ -930,7 +787,9 @@ LSQUnit<Impl>::completeStore(int store_idx)
iewStage->updateLSQNextCycle = true;
}
DPRINTF(LSQUnit, "Store head idx:%i\n", storeHead);
DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
"idx:%i\n",
storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
if (isStalled() &&
storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
@ -943,6 +802,10 @@ LSQUnit<Impl>::completeStore(int store_idx)
}
storeQueue[store_idx].inst->setCompleted();
// Tell the checker we've completed this instruction. Some stores
// may get reported twice to the checker, but the checker can
// handle that case.
if (cpu->checker) {
cpu->checker->tick(storeQueue[store_idx].inst);
}
@ -979,3 +842,33 @@ LSQUnit<Impl>::decrLdIdx(int &load_idx)
if (--load_idx < 0)
load_idx += LQEntries;
}
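// The matching increment helpers are outside this hunk; presumably
// they mirror decrLdIdx with a wrap at the queue size (assumed body):
template <class Impl>
inline void
LSQUnit<Impl>::incrLdIdx(int &load_idx)
{
    if (++load_idx >= LQEntries)
        load_idx -= LQEntries;
}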
template <class Impl>
void
LSQUnit<Impl>::dumpInsts()
{
cprintf("Load store queue: Dumping instructions.\n");
cprintf("Load queue size: %i\n", loads);
cprintf("Load queue: ");
int load_idx = loadHead;
while (load_idx != loadTail && loadQueue[load_idx]) {
cprintf("%#x ", loadQueue[load_idx]->readPC());
incrLdIdx(load_idx);
}
cprintf("Store queue size: %i\n", stores);
cprintf("Store queue: ");
int store_idx = storeHead;
while (store_idx != storeTail && storeQueue[store_idx].inst) {
cprintf("%#x ", storeQueue[store_idx].inst->readPC());
incrStIdx(store_idx);
}
cprintf("\n");
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -201,13 +201,6 @@ class MemDepUnit {
static int memdep_erase;
};
struct ltMemDepEntry {
bool operator() (const MemDepEntryPtr &lhs, const MemDepEntryPtr &rhs)
{
return lhs->inst->seqNum < rhs->inst->seqNum;
}
};
/** Finds the memory dependence entry in the hash map. */
inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -141,12 +141,12 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
MemDepEntry::memdep_insert++;
// Add the instruction to the instruction list.
instList[tid].push_back(inst);
inst_entry->listIt = --(instList[tid].end());
// Check the dependence predictor for any producing stores.
// Check any barriers and the dependence predictor for any
// producing stores.
InstSeqNum producing_store;
if (inst->isLoad() && loadBarrier) {
producing_store = loadBarrierSN;
@ -181,7 +181,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
moveToReady(inst_entry);
}
} else {
// Otherwise make the instruction dependent on the store.
// Otherwise make the instruction dependent on the store/barrier.
DPRINTF(MemDepUnit, "Adding to dependency list; "
"inst PC %#x is dependent on [sn:%lli].\n",
inst->readPC(), producing_store);
@ -193,8 +193,6 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
// Add this instruction to the list of dependents.
store_entry->dependInsts.push_back(inst_entry);
// inst_entry->producingStore = store_entry;
if (inst->isLoad()) {
++conflictingLoads;
} else {
@ -370,8 +368,6 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
instList[tid].erase((*hash_it).second->listIt);
// (*hash_it).second->inst = NULL;
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
@ -416,7 +412,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
if (!woken_inst->inst) {
// Potentially removed mem dep entries could be on this list
// inst_entry->dependInsts[i] = NULL;
continue;
}
@ -429,7 +424,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
} else {
woken_inst->memDepReady = true;
}
// inst_entry->dependInsts[i] = NULL;
}
inst_entry->dependInsts.clear();
@ -468,13 +462,7 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
assert(hash_it != memDepHash.end());
(*hash_it).second->squashed = true;
/*
for (int i = 0; i < (*hash_it).second->dependInsts.size(); ++i) {
(*hash_it).second->dependInsts[i] = NULL;
}
(*hash_it).second->inst = NULL;
*/
(*hash_it).second = NULL;
memDepHash.erase(hash_it);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -35,15 +35,16 @@
#include "base/timebuf.hh"
/**
* DefaultRename handles both single threaded and SMT rename. Its width is
* specified by the parameters; each cycle it tries to rename that many
* instructions. It holds onto the rename history of all instructions with
* destination registers, storing the arch. register, the new physical
* register, and the old physical register, to allow for undoing of mappings
* if squashing happens, or freeing up registers upon commit. Rename handles
* blocking if the ROB, IQ, or LSQ is going to be full. Rename also handles
* barriers, and does so by stalling on the instruction until the ROB is
* empty and there are no instructions in flight to the ROB.
* DefaultRename handles both single threaded and SMT rename. Its
* width is specified by the parameters; each cycle it tries to rename
* that many instructions. It holds onto the rename history of all
* instructions with destination registers, storing the
* arch. register, the new physical register, and the old physical
* register, to allow for undoing of mappings if squashing happens, or
* freeing up registers upon commit. Rename handles blocking if the
* ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
* and does so by stalling on the instruction until the ROB is empty
* and there are no instructions in flight to the ROB.
*/
template<class Impl>
class DefaultRename
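The rename history record described above reduces to roughly the following (field names are illustrative):

// One entry per renamed destination register: enough to undo the
// mapping on a squash or free the old register at commit.
struct RenameHistory {
    InstSeqNum instSeqNum;    // instruction that created the mapping
    RegIndex archReg;         // architectural destination register
    PhysRegIndex newPhysReg;  // mapping to undo on a squash
    PhysRegIndex prevPhysReg; // mapping to restore on a squash, or
                              // the register to free at commit
};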
@ -68,14 +69,15 @@ class DefaultRename
// Typedefs from the ISA.
typedef TheISA::RegIndex RegIndex;
// A deque is used to queue the instructions. Barrier insts must be
// added to the front of the deque, which is the only reason for using
// a deque instead of a queue. (Most other stages use a queue)
// A list is used to queue the instructions. Barrier insts must
// be added to the front of the list, which is the only reason for
// using a list instead of a queue. (Most other stages use a
// queue)
typedef std::list<DynInstPtr> InstQueue;
public:
/** Overall rename status. Used to determine if the CPU can deschedule
* itself due to a lack of activity.
/** Overall rename status. Used to determine if the CPU can
* deschedule itself due to a lack of activity.
*/
enum RenameStatus {
Active,

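A small sketch of the point the list-vs-queue comment is making (DynInstPtr is left opaque here; the real one is a reference-counted pointer):

    #include <list>

    class DynInst;                  // left opaque for the sketch
    typedef DynInst *DynInstPtr;

    typedef std::list<DynInstPtr> InstQueue;

    // Ordinary instructions arriving from decode go on the back...
    inline void
    enqueue(InstQueue &q, DynInstPtr inst)
    {
        q.push_back(inst);
    }

    // ...but an instruction that caused a serialize stall must be the
    // next one rename processes when it unblocks, so it is put back on
    // the front: the one operation std::queue does not provide.
    inline void
    requeueSerializing(InstQueue &q, DynInstPtr inst)
    {
        q.push_front(inst);
    }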
View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -209,17 +209,13 @@ template <class Impl>
void
DefaultRename<Impl>::initStage()
{
// Grab the number of free entries directly from the stages.
for (int tid=0; tid < numThreads; tid++) {
freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid);
freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
emptyROB[tid] = true;
}
// Clear these pointers so they are not accidentally used in
// non-initialization code.
// iew_ptr = NULL;
// commit_ptr = NULL;
}
template<class Impl>
@ -299,6 +295,7 @@ DefaultRename<Impl>::takeOverFrom()
_status = Inactive;
initStage();
// Reset all state prior to taking over from the other CPU.
for (int i=0; i< numThreads; i++) {
renameStatus[i] = Idle;
@ -326,7 +323,7 @@ DefaultRename<Impl>::squash(unsigned tid)
if (renameStatus[tid] == Blocked ||
renameStatus[tid] == Unblocking ||
renameStatus[tid] == SerializeStall) {
#if !FULL_SYSTEM
#if 0
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals to
// be high. This shouldn't happen in full system.
@ -344,7 +341,7 @@ DefaultRename<Impl>::squash(unsigned tid)
// Set the status to Squashing.
renameStatus[tid] = Squashing;
// Clear the skid buffer in case it has any data in it.
// Squash any instructions from decode.
unsigned squashCount = 0;
for (int i=0; i<fromDecode->size; i++) {
@ -367,9 +364,6 @@ template <class Impl>
void
DefaultRename<Impl>::tick()
{
// Rename will need to try to rename as many instructions as it
// has bandwidth, unless it is blocked.
wroteToTimeBuffer = false;
blockThisCycle = false;
@ -454,8 +448,6 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
} else if (renameStatus[tid] == Unblocking) {
renameInsts(tid);
// ++renameUnblockCycles;
if (validInsts()) {
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
@ -575,7 +567,6 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
insts_to_rename.pop_front();
//Use skidBuffer with oldest instructions
if (renameStatus[tid] == Unblocking) {
DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename "
"skidBuffer\n",
@ -711,10 +702,10 @@ void
DefaultRename<Impl>::sortInsts()
{
int insts_from_decode = fromDecode->size;
#ifdef DEBUG
for (int i=0; i < numThreads; i++)
assert(insts[i].empty());
#endif
for (int i = 0; i < insts_from_decode; ++i) {
DynInstPtr inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);
@ -794,8 +785,8 @@ DefaultRename<Impl>::block(unsigned tid)
wroteToTimeBuffer = true;
}
// Rename can not go from SerializeStall to Blocked, otherwise it would
// not know to complete the serialize stall.
// Rename can not go from SerializeStall to Blocked, otherwise
// it would not know to complete the serialize stall.
if (renameStatus[tid] != SerializeStall) {
// Set status to Blocked.
renameStatus[tid] = Blocked;
@ -835,15 +826,11 @@ DefaultRename<Impl>::doSquash(unsigned tid)
InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
//#if FULL_SYSTEM
// assert(!historyBuffer[tid].empty());
//#else
// After a syscall squashes everything, the history buffer may be empty
// but the ROB may still be squashing instructions.
if (historyBuffer[tid].empty()) {
return;
}
//#endif // FULL_SYSTEM
// Go through the most recent instructions, undoing the mappings
// they did and freeing up the registers.
@ -896,8 +883,8 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid)
hb_it != historyBuffer[tid].end() &&
(*hb_it).instSeqNum <= inst_seq_num) {
DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, sequence"
" number %i.\n",
DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, "
"[sn:%lli].\n",
tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
freeList->addReg((*hb_it).prevPhysReg);

View file

@ -32,18 +32,12 @@
using namespace std;
// Todo: Consider making functions inline. Avoid having things that are
// using the zero register or misc registers from adding on the registers
// to the free list. Possibly remove the direct communication between
// this and the freelist. Considering making inline bool functions that
// determine if the register is a logical int, logical fp, physical int,
// physical fp, etc.
// @todo: Consider making inline bool functions that determine if the
// register is a logical int, logical fp, physical int, physical fp,
// etc.
SimpleRenameMap::~SimpleRenameMap()
{
// Delete the rename maps as they were allocated with new.
//delete [] intRenameMap;
//delete [] floatRenameMap;
}
void
@ -105,7 +99,8 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs,
// Although the index refers purely to architected registers, because
// the floating reg indices come after the integer reg indices, they
// may exceed the size of a normal RegIndex (short).
for (PhysRegIndex index = numLogicalIntRegs; index < numLogicalRegs; ++index)
for (PhysRegIndex index = numLogicalIntRegs;
index < numLogicalRegs; ++index)
{
floatRenameMap[index].physical_reg = freg_idx++;
}
@ -132,14 +127,10 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs,
void
SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
{
//Setup the interface to the freelist.
freeList = fl_ptr;
}
// Don't allow this stage to fault; force that check to the rename stage.
// Simply ask to rename a logical register and get back a new physical
// register index.
SimpleRenameMap::RenameInfo
SimpleRenameMap::rename(RegIndex arch_reg)
{
@ -152,13 +143,11 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// requested architected register.
prev_reg = intRenameMap[arch_reg].physical_reg;
// If it's not referencing the zero register, then mark the register
// as not ready.
// If it's not referencing the zero register, then rename the
// register.
if (arch_reg != intZeroReg) {
// Get a free physical register to rename to.
renamed_reg = freeList->getIntReg();
// Update the integer rename map.
intRenameMap[arch_reg].physical_reg = renamed_reg;
assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
@ -168,20 +157,15 @@ SimpleRenameMap::rename(RegIndex arch_reg)
renamed_reg = intZeroReg;
}
} else if (arch_reg < numLogicalRegs) {
// Subtract off the base offset for floating point registers.
// arch_reg = arch_reg - numLogicalIntRegs;
// Record the current physical register that is renamed to the
// requested architected register.
prev_reg = floatRenameMap[arch_reg].physical_reg;
// If it's not referencing the zero register, then mark the register
// as not ready.
// If it's not referencing the zero register, then rename the
// register.
if (arch_reg != floatZeroReg) {
// Get a free floating point register to rename to.
renamed_reg = freeList->getFloatReg();
// Update the floating point rename map.
floatRenameMap[arch_reg].physical_reg = renamed_reg;
assert(renamed_reg < numPhysicalRegs &&
@ -194,10 +178,10 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// Subtract off the base offset for miscellaneous registers.
arch_reg = arch_reg - numLogicalRegs;
// No renaming happens to the misc. registers. They are simply the
// registers that come after all the physical registers; thus
// take the base architected register and add the physical registers
// to it.
// No renaming happens to the misc. registers. They are
// simply the registers that come after all the physical
// registers; thus take the base architected register and add
// the physical registers to it.
renamed_reg = arch_reg + numPhysicalRegs;
// Set the previous register to the same register; mainly it must be
@ -211,17 +195,12 @@ SimpleRenameMap::rename(RegIndex arch_reg)
return RenameInfo(renamed_reg, prev_reg);
}
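
Since the misc-register case above is pure index arithmetic, a worked example may help (the register counts are hypothetical; only the formula renamed_reg = arch_reg + numPhysicalRegs comes from the code). Architected indices are laid out as ints, then floats, then misc; the non-renaming misc registers are simply reported as slots past the end of the unified physical register space:

    #include <cassert>

    int main()
    {
        // Hypothetical sizes, for illustration only.
        const int numLogicalIntRegs   = 32;
        const int numLogicalFloatRegs = 32;
        const int numLogicalRegs =
            numLogicalIntRegs + numLogicalFloatRegs;       // 64
        const int numPhysicalIntRegs   = 96;
        const int numPhysicalFloatRegs = 96;
        const int numPhysicalRegs =
            numPhysicalIntRegs + numPhysicalFloatRegs;     // 192

        // Misc architected register 67 (i.e. the 4th misc register,
        // since indices 0-63 are the int and float arch registers):
        int arch_reg = 67;
        assert(arch_reg >= numLogicalRegs);
        arch_reg -= numLogicalRegs;                        // 3
        int renamed_reg = arch_reg + numPhysicalRegs;      // 195
        assert(renamed_reg == 195);  // past every real phys reg
        return 0;
    }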
//Perhaps give this a pair as a return value, of the physical register
//and whether or not it's ready.
PhysRegIndex
SimpleRenameMap::lookup(RegIndex arch_reg)
{
if (arch_reg < numLogicalIntRegs) {
return intRenameMap[arch_reg].physical_reg;
} else if (arch_reg < numLogicalRegs) {
// Subtract off the base FP offset.
// arch_reg = arch_reg - numLogicalIntRegs;
return floatRenameMap[arch_reg].physical_reg;
} else {
// Subtract off the misc registers offset.
@ -233,51 +212,23 @@ SimpleRenameMap::lookup(RegIndex arch_reg)
}
}
// In this implementation the miscellaneous registers do not actually rename,
// so this function does not allow you to try to change their mappings.
void
SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
{
// In this implementation the miscellaneous registers do not
// actually rename, so this function does not allow you to try to
// change their mappings.
if (arch_reg < numLogicalIntRegs) {
DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n",
(int)arch_reg, renamed_reg);
intRenameMap[arch_reg].physical_reg = renamed_reg;
} else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) {
DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
(int)arch_reg - numLogicalIntRegs, renamed_reg);
floatRenameMap[arch_reg].physical_reg = renamed_reg;
}
//assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs));
}
void
SimpleRenameMap::squash(vector<RegIndex> freed_regs,
vector<UnmapInfo> unmaps)
{
panic("Not sure this function should be called.");
// Not sure the rename map should be able to access the free list
// like this.
while (!freed_regs.empty()) {
RegIndex free_register = freed_regs.back();
if (free_register < numPhysicalIntRegs) {
freeList->addIntReg(free_register);
} else {
// Subtract off the base FP dependence tag.
free_register = free_register - numPhysicalIntRegs;
freeList->addFloatReg(free_register);
}
freed_regs.pop_back();
}
// Take unmap info and roll back the rename map.
}
int

View file

@ -101,9 +101,6 @@ class SimpleRenameMap
*/
void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg);
void squash(std::vector<RegIndex> freed_regs,
std::vector<UnmapInfo> unmaps);
int numFreeEntries();
private:
@ -153,7 +150,7 @@ class SimpleRenameMap
};
//Change this to private
public:
private:
/** Integer rename map. */
std::vector<RenameEntry> intRenameMap;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -53,9 +53,7 @@ class ROB
enum Status {
Running,
Idle,
ROBSquashing,
DcacheMissStall,
DcacheMissComplete
ROBSquashing
};
/** SMT ROB Sharing Policy */
@ -112,7 +110,7 @@ class ROB
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the head of the ROB.
*/
DynInstPtr readHeadInst();
// DynInstPtr readHeadInst();
/** Returns a pointer to the head instruction of a specific thread within
* the ROB.
@ -124,7 +122,7 @@ class ROB
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the tail of the ROB.
*/
DynInstPtr readTailInst();
// DynInstPtr readTailInst();
/** Returns a pointer to the tail instruction of a specific thread within
* the ROB.
@ -133,7 +131,7 @@ class ROB
DynInstPtr readTailInst(unsigned tid);
/** Retires the head instruction, removing it from the ROB. */
void retireHead();
// void retireHead();
/** Retires the head instruction of a specific thread, removing it from the
* ROB.
@ -141,7 +139,7 @@ class ROB
void retireHead(unsigned tid);
/** Is the oldest instruction across all threads ready. */
bool isHeadReady();
// bool isHeadReady();
/** Is the oldest instruction across a particular thread ready. */
bool isHeadReady(unsigned tid);
@ -200,35 +198,35 @@ class ROB
void updateTail();
/** Reads the PC of the oldest head instruction. */
uint64_t readHeadPC();
// uint64_t readHeadPC();
/** Reads the PC of the head instruction of a specific thread. */
uint64_t readHeadPC(unsigned tid);
// uint64_t readHeadPC(unsigned tid);
/** Reads the next PC of the oldest head instruction. */
uint64_t readHeadNextPC();
// uint64_t readHeadNextPC();
/** Reads the next PC of the head instruction of a specific thread. */
uint64_t readHeadNextPC(unsigned tid);
// uint64_t readHeadNextPC(unsigned tid);
/** Reads the sequence number of the oldest head instruction. */
InstSeqNum readHeadSeqNum();
// InstSeqNum readHeadSeqNum();
/** Reads the sequence number of the head instruction of a specific thread.
*/
InstSeqNum readHeadSeqNum(unsigned tid);
// InstSeqNum readHeadSeqNum(unsigned tid);
/** Reads the PC of the youngest tail instruction. */
uint64_t readTailPC();
// uint64_t readTailPC();
/** Reads the PC of the tail instruction of a specific thread. */
uint64_t readTailPC(unsigned tid);
// uint64_t readTailPC(unsigned tid);
/** Reads the sequence number of the youngest tail instruction. */
InstSeqNum readTailSeqNum();
// InstSeqNum readTailSeqNum();
/** Reads the sequence number of tail instruction of a specific thread. */
InstSeqNum readTailSeqNum(unsigned tid);
// InstSeqNum readTailSeqNum(unsigned tid);
/** Checks if the ROB is still in the process of squashing instructions.
* @retval Whether or not the ROB is done squashing.

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -201,20 +201,15 @@ template <class Impl>
void
ROB<Impl>::insertInst(DynInstPtr &inst)
{
// Make sure we have the right number of instructions.
//assert(numInstsInROB == countInsts());
// Make sure the instruction is valid.
assert(inst);
DPRINTF(ROB, "Adding inst PC %#x to the ROB.\n", inst->readPC());
// If the ROB is full then exit.
assert(numInstsInROB != numEntries);
int tid = inst->threadNumber;
// Place into ROB
instList[tid].push_back(inst);
//Set Up head iterator if this is the 1st instruction in the ROB
@ -228,10 +223,8 @@ ROB<Impl>::insertInst(DynInstPtr &inst)
tail = instList[tid].end();
tail--;
// Mark as set in ROB
inst->setInROB();
// Increment ROB count
++numInstsInROB;
++threadEntries[tid];
@ -242,6 +235,7 @@ ROB<Impl>::insertInst(DynInstPtr &inst)
// Whatever calls this function needs to ensure that it properly frees up
// registers prior to this function.
/*
template <class Impl>
void
ROB<Impl>::retireHead()
@ -249,7 +243,6 @@ ROB<Impl>::retireHead()
//assert(numInstsInROB == countInsts());
assert(numInstsInROB > 0);
// Get the head ROB instruction's TID.
int tid = (*head)->threadNumber;
retireHead(tid);
@ -258,6 +251,7 @@ ROB<Impl>::retireHead()
tail = instList[tid].end();
}
}
*/
template <class Impl>
void
@ -271,18 +265,15 @@ ROB<Impl>::retireHead(unsigned tid)
DynInstPtr head_inst = (*head_it);
// Make certain this can retire.
assert(head_inst->readyToCommit());
DPRINTF(ROB, "[tid:%u]: Retiring head instruction, "
"instruction PC %#x,[sn:%lli]\n", tid, head_inst->readPC(),
head_inst->seqNum);
// Keep track of how many instructions are in the ROB.
--numInstsInROB;
--threadEntries[tid];
//Mark DynInstFlags
head_inst->removeInROB();
head_inst->setCommitted();
@ -291,12 +282,12 @@ ROB<Impl>::retireHead(unsigned tid)
//Update "Global" Head of ROB
updateHead();
// A special case is needed if the instruction being retired is the
// only instruction in the ROB; otherwise the tail iterator will become
// invalidated.
// @todo: A special case is needed if the instruction being
// retired is the only instruction in the ROB; otherwise the tail
// iterator will become invalidated.
cpu->removeFrontInst(head_inst);
}
/*
template <class Impl>
bool
ROB<Impl>::isHeadReady()
@ -307,7 +298,7 @@ ROB<Impl>::isHeadReady()
return false;
}
*/
template <class Impl>
bool
ROB<Impl>::isHeadReady(unsigned tid)
@ -537,7 +528,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doSquash(tid);
}
}
/*
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst()
@ -549,7 +540,7 @@ ROB<Impl>::readHeadInst()
return dummyInst;
}
}
*/
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst(unsigned tid)
@ -564,7 +555,7 @@ ROB<Impl>::readHeadInst(unsigned tid)
return dummyInst;
}
}
/*
template <class Impl>
uint64_t
ROB<Impl>::readHeadPC()
@ -608,7 +599,6 @@ ROB<Impl>::readHeadNextPC(unsigned tid)
return (*head_thread)->readNextPC();
}
template <class Impl>
InstSeqNum
ROB<Impl>::readHeadSeqNum()
@ -637,7 +627,7 @@ ROB<Impl>::readTailInst()
return (*tail);
}
*/
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readTailInst(unsigned tid)
@ -650,7 +640,7 @@ ROB<Impl>::readTailInst(unsigned tid)
return *tail_thread;
}
/*
template <class Impl>
uint64_t
ROB<Impl>::readTailPC()
@ -698,4 +688,4 @@ ROB<Impl>::readTailSeqNum(unsigned tid)
return (*tail_thread)->seqNum;
}
*/

View file

@ -99,6 +99,7 @@ Scoreboard::unsetReg(PhysRegIndex ready_reg)
if (ready_reg == zeroRegIdx ||
ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
// Don't do anything if int or fp zero reg.
return;
}
regScoreBoard[ready_reg] = 0;
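
For context, a self-contained sketch of what this one-line hunk fixes (field names mirror the diff; everything else is a hypothetical simplification). The added early return is what actually protects the zero registers; without it the guard matched but the bit was cleared anyway:

    #include <vector>

    struct Scoreboard {
        std::vector<bool> regScoreBoard;
        int zeroRegIdx;            // int zero reg; the fp zero reg sits
        int numPhysicalIntRegs;    // at zeroRegIdx + numPhysicalIntRegs

        void unsetReg(int ready_reg)
        {
            if (ready_reg == zeroRegIdx ||
                ready_reg == zeroRegIdx + numPhysicalIntRegs) {
                return;  // zero registers must always score as ready
            }
            regScoreBoard[ready_reg] = false;
        }
    };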

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -278,11 +278,6 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
void
StoreSet::squash(InstSeqNum squashed_num, unsigned tid)
{
// Not really sure how to do this well.
// Generally this is small enough that it should be okay; short circuit
// evaluation should take care of invalid entries.
// Maybe keep a list of valid LFST's? Really ugly either way...
DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
squashed_num);

View file

@ -1,3 +1,30 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
@ -15,27 +42,17 @@ class EndQuiesceEvent;
class FunctionProfile;
class ProfileNode;
#else
class Process;
class FunctionalMemory;
class Process;
#endif
// In the new CPU case this may be quite small...It depends on what I define
// ThreadState to be. Currently it's only the state that exists within
// ExecContext basically. Leaves the interface and manipulation up to the
// CPU. Not sure this is useful/flexible...probably can be if I can avoid
// including state here that parts of the pipeline can't modify directly,
// or at least don't let them. The only problem is for state that's needed
// per thread, per structure. I.e. rename table, memreqs.
// On the other hand, it might be nice to not have to pay the extra pointer
// lookup to get frequently used state such as a memreq (that isn't used much
// elsewhere)...
// Maybe this ozone thread state should only really have committed state?
// I need to think about why I'm using this and what it's useful for. Clearly
// has benefits for SMT; basically serves same use as CPUExecContext.
// Makes the ExecContext proxy easier. Gives organization/central access point
// to state of a thread that can be accessed normally (i.e. not in-flight
// stuff within a OoO processor). Does this need an XC proxy within it?
/**
* Class that has various thread state, such as the status, the
* current instruction being processed, whether or not the thread has
* a trap pending or is being externally updated, the ExecContext
* proxy pointer, etc. It also handles anything related to a specific
* thread's process, such as syscalls and checking valid addresses.
*/
template <class Impl>
struct O3ThreadState : public ThreadState {
typedef ExecContext::Status Status;
@ -43,7 +60,7 @@ struct O3ThreadState : public ThreadState {
Status _status;
// Current instruction?
// Current instruction
TheISA::MachInst inst;
private:
FullCPU *cpu;
@ -80,51 +97,11 @@ struct O3ThreadState : public ThreadState {
void setStatus(Status new_status) { _status = new_status; }
#if !FULL_SYSTEM
Fault dummyTranslation(MemReqPtr &req)
{
#if 0
assert((req->vaddr >> 48 & 0xffff) == 0);
#endif
// put the asid in the upper 16 bits of the paddr
req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
return NoFault;
}
Fault translateInstReq(MemReqPtr &req)
{
return dummyTranslation(req);
}
Fault translateDataReadReq(MemReqPtr &req)
{
return dummyTranslation(req);
}
Fault translateDataWriteReq(MemReqPtr &req)
{
return dummyTranslation(req);
}
bool validInstAddr(Addr addr)
{ return process->validInstAddr(addr); }
bool validDataAddr(Addr addr)
{ return process->validDataAddr(addr); }
#else
Fault translateInstReq(MemReqPtr &req)
{
return cpu->itb->translate(req);
}
Fault translateDataReadReq(MemReqPtr &req)
{
return cpu->dtb->translate(req, false);
}
Fault translateDataWriteReq(MemReqPtr &req)
{
return cpu->dtb->translate(req, true);
}
#endif
bool misspeculating() { return false; }
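
The dummy translation above is the one subtle piece of this block, so a standalone sketch with a worked value may be useful (the function and variable names are hypothetical; the bit manipulation is taken from the diff). With no page tables in syscall emulation, the "physical" address is just the virtual address with the address-space id stamped into the top 16 bits, which keeps different address spaces from aliasing:

    #include <cassert>
    #include <cstdint>

    typedef uint64_t Addr;

    Addr
    dummyTranslate(Addr vaddr, Addr asid)
    {
        const int shift = sizeof(Addr) * 8 - 16;  // 48 for a 64-bit Addr
        Addr paddr = vaddr & ~((Addr)0xffff << shift);  // clear top 16
        paddr |= asid << shift;                         // stamp in asid
        return paddr;
    }

    int main()
    {
        Addr p = dummyTranslate(0x0000000000100040ULL, 2);
        assert(p == 0x0002000000100040ULL);
        return 0;
    }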