gem5/cpu/beta_cpu/inst_queue.hh
Kevin Lim 2fb632dbda Check in of various updates to the CPU. Mainly adds in stats, improves
branch prediction, and makes memory dependence work properly.

SConscript:
    Added return address stack, tournament predictor.
cpu/base_cpu.cc:
    Added debug break and print statements.
cpu/base_dyn_inst.cc:
cpu/base_dyn_inst.hh:
    Comment out possibly unneeded variables.
cpu/beta_cpu/2bit_local_pred.cc:
    2bit predictor no longer speculatively updates itself.
cpu/beta_cpu/alpha_dyn_inst.hh:
    Comment formatting.
cpu/beta_cpu/alpha_full_cpu.hh:
    Formatting
cpu/beta_cpu/alpha_full_cpu_builder.cc:
    Added new parameters for branch predictors, and IQ parameters.
cpu/beta_cpu/alpha_full_cpu_impl.hh:
    Register stats.
cpu/beta_cpu/alpha_params.hh:
    Added parameters for IQ, branch predictors, and store sets.
cpu/beta_cpu/bpred_unit.cc:
    Removed one class.
cpu/beta_cpu/bpred_unit.hh:
    Add in RAS, stats.  Changed branch predictor unit functionality
    so that it holds a history of past branches so it can update, and also
    holds a proper history of the RAS so it can be restored on branch
    mispredicts.
cpu/beta_cpu/bpred_unit_impl.hh:
    Added in stats, history of branches, RAS.  Now bpred unit actually
    modifies the instruction's predicted next PC.
cpu/beta_cpu/btb.cc:
    Add in sanity checks.
cpu/beta_cpu/comm.hh:
    Add in communication where needed, remove it where it's not.
cpu/beta_cpu/commit.hh:
cpu/beta_cpu/rename.hh:
cpu/beta_cpu/rename_impl.hh:
    Add in stats.
cpu/beta_cpu/commit_impl.hh:
    Stats, update what is sent back on branch mispredict.
cpu/beta_cpu/cpu_policy.hh:
    Change the bpred unit being used.
cpu/beta_cpu/decode.hh:
cpu/beta_cpu/decode_impl.hh:
    Stats.
cpu/beta_cpu/fetch.hh:
    Stats, change squash so it can handle squashes from decode differently
    than squashes from commit.
cpu/beta_cpu/fetch_impl.hh:
    Add in stats.  Change how a cache line is fetched.  Update to work with
    caches.  Also have separate functions for different behavior if squash
    is coming from decode vs commit.
cpu/beta_cpu/free_list.hh:
    Remove some old comments.
cpu/beta_cpu/full_cpu.cc:
cpu/beta_cpu/full_cpu.hh:
    Added function to remove instructions from back of instruction list
    until a certain sequence number.
cpu/beta_cpu/iew.hh:
    Stats, separate squashing behavior due to branches vs memory.
cpu/beta_cpu/iew_impl.hh:
    Stats, separate squashing behavior for branches vs memory.
cpu/beta_cpu/inst_queue.cc:
    Debug stuff
cpu/beta_cpu/inst_queue.hh:
    Stats, change how mem dep unit works, debug stuff
cpu/beta_cpu/inst_queue_impl.hh:
    Stats, change how mem dep unit works, debug stuff.  Also add in
    parameters that used to be hardcoded.
cpu/beta_cpu/mem_dep_unit.hh:
cpu/beta_cpu/mem_dep_unit_impl.hh:
    Add in stats, change how memory dependence unit works.  It now holds
    the memory instructions that are waiting for their memory dependences
    to resolve.  It provides which instructions are ready directly to the
    IQ.
cpu/beta_cpu/regfile.hh:
    Fix up sanity checks.
cpu/beta_cpu/rename_map.cc:
    Fix loop variable type.
cpu/beta_cpu/rob_impl.hh:
    Remove intermediate DynInstPtr
cpu/beta_cpu/store_set.cc:
    Add in debugging statements.
cpu/beta_cpu/store_set.hh:
    Reorder function arguments to match the rest of the calls.

--HG--
extra : convert_revision : aabf9b1fecd1d743265dfc3b174d6159937c6f44
2004-10-21 18:02:36 -04:00

309 lines
9.6 KiB
C++

#ifndef __INST_QUEUE_HH__
#define __INST_QUEUE_HH__
#include <list>
#include <map>
#include <queue>
#include <stdint.h>
#include <vector>
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
/**
 * A standard instruction queue class.  It holds instructions in an
 * array, holds the ordering of the instructions within a linked list,
 * and tracks producer/consumer dependencies within a separate linked
 * list.  Similar to the rename map and the free list, it expects that
 * floating point registers have their indices start after the integer
 * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
 * and 96-191 are fp).  This remains true for both logical and
 * physical register indices.
 *
 * Only declarations live in this header; the member function definitions
 * are not visible here.  Notes on individual member functions below are
 * therefore derived from their signatures and should be confirmed against
 * the implementation.
 */
template <class Impl>
class InstructionQueue
{
  public:
    //Typedefs from the Impl.
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::Params Params;

    typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
    typedef typename Impl::CPUPol::TimeStruct TimeStruct;

    // Typedef of iterator through the list of instructions.  Might be
    // better to untie this from the FullCPU or pass its information to
    // the stages.
    typedef typename std::list<DynInstPtr>::iterator ListIt;

    /**
     * Struct for comparing entries to be added to the priority queue.  This
     * gives reverse ordering to the instructions in terms of sequence
     * numbers: the instructions with smaller sequence numbers (and hence
     * are older) will be at the top of the priority queue.
     */
    struct pqCompare
    {
        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
        {
            // std::priority_queue keeps the "largest" element on top, so
            // comparing with '>' puts the smallest seqNum on top.
            return lhs->seqNum > rhs->seqNum;
        }
    };

    /**
     * Struct for comparing entries to be added to the set.  This gives
     * standard ordering in terms of sequence numbers.
     * NOTE(review): no set using this comparator is declared in this
     * header -- confirm whether it is still used elsewhere.
     */
    struct setCompare
    {
        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
        {
            return lhs->seqNum < rhs->seqNum;
        }
    };

    /**
     * Priority queue of instructions ordered by pqCompare, i.e. the
     * instruction with the smallest sequence number is at the top.
     * std::vector is spelled out explicitly so that this header does not
     * depend on a using-directive leaking in from another include.
     */
    typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
    ReadyInstQueue;

    /** Constructs the IQ from the given parameter object. */
    InstructionQueue(Params &params);

    /** Presumably registers the Stats:: members declared at the end of
     *  this class -- confirm against the definition.
     */
    void regStats();

    /** Takes a pointer to the CPU (see the cpu member). */
    void setCPU(FullCPU *cpu);

    /** Takes the time buffer towards the execute stage (see the
     *  issueToExecuteQueue member).
     */
    void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);

    /** Takes the backwards time buffer (see the timeBuffer member). */
    void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);

    // Occupancy queries; presumably based on freeEntries -- confirm.
    unsigned numFreeEntries();

    bool isFull();

    // Insertion of instructions.  insertNonSpec() presumably targets the
    // nonSpecInsts map documented below -- confirm.
    void insert(DynInstPtr &new_inst);

    void insertNonSpec(DynInstPtr &new_inst);

    void advanceTail(DynInstPtr &inst);

    // Scheduling.  scheduleNonSpec() is handed only a sequence number,
    // which is why nonSpecInsts is keyed by InstSeqNum.
    void scheduleReadyInsts();

    void scheduleNonSpec(const InstSeqNum &inst);

    void wakeDependents(DynInstPtr &completed_inst);

    void violation(DynInstPtr &store, DynInstPtr &faulting_load);

    // Change this to take in the sequence number
    void squash();

    void doSquash();

    void stopSquash();

    /** Debugging function to dump all the list sizes, as well as print
     *  out the list of nonspeculative instructions.  Should not be used
     *  in any other capacity, but it has no harmful side effects.
     */
    void dumpLists();

  private:
    /** Debugging function to count how many entries are in the IQ.  It does
     *  a linear walk through the instructions, so do not call this function
     *  during normal execution.
     */
    int countInsts();

  private:
    /** Pointer to the CPU. */
    FullCPU *cpu;

    /** The memory dependence unit, which tracks/predicts memory dependences
     *  between instructions.
     */
    MemDepUnit memDepUnit;

    /** The queue to the execute stage.  Issued instructions will be written
     *  into it.
     */
    TimeBuffer<IssueStruct> *issueToExecuteQueue;

    /** The backwards time buffer. */
    TimeBuffer<TimeStruct> *timeBuffer;

    /** Wire to read information from timebuffer. */
    typename TimeBuffer<TimeStruct>::wire fromCommit;

    /** Identifiers for the instruction categories; the first entries
     *  correspond to the ready queues declared below.
     */
    enum InstList {
        Int,
        Float,
        Branch,
        Memory,
        Misc,
        Squashed,
        None
    };

    /** List of ready int instructions.  Used to keep track of the order in
     *  which instructions should issue.
     */
    ReadyInstQueue readyIntInsts;

    /** List of ready floating point instructions. */
    ReadyInstQueue readyFloatInsts;

    /** List of ready branch instructions. */
    ReadyInstQueue readyBranchInsts;

    /** List of ready memory instructions.  (Currently disabled.) */
//    ReadyInstQueue readyMemInsts;

    /** List of ready miscellaneous instructions. */
    ReadyInstQueue readyMiscInsts;

    /** List of squashed instructions (which are still valid and in IQ).
     *  Implemented using a priority queue; the entries must contain both
     *  the IQ index and sequence number of each instruction so that
     *  ordering based on sequence numbers can be used.
     */
    ReadyInstQueue squashedInsts;

    /** List of non-speculative instructions that will be scheduled
     *  once the IQ gets a signal from commit.  While it's redundant to
     *  have the key be a part of the value (the sequence number is stored
     *  inside of DynInst), when these instructions are woken up only
     *  the sequence number will be available.  Thus it is necessary to be
     *  able to search by the sequence number alone.
     */
    std::map<InstSeqNum, DynInstPtr> nonSpecInsts;

    /** Iterator type for nonSpecInsts. */
    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t;

    /** Number of free IQ entries left. */
    unsigned freeEntries;

    /** The number of entries in the instruction queue. */
    unsigned numEntries;

    /** The number of integer instructions that can be issued in one
     *  cycle.
     */
    unsigned intWidth;

    /** The number of floating point instructions that can be issued
     *  in one cycle.
     */
    unsigned floatWidth;

    /** The number of branches that can be issued in one cycle. */
    unsigned branchWidth;

    /** The number of memory instructions that can be issued in one cycle. */
    unsigned memoryWidth;

    /** The total number of instructions that can be issued in one cycle. */
    unsigned totalWidth;

    /** The number of physical registers in the CPU. */
    unsigned numPhysRegs;

    /** The number of physical integer registers in the CPU. */
    unsigned numPhysIntRegs;

    /** The number of floating point registers in the CPU. */
    unsigned numPhysFloatRegs;

    /** Delay between commit stage and the IQ.
     *  @todo: Make there be a distinction between the delays within IEW.
     */
    unsigned commitToIEWDelay;

    //////////////////////////////////
    // Variables needed for squashing
    //////////////////////////////////

    /** The sequence number of the squashed instruction. */
    InstSeqNum squashedSeqNum;

    /** Iterator that points to the youngest instruction in the IQ. */
    ListIt tail;

    /** Iterator that points to the last instruction that has been squashed.
     *  This will not be valid unless the IQ is in the process of squashing.
     */
    ListIt squashIt;

    ///////////////////////////////////
    // Dependency graph stuff
    ///////////////////////////////////

    /** A node of the singly linked dependency lists that make up
     *  dependGraph: holds one instruction and a pointer to the next node.
     */
    class DependencyEntry
    {
      public:
        DynInstPtr inst;
        //Might want to include data about what arch. register the
        //dependence is waiting on.
        DependencyEntry *next;

        //This function, and perhaps this whole class, stand out a little
        //bit as they don't fit a classification well.  I want access
        //to the underlying structure of the linked list, yet at
        //the same time it feels like this should be something abstracted
        //away.  So for now it will sit here, within the IQ, until
        //a better implementation is decided upon.

        // This function probably shouldn't be within the entry...
        void insert(DynInstPtr &new_inst);

        void remove(DynInstPtr &inst_to_remove);

        // Debug variable, remove when done testing.
        static unsigned mem_alloc_counter;
    };

    /** Array of linked lists.  Each linked list is a list of all the
     *  instructions that depend upon a given register.  The actual
     *  register's index is used to index into the graph; ie all
     *  instructions in flight that are dependent upon r34 will be
     *  in the linked list of dependGraph[34].
     */
    DependencyEntry *dependGraph;

    /** A cache of the recently woken registers.  It is 1 if the register
     *  has been woken up recently, and 0 if the register has been added
     *  to the dependency graph and has not yet received its value.  It
     *  is basically a secondary scoreboard, and should pretty much mirror
     *  the scoreboard that exists in the rename map.
     */
    std::vector<bool> regScoreboard;

    // Dependency-graph helpers; definitions are not visible in this header.
    bool addToDependents(DynInstPtr &new_inst);
    void insertDependency(DynInstPtr &new_inst);
    void createDependency(DynInstPtr &new_inst);

    void dumpDependGraph();

    void addIfReady(DynInstPtr &inst);

    // Statistics counters.  The names suggest per-category counts of
    // instructions added/issued and of squash activity; where and how they
    // are incremented is not visible in this header.
    Stats::Scalar<> iqInstsAdded;
    Stats::Scalar<> iqNonSpecInstsAdded;
//    Stats::Scalar<> iqIntInstsAdded;
    Stats::Scalar<> iqIntInstsIssued;
//    Stats::Scalar<> iqFloatInstsAdded;
    Stats::Scalar<> iqFloatInstsIssued;
//    Stats::Scalar<> iqBranchInstsAdded;
    Stats::Scalar<> iqBranchInstsIssued;
//    Stats::Scalar<> iqMemInstsAdded;
    Stats::Scalar<> iqMemInstsIssued;
//    Stats::Scalar<> iqMiscInstsAdded;
    Stats::Scalar<> iqMiscInstsIssued;
    Stats::Scalar<> iqSquashedInstsIssued;
    Stats::Scalar<> iqLoopSquashStalls;
    Stats::Scalar<> iqSquashedInstsExamined;
    Stats::Scalar<> iqSquashedOperandsExamined;
    Stats::Scalar<> iqSquashedNonSpecRemoved;

};
#endif //__INST_QUEUE_HH__