2fb632dbda
branch prediction, and makes memory dependence work properly. SConscript: Added return address stack, tournament predictor. cpu/base_cpu.cc: Added debug break and print statements. cpu/base_dyn_inst.cc: cpu/base_dyn_inst.hh: Comment out possibly unneeded variables. cpu/beta_cpu/2bit_local_pred.cc: 2bit predictor no longer speculatively updates itself. cpu/beta_cpu/alpha_dyn_inst.hh: Comment formatting. cpu/beta_cpu/alpha_full_cpu.hh: Formatting cpu/beta_cpu/alpha_full_cpu_builder.cc: Added new parameters for branch predictors, and IQ parameters. cpu/beta_cpu/alpha_full_cpu_impl.hh: Register stats. cpu/beta_cpu/alpha_params.hh: Added parameters for IQ, branch predictors, and store sets. cpu/beta_cpu/bpred_unit.cc: Removed one class. cpu/beta_cpu/bpred_unit.hh: Add in RAS, stats. Changed branch predictor unit functionality so that it holds a history of past branches so it can update, and also hold a proper history of the RAS so it can be restored on branch mispredicts. cpu/beta_cpu/bpred_unit_impl.hh: Added in stats, history of branches, RAS. Now bpred unit actually modifies the instruction's predicted next PC. cpu/beta_cpu/btb.cc: Add in sanity checks. cpu/beta_cpu/comm.hh: Add in communication where needed, remove it where it's not. cpu/beta_cpu/commit.hh: cpu/beta_cpu/rename.hh: cpu/beta_cpu/rename_impl.hh: Add in stats. cpu/beta_cpu/commit_impl.hh: Stats, update what is sent back on branch mispredict. cpu/beta_cpu/cpu_policy.hh: Change the bpred unit being used. cpu/beta_cpu/decode.hh: cpu/beta_cpu/decode_impl.hh: Stats. cpu/beta_cpu/fetch.hh: Stats, change squash so it can handle squashes from decode differently than squashes from commit. cpu/beta_cpu/fetch_impl.hh: Add in stats. Change how a cache line is fetched. Update to work with caches. Also have separate functions for different behavior if squash is coming from decode vs commit. cpu/beta_cpu/free_list.hh: Remove some old comments. cpu/beta_cpu/full_cpu.cc: cpu/beta_cpu/full_cpu.hh: Added function to remove instructions from back of instruction list until a certain sequence number. cpu/beta_cpu/iew.hh: Stats, separate squashing behavior due to branches vs memory. cpu/beta_cpu/iew_impl.hh: Stats, separate squashing behavior for branches vs memory. cpu/beta_cpu/inst_queue.cc: Debug stuff cpu/beta_cpu/inst_queue.hh: Stats, change how mem dep unit works, debug stuff cpu/beta_cpu/inst_queue_impl.hh: Stats, change how mem dep unit works, debug stuff. Also add in parameters that used to be hardcoded. cpu/beta_cpu/mem_dep_unit.hh: cpu/beta_cpu/mem_dep_unit_impl.hh: Add in stats, change how memory dependence unit works. It now holds the memory instructions that are waiting for their memory dependences to resolve. It provides which instructions are ready directly to the IQ. cpu/beta_cpu/regfile.hh: Fix up sanity checks. cpu/beta_cpu/rename_map.cc: Fix loop variable type. cpu/beta_cpu/rob_impl.hh: Remove intermediate DynInstPtr cpu/beta_cpu/store_set.cc: Add in debugging statements. cpu/beta_cpu/store_set.hh: Reorder function arguments to match the rest of the calls. --HG-- extra : convert_revision : aabf9b1fecd1d743265dfc3b174d6159937c6f44
308 lines
9.6 KiB
C++
308 lines
9.6 KiB
C++
#ifndef __INST_QUEUE_HH__
|
|
#define __INST_QUEUE_HH__
|
|
|
|
#include <list>
|
|
#include <map>
|
|
#include <queue>
|
|
#include <stdint.h>
|
|
#include <vector>
|
|
|
|
#include "base/statistics.hh"
|
|
#include "base/timebuf.hh"
|
|
#include "cpu/inst_seq.hh"
|
|
|
|
/**
|
|
* A standard instruction queue class. It holds instructions in an
|
|
* array, holds the ordering of the instructions within a linked list,
|
|
* and tracks producer/consumer dependencies within a separate linked
|
|
* list. Similar to the rename map and the free list, it expects that
|
|
* floating point registers have their indices start after the integer
|
|
* registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
|
|
* and 96-191 are fp). This remains true even for both logical and
|
|
* physical register indices.
|
|
*/
|
|
template <class Impl>
|
|
class InstructionQueue
|
|
{
|
|
public:
|
|
//Typedefs from the Impl.
|
|
typedef typename Impl::FullCPU FullCPU;
|
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
|
typedef typename Impl::Params Params;
|
|
|
|
typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
|
|
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
|
|
typedef typename Impl::CPUPol::TimeStruct TimeStruct;
|
|
|
|
// Typedef of iterator through the list of instructions. Might be
|
|
// better to untie this from the FullCPU or pass its information to
|
|
// the stages.
|
|
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
|
|
|
/**
|
|
* Struct for comparing entries to be added to the priority queue. This
|
|
* gives reverse ordering to the instructions in terms of sequence
|
|
* numbers: the instructions with smaller sequence numbers (and hence
|
|
* are older) will be at the top of the priority queue.
|
|
*/
|
|
struct pqCompare
|
|
{
|
|
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
|
{
|
|
return lhs->seqNum > rhs->seqNum;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Struct for comparing entries to be added to the set. This gives
|
|
* standard ordering in terms of sequence numbers.
|
|
*/
|
|
struct setCompare
|
|
{
|
|
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
|
{
|
|
return lhs->seqNum < rhs->seqNum;
|
|
}
|
|
};
|
|
|
|
typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare>
|
|
ReadyInstQueue;
|
|
|
|
InstructionQueue(Params ¶ms);
|
|
|
|
void regStats();
|
|
|
|
void setCPU(FullCPU *cpu);
|
|
|
|
void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
|
|
|
|
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
|
|
|
unsigned numFreeEntries();
|
|
|
|
bool isFull();
|
|
|
|
void insert(DynInstPtr &new_inst);
|
|
|
|
void insertNonSpec(DynInstPtr &new_inst);
|
|
|
|
void advanceTail(DynInstPtr &inst);
|
|
|
|
void scheduleReadyInsts();
|
|
|
|
void scheduleNonSpec(const InstSeqNum &inst);
|
|
|
|
void wakeDependents(DynInstPtr &completed_inst);
|
|
|
|
void violation(DynInstPtr &store, DynInstPtr &faulting_load);
|
|
|
|
// Change this to take in the sequence number
|
|
void squash();
|
|
|
|
void doSquash();
|
|
|
|
void stopSquash();
|
|
|
|
/** Debugging function to dump all the list sizes, as well as print
|
|
* out the list of nonspeculative instructions. Should not be used
|
|
* in any other capacity, but it has no harmful sideaffects.
|
|
*/
|
|
void dumpLists();
|
|
|
|
private:
|
|
/** Debugging function to count how many entries are in the IQ. It does
|
|
* a linear walk through the instructions, so do not call this function
|
|
* during normal execution.
|
|
*/
|
|
int countInsts();
|
|
|
|
private:
|
|
/** Pointer to the CPU. */
|
|
FullCPU *cpu;
|
|
|
|
/** The memory dependence unit, which tracks/predicts memory dependences
|
|
* between instructions.
|
|
*/
|
|
MemDepUnit memDepUnit;
|
|
|
|
/** The queue to the execute stage. Issued instructions will be written
|
|
* into it.
|
|
*/
|
|
TimeBuffer<IssueStruct> *issueToExecuteQueue;
|
|
|
|
/** The backwards time buffer. */
|
|
TimeBuffer<TimeStruct> *timeBuffer;
|
|
|
|
/** Wire to read information from timebuffer. */
|
|
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
|
|
|
enum InstList {
|
|
Int,
|
|
Float,
|
|
Branch,
|
|
Memory,
|
|
Misc,
|
|
Squashed,
|
|
None
|
|
};
|
|
|
|
/** List of ready int instructions. Used to keep track of the order in
|
|
* which instructions should issue.
|
|
*/
|
|
ReadyInstQueue readyIntInsts;
|
|
|
|
/** List of ready floating point instructions. */
|
|
ReadyInstQueue readyFloatInsts;
|
|
|
|
/** List of ready branch instructions. */
|
|
ReadyInstQueue readyBranchInsts;
|
|
|
|
/** List of ready memory instructions. */
|
|
// ReadyInstQueue readyMemInsts;
|
|
|
|
/** List of ready miscellaneous instructions. */
|
|
ReadyInstQueue readyMiscInsts;
|
|
|
|
/** List of squashed instructions (which are still valid and in IQ).
|
|
* Implemented using a priority queue; the entries must contain both
|
|
* the IQ index and sequence number of each instruction so that
|
|
* ordering based on sequence numbers can be used.
|
|
*/
|
|
ReadyInstQueue squashedInsts;
|
|
|
|
/** List of non-speculative instructions that will be scheduled
|
|
* once the IQ gets a signal from commit. While it's redundant to
|
|
* have the key be a part of the value (the sequence number is stored
|
|
* inside of DynInst), when these instructions are woken up only
|
|
* the sequence number will be available. Thus it is necessary to be
|
|
* able to search by the sequence number alone.
|
|
*/
|
|
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
|
|
|
|
typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t;
|
|
|
|
/** Number of free IQ entries left. */
|
|
unsigned freeEntries;
|
|
|
|
/** The number of entries in the instruction queue. */
|
|
unsigned numEntries;
|
|
|
|
/** The number of integer instructions that can be issued in one
|
|
* cycle.
|
|
*/
|
|
unsigned intWidth;
|
|
|
|
/** The number of floating point instructions that can be issued
|
|
* in one cycle.
|
|
*/
|
|
unsigned floatWidth;
|
|
|
|
/** The number of branches that can be issued in one cycle. */
|
|
unsigned branchWidth;
|
|
|
|
/** The number of memory instructions that can be issued in one cycle. */
|
|
unsigned memoryWidth;
|
|
|
|
/** The total number of instructions that can be issued in one cycle. */
|
|
unsigned totalWidth;
|
|
|
|
//The number of physical registers in the CPU.
|
|
unsigned numPhysRegs;
|
|
|
|
/** The number of physical integer registers in the CPU. */
|
|
unsigned numPhysIntRegs;
|
|
|
|
/** The number of floating point registers in the CPU. */
|
|
unsigned numPhysFloatRegs;
|
|
|
|
/** Delay between commit stage and the IQ.
|
|
* @todo: Make there be a distinction between the delays within IEW.
|
|
*/
|
|
unsigned commitToIEWDelay;
|
|
|
|
//////////////////////////////////
|
|
// Variables needed for squashing
|
|
//////////////////////////////////
|
|
|
|
/** The sequence number of the squashed instruction. */
|
|
InstSeqNum squashedSeqNum;
|
|
|
|
/** Iterator that points to the youngest instruction in the IQ. */
|
|
ListIt tail;
|
|
|
|
/** Iterator that points to the last instruction that has been squashed.
|
|
* This will not be valid unless the IQ is in the process of squashing.
|
|
*/
|
|
ListIt squashIt;
|
|
|
|
///////////////////////////////////
|
|
// Dependency graph stuff
|
|
///////////////////////////////////
|
|
|
|
class DependencyEntry
|
|
{
|
|
public:
|
|
DynInstPtr inst;
|
|
//Might want to include data about what arch. register the
|
|
//dependence is waiting on.
|
|
DependencyEntry *next;
|
|
|
|
//This function, and perhaps this whole class, stand out a little
|
|
//bit as they don't fit a classification well. I want access
|
|
//to the underlying structure of the linked list, yet at
|
|
//the same time it feels like this should be something abstracted
|
|
//away. So for now it will sit here, within the IQ, until
|
|
//a better implementation is decided upon.
|
|
// This function probably shouldn't be within the entry...
|
|
void insert(DynInstPtr &new_inst);
|
|
|
|
void remove(DynInstPtr &inst_to_remove);
|
|
|
|
// Debug variable, remove when done testing.
|
|
static unsigned mem_alloc_counter;
|
|
};
|
|
|
|
/** Array of linked lists. Each linked list is a list of all the
|
|
* instructions that depend upon a given register. The actual
|
|
* register's index is used to index into the graph; ie all
|
|
* instructions in flight that are dependent upon r34 will be
|
|
* in the linked list of dependGraph[34].
|
|
*/
|
|
DependencyEntry *dependGraph;
|
|
|
|
/** A cache of the recently woken registers. It is 1 if the register
|
|
* has been woken up recently, and 0 if the register has been added
|
|
* to the dependency graph and has not yet received its value. It
|
|
* is basically a secondary scoreboard, and should pretty much mirror
|
|
* the scoreboard that exists in the rename map.
|
|
*/
|
|
vector<bool> regScoreboard;
|
|
|
|
bool addToDependents(DynInstPtr &new_inst);
|
|
void insertDependency(DynInstPtr &new_inst);
|
|
void createDependency(DynInstPtr &new_inst);
|
|
void dumpDependGraph();
|
|
|
|
void addIfReady(DynInstPtr &inst);
|
|
|
|
Stats::Scalar<> iqInstsAdded;
|
|
Stats::Scalar<> iqNonSpecInstsAdded;
|
|
// Stats::Scalar<> iqIntInstsAdded;
|
|
Stats::Scalar<> iqIntInstsIssued;
|
|
// Stats::Scalar<> iqFloatInstsAdded;
|
|
Stats::Scalar<> iqFloatInstsIssued;
|
|
// Stats::Scalar<> iqBranchInstsAdded;
|
|
Stats::Scalar<> iqBranchInstsIssued;
|
|
// Stats::Scalar<> iqMemInstsAdded;
|
|
Stats::Scalar<> iqMemInstsIssued;
|
|
// Stats::Scalar<> iqMiscInstsAdded;
|
|
Stats::Scalar<> iqMiscInstsIssued;
|
|
Stats::Scalar<> iqSquashedInstsIssued;
|
|
Stats::Scalar<> iqLoopSquashStalls;
|
|
Stats::Scalar<> iqSquashedInstsExamined;
|
|
Stats::Scalar<> iqSquashedOperandsExamined;
|
|
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
|
|
|
};
|
|
|
|
#endif //__INST_QUEUE_HH__
|