diff --git a/SConscript b/SConscript index 07cdcfdee..fb2b40325 100644 --- a/SConscript +++ b/SConscript @@ -91,8 +91,12 @@ base_sources = Split(''' cpu/exetrace.cc cpu/pc_event.cc cpu/static_inst.cc + cpu/beta_cpu/2bit_local_pred.cc cpu/beta_cpu/alpha_dyn_inst.cc cpu/beta_cpu/alpha_full_cpu.cc + cpu/beta_cpu/alpha_full_cpu_builder.cc + cpu/beta_cpu/bpred_unit.cc + cpu/beta_cpu/btb.cc cpu/beta_cpu/commit.cc cpu/beta_cpu/decode.cc cpu/beta_cpu/fetch.cc @@ -100,9 +104,12 @@ base_sources = Split(''' cpu/beta_cpu/full_cpu.cc cpu/beta_cpu/iew.cc cpu/beta_cpu/inst_queue.cc + cpu/beta_cpu/ldstq.cc + cpu/beta_cpu/mem_dep_unit.cc cpu/beta_cpu/rename.cc cpu/beta_cpu/rename_map.cc cpu/beta_cpu/rob.cc + cpu/beta_cpu/store_set.cc cpu/fast_cpu/fast_cpu.cc cpu/full_cpu/bpred.cc cpu/full_cpu/commit.cc diff --git a/arch/isa_parser.py b/arch/isa_parser.py index f7278628b..f86e6193d 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -638,7 +638,7 @@ CpuModel('FullCPU', 'full_cpu_exec.cc', { 'CPU_exec_context': 'DynInst' }) CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst' }) + { 'CPU_exec_context': 'AlphaDynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. 
The entry key is the model name diff --git a/base/traceflags.py b/base/traceflags.py index 8b4208660..a1fb45177 100644 --- a/base/traceflags.py +++ b/base/traceflags.py @@ -132,6 +132,9 @@ baseFlags = [ 'ROB', 'FreeList', 'RenameMap', + 'LDSTQ', + 'StoreSet', + 'MemDepUnit', 'DynInst', 'FullCPU' ] @@ -150,7 +153,7 @@ compoundFlagMap = { 'DiskImageAll' : [ 'DiskImage', 'DiskImageRead', 'DiskImageWrite' ], 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'DynInst', 'FullCPU'] + 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LDSTQ', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU'] } ############################################################# diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index bd681e1dc..c527eb08b 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -34,6 +34,7 @@ #include #include "base/cprintf.hh" +#include "base/trace.hh" #include "arch/alpha/faults.hh" #include "cpu/exetrace.hh" @@ -67,12 +68,14 @@ my_hash_t thishash; //int break_inst = -1; -template +template BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, FullCPU *cpu) : staticInst(machInst), traceData(NULL), cpu(cpu), xc(cpu->xcBase()) { + DPRINTF(FullCPU, "DynInst: Creating new DynInst.\n"); + effAddr = MemReq::inval_addr; physEffAddr = MemReq::inval_addr; @@ -123,11 +126,13 @@ BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, ++instcount; +// assert(instcount < 50); + DPRINTF(FullCPU, "DynInst: Instruction created. 
Instcount=%i\n", instcount); } -template +template BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) : staticInst(_staticInst), traceData(NULL) { @@ -155,7 +160,7 @@ BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) } } -template +template BaseDynInst::~BaseDynInst() { /* @@ -169,21 +174,21 @@ BaseDynInst::~BaseDynInst() instcount); } -template +template FunctionalMemory * BaseDynInst::getMemory(void) { return xc->mem; } /* -template +template IntReg * BaseDynInst::getIntegerRegs(void) { return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile); } */ -template +template void BaseDynInst::prefetch(Addr addr, unsigned flags) { @@ -229,7 +234,7 @@ BaseDynInst::prefetch(Addr addr, unsigned flags) } } -template +template void BaseDynInst::writeHint(Addr addr, int size, unsigned flags) { @@ -261,7 +266,7 @@ BaseDynInst::writeHint(Addr addr, int size, unsigned flags) /** * @todo Need to find a way to get the cache block size here. */ -template +template Fault BaseDynInst::copySrcTranslate(Addr src) { @@ -284,7 +289,7 @@ BaseDynInst::copySrcTranslate(Addr src) /** * @todo Need to find a way to get the cache block size here. */ -template +template Fault BaseDynInst::copy(Addr dest) { @@ -308,7 +313,7 @@ BaseDynInst::copy(Addr dest) return fault; } -template +template void BaseDynInst::dump() { @@ -317,7 +322,7 @@ BaseDynInst::dump() cprintf("'\n"); } -template +template void BaseDynInst::dump(std::string &outstring) { @@ -330,7 +335,7 @@ BaseDynInst::dump(std::string &outstring) #if 0 -template +template Fault BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) { diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 7651b517e..fe30b5195 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -53,12 +53,12 @@ namespace Trace { class InstRecord; }; -class BaseInst -{ -}; +// Forward declaration. 
+template +class StaticInstPtr; template -class BaseDynInst : public FastAlloc +class BaseDynInst : public FastAlloc, public RefCounted { public: // Typedef for the CPU. @@ -74,7 +74,7 @@ class BaseDynInst : public FastAlloc /// Logical register index type. typedef typename ISA::RegIndex RegIndex; /// Integer register index type. - typedef typename ISA::IntReg IntReg; + typedef typename ISA::IntReg IntReg; enum { MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs @@ -430,6 +430,9 @@ class BaseDynInst : public FastAlloc /** Sets this instruction as ready to commit. */ void setCanCommit() { canCommit = true; } + /** Clears this instruction as being ready to commit. */ + void clearCanCommit() { canCommit = false; } + /** Returns whether or not this instruction is ready to commit. */ bool readyToCommit() const { return canCommit; } diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc new file mode 100644 index 000000000..88c39a9b0 --- /dev/null +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -0,0 +1,110 @@ +#include "base/trace.hh" +#include "cpu/beta_cpu/2bit_local_pred.hh" + +DefaultBP::SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +DefaultBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +DefaultBP::SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +DefaultBP::SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} + +DefaultBP::DefaultBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) + : localPredictorSize(_localPredictorSize), + localCtrBits(_localCtrBits), + instShiftAmt(_instShiftAmt) +{ + // Should do checks here to make sure sizes are correct (powers of 2). 
+ + // Setup the index mask. + indexMask = localPredictorSize - 1; + + DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + + // Setup the array of counters for the local predictor. + localCtrs = new SatCounter[localPredictorSize](localCtrBits); + + DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + localPredictorSize); + + DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + + DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + instShiftAmt); +} + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} + +bool +DefaultBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + local_prediction = localCtrs[local_predictor_idx].read(); + + DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + (int)local_prediction); + + return getPrediction(local_prediction); +} + +void +DefaultBP::update(Addr &branch_addr, bool taken) +{ + unsigned local_predictor_idx; + + // Update the local predictor. 
+ local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +} diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh new file mode 100644 index 000000000..32a7972d0 --- /dev/null +++ b/cpu/beta_cpu/2bit_local_pred.hh @@ -0,0 +1,99 @@ +#ifndef __2BIT_LOCAL_PRED_HH__ +#define __2BIT_LOCAL_PRED_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +class DefaultBP +{ + public: + /** + * Default branch predictor constructor. + */ + DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, bool taken); + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned getLocalIndex(Addr &PC); + + /** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ + class SatCounter + { + public: + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. 
+ */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + uint8_t read() + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; + }; + + /** Array of counters that make up the local predictor. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Number of bits to shift the PC when calculating index. */ + unsigned instShiftAmt; + + /** Mask to get index bits. */ + unsigned indexMask; +}; + +#endif // __2BIT_LOCAL_PRED_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc index a79d3082c..1bfcb8420 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.cc +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -1,102 +1,7 @@ -#ifndef __ALPHA_DYN_INST_CC__ -#define __ALPHA_DYN_INST_CC__ -#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_dyn_inst_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" -// Force instantiation of BaseDynInst -template BaseDynInst; - -AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) - : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) -{ - // Initialize these to illegal values. 
- robIdx = -1; - iqIdx = -1; -} - -AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) - : BaseDynInst(_staticInst) -{ -} - -uint64_t -AlphaDynInst::readUniq() -{ - return cpu->readUniq(); -} - -void -AlphaDynInst::setUniq(uint64_t val) -{ - cpu->setUniq(val); -} - -uint64_t -AlphaDynInst::readFpcr() -{ - return cpu->readFpcr(); -} - -void -AlphaDynInst::setFpcr(uint64_t val) -{ - cpu->setFpcr(val); -} - -#ifdef FULL_SYSTEM -uint64_t -AlphaDynInst::readIpr(int idx, Fault &fault) -{ - return cpu->readIpr(idx, fault); -} -Fault -AlphaDynInst::setIpr(int idx, uint64_t val) -{ - return cpu->setIpr(idx, val); -} - -Fault -AlphaDynInst::hwrei() -{ - return cpu->hwrei(); -} - -int -AlphaDynInst::readIntrFlag() -{ -return cpu->readIntrFlag(); -} - -void -AlphaDynInst::setIntrFlag(int val) -{ - cpu->setIntrFlag(val); -} - -bool -AlphaDynInst::inPalMode() -{ - return cpu->inPalMode(); -} - -void -AlphaDynInst::trap(Fault fault) -{ - cpu->trap(fault); -} - -bool -AlphaDynInst::simPalCheck(int palFunc) -{ - return cpu->simPalCheck(palFunc); -} -#else -void -AlphaDynInst::syscall() -{ - cpu->syscall(); -} -#endif - -#endif // __ALPHA_DYN_INST_CC__ +// Force instantiation of AlphaDynInst for all the implementations that +// are needed. +template AlphaDynInst; diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index 69d145355..4e1cebd11 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -8,10 +8,37 @@ #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/inst_seq.hh" -using namespace std; +/** + * Mostly implementation specific AlphaDynInst. It is templated in case there + * are other implementations that are similar enough to be able to use this + * class without changes. This is mainly useful if there are multiple similar + * CPU implementations of the same ISA. + */ -class AlphaDynInst : public BaseDynInst +template +class AlphaDynInst : public BaseDynInst { + public: + // Typedef for the CPU. 
+ typedef typename Impl::FullCPU FullCPU; + + //Typedef to get the ISA. + typedef typename Impl::ISA ISA; + + /// Binary machine instruction type. + typedef typename ISA::MachInst MachInst; + /// Memory address type. + typedef typename ISA::Addr Addr; + /// Logical register index type. + typedef typename ISA::RegIndex RegIndex; + /// Integer register index type. + typedef typename ISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs + }; + public: /** BaseDynInst constructor given a binary instruction. */ AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, @@ -27,40 +54,6 @@ class AlphaDynInst : public BaseDynInst return fault; } - /** Location of this instruction within the ROB. Might be somewhat - * implementation specific. - * Might not want this data in the inst as it may be deleted prior to - * execution of the stage that needs it. - */ - int robIdx; - - int getROBEntry() - { - return robIdx; - } - - void setROBEntry(int rob_idx) - { - robIdx = rob_idx; - } - - /** Location of this instruction within the IQ. Might be somewhat - * implementation specific. - * Might not want this data in the inst as it may be deleted prior to - * execution of the stage that needs it. 
- */ - int iqIdx; - - int getIQEntry() - { - return iqIdx; - } - - void setIQEntry(int iq_idx) - { - iqIdx = iq_idx; - } - uint64_t readUniq(); void setUniq(uint64_t val); diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh new file mode 100644 index 000000000..8311067db --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -0,0 +1,109 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +template +AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ +} + +template +AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) + : BaseDynInst(_staticInst) +{ +} + +template +uint64_t +AlphaDynInst::readUniq() +{ + return cpu->readUniq(); +} + +template +void +AlphaDynInst::setUniq(uint64_t val) +{ + cpu->setUniq(val); +} + +template +uint64_t +AlphaDynInst::readFpcr() +{ + return cpu->readFpcr(); +} + +template +void +AlphaDynInst::setFpcr(uint64_t val) +{ + cpu->setFpcr(val); +} + +#ifdef FULL_SYSTEM +template +uint64_t +AlphaDynInst::readIpr(int idx, Fault &fault) +{ + return cpu->readIpr(idx, fault); +} + +template +Fault +AlphaDynInst::setIpr(int idx, uint64_t val) +{ + return cpu->setIpr(idx, val); +} + +template +Fault +AlphaDynInst::hwrei() +{ + return cpu->hwrei(); +} + +template +int +AlphaDynInst::readIntrFlag() +{ +return cpu->readIntrFlag(); +} + +template +void +AlphaDynInst::setIntrFlag(int val) +{ + cpu->setIntrFlag(val); +} + +template +bool +AlphaDynInst::inPalMode() +{ + return cpu->inPalMode(); +} + +template +void +AlphaDynInst::trap(Fault fault) +{ + cpu->trap(fault); +} + +template +bool +AlphaDynInst::simPalCheck(int palFunc) +{ + return cpu->simPalCheck(palFunc); +} +#else +template +void +AlphaDynInst::syscall() +{ + cpu->syscall(); +} +#endif + diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc index 880418146..80c4bdec8 100644 --- a/cpu/beta_cpu/alpha_full_cpu.cc +++ 
b/cpu/beta_cpu/alpha_full_cpu.cc @@ -1,911 +1,9 @@ -#include "base/cprintf.hh" -#include "base/statistics.hh" -#include "base/timebuf.hh" -#include "cpu/full_cpu/dd_queue.hh" -#include "cpu/full_cpu/full_cpu.hh" -#include "cpu/full_cpu/rob_station.hh" -#include "mem/cache/cache.hh" // for dynamic cast -#include "mem/mem_interface.hh" -#include "sim/builder.hh" -#include "sim/sim_events.hh" -#include "sim/stats.hh" - -#include "cpu/beta_cpu/alpha_full_cpu.hh" -#include "cpu/beta_cpu/alpha_params.hh" -#include "cpu/beta_cpu/comm.hh" - -AlphaFullCPU::AlphaFullCPU(Params ¶ms) - : FullBetaCPU(params) -{ - - fetch.setCPU(this); - decode.setCPU(this); - rename.setCPU(this); - iew.setCPU(this); - commit.setCPU(this); - - rob.setCPU(this); -} - -#ifndef FULL_SYSTEM - -void -AlphaFullCPU::syscall() -{ - DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); - - squashStages(); - - // Copy over all important state to xc once all the unrolling is done. - copyToXC(); - - process->syscall(xc); - - // Copy over all important state back to normal. - copyFromXC(); -} - -// This is not a pretty function, and should only be used if it is necessary -// to fake having everything squash all at once (ie for non-full system -// syscalls). -void -AlphaFullCPU::squashStages() -{ - InstSeqNum rob_head = rob.readHeadSeqNum(); - - // Now hack the time buffer to put this sequence number in the places - // where the stages might read it. - for (int i = 0; i < 10; ++i) - { - timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; - } - - fetch.squash(rob.readHeadNextPC()); - fetchQueue.advance(); - - decode.squash(); - decodeQueue.advance(); - - rename.squash(); - renameQueue.advance(); - renameQueue.advance(); - - iew.squash(); - iewQueue.advance(); - iewQueue.advance(); - - rob.squash(rob_head); - commit.setSquashing(); -} - -#endif // FULL_SYSTEM - -void -AlphaFullCPU::copyToXC() -{ - PhysRegIndex renamed_reg; - - // First loop through the integer registers. 
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i) - { - renamed_reg = renameMap.lookup(i); - xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; - DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg]); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; - xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; - } - - xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; - xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; - xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; - xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; - - xc->regs.pc = rob.readHeadPC(); - xc->regs.npc = xc->regs.pc+4; - - xc->func_exe_inst = funcExeInst; -} - -// This function will probably mess things up unless the ROB is empty and -// there are no instructions in the pipeline. -void -AlphaFullCPU::copyFromXC() -{ - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) - { - renamed_reg = renameMap.lookup(i); - - DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg], - xc->regs.intRegFile[i]); - - regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; - regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; - } - - // Then loop through the misc registers. 
- regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; - - // Then finally set the PC and the next PC. -// regFile.pc = xc->regs.pc; -// regFile.npc = xc->regs.npc; - - funcExeInst = xc->func_exe_inst; -} - -#ifdef FULL_SYSTEM - -uint64_t * -AlphaFullCPU::getIpr() -{ - return regs.ipr; -} - -uint64_t -AlphaFullCPU::readIpr(int idx, Fault &fault) -{ - uint64_t *ipr = getIpr(); - uint64_t retval = 0; // return value, default 0 - - switch (idx) { - case AlphaISA::IPR_PALtemp0: - case AlphaISA::IPR_PALtemp1: - case AlphaISA::IPR_PALtemp2: - case AlphaISA::IPR_PALtemp3: - case AlphaISA::IPR_PALtemp4: - case AlphaISA::IPR_PALtemp5: - case AlphaISA::IPR_PALtemp6: - case AlphaISA::IPR_PALtemp7: - case AlphaISA::IPR_PALtemp8: - case AlphaISA::IPR_PALtemp9: - case AlphaISA::IPR_PALtemp10: - case AlphaISA::IPR_PALtemp11: - case AlphaISA::IPR_PALtemp12: - case AlphaISA::IPR_PALtemp13: - case AlphaISA::IPR_PALtemp14: - case AlphaISA::IPR_PALtemp15: - case AlphaISA::IPR_PALtemp16: - case AlphaISA::IPR_PALtemp17: - case AlphaISA::IPR_PALtemp18: - case AlphaISA::IPR_PALtemp19: - case AlphaISA::IPR_PALtemp20: - case AlphaISA::IPR_PALtemp21: - case AlphaISA::IPR_PALtemp22: - case AlphaISA::IPR_PALtemp23: - case AlphaISA::IPR_PAL_BASE: - - case AlphaISA::IPR_IVPTBR: - case AlphaISA::IPR_DC_MODE: - case AlphaISA::IPR_MAF_MODE: - case AlphaISA::IPR_ISR: - case AlphaISA::IPR_EXC_ADDR: - case AlphaISA::IPR_IC_PERR_STAT: - case AlphaISA::IPR_DC_PERR_STAT: - case AlphaISA::IPR_MCSR: - case AlphaISA::IPR_ASTRR: - case AlphaISA::IPR_ASTER: - case AlphaISA::IPR_SIRR: - case AlphaISA::IPR_ICSR: - case AlphaISA::IPR_ICM: - case AlphaISA::IPR_DTB_CM: - case AlphaISA::IPR_IPLR: - case AlphaISA::IPR_INTID: - case AlphaISA::IPR_PMCTR: - // no side-effect - retval = ipr[idx]; - break; - - case AlphaISA::IPR_CC: - retval |= ipr[idx] & 
ULL(0xffffffff00000000); - retval |= curTick & ULL(0x00000000ffffffff); - break; - - case AlphaISA::IPR_VA: - retval = ipr[idx]; - break; - - case AlphaISA::IPR_VA_FORM: - case AlphaISA::IPR_MM_STAT: - case AlphaISA::IPR_IFAULT_VA_FORM: - case AlphaISA::IPR_EXC_MASK: - case AlphaISA::IPR_EXC_SUM: - retval = ipr[idx]; - break; - - case AlphaISA::IPR_DTB_PTE: - { - AlphaISA::PTE &pte = dtb->index(!misspeculating()); - - retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; - retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; - retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; - retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; - retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; - retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; - retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; - } - break; - - // write only registers - case AlphaISA::IPR_HWINT_CLR: - case AlphaISA::IPR_SL_XMIT: - case AlphaISA::IPR_DC_FLUSH: - case AlphaISA::IPR_IC_FLUSH: - case AlphaISA::IPR_ALT_MODE: - case AlphaISA::IPR_DTB_IA: - case AlphaISA::IPR_DTB_IAP: - case AlphaISA::IPR_ITB_IA: - case AlphaISA::IPR_ITB_IAP: - fault = Unimplemented_Opcode_Fault; - break; - - default: - // invalid IPR - fault = Unimplemented_Opcode_Fault; - break; - } - - return retval; -} - -Fault -AlphaFullCPU::setIpr(int idx, uint64_t val) -{ - uint64_t *ipr = getIpr(); - uint64_t old; - - if (misspeculating()) - return No_Fault; - - switch (idx) { - case AlphaISA::IPR_PALtemp0: - case AlphaISA::IPR_PALtemp1: - case AlphaISA::IPR_PALtemp2: - case AlphaISA::IPR_PALtemp3: - case AlphaISA::IPR_PALtemp4: - case AlphaISA::IPR_PALtemp5: - case AlphaISA::IPR_PALtemp6: - case AlphaISA::IPR_PALtemp7: - case AlphaISA::IPR_PALtemp8: - case AlphaISA::IPR_PALtemp9: - case AlphaISA::IPR_PALtemp10: - case AlphaISA::IPR_PALtemp11: - case AlphaISA::IPR_PALtemp12: - case AlphaISA::IPR_PALtemp13: - case AlphaISA::IPR_PALtemp14: - case AlphaISA::IPR_PALtemp15: - case AlphaISA::IPR_PALtemp16: - case AlphaISA::IPR_PALtemp17: - case 
AlphaISA::IPR_PALtemp18: - case AlphaISA::IPR_PALtemp19: - case AlphaISA::IPR_PALtemp20: - case AlphaISA::IPR_PALtemp21: - case AlphaISA::IPR_PALtemp22: - case AlphaISA::IPR_PAL_BASE: - case AlphaISA::IPR_IC_PERR_STAT: - case AlphaISA::IPR_DC_PERR_STAT: - case AlphaISA::IPR_PMCTR: - // write entire quad w/ no side-effect - ipr[idx] = val; - break; - - case AlphaISA::IPR_CC_CTL: - // This IPR resets the cycle counter. We assume this only - // happens once... let's verify that. - assert(ipr[idx] == 0); - ipr[idx] = 1; - break; - - case AlphaISA::IPR_CC: - // This IPR only writes the upper 64 bits. It's ok to write - // all 64 here since we mask out the lower 32 in rpcc (see - // isa_desc). - ipr[idx] = val; - break; - - case AlphaISA::IPR_PALtemp23: - // write entire quad w/ no side-effect - old = ipr[idx]; - ipr[idx] = val; - kernelStats.context(old, val); - break; - - case AlphaISA::IPR_DTB_PTE: - // write entire quad w/ no side-effect, tag is forthcoming - ipr[idx] = val; - break; - - case AlphaISA::IPR_EXC_ADDR: - // second least significant bit in PC is always zero - ipr[idx] = val & ~2; - break; - - case AlphaISA::IPR_ASTRR: - case AlphaISA::IPR_ASTER: - // only write least significant four bits - privilege mask - ipr[idx] = val & 0xf; - break; - - case AlphaISA::IPR_IPLR: -#ifdef DEBUG - if (break_ipl != -1 && break_ipl == (val & 0x1f)) - debug_break(); -#endif - - // only write least significant five bits - interrupt level - ipr[idx] = val & 0x1f; - kernelStats.swpipl(ipr[idx]); - break; - - case AlphaISA::IPR_DTB_CM: - kernelStats.mode((val & 0x18) != 0); - - case AlphaISA::IPR_ICM: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; - - case AlphaISA::IPR_ALT_MODE: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; - - case AlphaISA::IPR_MCSR: - // more here after optimization... 
- ipr[idx] = val; - break; - - case AlphaISA::IPR_SIRR: - // only write software interrupt mask - ipr[idx] = val & 0x7fff0; - break; - - case AlphaISA::IPR_ICSR: - ipr[idx] = val & ULL(0xffffff0300); - break; - - case AlphaISA::IPR_IVPTBR: - case AlphaISA::IPR_MVPTBR: - ipr[idx] = val & ULL(0xffffffffc0000000); - break; - - case AlphaISA::IPR_DC_TEST_CTL: - ipr[idx] = val & 0x1ffb; - break; - - case AlphaISA::IPR_DC_MODE: - case AlphaISA::IPR_MAF_MODE: - ipr[idx] = val & 0x3f; - break; - - case AlphaISA::IPR_ITB_ASN: - ipr[idx] = val & 0x7f0; - break; - - case AlphaISA::IPR_DTB_ASN: - ipr[idx] = val & ULL(0xfe00000000000000); - break; - - case AlphaISA::IPR_EXC_SUM: - case AlphaISA::IPR_EXC_MASK: - // any write to this register clears it - ipr[idx] = 0; - break; - - case AlphaISA::IPR_INTID: - case AlphaISA::IPR_SL_RCV: - case AlphaISA::IPR_MM_STAT: - case AlphaISA::IPR_ITB_PTE_TEMP: - case AlphaISA::IPR_DTB_PTE_TEMP: - // read-only registers - return Unimplemented_Opcode_Fault; - - case AlphaISA::IPR_HWINT_CLR: - case AlphaISA::IPR_SL_XMIT: - case AlphaISA::IPR_DC_FLUSH: - case AlphaISA::IPR_IC_FLUSH: - // the following are write only - ipr[idx] = val; - break; - - case AlphaISA::IPR_DTB_IA: - // really a control write - ipr[idx] = 0; - - dtb->flushAll(); - break; - - case AlphaISA::IPR_DTB_IAP: - // really a control write - ipr[idx] = 0; - - dtb->flushProcesses(); - break; - - case AlphaISA::IPR_DTB_IS: - // really a control write - ipr[idx] = val; - - dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); - break; - - case AlphaISA::IPR_DTB_TAG: { - struct AlphaISA::PTE pte; - - // FIXME: granularity hints NYI... 
- if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) - panic("PTE GH field != 0"); - - // write entire quad - ipr[idx] = val; - - // construct PTE for new entry - pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); - pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); - pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); - pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); - pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); - pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); - pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); - - // insert new TAG/PTE value into data TLB - dtb->insert(val, pte); - } - break; - - case AlphaISA::IPR_ITB_PTE: { - struct AlphaISA::PTE pte; - - // FIXME: granularity hints NYI... - if (ITB_PTE_GH(val) != 0) - panic("PTE GH field != 0"); - - // write entire quad - ipr[idx] = val; - - // construct PTE for new entry - pte.ppn = ITB_PTE_PPN(val); - pte.xre = ITB_PTE_XRE(val); - pte.xwe = 0; - pte.fonr = ITB_PTE_FONR(val); - pte.fonw = ITB_PTE_FONW(val); - pte.asma = ITB_PTE_ASMA(val); - pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); - - // insert new TAG/PTE value into data TLB - itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); - } - break; - - case AlphaISA::IPR_ITB_IA: - // really a control write - ipr[idx] = 0; - - itb->flushAll(); - break; - - case AlphaISA::IPR_ITB_IAP: - // really a control write - ipr[idx] = 0; - - itb->flushProcesses(); - break; - - case AlphaISA::IPR_ITB_IS: - // really a control write - ipr[idx] = val; - - itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); - break; - - default: - // invalid IPR - return Unimplemented_Opcode_Fault; - } - - // no error... - return No_Fault; - -} - -int -AlphaFullCPU::readIntrFlag() -{ - return regs.intrflag; -} - -void -AlphaFullCPU::setIntrFlag(int val) -{ - regs.intrflag = val; -} - -// Maybe have this send back from IEW stage to squash and update PC. 
-Fault -AlphaFullCPU::hwrei() -{ - uint64_t *ipr = getIpr(); - - if (!PC_PAL(regs.pc)) - return Unimplemented_Opcode_Fault; - - setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); - - if (!misspeculating()) { - kernelStats.hwrei(); - - if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) - AlphaISA::swap_palshadow(®s, false); - - AlphaISA::check_interrupts = true; - } - - // FIXME: XXX check for interrupts? XXX - return No_Fault; -} - -bool -AlphaFullCPU::inPalMode() -{ - return PC_PAL(readPC()); -} - -bool -AlphaFullCPU::simPalCheck(int palFunc) -{ - kernelStats.callpal(palFunc); - - switch (palFunc) { - case PAL::halt: - halt(); - if (--System::numSystemsRunning == 0) - new SimExitEvent("all cpus halted"); - break; - - case PAL::bpt: - case PAL::bugchk: - if (system->breakpoint()) - return false; - break; - } - - return true; -} - -// Probably shouldn't be able to switch to the trap handler as quickly as -// this. Also needs to get the exception restart address from the commit -// stage. -void -AlphaFullCPU::trap(Fault fault) -{ - uint64_t PC = commit.readPC(); - - DPRINTF(Fault, "Fault %s\n", FaultName(fault)); - Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); - - assert(!misspeculating()); - kernelStats.fault(fault); - - if (fault == Arithmetic_Fault) - panic("Arithmetic traps are unimplemented!"); - - AlphaISA::InternalProcReg *ipr = getIpr(); - - // exception restart address - Get the commit PC - if (fault != Interrupt_Fault || !PC_PAL(PC)) - ipr[AlphaISA::IPR_EXC_ADDR] = PC; - - if (fault == Pal_Fault || fault == Arithmetic_Fault /* || - fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { - // traps... skip faulting instruction - ipr[AlphaISA::IPR_EXC_ADDR] += 4; - } - - if (!PC_PAL(PC)) - AlphaISA::swap_palshadow(®s, true); - - setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); - setNextPC(PC + sizeof(MachInst)); -} - -void -AlphaFullCPU::processInterrupts() -{ - // Check for interrupts here. 
For now can copy the code that exists - // within isa_fullsys_traits.hh. -} - -// swap_palshadow swaps in the values of the shadow registers and -// swaps them with the values of the physical registers that map to the -// same logical index. -void -AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) -{ - if (palShadowEnabled == use_shadow) - panic("swap_palshadow: wrong PAL shadow state"); - - palShadowEnabled = use_shadow; - - // Will have to lookup in rename map to get physical registers, then - // swap. - for (int i = 0; i < AlphaISA::NumIntRegs; i++) { - if (reg_redir[i]) { - AlphaISA::IntReg temp = regs->intRegFile[i]; - regs->intRegFile[i] = regs->palregs[i]; - regs->palregs[i] = temp; - } - } -} - -#endif // FULL_SYSTEM - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) - - Param numThreads; - -#ifdef FULL_SYSTEM -SimObjectParam system; -SimObjectParam itb; -SimObjectParam dtb; -Param mult; -#else -SimObjectVectorParam workload; -SimObjectParam process; -Param asid; -#endif // FULL_SYSTEM -SimObjectParam mem; - -Param max_insts_any_thread; -Param max_insts_all_threads; -Param max_loads_any_thread; -Param max_loads_all_threads; - -SimObjectParam icache; -SimObjectParam dcache; - -Param decodeToFetchDelay; -Param renameToFetchDelay; -Param iewToFetchDelay; -Param commitToFetchDelay; -Param fetchWidth; - -Param renameToDecodeDelay; -Param iewToDecodeDelay; -Param commitToDecodeDelay; -Param fetchToDecodeDelay; -Param decodeWidth; - -Param iewToRenameDelay; -Param commitToRenameDelay; -Param decodeToRenameDelay; -Param renameWidth; - -Param commitToIEWDelay; -Param renameToIEWDelay; -Param issueToExecuteDelay; -Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; - -Param iewToCommitDelay; -Param renameToROBDelay; -Param commitWidth; -Param squashWidth; - -Param numPhysIntRegs; -Param numPhysFloatRegs; -Param numIQEntries; -Param numROBEntries; - -Param defReg; - -END_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) - 
-BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) - - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#ifdef FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data translation buffer"), - INIT_PARAM_DFLT(mult, "System clock multiplier", 1), -#else - INIT_PARAM(workload, "Processes to run"), - INIT_PARAM_DFLT(process, "Process to run", NULL), - INIT_PARAM(asid, "Address space ID"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - "this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - 
INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, "Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM(defReg, "Defer registration") - -END_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) - -CREATE_SIM_OBJECT(AlphaFullCPU) -{ - AlphaFullCPU *cpu; - -#ifdef FULL_SYSTEM - if (mult != 1) - panic("Processor clock multiplier must be 1?\n"); - - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? 
numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - - Process *actual_process; - - if (process == NULL) { - actual_process = workload[0]; - } else { - actual_process = process; - } - -#endif - - AlphaSimpleParams params; - - params.name = getInstanceName(); - params.numberOfThreads = actual_num_threads; - -#ifdef FULL_SYSTEM - params._system = system; - params.itb = itb; - params.dtb = dtb; - params.freq = ticksPerSecond * mult; -#else - params.workload = workload; - params.process = actual_process; - params.asid = asid; -#endif // FULL_SYSTEM - - params.mem = mem; - - params.maxInstsAnyThread = max_insts_any_thread; - params.maxInstsAllThreads = max_insts_all_threads; - params.maxLoadsAnyThread = max_loads_any_thread; - params.maxLoadsAllThreads = max_loads_all_threads; - - // - // Caches - // - params.icacheInterface = icache ? icache->getInterface() : NULL; - params.dcacheInterface = dcache ? dcache->getInterface() : NULL; - - params.decodeToFetchDelay = decodeToFetchDelay; - params.renameToFetchDelay = renameToFetchDelay; - params.iewToFetchDelay = iewToFetchDelay; - params.commitToFetchDelay = commitToFetchDelay; - params.fetchWidth = fetchWidth; - - params.renameToDecodeDelay = renameToDecodeDelay; - params.iewToDecodeDelay = iewToDecodeDelay; - params.commitToDecodeDelay = commitToDecodeDelay; - params.fetchToDecodeDelay = fetchToDecodeDelay; - params.decodeWidth = decodeWidth; - - params.iewToRenameDelay = iewToRenameDelay; - params.commitToRenameDelay = commitToRenameDelay; - params.decodeToRenameDelay = decodeToRenameDelay; - params.renameWidth = renameWidth; - - params.commitToIEWDelay = commitToIEWDelay; - params.renameToIEWDelay = renameToIEWDelay; - params.issueToExecuteDelay = issueToExecuteDelay; - params.issueWidth = issueWidth; - params.executeWidth = executeWidth; - params.executeIntWidth = executeIntWidth; - params.executeFloatWidth = executeFloatWidth; - - params.iewToCommitDelay = 
iewToCommitDelay; - params.renameToROBDelay = renameToROBDelay; - params.commitWidth = commitWidth; - params.squashWidth = squashWidth; - - params.numPhysIntRegs = numPhysIntRegs; - params.numPhysFloatRegs = numPhysFloatRegs; - params.numIQEntries = numIQEntries; - params.numROBEntries = numROBEntries; - - params.defReg = defReg; - - cpu = new AlphaFullCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("AlphaFullCPU", AlphaFullCPU) - +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +// Force instantiation of AlphaFullCPU for all the implemntations that are +// needed. Consider merging this and alpha_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (alpha_impl.cc?). +template AlphaFullCPU; diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index b098aaac1..0e094b122 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -6,18 +6,19 @@ #ifndef __ALPHA_FULL_CPU_HH__ #define __ALPHA_FULL_CPU_HH__ -// To include: comm, impl, full cpu, ITB/DTB if full sys, -#include "cpu/beta_cpu/comm.hh" -#include "cpu/beta_cpu/alpha_impl.hh" +// To include: comm, full cpu, ITB/DTB if full sys, +//#include "cpu/beta_cpu/comm.hh" +//#include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/full_cpu.hh" using namespace std; -class AlphaFullCPU : public FullBetaCPU +template +class AlphaFullCPU : public FullBetaCPU { public: - typedef AlphaSimpleImpl::ISA AlphaISA; - typedef AlphaSimpleImpl::Params Params; + typedef typename Impl::ISA AlphaISA; + typedef typename Impl::Params Params; public: AlphaFullCPU(Params ¶ms); diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc new file mode 100644 index 000000000..5fe96d656 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -0,0 +1,306 @@ +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" + 
+#include "mem/cache/base_cache.hh" + +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#ifdef FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "dev/alpha_access.h" +#include "dev/pciareg.h" +#include "mem/functional_mem/memory_control.hh" +#include "mem/functional_mem/physical_memory.hh" +#include "sim/system.hh" +#include "targetarch/alpha_memory.hh" +#include "targetarch/vtophys.hh" +#else // !FULL_SYSTEM +#include "eio/eio.hh" +#include "mem/functional_mem/functional_memory.hh" +#endif // FULL_SYSTEM + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) + + Param numThreads; + +#ifdef FULL_SYSTEM +SimObjectParam system; +SimObjectParam itb; +SimObjectParam dtb; +Param mult; +#else +SimObjectVectorParam workload; +SimObjectParam process; +Param asid; +#endif // FULL_SYSTEM +SimObjectParam mem; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param 
localPredictorSize; +Param localPredictorCtrBits; +Param BTBEntries; +Param BTBTagSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param instShiftAmt; + +Param defReg; + +END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) + + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#ifdef FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), + INIT_PARAM_DFLT(mult, "System clock multiplier", 1), +#else + INIT_PARAM(workload, "Processes to run"), + INIT_PARAM_DFLT(process, "Process to run", NULL), + INIT_PARAM(asid, "Address space ID"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to 
rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. " + "Must be a power of 2."), + INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + + INIT_PARAM(defReg, "Defer registration") + +END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) + +CREATE_SIM_OBJECT(BaseFullCPU) +{ + AlphaFullCPU *cpu; + +#ifdef FULL_SYSTEM + if (mult != 1) + panic("Processor clock multiplier must be 1?\n"); + + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. 
+ int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + Process *actual_process; + + if (process == NULL) { + actual_process = workload[0]; + } else { + actual_process = process; + } + +#endif + + AlphaSimpleParams params; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#ifdef FULL_SYSTEM + params._system = system; + params.itb = itb; + params.dtb = dtb; + params.freq = ticksPerSecond * mult; +#else + params.workload = workload; + params.process = actual_process; + params.asid = asid; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.maxInstsAnyThread = max_insts_any_thread; + params.maxInstsAllThreads = max_insts_all_threads; + params.maxLoadsAnyThread = max_loads_any_thread; + params.maxLoadsAllThreads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? 
dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + + params.iewToCommitDelay = iewToCommitDelay; + params.renameToROBDelay = renameToROBDelay; + params.commitWidth = commitWidth; + params.squashWidth = squashWidth; + + params.localPredictorSize = localPredictorSize; + params.localPredictorCtrBits = localPredictorCtrBits; + params.BTBEntries = BTBEntries; + params.BTBTagSize = BTBTagSize; + + params.numPhysIntRegs = numPhysIntRegs; + params.numPhysFloatRegs = numPhysFloatRegs; + params.numIQEntries = numIQEntries; + params.numROBEntries = numROBEntries; + + params.instShiftAmt = 2; + + params.defReg = defReg; + + cpu = new AlphaFullCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU) + diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh new file mode 100644 index 000000000..8bfc0777e --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -0,0 +1,690 @@ + +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include 
"mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_params.hh" +#include "cpu/beta_cpu/comm.hh" + +template +AlphaFullCPU::AlphaFullCPU(Params ¶ms) + : FullBetaCPU(params) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + + fetch.setCPU(this); + decode.setCPU(this); + rename.setCPU(this); + iew.setCPU(this); + commit.setCPU(this); + + rob.setCPU(this); +} + +#ifndef FULL_SYSTEM + +template +void +AlphaFullCPU::syscall() +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + // Commit stage needs to run as well. + commit.tick(); + + squashStages(); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++funcExeInst; + + // Copy over all important state to xc once all the unrolling is done. + copyToXC(); + + process->syscall(xc); + + // Copy over all important state back to CPU. + copyFromXC(); + + // Decrease funcExeInst by one as the normal commit will handle + // incrememnting it. + --funcExeInst; +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls). Maybe put this at the FullCPU level? +template +void +AlphaFullCPU::squashStages() +{ + InstSeqNum rob_head = rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it. 
+ for (int i = 0; i < 5; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + fetch.squash(rob.readHeadNextPC()); + fetchQueue.advance(); + + decode.squash(); + decodeQueue.advance(); + + rename.squash(); + renameQueue.advance(); + renameQueue.advance(); + + // Be sure to advance the IEW queues so that the commit stage doesn't + // try to set an instruction as completed at the same time that it + // might be deleting it. + iew.squash(); + iewQueue.advance(); + iewQueue.advance(); + + rob.squash(rob_head); + commit.setSquashing(); +} + +#endif // FULL_SYSTEM + +template +void +AlphaFullCPU::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; + xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; + } + + xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; + xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; + xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; + xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + + xc->regs.pc = rob.readHeadPC(); + xc->regs.npc = xc->regs.pc+4; + + xc->func_exe_inst = funcExeInst; +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +template +void +AlphaFullCPU::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. 
+ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg], + xc->regs.intRegFile[i]); + + regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. +// regFile.pc = xc->regs.pc; +// regFile.npc = xc->regs.npc; + + funcExeInst = xc->func_exe_inst; +} + +#ifdef FULL_SYSTEM + +template +uint64_t * +AlphaFullCPU::getIpr() +{ + return regs.ipr; +} + +template +uint64_t +AlphaFullCPU::readIpr(int idx, Fault &fault) +{ + uint64_t *ipr = getIpr(); + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case 
AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PALtemp23: + case AlphaISA::IPR_PAL_BASE: + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + case AlphaISA::IPR_ISR: + case AlphaISA::IPR_EXC_ADDR: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_MCSR: + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + case AlphaISA::IPR_SIRR: + case AlphaISA::IPR_ICSR: + case AlphaISA::IPR_ICM: + case AlphaISA::IPR_DTB_CM: + case AlphaISA::IPR_IPLR: + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case AlphaISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case AlphaISA::IPR_VA: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_VA_FORM: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_IFAULT_VA_FORM: + case AlphaISA::IPR_EXC_MASK: + case AlphaISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_DTB_PTE: + { + AlphaISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + case AlphaISA::IPR_ALT_MODE: + case AlphaISA::IPR_DTB_IA: + case AlphaISA::IPR_DTB_IAP: + case AlphaISA::IPR_ITB_IA: + case AlphaISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +template +Fault 
+AlphaFullCPU::setIpr(int idx, uint64_t val) +{ + uint64_t *ipr = getIpr(); + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PAL_BASE: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case AlphaISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case AlphaISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case AlphaISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case AlphaISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case AlphaISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case AlphaISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + + case AlphaISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_MCSR: + // more here after optimization... 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case AlphaISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case AlphaISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case AlphaISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case AlphaISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case AlphaISA::IPR_EXC_SUM: + case AlphaISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_SL_RCV: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_ITB_PTE_TEMP: + case AlphaISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case AlphaISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case AlphaISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case AlphaISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); + break; + + case AlphaISA::IPR_DTB_TAG: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case AlphaISA::IPR_ITB_PTE: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); + } + break; + + case AlphaISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case AlphaISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case AlphaISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... + return No_Fault; + +} + +template +int +AlphaFullCPU::readIntrFlag() +{ + return regs.intrflag; +} + +template +void +AlphaFullCPU::setIntrFlag(int val) +{ + regs.intrflag = val; +} + +// Maybe have this send back from IEW stage to squash and update PC. 
+template +Fault +AlphaFullCPU::hwrei() +{ + uint64_t *ipr = getIpr(); + + if (!PC_PAL(regs.pc)) + return Unimplemented_Opcode_Fault; + + setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); + + if (!misspeculating()) { + kernelStats.hwrei(); + + if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) + AlphaISA::swap_palshadow(®s, false); + + AlphaISA::check_interrupts = true; + } + + // FIXME: XXX check for interrupts? XXX + return No_Fault; +} + +template +bool +AlphaFullCPU::inPalMode() +{ + return PC_PAL(readPC()); +} + +template +bool +AlphaFullCPU::simPalCheck(int palFunc) +{ + kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +template +void +AlphaFullCPU::trap(Fault fault) +{ + uint64_t PC = commit.readPC(); + + DPRINTF(Fault, "Fault %s\n", FaultName(fault)); + Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); + + assert(!misspeculating()); + kernelStats.fault(fault); + + if (fault == Arithmetic_Fault) + panic("Arithmetic traps are unimplemented!"); + + AlphaISA::InternalProcReg *ipr = getIpr(); + + // exception restart address - Get the commit PC + if (fault != Interrupt_Fault || !PC_PAL(PC)) + ipr[AlphaISA::IPR_EXC_ADDR] = PC; + + if (fault == Pal_Fault || fault == Arithmetic_Fault /* || + fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { + // traps... skip faulting instruction + ipr[AlphaISA::IPR_EXC_ADDR] += 4; + } + + if (!PC_PAL(PC)) + AlphaISA::swap_palshadow(®s, true); + + setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); + setNextPC(PC + sizeof(MachInst)); +} + +template +void +AlphaFullCPU::processInterrupts() +{ + // Check for interrupts here. 
For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +template +void +AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. + for (int i = 0; i < AlphaISA::NumIntRegs; i++) { + if (reg_redir[i]) { + AlphaISA::IntReg temp = regs->intRegFile[i]; + regs->intRegFile[i] = regs->palregs[i]; + regs->palregs[i] = temp; + } + } +} + +#endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh index a80b116a8..fc86dacd7 100644 --- a/cpu/beta_cpu/alpha_impl.hh +++ b/cpu/beta_cpu/alpha_impl.hh @@ -3,23 +3,14 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/beta_cpu/comm.hh" #include "cpu/beta_cpu/cpu_policy.hh" #include "cpu/beta_cpu/alpha_params.hh" -#include "cpu/beta_cpu/commit.hh" -#include "cpu/beta_cpu/decode.hh" -#include "cpu/beta_cpu/fetch.hh" -#include "cpu/beta_cpu/free_list.hh" -#include "cpu/beta_cpu/iew.hh" - -#include "cpu/beta_cpu/inst_queue.hh" -#include "cpu/beta_cpu/regfile.hh" -#include "cpu/beta_cpu/rename.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/rob.hh" - +// Forward declarations. +template class AlphaDynInst; + +template class AlphaFullCPU; /** Implementation specific struct that defines several key things to the @@ -42,33 +33,22 @@ struct AlphaSimpleImpl typedef SimpleCPUPolicy CPUPol; /** The DynInst to be used. */ - typedef AlphaDynInst DynInst; + typedef AlphaDynInst DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr DynInstPtr; /** The FullCPU to be used. 
*/ - typedef AlphaFullCPU FullCPU; + typedef AlphaFullCPU FullCPU; /** The Params to be passed to each stage. */ typedef AlphaSimpleParams Params; - /** The struct for communication between fetch and decode. */ - typedef SimpleFetchSimpleDecode FetchStruct; - - /** The struct for communication between decode and rename. */ - typedef SimpleDecodeSimpleRename DecodeStruct; - - /** The struct for communication between rename and IEW. */ - typedef SimpleRenameSimpleIEW RenameStruct; - - /** The struct for communication between IEW and commit. */ - typedef SimpleIEWSimpleCommit IEWStruct; - - /** The struct for communication within the IEW stage. */ - typedef IssueStruct IssueStruct; - - /** The struct for all backwards communication. */ - typedef TimeBufStruct TimeStruct; + enum { + MaxWidth = 8 + }; }; - - #endif // __ALPHA_IMPL_HH__ diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh index b217ef8e3..92dfd35f5 100644 --- a/cpu/beta_cpu/alpha_params.hh +++ b/cpu/beta_cpu/alpha_params.hh @@ -1,6 +1,8 @@ #ifndef __ALPHA_SIMPLE_PARAMS_HH__ #define __ALPHA_SIMPLE_PARAMS_HH__ +#include "cpu/beta_cpu/full_cpu.hh" + //Forward declarations class System; class AlphaITB; @@ -15,16 +17,11 @@ class MemInterface; * defined that it can pass to all of the individual stages. 
*/ -class AlphaSimpleParams +class AlphaSimpleParams : public BaseFullCPU::Params { public: - std::string name; - int numberOfThreads; - #ifdef FULL_SYSTEM - System *_system; AlphaITB *itb; AlphaDTB *dtb; - Tick freq; #else std::vector workload; Process *process; @@ -33,34 +30,41 @@ class AlphaSimpleParams FunctionalMemory *mem; - Counter maxInstsAnyThread; - Counter maxInstsAllThreads; - Counter maxLoadsAnyThread; - Counter maxLoadsAllThreads; - // // Caches // MemInterface *icacheInterface; MemInterface *dcacheInterface; + // + // Fetch + // unsigned decodeToFetchDelay; unsigned renameToFetchDelay; unsigned iewToFetchDelay; unsigned commitToFetchDelay; unsigned fetchWidth; + // + // Decode + // unsigned renameToDecodeDelay; unsigned iewToDecodeDelay; unsigned commitToDecodeDelay; unsigned fetchToDecodeDelay; unsigned decodeWidth; + // + // Rename + // unsigned iewToRenameDelay; unsigned commitToRenameDelay; unsigned decodeToRenameDelay; unsigned renameWidth; + // + // IEW + // unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; @@ -69,16 +73,39 @@ class AlphaSimpleParams unsigned executeIntWidth; unsigned executeFloatWidth; + // + // Commit + // unsigned iewToCommitDelay; unsigned renameToROBDelay; unsigned commitWidth; unsigned squashWidth; + // + // Branch predictor (BP & BTB) + // + unsigned localPredictorSize; + unsigned localPredictorCtrBits; + unsigned BTBEntries; + unsigned BTBTagSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Miscellaneous + // unsigned numPhysIntRegs; unsigned numPhysFloatRegs; unsigned numIQEntries; unsigned numROBEntries; + // Probably can get this from somewhere. 
+ unsigned instShiftAmt; + bool defReg; }; diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc new file mode 100644 index 000000000..6de2def44 --- /dev/null +++ b/cpu/beta_cpu/bpred_unit.cc @@ -0,0 +1,5 @@ + +#include "cpu/beta_cpu/bpred_unit_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template DefaultBPredUnit; diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh new file mode 100644 index 000000000..71191f5b7 --- /dev/null +++ b/cpu/beta_cpu/bpred_unit.hh @@ -0,0 +1,51 @@ + +#ifndef __BPRED_UNIT_HH__ +#define __BPRED_UNIT_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +#include "cpu/beta_cpu/2bit_local_pred.hh" +#include "cpu/beta_cpu/btb.hh" + +/** + * Basically a wrapper class to hold both the branch predictor + * and the BTB. Right now I'm unsure of the implementation; it would + * be nicer to have something closer to the CPUPolicy or the Impl where + * this is just typedefs, but it forces the upper level stages to be + * aware of the constructors of the BP and the BTB. The nicer thing + * to do is have this templated on the Impl, accept the usual Params + * object, and be able to call the constructors on the BP and BTB. 
+ */ +template +class DefaultBPredUnit +{ + public: + typedef typename Impl::Params Params; + + DefaultBPredUnit(Params &params); + + bool BPLookup(Addr &inst_PC) + { return BP.lookup(inst_PC); } + + bool BTBValid(Addr &inst_PC) + { return BTB.valid(inst_PC); } + + Addr BTBLookup(Addr &inst_PC) + { return BTB.lookup(inst_PC); } + + void BPUpdate(Addr &inst_PC, bool taken) + { BP.update(inst_PC, taken); } + + void BTBUpdate(Addr &inst_PC, Addr &target_PC) + { BTB.update(inst_PC, target_PC); } + + private: + + DefaultBP BP; + + DefaultBTB BTB; + +}; + +#endif // __BPRED_UNIT_HH__ diff --git a/cpu/beta_cpu/bpred_unit_impl.hh b/cpu/beta_cpu/bpred_unit_impl.hh new file mode 100644 index 000000000..47415ce9b --- /dev/null +++ b/cpu/beta_cpu/bpred_unit_impl.hh @@ -0,0 +1,13 @@ + +#include "cpu/beta_cpu/bpred_unit.hh" + +template +DefaultBPredUnit::DefaultBPredUnit(Params &params) + : BP(params.localPredictorSize, + params.localPredictorCtrBits, + params.instShiftAmt), + BTB(params.BTBEntries, + params.BTBTagSize, + params.instShiftAmt) +{ +} diff --git a/cpu/beta_cpu/btb.cc b/cpu/beta_cpu/btb.cc new file mode 100644 index 000000000..b49f30482 --- /dev/null +++ b/cpu/beta_cpu/btb.cc @@ -0,0 +1,85 @@ +#include + +#include "cpu/beta_cpu/btb.hh" +#include "base/trace.hh" + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _tagBits, + unsigned _instShiftAmt) + : numEntries(_numEntries), + tagBits(_tagBits), + instShiftAmt(_instShiftAmt) +{ + // @todo Check to make sure num_entries is valid (a power of 2) + + DPRINTF(Fetch, "BTB: Creating BTB object.\n"); + + btb = new BTBEntry[numEntries]; + + for (int i = 0; i < numEntries; ++i) + { + btb[i].valid = false; + } + + idxMask = numEntries - 1; + + tagMask = (1 << tagBits) - 1; + + tagShiftAmt = instShiftAmt + (int)log2(numEntries); +} + +inline +unsigned +DefaultBTB::getIndex(const Addr &inst_PC) +{ + // Need to shift PC over by the word offset. 
+ return (inst_PC >> instShiftAmt) & idxMask; +} + +inline +Addr +DefaultBTB::getTag(const Addr &inst_PC) +{ + return (inst_PC >> tagShiftAmt) & tagMask; +} + +bool +DefaultBTB::valid(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return true; + } else { + return false; + } +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr +DefaultBTB::lookup(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return btb[btb_idx].target; + } else { + return 0; + } +} + +void +DefaultBTB::update(const Addr &inst_PC, const Addr &target) +{ + unsigned btb_idx = getIndex(inst_PC); + + btb[btb_idx].valid = true; + btb[btb_idx].target = target; + btb[btb_idx].tag = getTag(inst_PC); +} diff --git a/cpu/beta_cpu/btb.hh b/cpu/beta_cpu/btb.hh new file mode 100644 index 000000000..81069eabe --- /dev/null +++ b/cpu/beta_cpu/btb.hh @@ -0,0 +1,52 @@ +#ifndef __BTB_HH__ +#define __BTB_HH__ + +// For Addr type. 
+#include "arch/alpha/isa_traits.hh" + +class DefaultBTB +{ + private: + struct BTBEntry + { + BTBEntry() + : tag(0), target(0), valid(false) + { + } + + Addr tag; + Addr target; + bool valid; + }; + + public: + DefaultBTB(unsigned numEntries, unsigned tagBits, + unsigned instShiftAmt); + + Addr lookup(const Addr &inst_PC); + + bool valid(const Addr &inst_PC); + + void update(const Addr &inst_PC, const Addr &target_PC); + + private: + inline unsigned getIndex(const Addr &inst_PC); + + inline Addr getTag(const Addr &inst_PC); + + BTBEntry *btb; + + unsigned numEntries; + + unsigned idxMask; + + unsigned tagBits; + + unsigned tagMask; + + unsigned instShiftAmt; + + unsigned tagShiftAmt; +}; + +#endif // __BTB_HH__ diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh index 21a530ecf..849a6c797 100644 --- a/cpu/beta_cpu/comm.hh +++ b/cpu/beta_cpu/comm.hh @@ -2,6 +2,7 @@ #define __COMM_HH__ #include +#include #include "arch/alpha/isa_traits.hh" #include "cpu/inst_seq.hh" @@ -10,34 +11,49 @@ using namespace std; // Find better place to put this typedef. typedef short int PhysRegIndex; -// Might want to put constructors/destructors here. template struct SimpleFetchSimpleDecode { - // Consider having a field of how many ready instructions. - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct SimpleDecodeSimpleRename { - // Consider having a field of how many ready instructions. - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct SimpleRenameSimpleIEW { - // Consider having a field of how many ready instructions. - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct SimpleIEWSimpleCommit { - // Consider having a field of how many ready instructions. 
- typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct IssueStruct { - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; struct TimeBufStruct { @@ -47,11 +63,9 @@ struct TimeBufStruct { bool predIncorrect; uint64_t branchAddr; - //Question, is it worthwhile to have this Addr passed along - //by each stage, or just have Fetch look it up in the proper - //amount of cycles in the time buffer? - //Both might actually be needed because decode can send a different - //nextPC if the bpred was wrong. + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; uint64_t nextPC; }; @@ -72,14 +86,14 @@ struct TimeBufStruct { struct iewComm { bool squash; bool stall; - bool predIncorrect; // Also eventually include skid buffer space. unsigned freeIQEntries; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; uint64_t nextPC; - // For now hardcode the type. - // Change this to sequence number eventually. InstSeqNum squashedSeqNum; }; @@ -90,18 +104,31 @@ struct TimeBufStruct { bool stall; unsigned freeROBEntries; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; uint64_t nextPC; // Think of better names here. // Will need to be a variety of sizes... // Maybe make it a vector, that way only need one object. - vector freeRegs; + std::vector freeRegs; bool robSquashing; + // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the // retired or squashed sequence number. InstSeqNum doneSeqNum; + + // Extra bits of information so that the LDSTQ only updates when it + // needs to. + bool commitIsStore; + bool commitIsLoad; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. 
+ InstSeqNum nonSpecSeqNum; }; commitComm commitInfo; diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh index 0e5a96e2a..981d9e78f 100644 --- a/cpu/beta_cpu/commit.hh +++ b/cpu/beta_cpu/commit.hh @@ -1,6 +1,4 @@ -// Todo: Squash properly. Have commit be able to send a squash signal -// to previous stages; will be needed when trap() is implemented. -// Maybe have a special method for handling interrupts/traps. +// Todo: Maybe have a special method for handling interrupts/traps. // // Traps: Have IEW send a signal to commit saying that there's a trap to // be handled. Have commit send the PC back to the fetch stage, along @@ -17,12 +15,11 @@ #ifndef __SIMPLE_COMMIT_HH__ #define __SIMPLE_COMMIT_HH__ -//Includes: ROB, time buffer, structs, memory interface -#include "arch/alpha/isa_traits.hh" +//#include "arch/alpha/isa_traits.hh" #include "base/timebuf.hh" -#include "cpu/beta_cpu/comm.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/rob.hh" +//#include "cpu/beta_cpu/comm.hh" +//#include "cpu/beta_cpu/rename_map.hh" +//#include "cpu/beta_cpu/rob.hh" #include "mem/memory_interface.hh" template @@ -32,14 +29,15 @@ class SimpleCommit // Typedefs from the Impl. 
typedef typename Impl::ISA ISA; typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; - typedef typename Impl::CPUPol::ROB ROB; + typedef typename CPUPol::ROB ROB; - typedef typename Impl::TimeStruct TimeStruct; - typedef typename Impl::IEWStruct IEWStruct; - typedef typename Impl::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; public: // I don't believe commit can block, so it will only have two @@ -83,7 +81,7 @@ class SimpleCommit void commitInsts(); - bool commitHead(DynInst *head_inst, unsigned inst_num); + bool commitHead(DynInstPtr &head_inst, unsigned inst_num); void getInsts(); @@ -117,7 +115,7 @@ class SimpleCommit FullCPU *cpu; /** Pointer to the rename map. DO NOT USE if possible. */ - typename Impl::CPUPol::RenameMap *renameMap; +// typename Impl::CPUPol::RenameMap *renameMap; //Store buffer interface? 
Will need to move committed stores to the //store buffer diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh index bc8db0ce0..45b8bc7de 100644 --- a/cpu/beta_cpu/commit_impl.hh +++ b/cpu/beta_cpu/commit_impl.hh @@ -9,7 +9,7 @@ #include "cpu/beta_cpu/commit.hh" #include "cpu/exetrace.hh" -template +template SimpleCommit::SimpleCommit(Params ¶ms) : dcacheInterface(params.dcacheInterface), iewToCommitDelay(params.iewToCommitDelay), @@ -21,7 +21,7 @@ SimpleCommit::SimpleCommit(Params ¶ms) _status = Idle; } -template +template void SimpleCommit::setCPU(FullCPU *cpu_ptr) { @@ -29,7 +29,7 @@ SimpleCommit::setCPU(FullCPU *cpu_ptr) cpu = cpu_ptr; } -template +template void SimpleCommit::setTimeBuffer(TimeBuffer *tb_ptr) { @@ -43,7 +43,7 @@ SimpleCommit::setTimeBuffer(TimeBuffer *tb_ptr) robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); } -template +template void SimpleCommit::setRenameQueue(TimeBuffer *rq_ptr) { @@ -54,7 +54,7 @@ SimpleCommit::setRenameQueue(TimeBuffer *rq_ptr) fromRename = renameQueue->getWire(-renameToROBDelay); } -template +template void SimpleCommit::setIEWQueue(TimeBuffer *iq_ptr) { @@ -65,7 +65,7 @@ SimpleCommit::setIEWQueue(TimeBuffer *iq_ptr) fromIEW = iewQueue->getWire(-iewToCommitDelay); } -template +template void SimpleCommit::setROB(ROB *rob_ptr) { @@ -73,7 +73,7 @@ SimpleCommit::setROB(ROB *rob_ptr) rob = rob_ptr; } -template +template void SimpleCommit::tick() { @@ -106,7 +106,7 @@ SimpleCommit::tick() toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); } -template +template void SimpleCommit::commit() { @@ -154,17 +154,30 @@ SimpleCommit::commit() // Send back the sequence number of the squashed instruction. toIEW->commitInfo.doneSeqNum = squashed_inst; + // Send back the squash signal to tell stages that they should squash. toIEW->commitInfo.squash = true; + // Send back the rob squashing signal so other stages know that the // ROB is in the process of squashing. 
toIEW->commitInfo.robSquashing = true; + + toIEW->commitInfo.branchMispredict = + robInfoFromIEW->iewInfo.branchMispredict; + + toIEW->commitInfo.branchTaken = + robInfoFromIEW->iewInfo.branchTaken; + toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC; + + toIEW->commitInfo.mispredPC = robInfoFromIEW->iewInfo.mispredPC; } if (_status != ROBSquashing) { + // If we're not currently squashing, then get instructions. getInsts(); + // Try to commit any instructions. commitInsts(); } @@ -183,7 +196,7 @@ SimpleCommit::commit() // Loop that goes through as many instructions in the ROB as possible and // tries to commit them. The actual work for committing is done by the // commitHead() function. -template +template void SimpleCommit::commitInsts() { @@ -195,7 +208,7 @@ SimpleCommit::commitInsts() // Can't commit and squash things at the same time... //////////////////////////////////// - DynInst *head_inst = rob->readHeadInst(); + DynInstPtr head_inst = rob->readHeadInst(); unsigned num_committed = 0; @@ -224,12 +237,12 @@ SimpleCommit::commitInsts() // inst in the ROB without affecting any other stages. rob->retireHead(); - ++num_committed; } else { // Increment the total number of non-speculative instructions // executed. // Hack for now: it really shouldn't happen until after the - // commit is deemed to be successful. + // commit is deemed to be successful, but this count is needed + // for syscalls. cpu->funcExeInst++; // Try to commit the head instruction. @@ -256,9 +269,9 @@ SimpleCommit::commitInsts() } } -template +template bool -SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) +SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) { // Make sure instruction is valid assert(head_inst); @@ -271,21 +284,26 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) // Also check if it's nonspeculative. Or a nop. Then it will be // executed only when it reaches the head of the ROB. 
Actually // executing a nop is a bit overkill... - if (head_inst->isStore() || - head_inst->isLoad() || - head_inst->isNonSpeculative() || - head_inst->isNop()) { - DPRINTF(Commit, "Commit: Executing a memory reference or " - "nonspeculative instruction at commit, inst PC %#x\n", - head_inst->PC); - fault = head_inst->execute(); + if (!head_inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. + cpu->funcExeInst--; + if (head_inst->isStore() || head_inst->isNonSpeculative()) { + DPRINTF(Commit, "Commit: Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + head_inst->readPC()); - // Tell CPU to tell IEW to tell IQ (nasty chain of calls) that - // this instruction has completed. Could predicate this on - // whether or not the instruction has a destination. - // Slightly unrealistic, but will not really be a factor once - // a real load/store queue is added. - cpu->wakeDependents(head_inst); + toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + head_inst->clearCanCommit(); + + return false; + } else { + panic("Commit: Trying to commit un-executed instruction " + "of unknown type!\n"); + } } // Check if memory access was successful. @@ -320,8 +338,10 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) #ifdef FULL_SYSTEM cpu->trap(fault); #else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", head_inst->getFault(), - head_inst->PC); + if (!head_inst->isNop()) { + panic("fault (%d) detected @ PC %08p", head_inst->getFault(), + head_inst->PC); + } #endif // FULL_SYSTEM } @@ -333,8 +353,8 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) return false; } - //If it's a branch, then send back branch prediction update info - //to the fetch stage. + // If it's a branch, then send back branch prediction update info + // to the fetch stage. 
// This should be handled in the iew stage if a mispredict happens... #if 0 if (head_inst->isControl()) { @@ -358,6 +378,15 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) } #endif + // Explicit communication back to the LDSTQ that a load has been committed + // and can be removed from the LDSTQ. Stores don't need this because + // the LDSTQ will already have been told that a store has reached the head + // of the ROB. Consider including communication if it's a store as well + // to keep things orthogonal. + if (head_inst->isLoad()) { + toIEW->commitInfo.commitIsLoad = true; + } + // Now that the instruction is going to be committed, finalize its // trace data. if (head_inst->traceData) { @@ -371,7 +400,7 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) return true; } -template +template void SimpleCommit::getInsts() { @@ -382,24 +411,33 @@ SimpleCommit::getInsts() // Read any issued instructions and place them into the ROB. Do this // prior to squashing to avoid having instructions in the ROB that // don't get squashed properly. + int insts_to_process = min((int)renameWidth, fromRename->size); + for (int inst_num = 0; - fromRename->insts[inst_num] != NULL && inst_num < renameWidth; + inst_num < insts_to_process; ++inst_num) { - DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", - fromRename->insts[inst_num]->readPC()); - rob->insertInst(fromRename->insts[inst_num]); + if (!fromRename->insts[inst_num]->isSquashed()) { + DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", + fromRename->insts[inst_num]->readPC()); + rob->insertInst(fromRename->insts[inst_num]); + } else { + DPRINTF(Commit, "Commit: Instruction %i PC %#x was " + "squashed, skipping.\n", + fromRename->insts[inst_num]->seqNum, + fromRename->insts[inst_num]->readPC()); + } } } -template +template void SimpleCommit::markCompletedInsts() { // Grab completed insts out of the IEW instruction queue, and mark // instructions completed within the ROB. 
for (int inst_num = 0; - fromIEW->insts[inst_num] != NULL && inst_num < iewWidth; + inst_num < iewWidth && fromIEW->insts[inst_num]; ++inst_num) { DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", @@ -411,7 +449,7 @@ SimpleCommit::markCompletedInsts() } } -template +template uint64_t SimpleCommit::readCommitPC() { diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh index 676334249..ec8460b77 100644 --- a/cpu/beta_cpu/cpu_policy.hh +++ b/cpu/beta_cpu/cpu_policy.hh @@ -1,32 +1,60 @@ #ifndef __CPU_POLICY_HH__ #define __CPU_POLICY_HH__ +#include "cpu/beta_cpu/bpred_unit.hh" +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/regfile.hh" +#include "cpu/beta_cpu/free_list.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/rob.hh" +#include "cpu/beta_cpu/store_set.hh" +#include "cpu/beta_cpu/mem_dep_unit.hh" +#include "cpu/beta_cpu/ldstq.hh" + #include "cpu/beta_cpu/fetch.hh" #include "cpu/beta_cpu/decode.hh" #include "cpu/beta_cpu/rename.hh" #include "cpu/beta_cpu/iew.hh" #include "cpu/beta_cpu/commit.hh" -#include "cpu/beta_cpu/inst_queue.hh" -#include "cpu/beta_cpu/regfile.hh" -#include "cpu/beta_cpu/free_list.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/rob.hh" +#include "cpu/beta_cpu/comm.hh" template struct SimpleCPUPolicy { + typedef DefaultBPredUnit BPredUnit; typedef PhysRegFile RegFile; typedef SimpleFreeList FreeList; typedef SimpleRenameMap RenameMap; typedef ROB ROB; typedef InstructionQueue IQ; + typedef MemDepUnit MemDepUnit; + typedef LDSTQ LDSTQ; typedef SimpleFetch Fetch; typedef SimpleDecode Decode; typedef SimpleRename Rename; typedef SimpleIEW IEW; typedef SimpleCommit Commit; + + /** The struct for communication between fetch and decode. */ + typedef SimpleFetchSimpleDecode FetchStruct; + + /** The struct for communication between decode and rename. */ + typedef SimpleDecodeSimpleRename DecodeStruct; + + /** The struct for communication between rename and IEW. 
*/ + typedef SimpleRenameSimpleIEW RenameStruct; + + /** The struct for communication between IEW and commit. */ + typedef SimpleIEWSimpleCommit IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct IssueStruct; + + /** The struct for all backwards communication. */ + typedef TimeBufStruct TimeStruct; + }; #endif //__CPU_POLICY_HH__ diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh index c41955dcb..be88a4b36 100644 --- a/cpu/beta_cpu/decode.hh +++ b/cpu/beta_cpu/decode.hh @@ -10,11 +10,7 @@ #include -//Will want to include: time buffer, structs, #include "base/timebuf.hh" -#include "cpu/beta_cpu/comm.hh" - -using namespace std; template class SimpleDecode @@ -22,13 +18,15 @@ class SimpleDecode private: // Typedefs from the Impl. typedef typename Impl::ISA ISA; - typedef typename Impl::DynInst DynInst; typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; - typedef typename Impl::FetchStruct FetchStruct; - typedef typename Impl::DecodeStruct DecodeStruct; - typedef typename Impl::TimeStruct TimeStruct; + // Typedefs from the CPU policy. + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::TimeStruct TimeStruct; // Typedefs from the ISA. typedef typename ISA::Addr Addr; @@ -71,7 +69,7 @@ class SimpleDecode inline void unblock(); - void squash(DynInst *inst); + void squash(DynInstPtr &inst); // Interfaces to objects outside of decode. /** CPU interface. */ @@ -106,7 +104,7 @@ class SimpleDecode typename TimeBuffer::wire fromFetch; /** Skid buffer between fetch and decode. */ - queue skidBuffer; + std::queue skidBuffer; private: //Consider making these unsigned to avoid any confusion. @@ -124,6 +122,12 @@ class SimpleDecode /** The width of decode, in instructions. 
*/ unsigned decodeWidth; + + /** The instruction that decode is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; }; #endif // __SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh index ecf19b8ea..d0f46eaa5 100644 --- a/cpu/beta_cpu/decode_impl.hh +++ b/cpu/beta_cpu/decode_impl.hh @@ -9,7 +9,8 @@ SimpleDecode::SimpleDecode(Params ¶ms) iewToDecodeDelay(params.iewToDecodeDelay), commitToDecodeDelay(params.commitToDecodeDelay), fetchToDecodeDelay(params.fetchToDecodeDelay), - decodeWidth(params.decodeWidth) + decodeWidth(params.decodeWidth), + numInst(0) { DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); _status = Idle; @@ -103,7 +104,7 @@ SimpleDecode::unblock() // was predicted incorrectly. template void -SimpleDecode::squash(DynInst *inst) +SimpleDecode::squash(DynInstPtr &inst) { DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " "detected at decode.\n"); @@ -163,16 +164,22 @@ SimpleDecode::tick() // buffer were used. Remove those instructions and handle // the rest of unblocking. if (_status == Unblocking) { + if (fromFetch->size > 0) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + } + unblock(); } } else if (_status == Blocked) { - if (fromFetch->insts[0] != NULL) { + if (fromFetch->size > 0) { block(); } if (!fromRename->renameInfo.stall && - !fromIEW->iewInfo.stall && - !fromCommit->commitInfo.stall) { + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { DPRINTF(Decode, "Decode: Stall signals cleared, going to " "unblock.\n"); _status = Unblocking; @@ -204,9 +211,7 @@ void SimpleDecode::decode() { // Check time buffer if being told to squash. 
- if (/* fromRename->renameInfo.squash || */ - /* fromIEW->iewInfo.squash || */ - fromCommit->commitInfo.squash) { + if (fromCommit->commitInfo.squash) { squash(); return; } @@ -223,20 +228,22 @@ SimpleDecode::decode() // Check fetch queue to see if instructions are available. // If no available instructions, do nothing, unless this stage is // currently unblocking. - if (fromFetch->insts[0] == NULL && _status != Unblocking) { + if (!fromFetch->insts[0] && _status != Unblocking) { DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); // Should I change the status to idle? return; } - DynInst *inst; + DynInstPtr inst; + // Instead have a class member variable that records which instruction // was the last one that was ended on. At the tick() stage, it can // check if that's equal to 0. If not, then don't pop stuff off. - unsigned num_inst = 0; - bool insts_available = _status == Unblocking ? - skidBuffer.front().insts[num_inst] != NULL : - fromFetch->insts[num_inst] != NULL; + unsigned to_rename_index = 0; + + int insts_available = _status == Unblocking ? + skidBuffer.front().size : + fromFetch->size; // Debug block... #if 0 @@ -247,7 +254,7 @@ SimpleDecode::decode() DPRINTF(Decode, "Decode: No instructions available, skid buffer " "empty.\n"); } else if (_status != Unblocking && - fromFetch->insts[0] == NULL) { + !fromFetch->insts[0]) { DPRINTF(Decode, "Decode: No instructions available, fetch queue " "empty.\n"); } else { @@ -262,26 +269,39 @@ SimpleDecode::decode() // should be computed here. However in this simple model all // computation will take place at execute. Hence doneTargCalc() // will always be false. - while (num_inst < decodeWidth && - insts_available) + while (insts_available > 0) { DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); // Might create some sort of accessor to get an instruction // on a per thread basis. Or might be faster to just get // a pointer to an array or list of instructions and use that // within this code. 
- inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : - fromFetch->insts[num_inst]; + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromFetch->insts[numInst]; + DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", - inst, inst->readPC()); + inst->seqNum, inst->readPC()); + + if (inst->isSquashed()) { + DPRINTF(Decode, "Decode: Instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + ++numInst; + --insts_available; + + continue; + } // This current instruction is valid, so add it into the decode // queue. The next instruction may not be valid, so check to // see if branches were predicted correctly. - toRename->insts[num_inst] = inst; + toRename->insts[to_rename_index] = inst; + + ++(toRename->size); // Ensure that if it was predicted as a branch, it really is a - // branch. This case should never happen in this model. + // branch. if (inst->predTaken() && !inst->isControl()) { panic("Instruction predicted as a branch!"); @@ -306,20 +326,19 @@ SimpleDecode::decode() // them as ready to issue at any time. Not sure if this check // should exist here or at a later stage; however it doesn't matter // too much for function correctness. + // Isn't this handled by the inst queue? if (inst->numSrcRegs() == 0) { inst->setCanIssue(); } // Increment which instruction we're looking at. - ++num_inst; + ++numInst; + ++to_rename_index; - // Check whether or not there are instructions available. - // Either need to check within the skid buffer, or the fetch - // queue, depending if this stage is unblocking or not. - insts_available = _status == Unblocking ? 
- skidBuffer.front().insts[num_inst] == NULL : - fromFetch->insts[num_inst] == NULL; + --insts_available; } + + numInst = 0; } #endif // __SIMPLE_DECODE_CC__ diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh index 5717c65ac..e59a9df7f 100644 --- a/cpu/beta_cpu/fetch.hh +++ b/cpu/beta_cpu/fetch.hh @@ -13,16 +13,12 @@ #include "base/timebuf.hh" #include "sim/eventq.hh" #include "cpu/pc_event.hh" -#include "cpu/beta_cpu/comm.hh" #include "mem/mem_interface.hh" -using namespace std; - /** * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch * will stall if there's an Icache miss, but otherwise assumes a one cycle - * Icache hit. This will be replaced with a more fleshed out class in the - * future. + * Icache hit. */ template @@ -31,12 +27,15 @@ class SimpleFetch public: /** Typedefs from Impl. */ typedef typename Impl::ISA ISA; + typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; - typedef typename Impl::FetchStruct FetchStruct; - typedef typename Impl::TimeStruct TimeStruct; + typedef typename CPUPol::BPredUnit BPredUnit; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::TimeStruct TimeStruct; /** Typedefs from ISA. */ typedef typename ISA::MachInst MachInst; @@ -76,6 +75,17 @@ class SimpleFetch // Figure out PC vs next PC and how it should be updated void squash(Addr newPC); + private: + /** + * Looks up in the branch predictor to see if the next PC should be + * either next PC+=MachInst or a branch target. + * @params next_PC Next PC variable passed in by reference. It is + * expected to be set to the current PC; it will be updated with what + * the next PC will be. + * @return Whether or not a branch was predicted as taken. 
+ */ + bool lookupAndUpdateNextPC(Addr &next_PC); + public: class CacheCompletionEvent : public Event { @@ -110,8 +120,6 @@ class SimpleFetch /** Wire to get commit's information from backwards time buffer. */ typename TimeBuffer::wire fromCommit; - // Will probably have this sit in the FullCPU and just pass a pointr in. - // Simplifies the constructors of all stages. /** Internal fetch instruction queue. */ TimeBuffer *fetchQueue; @@ -122,6 +130,9 @@ class SimpleFetch /** Icache interface. */ MemInterface *icacheInterface; + /** BPredUnit. */ + BPredUnit branchPred; + /** Memory request used to access cache. */ MemReqPtr memReq; diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh index 918d2dad2..93f7bf6d2 100644 --- a/cpu/beta_cpu/fetch_impl.hh +++ b/cpu/beta_cpu/fetch_impl.hh @@ -1,7 +1,5 @@ -// Todo: Rewrite this. Add in branch prediction. Fix up if squashing comes -// from decode; only the correct instructions should be killed. This will -// probably require changing the CPU's instList functions to take a seqNum -// instead of a dyninst. With probe path, should be able to specify +// Todo: Add in branch prediction. With probe path, should +// be able to specify // size of data to fetch. Will be able to get full cache line. // Remove this later. @@ -41,6 +39,7 @@ template SimpleFetch::SimpleFetch(Params ¶ms) : cacheCompletionEvent(this), icacheInterface(params.icacheInterface), + branchPred(params), decodeToFetchDelay(params.decodeToFetchDelay), renameToFetchDelay(params.renameToFetchDelay), iewToFetchDelay(params.iewToFetchDelay), @@ -66,7 +65,7 @@ SimpleFetch::SimpleFetch(Params ¶ms) blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; // Create mask to get rid of offset bits. - cacheBlockMask = ~((int)log2(blkSize) - 1); + cacheBlockMask = (blkSize - 1); // Get the size of an instruction. 
instSize = sizeof(MachInst); @@ -123,24 +122,59 @@ SimpleFetch::processCacheCompletion() _status = IcacheMissComplete; } -// Note that in the SimpleFetch<>, will most likely have to provide the -// template parameters to BP and BTB. +template +bool +SimpleFetch::lookupAndUpdateNextPC(Addr &next_PC) +{ +#if 1 + // Do branch prediction check here. + bool predict_taken = branchPred.BPLookup(next_PC); + Addr predict_target; + + DPRINTF(Fetch, "Fetch: Branch predictor predicts taken? %i\n", + predict_taken); + + if (branchPred.BTBValid(next_PC)) { + predict_target = branchPred.BTBLookup(next_PC); + DPRINTF(Fetch, "Fetch: BTB target is %#x.\n", predict_target); + } else { + predict_taken = false; + DPRINTF(Fetch, "Fetch: BTB does not have a valid entry.\n"); + } + + // Now update the PC to fetch the next instruction in the cache + // line. + if (!predict_taken) { + next_PC = next_PC + instSize; + return false; + } else { + next_PC = predict_target; + return true; + } +#endif + +#if 0 + next_PC = next_PC + instSize; + return false; +#endif +} + template void SimpleFetch::squash(Addr new_PC) { DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); + cpu->setNextPC(new_PC + instSize); cpu->setPC(new_PC); _status = Squashing; - // Clear out the instructions that are no longer valid. - // Actually maybe slightly unrealistic to kill instructions that are - // in flight like that between stages. Perhaps just have next - // stage ignore those instructions or something. In the cycle where it's - // returning from squashing, the other stages can just ignore the inputs - // for that cycle. + // Clear the icache miss if it's outstanding. + if (_status == IcacheMissStall && icacheInterface) { + // @todo: Use an actual thread number here. + icacheInterface->squash(0); + } // Tell the CPU to remove any instructions that aren't currently // in the ROB (instructions in flight that were killed). 
@@ -151,25 +185,27 @@ template void SimpleFetch::tick() { -#if 0 +#if 1 + // Check squash signals from commit. if (fromCommit->commitInfo.squash) { DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " "from commit.\n"); // In any case, squash. squash(fromCommit->commitInfo.nextPC); + + // Also check if there's a mispredict that happened. + if (fromCommit->commitInfo.branchMispredict) { + branchPred.BPUpdate(fromCommit->commitInfo.mispredPC, + fromCommit->commitInfo.branchTaken); + branchPred.BTBUpdate(fromCommit->commitInfo.mispredPC, + fromCommit->commitInfo.nextPC); + } + return; } - if (fromDecode->decodeInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from decode.\n"); - - // Squash unless we're already squashing? - squash(fromDecode->decodeInfo.nextPC); - return; - } - + // Check ROB squash signals from commit. if (fromCommit->commitInfo.robSquashing) { DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); @@ -178,11 +214,36 @@ SimpleFetch::tick() return; } + // Check squash signals from decode. + if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + + // Update the branch predictor. + if (fromCommit->decodeInfo.branchMispredict) { + branchPred.BPUpdate(fromDecode->decodeInfo.mispredPC, + fromDecode->decodeInfo.branchTaken); + branchPred.BTBUpdate(fromDecode->decodeInfo.mispredPC, + fromDecode->decodeInfo.nextPC); + } + + if (_status != Squashing) { + // Squash unless we're already squashing? + squash(fromDecode->decodeInfo.nextPC); + return; + } + } + + + + // Check if any of the stall signals are high. if (fromDecode->decodeInfo.stall || fromRename->renameInfo.stall || fromIEW->iewInfo.stall || fromCommit->commitInfo.stall) { + // Block stage, regardless of current status. 
+ DPRINTF(Fetch, "Fetch: Stalling stage.\n"); DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " "Commit: %i\n", @@ -190,10 +251,36 @@ SimpleFetch::tick() fromRename->renameInfo.stall, fromIEW->iewInfo.stall, fromCommit->commitInfo.stall); - // What to do if we're already in an icache stall? + + _status = Blocked; + return; + } else if (_status == Blocked) { + // Unblock stage if status is currently blocked and none of the + // stall signals are being held high. + _status = Running; + + return; + } + + // If fetch has reached this point, then there are no squash signals + // still being held high. Check if fetch is in the squashing state; + // if so, fetch can switch to running. + // Similarly, there are no blocked signals still being held high. + // Check if fetch is in the blocked state; if so, fetch can switch to + // running. + if (_status == Squashing) { + DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n"); + + // Switch status to running + _status = Running; + } else if (_status != IcacheMissStall) { + DPRINTF(Fetch, "Fetch: Running stage.\n"); + + fetch(); } #endif +#if 0 if (_status != Blocked && _status != Squashing && _status != IcacheMissStall) { @@ -253,62 +340,17 @@ SimpleFetch::tick() DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); } } - +#endif } template void SimpleFetch::fetch() { - ////////////////////////////////////////// - // Check backwards communication - ////////////////////////////////////////// - - // If branch prediction is incorrect, squash any instructions, - // update PC, and do not fetch anything this cycle. - - // Might want to put all the PC changing stuff in one area. - // Normally should also check here to see if there is branch - // misprediction info to update with. 
- if (fromCommit->commitInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from commit.\n"); - squash(fromCommit->commitInfo.nextPC); - return; - } else if (fromDecode->decodeInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from decode.\n"); - squash(fromDecode->decodeInfo.nextPC); - return; - } else if (fromCommit->commitInfo.robSquashing) { - DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); - _status = Squashing; - return; - } - - // If being told to stall, do nothing. - if (fromDecode->decodeInfo.stall || - fromRename->renameInfo.stall || - fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) - { - DPRINTF(Fetch, "Fetch: Stalling stage.\n"); - DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " - "Commit: %i\n", - fromDecode->decodeInfo.stall, - fromRename->renameInfo.stall, - fromIEW->iewInfo.stall, - fromCommit->commitInfo.stall); - _status = Blocked; - return; - } - ////////////////////////////////////////// // Start actual fetch ////////////////////////////////////////// - // If nothing else outstanding, attempt to read instructions. - #ifdef FULL_SYSTEM // Flag to say whether or not address is physical addr. unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; @@ -317,13 +359,14 @@ SimpleFetch::fetch() #endif // FULL_SYSTEM // The current PC. - Addr PC = cpu->readPC(); + Addr fetch_PC = cpu->readPC(); // Fault code for memory access. Fault fault = No_Fault; // If returning from the delay of a cache miss, then update the status - // to running, otherwise do the cache access. + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. 
if (_status == IcacheMissComplete) { DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); @@ -334,7 +377,7 @@ SimpleFetch::fetch() } else { DPRINTF(Fetch, "Fetch: Attempting to translate and read " "instruction, starting at PC %08p.\n", - PC); + fetch_PC); // Otherwise check if the instruction exists within the cache. // If it does, then proceed on to read the instruction and the rest @@ -347,7 +390,7 @@ SimpleFetch::fetch() // Setup the memReq to do a read of the first isntruction's address. // Set the appropriate read size and flags as well. memReq->cmd = Read; - memReq->reset(PC, instSize, flags); + memReq->reset(fetch_PC, instSize, flags); // Translate the instruction request. // Should this function be @@ -401,7 +444,7 @@ SimpleFetch::fetch() // Probably have a status on a per thread basis so each thread can // block independently and be woken up independently. - Addr next_PC = 0; + Addr next_PC = fetch_PC; InstSeqNum inst_seq; // If the read of the first instruction was successful, then grab the @@ -410,6 +453,10 @@ SimpleFetch::fetch() if (fault == No_Fault) { DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + ////////////////////////// + // Fetch first instruction + ////////////////////////// + // Need to keep track of whether or not a predicted branch // ended this fetch block. bool predicted_branch = false; @@ -420,12 +467,17 @@ SimpleFetch::fetch() // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); + // Update the next PC; it either is PC+sizeof(MachInst), or + // branch_target. Check whether or not a branch was taken. + predicted_branch = lookupAndUpdateNextPC(next_PC); + // Because the first instruction was already fetched, create the // DynInst and put it into the queue to decode. 
- DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq, - cpu); + DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, + inst_seq, cpu); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - instruction, instruction->readPC()); + inst_seq, instruction->readPC()); DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", OPCODE(inst)); @@ -440,13 +492,17 @@ SimpleFetch::fetch() // that heads to decode. toDecode->insts[0] = instruction; - // Now update the PC to fetch the next instruction in the cache - // line. - PC = PC + instSize; + toDecode->size++; + + fetch_PC = next_PC; + + ////////////////////////// + // Fetch other instructions + ////////////////////////// // Obtain the index into the cache line by getting only the low - // order bits. - int line_index = PC & cacheBlockMask; + // order bits. Will need to do shifting as well. + int line_index = fetch_PC & cacheBlockMask; // Take instructions and put them into the queue heading to decode. // Then read the next instruction in the cache line. Continue @@ -461,12 +517,14 @@ SimpleFetch::fetch() // instructions, which can then be used to get all the instructions // needed. Figure out if I can roll it back into one loop. for (int fetched = 1; - line_index < blkSize && fetched < fetchWidth; + line_index < blkSize && + fetched < fetchWidth && + !predicted_branch; line_index+=instSize, ++fetched) { // Reset the mem request to setup the read of the next // instruction. - memReq->reset(PC, instSize, flags); + memReq->reset(fetch_PC, instSize, flags); // Translate the instruction request. fault = cpu->translateInstReq(memReq); @@ -485,16 +543,24 @@ SimpleFetch::fetch() // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); + predicted_branch = lookupAndUpdateNextPC(next_PC); + // Create the actual DynInst. Parameters are: // DynInst(instruction, PC, predicted PC, CPU pointer). 
// Because this simple model has no branch prediction, the // predicted PC will simply be PC+sizeof(MachInst). // Update to actually use a branch predictor to predict the // target in the future. - DynInst *instruction = new DynInst(inst, PC, PC+instSize, - inst_seq, cpu); + DynInstPtr instruction = + new DynInst(inst, fetch_PC, next_PC, inst_seq, cpu); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - instruction, instruction->readPC()); + inst_seq, instruction->readPC()); DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", OPCODE(inst)); @@ -504,20 +570,15 @@ SimpleFetch::fetch() // that heads to decode. toDecode->insts[fetched] = instruction; + toDecode->size++; + // Might want to keep track of various stats. // numInstsFetched++; - // Now update the PC to fetch the next instruction in the cache - // line. - PC = PC + instSize; + // Update the PC with the next PC. + fetch_PC = next_PC; } - // If no branches predicted taken, then increment PC with - // fall-through path. This simple model always predicts not - // taken. - if (!predicted_branch) { - next_PC = PC; - } } // Now that fetching is completed, update the PC to signify what the next @@ -544,10 +605,10 @@ SimpleFetch::fetch() _status = Blocked; #ifdef FULL_SYSTEM - // Trap will probably need a pointer to the CPU to do accessing. - // Or an exec context. --Write ProxyExecContext eventually. - // Avoid using this for now as the xc really shouldn't be in here. - cpu->trap(fault); +// cpu->trap(fault); + // Send a signal to the ROB indicating that there's a trap from the + // fetch stage that needs to be handled. Need to indicate that + // there's a fault, and the fault type. 
#else // !FULL_SYSTEM fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); #endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/free_list.cc b/cpu/beta_cpu/free_list.cc index 006bf4bf7..542b87471 100644 --- a/cpu/beta_cpu/free_list.cc +++ b/cpu/beta_cpu/free_list.cc @@ -1,3 +1,5 @@ +#include "base/trace.hh" + #include "cpu/beta_cpu/free_list.hh" SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, @@ -10,6 +12,16 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, numPhysicalFloatRegs(_numPhysicalFloatRegs), numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) { + DPRINTF(FreeList, "FreeList: Creating new free list object.\n"); + + // DEBUG stuff. + freeIntRegsScoreboard.resize(numPhysicalIntRegs); + + freeFloatRegsScoreboard.resize(numPhysicalRegs); + + for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) { + freeIntRegsScoreboard[i] = 0; + } // Put all of the extra physical registers onto the free list. This // means excluding all of the base logical registers. @@ -17,6 +29,14 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, i < numPhysicalIntRegs; ++i) { freeIntRegs.push(i); + + freeIntRegsScoreboard[i] = 1; + } + + for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs; + ++i) + { + freeFloatRegsScoreboard[i] = 0; } // Put all of the extra physical registers onto the free list. This @@ -26,8 +46,9 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; i < numPhysicalRegs; ++i) { - cprintf("Free List: Adding register %i to float list.\n", i); freeFloatRegs.push(i); + + freeFloatRegsScoreboard[i] = 1; } } diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh index 8521ad94c..0d2b2c421 100644 --- a/cpu/beta_cpu/free_list.hh +++ b/cpu/beta_cpu/free_list.hh @@ -8,8 +8,6 @@ #include "cpu/beta_cpu/comm.hh" #include "base/trace.hh" -using namespace std; - // Question: Do I even need the number of logical registers? 
// How to avoid freeing registers instantly? Same with ROB entries. @@ -33,10 +31,10 @@ class SimpleFreeList private: /** The list of free integer registers. */ - queue freeIntRegs; + std::queue freeIntRegs; /** The list of free floating point registers. */ - queue freeFloatRegs; + std::queue freeFloatRegs; /** Number of logical integer registers. */ int numLogicalIntRegs; @@ -53,6 +51,11 @@ class SimpleFreeList /** Total number of physical registers. */ int numPhysicalRegs; + /** DEBUG stuff below. */ + std::vector freeIntRegsScoreboard; + + std::vector freeFloatRegsScoreboard; + public: SimpleFreeList(unsigned _numLogicalIntRegs, unsigned _numPhysicalIntRegs, @@ -94,6 +97,10 @@ SimpleFreeList::getIntReg() freeIntRegs.pop(); + // DEBUG + assert(freeIntRegsScoreboard[free_reg]); + freeIntRegsScoreboard[free_reg] = 0; + return(free_reg); } @@ -109,6 +116,10 @@ SimpleFreeList::getFloatReg() freeFloatRegs.pop(); + // DEBUG + assert(freeFloatRegsScoreboard[free_reg]); + freeFloatRegsScoreboard[free_reg] = 0; + return(free_reg); } @@ -120,8 +131,16 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg) //already in there. A bit vector or something similar would be useful. if (freed_reg < numPhysicalIntRegs) { freeIntRegs.push(freed_reg); + + // DEBUG + assert(freeIntRegsScoreboard[freed_reg] == false); + freeIntRegsScoreboard[freed_reg] = 1; } else if (freed_reg < numPhysicalRegs) { freeFloatRegs.push(freed_reg); + + // DEBUG + assert(freeFloatRegsScoreboard[freed_reg] == false); + freeFloatRegsScoreboard[freed_reg] = 1; } } @@ -130,6 +149,10 @@ SimpleFreeList::addIntReg(PhysRegIndex freed_reg) { DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + // DEBUG + assert(!freeIntRegsScoreboard[freed_reg]); + freeIntRegsScoreboard[freed_reg] = 1; + //Might want to add in a check for whether or not this register is //already in there. A bit vector or something similar would be useful. 
freeIntRegs.push(freed_reg); @@ -140,6 +163,10 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) { DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + // DEBUG + assert(!freeFloatRegsScoreboard[freed_reg]); + freeFloatRegsScoreboard[freed_reg] = 1; + //Might want to add in a check for whether or not this register is //already in there. A bit vector or something similar would be useful. freeFloatRegs.push(freed_reg); diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index 6fbf5d69a..abeb4cb87 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -16,29 +16,18 @@ using namespace std; #ifdef FULL_SYSTEM -BaseFullCPU::BaseFullCPU(const std::string &_name, - int number_of_threads, - Counter max_insts_any_thread, - Counter max_insts_all_threads, - Counter max_loads_any_thread, - Counter max_loads_all_threads, - System *_system, Tick freq) - : BaseCPU(_name, number_of_threads, - max_insts_any_thread, max_insts_all_threads, - max_loads_any_thread, max_loads_all_threads, - _system, freq) +BaseFullCPU::BaseFullCPU(Params ¶ms) + : BaseCPU(params.name, params.numberOfThreads, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads, + params._system, params.freq) { } #else -BaseFullCPU::BaseFullCPU(const std::string &_name, - int number_of_threads, - Counter max_insts_any_thread, - Counter max_insts_all_threads, - Counter max_loads_any_thread, - Counter max_loads_all_threads) - : BaseCPU(_name, number_of_threads, - max_insts_any_thread, max_insts_all_threads, - max_loads_any_thread, max_loads_all_threads) +BaseFullCPU::BaseFullCPU(Params ¶ms) + : BaseCPU(params.name, params.numberOfThreads, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads) { } #endif // FULL_SYSTEM @@ -67,14 +56,9 @@ FullBetaCPU::TickEvent::description() template FullBetaCPU::FullBetaCPU(Params ¶ms) #ifdef FULL_SYSTEM - : 
BaseFullCPU(params.name, /* number_of_threads */ 1, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads, - params.system, params.freq), + : BaseFullCPU(params), #else - : BaseFullCPU(params.name, /* number_of_threads */ 1, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads), + : BaseFullCPU(params), #endif // FULL_SYSTEM tickEvent(this), fetch(params), @@ -91,17 +75,18 @@ FullBetaCPU::FullBetaCPU(Params ¶ms) renameMap(Impl::ISA::NumIntRegs, params.numPhysIntRegs, Impl::ISA::NumFloatRegs, params.numPhysFloatRegs, Impl::ISA::NumMiscRegs, - Impl::ISA::ZeroReg, Impl::ISA::ZeroReg), + Impl::ISA::ZeroReg, + Impl::ISA::ZeroReg + Impl::ISA::NumIntRegs), rob(params.numROBEntries, params.squashWidth), // What to pass to these time buffers? // For now just have these time buffers be pretty big. - timeBuffer(20, 20), - fetchQueue(20, 20), - decodeQueue(20, 20), - renameQueue(20, 20), - iewQueue(20, 20), + timeBuffer(5, 5), + fetchQueue(5, 5), + decodeQueue(5, 5), + renameQueue(5, 5), + iewQueue(5, 5), xc(NULL), @@ -133,9 +118,9 @@ FullBetaCPU::FullBetaCPU(Params ¶ms) // initialize CPU, including PC TheISA::initCPU(&xc->regs); #else - xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x", process->prog_entry, process); + xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); assert(process->getMemory() != NULL); assert(mem != NULL); @@ -393,7 +378,7 @@ FullBetaCPU::setPC(Addr new_PC) template void -FullBetaCPU::addInst(DynInst *inst) +FullBetaCPU::addInst(DynInstPtr &inst) { instList.push_back(inst); } @@ -411,9 +396,9 @@ FullBetaCPU::instDone() template void -FullBetaCPU::removeBackInst(DynInst *inst) +FullBetaCPU::removeBackInst(DynInstPtr &inst) { - DynInst *inst_to_delete; + DynInstPtr inst_to_delete; // Walk through the instruction list, removing any 
instructions // that were inserted after the given instruction, inst. @@ -424,22 +409,22 @@ FullBetaCPU::removeBackInst(DynInst *inst) // Obtain the pointer to the instruction. inst_to_delete = instList.back(); - DPRINTF(FullCPU, "FullCPU: Deleting instruction %#x, PC %#x\n", - inst_to_delete, inst_to_delete->readPC()); + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); // Remove the instruction from the list. instList.pop_back(); - // Delete the instruction itself. - delete inst_to_delete; + // Mark it as squashed. + inst_to_delete->setSquashed(); } } template void -FullBetaCPU::removeFrontInst(DynInst *inst) +FullBetaCPU::removeFrontInst(DynInstPtr &inst) { - DynInst *inst_to_delete; + DynInstPtr inst_to_delete; // The front instruction should be the same one being asked to be deleted. assert(instList.front() == inst); @@ -451,7 +436,7 @@ FullBetaCPU::removeFrontInst(DynInst *inst) DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n", inst_to_delete, inst_to_delete->readPC()); - delete inst_to_delete; +// delete inst_to_delete; } template @@ -461,7 +446,7 @@ FullBetaCPU::removeInstsNotInROB() DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " "list.\n"); - DynInst *rob_tail = rob.readTailInst(); + DynInstPtr rob_tail = rob.readTailInst(); removeBackInst(rob_tail); } @@ -478,13 +463,13 @@ void FullBetaCPU::dumpInsts() { int num = 0; - typename list::iterator inst_list_it = instList.begin(); + typename list::iterator inst_list_it = instList.begin(); while (inst_list_it != instList.end()) { - cprintf("Instruction:%i\nInst:%#x\nPC:%#x\nSN:%lli\n\n", - num, (*inst_list_it), (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum); + cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", + num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, + (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); inst_list_it++; ++num; } @@ -492,7 +477,7 @@ 
FullBetaCPU::dumpInsts() template void -FullBetaCPU::wakeDependents(DynInst *inst) +FullBetaCPU::wakeDependents(DynInstPtr &inst) { iew.wakeDependents(inst); } diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index 00ff1f878..cf753ad67 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -16,6 +16,7 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" #include "cpu/beta_cpu/cpu_policy.hh" #include "sim/process.hh" @@ -28,17 +29,32 @@ class BaseFullCPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. public: + class Params + { + public: #ifdef FULL_SYSTEM - BaseFullCPU(const std::string &_name, int _number_of_threads, - Counter max_insts_any_thread, Counter max_insts_all_threads, - Counter max_loads_any_thread, Counter max_loads_all_threads, - System *_system, Tick freq); + std::string name; + int numberOfThreads; + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; + System *_system; + Tick freq; #else - BaseFullCPU(const std::string &_name, int _number_of_threads, - Counter max_insts_any_thread = 0, - Counter max_insts_all_threads = 0, - Counter max_loads_any_thread = 0, - Counter max_loads_all_threads = 0); + std::string name; + int numberOfThreads; + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; +#endif // FULL_SYSTEM + }; + +#ifdef FULL_SYSTEM + BaseFullCPU(Params ¶ms); +#else + BaseFullCPU(Params ¶ms); #endif // FULL_SYSTEM }; @@ -49,7 +65,7 @@ class FullBetaCPU : public BaseFullCPU //Put typedefs from the Impl here. 
typedef typename Impl::CPUPol CPUPolicy; typedef typename Impl::Params Params; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; public: enum Status { @@ -162,7 +178,7 @@ class FullBetaCPU : public BaseFullCPU /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ - void addInst(DynInst *inst); + void addInst(DynInstPtr &inst); /** Function to tell the CPU that an instruction has completed. */ void instDone(); @@ -175,7 +191,7 @@ class FullBetaCPU : public BaseFullCPU * @todo: Remove only up until that inst? Squashed inst is most likely * valid. */ - void removeBackInst(DynInst *inst); + void removeBackInst(DynInstPtr &inst); /** Remove an instruction from the front of the list. It is expected * that there are no instructions in front of it (that is, none are older @@ -184,7 +200,7 @@ class FullBetaCPU : public BaseFullCPU * last instruction once it's verified that commit has the same ordering * as the instruction list. */ - void removeFrontInst(DynInst *inst); + void removeFrontInst(DynInstPtr &inst); /** Remove all instructions that are not currently in the ROB. */ void removeInstsNotInROB(); @@ -198,11 +214,11 @@ class FullBetaCPU : public BaseFullCPU * commit can tell the instruction queue that they have completed. * Eventually this hack should be removed. */ - void wakeDependents(DynInst *inst); + void wakeDependents(DynInstPtr &inst); public: /** List of all the instructions in flight. */ - list instList; + list instList; //not sure these should be private. protected: @@ -255,15 +271,15 @@ class FullBetaCPU : public BaseFullCPU /** Typedefs from the Impl to get the structs that each of the * time buffers should use. 
*/ - typedef typename Impl::TimeStruct TimeStruct; + typedef typename CPUPolicy::TimeStruct TimeStruct; - typedef typename Impl::FetchStruct FetchStruct; + typedef typename CPUPolicy::FetchStruct FetchStruct; - typedef typename Impl::DecodeStruct DecodeStruct; + typedef typename CPUPolicy::DecodeStruct DecodeStruct; - typedef typename Impl::RenameStruct RenameStruct; + typedef typename CPUPolicy::RenameStruct RenameStruct; - typedef typename Impl::IEWStruct IEWStruct; + typedef typename CPUPolicy::IEWStruct IEWStruct; /** The main time buffer to do backwards communication. */ TimeBuffer timeBuffer; diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh index 52b9ccdb0..de408ef0c 100644 --- a/cpu/beta_cpu/iew.hh +++ b/cpu/beta_cpu/iew.hh @@ -1,13 +1,10 @@ -//Todo: Update with statuses. Create constructor. Fix up time buffer stuff. -//Will also need a signal heading back at least one stage to rename to say -//how many empty skid buffer entries there are. Perhaps further back even. +//Todo: Update with statuses. //Need to handle delaying writes to the writeback bus if it's full at the -//given time. Squash properly. Load store queue. +//given time. Load store queue. 
#ifndef __SIMPLE_IEW_HH__ #define __SIMPLE_IEW_HH__ -// To include: time buffer, structs, queue, #include #include "base/timebuf.hh" @@ -22,16 +19,18 @@ class SimpleIEW private: //Typedefs from Impl typedef typename Impl::ISA ISA; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; - typedef typename Impl::CPUPol::RenameMap RenameMap; + typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::LDSTQ LDSTQ; - typedef typename Impl::TimeStruct TimeStruct; - typedef typename Impl::IEWStruct IEWStruct; - typedef typename Impl::RenameStruct RenameStruct; - typedef typename Impl::IssueStruct IssueStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::IssueStruct IssueStruct; public: enum Status { @@ -51,7 +50,7 @@ class SimpleIEW public: void squash(); - void squash(DynInst *inst); + void squash(DynInstPtr &inst); void block(); @@ -70,7 +69,7 @@ class SimpleIEW void setRenameMap(RenameMap *rm_ptr); - void wakeDependents(DynInst *inst); + void wakeDependents(DynInstPtr &inst); void tick(); @@ -111,11 +110,13 @@ class SimpleIEW //Will need internal queue to hold onto instructions coming from //the rename stage in case of a stall. /** Skid buffer between rename and IEW. */ - queue skidBuffer; + std::queue skidBuffer; /** Instruction queue. */ IQ instQueue; + LDSTQ ldstQueue; + /** Pointer to rename map. Might not want this stage to directly * access this though... */ diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index b198220f5..521ce77f6 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -3,8 +3,8 @@ // communication happens simultaneously. Might not be that bad really... // it might skew stats a bit though. 
Issue would otherwise try to issue // instructions that would never be executed if there were a delay; without -// it issue will simply squash. Make this stage block properly. Make this -// stage delay after a squash properly. Update the statuses for each stage. +// it issue will simply squash. Make this stage block properly. +// Update the statuses for each stage. // Actually read instructions out of the skid buffer. #include @@ -15,8 +15,9 @@ template SimpleIEW::SimpleIEW(Params ¶ms) : // Just make this time buffer really big for now - issueToExecQueue(20, 20), + issueToExecQueue(5, 5), instQueue(params), + ldstQueue(params), commitToIEWDelay(params.commitToIEWDelay), renameToIEWDelay(params.renameToIEWDelay), issueToExecuteDelay(params.issueToExecuteDelay), @@ -45,6 +46,7 @@ SimpleIEW::setCPU(FullCPU *cpu_ptr) cpu = cpu_ptr; instQueue.setCPU(cpu_ptr); + ldstQueue.setCPU(cpu_ptr); } template @@ -96,7 +98,7 @@ SimpleIEW::setRenameMap(RenameMap *rm_ptr) template void -SimpleIEW::wakeDependents(DynInst *inst) +SimpleIEW::wakeDependents(DynInstPtr &inst) { instQueue.wakeDependents(inst); } @@ -150,17 +152,15 @@ SimpleIEW::squash() // Tell the IQ to start squashing. instQueue.squash(); - // Tell rename to squash through the time buffer. - // This communication may be redundant depending upon where squash() - // is called. -// toRename->iewInfo.squash = true; + // Tell the LDSTQ to start squashing. + ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); } template void -SimpleIEW::squash(DynInst *inst) +SimpleIEW::squash(DynInstPtr &inst) { - DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC:%#x.\n", + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", inst->PC); // Perhaps leave the squashing up to the ROB stage to tell it when to // squash? @@ -170,8 +170,11 @@ SimpleIEW::squash(DynInst *inst) toRename->iewInfo.squash = true; // Also send PC update information back to prior stages. 
toRename->iewInfo.squashedSeqNum = inst->seqNum; + toRename->iewInfo.mispredPC = inst->readPC(); toRename->iewInfo.nextPC = inst->readCalcTarg(); - toRename->iewInfo.predIncorrect = true; + toRename->iewInfo.branchMispredict = true; + // Prediction was incorrect, so send back inverse. + toRename->iewInfo.branchTaken = !(inst->predTaken()); } template @@ -229,7 +232,7 @@ SimpleIEW::tick() // If there's still instructions coming from rename, continue to // put them on the skid buffer. - if (fromRename->insts[0] != NULL) { + if (fromRename->insts[0]) { block(); } @@ -244,6 +247,19 @@ SimpleIEW::tick() // Write back number of free IQ entries here. toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + if (fromCommit->commitInfo.commitIsStore) { + ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); + } else if (fromCommit->commitInfo.commitIsLoad) { + ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); + } + + if (fromCommit->commitInfo.nonSpecSeqNum != 0) { + instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); + } + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", instQueue.numFreeEntries()); } @@ -265,7 +281,7 @@ SimpleIEW::iew() } //////////////////////////////////////// - //ISSUE stage + // DISPATCH/ISSUE stage //////////////////////////////////////// //Put into its own function? @@ -273,16 +289,16 @@ SimpleIEW::iew() // Check if there are any instructions coming from rename, and we're. // not squashing. - if (fromRename->insts[0] != NULL && _status != Squashing) { + if (fromRename->insts[0] && _status != Squashing) { // Loop through the instructions, putting them in the instruction // queue. 
for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num) { - DynInst *inst = fromRename->insts[inst_num]; + DynInstPtr inst = fromRename->insts[inst_num]; // Make sure there's a valid instruction there. - if (inst == NULL) + if (!inst) break; DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", @@ -294,25 +310,38 @@ SimpleIEW::iew() // Be sure to mark these instructions as ready so that the // commit stage can go ahead and execute them, and mark // them as issued so the IQ doesn't reprocess them. - if (inst->isMemRef()) { + if (inst->isSquashed()) { + continue; + } else if (inst->isLoad()) { DPRINTF(IEW, "IEW: Issue: Memory instruction " - "encountered, skipping.\n"); + "encountered, adding to LDSTQ.\n"); - inst->setIssued(); - inst->setExecuted(); + // Reserve a spot in the load store queue for this + // memory access. + ldstQueue.insertLoad(inst); + + } else if (inst->isStore()) { + ldstQueue.insertStore(inst); + + // A bit of a hack. Set that it can commit so that + // the commit stage will try committing it, and then + // once commit realizes it's a store it will send back + // a signal to this stage to issue and execute that + // store. inst->setCanCommit(); - instQueue.advanceTail(inst); + instQueue.insertNonSpec(inst); continue; } else if (inst->isNonSpeculative()) { DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " "encountered, skipping.\n"); - inst->setIssued(); - inst->setExecuted(); + // Same hack as with stores. inst->setCanCommit(); - instQueue.advanceTail(inst); + // Specificall insert it as nonspeculative. + instQueue.insertNonSpec(inst); + continue; } else if (inst->isNop()) { DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " @@ -355,6 +384,7 @@ SimpleIEW::iew() // @todo: Move to the FU pool used in the current full cpu. int fu_usage = 0; + bool fetch_redirect = false; // Execute/writeback any instructions that are available. 
for (int inst_num = 0; @@ -365,26 +395,48 @@ SimpleIEW::iew() DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); // Get instruction from issue's queue. - DynInst *inst = fromIssue->insts[inst_num]; + DynInstPtr inst = fromIssue->insts[inst_num]; DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); - inst->setExecuted(); - // Check if the instruction is squashed; if so then skip it // and don't count it towards the FU usage. if (inst->isSquashed()) { DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + toCommit->insts[inst_num] = inst; + continue; } + inst->setExecuted(); + // If an instruction is executed, then count it towards FU usage. ++fu_usage; // Execute instruction. // Note that if the instruction faults, it will be handled // at the commit stage. - inst->execute(); + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + ldstQueue.executeLoad(inst); + } else if (inst->isStore()) { + ldstQueue.executeStore(); + } else { + panic("IEW: Unexpected memory type!\n"); + } + + } else { + inst->execute(); + } // First check the time slot that this instruction will write // to. If there are free write ports at the time, then go ahead @@ -401,16 +453,34 @@ SimpleIEW::iew() // Check if branch was correct. This check happens after the // instruction is added to the queue because even if the branch // is mispredicted, the branch instruction itself is still valid. - if (inst->mispredicted()) { - DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); - DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", - inst->nextPC); + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. 
+ if (!fetch_redirect) { + if (inst->mispredicted()) { + fetch_redirect = true; - // If incorrect, then signal the ROB that it must be squashed. - squash(inst); + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); - // Not sure it really needs to break. -// break; + // If incorrect, then signal the ROB that it must be squashed. + squash(inst); + } else if (ldstQueue.violation()) { + fetch_redirect = true; + + DynInstPtr violator = ldstQueue.getMemDepViolator(); + + DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + instQueue.violation(inst, violator); + + squash(inst); + // Otherwise check if there was a memory ordering violation. + // If there was, then signal ROB that it must be squashed. Also + // signal IQ that there was a violation. + } } } @@ -422,18 +492,20 @@ SimpleIEW::iew() // Either have IEW have direct access to rename map, or have this as // part of backwards communication. 
for (int inst_num = 0; inst_num < executeWidth && - toCommit->insts[inst_num] != NULL; inst_num++) + toCommit->insts[inst_num]; inst_num++) { - DynInst *inst = toCommit->insts[inst_num]; + DynInstPtr inst = toCommit->insts[inst_num]; DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", inst->readPC()); - instQueue.wakeDependents(inst); + if(!inst->isSquashed()) { + instQueue.wakeDependents(inst); - for (int i = 0; i < inst->numDestRegs(); i++) - { - renameMap->markAsReady(inst->renamedDestRegIdx(i)); + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } } } diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh index 5741bfcf5..a170979cb 100644 --- a/cpu/beta_cpu/inst_queue.hh +++ b/cpu/beta_cpu/inst_queue.hh @@ -2,12 +2,13 @@ #define __INST_QUEUE_HH__ #include +#include #include #include +#include #include "base/timebuf.hh" - -using namespace std; +#include "cpu/inst_seq.hh" //Perhaps have a better separation between the data structure underlying //and the actual algorithm. @@ -24,48 +25,53 @@ using namespace std; * and 96-191 are fp). This remains true even for both logical and * physical register indices. */ -template +template class InstructionQueue { public: //Typedefs from the Impl. typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; - typedef typename Impl::IssueStruct IssueStruct; - typedef typename Impl::TimeStruct TimeStruct; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; // Typedef of iterator through the list of instructions. Might be // better to untie this from the FullCPU or pass its information to // the stages. 
- typedef typename list::iterator ListIt; + typedef typename std::list::iterator ListIt; /** - * Class for priority queue entries. Mainly made so that the < operator - * is defined. + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. */ - struct ReadyEntry { - DynInst *inst; - - ReadyEntry(DynInst *_inst) - : inst(_inst) - { } - - /** Compare(lhs,rhs) checks if rhs is "bigger" than lhs. If so, rhs - * goes higher on the priority queue. The oldest instruction should - * be on the top of the instruction queue, so in this case "bigger" - * has the reverse meaning; the instruction with the lowest - * sequence number is on the top. - */ - bool operator <(const ReadyEntry &rhs) const + struct pqCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const { - if (this->inst->seqNum > rhs.inst->seqNum) - return true; - return false; + return lhs->seqNum > rhs->seqNum; } }; + /** + * Struct for comparing entries to be added to the set. This gives + * standard ordering in terms of sequence numbers. 
+ */ + struct setCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum < rhs->seqNum; + } + }; + + typedef std::priority_queue, pqCompare> + ReadyInstQueue; + InstructionQueue(Params ¶ms); void setCPU(FullCPU *cpu); @@ -78,20 +84,32 @@ class InstructionQueue bool isFull(); - void insert(DynInst *new_inst); + void insert(DynInstPtr &new_inst); - void advanceTail(DynInst *inst); + void insertNonSpec(DynInstPtr &new_inst); + + void advanceTail(DynInstPtr &inst); void scheduleReadyInsts(); - void wakeDependents(DynInst *completed_inst); + void scheduleNonSpec(const InstSeqNum &inst); - void doSquash(); + void wakeDependents(DynInstPtr &completed_inst); + + void violation(DynInstPtr &store, DynInstPtr &faulting_load); void squash(); + void doSquash(); + void stopSquash(); + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + private: /** Debugging function to count how many entries are in the IQ. It does * a linear walk through the instructions, so do not call this function @@ -103,6 +121,11 @@ class InstructionQueue /** Pointer to the CPU. */ FullCPU *cpu; + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit; + /** The queue to the execute stage. Issued instructions will be written * into it. */ @@ -118,26 +141,46 @@ class InstructionQueue Int, Float, Branch, + Memory, + Misc, Squashed, None }; /** List of ready int instructions. Used to keep track of the order in - * which */ - priority_queue readyIntInsts; + * which instructions should issue. + */ + ReadyInstQueue readyIntInsts; /** List of ready floating point instructions. */ - priority_queue readyFloatInsts; + ReadyInstQueue readyFloatInsts; /** List of ready branch instructions. 
*/ - priority_queue readyBranchInsts; + ReadyInstQueue readyBranchInsts; + + /** List of ready memory instructions. */ + ReadyInstQueue readyMemInsts; + + /** List of ready miscellaneous instructions. */ + ReadyInstQueue readyMiscInsts; /** List of squashed instructions (which are still valid and in IQ). * Implemented using a priority queue; the entries must contain both * the IQ index and sequence number of each instruction so that * ordering based on sequence numbers can be used. */ - priority_queue squashedInsts; + ReadyInstQueue squashedInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is necessary to be + * able to search by the sequence number alone. + */ + std::map nonSpecInsts; + + typedef typename std::map::iterator non_spec_it_t; /** Number of free IQ entries left. */ unsigned freeEntries; @@ -158,6 +201,9 @@ class InstructionQueue /** The number of branches that can be issued in one cycle. */ unsigned branchWidth; + /** The number of memory instructions that can be issued in one cycle. */ + unsigned memoryWidth; + /** The total number of instructions that can be issued in one cycle. */ unsigned totalWidth; @@ -183,7 +229,7 @@ class InstructionQueue InstSeqNum squashedSeqNum; /** Iterator that points to the oldest instruction in the IQ. */ - ListIt head; +// ListIt head; /** Iterator that points to the youngest instruction in the IQ. */ ListIt tail; @@ -200,7 +246,7 @@ class InstructionQueue class DependencyEntry { public: - DynInst *inst; + DynInstPtr inst; //Might want to include data about what arch. register the //dependence is waiting on. DependencyEntry *next; @@ -212,9 +258,9 @@ class InstructionQueue //away. 
So for now it will sit here, within the IQ, until //a better implementation is decided upon. // This function probably shouldn't be within the entry... - void insert(DynInst *new_inst); + void insert(DynInstPtr &new_inst); - void remove(DynInst *inst_to_remove); + void remove(DynInstPtr &inst_to_remove); }; /** Array of linked lists. Each linked list is a list of all the @@ -233,11 +279,12 @@ class InstructionQueue */ vector regScoreboard; - bool addToDependents(DynInst *new_inst); - void insertDependency(DynInst *new_inst); - void createDependency(DynInst *new_inst); + bool addToDependents(DynInstPtr &new_inst); + void insertDependency(DynInstPtr &new_inst); + void createDependency(DynInstPtr &new_inst); + void dumpDependGraph(); - void addIfReady(DynInst *inst); + void addIfReady(DynInstPtr &inst); }; #endif //__INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh index 6f1f06858..03e3fed33 100644 --- a/cpu/beta_cpu/inst_queue_impl.hh +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -1,11 +1,8 @@ #ifndef __INST_QUEUE_IMPL_HH__ #define __INST_QUEUE_IMPL_HH__ -// Todo: Fix up consistency errors about back of the ready list being -// the oldest instructions in the queue. When woken up from the dependency -// graph they will be the oldest, but when they are immediately executable -// newer instructions will mistakenly get inserted onto the back. Also -// current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake // it; either do in reverse order, or have added instructions put into a // different ready queue that, in scheduleRreadyInsts(), gets put onto the // normal ready queue. This would however give only a one cycle delay, @@ -21,18 +18,21 @@ // Blatant hack to avoid compile warnings. 
const InstSeqNum MaxInstSeqNum = 0 - 1; -template +template InstructionQueue::InstructionQueue(Params ¶ms) - : numEntries(params.numIQEntries), + : memDepUnit(params), + numEntries(params.numIQEntries), intWidth(params.executeIntWidth), floatWidth(params.executeFloatWidth), + totalWidth(params.issueWidth), numPhysIntRegs(params.numPhysIntRegs), numPhysFloatRegs(params.numPhysFloatRegs), commitToIEWDelay(params.commitToIEWDelay) { // HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER. - totalWidth = 1; branchWidth = 1; + memoryWidth = 1; + DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); // Initialize the number of free IQ entries. @@ -66,7 +66,7 @@ InstructionQueue::InstructionQueue(Params ¶ms) } -template +template void InstructionQueue::setCPU(FullCPU *cpu_ptr) { @@ -75,7 +75,7 @@ InstructionQueue::setCPU(FullCPU *cpu_ptr) tail = cpu->instList.begin(); } -template +template void InstructionQueue::setIssueToExecuteQueue( TimeBuffer *i2e_ptr) @@ -84,7 +84,7 @@ InstructionQueue::setIssueToExecuteQueue( issueToExecuteQueue = i2e_ptr; } -template +template void InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) { @@ -96,7 +96,7 @@ InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) // Might want to do something more complex if it knows how many instructions // will be issued this cycle. -template +template bool InstructionQueue::isFull() { @@ -107,16 +107,16 @@ InstructionQueue::isFull() } } -template +template unsigned InstructionQueue::numFreeEntries() { return freeEntries; } -template +template void -InstructionQueue::insert(DynInst *new_inst) +InstructionQueue::insert(DynInstPtr &new_inst) { // Make sure the instruction is valid assert(new_inst); @@ -157,18 +157,78 @@ InstructionQueue::insert(DynInst *new_inst) // register(s). createDependency(new_inst); + // If it's a memory instruction, add it to the memory dependency + // unit. 
+ if (new_inst->isMemRef()) { + memDepUnit.insert(new_inst); + } + // If the instruction is ready then add it to the ready list. addIfReady(new_inst); assert(freeEntries == (numEntries - countInsts())); } +template +void +InstructionQueue::insertNonSpec(DynInstPtr &inst) +{ + nonSpecInsts[inst->seqNum] = inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. +// addToDependents(inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (inst->isMemRef()) { + memDepUnit.insert(inst); + } +} + // Slightly hack function to advance the tail iterator in the case that // the IEW stage issues an instruction that is not added to the IQ. This // is needed in case a long chain of such instructions occurs. 
-template +template void -InstructionQueue::advanceTail(DynInst *inst) +InstructionQueue::advanceTail(DynInstPtr &inst) { // Make sure the instruction is valid assert(inst); @@ -205,10 +265,11 @@ InstructionQueue::advanceTail(DynInst *inst) } // Need to make sure the number of float and integer instructions -// issued does not exceed the total issue bandwidth. Probably should -// have some sort of limit of total number of branches that can be issued -// as well. -template +// issued does not exceed the total issue bandwidth. +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template void InstructionQueue::scheduleReadyInsts() { @@ -218,6 +279,7 @@ InstructionQueue::scheduleReadyInsts() int int_issued = 0; int float_issued = 0; int branch_issued = 0; + int memory_issued = 0; int squashed_issued = 0; int total_issued = 0; @@ -226,6 +288,8 @@ InstructionQueue::scheduleReadyInsts() bool insts_available = !readyBranchInsts.empty() || !readyIntInsts.empty() || !readyFloatInsts.empty() || + !readyMemInsts.empty() || + !readyMiscInsts.empty() || !squashedInsts.empty(); // Note: Requires a globally defined constant. @@ -233,10 +297,12 @@ InstructionQueue::scheduleReadyInsts() InstList list_with_oldest = None; // Temporary values. 
- DynInst *int_head_inst; - DynInst *float_head_inst; - DynInst *branch_head_inst; - DynInst *squashed_head_inst; + DynInstPtr int_head_inst; + DynInstPtr float_head_inst; + DynInstPtr branch_head_inst; + DynInstPtr mem_head_inst; + DynInstPtr misc_head_inst; + DynInstPtr squashed_head_inst; // Somewhat nasty code to look at all of the lists where issuable // instructions are located, and choose the oldest instruction among @@ -257,7 +323,7 @@ InstructionQueue::scheduleReadyInsts() insts_available = true; - int_head_inst = readyIntInsts.top().inst; + int_head_inst = readyIntInsts.top(); if (int_head_inst->isSquashed()) { readyIntInsts.pop(); @@ -274,7 +340,7 @@ InstructionQueue::scheduleReadyInsts() insts_available = true; - float_head_inst = readyFloatInsts.top().inst; + float_head_inst = readyFloatInsts.top(); if (float_head_inst->isSquashed()) { readyFloatInsts.pop(); @@ -291,7 +357,7 @@ InstructionQueue::scheduleReadyInsts() insts_available = true; - branch_head_inst = readyBranchInsts.top().inst; + branch_head_inst = readyBranchInsts.top(); if (branch_head_inst->isSquashed()) { readyBranchInsts.pop(); @@ -304,11 +370,44 @@ InstructionQueue::scheduleReadyInsts() } + if (!readyMemInsts.empty() && + memory_issued < memoryWidth) { + + insts_available = true; + + mem_head_inst = readyMemInsts.top(); + + if (mem_head_inst->isSquashed()) { + readyMemInsts.pop(); + continue; + } else if (mem_head_inst->seqNum < oldest_inst) { + oldest_inst = mem_head_inst->seqNum; + + list_with_oldest = Memory; + } + } + + if (!readyMiscInsts.empty()) { + + insts_available = true; + + misc_head_inst = readyMiscInsts.top(); + + if (misc_head_inst->isSquashed()) { + readyMiscInsts.pop(); + continue; + } else if (misc_head_inst->seqNum < oldest_inst) { + oldest_inst = misc_head_inst->seqNum; + + list_with_oldest = Misc; + } + } + if (!squashedInsts.empty()) { insts_available = true; - squashed_head_inst = squashedInsts.top().inst; + squashed_head_inst = squashedInsts.top(); if 
(squashed_head_inst->seqNum < oldest_inst) { list_with_oldest = Squashed; @@ -316,13 +415,14 @@ InstructionQueue::scheduleReadyInsts() } - DynInst *issuing_inst = NULL; + DynInstPtr issuing_inst = NULL; switch (list_with_oldest) { case None: DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " "inst is %#x.\n", issuing_inst); break; + case Int: issuing_inst = int_head_inst; readyIntInsts.pop(); @@ -330,6 +430,7 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", issuing_inst->readPC()); break; + case Float: issuing_inst = float_head_inst; readyFloatInsts.pop(); @@ -337,6 +438,7 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", issuing_inst->readPC()); break; + case Branch: issuing_inst = branch_head_inst; readyBranchInsts.pop(); @@ -344,6 +446,25 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", issuing_inst->readPC()); break; + + case Memory: + issuing_inst = mem_head_inst; + + memDepUnit.issue(mem_head_inst); + + readyMemInsts.pop(); + ++memory_issued; + DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Misc: + issuing_inst = misc_head_inst; + readyMiscInsts.pop(); + DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Squashed: issuing_inst = squashed_head_inst; squashedInsts.pop(); @@ -366,61 +487,32 @@ InstructionQueue::scheduleReadyInsts() } } -template +template void -InstructionQueue::doSquash() +InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) { - // Make sure the squash iterator isn't pointing to nothing. - assert(squashIt != cpu->instList.end()); - // Make sure the squashed sequence number is valid. 
- assert(squashedSeqNum != 0); + non_spec_it_t inst_it = nonSpecInsts.find(inst); - DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + assert(inst_it != nonSpecInsts.end()); - // Squash any instructions younger than the squashed sequence number - // given. - while ((*squashIt)->seqNum > squashedSeqNum) { - DynInst *squashed_inst = (*squashIt); + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); - // Only handle the instruction if it actually is in the IQ and - // hasn't already been squashed in the IQ. - if (!squashed_inst->isIssued() && - !squashed_inst->isSquashedInIQ()) { - // Remove the instruction from the dependency list. - int8_t total_src_regs = squashed_inst->numSrcRegs(); + // Now schedule the instruction. + addIfReady((*inst_it).second); - for (int src_reg_idx = 0; - src_reg_idx < total_src_regs; - src_reg_idx++) - { - // Only remove it from the dependency graph if it was - // placed there in the first place. - // HACK: This assumes that instructions woken up from the - // dependency chain aren't informed that a specific src - // register has become ready. This may not always be true - // in the future. - if (!squashed_inst->isReadySrcRegIdx(src_reg_idx)) { - int8_t src_reg = - squashed_inst->renamedSrcRegIdx(src_reg_idx); - dependGraph[src_reg].remove(squashed_inst); - } - } - - // Mark it as squashed within the IQ. - squashed_inst->setSquashedInIQ(); - - ReadyEntry temp(squashed_inst); - - squashedInsts.push(temp); - - DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", - squashed_inst->readPC()); - } - squashIt--; - } + nonSpecInsts.erase(inst_it); } -template +template +void +InstructionQueue::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit.violation(store, faulting_load); +} + +template void InstructionQueue::squash() { @@ -435,9 +527,78 @@ InstructionQueue::squash() // Call doSquash. doSquash(); + + // Also tell the memory dependence unit to squash. 
+ memDepUnit.squash(squashedSeqNum); } -template +template +void +InstructionQueue::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInstPtr squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + // Remove the instruction from the dependency list. + // Hack for now: These below don't add themselves to the + // dependency list, so don't try to remove them. + if (!squashed_inst->isNonSpeculative() && + !squashed_inst->isStore()) { + int8_t total_src_regs = squashed_inst->numSrcRegs(); + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + } + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. 
+ squashed_inst->setSquashedInIQ(); + + squashedInsts.push(squashed_inst); + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + + if (squashed_inst->isNonSpeculative() || squashed_inst->isStore()) { + nonSpecInsts.erase(squashed_inst->seqNum); + } + + --squashIt; + } +} + +template void InstructionQueue::stopSquash() { @@ -448,36 +609,9 @@ InstructionQueue::stopSquash() squashIt = cpu->instList.end(); } -template -int -InstructionQueue::countInsts() -{ - ListIt count_it = cpu->instList.begin(); - int total_insts = 0; - - while (count_it != tail) { - if (!(*count_it)->isIssued()) { - ++total_insts; - } - - count_it++; - - assert(count_it != cpu->instList.end()); - } - - // Need to count the tail iterator as well. - if (count_it != cpu->instList.end() && - (*count_it) != NULL && - !(*count_it)->isIssued()) { - ++total_insts; - } - - return total_insts; -} - -template +template void -InstructionQueue::wakeDependents(DynInst *completed_inst) +InstructionQueue::wakeDependents(DynInstPtr &completed_inst) { DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); //Look at the physical destination register of the DynInst @@ -487,6 +621,13 @@ InstructionQueue::wakeDependents(DynInst *completed_inst) DependencyEntry *curr; + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. + + if (completed_inst->isMemRef()) { + memDepUnit.wakeDependents(completed_inst); + } + for (int dest_reg_idx = 0; dest_reg_idx < total_dest_regs; dest_reg_idx++) @@ -507,7 +648,7 @@ InstructionQueue::wakeDependents(DynInst *completed_inst) //Maybe abstract this part into a function. //Go through the dependency chain, marking the registers as ready //within the waiting instructions. 
- while (dependGraph[dest_reg].next != NULL) { + while (dependGraph[dest_reg].next) { curr = dependGraph[dest_reg].next; @@ -537,9 +678,9 @@ InstructionQueue::wakeDependents(DynInst *completed_inst) } } -template +template bool -InstructionQueue::addToDependents(DynInst *new_inst) +InstructionQueue::addToDependents(DynInstPtr &new_inst) { // Loop through the instruction's source registers, adding // them to the dependency list if they are not ready. @@ -558,7 +699,9 @@ InstructionQueue::addToDependents(DynInst *new_inst) // hasn't become ready while the instruction was in flight // between stages. Only if it really isn't ready should // it be added to the dependency graph. - if (regScoreboard[src_reg] == false) { + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " "is being added to the dependency chain.\n", new_inst->readPC(), src_reg); @@ -581,9 +724,9 @@ InstructionQueue::addToDependents(DynInst *new_inst) return return_val; } -template +template void -InstructionQueue::createDependency(DynInst *new_inst) +InstructionQueue::createDependency(DynInstPtr &new_inst) { //Actually nothing really needs to be marked when an //instruction becomes the producer of a register's value, @@ -595,20 +738,32 @@ InstructionQueue::createDependency(DynInst *new_inst) dest_reg_idx < total_dest_regs; dest_reg_idx++) { - int8_t dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); - dependGraph[dest_reg].inst = new_inst; - if (dependGraph[dest_reg].next != NULL) { - panic("Dependency chain is not empty.\n"); + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. 
+ if (dest_reg >= numPhysRegs) { + continue; } + dependGraph[dest_reg].inst = new_inst; +#if 0 + if (dependGraph[dest_reg].next) { + panic("Dependency chain of dest reg %i is not empty.\n", + dest_reg); + } +#endif + assert(!dependGraph[dest_reg].next); // Mark the scoreboard to say it's not yet ready. regScoreboard[dest_reg] = false; } } -template +template void -InstructionQueue::DependencyEntry::insert(DynInst *new_inst) +InstructionQueue::DependencyEntry::insert(DynInstPtr &new_inst) { //Add this new, dependent instruction at the head of the dependency //chain. @@ -623,9 +778,9 @@ InstructionQueue::DependencyEntry::insert(DynInst *new_inst) this->next = new_entry; } -template +template void -InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) +InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) { DependencyEntry *prev = this; DependencyEntry *curr = this->next; @@ -643,6 +798,8 @@ InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) { prev = curr; curr = curr->next; + + assert(curr != NULL); } // Now remove this instruction from the list. @@ -651,34 +808,140 @@ InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) delete curr; } -template +template void -InstructionQueue::addIfReady(DynInst *inst) +InstructionQueue::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x consumer: ", i, + curr->inst->readPC()); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x ", curr->inst->readPC()); + } + + cprintf("\n"); + } +} + +template +void +InstructionQueue::addIfReady(DynInstPtr &inst) { //If the instruction now has all of its source registers // available, then add it to the list of ready instructions. 
if (inst->readyToIssue()) { - ReadyEntry to_add(inst); + //Add the instruction to the proper ready list. - if (inst->isInteger()) { - DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyIntInsts.push(to_add); - } else if (inst->isFloating()) { - DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyFloatInsts.push(to_add); - } else if (inst->isControl()) { + if (inst->isControl()) { + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " "putting it onto the ready list, PC %#x.\n", inst->readPC()); - readyBranchInsts.push(to_add); + readyBranchInsts.push(inst); + + } else if (inst->isMemRef()) { + + DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); + + if (memDepUnit.readyToIssue(inst)) { + DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyMemInsts.push(inst); + } + + } else if (inst->isInteger()) { + + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(inst); + + } else if (inst->isFloating()) { + + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(inst); + } else { - panic("IQ: Instruction not an expected type.\n"); + DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, " + "putting it onto the ready list, PC %#x..\n", + inst->readPC()); + + readyMiscInsts.push(inst); } } } +template +int +InstructionQueue::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + ++count_it; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. 
+ if (count_it != cpu->instList.end() && + (*count_it) && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template +void +InstructionQueue::dumpLists() +{ + cprintf("Ready integer list size: %i\n", readyIntInsts.size()); + + cprintf("Ready float list size: %i\n", readyFloatInsts.size()); + + cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); + + cprintf("Ready memory list size: %i\n", readyMemInsts.size()); + + cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); + + cprintf("Squashed list size: %i\n", squashedInsts.size()); + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + non_spec_it_t non_spec_it = nonSpecInsts.begin(); + + cprintf("Non speculative list: "); + + while (non_spec_it != nonSpecInsts.end()) { + cprintf("%#x ", (*non_spec_it).second->readPC()); + ++non_spec_it; + } + + cprintf("\n"); + +} + #endif // __INST_QUEUE_IMPL_HH__ diff --git a/cpu/beta_cpu/mem_dep_unit.cc b/cpu/beta_cpu/mem_dep_unit.cc new file mode 100644 index 000000000..3175997f6 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit.cc @@ -0,0 +1,9 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/store_set.hh" +#include "cpu/beta_cpu/mem_dep_unit_impl.hh" + +// Force instantation of memory dependency unit using store sets and +// AlphaSimpleImpl. +template MemDepUnit; diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh new file mode 100644 index 000000000..4821c63b7 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit.hh @@ -0,0 +1,70 @@ + +#ifndef __MEM_DEP_UNIT_HH__ +#define __MEM_DEP_UNIT_HH__ + +#include +#include + +#include "cpu/inst_seq.hh" + +/** + * Memory dependency unit class. This holds the memory dependence predictor. + * As memory operations are issued to the IQ, they are also issued to this + * unit, which then looks up the prediction as to what they are dependent + * upon. 
This unit must be checked prior to a memory operation being able + * to issue. Although this is templated, it's somewhat hard to make a generic + * memory dependence unit. This one is mostly for store sets; it will be + * quite limited in what other memory dependence predictions it can also + * utilize. Thus this class should be most likely be rewritten for other + * dependence prediction schemes. + */ +template +class MemDepUnit { + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + typedef typename std::set::iterator sn_it_t; + typedef typename std::map >::iterator + dep_it_t; + + public: + MemDepUnit(Params ¶ms); + + void insert(DynInstPtr &inst); + + bool readyToIssue(DynInstPtr &inst); + + void issue(DynInstPtr &inst); + + void wakeDependents(DynInstPtr &inst); + + void squash(const InstSeqNum &squashed_num); + + void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + + private: + /** List of instructions that have passed through rename, yet are still + * waiting on a memory dependence to resolve before they can issue. + */ + std::set renamedInsts; + + /** List of instructions that have all their predicted memory dependences + * resolved. They are ready in terms of being free of memory + * dependences; however they may still have to wait on source registers. + */ + std::set readyInsts; + + std::map > dependencies; + + /** The memory dependence predictor. It is accessed upon new + * instructions being added to the IQ, and responds by telling + * this unit what instruction the newly added instruction is dependent + * upon. + */ + MemDepPred depPred; + +}; + +#endif diff --git a/cpu/beta_cpu/mem_dep_unit_impl.hh b/cpu/beta_cpu/mem_dep_unit_impl.hh new file mode 100644 index 000000000..4299acb7a --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit_impl.hh @@ -0,0 +1,166 @@ + +#include + +#include "cpu/beta_cpu/mem_dep_unit.hh" + +// Hack: dependence predictor sizes are hardcoded. 
+template +MemDepUnit::MemDepUnit(Params ¶ms) + : depPred(4028, 128) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); +} + +template +void +MemDepUnit::insert(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + + InstSeqNum producing_store = depPred.checkInst(inst->readPC()); + + if (producing_store == 0 || + dependencies.find(producing_store) == dependencies.end()) { + readyInsts.insert(inst_seq_num); + } else { + // If it's not already ready, then add it to the renamed + // list and the dependencies. + renamedInsts.insert(inst_seq_num); + + dependencies[producing_store].push_back(inst_seq_num); + } + + if (inst->isStore()) { + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(dependencies.find(inst_seq_num) == dependencies.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + dependencies[inst_seq_num]; + } else if (!inst->isLoad()) { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } +} + +template +bool +MemDepUnit::readyToIssue(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + if (readyInsts.find(inst_seq_num) == readyInsts.end()) { + return false; + } else { + return true; + } +} + +template +void +MemDepUnit::issue(DynInstPtr &inst) +{ + assert(readyInsts.find(inst->seqNum) != readyInsts.end()); + + // Remove the instruction from the ready list. + readyInsts.erase(inst->seqNum); +} + +template +void +MemDepUnit::wakeDependents(DynInstPtr &inst) +{ + // Wake any dependencies. + dep_it_t dep_it = dependencies.find(inst); + + // If there's no entry, then return. Really there should only be + // no entry if the instruction is a load. 
+ if (dep_it == dependencies.end()) { + return; + } + + assert(inst->isStore()); + + for(int i = 0; i < (*dep_it).second.size(); ++i ) { + InstSeqNum woken_inst = (*dep_it).second[i]; + + // Should we have reached instructions that are actually squashed, + // there will be no more useful instructions in this dependency + // list. Break out early. + if (renamedInsts.find(woken_inst) == renamedInsts.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " + "are squashed, starting at SN %i. Breaking early.\n", + inst->readPC(), woken_inst); + break; + } + + // Remove it from the renamed instructions. + renamedInsts.erase(woken_inst); + + // Add it to the ready list. + readyInsts.insert(woken_inst); + } + + dependencies.erase(dep_it); +} + +template +void +MemDepUnit::squash(const InstSeqNum &squashed_num) +{ + + if (!renamedInsts.empty()) { + sn_it_t renamed_it = renamedInsts.end(); + + --renamed_it; + + // Remove entries from the renamed list as long as we haven't reached + // the end and the entries continue to be younger than the squashed. + while (!renamedInsts.empty() && + (*renamed_it) > squashed_num) + { + renamedInsts.erase(renamed_it--); + } + } + + if (!readyInsts.empty()) { + sn_it_t ready_it = readyInsts.end(); + + --ready_it; + + // Same for the ready list. + while (!readyInsts.empty() && + (*ready_it) > squashed_num) + { + readyInsts.erase(ready_it--); + } + } + + if (!dependencies.empty()) { + dep_it_t dep_it = dependencies.end(); + + --dep_it; + + // Same for the dependencies list. + while (!dependencies.empty() && + (*dep_it).first > squashed_num) + { + dependencies.erase(dep_it--); + } + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num); +} + +template +void +MemDepUnit::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ + // Tell the memory dependence unit of the violation. 
+ depPred.violation(violating_load->readPC(), store_inst->readPC()); +} diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index 21e0ce218..aba897fdc 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -13,11 +13,11 @@ using namespace std; // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, // and should go in the AlphaFullCPU. -template +template class PhysRegFile { //Note that most of the definitions of the IntReg, FloatReg, etc. exist - //within the Impl class and not within this PhysRegFile class. + //within the Impl/ISA class and not within this PhysRegFile class. //Will need some way to allow stuff like swap_palshadow to access the //correct registers. Might require code changes to swap_palshadow and @@ -42,6 +42,8 @@ class PhysRegFile uint64_t readIntReg(PhysRegIndex reg_idx) { + assert(reg_idx < numPhysicalIntRegs); + DPRINTF(IEW, "RegFile: Access to int register %i, has data " "%i\n", int(reg_idx), intRegFile[reg_idx]); return intRegFile[reg_idx]; @@ -52,8 +54,10 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Access to float register %i, has data " - "%f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as single, has " + "data %8.8f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); return (float)floatRegFile[reg_idx].d; } @@ -63,8 +67,10 @@ class PhysRegFile // Remove the base Float reg dependency. 
reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Access to float register %i, has data " - "%f\n", int(reg_idx), floatRegFile[reg_idx].d); + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as double, has " + " data %8.8f\n", int(reg_idx), floatRegFile[reg_idx].d); return floatRegFile[reg_idx].d; } @@ -74,14 +80,18 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Access to float register %i, has data " - "%f\n", int(reg_idx), floatRegFile[reg_idx].q); + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as int, has data " + "%lli\n", int(reg_idx), floatRegFile[reg_idx].q); return floatRegFile[reg_idx].q; } void setIntReg(PhysRegIndex reg_idx, uint64_t val) { + assert(reg_idx < numPhysicalIntRegs); + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", int(reg_idx), val); @@ -93,7 +103,9 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", int(reg_idx), val); floatRegFile[reg_idx].d = (double)val; @@ -104,7 +116,9 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", int(reg_idx), val); floatRegFile[reg_idx].d = val; @@ -115,6 +129,8 @@ class PhysRegFile // Remove the base Float reg dependency. 
reg_idx = reg_idx - numPhysicalIntRegs; + assert(reg_idx < numPhysicalFloatRegs); + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), val); @@ -185,7 +201,7 @@ class PhysRegFile unsigned numPhysicalFloatRegs; }; -template +template PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs) : numPhysicalIntRegs(_numPhysicalIntRegs), @@ -203,7 +219,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, //Problem: This code doesn't make sense at the RegFile level because it //needs things such as the itb and dtb. Either put it at the CPU level or //the DynInst level. -template +template uint64_t PhysRegFile::readIpr(int idx, Fault &fault) { @@ -319,7 +335,7 @@ PhysRegFile::readIpr(int idx, Fault &fault) int break_ipl = -1; #endif -template +template Fault PhysRegFile::setIpr(int idx, uint64_t val) { diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh index cd66ce686..9f031012a 100644 --- a/cpu/beta_cpu/rename.hh +++ b/cpu/beta_cpu/rename.hh @@ -1,25 +1,14 @@ // Todo: -// Figure out rename map for reg vs fp (probably just have one rename map). -// In simple case, there is no renaming, so have this stage do basically -// nothing. -// Fix up trap and barrier handling. Fix up squashing too, as it's too -// dependent upon the iew stage continually telling it to squash. -// Have commit send back information whenever a branch has committed. This -// way the history buffer can be cleared beyond the point where the branch -// was. +// Fix up trap and barrier handling. +// May want to have different statuses to differentiate the different stall +// conditions. 
#ifndef __SIMPLE_RENAME_HH__ #define __SIMPLE_RENAME_HH__ -//Will want to include: time buffer, structs, free list, rename map #include #include "base/timebuf.hh" -#include "cpu/beta_cpu/comm.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/free_list.hh" - -using namespace std; // Will need rename maps for both the int reg file and fp reg file. // Or change rename map class to handle both. (RegFile handles both.) @@ -30,14 +19,14 @@ class SimpleRename // Typedefs from the Impl. typedef typename Impl::ISA ISA; typedef typename Impl::CPUPol CPUPol; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; - typedef typename Impl::FetchStruct FetchStruct; - typedef typename Impl::DecodeStruct DecodeStruct; - typedef typename Impl::RenameStruct RenameStruct; - typedef typename Impl::TimeStruct TimeStruct; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; // Typedefs from the CPUPol typedef typename CPUPol::FreeList FreeList; @@ -94,6 +83,14 @@ class SimpleRename void removeFromHistory(InstSeqNum inst_seq_num); + inline void renameSrcRegs(DynInstPtr &inst); + + inline void renameDestRegs(DynInstPtr &inst); + + inline int calcFreeROBEntries(); + + inline int calcFreeIQEntries(); + /** Holds the previous information for each rename. * Note that often times the inst may have been deleted, so only access * the pointer for the address and do not dereference it. @@ -123,7 +120,7 @@ class SimpleRename bool placeHolder; }; - list historyBuffer; + std::list historyBuffer; /** CPU interface. */ FullCPU *cpu; @@ -155,7 +152,7 @@ class SimpleRename typename TimeBuffer::wire fromDecode; /** Skid buffer between rename and decode. 
*/ - queue skidBuffer; + std::queue skidBuffer; /** Rename map interface. */ SimpleRenameMap *renameMap; @@ -179,6 +176,12 @@ class SimpleRename * instructions might have freed registers in the previous cycle. */ unsigned commitWidth; + + /** The instruction that rename is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; }; #endif // __SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh index 2b60c2f50..47464d961 100644 --- a/cpu/beta_cpu/rename_impl.hh +++ b/cpu/beta_cpu/rename_impl.hh @@ -2,18 +2,19 @@ #include "cpu/beta_cpu/rename.hh" -template +template SimpleRename::SimpleRename(Params ¶ms) : iewToRenameDelay(params.iewToRenameDelay), decodeToRenameDelay(params.decodeToRenameDelay), commitToRenameDelay(params.commitToRenameDelay), renameWidth(params.renameWidth), - commitWidth(params.commitWidth) + commitWidth(params.commitWidth), + numInst(0) { _status = Idle; } -template +template void SimpleRename::setCPU(FullCPU *cpu_ptr) { @@ -21,7 +22,7 @@ SimpleRename::setCPU(FullCPU *cpu_ptr) cpu = cpu_ptr; } -template +template void SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) { @@ -38,7 +39,7 @@ SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) toDecode = timeBuffer->getWire(0); } -template +template void SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) { @@ -49,7 +50,7 @@ SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) toIEW = renameQueue->getWire(0); } -template +template void SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) { @@ -61,7 +62,7 @@ SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) } -template +template void SimpleRename::setRenameMap(RenameMap *rm_ptr) { @@ -69,7 +70,7 @@ SimpleRename::setRenameMap(RenameMap *rm_ptr) renameMap = rm_ptr; } -template +template void SimpleRename::setFreeList(FreeList *fl_ptr) { @@ -77,7 +78,7 @@ SimpleRename::setFreeList(FreeList *fl_ptr) 
freeList = fl_ptr; } -template +template void SimpleRename::dumpHistory() { @@ -93,7 +94,7 @@ SimpleRename::dumpHistory() } } -template +template void SimpleRename::block() { @@ -110,12 +111,12 @@ SimpleRename::block() // the previous stages are expected to check all possible stall signals. } -template +template inline void SimpleRename::unblock() { - DPRINTF(Rename, "Rename: Reading instructions out of skid " - "buffer.\n"); + DPRINTF(Rename, "Rename: Read instructions out of skid buffer this " + "cycle.\n"); // Remove the now processed instructions from the skid buffer. skidBuffer.pop(); @@ -130,12 +131,12 @@ SimpleRename::unblock() } } -template +template void SimpleRename::doSquash() { typename list::iterator hb_it = historyBuffer.begin(); - typename list::iterator delete_it; +// typename list::iterator delete_it; InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; @@ -166,15 +167,17 @@ SimpleRename::doSquash() freeList->addReg(hb_it->newPhysReg); } - delete_it = hb_it; +// delete_it = hb_it; - hb_it++; +// hb_it++; - historyBuffer.erase(delete_it); + historyBuffer.erase(hb_it++); + + assert(hb_it != historyBuffer.end()); } } -template +template void SimpleRename::squash() { @@ -182,6 +185,8 @@ SimpleRename::squash() // Set the status to Squashing. _status = Squashing; + numInst = 0; + // Clear the skid buffer in case it has any data in it. while (!skidBuffer.empty()) { @@ -199,10 +204,10 @@ void SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) { DPRINTF(Rename, "Rename: Removing a committed instruction from the " - "history buffer, sequence number %lli.\n", inst_seq_num); + "history buffer, until sequence number %lli.\n", inst_seq_num); typename list::iterator hb_it = historyBuffer.end(); - hb_it--; + --hb_it; if (hb_it->instSeqNum > inst_seq_num) { DPRINTF(Rename, "Rename: Old sequence number encountered. 
Ensure " @@ -210,7 +215,7 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) return; } - for ( ; hb_it->instSeqNum != inst_seq_num; hb_it--) + while ((*hb_it).instSeqNum != inst_seq_num) { // Make sure we haven't gone off the end of the list. assert(hb_it != historyBuffer.end()); @@ -222,10 +227,19 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) // be the last instruction in the list, as it is the instruction // that was just committed that is being removed. assert(hb_it->instSeqNum < inst_seq_num); - DPRINTF(Rename, "Rename: Committed instruction is not the last " - "entry in the history buffer.\n"); + DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence" + " number %i.\n", + (*hb_it).prevPhysReg, (*hb_it).instSeqNum); + + if (!(*hb_it).placeHolder) { + freeList->addReg((*hb_it).prevPhysReg); + } + + historyBuffer.erase(hb_it--); } + // Finally free up the previous register of the squashed instruction + // itself. if (!(*hb_it).placeHolder) { freeList->addReg(hb_it->prevPhysReg); } @@ -234,6 +248,113 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) } +template +inline void +SimpleRename::renameSrcRegs(DynInstPtr &inst) +{ + unsigned num_src_regs = inst->numSrcRegs(); + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. 
+ RegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + } +} + +template +inline void +SimpleRename::renameDestRegs(DynInstPtr &inst) +{ + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_dest_regs = inst->numDestRegs(); + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "reg %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", + (*historyBuffer.begin()).instSeqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + } + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. 
It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. + if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + } +} + +template +inline int +SimpleRename::calcFreeROBEntries() +{ + return fromCommit->commitInfo.freeROBEntries - + renameWidth * iewToRenameDelay; +} + +template +inline int +SimpleRename::calcFreeIQEntries() +{ + return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay; +} + template void SimpleRename::tick() @@ -258,12 +379,18 @@ SimpleRename::tick() // buffer were used. Remove those instructions and handle // the rest of unblocking. if (_status == Unblocking) { + if (fromDecode->size > 0) { + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + } + unblock(); } } else if (_status == Blocked) { // If stage is blocked and still receiving valid instructions, // make sure to store them in the skid buffer. - if (fromDecode->insts[0] != NULL) { + if (fromDecode->size > 0) { block(); @@ -273,8 +400,9 @@ SimpleRename::tick() if (!fromIEW->iewInfo.stall && !fromCommit->commitInfo.stall && - fromCommit->commitInfo.freeROBEntries != 0 && - fromIEW->iewInfo.freeIQEntries != 0) { + calcFreeROBEntries() > 0 && + calcFreeIQEntries() > 0 && + renameMap->numFreeEntries() > 0) { // Need to be sure to check all blocking conditions above. // If they have cleared, then start unblocking. @@ -344,6 +472,7 @@ SimpleRename::rename() // the rename map and the free list. 
if (fromCommit->commitInfo.squash || fromCommit->commitInfo.robSquashing) { + DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n"); squash(); return; } @@ -368,37 +497,38 @@ SimpleRename::rename() // Check the decode queue to see if instructions are available. // If there are no available instructions to rename, then do nothing. // Or, if the stage is currently unblocking, then go ahead and run it. - if (fromDecode->insts[0] == NULL && _status != Unblocking) { + if (fromDecode->size == 0 && _status != Unblocking) { DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); // Should I change status to idle? return; } - DynInst *inst; - unsigned num_inst = 0; + //////////////////////////////////// + // Actual rename part. + //////////////////////////////////// - bool insts_available = _status == Unblocking ? - skidBuffer.front().insts[num_inst] != NULL : - fromDecode->insts[num_inst] != NULL; + DynInstPtr inst; - typename SimpleRenameMap::RenameInfo rename_result; + // If we're unblocking, then we may be in the middle of an instruction + // group. Subtract off numInst to get the proper number of instructions + // left. + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromDecode->size; - unsigned num_src_regs; - unsigned num_dest_regs; + bool block_this_cycle = false; // Will have to do a different calculation for the number of free // entries. Number of free entries recorded on this cycle - // renameWidth * renameToDecodeDelay - // Can I avoid a multiply? 
- unsigned free_rob_entries = - fromCommit->commitInfo.freeROBEntries - iewToRenameDelay; - DPRINTF(Rename, "Rename: ROB has %d free entries.\n", - free_rob_entries); - unsigned free_iq_entries = - fromIEW->iewInfo.freeIQEntries - iewToRenameDelay; + int free_rob_entries = calcFreeROBEntries(); + int free_iq_entries = calcFreeIQEntries(); + int min_iq_rob = min(free_rob_entries, free_iq_entries); + + unsigned to_iew_index = 0; // Check if there's any space left. - if (free_rob_entries == 0 || free_iq_entries == 0) { + if (min_iq_rob <= 0) { DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " "entries.\n" "Rename: ROB has %d free entries.\n" @@ -410,22 +540,40 @@ SimpleRename::rename() toDecode->renameInfo.stall = true; return; + } else if (min_iq_rob < insts_available) { + DPRINTF(Rename, "Rename: Will have to block this cycle. Only " + "%i insts can be renamed due to IQ/ROB limits.\n", + min_iq_rob); + + insts_available = min_iq_rob; + + block_this_cycle = true; } - unsigned min_iq_rob = min(free_rob_entries, free_iq_entries); - unsigned num_insts_to_rename = min(min_iq_rob, renameWidth); - - while (insts_available && - num_inst < num_insts_to_rename) { + while (insts_available > 0) { DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); // Get the next instruction either from the skid buffer or the // decode queue. - inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : - fromDecode->insts[num_inst]; + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromDecode->insts[numInst]; + + if (inst->isSquashed()) { + DPRINTF(Rename, "Rename: instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + // Go to the next instruction. + ++numInst; + + // Decrement how many instructions are available. 
+ --insts_available; + + continue; + } DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", - inst, inst->readPC()); + inst->seqNum, inst->readPC()); // If it's a trap instruction, then it needs to wait here within // rename until the ROB is empty. Needs a way to detect that the @@ -438,156 +586,59 @@ SimpleRename::rename() panic("Rename: Serializing instruction encountered.\n"); DPRINTF(Rename, "Rename: Serializing instruction " "encountered.\n"); - block(); // Change status over to BarrierStall so that other stages know // what this is blocked on. _status = BarrierStall; - // Tell the previous stage to stall. - toDecode->renameInfo.stall = true; + block_this_cycle = true; break; } - // Make sure there's enough room in the ROB and the IQ. - // This doesn't really need to be done dynamically; consider - // moving outside of this function. - if (free_rob_entries == 0 || free_iq_entries == 0) { - DPRINTF(Rename, "Rename: Blocking due to lack of ROB or IQ " - "entries.\n"); - // Call some sort of function to handle all the setup of being - // blocked. - block(); - - // Not really sure how to schedule an event properly, but an - // event must be scheduled such that upon freeing a ROB entry, - // this stage will restart up. Perhaps add in a ptr to an Event - // within the ROB that will be able to execute that Event - // if a free register is added to the freelist. - - // Tell the previous stage to stall. - toDecode->renameInfo.stall = true; - - break; - } - - // Temporary variables to hold number of source and destination regs. - num_src_regs = inst->numSrcRegs(); - num_dest_regs = inst->numDestRegs(); - // Check here to make sure there are enough destination registers // to rename to. Otherwise block. - if (renameMap->numFreeEntries() < num_dest_regs) + if (renameMap->numFreeEntries() < inst->numDestRegs()) { DPRINTF(Rename, "Rename: Blocking due to lack of free " "physical registers to rename to.\n"); - // Call function to handle blocking. 
- block(); - // Need some sort of event based on a register being freed. - // Tell the previous stage to stall. - toDecode->renameInfo.stall = true; + block_this_cycle = true; - // Break out of rename loop. break; } - // Get the architectual register numbers from the source and - // destination operands, and redirect them to the right register. - // Will need to mark dependencies though. - for (int src_idx = 0; src_idx < num_src_regs; src_idx++) - { - RegIndex src_reg = inst->srcRegIdx(src_idx); + renameSrcRegs(inst); - // Look up the source registers to get the phys. register they've - // been renamed to, and set the sources to those registers. - RegIndex renamed_reg = renameMap->lookup(src_reg); - - DPRINTF(Rename, "Rename: Looking up arch reg %i, got " - "physical reg %i.\n", (int)src_reg, (int)renamed_reg); - - inst->renameSrcReg(src_idx, renamed_reg); - - // Either incorporate it into the info passed back, - // or make another function call to see if that register is - // ready or not. - if (renameMap->isReady(renamed_reg)) { - DPRINTF(Rename, "Rename: Register is ready.\n"); - - inst->markSrcRegReady(src_idx); - } - } - - // Rename the destination registers. - for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) - { - RegIndex dest_reg = inst->destRegIdx(dest_idx); - - // Get the physical register that the destination will be - // renamed to. - rename_result = renameMap->rename(dest_reg); - - DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " - "register %i.\n", (int)dest_reg, - (int)rename_result.first); - - // Record the rename information so that a history can be kept. 
- RenameHistory hb_entry(inst->seqNum, dest_reg, - rename_result.first, - rename_result.second); - - historyBuffer.push_front(hb_entry); - - DPRINTF(Rename, "Rename: Adding instruction to history buffer, " - "sequence number %lli.\n", inst->seqNum); - - // Tell the instruction to rename the appropriate destination - // register (dest_idx) to the new physical register - // (rename_result.first), and record the previous physical - // register that the same logical register was renamed to - // (rename_result.second). - inst->renameDestReg(dest_idx, - rename_result.first, - rename_result.second); - } - - // If it's an instruction with no destination registers, then put - // a placeholder within the history buffer. It might be better - // to not put it in the history buffer at all (other than branches, - // which always need at least a place holder), and differentiate - // between instructions with and without destination registers - // when getting from commit the instructions that committed. - if (num_dest_regs == 0) { - RenameHistory hb_entry(inst->seqNum); - - historyBuffer.push_front(hb_entry); - - DPRINTF(Rename, "Rename: Adding placeholder instruction to " - "history buffer, sequence number %lli.\n", - inst->seqNum); - } + renameDestRegs(inst); // Put instruction in rename queue. - toIEW->insts[num_inst] = inst; + toIEW->insts[to_iew_index] = inst; + ++(toIEW->size); // Decrease the number of free ROB and IQ entries. --free_rob_entries; --free_iq_entries; // Increment which instruction we're on. - ++num_inst; + ++to_iew_index; + ++numInst; - // Check whether or not there are instructions available. - // Either need to check within the skid buffer, or the decode - // queue, depending if this stage is unblocking or not. - // Hmm, dangerous check. Can touch memory not allocated. Might - // be better to just do check at beginning of loop. Or better - // yet actually pass the number of instructions issued. - insts_available = _status == Unblocking ? 
- skidBuffer.front().insts[num_inst] != NULL : - fromDecode->insts[num_inst] != NULL; + // Decrement how many instructions are available. + --insts_available; } + // Check if there's any instructions left that haven't yet been renamed. + // If so then block. + if (block_this_cycle) { + block(); + + toDecode->renameInfo.stall = true; + } else { + // If we had a successful rename and didn't have to exit early, then + // reset numInst so it will refer to the correct instruction on next + // run. + numInst = 0; + } } diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc index c234182f0..cb9720d28 100644 --- a/cpu/beta_cpu/rename_map.cc +++ b/cpu/beta_cpu/rename_map.cc @@ -3,12 +3,10 @@ // Todo: Consider making functions inline. Avoid having things that are // using the zero register or misc registers from adding on the registers -// to the free list. - -SimpleRenameMap::RenameEntry::RenameEntry() - : physical_reg(0), valid(false) -{ -} +// to the free list. Possibly remove the direct communication between +// this and the freelist. Considering making inline bool functions that +// determine if the register is a logical int, logical fp, physical int, +// physical fp, etc. SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, unsigned _numPhysicalIntRegs, @@ -35,11 +33,12 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, //Create the rename maps, and their scoreboards. intRenameMap = new RenameEntry[numLogicalIntRegs]; - floatRenameMap = new RenameEntry[numLogicalFloatRegs]; + floatRenameMap = new RenameEntry[numLogicalRegs]; + // Should combine this into one scoreboard. 
intScoreboard.resize(numPhysicalIntRegs); - floatScoreboard.resize(numPhysicalFloatRegs); - miscScoreboard.resize(numMiscRegs); + floatScoreboard.resize(numPhysicalRegs); + miscScoreboard.resize(numPhysicalRegs + numMiscRegs); // Initialize the entries in the integer rename map to point to the // physical registers of the same index, and consider each register @@ -59,31 +58,50 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, intScoreboard[index] = 0; } + int float_reg_idx = numPhysicalIntRegs; + // Initialize the entries in the floating point rename map to point to // the physical registers of the same index, and consider each register // ready until the first rename occurs. - for (RegIndex index = 0; index < numLogicalFloatRegs; ++index) + // Although the index refers purely to architected registers, because + // the floating reg indices come after the integer reg indices, they + // may exceed the size of a normal RegIndex (short). + for (PhysRegIndex index = numLogicalIntRegs; + index < numLogicalRegs; ++index) + { + floatRenameMap[index].physical_reg = float_reg_idx++; + } + + for (RegIndex index = numPhysicalIntRegs; + index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) { - floatRenameMap[index].physical_reg = index + numPhysicalIntRegs; floatScoreboard[index] = 1; } // Initialize the rest of the physical registers (the ones that don't // directly map to a logical register) as unready. - for (PhysRegIndex index = numLogicalFloatRegs; - index < numPhysicalFloatRegs; + for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs; + index < numPhysicalRegs; ++index) { floatScoreboard[index] = 0; } // Initialize the entries in the misc register scoreboard to be ready. 
- for (RegIndex index = 0; index < numMiscRegs; ++index) + for (RegIndex index = numPhysicalRegs; + index < numPhysicalRegs + numMiscRegs; ++index) { miscScoreboard[index] = 1; } } +SimpleRenameMap::~SimpleRenameMap() +{ + // Delete the rename maps as they were allocated with new. + delete [] intRenameMap; + delete [] floatRenameMap; +} + void SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) { @@ -116,6 +134,8 @@ SimpleRenameMap::rename(RegIndex arch_reg) // Update the integer rename map. intRenameMap[arch_reg].physical_reg = renamed_reg; + assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); + // Mark register as not ready. intScoreboard[renamed_reg] = false; } else { @@ -124,7 +144,7 @@ SimpleRenameMap::rename(RegIndex arch_reg) } } else if (arch_reg < numLogicalRegs) { // Subtract off the base offset for floating point registers. - arch_reg = arch_reg - numLogicalIntRegs; +// arch_reg = arch_reg - numLogicalIntRegs; // Record the current physical register that is renamed to the // requested architected register. @@ -139,6 +159,9 @@ SimpleRenameMap::rename(RegIndex arch_reg) // Update the floating point rename map. floatRenameMap[arch_reg].physical_reg = renamed_reg; + assert(renamed_reg < numPhysicalRegs && + renamed_reg >= numPhysicalIntRegs); + // Mark register as not ready. floatScoreboard[renamed_reg] = false; } else { @@ -160,6 +183,8 @@ SimpleRenameMap::rename(RegIndex arch_reg) // so the free list can avoid adding it. prev_reg = renamed_reg; + assert(renamed_reg < numPhysicalRegs + numMiscRegs); + miscScoreboard[renamed_reg] = false; } @@ -175,7 +200,7 @@ SimpleRenameMap::lookup(RegIndex arch_reg) return intRenameMap[arch_reg].physical_reg; } else if (arch_reg < numLogicalRegs) { // Subtract off the base FP offset. 
- arch_reg = arch_reg - numLogicalIntRegs; +// arch_reg = arch_reg - numLogicalIntRegs; return floatRenameMap[arch_reg].physical_reg; } else { @@ -196,12 +221,12 @@ SimpleRenameMap::isReady(PhysRegIndex phys_reg) } else if (phys_reg < numPhysicalRegs) { // Subtract off the base FP offset. - phys_reg = phys_reg - numPhysicalIntRegs; +// phys_reg = phys_reg - numPhysicalIntRegs; return floatScoreboard[phys_reg]; } else { // Subtract off the misc registers offset. - phys_reg = phys_reg - numPhysicalRegs; +// phys_reg = phys_reg - numPhysicalRegs; return miscScoreboard[phys_reg]; } @@ -218,13 +243,10 @@ SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) intRenameMap[arch_reg].physical_reg = renamed_reg; } else { -// assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); - - // Subtract off the base FP offset. - arch_reg = arch_reg - numLogicalIntRegs; + assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", - (int)arch_reg, renamed_reg); + (int)arch_reg - numLogicalIntRegs, renamed_reg); floatRenameMap[arch_reg].physical_reg = renamed_reg; } @@ -234,6 +256,8 @@ void SimpleRenameMap::squash(vector freed_regs, vector unmaps) { + panic("Not sure this function should be called."); + // Not sure the rename map should be able to access the free list // like this. while (!freed_regs.empty()) { @@ -260,16 +284,18 @@ SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) (int)ready_reg); if (ready_reg < numPhysicalIntRegs) { + assert(ready_reg >= 0); + intScoreboard[ready_reg] = 1; } else if (ready_reg < numPhysicalRegs) { // Subtract off the base FP offset. - ready_reg = ready_reg - numPhysicalIntRegs; +// ready_reg = ready_reg - numPhysicalIntRegs; floatScoreboard[ready_reg] = 1; } else { //Subtract off the misc registers offset. 
- ready_reg = ready_reg - numPhysicalRegs; +// ready_reg = ready_reg - numPhysicalRegs; miscScoreboard[ready_reg] = 1; } diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh index 05b52bfb2..e68fa05a8 100644 --- a/cpu/beta_cpu/rename_map.hh +++ b/cpu/beta_cpu/rename_map.hh @@ -1,6 +1,5 @@ // Todo: Create destructor. -// Make it so that there's a proper separation between int and fp. Also -// have it so that there's a more meaningful name given to the variable +// Have it so that there's a more meaningful name given to the variable // that marks the beginning of the FP registers. #ifndef __RENAME_MAP_HH__ @@ -10,7 +9,6 @@ #include #include -//Will want to include faults #include "cpu/beta_cpu/free_list.hh" using namespace std; @@ -18,8 +16,6 @@ using namespace std; class SimpleRenameMap { public: -// typedef typename Impl::RegIndex RegIndex; - /** * Pair of a logical register and a physical register. Tells the * previous mapping of a logical register to a physical register. @@ -45,6 +41,9 @@ class SimpleRenameMap RegIndex _intZeroReg, RegIndex _floatZeroReg); + /** Destructor. */ + ~SimpleRenameMap(); + void setFreeList(SimpleFreeList *fl_ptr); //Tell rename map to get a free physical register for a given @@ -110,7 +109,9 @@ class SimpleRenameMap PhysRegIndex physical_reg; bool valid; - RenameEntry(); + RenameEntry() + : physical_reg(0), valid(false) + { } }; /** Integer rename map. */ @@ -122,6 +123,8 @@ class SimpleRenameMap /** Free list interface. */ SimpleFreeList *freeList; + // Might want to make all these scoreboards into one large scoreboard. + /** Scoreboard of physical integer registers, saying whether or not they * are ready. */ diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh index 7963d1b01..c921c0619 100644 --- a/cpu/beta_cpu/rob.hh +++ b/cpu/beta_cpu/rob.hh @@ -16,24 +16,20 @@ using namespace std; /** * ROB class. Uses the instruction list that exists within the CPU to - * represent the ROB. 
This class doesn't contain that structure, but instead - * a pointer to the CPU to get access to the structure. The ROB has a large - * hand in squashing instructions within the CPU, and is responsible for - * sending out the squash signal as well as what instruction is to be - * squashed. The ROB also controls most of the calls to the CPU to delete - * instructions; the only other call is made in the first stage of the pipe- - * line, which tells the CPU to delete all instructions not in the ROB. + * represent the ROB. This class doesn't contain that list, but instead + * a pointer to the CPU to get access to the list. The ROB, in this first + * implementation, is largely what drives squashing. */ -template +template class ROB { public: //Typedefs from the Impl. typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; - typedef pair UnmapInfo; - typedef typename list::iterator InstIt; + typedef pair UnmapInfo_t; + typedef typename list::iterator InstIt_t; public: /** ROB constructor. @@ -56,15 +52,15 @@ class ROB * @params inst The instruction being inserted into the ROB. * @todo Remove the parameter once correctness is ensured. */ - void insertInst(DynInst *inst); + void insertInst(DynInstPtr &inst); /** Returns pointer to the head instruction within the ROB. There is * no guarantee as to the return value if the ROB is empty. * @retval Pointer to the DynInst that is at the head of the ROB. */ - DynInst *readHeadInst() { return cpu->instList.front(); } + DynInstPtr readHeadInst() { return cpu->instList.front(); } - DynInst *readTailInst() { return (*tail); } + DynInstPtr readTailInst() { return (*tail); } void retireHead(); @@ -108,15 +104,28 @@ class ROB /** Pointer to the CPU. */ FullCPU *cpu; + /** Number of instructions in the ROB. */ unsigned numEntries; /** Number of instructions that can be squashed in a single cycle. 
*/ unsigned squashWidth; - InstIt tail; + /** Iterator pointing to the instruction which is the last instruction + * in the ROB. This may at times be invalid (ie when the ROB is empty), + * however it should never be incorrect. + */ + InstIt_t tail; - InstIt squashIt; + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. + */ + InstIt_t squashIt; + /** Number of instructions in the ROB. */ int numInstsInROB; /** The sequence number of the squashed instruction. */ diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh index 308a8010f..862008429 100644 --- a/cpu/beta_cpu/rob_impl.hh +++ b/cpu/beta_cpu/rob_impl.hh @@ -3,7 +3,7 @@ #include "cpu/beta_cpu/rob.hh" -template +template ROB::ROB(unsigned _numEntries, unsigned _squashWidth) : numEntries(_numEntries), squashWidth(_squashWidth), @@ -13,43 +13,60 @@ ROB::ROB(unsigned _numEntries, unsigned _squashWidth) doneSquashing = true; } -template +template void ROB::setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; + // Set the tail to the beginning of the CPU instruction list so that + // upon the first instruction being inserted into the ROB, the tail + // iterator can simply be incremented. tail = cpu->instList.begin(); + // Set the squash iterator to the end of the instruction list. squashIt = cpu->instList.end(); } -template +template int ROB::countInsts() { -/* - int return_val = 0; + // Start at 1; if the tail matches cpu->instList.begin(), then there is + // one inst in the ROB. + int return_val = 1; + + // There are quite a few special cases. Do not use this function other + // than for debugging purposes. 
+ if (cpu->instList.begin() == cpu->instList.end()) { + // In this case there are no instructions in the list. The ROB + // must be empty. + return 0; + } else if (tail == cpu->instList.end()) { + // In this case, the tail is not yet pointing to anything valid. + // The ROB must be empty. + return 0; + } // Iterate through the ROB from the head to the tail, counting the // entries. - for (InstIt i = cpu->instList.begin(); i != tail; i++) + for (InstIt_t i = cpu->instList.begin(); i != tail; ++i) { assert(i != cpu->instList.end()); - return_val++; + ++return_val; } return return_val; -*/ + // Because the head won't be tracked properly until the ROB gets the // first instruction, and any time that the ROB is empty and has not // yet gotten the instruction, this function doesn't work. - return numInstsInROB; +// return numInstsInROB; } -template +template void -ROB::insertInst(DynInst *inst) +ROB::insertInst(DynInstPtr &inst) { // Make sure we have the right number of instructions. assert(numInstsInROB == countInsts()); @@ -68,7 +85,7 @@ ROB::insertInst(DynInst *inst) // in which case the tail will be pointing at instList.end(). If that // happens, then reset the tail to the beginning of the list. if (tail != cpu->instList.end()) { - tail++; + ++tail; } else { tail = cpu->instList.begin(); } @@ -83,13 +100,14 @@ ROB::insertInst(DynInst *inst) // Whatever calls this function needs to ensure that it properly frees up // registers prior to this function. -template +template void ROB::retireHead() { assert(numInstsInROB == countInsts()); + assert(numInstsInROB > 0); - DynInst *head_inst; + DynInstPtr head_inst; // Get the head ROB instruction. 
head_inst = cpu->instList.front(); @@ -116,12 +134,12 @@ ROB::retireHead() } } -template +template bool ROB::isHeadReady() { if (numInstsInROB != 0) { - DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->readyToCommit(); } @@ -129,7 +147,7 @@ ROB::isHeadReady() return false; } -template +template unsigned ROB::numFreeEntries() { @@ -138,7 +156,7 @@ ROB::numFreeEntries() return numEntries - numInstsInROB; } -template +template void ROB::doSquash() { @@ -162,6 +180,12 @@ ROB::doSquash() (*squashIt)->setCanCommit(); + // Special case for when squashing due to a syscall. It's possible + // that the squash happened after the head instruction was already + // committed, meaning that (*squashIt)->seqNum != squashedSeqNum + // will never be false. Normally the squash would never be able + // to go past the head of the ROB; in this case it might, so it + // must be handled otherwise it will segfault. #ifndef FULL_SYSTEM if (squashIt == cpu->instList.begin()) { DPRINTF(ROB, "ROB: Reached head of instruction list while " @@ -190,7 +214,7 @@ ROB::doSquash() } } -template +template void ROB::squash(InstSeqNum squash_num) { @@ -206,41 +230,41 @@ ROB::squash(InstSeqNum squash_num) doSquash(); } -template +template uint64_t ROB::readHeadPC() { assert(numInstsInROB == countInsts()); - DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->readPC(); } -template +template uint64_t ROB::readHeadNextPC() { assert(numInstsInROB == countInsts()); - DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->readNextPC(); } -template +template InstSeqNum ROB::readHeadSeqNum() { // Return the last sequence number that has not been squashed. Other // stages can use it to squash any instructions younger than the current // tail. 
- DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->seqNum; } -template +template uint64_t ROB::readTailPC() { @@ -251,7 +275,7 @@ ROB::readTailPC() return (*tail)->readPC(); } -template +template InstSeqNum ROB::readTailSeqNum() { diff --git a/cpu/beta_cpu/store_set.cc b/cpu/beta_cpu/store_set.cc new file mode 100644 index 000000000..46d763d37 --- /dev/null +++ b/cpu/beta_cpu/store_set.cc @@ -0,0 +1,192 @@ +#include "cpu/beta_cpu/store_set.hh" +#include "base/trace.hh" + +StoreSet::StoreSet(int _SSIT_size, int _LFST_size) + : SSIT_size(_SSIT_size), LFST_size(_LFST_size) +{ + DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + + SSIT = new SSID[SSIT_size]; + + validSSIT.resize(SSIT_size); + + for (int i = 0; i < SSIT_size; ++i) + validSSIT[i] = false; + + LFST = new InstSeqNum[LFST_size]; + + validLFST.resize(LFST_size); + + SSCounters = new int[LFST_size]; + + for (int i = 0; i < LFST_size; ++i) + { + validLFST[i] = false; + SSCounters[i] = 0; + } + + index_mask = SSIT_size - 1; + + offset_bits = 2; +} + +void +StoreSet::violation(Addr load_PC, Addr store_PC) +{ + int load_index = calcIndex(load_PC); + int store_index = calcIndex(store_PC); + + bool valid_load_SSID = validSSIT[load_index]; + bool valid_store_SSID = validSSIT[store_index]; + + if (!valid_load_SSID && !valid_store_SSID) { + // Calculate a new SSID here. 
+ SSID new_set = calcSSID(load_PC); + + validSSIT[load_index] = true; + + SSIT[load_index] = new_set; + + validSSIT[store_index] = true; + + SSIT[store_index] = new_set; + + SSCounters[new_set]++; + } else if (valid_load_SSID && !valid_store_SSID) { + SSID load_SSID = SSIT[load_index]; + + validSSIT[store_index] = true; + + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + } else if (!valid_load_SSID && valid_store_SSID) { + SSID store_SSID = SSIT[store_index]; + + validSSIT[load_index] = true; + + SSIT[load_index] = store_SSID; + + // Because we are having a load point to an already existing set, + // the size of the store set is not incremented. + } else { + SSID load_SSID = SSIT[load_index]; + SSID store_SSID = SSIT[store_index]; + + int load_SS_size = SSCounters[load_SSID]; + int store_SS_size = SSCounters[store_SSID]; + + // If the load has the bigger store set, then assign the store + // to the same store set as the load. Otherwise vice-versa. + if (load_SS_size > store_SS_size) { + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + SSCounters[store_SSID]--; + } else { + SSIT[load_index] = store_SSID; + + SSCounters[store_SSID]++; + SSCounters[load_SSID]--; + } + } +} + +void +StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) +{ + // Does nothing. + return; +} + +void +StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) +{ + int index = calcIndex(store_PC); + + int store_SSID; + + if (!validSSIT[index]) { + // Do nothing if there's no valid entry. + return; + } else { + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // Update the last store that was fetched with the current one. + LFST[store_SSID] = store_seq_num; + } +} + +InstSeqNum +StoreSet::checkInst(Addr PC) +{ + int index = calcIndex(PC); + + int inst_SSID; + + if (!validSSIT[index]) { + // Return 0 if there's no valid entry. 
+ return 0; + } else { + inst_SSID = SSIT[index]; + + assert(inst_SSID < LFST_size); + + if (!validLFST[inst_SSID]) { + return 0; + } else { + return LFST[inst_SSID]; + } + } +} + +void +StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) +{ + // This only is updated upon a store being issued. + if (!is_store) { + return; + } + + int index = calcIndex(issued_PC); + + int store_SSID; + + // Make sure the SSIT still has a valid entry for the issued store. + assert(validSSIT[index]); + + store_SSID = SSIT[index]; + + // If the last fetched store in the store set refers to the store that + // was just issued, then invalidate the entry. + if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + validLFST[store_SSID] = false; + } +} + +void +StoreSet::squash(InstSeqNum squashed_num) +{ + // Not really sure how to do this well. + + for (int i = 0; i < LFST_size; ++i) { + if (LFST[i] < squashed_num) { + validLFST[i] = false; + } + } +} + +void +StoreSet::clear() +{ + for (int i = 0; i < SSIT_size; ++i) { + validSSIT[i] = false; + } + + for (int i = 0; i < LFST_size; ++i) { + validLFST[i] = false; + } +} + diff --git a/cpu/beta_cpu/store_set.hh b/cpu/beta_cpu/store_set.hh new file mode 100644 index 000000000..701c60a2d --- /dev/null +++ b/cpu/beta_cpu/store_set.hh @@ -0,0 +1,58 @@ +#ifndef __STORE_SET_HH__ +#define __STORE_SET_HH__ + +#include + +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +class StoreSet +{ + public: + typedef unsigned SSID; + + public: + StoreSet(int SSIT_size, int LFST_size); + + void violation(Addr load_PC, Addr store_PC); + + void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + + void insertStore(Addr store_PC, InstSeqNum store_seq_num); + + InstSeqNum checkInst(Addr PC); + + void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + + void squash(InstSeqNum squashed_num); + + void clear(); + + private: + inline int calcIndex(Addr PC) + { return (PC >> offset_bits) & 
index_mask; } + + inline SSID calcSSID(Addr PC) + { return ((PC ^ (PC >> 10)) % LFST_size); } + + SSID *SSIT; + + std::vector validSSIT; + + InstSeqNum *LFST; + + std::vector validLFST; + + int *SSCounters; + + int SSIT_size; + + int LFST_size; + + int index_mask; + + // HACK: Hardcoded for now. + int offset_bits; +}; + +#endif // __STORE_SET_HH__ diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index 7a707c86a..71e9ef441 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -40,9 +40,12 @@ #include "targetarch/isa_traits.hh" // forward declarations +struct AlphaSimpleImpl; class ExecContext; -class AlphaDynInst; class DynInst; +template +class AlphaDynInst; + class FastCPU; class SimpleCPU; class SymbolTable;