diff --git a/SConscript b/SConscript index 10722007a..07cdcfdee 100644 --- a/SConscript +++ b/SConscript @@ -44,6 +44,7 @@ Import('env') # Base sources used by all configurations. base_sources = Split(''' arch/alpha/decoder.cc + arch/alpha/alpha_full_cpu_exec.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/full_cpu_exec.cc @@ -85,10 +86,23 @@ base_sources = Split(''' base/stats/text.cc cpu/base_cpu.cc + cpu/base_dyn_inst.cc cpu/exec_context.cc cpu/exetrace.cc cpu/pc_event.cc cpu/static_inst.cc + cpu/beta_cpu/alpha_dyn_inst.cc + cpu/beta_cpu/alpha_full_cpu.cc + cpu/beta_cpu/commit.cc + cpu/beta_cpu/decode.cc + cpu/beta_cpu/fetch.cc + cpu/beta_cpu/free_list.cc + cpu/beta_cpu/full_cpu.cc + cpu/beta_cpu/iew.cc + cpu/beta_cpu/inst_queue.cc + cpu/beta_cpu/rename.cc + cpu/beta_cpu/rename_map.cc + cpu/beta_cpu/rob.cc cpu/fast_cpu/fast_cpu.cc cpu/full_cpu/bpred.cc cpu/full_cpu/commit.cc @@ -395,6 +409,7 @@ env.Command(Split('base/traceflags.hh base/traceflags.cc'), # several files are generated from arch/$TARGET_ISA/isa_desc. env.Command(Split('''arch/alpha/decoder.cc arch/alpha/decoder.hh + arch/alpha/alpha_full_cpu_exec.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/full_cpu_exec.cc'''), diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc index d6b99a8ae..eaf3aa379 100644 --- a/arch/alpha/isa_desc +++ b/arch/alpha/isa_desc @@ -2482,9 +2482,9 @@ decode OPCODE default Unknown::unknown() { xc->syscall(); }}, IsNonSpeculative); // Read uniq reg into ABI return value register (r0) - 0x9e: rduniq({{ R0 = Runiq; }}); + 0x9e: rduniq({{ R0 = Runiq; }}, IsNonSpeculative); // Write uniq reg with value from ABI arg register (r16) - 0x9f: wruniq({{ Runiq = R16; }}); + 0x9f: wruniq({{ Runiq = R16; }}, IsNonSpeculative); } } #endif diff --git a/arch/isa_parser.py b/arch/isa_parser.py index 011ce7623..f7278628b 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -636,6 +636,9 @@ CpuModel('FastCPU', 'fast_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "cpu/full_cpu/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) +CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', + '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', + { 'CPU_exec_context': 'AlphaDynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. The entry key is the model name diff --git a/base/statistics.hh b/base/statistics.hh index f3b8a3922..a0be64ce5 100644 --- a/base/statistics.hh +++ b/base/statistics.hh @@ -407,7 +407,7 @@ class Wrap : public Child public: Wrap() { - map(new Data(*this)); + this->map(new Data(*this)); } /** diff --git a/base/timebuf.hh b/base/timebuf.hh new file mode 100644 index 000000000..ea538212e --- /dev/null +++ b/base/timebuf.hh @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BASE_TIMEBUF_HH__ +#define __BASE_TIMEBUF_HH__ + +#include + +using namespace std; + +template +class TimeBuffer +{ + protected: + int past; + int future; + int size; + + char *data; + vector index; + int base; + + void valid(int idx) + { + assert (idx >= -past && idx <= future); + } + + public: + friend class wire; + class wire + { + friend class TimeBuffer; + protected: + TimeBuffer *buffer; + int index; + + void set(int idx) + { + buffer->valid(idx); + index = idx; + } + + wire(TimeBuffer *buf, int i) + : buffer(buf), index(i) + { } + + public: + wire() + { } + + wire(const wire &i) + : buffer(i.buffer), index(i.index) + { } + + const wire &operator=(const wire &i) + { + buffer = i.buffer; + set(i.index); + return *this; + } + + const wire &operator=(int idx) + { + set(idx); + return *this; + } + + const wire &operator+=(int offset) + { + set(index + offset); + return *this; + } + + const wire &operator-=(int offset) + { + set(index - offset); + return *this; + } + + wire &operator++() + { + set(index + 1); + return *this; + } + + wire &operator++(int) + { + int i = index; + set(index + 1); + return wire(this, i); + } + + wire &operator--() + { + set(index - 1); + return *this; + } + + wire &operator--(int) + { + int i = index; + set(index - 1); + return wire(this, i); + } + T &operator*() const { return *buffer->access(index); } + T *operator->() const { return buffer->access(index); } + }; + + + public: + TimeBuffer(int p, int f) + : past(p), future(f), size(past + future + 1), + data(new char[size * sizeof(T)]), index(size), base(0) + { + assert(past >= 0 && future >= 0); + char *ptr = data; + for (int i = 0; i < size; i++) { + index[i] = ptr; + memset(ptr, 0, sizeof(T)); + new (ptr) T; + ptr += sizeof(T); + } + } + + TimeBuffer() + : data(NULL) + { + } + + ~TimeBuffer() + { + for (int i = 0; i < size; ++i) + (reinterpret_cast(index[i]))->~T(); + delete [] data; + } + + void + advance() + { + if (++base >= size) + base = 0; + + int ptr = base + future; + if (ptr >= size) + ptr -= size; + (reinterpret_cast(index[ptr]))->~T(); + memset(index[ptr], 0, sizeof(T)); + new (index[ptr]) T; + } + + T *access(int idx) + { + //Need more complex math here to calculate index. + valid(idx); + + int vector_index = idx + base; + if (vector_index >= size) { + vector_index -= size; + } else if (vector_index < 0) { + vector_index += size; + } + + return reinterpret_cast(index[vector_index]); + } + + T &operator[](int idx) + { + //Need more complex math here to calculate index. + valid(idx); + + int vector_index = idx + base; + if (vector_index >= size) { + vector_index -= size; + } else if (vector_index < 0) { + vector_index += size; + } + + return reinterpret_cast(*index[vector_index]); + } + + wire getWire(int idx) + { + valid(idx); + + return wire(this, idx); + } + + wire zero() + { + return wire(this, 0); + } +}; + +#endif // __BASE_TIMEBUF_HH__ + diff --git a/base/traceflags.py b/base/traceflags.py index 4be61d7ee..8b4208660 100644 --- a/base/traceflags.py +++ b/base/traceflags.py @@ -122,7 +122,18 @@ baseFlags = [ 'Tsunami', 'Uart', 'Split', - 'SQL' + 'SQL', + 'Fetch', + 'Decode', + 'Rename', + 'IEW', + 'Commit', + 'IQ', + 'ROB', + 'FreeList', + 'RenameMap', + 'DynInst', + 'FullCPU' ] # @@ -138,7 +149,8 @@ compoundFlagMap = { 'ScsiAll' : [ 'ScsiDisk', 'ScsiCtrl', 'ScsiNone' ], 'DiskImageAll' : [ 'DiskImage', 'DiskImageRead', 'DiskImageWrite' ], 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], - 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ] + 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], + 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'DynInst', 'FullCPU'] } ############################################################# diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc new file mode 100644 index 000000000..bd681e1dc --- /dev/null +++ b/cpu/base_dyn_inst.cc @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2001-2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BASE_DYN_INST_CC__ +#define __BASE_DYN_INST_CC__ + +#include +#include +#include + +#include "base/cprintf.hh" + +#include "arch/alpha/faults.hh" +#include "cpu/exetrace.hh" +#include "mem/mem_req.hh" + +#include "cpu/base_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" + +using namespace std; + +#define NOHASH +#ifndef NOHASH + +#include "base/hashmap.hh" + +unsigned int MyHashFunc(const BaseDynInst *addr) +{ + unsigned a = (unsigned)addr; + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; +} + +typedef m5::hash_map my_hash_t; +my_hash_t thishash; +#endif + +/** This may need to be specific to an implementation. */ +//int BaseDynInst::instcount = 0; + +//int break_inst = -1; + +template +BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, + Addr pred_PC, InstSeqNum seq_num, + FullCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu), xc(cpu->xcBase()) +{ + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; + + readyRegs = 0; + + seqNum = seq_num; + + specMemWrite = false; + + canIssue = false; + issued = false; + executed = false; + canCommit = false; + squashed = false; + squashedInIQ = false; + + blockingInst = false; + recoverInst = false; + specMode = false; + btbMissed = false; + // Eventually make this a parameter. + threadNumber = 0; + // Also make this a parameter. + specMode = true; + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be unimplemented opcode. + fault = Unimplemented_Opcode_Fault; + + PC = inst_PC; + nextPC = PC + sizeof(MachInst); + predPC = pred_PC; + + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = staticInst->destRegIdx(i); + } + + for (int i = 0; i < staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = staticInst->srcRegIdx(i); + _readySrcRegIdx[i] = 0; + } + + ++instcount; + + DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", + instcount); +} + +template +BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) + : staticInst(_staticInst), traceData(NULL) +{ + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; + + specMemWrite = false; + + blockingInst = false; + recoverInst = false; + specMode = false; + btbMissed = false; + + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = staticInst->destRegIdx(i); + } + + for (int i = 0; i < staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = staticInst->srcRegIdx(i); + } +} + +template +BaseDynInst::~BaseDynInst() +{ +/* + if (specMemWrite) { + // Remove effects of this instruction from speculative memory + xc->spec_mem->erase(effAddr); + } +*/ + --instcount; + DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n", + instcount); +} + +template +FunctionalMemory * +BaseDynInst::getMemory(void) +{ + return xc->mem; +} +/* +template +IntReg * +BaseDynInst::getIntegerRegs(void) +{ + return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile); +} +*/ +template +void +BaseDynInst::prefetch(Addr addr, unsigned flags) +{ + // This is the "functional" implementation of prefetch. Not much + // happens here since prefetches don't affect the architectural + // state. + + // Generate a MemReq so we can translate the effective address. + MemReqPtr req = new MemReq(addr, xc, 1, flags); + req->asid = asid; + + // Prefetches never cause faults. + fault = No_Fault; + + // note this is a local, not BaseDynInst::fault + Fault trans_fault = xc->translateDataReadReq(req); + + if (trans_fault == No_Fault && !(req->flags & UNCACHEABLE)) { + // It's a valid address to cacheable space. Record key MemReq + // parameters so we can generate another one just like it for + // the timing access without calling translate() again (which + // might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // Bogus address (invalid or uncacheable space). Mark it by + // setting the eff_addr to InvalidAddr. + effAddr = physEffAddr = MemReq::inval_addr; + } + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#ifndef FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (traceData) { + traceData->setAddr(addr); + } +} + +template +void +BaseDynInst::writeHint(Addr addr, int size, unsigned flags) +{ + // Need to create a MemReq here so we can do a translation. This + // will casue a TLB miss trap if necessary... not sure whether + // that's the best thing to do or not. We don't really need the + // MemReq otherwise, since wh64 has no functional effect. + MemReqPtr req = new MemReq(addr, xc, size, flags); + req->asid = asid; + + fault = xc->translateDataWriteReq(req); + + if (fault == No_Fault && !(req->flags & UNCACHEABLE)) { + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // ignore faults & accesses to uncacheable space... treat as no-op + effAddr = physEffAddr = MemReq::inval_addr; + } + + storeSize = size; + storeData = 0; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseDynInst::copySrcTranslate(Addr src) +{ + MemReqPtr req = new MemReq(src, xc, 64); + req->asid = asid; + + // translate to physical address + Fault fault = xc->translateDataReadReq(req); + + if (fault == No_Fault) { + xc->copySrcAddr = src; + xc->copySrcPhysAddr = req->paddr; + } else { + xc->copySrcAddr = 0; + xc->copySrcPhysAddr = 0; + } + return fault; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseDynInst::copy(Addr dest) +{ + uint8_t data[64]; + FunctionalMemory *mem = xc->mem; + assert(xc->copySrcPhysAddr || xc->misspeculating()); + MemReqPtr req = new MemReq(dest, xc, 64); + req->asid = asid; + + // translate to physical address + Fault fault = xc->translateDataWriteReq(req); + + if (fault == No_Fault) { + Addr dest_addr = req->paddr; + // Need to read straight from memory since we have more than 8 bytes. + req->paddr = xc->copySrcPhysAddr; + mem->read(req, data); + req->paddr = dest_addr; + mem->write(req, data); + } + return fault; +} + +template +void +BaseDynInst::dump() +{ + cprintf("T%d : %#08d `", threadNumber, PC); + cout << staticInst->disassemble(PC); + cprintf("'\n"); +} + +template +void +BaseDynInst::dump(std::string &outstring) +{ + std::ostringstream s; + s << "T" << threadNumber << " : 0x" << PC << " " + << staticInst->disassemble(PC); + + outstring = s.str(); +} + + +#if 0 +template +Fault +BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) +{ + Fault fault; + + // check alignments, even speculative this test should always pass + if ((nbytes & nbytes - 1) != 0 || (addr & nbytes - 1) != 0) { + for (int i = 0; i < nbytes; i++) + ((char *) p)[i] = 0; + + // I added the following because according to the comment above, + // we should never get here. The comment lies +#if 0 + panic("unaligned access. Cycle = %n", curTick); +#endif + return No_Fault; + } + + MemReqPtr req = new MemReq(addr, thread, nbytes); + switch(cmd) { + case Read: + fault = spec_mem->read(req, (uint8_t *)p); + break; + + case Write: + fault = spec_mem->write(req, (uint8_t *)p); + if (fault != No_Fault) + break; + + specMemWrite = true; + storeSize = nbytes; + switch(nbytes) { + case sizeof(uint8_t): + *(uint8_t)&storeData = (uint8_t *)p; + break; + case sizeof(uint16_t): + *(uint16_t)&storeData = (uint16_t *)p; + break; + case sizeof(uint32_t): + *(uint32_t)&storeData = (uint32_t *)p; + break; + case sizeof(uint64_t): + *(uint64_t)&storeData = (uint64_t *)p; + break; + } + break; + + default: + fault = Machine_Check_Fault; + break; + } + + trace_mem(fault, cmd, addr, p, nbytes); + + return fault; +} + +#endif + +int +BaseDynInst::instcount = 0; + +// Forward declaration... +template BaseDynInst; + +#endif // __BASE_DYN_INST_CC__ diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh new file mode 100644 index 000000000..7651b517e --- /dev/null +++ b/cpu/base_dyn_inst.hh @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2001-2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BASE_DYN_INST_HH__ +#define __BASE_DYN_INST_HH__ + +#include +#include + +#include "base/fast_alloc.hh" +#include "base/trace.hh" + +#include "cpu/static_inst.hh" +#include "cpu/beta_cpu/comm.hh" +#include "cpu/full_cpu/bpred_update.hh" +#include "mem/functional_mem/main_memory.hh" +#include "cpu/full_cpu/spec_memory.hh" +#include "cpu/inst_seq.hh" +#include "cpu/full_cpu/op_class.hh" +#include "cpu/full_cpu/spec_state.hh" + +/** + * @file + * Defines a dynamic instruction context. + */ + +namespace Trace { + class InstRecord; +}; + +class BaseInst +{ +}; + +template +class BaseDynInst : public FastAlloc +{ + public: + // Typedef for the CPU. + typedef typename Impl::FullCPU FullCPU; + + //Typedef to get the ISA. + typedef typename Impl::ISA ISA; + + /// Binary machine instruction type. + typedef typename ISA::MachInst MachInst; + /// Memory address type. + typedef typename ISA::Addr Addr; + /// Logical register index type. + typedef typename ISA::RegIndex RegIndex; + /// Integer register index type. + typedef typename ISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs + }; + + StaticInstPtr staticInst; + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// + Trace::InstRecord *traceData; + +// void setCPSeq(InstSeqNum seq); + + template + Fault read(Addr addr, T &data, unsigned flags); + + template + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res); + + + IntReg *getIntegerRegs(void); + FunctionalMemory *getMemory(void); + + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); + Fault copySrcTranslate(Addr src); + Fault copy(Addr dest); + + public: + /** Is this instruction valid. */ + bool valid; + + /** The sequence number of the instruction. */ + InstSeqNum seqNum; + + /** How many source registers are ready. */ + unsigned readyRegs; + + /** Can this instruction issue. */ + bool canIssue; + + /** Has this instruction issued. */ + bool issued; + + /** Has this instruction executed (or made it through execute) yet. */ + bool executed; + + /** Can this instruction commit. */ + bool canCommit; + + /** Is this instruction squashed. */ + bool squashed; + + /** Is this instruction squashed in the instruction queue. */ + bool squashedInIQ; + + /** Is this a recover instruction. */ + bool recoverInst; + + /** Is this a thread blocking instruction. */ + bool blockingInst; /* this inst has called thread_block() */ + + /** Is this a thread syncrhonization instruction. */ + bool threadsyncWait; + + /** If the BTB missed. */ + bool btbMissed; + + /** The thread this instruction is from. */ + short threadNumber; + + /** If instruction is speculative. */ + short specMode; + + /** data address space ID, for loads & stores. */ + short asid; + + /** Pointer to the FullCPU object. */ + FullCPU *cpu; + + /** Pointer to the exec context. Will not exist in the final version. */ + ExecContext *xc; + + /** The kind of fault this instruction has generated. */ + Fault fault; + + /** The effective virtual address (lds & stores only). */ + Addr effAddr; + + /** The effective physical address. */ + Addr physEffAddr; + + /** Effective virtual address for a copy source. */ + Addr copySrcEffAddr; + + /** Effective physical address for a copy source. */ + Addr copySrcPhysEffAddr; + + /** The memory request flags (from translation). */ + unsigned memReqFlags; + + /** The size of the data to be stored. */ + int storeSize; + + /** The data to be stored. */ + IntReg storeData; + + /** Result of this instruction, if an integer. */ + uint64_t intResult; + + /** Result of this instruction, if a float. */ + float floatResult; + + /** Result of this instruction, if a double. */ + double doubleResult; + + /** PC of this instruction. */ + Addr PC; + + /** Next non-speculative PC. It is not filled in at fetch, but rather + * once the target of the branch is truly known (either decode or + * execute). + */ + Addr nextPC; + + /** Predicted next PC. */ + Addr predPC; + + /** Count of total number of dynamic instructions. */ + static int instcount; + + /** Did this instruction do a spec write? */ + bool specMemWrite; + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Whether or not the source register is ready. */ + bool _readySrcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + /** BaseDynInst constructor given a binary instruction. */ + BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + BaseDynInst(StaticInstPtr &_staticInst); + + /** BaseDynInst destructor. */ + ~BaseDynInst(); + +#if 0 + Fault + mem_access(MemCmd cmd, // Read or Write access cmd + Addr addr, // virtual address of access + void *p, // input/output buffer + int nbytes); // access size +#endif + + void + trace_mem(Fault fault, // last fault + MemCmd cmd, // last command + Addr addr, // virtual address of access + void *p, // memory accessed + int nbytes); // access size + + /** Dumps out contents of this BaseDynInst. */ + void dump(); + + /** Dumps out contents of this BaseDynInst into given string. */ + void dump(std::string &outstring); + + /** Returns the fault type. */ + Fault getFault() { return fault; } + + /** Checks whether or not this instruction has had its branch target + * calculated yet. For now it is not utilized and is hacked to be + * always false. + */ + bool doneTargCalc() { return false; } + + /** Returns the calculated target of the branch. */ + Addr readCalcTarg() { return nextPC; } + + Addr readNextPC() { return nextPC; } + + /** Set the predicted target of this current instruction. */ + void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } + + /** Returns the predicted target of the branch. */ + Addr readPredTarg() { return predPC; } + + /** Returns whether the instruction was predicted taken or not. */ + bool predTaken() { +// DPRINTF(FullCPU, "PC: %08p\n", PC); +// DPRINTF(FullCPU, "predPC: %08p\n", predPC); + + return( predPC != (PC + sizeof(MachInst) ) ); + } + + /** Returns whether the instruction mispredicted. */ + bool mispredicted() { return (predPC != nextPC); } + + // + // Instruction types. Forward checks to StaticInst object. + // + bool isNop() const { return staticInst->isNop(); } + bool isMemRef() const { return staticInst->isMemRef(); } + bool isLoad() const { return staticInst->isLoad(); } + bool isStore() const { return staticInst->isStore(); } + bool isInstPrefetch() const { return staticInst->isInstPrefetch(); } + bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } + bool isCopy() const { return staticInst->isCopy(); } + bool isInteger() const { return staticInst->isInteger(); } + bool isFloating() const { return staticInst->isFloating(); } + bool isControl() const { return staticInst->isControl(); } + bool isCall() const { return staticInst->isCall(); } + bool isReturn() const { return staticInst->isReturn(); } + bool isDirectCtrl() const { return staticInst->isDirectCtrl(); } + bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } + bool isCondCtrl() const { return staticInst->isCondCtrl(); } + bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isThreadSync() const { return staticInst->isThreadSync(); } + bool isSerializing() const { return staticInst->isSerializing(); } + bool isMemBarrier() const { return staticInst->isMemBarrier(); } + bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } + bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } + + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + int8_t numDestRegs() const { return staticInst->numDestRegs(); } + + // the following are used to track physical register usage + // for machines with separate int & FP reg files + int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } + int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + + /** Returns the logical register index of the i'th destination register. */ + RegIndex destRegIdx(int i) const + { + return staticInst->destRegIdx(i); + } + + /** Returns the logical register index of the i'th source register. */ + RegIndex srcRegIdx(int i) const + { + return staticInst->srcRegIdx(i); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + bool isReadySrcRegIdx(int idx) const + { + return _readySrcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + //Push to .cc file. + /** Records that one of the source registers is ready. */ + void markSrcRegReady() + { + ++readyRegs; + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + void markSrcRegReady(RegIndex src_idx) + { + ++readyRegs; + + _readySrcRegIdx[src_idx] = 1; + + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + /** Sets this instruction as ready to issue. */ + void setCanIssue() { canIssue = true; } + + /** Returns whether or not this instruction is ready to issue. */ + bool readyToIssue() const { return canIssue; } + + /** Sets this instruction as issued from the IQ. */ + void setIssued() { issued = true; } + + /** Returns whether or not this instruction has issued. */ + bool isIssued() { return issued; } + + /** Sets this instruction as executed. */ + void setExecuted() { executed = true; } + + /** Returns whether or not this instruction has executed. */ + bool isExecuted() { return executed; } + + /** Sets this instruction as ready to commit. */ + void setCanCommit() { canCommit = true; } + + /** Returns whether or not this instruction is ready to commit. */ + bool readyToCommit() const { return canCommit; } + + /** Sets this instruction as squashed. */ + void setSquashed() { squashed = true; } + + /** Returns whether or not this instruction is squashed. */ + bool isSquashed() const { return squashed; } + + /** Sets this instruction as squashed in the IQ. */ + void setSquashedInIQ() { squashedInIQ = true; } + + /** Returns whether or not this instruction is squashed in the IQ. */ + bool isSquashedInIQ() { return squashedInIQ; } + + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns whether or not the BTB missed. */ + bool btbMiss() const { return btbMissed; } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(StaticInst *si, int idx) + { + return cpu->readIntReg(_srcRegIdx[idx]); + } + + float readFloatRegSingle(StaticInst *si, int idx) + { + return cpu->readFloatRegSingle(_srcRegIdx[idx]); + } + + double readFloatRegDouble(StaticInst *si, int idx) + { + return cpu->readFloatRegDouble(_srcRegIdx[idx]); + } + + uint64_t readFloatRegInt(StaticInst *si, int idx) + { + return cpu->readFloatRegInt(_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + cpu->setIntReg(_destRegIdx[idx], val); + intResult = val; + } + + void setFloatRegSingle(StaticInst *si, int idx, float val) + { + cpu->setFloatRegSingle(_destRegIdx[idx], val); + floatResult = val; + } + + void setFloatRegDouble(StaticInst *si, int idx, double val) + { + cpu->setFloatRegDouble(_destRegIdx[idx], val); + doubleResult = val; + } + + void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + { + cpu->setFloatRegInt(_destRegIdx[idx], val); + intResult = val; + } + + /** Read the PC of this instruction. */ + Addr readPC() { return PC; } + + /** Set the next PC of this instruction (its actual target). */ + void setNextPC(uint64_t val) { nextPC = val; } + +// bool misspeculating() { return cpu->misspeculating(); } + ExecContext *xcBase() { return xc; } +}; + +template +template +inline Fault +BaseDynInst::read(Addr addr, T &data, unsigned flags) +{ + MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); + req->asid = asid; + + fault = cpu->translateDataReadReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#ifndef FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == No_Fault) { + fault = cpu->read(req, data); + } + else { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + data = (T)-1; + } + + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + return fault; +} + +template +template +inline Fault +BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + storeSize = sizeof(T); + storeData = data; + if (specMode) + specMemWrite = true; + + MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); + + req->asid = asid; + + fault = cpu->translateDataWriteReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#ifndef FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == No_Fault) { + fault = cpu->write(req, data); + } + + if (res) { + // always return some result to keep misspeculated paths + // (which will ignore faults) deterministic + *res = (fault == No_Fault) ? req->result : 0; + } + + return fault; +} + +#endif // __DYN_INST_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc new file mode 100644 index 000000000..a79d3082c --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -0,0 +1,102 @@ +#ifndef __ALPHA_DYN_INST_CC__ +#define __ALPHA_DYN_INST_CC__ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +// Force instantiation of BaseDynInst +template BaseDynInst; + +AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ + // Initialize these to illegal values. + robIdx = -1; + iqIdx = -1; +} + +AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) + : BaseDynInst(_staticInst) +{ +} + +uint64_t +AlphaDynInst::readUniq() +{ + return cpu->readUniq(); +} + +void +AlphaDynInst::setUniq(uint64_t val) +{ + cpu->setUniq(val); +} + +uint64_t +AlphaDynInst::readFpcr() +{ + return cpu->readFpcr(); +} + +void +AlphaDynInst::setFpcr(uint64_t val) +{ + cpu->setFpcr(val); +} + +#ifdef FULL_SYSTEM +uint64_t +AlphaDynInst::readIpr(int idx, Fault &fault) +{ + return cpu->readIpr(idx, fault); +} +Fault +AlphaDynInst::setIpr(int idx, uint64_t val) +{ + return cpu->setIpr(idx, val); +} + +Fault +AlphaDynInst::hwrei() +{ + return cpu->hwrei(); +} + +int +AlphaDynInst::readIntrFlag() +{ +return cpu->readIntrFlag(); +} + +void +AlphaDynInst::setIntrFlag(int val) +{ + cpu->setIntrFlag(val); +} + +bool +AlphaDynInst::inPalMode() +{ + return cpu->inPalMode(); +} + +void +AlphaDynInst::trap(Fault fault) +{ + cpu->trap(fault); +} + +bool +AlphaDynInst::simPalCheck(int palFunc) +{ + return cpu->simPalCheck(palFunc); +} +#else +void +AlphaDynInst::syscall() +{ + cpu->syscall(); +} +#endif + +#endif // __ALPHA_DYN_INST_CC__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh new file mode 100644 index 000000000..69d145355 --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -0,0 +1,86 @@ +//Todo: + +#ifndef __ALPHA_DYN_INST_HH__ +#define __ALPHA_DYN_INST_HH__ + +#include "cpu/base_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/inst_seq.hh" + +using namespace std; + +class AlphaDynInst : public BaseDynInst +{ + public: + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + AlphaDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction. */ + Fault execute() + { + fault = staticInst->execute(this, traceData); + return fault; + } + + /** Location of this instruction within the ROB. Might be somewhat + * implementation specific. + * Might not want this data in the inst as it may be deleted prior to + * execution of the stage that needs it. + */ + int robIdx; + + int getROBEntry() + { + return robIdx; + } + + void setROBEntry(int rob_idx) + { + robIdx = rob_idx; + } + + /** Location of this instruction within the IQ. Might be somewhat + * implementation specific. + * Might not want this data in the inst as it may be deleted prior to + * execution of the stage that needs it. + */ + int iqIdx; + + int getIQEntry() + { + return iqIdx; + } + + void setIQEntry(int iq_idx) + { + iqIdx = iq_idx; + } + + uint64_t readUniq(); + void setUniq(uint64_t val); + + uint64_t readFpcr(); + void setFpcr(uint64_t val); + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + +}; + +#endif // __ALPHA_DYN_INST_HH__ + diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc new file mode 100644 index 000000000..880418146 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu.cc @@ -0,0 +1,911 @@ + +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/full_cpu/dd_queue.hh" +#include "cpu/full_cpu/full_cpu.hh" +#include "cpu/full_cpu/rob_station.hh" +#include "mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_params.hh" +#include "cpu/beta_cpu/comm.hh" + +AlphaFullCPU::AlphaFullCPU(Params ¶ms) + : FullBetaCPU(params) +{ + + fetch.setCPU(this); + decode.setCPU(this); + rename.setCPU(this); + iew.setCPU(this); + commit.setCPU(this); + + rob.setCPU(this); +} + +#ifndef FULL_SYSTEM + +void +AlphaFullCPU::syscall() +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + squashStages(); + + // Copy over all important state to xc once all the unrolling is done. + copyToXC(); + + process->syscall(xc); + + // Copy over all important state back to normal. + copyFromXC(); +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls). +void +AlphaFullCPU::squashStages() +{ + InstSeqNum rob_head = rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it. + for (int i = 0; i < 10; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + fetch.squash(rob.readHeadNextPC()); + fetchQueue.advance(); + + decode.squash(); + decodeQueue.advance(); + + rename.squash(); + renameQueue.advance(); + renameQueue.advance(); + + iew.squash(); + iewQueue.advance(); + iewQueue.advance(); + + rob.squash(rob_head); + commit.setSquashing(); +} + +#endif // FULL_SYSTEM + +void +AlphaFullCPU::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; + xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; + } + + xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; + xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; + xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; + xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + + xc->regs.pc = rob.readHeadPC(); + xc->regs.npc = xc->regs.pc+4; + + xc->func_exe_inst = funcExeInst; +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +void +AlphaFullCPU::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg], + xc->regs.intRegFile[i]); + + regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. +// regFile.pc = xc->regs.pc; +// regFile.npc = xc->regs.npc; + + funcExeInst = xc->func_exe_inst; +} + +#ifdef FULL_SYSTEM + +uint64_t * +AlphaFullCPU::getIpr() +{ + return regs.ipr; +} + +uint64_t +AlphaFullCPU::readIpr(int idx, Fault &fault) +{ + uint64_t *ipr = getIpr(); + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PALtemp23: + case AlphaISA::IPR_PAL_BASE: + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + case AlphaISA::IPR_ISR: + case AlphaISA::IPR_EXC_ADDR: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_MCSR: + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + case AlphaISA::IPR_SIRR: + case AlphaISA::IPR_ICSR: + case AlphaISA::IPR_ICM: + case AlphaISA::IPR_DTB_CM: + case AlphaISA::IPR_IPLR: + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case AlphaISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case AlphaISA::IPR_VA: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_VA_FORM: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_IFAULT_VA_FORM: + case AlphaISA::IPR_EXC_MASK: + case AlphaISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_DTB_PTE: + { + AlphaISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + case AlphaISA::IPR_ALT_MODE: + case AlphaISA::IPR_DTB_IA: + case AlphaISA::IPR_DTB_IAP: + case AlphaISA::IPR_ITB_IA: + case AlphaISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +Fault +AlphaFullCPU::setIpr(int idx, uint64_t val) +{ + uint64_t *ipr = getIpr(); + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PAL_BASE: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case AlphaISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case AlphaISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). + ipr[idx] = val; + break; + + case AlphaISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case AlphaISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case AlphaISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case AlphaISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case AlphaISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + + case AlphaISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_MCSR: + // more here after optimization... + ipr[idx] = val; + break; + + case AlphaISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case AlphaISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case AlphaISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case AlphaISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case AlphaISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case AlphaISA::IPR_EXC_SUM: + case AlphaISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_SL_RCV: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_ITB_PTE_TEMP: + case AlphaISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case AlphaISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case AlphaISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case AlphaISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); + break; + + case AlphaISA::IPR_DTB_TAG: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case AlphaISA::IPR_ITB_PTE: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); + } + break; + + case AlphaISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case AlphaISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case AlphaISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... + return No_Fault; + +} + +int +AlphaFullCPU::readIntrFlag() +{ + return regs.intrflag; +} + +void +AlphaFullCPU::setIntrFlag(int val) +{ + regs.intrflag = val; +} + +// Maybe have this send back from IEW stage to squash and update PC. +Fault +AlphaFullCPU::hwrei() +{ + uint64_t *ipr = getIpr(); + + if (!PC_PAL(regs.pc)) + return Unimplemented_Opcode_Fault; + + setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); + + if (!misspeculating()) { + kernelStats.hwrei(); + + if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) + AlphaISA::swap_palshadow(®s, false); + + AlphaISA::check_interrupts = true; + } + + // FIXME: XXX check for interrupts? XXX + return No_Fault; +} + +bool +AlphaFullCPU::inPalMode() +{ + return PC_PAL(readPC()); +} + +bool +AlphaFullCPU::simPalCheck(int palFunc) +{ + kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +void +AlphaFullCPU::trap(Fault fault) +{ + uint64_t PC = commit.readPC(); + + DPRINTF(Fault, "Fault %s\n", FaultName(fault)); + Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); + + assert(!misspeculating()); + kernelStats.fault(fault); + + if (fault == Arithmetic_Fault) + panic("Arithmetic traps are unimplemented!"); + + AlphaISA::InternalProcReg *ipr = getIpr(); + + // exception restart address - Get the commit PC + if (fault != Interrupt_Fault || !PC_PAL(PC)) + ipr[AlphaISA::IPR_EXC_ADDR] = PC; + + if (fault == Pal_Fault || fault == Arithmetic_Fault /* || + fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { + // traps... skip faulting instruction + ipr[AlphaISA::IPR_EXC_ADDR] += 4; + } + + if (!PC_PAL(PC)) + AlphaISA::swap_palshadow(®s, true); + + setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); + setNextPC(PC + sizeof(MachInst)); +} + +void +AlphaFullCPU::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +void +AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. + for (int i = 0; i < AlphaISA::NumIntRegs; i++) { + if (reg_redir[i]) { + AlphaISA::IntReg temp = regs->intRegFile[i]; + regs->intRegFile[i] = regs->palregs[i]; + regs->palregs[i] = temp; + } + } +} + +#endif // FULL_SYSTEM + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) + + Param numThreads; + +#ifdef FULL_SYSTEM +SimObjectParam system; +SimObjectParam itb; +SimObjectParam dtb; +Param mult; +#else +SimObjectVectorParam workload; +SimObjectParam process; +Param asid; +#endif // FULL_SYSTEM +SimObjectParam mem; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param defReg; + +END_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) + + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#ifdef FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), + INIT_PARAM_DFLT(mult, "System clock multiplier", 1), +#else + INIT_PARAM(workload, "Processes to run"), + INIT_PARAM_DFLT(process, "Process to run", NULL), + INIT_PARAM(asid, "Address space ID"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(defReg, "Defer registration") + +END_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) + +CREATE_SIM_OBJECT(AlphaFullCPU) +{ + AlphaFullCPU *cpu; + +#ifdef FULL_SYSTEM + if (mult != 1) + panic("Processor clock multiplier must be 1?\n"); + + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + Process *actual_process; + + if (process == NULL) { + actual_process = workload[0]; + } else { + actual_process = process; + } + +#endif + + AlphaSimpleParams params; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#ifdef FULL_SYSTEM + params._system = system; + params.itb = itb; + params.dtb = dtb; + params.freq = ticksPerSecond * mult; +#else + params.workload = workload; + params.process = actual_process; + params.asid = asid; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.maxInstsAnyThread = max_insts_any_thread; + params.maxInstsAllThreads = max_insts_all_threads; + params.maxLoadsAnyThread = max_loads_any_thread; + params.maxLoadsAllThreads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + + params.iewToCommitDelay = iewToCommitDelay; + params.renameToROBDelay = renameToROBDelay; + params.commitWidth = commitWidth; + params.squashWidth = squashWidth; + + params.numPhysIntRegs = numPhysIntRegs; + params.numPhysFloatRegs = numPhysFloatRegs; + params.numIQEntries = numIQEntries; + params.numROBEntries = numROBEntries; + + params.defReg = defReg; + + cpu = new AlphaFullCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("AlphaFullCPU", AlphaFullCPU) + diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh new file mode 100644 index 000000000..b098aaac1 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -0,0 +1,244 @@ +// Todo: Find all the stuff in ExecContext and ev5 that needs to be +// specifically designed for this CPU. +// Read and write are horribly hacked up between not being sure where to +// copy their code from, and Ron's memory changes. + +#ifndef __ALPHA_FULL_CPU_HH__ +#define __ALPHA_FULL_CPU_HH__ + +// To include: comm, impl, full cpu, ITB/DTB if full sys, +#include "cpu/beta_cpu/comm.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/full_cpu.hh" + +using namespace std; + +class AlphaFullCPU : public FullBetaCPU +{ + public: + typedef AlphaSimpleImpl::ISA AlphaISA; + typedef AlphaSimpleImpl::Params Params; + + public: + AlphaFullCPU(Params ¶ms); + +#ifdef FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif + + public: +#ifdef FULL_SYSTEM + bool inPalMode(); + + //Note that the interrupt stuff from the base CPU might be somewhat + //ISA specific (ie NumInterruptLevels). These functions might not + //be needed in FullCPU though. +// void post_interrupt(int int_num, int index); +// void clear_interrupt(int int_num, int index); +// void clear_interrupts(); + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return No_Fault; + } + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + template + Fault read(MemReqPtr &req, T &data) + { +#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) + if (req->flags & LOCKED) { + MiscRegFile *cregs = &req->xc->regs.miscRegs; + cregs->lock_addr = req->paddr; + cregs->lock_flag = true; + } +#endif + + Fault error; + error = mem->read(req, data); + data = htoa(data); + return error; + } + + template + Fault write(MemReqPtr &req, T &data) + { +#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) + + MiscRegFile *cregs; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + cregs = &xc->regs.miscRegs; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + req->xc->storeCondFailures = 0;//Needed? [RGD] + } else { + req->result = cregs->lock_flag; + if (!cregs->lock_flag || + ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) { + cregs->lock_flag = false; + if (((++req->xc->storeCondFailures) % 100000) == 0) { + std::cerr << "Warning: " + << req->xc->storeCondFailures + << " consecutive store conditional failures " + << "on cpu " << cpu_id + << std::endl; + } + return No_Fault; + } + else req->xc->storeCondFailures = 0; + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < system->execContexts.size(); i++){ + cregs = &system->execContexts[i]->regs.miscRegs; + if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) { + cregs->lock_flag = false; + } + } + +#endif + + return mem->write(req, (T)htoa(data)); + } + + // Later on may want to remove this misc stuff from the regfile and + // have it handled at this level. Might prove to be an issue when + // trying to rename source/destination registers... + uint64_t readUniq() + { + return regFile.readUniq(); + } + + void setUniq(uint64_t val) + { + regFile.setUniq(val); + } + + uint64_t readFpcr() + { + return regFile.readFpcr(); + } + + void setFpcr(uint64_t val) + { + regFile.setFpcr(val); + } + +#ifdef FULL_SYSTEM + uint64_t *getIPR(); + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + int readIntrFlag(); + void setIntrFlag(int val); + Fault hwrei(); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); + + void processInterrupts(); +#endif + + +#ifndef FULL_SYSTEM + // Need to change these into regfile calls that directly set a certain + // register. Actually, these functions should handle most of this + // functionality by themselves; should look up the rename and then + // set the register. + IntReg getSyscallArg(int i) + { + return xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i]; + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { + xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val; + } + + void setSyscallReturn(int64_t return_value) + { + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + const int RegA3 = 19; // only place this is used + if (return_value >= 0) { + // no error + xc->regs.intRegFile[RegA3] = 0; + xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value; + } else { + // got an error, return details + xc->regs.intRegFile[RegA3] = (IntReg) -1; + xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value; + } + } + + void syscall(); + void squashStages(); + +#endif + + void copyToXC(); + void copyFromXC(); + + public: +#ifdef FULL_SYSTEM + bool palShadowEnabled; + + // Not sure this is used anywhere. + void intr_post(RegFile *regs, Fault fault, Addr pc); + // Actually used within exec files. Implement properly. + void swap_palshadow(RegFile *regs, bool use_shadow); + // Called by CPU constructor. Can implement as I please. + void initCPU(RegFile *regs); + // Called by initCPU. Implement as I please. + void initIPRs(RegFile *regs); +#endif +}; + +#endif // __ALPHA_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh new file mode 100644 index 000000000..a80b116a8 --- /dev/null +++ b/cpu/beta_cpu/alpha_impl.hh @@ -0,0 +1,74 @@ +#ifndef __ALPHA_IMPL_HH__ +#define __ALPHA_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +#include "cpu/beta_cpu/comm.hh" +#include "cpu/beta_cpu/cpu_policy.hh" +#include "cpu/beta_cpu/alpha_params.hh" + +#include "cpu/beta_cpu/commit.hh" +#include "cpu/beta_cpu/decode.hh" +#include "cpu/beta_cpu/fetch.hh" +#include "cpu/beta_cpu/free_list.hh" +#include "cpu/beta_cpu/iew.hh" + +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/regfile.hh" +#include "cpu/beta_cpu/rename.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/rob.hh" + +class AlphaDynInst; +class AlphaFullCPU; + +/** Implementation specific struct that defines several key things to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific FullCPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct AlphaSimpleImpl +{ + /** The ISA to be used. */ + typedef AlphaISA ISA; + + /** The type of MachInst. */ + typedef ISA::MachInst MachInst; + + /** The CPU policy to be used (ie fetch, decode, etc.). */ + typedef SimpleCPUPolicy CPUPol; + + /** The DynInst to be used. */ + typedef AlphaDynInst DynInst; + + /** The FullCPU to be used. */ + typedef AlphaFullCPU FullCPU; + + /** The Params to be passed to each stage. */ + typedef AlphaSimpleParams Params; + + /** The struct for communication between fetch and decode. */ + typedef SimpleFetchSimpleDecode FetchStruct; + + /** The struct for communication between decode and rename. */ + typedef SimpleDecodeSimpleRename DecodeStruct; + + /** The struct for communication between rename and IEW. */ + typedef SimpleRenameSimpleIEW RenameStruct; + + /** The struct for communication between IEW and commit. */ + typedef SimpleIEWSimpleCommit IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct IssueStruct; + + /** The struct for all backwards communication. */ + typedef TimeBufStruct TimeStruct; +}; + + + +#endif // __ALPHA_IMPL_HH__ diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh new file mode 100644 index 000000000..b217ef8e3 --- /dev/null +++ b/cpu/beta_cpu/alpha_params.hh @@ -0,0 +1,85 @@ +#ifndef __ALPHA_SIMPLE_PARAMS_HH__ +#define __ALPHA_SIMPLE_PARAMS_HH__ + +//Forward declarations +class System; +class AlphaITB; +class AlphaDTB; +class FunctionalMemory; +class Process; +class MemInterface; + +/** + * This file defines the parameters that will be used for the AlphaFullCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams +{ + public: + std::string name; + int numberOfThreads; + +#ifdef FULL_SYSTEM + System *_system; + AlphaITB *itb; AlphaDTB *dtb; + Tick freq; +#else + std::vector workload; + Process *process; + short asid; +#endif // FULL_SYSTEM + + FunctionalMemory *mem; + + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; + + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; + + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + + bool defReg; +}; + +#endif diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh new file mode 100644 index 000000000..21a530ecf --- /dev/null +++ b/cpu/beta_cpu/comm.hh @@ -0,0 +1,110 @@ +#ifndef __COMM_HH__ +#define __COMM_HH__ + +#include +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +using namespace std; + +// Find better place to put this typedef. +typedef short int PhysRegIndex; + +// Might want to put constructors/destructors here. +template +struct SimpleFetchSimpleDecode { + // Consider having a field of how many ready instructions. + typename Impl::DynInst *insts[1]; +}; + +template +struct SimpleDecodeSimpleRename { + // Consider having a field of how many ready instructions. + typename Impl::DynInst *insts[1]; +}; + +template +struct SimpleRenameSimpleIEW { + // Consider having a field of how many ready instructions. + typename Impl::DynInst *insts[1]; +}; + +template +struct SimpleIEWSimpleCommit { + // Consider having a field of how many ready instructions. + typename Impl::DynInst *insts[1]; +}; + +template +struct IssueStruct { + typename Impl::DynInst *insts[1]; +}; + +struct TimeBufStruct { + struct decodeComm { + bool squash; + bool stall; + bool predIncorrect; + uint64_t branchAddr; + + //Question, is it worthwhile to have this Addr passed along + //by each stage, or just have Fetch look it up in the proper + //amount of cycles in the time buffer? + //Both might actually be needed because decode can send a different + //nextPC if the bpred was wrong. + uint64_t nextPC; + }; + + decodeComm decodeInfo; + + // Rename can't actually tell anything to squash or send a new PC back + // because it doesn't do anything along those lines. But maybe leave + // these fields in here to keep the stages mostly orthagonal. + struct renameComm { + bool squash; + bool stall; + + uint64_t nextPC; + }; + + renameComm renameInfo; + + struct iewComm { + bool squash; + bool stall; + bool predIncorrect; + + // Also eventually include skid buffer space. + unsigned freeIQEntries; + + uint64_t nextPC; + // For now hardcode the type. + // Change this to sequence number eventually. + InstSeqNum squashedSeqNum; + }; + + iewComm iewInfo; + + struct commitComm { + bool squash; + bool stall; + unsigned freeROBEntries; + + uint64_t nextPC; + + // Think of better names here. + // Will need to be a variety of sizes... + // Maybe make it a vector, that way only need one object. + vector freeRegs; + + bool robSquashing; + // Represents the instruction that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed sequence number. + InstSeqNum doneSeqNum; + }; + + commitComm commitInfo; +}; + +#endif //__COMM_HH__ diff --git a/cpu/beta_cpu/commit.cc b/cpu/beta_cpu/commit.cc new file mode 100644 index 000000000..2efb38976 --- /dev/null +++ b/cpu/beta_cpu/commit.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/commit_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleCommit; diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh new file mode 100644 index 000000000..0e5a96e2a --- /dev/null +++ b/cpu/beta_cpu/commit.hh @@ -0,0 +1,149 @@ +// Todo: Squash properly. Have commit be able to send a squash signal +// to previous stages; will be needed when trap() is implemented. +// Maybe have a special method for handling interrupts/traps. +// +// Traps: Have IEW send a signal to commit saying that there's a trap to +// be handled. Have commit send the PC back to the fetch stage, along +// with the current commit PC. Fetch will directly access the IPR and save +// off all the proper stuff. Commit can send out a squash, or something +// close to it. +// Do the same for hwrei(). However, requires that commit be specifically +// built to support that kind of stuff. Probably not horrible to have +// commit support having the CPU tell it to squash the other stages and +// restart at a given address. The IPR register does become an issue. +// Probably not a big deal if the IPR stuff isn't cycle accurate. Can just +// have the original function handle writing to the IPR register. + +#ifndef __SIMPLE_COMMIT_HH__ +#define __SIMPLE_COMMIT_HH__ + +//Includes: ROB, time buffer, structs, memory interface +#include "arch/alpha/isa_traits.hh" +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/rob.hh" +#include "mem/memory_interface.hh" + +template +class SimpleCommit +{ + public: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::Params Params; + + typedef typename Impl::CPUPol::ROB ROB; + + typedef typename Impl::TimeStruct TimeStruct; + typedef typename Impl::IEWStruct IEWStruct; + typedef typename Impl::RenameStruct RenameStruct; + + public: + // I don't believe commit can block, so it will only have two + // statuses for now. + // Actually if there's a cache access that needs to block (ie + // uncachable load or just a mem access in commit) then the stage + // may have to wait. + enum Status { + Running, + Idle, + ROBSquashing, + DcacheMissStall, + DcacheMissComplete + }; + + private: + Status _status; + + public: + SimpleCommit(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setRenameQueue(TimeBuffer *rq_ptr); + + void setIEWQueue(TimeBuffer *iq_ptr); + + void setROB(ROB *rob_ptr); + + void tick(); + + void commit(); + + uint64_t readCommitPC(); + + void setSquashing() { _status = ROBSquashing; } + + private: + + void commitInsts(); + + bool commitHead(DynInst *head_inst, unsigned inst_num); + + void getInsts(); + + void markCompletedInsts(); + + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toIEW; + + /** Wire to read information from IEW (for ROB). */ + typename TimeBuffer::wire robInfoFromIEW; + + /** IEW instruction queue interface. */ + TimeBuffer *iewQueue; + + /** Wire to read information from IEW queue. */ + typename TimeBuffer::wire fromIEW; + + /** Rename instruction queue interface, for ROB. */ + TimeBuffer *renameQueue; + + /** Wire to read information from rename queue. */ + typename TimeBuffer::wire fromRename; + + /** ROB interface. */ + ROB *rob; + + /** Pointer to FullCPU. */ + FullCPU *cpu; + + /** Pointer to the rename map. DO NOT USE if possible. */ + typename Impl::CPUPol::RenameMap *renameMap; + + //Store buffer interface? Will need to move committed stores to the + //store buffer + + /** Memory interface. Used for d-cache accesses. */ + MemInterface *dcacheInterface; + + private: + /** IEW to Commit delay, in ticks. */ + unsigned iewToCommitDelay; + + /** Rename to ROB delay, in ticks. */ + unsigned renameToROBDelay; + + /** Rename width, in instructions. Used so ROB knows how many + * instructions to get from the rename instruction queue. + */ + unsigned renameWidth; + + /** IEW width, in instructions. Used so ROB knows how many + * instructions to get from the IEW instruction queue. + */ + unsigned iewWidth; + + /** Commit width, in instructions. */ + unsigned commitWidth; +}; + +#endif // __SIMPLE_COMMIT_HH__ diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh new file mode 100644 index 000000000..bc8db0ce0 --- /dev/null +++ b/cpu/beta_cpu/commit_impl.hh @@ -0,0 +1,421 @@ +// @todo: Bug when something reaches execute, and mispredicts, but is never +// put into the ROB because the ROB is full. Need rename stage to predict +// the free ROB entries better. + +#ifndef __COMMIT_IMPL_HH__ +#define __COMMIT_IMPL_HH__ + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/commit.hh" +#include "cpu/exetrace.hh" + +template +SimpleCommit::SimpleCommit(Params ¶ms) + : dcacheInterface(params.dcacheInterface), + iewToCommitDelay(params.iewToCommitDelay), + renameToROBDelay(params.renameToROBDelay), + renameWidth(params.renameWidth), + iewWidth(params.executeWidth), + commitWidth(params.commitWidth) +{ + _status = Idle; +} + +template +void +SimpleCommit::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void +SimpleCommit::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(Commit, "Commit: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to send information back to IEW. + toIEW = timeBuffer->getWire(0); + + // Setup wire to read data from IEW (for the ROB). + robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); +} + +template +void +SimpleCommit::setRenameQueue(TimeBuffer *rq_ptr) +{ + DPRINTF(Commit, "Commit: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to get instructions from rename (for the ROB). + fromRename = renameQueue->getWire(-renameToROBDelay); +} + +template +void +SimpleCommit::setIEWQueue(TimeBuffer *iq_ptr) +{ + DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to get instructions from IEW. + fromIEW = iewQueue->getWire(-iewToCommitDelay); +} + +template +void +SimpleCommit::setROB(ROB *rob_ptr) +{ + DPRINTF(Commit, "Commit: Setting ROB pointer.\n"); + rob = rob_ptr; +} + +template +void +SimpleCommit::tick() +{ + // If the ROB is currently in its squash sequence, then continue + // to squash. In this case, commit does not do anything. Otherwise + // run commit. + if (_status == ROBSquashing) { + if (rob->isDoneSquashing()) { + _status = Running; + } else { + rob->doSquash(); + + // Send back sequence number of tail of ROB, so other stages + // can squash younger instructions. Note that really the only + // stage that this is important for is the IEW stage; other + // stages can just clear all their state as long as selective + // replay isn't used. + toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum(); + toIEW->commitInfo.robSquashing = true; + } + } else { + commit(); + } + + markCompletedInsts(); + + // Writeback number of free ROB entries here. + DPRINTF(Commit, "Commit: ROB has %d free entries.\n", + rob->numFreeEntries()); + toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); +} + +template +void +SimpleCommit::commit() +{ + ////////////////////////////////////// + // Check for interrupts + ////////////////////////////////////// + + // Process interrupts if interrupts are enabled and not in PAL mode. + // Take the PC from commit and write it to the IPR, then squash. The + // interrupt completing will take care of restoring the PC from that value + // in the IPR. Look at IPR[EXC_ADDR]; + // hwrei() is what resets the PC to the place where instruction execution + // beings again. +#ifdef FULL_SYSTEM + if (ISA::check_interrupts && + cpu->check_interrupts() && + !xc->inPalMode()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + } +#endif // FULL_SYSTEM + + //////////////////////////////////// + // Check for squash signal, handle that first + //////////////////////////////////// + + // Want to mainly check if the IEW stage is telling the ROB to squash. + // Should I also check if the commit stage is telling the ROB to squah? + // This might be necessary to keep the same timing between the IQ and + // the ROB... + if (robInfoFromIEW->iewInfo.squash) { + DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n"); + + _status = ROBSquashing; + + InstSeqNum squashed_inst = robInfoFromIEW->iewInfo.squashedSeqNum; + + rob->squash(squashed_inst); + + // Send back the sequence number of the squashed instruction. + toIEW->commitInfo.doneSeqNum = squashed_inst; + // Send back the squash signal to tell stages that they should squash. + toIEW->commitInfo.squash = true; + // Send back the rob squashing signal so other stages know that the + // ROB is in the process of squashing. + toIEW->commitInfo.robSquashing = true; + toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC; + } + + if (_status != ROBSquashing) { + getInsts(); + + commitInsts(); + } + + // If the ROB is empty, we can set this stage to idle. Use this + // in the future when the Idle status will actually be utilized. +#if 0 + if (rob->isEmpty()) { + DPRINTF(Commit, "Commit: ROB is empty. Status changed to idle.\n"); + _status = Idle; + // Schedule an event so that commit will actually wake up + // once something gets put in the ROB. + } +#endif +} + +// Loop that goes through as many instructions in the ROB as possible and +// tries to commit them. The actual work for committing is done by the +// commitHead() function. +template +void +SimpleCommit::commitInsts() +{ + //////////////////////////////////// + // Handle commit + // Note that commit will be handled prior to the ROB so that the ROB + // only tries to commit instructions it has in this current cycle, and + // not instructions it is writing in during this cycle. + // Can't commit and squash things at the same time... + //////////////////////////////////// + + DynInst *head_inst = rob->readHeadInst(); + + unsigned num_committed = 0; + + // Commit as many instructions as possible until the commit bandwidth + // limit is reached, or it becomes impossible to commit any more. + while (!rob->isEmpty() && + head_inst->readyToCommit() && + num_committed < commitWidth) + { + DPRINTF(Commit, "Commit: Trying to commit head instruction.\n"); + + // If the head instruction is squashed, it is ready to retire at any + // time. However, we need to avoid updating any other state + // incorrectly if it's already been squashed. + if (head_inst->isSquashed()) { + // Hack to avoid the instruction being retired (and deleted) if + // it hasn't been through the IEW stage yet. + if (!head_inst->isExecuted()) { + break; + } + + DPRINTF(Commit, "Commit: Retiring squashed instruction from " + "ROB.\n"); + + // Tell ROB to retire head instruction. This retires the head + // inst in the ROB without affecting any other stages. + rob->retireHead(); + + ++num_committed; + } else { + // Increment the total number of non-speculative instructions + // executed. + // Hack for now: it really shouldn't happen until after the + // commit is deemed to be successful. + cpu->funcExeInst++; + + // Try to commit the head instruction. + bool commit_success = commitHead(head_inst, num_committed); + + // Update what instruction we are looking at if the commit worked. + if(commit_success) { + ++num_committed; + + // Send back which instruction has been committed. + // @todo: Update this later when a wider pipeline is used. + // Hmm, can't really give a pointer here...perhaps the + // sequence number instead (copy). + toIEW->commitInfo.doneSeqNum = head_inst->seqNum; + + cpu->instDone(); + } else { + break; + } + } + + // Update the pointer to read the next instruction in the ROB. + head_inst = rob->readHeadInst(); + } +} + +template +bool +SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) +{ + // Make sure instruction is valid + assert(head_inst); + + Fault fault = No_Fault; + + // If the head instruction is a store or a load, then execute it + // because this simple model does no speculative memory access. + // Hopefully this covers all memory references. + // Also check if it's nonspeculative. Or a nop. Then it will be + // executed only when it reaches the head of the ROB. Actually + // executing a nop is a bit overkill... + if (head_inst->isStore() || + head_inst->isLoad() || + head_inst->isNonSpeculative() || + head_inst->isNop()) { + DPRINTF(Commit, "Commit: Executing a memory reference or " + "nonspeculative instruction at commit, inst PC %#x\n", + head_inst->PC); + fault = head_inst->execute(); + + // Tell CPU to tell IEW to tell IQ (nasty chain of calls) that + // this instruction has completed. Could predicate this on + // whether or not the instruction has a destination. + // Slightly unrealistic, but will not really be a factor once + // a real load/store queue is added. + cpu->wakeDependents(head_inst); + } + + // Check if memory access was successful. + if (fault != No_Fault) { + // Handle data cache miss here. In the future, set the status + // to data cache miss, then exit the stage. Have an event + // that handles commiting the head instruction, then setting + // the stage back to running, when the event is run. (just + // make sure that event is commit's run for that cycle) + panic("Commit: Load/store instruction failed, not sure what " + "to do.\n"); + // Also will want to clear the instruction's fault after being + // handled here so it's not handled again below. + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (head_inst->isThreadSync() || + head_inst->isSerializing() || + head_inst->isMemBarrier() || + head_inst->isWriteBarrier() ) + { + // Not handled for now. Mem barriers and write barriers are safe + // to simply let commit as memory accesses only happen once they + // reach the head of commit. Not sure about the other two. + panic("Serializing or barrier instructions" + " are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + if (head_inst->getFault() != No_Fault) { +#ifdef FULL_SYSTEM + cpu->trap(fault); +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", head_inst->getFault(), + head_inst->PC); +#endif // FULL_SYSTEM + } + + // Check if we're really ready to commit. If not then return false. + // I'm pretty sure all instructions should be able to commit if they've + // reached this far. For now leave this in as a check. + if(!rob->isHeadReady()) { + DPRINTF(Commit, "Commit: Unable to commit head instruction!\n"); + return false; + } + + //If it's a branch, then send back branch prediction update info + //to the fetch stage. + // This should be handled in the iew stage if a mispredict happens... +#if 0 + if (head_inst->isControl()) { + + toIEW->nextPC = head_inst->readPC(); + //Maybe switch over to BTB incorrect. + toIEW->btbMissed = head_inst->btbMiss(); + toIEW->target = head_inst->nextPC; + //Maybe also include global history information. + //This simple version will have no branch prediction however. + } +#endif + +#if 0 + // Check if the instruction has a destination register. + // If so add the previous physical register of its logical register's + // destination to the free list through the time buffer. + for (int i = 0; i < head_inst->numDestRegs(); i++) + { + toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i)); + } +#endif + + // Now that the instruction is going to be committed, finalize its + // trace data. + if (head_inst->traceData) { + head_inst->traceData->finalize(); + } + + //Finally clear the head ROB entry. + rob->retireHead(); + + // Return true to indicate that we have committed an instruction. + return true; +} + +template +void +SimpleCommit::getInsts() +{ + ////////////////////////////////////// + // Handle ROB functions + ////////////////////////////////////// + + // Read any issued instructions and place them into the ROB. Do this + // prior to squashing to avoid having instructions in the ROB that + // don't get squashed properly. + for (int inst_num = 0; + fromRename->insts[inst_num] != NULL && inst_num < renameWidth; + ++inst_num) + { + DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", + fromRename->insts[inst_num]->readPC()); + rob->insertInst(fromRename->insts[inst_num]); + } +} + +template +void +SimpleCommit::markCompletedInsts() +{ + // Grab completed insts out of the IEW instruction queue, and mark + // instructions completed within the ROB. + for (int inst_num = 0; + fromIEW->insts[inst_num] != NULL && inst_num < iewWidth; + ++inst_num) + { + DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", + fromIEW->insts[inst_num]->readPC(), + fromIEW->insts[inst_num]->seqNum); + + // Mark the instruction as ready to commit. + fromIEW->insts[inst_num]->setCanCommit(); + } +} + +template +uint64_t +SimpleCommit::readCommitPC() +{ + return rob->readHeadPC(); +} + +#endif // __COMMIT_IMPL_HH__ diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh new file mode 100644 index 000000000..676334249 --- /dev/null +++ b/cpu/beta_cpu/cpu_policy.hh @@ -0,0 +1,32 @@ +#ifndef __CPU_POLICY_HH__ +#define __CPU_POLICY_HH__ + +#include "cpu/beta_cpu/fetch.hh" +#include "cpu/beta_cpu/decode.hh" +#include "cpu/beta_cpu/rename.hh" +#include "cpu/beta_cpu/iew.hh" +#include "cpu/beta_cpu/commit.hh" + +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/regfile.hh" +#include "cpu/beta_cpu/free_list.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/rob.hh" + +template +struct SimpleCPUPolicy +{ + typedef PhysRegFile RegFile; + typedef SimpleFreeList FreeList; + typedef SimpleRenameMap RenameMap; + typedef ROB ROB; + typedef InstructionQueue IQ; + + typedef SimpleFetch Fetch; + typedef SimpleDecode Decode; + typedef SimpleRename Rename; + typedef SimpleIEW IEW; + typedef SimpleCommit Commit; +}; + +#endif //__CPU_POLICY_HH__ diff --git a/cpu/beta_cpu/decode.cc b/cpu/beta_cpu/decode.cc new file mode 100644 index 000000000..ffabcf18a --- /dev/null +++ b/cpu/beta_cpu/decode.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/decode_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleDecode; diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh new file mode 100644 index 000000000..c41955dcb --- /dev/null +++ b/cpu/beta_cpu/decode.hh @@ -0,0 +1,129 @@ +// Todo: +// Add a couple of the branch fields to DynInst. Figure out where DynInst +// should try to compute the target of a PC-relative branch. Try to avoid +// having so many returns within the code. +// Fix up squashing too, as it's too +// dependent upon the iew stage continually telling it to squash. + +#ifndef __SIMPLE_DECODE_HH__ +#define __SIMPLE_DECODE_HH__ + +#include + +//Will want to include: time buffer, structs, +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" + +using namespace std; + +template +class SimpleDecode +{ + private: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::FetchStruct FetchStruct; + typedef typename Impl::DecodeStruct DecodeStruct; + typedef typename Impl::TimeStruct TimeStruct; + + // Typedefs from the ISA. + typedef typename ISA::Addr Addr; + + public: + // The only time decode will become blocked is if dispatch becomes + // blocked, which means IQ or ROB is probably full. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking + }; + + private: + // May eventually need statuses on a per thread basis. + Status _status; + + public: + SimpleDecode(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setDecodeQueue(TimeBuffer *dq_ptr); + + void setFetchQueue(TimeBuffer *fq_ptr); + + void tick(); + + void decode(); + + // Might want to make squash a friend function. + void squash(); + + private: + void block(); + + inline void unblock(); + + void squash(DynInst *inst); + + // Interfaces to objects outside of decode. + /** CPU interface. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get rename's output from backwards time buffer. */ + typename TimeBuffer::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + // Might not be the best name as not only fetch will read it. + typename TimeBuffer::wire toFetch; + + /** Decode instruction queue. */ + TimeBuffer *decodeQueue; + + /** Wire used to write any information heading to rename. */ + typename TimeBuffer::wire toRename; + + /** Fetch instruction queue interface. */ + TimeBuffer *fetchQueue; + + /** Wire to get fetch's output from fetch queue. */ + typename TimeBuffer::wire fromFetch; + + /** Skid buffer between fetch and decode. */ + queue skidBuffer; + + private: + //Consider making these unsigned to avoid any confusion. + /** Rename to decode delay, in ticks. */ + unsigned renameToDecodeDelay; + + /** IEW to decode delay, in ticks. */ + unsigned iewToDecodeDelay; + + /** Commit to decode delay, in ticks. */ + unsigned commitToDecodeDelay; + + /** Fetch to decode delay, in ticks. */ + unsigned fetchToDecodeDelay; + + /** The width of decode, in instructions. */ + unsigned decodeWidth; +}; + +#endif // __SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh new file mode 100644 index 000000000..ecf19b8ea --- /dev/null +++ b/cpu/beta_cpu/decode_impl.hh @@ -0,0 +1,325 @@ +#ifndef __SIMPLE_DECODE_CC__ +#define __SIMPLE_DECODE_CC__ + +#include "cpu/beta_cpu/decode.hh" + +template +SimpleDecode::SimpleDecode(Params ¶ms) + : renameToDecodeDelay(params.renameToDecodeDelay), + iewToDecodeDelay(params.iewToDecodeDelay), + commitToDecodeDelay(params.commitToDecodeDelay), + fetchToDecodeDelay(params.fetchToDecodeDelay), + decodeWidth(params.decodeWidth) +{ + DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); + _status = Idle; +} + +template +void +SimpleDecode::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Decode, "Decode: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void +SimpleDecode::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(Decode, "Decode: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. + toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. + fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +template +void +SimpleDecode::setDecodeQueue(TimeBuffer *dq_ptr) +{ + DPRINTF(Decode, "Decode: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +template +void +SimpleDecode::setFetchQueue(TimeBuffer *fq_ptr) +{ + DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. + fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + +template +void +SimpleDecode::block() +{ + DPRINTF(Decode, "Decode: Blocking.\n"); + + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template +inline void +SimpleDecode::unblock() +{ + DPRINTF(Decode, "Decode: Unblocking, going to remove " + "instructions from skid buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Finished unblocking.\n"); + _status = Running; + } +} + +// This squash is specifically for when Decode detects a PC-relative branch +// was predicted incorrectly. +template +void +SimpleDecode::squash(DynInst *inst) +{ + DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " + "detected at decode.\n"); + Addr new_PC = inst->nextPC; + + toFetch->decodeInfo.predIncorrect = true; + toFetch->decodeInfo.squash = true; + toFetch->decodeInfo.nextPC = new_PC; + + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template +void +SimpleDecode::squash() +{ + DPRINTF(Decode, "Decode: Squashing.\n"); + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template +void +SimpleDecode::tick() +{ + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + if (_status != Blocked && _status != Squashing) { + DPRINTF(Decode, "Decode: Not blocked, so attempting to run " + "stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + decode(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + unblock(); + } + } else if (_status == Blocked) { + if (fromFetch->insts[0] != NULL) { + block(); + } + + if (!fromRename->renameInfo.stall && + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { + DPRINTF(Decode, "Decode: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this + // stage is done unblocking. + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Still blocked.\n"); + toFetch->decodeInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + } else if (_status == Squashing) { + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + _status = Running; + } else if (fromCommit->commitInfo.squash) { + squash(); + } + } +} + +template +void +SimpleDecode::decode() +{ + // Check time buffer if being told to squash. + if (/* fromRename->renameInfo.squash || */ + /* fromIEW->iewInfo.squash || */ + fromCommit->commitInfo.squash) { + squash(); + return; + } + + // Check time buffer if being told to stall. + if (fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + block(); + return; + } + + // Check fetch queue to see if instructions are available. + // If no available instructions, do nothing, unless this stage is + // currently unblocking. + if (fromFetch->insts[0] == NULL && _status != Unblocking) { + DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); + // Should I change the status to idle? + return; + } + + DynInst *inst; + // Instead have a class member variable that records which instruction + // was the last one that was ended on. At the tick() stage, it can + // check if that's equal to 0. If not, then don't pop stuff off. + unsigned num_inst = 0; + bool insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] != NULL : + fromFetch->insts[num_inst] != NULL; + + // Debug block... +#if 0 + if (insts_available) { + DPRINTF(Decode, "Decode: Instructions available.\n"); + } else { + if (_status == Unblocking && skidBuffer.empty()) { + DPRINTF(Decode, "Decode: No instructions available, skid buffer " + "empty.\n"); + } else if (_status != Unblocking && + fromFetch->insts[0] == NULL) { + DPRINTF(Decode, "Decode: No instructions available, fetch queue " + "empty.\n"); + } else { + panic("Decode: No instructions available, unexpected condition!" + "\n"); + } + } +#endif + + // Check to make sure that instructions coming from fetch are valid. + // Normally at this stage the branch target of PC-relative branches + // should be computed here. However in this simple model all + // computation will take place at execute. Hence doneTargCalc() + // will always be false. + while (num_inst < decodeWidth && + insts_available) + { + DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); + // Might create some sort of accessor to get an instruction + // on a per thread basis. Or might be faster to just get + // a pointer to an array or list of instructions and use that + // within this code. + inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : + fromFetch->insts[num_inst]; + DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", + inst, inst->readPC()); + + // This current instruction is valid, so add it into the decode + // queue. The next instruction may not be valid, so check to + // see if branches were predicted correctly. + toRename->insts[num_inst] = inst; + + // Ensure that if it was predicted as a branch, it really is a + // branch. This case should never happen in this model. + if (inst->predTaken() && !inst->isControl()) { + panic("Instruction predicted as a branch!"); + + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + + // Ensure that the predicted branch target is the actual branch + // target if possible (branches that are PC relative). + if (inst->isControl() && inst->doneTargCalc()) { + if (inst->mispredicted()) { + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + } + + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + + // Increment which instruction we're looking at. + ++num_inst; + + // Check whether or not there are instructions available. + // Either need to check within the skid buffer, or the fetch + // queue, depending if this stage is unblocking or not. + insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] == NULL : + fromFetch->insts[num_inst] == NULL; + } +} + +#endif // __SIMPLE_DECODE_CC__ diff --git a/cpu/beta_cpu/fetch.cc b/cpu/beta_cpu/fetch.cc new file mode 100644 index 000000000..4d08754b6 --- /dev/null +++ b/cpu/beta_cpu/fetch.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/fetch_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleFetch; diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh new file mode 100644 index 000000000..5717c65ac --- /dev/null +++ b/cpu/beta_cpu/fetch.hh @@ -0,0 +1,160 @@ +// Todo: add in statistics, only get the MachInst and let decode actually +// decode, think about SMT fetch, +// fix up branch prediction stuff into one thing, +// Figure out where to advance time buffer. Add a way to get a +// stage's current status. + +#ifndef __SIMPLE_FETCH_HH__ +#define __SIMPLE_FETCH_HH__ + +//Will want to include: time buffer, structs, MemInterface, Event, +//whatever class bzero uses, MemReqPtr + +#include "base/timebuf.hh" +#include "sim/eventq.hh" +#include "cpu/pc_event.hh" +#include "cpu/beta_cpu/comm.hh" +#include "mem/mem_interface.hh" + +using namespace std; + +/** + * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch + * will stall if there's an Icache miss, but otherwise assumes a one cycle + * Icache hit. This will be replaced with a more fleshed out class in the + * future. + */ + +template +class SimpleFetch +{ + public: + /** Typedefs from Impl. */ + typedef typename Impl::ISA ISA; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::FetchStruct FetchStruct; + typedef typename Impl::TimeStruct TimeStruct; + + /** Typedefs from ISA. */ + typedef typename ISA::MachInst MachInst; + + public: + enum Status { + Running, + Idle, + Squashing, + Blocked, + IcacheMissStall, + IcacheMissComplete + }; + + // May eventually need statuses on a per thread basis. + Status _status; + + bool stalled; + + public: + /** SimpleFetch constructor. */ + SimpleFetch(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *time_buffer); + + void setFetchQueue(TimeBuffer *fq_ptr); + + void tick(); + + void fetch(); + + void processCacheCompletion(); + +// private: + // Figure out PC vs next PC and how it should be updated + void squash(Addr newPC); + + public: + class CacheCompletionEvent : public Event + { + private: + SimpleFetch *fetch; + + public: + CacheCompletionEvent(SimpleFetch *_fetch); + + virtual void process(); + virtual const char *description(); + }; + + CacheCompletionEvent cacheCompletionEvent; + + private: + /** Pointer to the FullCPU. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get decode's information from backwards time buffer. */ + typename TimeBuffer::wire fromDecode; + + /** Wire to get rename's information from backwards time buffer. */ + typename TimeBuffer::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + // Will probably have this sit in the FullCPU and just pass a pointr in. + // Simplifies the constructors of all stages. + /** Internal fetch instruction queue. */ + TimeBuffer *fetchQueue; + + //Might be annoying how this name is different than the queue. + /** Wire used to write any information heading to decode. */ + typename TimeBuffer::wire toDecode; + + /** Icache interface. */ + MemInterface *icacheInterface; + + /** Memory request used to access cache. */ + MemReqPtr memReq; + + /** Decode to fetch delay, in ticks. */ + unsigned decodeToFetchDelay; + + /** Rename to fetch delay, in ticks. */ + unsigned renameToFetchDelay; + + /** IEW to fetch delay, in ticks. */ + unsigned iewToFetchDelay; + + /** Commit to fetch delay, in ticks. */ + unsigned commitToFetchDelay; + + /** The width of fetch in instructions. */ + unsigned fetchWidth; + + /** Cache block size. */ + int blkSize; + + /** Mask to get a cache block's address. */ + Addr cacheBlockMask; + + /** The instruction being fetched. */ + MachInst inst; + + /** Size of instructions. */ + int instSize; + + /** Icache stall statistics. */ +// Stats::Scalar<> icacheStallCycles; +// Counter lastIcacheStall; +}; + +#endif //__SIMPLE_FETCH_HH__ diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh new file mode 100644 index 000000000..918d2dad2 --- /dev/null +++ b/cpu/beta_cpu/fetch_impl.hh @@ -0,0 +1,555 @@ +// Todo: Rewrite this. Add in branch prediction. Fix up if squashing comes +// from decode; only the correct instructions should be killed. This will +// probably require changing the CPU's instList functions to take a seqNum +// instead of a dyninst. With probe path, should be able to specify +// size of data to fetch. Will be able to get full cache line. + +// Remove this later. +#define OPCODE(X) (X >> 26) & 0x3f + +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "cpu/beta_cpu/fetch.hh" + +#include "sim/universe.hh" + +template +SimpleFetch::CacheCompletionEvent +::CacheCompletionEvent(SimpleFetch *_fetch) + : Event(&mainEventQueue), + fetch(_fetch) +{ +} + +template +void +SimpleFetch::CacheCompletionEvent::process() +{ + fetch->processCacheCompletion(); +} + +template +const char * +SimpleFetch::CacheCompletionEvent::description() +{ + return "SimpleFetch cache completion event"; +} + +template +SimpleFetch::SimpleFetch(Params ¶ms) + : cacheCompletionEvent(this), + icacheInterface(params.icacheInterface), + decodeToFetchDelay(params.decodeToFetchDelay), + renameToFetchDelay(params.renameToFetchDelay), + iewToFetchDelay(params.iewToFetchDelay), + commitToFetchDelay(params.commitToFetchDelay), + fetchWidth(params.fetchWidth), + inst(0) +{ + // Set status to idle. + _status = Idle; + + // Create a new memory request. + memReq = new MemReq(); + // Not sure of this parameter. I think it should be based on the + // thread number. +#ifndef FULL_SYSTEM + memReq->asid = params.asid; +#else + memReq->asid = 0; +#endif // FULL_SYSTEM + memReq->data = new uint8_t[64]; + + // Size of cache block. + blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. + cacheBlockMask = ~((int)log2(blkSize) - 1); + + // Get the size of an instruction. + instSize = sizeof(MachInst); +} + +template +void +SimpleFetch::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); + cpu = cpu_ptr; + // This line will be removed eventually. + memReq->xc = cpu->xcBase(); +} + +template +void +SimpleFetch::setTimeBuffer(TimeBuffer *time_buffer) +{ + DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template +void +SimpleFetch::setFetchQueue(TimeBuffer *fq_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toDecode = fetchQueue->getWire(0); +} + +template +void +SimpleFetch::processCacheCompletion() +{ + DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); + + // Only change the status if it's still waiting on the icache access + // to return. + // Can keep track of how many cache accesses go unused due to + // misspeculation here. + // How to handle an outstanding miss which gets cancelled due to squash, + // then a new icache miss gets scheduled? + if (_status == IcacheMissStall) + _status = IcacheMissComplete; +} + +// Note that in the SimpleFetch<>, will most likely have to provide the +// template parameters to BP and BTB. +template +void +SimpleFetch::squash(Addr new_PC) +{ + DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); + cpu->setNextPC(new_PC + instSize); + cpu->setPC(new_PC); + + _status = Squashing; + + // Clear out the instructions that are no longer valid. + // Actually maybe slightly unrealistic to kill instructions that are + // in flight like that between stages. Perhaps just have next + // stage ignore those instructions or something. In the cycle where it's + // returning from squashing, the other stages can just ignore the inputs + // for that cycle. + + // Tell the CPU to remove any instructions that aren't currently + // in the ROB (instructions in flight that were killed). + cpu->removeInstsNotInROB(); +} + +template +void +SimpleFetch::tick() +{ +#if 0 + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + + // In any case, squash. + squash(fromCommit->commitInfo.nextPC); + return; + } + + if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + + // Squash unless we're already squashing? + squash(fromDecode->decodeInfo.nextPC); + return; + } + + if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); + + // Continue to squash. + _status = Squashing; + return; + } + + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + // What to do if we're already in an icache stall? + } +#endif + + if (_status != Blocked && + _status != Squashing && + _status != IcacheMissStall) { + DPRINTF(Fetch, "Fetch: Running stage.\n"); + + fetch(); + } else if (_status == Blocked) { + // If still being told to stall, do nothing. + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + } else { + + DPRINTF(Fetch, "Fetch: Done blocking.\n"); + _status = Running; + } + + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + squash(fromCommit->commitInfo.nextPC); + return; + } else if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + squash(fromDecode->decodeInfo.nextPC); + return; + } else if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); + _status = Squashing; + return; + } + } else if (_status == Squashing) { + // If there are no squash signals then change back to running. + // Note that when a squash starts happening, commitInfo.squash will + // be high. But if the squash is still in progress, then only + // commitInfo.robSquashing will be high. + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Fetch, "Fetch: Done squashing.\n"); + _status = Running; + } else if (fromCommit->commitInfo.squash) { + // If there's a new squash, then start squashing again. + squash(fromCommit->commitInfo.nextPC); + } else { + // Purely a debugging statement. + DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); + } + } + +} + +template +void +SimpleFetch::fetch() +{ + ////////////////////////////////////////// + // Check backwards communication + ////////////////////////////////////////// + + // If branch prediction is incorrect, squash any instructions, + // update PC, and do not fetch anything this cycle. + + // Might want to put all the PC changing stuff in one area. + // Normally should also check here to see if there is branch + // misprediction info to update with. + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + squash(fromCommit->commitInfo.nextPC); + return; + } else if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + squash(fromDecode->decodeInfo.nextPC); + return; + } else if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); + _status = Squashing; + return; + } + + // If being told to stall, do nothing. + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + _status = Blocked; + return; + } + + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// + + // If nothing else outstanding, attempt to read instructions. + +#ifdef FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + + // The current PC. + Addr PC = cpu->readPC(); + + // Fault code for memory access. + Fault fault = No_Fault; + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. + if (_status == IcacheMissComplete) { + DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); + + // Reset the completion event to NULL. + memReq->completionEvent = NULL; + + _status = Running; + } else { + DPRINTF(Fetch, "Fetch: Attempting to translate and read " + "instruction, starting at PC %08p.\n", + PC); + + // Otherwise check if the instruction exists within the cache. + // If it does, then proceed on to read the instruction and the rest + // of the instructions in the cache line until either the end of the + // cache line or a predicted taken branch is encountered. + // Note that this simply checks if the first instruction exists + // within the cache, assuming the rest of the cache line also exists + // within the cache. + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq->cmd = Read; + memReq->reset(PC, instSize, flags); + + // Translate the instruction request. + // Should this function be + // in the CPU class ? Probably...ITB/DTB should exist within the + // CPU. + + fault = cpu->translateInstReq(memReq); + + // In the case of faults, the fetch stage may need to stall and wait + // on what caused the fetch (ITB or Icache miss). + + // If translation was successful, attempt to read the first + // instruction. + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); + fault = cpu->mem->read(memReq, inst); + // This read may change when the mem interface changes. + } + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); + memReq->completionEvent = NULL; + + memReq->time = curTick; + + MemAccessResult result = icacheInterface->access(memReq); + + // If the cache missed (in this model functional and timing + // memories are different), then schedule an event to wake + // up this stage once the cache miss completes. + if (result != MA_HIT && icacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; +// lastIcacheStall = curTick; + + // How does current model work as far as individual + // stages scheduling/unscheduling? + // Perhaps have only the main CPU scheduled/unscheduled, + // and have it choose what stages to run appropriately. + + DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); + _status = IcacheMissStall; + return; + } + } + } + + // As far as timing goes, the CPU will need to send an event through + // the MemReq in order to be woken up once the memory access completes. + // Probably have a status on a per thread basis so each thread can + // block independently and be woken up independently. + + Addr next_PC = 0; + InstSeqNum inst_seq; + + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predicted_branch = false; + + // Might want to keep track of various stats. +// numLinesFetched++; + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Because the first instruction was already fetched, create the + // DynInst and put it into the queue to decode. + DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq, + cpu); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + instruction, instruction->readPC()); + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + + cpu->addInst(instruction); + + // Write the instruction to the first slot in the queue + // that heads to decode. + toDecode->insts[0] = instruction; + + // Now update the PC to fetch the next instruction in the cache + // line. + PC = PC + instSize; + + // Obtain the index into the cache line by getting only the low + // order bits. + int line_index = PC & cacheBlockMask; + + // Take instructions and put them into the queue heading to decode. + // Then read the next instruction in the cache line. Continue + // until either all of the fetch bandwidth is used (not an issue for + // non-SMT), or the end of the cache line is reached. Note that + // this assumes standard cachelines, and not something like a trace + // cache where lines might not end at cache-line size aligned + // addresses. + // @todo: Fix the horrible amount of translates/reads that must + // take place due to reading an entire cacheline. Ideally it + // should all take place at once, return an array of binary + // instructions, which can then be used to get all the instructions + // needed. Figure out if I can roll it back into one loop. + for (int fetched = 1; + line_index < blkSize && fetched < fetchWidth; + line_index+=instSize, ++fetched) + { + // Reset the mem request to setup the read of the next + // instruction. + memReq->reset(PC, instSize, flags); + + // Translate the instruction request. + fault = cpu->translateInstReq(memReq); + + // Read instruction. + if (fault == No_Fault) { + fault = cpu->mem->read(memReq, inst); + } + + // Check if there was a fault. + if (fault != No_Fault) { + panic("Fetch: Read of instruction faulted when it should " + "succeed; most likely exceeding cache line.\n"); + } + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Create the actual DynInst. Parameters are: + // DynInst(instruction, PC, predicted PC, CPU pointer). + // Because this simple model has no branch prediction, the + // predicted PC will simply be PC+sizeof(MachInst). + // Update to actually use a branch predictor to predict the + // target in the future. + DynInst *instruction = new DynInst(inst, PC, PC+instSize, + inst_seq, cpu); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + instruction, instruction->readPC()); + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + cpu->addInst(instruction); + + // Write the instruction to the proper slot in the queue + // that heads to decode. + toDecode->insts[fetched] = instruction; + + // Might want to keep track of various stats. +// numInstsFetched++; + + // Now update the PC to fetch the next instruction in the cache + // line. + PC = PC + instSize; + } + + // If no branches predicted taken, then increment PC with + // fall-through path. This simple model always predicts not + // taken. + if (!predicted_branch) { + next_PC = PC; + } + } + + // Now that fetching is completed, update the PC to signify what the next + // cycle will be. Might want to move this to the beginning of this + // function so that the PC updates at the beginning of everything. + // Or might want to leave setting the PC to the main CPU, with fetch + // only changing the nextPC (will require correct determination of + // next PC). + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); + cpu->setPC(next_PC); + cpu->setNextPC(next_PC + instSize); + } else { + // Handle the fault. + // This stage will not be able to continue until all the ROB + // slots are empty, at which point the fault can be handled. + // The only other way it can wake up is if a squash comes along + // and changes the PC. Not sure how to handle that case...perhaps + // have it handled by the upper level CPU class which peeks into the + // time buffer and sees if a squash comes along, in which case it + // changes the status. + + DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); + + _status = Blocked; +#ifdef FULL_SYSTEM + // Trap will probably need a pointer to the CPU to do accessing. + // Or an exec context. --Write ProxyExecContext eventually. + // Avoid using this for now as the xc really shouldn't be in here. + cpu->trap(fault); +#else // !FULL_SYSTEM + fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); +#endif // FULL_SYSTEM + } +} diff --git a/cpu/beta_cpu/free_list.cc b/cpu/beta_cpu/free_list.cc new file mode 100644 index 000000000..006bf4bf7 --- /dev/null +++ b/cpu/beta_cpu/free_list.cc @@ -0,0 +1,33 @@ +#include "cpu/beta_cpu/free_list.hh" + +SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) +{ + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. + for (PhysRegIndex i = numLogicalIntRegs; + i < numPhysicalIntRegs; ++i) + { + freeIntRegs.push(i); + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. Because the + // float registers' indices start where the physical registers end, + // some math must be done to determine where the free registers start. + for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; + i < numPhysicalRegs; ++i) + { + cprintf("Free List: Adding register %i to float list.\n", i); + freeFloatRegs.push(i); + } +} + diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh new file mode 100644 index 000000000..8521ad94c --- /dev/null +++ b/cpu/beta_cpu/free_list.hh @@ -0,0 +1,148 @@ +#ifndef __FREE_LIST_HH__ +#define __FREE_LIST_HH__ + +#include +#include + +#include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/comm.hh" +#include "base/trace.hh" + +using namespace std; + +// Question: Do I even need the number of logical registers? +// How to avoid freeing registers instantly? Same with ROB entries. + +/** + * FreeList class that simply holds the list of free integer and floating + * point registers. Can request for a free register of either type, and + * also send back free registers of either type. This is a very simple + * class, but it should be sufficient for most implementations. Like all + * other classes, it assumes that the indices for the floating point + * registers starts after the integer registers end. Hence the variable + * numPhysicalIntRegs is logically equivalent to the baseFP dependency. + * Note that + * while this most likely should be called FreeList, the name "FreeList" + * is used in a typedef within the CPU Policy, and therefore no class + * can be named simply "FreeList". + * @todo: Give a better name to the base FP dependency. + */ +class SimpleFreeList +{ + public: + + private: + /** The list of free integer registers. */ + queue freeIntRegs; + + /** The list of free floating point registers. */ + queue freeFloatRegs; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Total number of physical registers. */ + int numPhysicalRegs; + + public: + SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs); + + PhysRegIndex getIntReg(); + + PhysRegIndex getFloatReg(); + + void addReg(PhysRegIndex freed_reg); + + void addIntReg(PhysRegIndex freed_reg); + + void addFloatReg(PhysRegIndex freed_reg); + + bool hasFreeIntRegs() + { return !freeIntRegs.empty(); } + + bool hasFreeFloatRegs() + { return !freeFloatRegs.empty(); } + + int numFreeIntRegs() + { return freeIntRegs.size(); } + + int numFreeFloatRegs() + { return freeFloatRegs.size(); } +}; + +inline PhysRegIndex +SimpleFreeList::getIntReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free integer register.\n"); + if (freeIntRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeIntRegs.front(); + + freeIntRegs.pop(); + + return(free_reg); +} + +inline PhysRegIndex +SimpleFreeList::getFloatReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free float register.\n"); + if (freeFloatRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeFloatRegs.front(); + + freeFloatRegs.pop(); + + return(free_reg); +} + +inline void +SimpleFreeList::addReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg); + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + if (freed_reg < numPhysicalIntRegs) { + freeIntRegs.push(freed_reg); + } else if (freed_reg < numPhysicalRegs) { + freeFloatRegs.push(freed_reg); + } +} + +inline void +SimpleFreeList::addIntReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + freeIntRegs.push(freed_reg); +} + +inline void +SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + freeFloatRegs.push(freed_reg); +} + +#endif // __FREE_LIST_HH__ diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc new file mode 100644 index 000000000..6fbf5d69a --- /dev/null +++ b/cpu/beta_cpu/full_cpu.cc @@ -0,0 +1,503 @@ +#ifndef __SIMPLE_FULL_CPU_CC__ +#define __SIMPLE_FULL_CPU_CC__ + +#ifdef FULL_SYSTEM +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif +#include "sim/universe.hh" + +#include "cpu/exec_context.hh" +#include "cpu/beta_cpu/full_cpu.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +using namespace std; + +#ifdef FULL_SYSTEM +BaseFullCPU::BaseFullCPU(const std::string &_name, + int number_of_threads, + Counter max_insts_any_thread, + Counter max_insts_all_threads, + Counter max_loads_any_thread, + Counter max_loads_all_threads, + System *_system, Tick freq) + : BaseCPU(_name, number_of_threads, + max_insts_any_thread, max_insts_all_threads, + max_loads_any_thread, max_loads_all_threads, + _system, freq) +{ +} +#else +BaseFullCPU::BaseFullCPU(const std::string &_name, + int number_of_threads, + Counter max_insts_any_thread, + Counter max_insts_all_threads, + Counter max_loads_any_thread, + Counter max_loads_all_threads) + : BaseCPU(_name, number_of_threads, + max_insts_any_thread, max_insts_all_threads, + max_loads_any_thread, max_loads_all_threads) +{ +} +#endif // FULL_SYSTEM + +template +FullBetaCPU::TickEvent::TickEvent(FullBetaCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +template +void +FullBetaCPU::TickEvent::process() +{ + cpu->tick(); +} + +template +const char * +FullBetaCPU::TickEvent::description() +{ + return "FullBetaCPU tick event"; +} + +//Call constructor to all the pipeline stages here +template +FullBetaCPU::FullBetaCPU(Params ¶ms) +#ifdef FULL_SYSTEM + : BaseFullCPU(params.name, /* number_of_threads */ 1, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads, + params.system, params.freq), +#else + : BaseFullCPU(params.name, /* number_of_threads */ 1, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads), +#endif // FULL_SYSTEM + tickEvent(this), + fetch(params), + decode(params), + rename(params), + iew(params), + commit(params), + + regFile(params.numPhysIntRegs, params.numPhysFloatRegs), + + freeList(Impl::ISA::NumIntRegs, params.numPhysIntRegs, + Impl::ISA::NumFloatRegs, params.numPhysFloatRegs), + + renameMap(Impl::ISA::NumIntRegs, params.numPhysIntRegs, + Impl::ISA::NumFloatRegs, params.numPhysFloatRegs, + Impl::ISA::NumMiscRegs, + Impl::ISA::ZeroReg, Impl::ISA::ZeroReg), + + rob(params.numROBEntries, params.squashWidth), + + // What to pass to these time buffers? + // For now just have these time buffers be pretty big. + timeBuffer(20, 20), + fetchQueue(20, 20), + decodeQueue(20, 20), + renameQueue(20, 20), + iewQueue(20, 20), + + xc(NULL), + + globalSeqNum(1), + +#ifdef FULL_SYSTEM + system(params.system), + memCtrl(system->memCtrl), + physmem(system->physmem), + itb(params.itb), + dtb(params.dtb), + mem(params.mem), +#else + process(params.process), + asid(params.asid), + mem(process->getMemory()), +#endif // FULL_SYSTEM + + icacheInterface(params.icacheInterface), + dcacheInterface(params.dcacheInterface), + deferRegistration(params.defReg), + numInsts(0), + funcExeInst(0) +{ + _status = Idle; +#ifdef FULL_SYSTEM + xc = new ExecContext(this, 0, system, itb, dtb, mem); + + // initialize CPU, including PC + TheISA::initCPU(&xc->regs); +#else + xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); + DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x", + process->prog_entry, process); + + assert(process->getMemory() != NULL); + assert(mem != NULL); +#endif // !FULL_SYSTEM + execContexts.push_back(xc); + + // The stages also need their CPU pointer setup. However this must be + // done at the upper level CPU because they have pointers to the upper + // level CPU, and not this FullBetaCPU. + + // Give each of the stages the time buffer they will use. + fetch.setTimeBuffer(&timeBuffer); + decode.setTimeBuffer(&timeBuffer); + rename.setTimeBuffer(&timeBuffer); + iew.setTimeBuffer(&timeBuffer); + commit.setTimeBuffer(&timeBuffer); + + // Also setup each of the stages' queues. + fetch.setFetchQueue(&fetchQueue); + decode.setFetchQueue(&fetchQueue); + decode.setDecodeQueue(&decodeQueue); + rename.setDecodeQueue(&decodeQueue); + rename.setRenameQueue(&renameQueue); + iew.setRenameQueue(&renameQueue); + iew.setIEWQueue(&iewQueue); + commit.setIEWQueue(&iewQueue); + commit.setRenameQueue(&renameQueue); + + // Setup the rename map for whichever stages need it. + rename.setRenameMap(&renameMap); + iew.setRenameMap(&renameMap); + + // Setup the free list for whichever stages need it. + rename.setFreeList(&freeList); + renameMap.setFreeList(&freeList); + + // Setup the ROB for whichever stages need it. + commit.setROB(&rob); +} + +template +FullBetaCPU::~FullBetaCPU() +{ +} + +template +void +FullBetaCPU::tick() +{ + DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullBetaCPU.\n"); + + //Tick each of the stages if they're actually running. + //Will want to figure out a way to unschedule itself if they're all + //going to be idle for a long time. + fetch.tick(); + + decode.tick(); + + rename.tick(); + + iew.tick(); + + commit.tick(); + + // Now advance the time buffers, unless the stage is stalled. + timeBuffer.advance(); + + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + + if (_status == Running && !tickEvent.scheduled()) + tickEvent.schedule(curTick + 1); +} + +template +void +FullBetaCPU::init() +{ + if(!deferRegistration) + { + this->registerExecContexts(); + + // Need to do a copy of the xc->regs into the CPU's regfile so + // that it can start properly. + + // First loop through the integer registers. + for (int i = 0; i < Impl::ISA::NumIntRegs; ++i) + { + regFile.intRegFile[i] = xc->regs.intRegFile[i]; + } + + // Then loop through the floating point registers. + for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i) + { + regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i]; + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. + regFile.pc = xc->regs.pc; + regFile.npc = xc->regs.npc; + } +} + +template +void +FullBetaCPU::activateContext(int thread_num, int delay) +{ + // Needs to set each stage to running as well. + + scheduleTickEvent(delay); + + _status = Running; +} + +template +void +FullBetaCPU::suspendContext(int thread_num) +{ + panic("suspendContext unimplemented!"); +} + +template +void +FullBetaCPU::deallocateContext(int thread_num) +{ + panic("deallocateContext unimplemented!"); +} + +template +void +FullBetaCPU::haltContext(int thread_num) +{ + panic("haltContext unimplemented!"); +} + +template +void +FullBetaCPU::switchOut() +{ + panic("FullBetaCPU does not have a switch out function.\n"); +} + +template +void +FullBetaCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // Set all status's to active, schedule the + // CPU's tick event. + tickEvent.schedule(curTick); + for (int i = 0; i < execContexts.size(); ++i) { + execContexts[i]->activate(); + } + + // Switch out the other CPU. + oldCPU->switchOut(); +} + +template +InstSeqNum +FullBetaCPU::getAndIncrementInstSeq() +{ + // Hopefully this works right. + return globalSeqNum++; +} + +template +uint64_t +FullBetaCPU::readIntReg(int reg_idx) +{ + return regFile.readIntReg(reg_idx); +} + +template +float +FullBetaCPU::readFloatRegSingle(int reg_idx) +{ + return regFile.readFloatRegSingle(reg_idx); +} + +template +double +FullBetaCPU::readFloatRegDouble(int reg_idx) +{ + return regFile.readFloatRegDouble(reg_idx); +} + +template +uint64_t +FullBetaCPU::readFloatRegInt(int reg_idx) +{ + return regFile.readFloatRegInt(reg_idx); +} + +template +void +FullBetaCPU::setIntReg(int reg_idx, uint64_t val) +{ + regFile.setIntReg(reg_idx, val); +} + +template +void +FullBetaCPU::setFloatRegSingle(int reg_idx, float val) +{ + regFile.setFloatRegSingle(reg_idx, val); +} + +template +void +FullBetaCPU::setFloatRegDouble(int reg_idx, double val) +{ + regFile.setFloatRegDouble(reg_idx, val); +} + +template +void +FullBetaCPU::setFloatRegInt(int reg_idx, uint64_t val) +{ + regFile.setFloatRegInt(reg_idx, val); +} + +template +uint64_t +FullBetaCPU::readPC() +{ + return regFile.readPC(); +} + +template +void +FullBetaCPU::setNextPC(uint64_t val) +{ + regFile.setNextPC(val); +} + +template +void +FullBetaCPU::setPC(Addr new_PC) +{ + regFile.setPC(new_PC); +} + +template +void +FullBetaCPU::addInst(DynInst *inst) +{ + instList.push_back(inst); +} + +template +void +FullBetaCPU::instDone() +{ + // Keep an instruction count. + numInsts++; + + // Check for instruction-count-based events. + comInstEventQueue[0]->serviceEvents(numInsts); +} + +template +void +FullBetaCPU::removeBackInst(DynInst *inst) +{ + DynInst *inst_to_delete; + + // Walk through the instruction list, removing any instructions + // that were inserted after the given instruction, inst. + while (instList.back() != inst) + { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Deleting instruction %#x, PC %#x\n", + inst_to_delete, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Delete the instruction itself. + delete inst_to_delete; + } +} + +template +void +FullBetaCPU::removeFrontInst(DynInst *inst) +{ + DynInst *inst_to_delete; + + // The front instruction should be the same one being asked to be deleted. + assert(instList.front() == inst); + + // Remove the front instruction. + inst_to_delete = inst; + instList.pop_front(); + + DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n", + inst_to_delete, inst_to_delete->readPC()); + + delete inst_to_delete; +} + +template +void +FullBetaCPU::removeInstsNotInROB() +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInst *rob_tail = rob.readTailInst(); + + removeBackInst(rob_tail); +} + +template +void +FullBetaCPU::removeAllInsts() +{ + instList.clear(); +} + +template +void +FullBetaCPU::dumpInsts() +{ + int num = 0; + typename list::iterator inst_list_it = instList.begin(); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\nInst:%#x\nPC:%#x\nSN:%lli\n\n", + num, (*inst_list_it), (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum); + inst_list_it++; + ++num; + } +} + +template +void +FullBetaCPU::wakeDependents(DynInst *inst) +{ + iew.wakeDependents(inst); +} + +// Forward declaration of FullBetaCPU. +template FullBetaCPU; + +#endif // __SIMPLE_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh new file mode 100644 index 000000000..00ff1f878 --- /dev/null +++ b/cpu/beta_cpu/full_cpu.hh @@ -0,0 +1,323 @@ +//Todo: Add in a lot of the functions that are ISA specific. Also define +//the functions that currently exist within the base cpu class. Define +//everything for the simobject stuff so it can be serialized and +//instantiated, add in debugging statements everywhere. Have CPU schedule +//itself properly. Constructor. Derived alpha class. Threads! +// Avoid running stages and advancing queues if idle/stalled. + +#ifndef __SIMPLE_FULL_CPU_HH__ +#define __SIMPLE_FULL_CPU_HH__ + +#include +#include + +#include "cpu/beta_cpu/comm.hh" + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/base_cpu.hh" +#include "cpu/beta_cpu/cpu_policy.hh" +#include "sim/process.hh" + +using namespace std; + +class FunctionalMemory; +class Process; + +class BaseFullCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. + public: +#ifdef FULL_SYSTEM + BaseFullCPU(const std::string &_name, int _number_of_threads, + Counter max_insts_any_thread, Counter max_insts_all_threads, + Counter max_loads_any_thread, Counter max_loads_all_threads, + System *_system, Tick freq); +#else + BaseFullCPU(const std::string &_name, int _number_of_threads, + Counter max_insts_any_thread = 0, + Counter max_insts_all_threads = 0, + Counter max_loads_any_thread = 0, + Counter max_loads_all_threads = 0); +#endif // FULL_SYSTEM +}; + +template +class FullBetaCPU : public BaseFullCPU +{ + public: + //Put typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + + public: + enum Status { + Running, + Idle, + Halted, + Blocked // ? + }; + + Status _status; + + private: + class TickEvent : public Event + { + private: + FullBetaCPU *cpu; + + public: + TickEvent(FullBetaCPU *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + public: + void tick(); + + FullBetaCPU(Params ¶ms); + ~FullBetaCPU(); + + void init(); + + void activateContext(int thread_num, int delay); + void suspendContext(int thread_num); + void deallocateContext(int thread_num); + void haltContext(int thread_num); + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. */ + InstSeqNum getAndIncrementInstSeq(); + +#ifdef FULL_SYSTEM + /** Check if this address is a valid instruction address. */ + bool validInstAddr(Addr addr) { return true; } + + /** Check if this address is a valid data address. */ + bool validDataAddr(Addr addr) { return true; } + + /** Get instruction asid. */ + int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); } + + /** Get data asid. */ + int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); } +#else + bool validInstAddr(Addr addr) + { return process->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return process->validDataAddr(addr); } + + int getInstAsid() { return asid; } + int getDataAsid() { return asid; } + +#endif + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx); + + float readFloatRegSingle(int reg_idx); + + double readFloatRegDouble(int reg_idx); + + uint64_t readFloatRegInt(int reg_idx); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatRegSingle(int reg_idx, float val); + + void setFloatRegDouble(int reg_idx, double val); + + void setFloatRegInt(int reg_idx, uint64_t val); + + uint64_t readPC(); + + void setNextPC(uint64_t val); + + void setPC(Addr new_PC); + + /** Function to add instruction onto the head of the list of the + * instructions. Used when new instructions are fetched. + */ + void addInst(DynInst *inst); + + /** Function to tell the CPU that an instruction has completed. */ + void instDone(); + + /** Remove all instructions in back of the given instruction, but leave + * that instruction in the list. This is useful in a squash, when there + * are instructions in this list that don't exist in structures such as + * the ROB. The instruction doesn't have to be the last instruction in + * the list, but will be once this function completes. + * @todo: Remove only up until that inst? Squashed inst is most likely + * valid. + */ + void removeBackInst(DynInst *inst); + + /** Remove an instruction from the front of the list. It is expected + * that there are no instructions in front of it (that is, none are older + * than the instruction being removed). Used when retiring instructions. + * @todo: Remove the argument to this function, and just have it remove + * last instruction once it's verified that commit has the same ordering + * as the instruction list. + */ + void removeFrontInst(DynInst *inst); + + /** Remove all instructions that are not currently in the ROB. */ + void removeInstsNotInROB(); + + /** Remove all instructions from the list. */ + void removeAllInsts(); + + void dumpInsts(); + + /** Basically a wrapper function so that instructions executed at + * commit can tell the instruction queue that they have completed. + * Eventually this hack should be removed. + */ + void wakeDependents(DynInst *inst); + + public: + /** List of all the instructions in flight. */ + list instList; + + //not sure these should be private. + protected: + /** The fetch stage. */ + typename CPUPolicy::Fetch fetch; + + /** The fetch stage's status. */ + typename CPUPolicy::Fetch::Status fetchStatus; + + /** The decode stage. */ + typename CPUPolicy::Decode decode; + + /** The decode stage's status. */ + typename CPUPolicy::Decode::Status decodeStatus; + + /** The dispatch stage. */ + typename CPUPolicy::Rename rename; + + /** The dispatch stage's status. */ + typename CPUPolicy::Rename::Status renameStatus; + + /** The issue/execute/writeback stages. */ + typename CPUPolicy::IEW iew; + + /** The issue/execute/writeback stage's status. */ + typename CPUPolicy::IEW::Status iewStatus; + + /** The commit stage. */ + typename CPUPolicy::Commit commit; + + /** The fetch stage's status. */ + typename CPUPolicy::Commit::Status commitStatus; + + //Might want to just pass these objects in to the constructors of the + //appropriate stage. regFile is in iew, freeList in dispatch, renameMap + //in dispatch, and the rob in commit. + /** The register file. */ + typename CPUPolicy::RegFile regFile; + + /** The free list. */ + typename CPUPolicy::FreeList freeList; + + /** The rename map. */ + typename CPUPolicy::RenameMap renameMap; + + /** The re-order buffer. */ + typename CPUPolicy::ROB rob; + + public: + /** Typedefs from the Impl to get the structs that each of the + * time buffers should use. + */ + typedef typename Impl::TimeStruct TimeStruct; + + typedef typename Impl::FetchStruct FetchStruct; + + typedef typename Impl::DecodeStruct DecodeStruct; + + typedef typename Impl::RenameStruct RenameStruct; + + typedef typename Impl::IEWStruct IEWStruct; + + /** The main time buffer to do backwards communication. */ + TimeBuffer timeBuffer; + + /** The fetch stage's instruction queue. */ + TimeBuffer fetchQueue; + + /** The decode stage's instruction queue. */ + TimeBuffer decodeQueue; + + /** The rename stage's instruction queue. */ + TimeBuffer renameQueue; + + /** The IEW stage's instruction queue. */ + TimeBuffer iewQueue; + + public: + /** The temporary exec context to support older accessors. */ + ExecContext *xc; + + /** Temporary function to get pointer to exec context. */ + ExecContext *xcBase() { return xc; } + + InstSeqNum globalSeqNum; + +#ifdef FULL_SYSTEM + System *system; + + MemoryController *memCtrl; + PhysicalMemory *physmem; + + AlphaITB *itb; + AlphaDTB *dtb; + +// SWContext *swCtx; +#else + Process *process; + + // Address space ID. Note that this is used for TIMING cache + // simulation only; all functional memory accesses should use + // one of the FunctionalMemory pointers above. + short asid; +#endif + + FunctionalMemory *mem; + + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + bool deferRegistration; + + Counter numInsts; + + Counter funcExeInst; +}; + +#endif diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc new file mode 100644 index 000000000..8abb2f196 --- /dev/null +++ b/cpu/beta_cpu/iew.cc @@ -0,0 +1,8 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/iew_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleIEW; diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh new file mode 100644 index 000000000..52b9ccdb0 --- /dev/null +++ b/cpu/beta_cpu/iew.hh @@ -0,0 +1,166 @@ +//Todo: Update with statuses. Create constructor. Fix up time buffer stuff. +//Will also need a signal heading back at least one stage to rename to say +//how many empty skid buffer entries there are. Perhaps further back even. +//Need to handle delaying writes to the writeback bus if it's full at the +//given time. Squash properly. Load store queue. + +#ifndef __SIMPLE_IEW_HH__ +#define __SIMPLE_IEW_HH__ + +// To include: time buffer, structs, queue, +#include + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" + +//Can IEW even stall? Space should be available/allocated already...maybe +//if there's not enough write ports on the ROB or waiting for CDB +//arbitration. +template +class SimpleIEW +{ + private: + //Typedefs from Impl + typedef typename Impl::ISA ISA; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::CPUPol::RenameMap RenameMap; + + typedef typename Impl::TimeStruct TimeStruct; + typedef typename Impl::IEWStruct IEWStruct; + typedef typename Impl::RenameStruct RenameStruct; + typedef typename Impl::IssueStruct IssueStruct; + + public: + enum Status { + Running, + Blocked, + Idle, + Squashing, + Unblocking + }; + + private: + Status _status; + Status _issueStatus; + Status _exeStatus; + Status _wbStatus; + + public: + void squash(); + + void squash(DynInst *inst); + + void block(); + + inline void unblock(); + + public: + SimpleIEW(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setRenameQueue(TimeBuffer *rq_ptr); + + void setIEWQueue(TimeBuffer *iq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void wakeDependents(DynInst *inst); + + void tick(); + + void iew(); + + private: + //Interfaces to objects inside and outside of IEW. + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toRename; + + /** Rename instruction queue interface. */ + TimeBuffer *renameQueue; + + /** Wire to get rename's output from rename queue. */ + typename TimeBuffer::wire fromRename; + + /** Issue stage queue. */ + TimeBuffer issueToExecQueue; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + /** + * IEW stage time buffer. Holds ROB indices of instructions that + * can be marked as completed. + */ + TimeBuffer *iewQueue; + + /** Wire to write infromation heading to commit. */ + typename TimeBuffer::wire toCommit; + + //Will need internal queue to hold onto instructions coming from + //the rename stage in case of a stall. + /** Skid buffer between rename and IEW. */ + queue skidBuffer; + + /** Instruction queue. */ + IQ instQueue; + + /** Pointer to rename map. Might not want this stage to directly + * access this though... + */ + RenameMap *renameMap; + + /** CPU interface. */ + FullCPU *cpu; + + private: + /** Commit to IEW delay, in ticks. */ + unsigned commitToIEWDelay; + + /** Rename to IEW delay, in ticks. */ + unsigned renameToIEWDelay; + + /** + * Issue to execute delay, in ticks. What this actually represents is + * the amount of time it takes for an instruction to wake up, be + * scheduled, and sent to a FU for execution. + */ + unsigned issueToExecuteDelay; + + /** Width of issue's read path, in instructions. The read path is both + * the skid buffer and the rename instruction queue. + * Note to self: is this really different than issueWidth? + */ + unsigned issueReadWidth; + + /** Width of issue, in instructions. */ + unsigned issueWidth; + + /** Width of execute, in instructions. Might make more sense to break + * down into FP vs int. + */ + unsigned executeWidth; + + /** Number of cycles stage has been squashing. Used so that the stage + * knows when it can start unblocking, which is when the previous stage + * has received the stall signal and clears up its outputs. + */ + unsigned cyclesSquashing; + + //Will implement later + //Load queue interface (probably one and the same) + //Store queue interface +}; + +#endif diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh new file mode 100644 index 000000000..b198220f5 --- /dev/null +++ b/cpu/beta_cpu/iew_impl.hh @@ -0,0 +1,443 @@ +// @todo: Fix the instantaneous communication among all the stages within +// iew. There's a clear delay between issue and execute, yet backwards +// communication happens simultaneously. Might not be that bad really... +// it might skew stats a bit though. Issue would otherwise try to issue +// instructions that would never be executed if there were a delay; without +// it issue will simply squash. Make this stage block properly. Make this +// stage delay after a squash properly. Update the statuses for each stage. +// Actually read instructions out of the skid buffer. + +#include + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/iew.hh" + +template +SimpleIEW::SimpleIEW(Params ¶ms) + : // Just make this time buffer really big for now + issueToExecQueue(20, 20), + instQueue(params), + commitToIEWDelay(params.commitToIEWDelay), + renameToIEWDelay(params.renameToIEWDelay), + issueToExecuteDelay(params.issueToExecuteDelay), + issueReadWidth(params.issueWidth), + issueWidth(params.issueWidth), + executeWidth(params.executeWidth) +{ + DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth); + _status = Idle; + _issueStatus = Idle; + _exeStatus = Idle; + _wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute. + instQueue.setIssueToExecuteQueue(&issueToExecQueue); +} + +template +void +SimpleIEW::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); + cpu = cpu_ptr; + + instQueue.setCPU(cpu_ptr); +} + +template +void +SimpleIEW::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToIEWDelay); + + // Setup wire to write information back to previous stages. + toRename = timeBuffer->getWire(0); + + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template +void +SimpleIEW::setRenameQueue(TimeBuffer *rq_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to read information from rename queue. + fromRename = renameQueue->getWire(-renameToIEWDelay); +} + +template +void +SimpleIEW::setIEWQueue(TimeBuffer *iq_ptr) +{ + DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to write instructions to commit. + toCommit = iewQueue->getWire(0); +} + +template +void +SimpleIEW::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template +void +SimpleIEW::wakeDependents(DynInst *inst) +{ + instQueue.wakeDependents(inst); +} + +template +void +SimpleIEW::block() +{ + DPRINTF(IEW, "IEW: Blocking.\n"); + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromRename); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template +inline void +SimpleIEW::unblock() +{ + // Check if there's information in the skid buffer. If there is, then + // set status to unblocking, otherwise set it directly to running. + DPRINTF(IEW, "IEW: Reading instructions out of the skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toRename->iewInfo.stall = true; + } else { + DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); + _status = Running; + } +} + +template +void +SimpleIEW::squash() +{ + DPRINTF(IEW, "IEW: Squashing all instructions.\n"); + _status = Squashing; + + // Tell the IQ to start squashing. + instQueue.squash(); + + // Tell rename to squash through the time buffer. + // This communication may be redundant depending upon where squash() + // is called. +// toRename->iewInfo.squash = true; +} + +template +void +SimpleIEW::squash(DynInst *inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC:%#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toRename->iewInfo.squash = true; + // Also send PC update information back to prior stages. + toRename->iewInfo.squashedSeqNum = inst->seqNum; + toRename->iewInfo.nextPC = inst->readCalcTarg(); + toRename->iewInfo.predIncorrect = true; +} + +template +void +SimpleIEW::tick() +{ + // Considering putting all the state-determining stuff in this section. + + // Try to fill up issue queue with as many instructions as bandwidth + // allows. + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + + // Check if the stage is in a running status. + if (_status != Blocked && _status != Squashing) { + DPRINTF(IEW, "IEW: Status is not blocked, attempting to run " + "stage.\n"); + iew(); + + // If it's currently unblocking, check to see if it should switch + // to running. + if (_status == Unblocking) { + unblock(); + } + } else if (_status == Squashing) { + + DPRINTF(IEW, "IEW: Still squashing.\n"); + + // Check if stage should remain squashing. Stop squashing if the + // squash signal clears. + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + DPRINTF(IEW, "IEW: Done squashing, changing status to " + "running.\n"); + + _status = Running; + instQueue.stopSquash(); + } else { + instQueue.doSquash(); + } + + // Also should advance its own time buffers if the stage ran. + // Not sure about this... +// issueToExecQueue.advance(); + } else if (_status == Blocked) { + // Continue to tell previous stage to stall. + toRename->iewInfo.stall = true; + + // Check if possible stall conditions have cleared. + if (!fromCommit->commitInfo.stall && + !instQueue.isFull()) { + DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n"); + _status = Unblocking; + } + + // If there's still instructions coming from rename, continue to + // put them on the skid buffer. + if (fromRename->insts[0] != NULL) { + block(); + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + } + + // @todo: Maybe put these at the beginning, so if it's idle it can + // return early. + // Write back number of free IQ entries here. + toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", + instQueue.numFreeEntries()); +} + +template +void +SimpleIEW::iew() +{ + // Might want to put all state checks in the tick() function. + // Check if being told to stall from commit. + if (fromCommit->commitInfo.stall) { + block(); + return; + } else if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + // Also check if commit is telling this stage to squash. + squash(); + return; + } + + //////////////////////////////////////// + //ISSUE stage + //////////////////////////////////////// + + //Put into its own function? + //Add instructions to IQ if there are any instructions there + + // Check if there are any instructions coming from rename, and we're. + // not squashing. + if (fromRename->insts[0] != NULL && _status != Squashing) { + + // Loop through the instructions, putting them in the instruction + // queue. + for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num) + { + DynInst *inst = fromRename->insts[inst_num]; + + // Make sure there's a valid instruction there. + if (inst == NULL) + break; + + DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", + inst->readPC()); + + // If it's a memory reference, don't put it in the + // instruction queue. These will only be executed at commit. + // Do the same for nonspeculative instructions and nops. + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Issue: Memory instruction " + "encountered, skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + continue; + } else if (inst->isNonSpeculative()) { + DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " + "encountered, skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + continue; + } else if (inst->isNop()) { + DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " + ", skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + continue; + } else if (instQueue.isFull()) { + DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); + // Call function to start blocking. + block(); + // Tell previous stage to stall. + toRename->iewInfo.stall = true; + break; + } + + // If the instruction queue is not full, then add the + // instruction. + instQueue.insert(fromRename->insts[inst_num]); + } + } + + // Have the instruction queue try to schedule any ready instructions. + instQueue.scheduleReadyInsts(); + + //////////////////////////////////////// + //EXECUTE/WRITEBACK stage + //////////////////////////////////////// + + //Put into its own function? + //Similarly should probably have separate execution for int vs FP. + // Above comment is handled by the issue queue only issuing a valid + // mix of int/fp instructions. + //Actually okay to just have one execution, buuuuuut will need + //somewhere that defines the execution latency of all instructions. + // @todo: Move to the FU pool used in the current full cpu. + + int fu_usage = 0; + + // Execute/writeback any instructions that are available. + for (int inst_num = 0; + fu_usage < executeWidth && /* Haven't exceeded available FU's. */ + inst_num < issueWidth && /* Haven't exceeded issue width. */ + fromIssue->insts[inst_num]; /* There are available instructions. */ + ++inst_num) { + DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); + + // Get instruction from issue's queue. + DynInst *inst = fromIssue->insts[inst_num]; + + DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); + + inst->setExecuted(); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + continue; + } + + // If an instruction is executed, then count it towards FU usage. + ++fu_usage; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + inst->execute(); + + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + // Add finished instruction to queue to commit. + toCommit->insts[inst_num] = inst; + + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + if (inst->mispredicted()) { + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squash(inst); + + // Not sure it really needs to break. +// break; + } + } + + // Loop through the head of the time buffer and wake any dependents. + // These instructions are about to write back. In the simple model + // this loop can really happen within the previous loop, but when + // instructions have actual latencies, this loop must be separate. + // Also mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to rename map, or have this as + // part of backwards communication. + for (int inst_num = 0; inst_num < executeWidth && + toCommit->insts[inst_num] != NULL; inst_num++) + { + DynInst *inst = toCommit->insts[inst_num]; + + DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", + inst->readPC()); + + instQueue.wakeDependents(inst); + + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } + } + + // Also should advance its own time buffers if the stage ran. + // Not the best place for it, but this works (hopefully). + issueToExecQueue.advance(); +} diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc new file mode 100644 index 000000000..43b0a4572 --- /dev/null +++ b/cpu/beta_cpu/inst_queue.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template InstructionQueue; diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh new file mode 100644 index 000000000..5741bfcf5 --- /dev/null +++ b/cpu/beta_cpu/inst_queue.hh @@ -0,0 +1,243 @@ +#ifndef __INST_QUEUE_HH__ +#define __INST_QUEUE_HH__ + +#include +#include +#include + +#include "base/timebuf.hh" + +using namespace std; + +//Perhaps have a better separation between the data structure underlying +//and the actual algorithm. +//somewhat nasty to try to have a nice ordering. +// Consider moving to STL list or slist for the LL stuff. + +/** + * A standard instruction queue class. It holds instructions in an + * array, holds the ordering of the instructions within a linked list, + * and tracks producer/consumer dependencies within a separate linked + * list. Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. + */ +template +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::Params Params; + + typedef typename Impl::IssueStruct IssueStruct; + typedef typename Impl::TimeStruct TimeStruct; + + // Typedef of iterator through the list of instructions. Might be + // better to untie this from the FullCPU or pass its information to + // the stages. + typedef typename list::iterator ListIt; + + /** + * Class for priority queue entries. Mainly made so that the < operator + * is defined. + */ + struct ReadyEntry { + DynInst *inst; + + ReadyEntry(DynInst *_inst) + : inst(_inst) + { } + + /** Compare(lhs,rhs) checks if rhs is "bigger" than lhs. If so, rhs + * goes higher on the priority queue. The oldest instruction should + * be on the top of the instruction queue, so in this case "bigger" + * has the reverse meaning; the instruction with the lowest + * sequence number is on the top. + */ + bool operator <(const ReadyEntry &rhs) const + { + if (this->inst->seqNum > rhs.inst->seqNum) + return true; + return false; + } + }; + + InstructionQueue(Params ¶ms); + + void setCPU(FullCPU *cpu); + + void setIssueToExecuteQueue(TimeBuffer *i2eQueue); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + unsigned numFreeEntries(); + + bool isFull(); + + void insert(DynInst *new_inst); + + void advanceTail(DynInst *inst); + + void scheduleReadyInsts(); + + void wakeDependents(DynInst *completed_inst); + + void doSquash(); + + void squash(); + + void stopSquash(); + + private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer *issueToExecuteQueue; + + /** The backwards time buffer. */ + TimeBuffer *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer::wire fromCommit; + + enum InstList { + Int, + Float, + Branch, + Squashed, + None + }; + + /** List of ready int instructions. Used to keep track of the order in + * which */ + priority_queue readyIntInsts; + + /** List of ready floating point instructions. */ + priority_queue readyFloatInsts; + + /** List of ready branch instructions. */ + priority_queue readyBranchInsts; + + /** List of squashed instructions (which are still valid and in IQ). + * Implemented using a priority queue; the entries must contain both + * the IQ index and sequence number of each instruction so that + * ordering based on sequence numbers can be used. + */ + priority_queue squashedInsts; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The number of integer instructions that can be issued in one + * cycle. + */ + unsigned intWidth; + + /** The number of floating point instructions that can be issued + * in one cycle. + */ + unsigned floatWidth; + + /** The number of branches that can be issued in one cycle. */ + unsigned branchWidth; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + + //The number of physical registers in the CPU. + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Iterator that points to the oldest instruction in the IQ. */ + ListIt head; + + /** Iterator that points to the youngest instruction in the IQ. */ + ListIt tail; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt; + + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DynInst *inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. So for now it will sit here, within the IQ, until + //a better implementation is decided upon. + // This function probably shouldn't be within the entry... + void insert(DynInst *new_inst); + + void remove(DynInst *inst_to_remove); + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector regScoreboard; + + bool addToDependents(DynInst *new_inst); + void insertDependency(DynInst *new_inst); + void createDependency(DynInst *new_inst); + + void addIfReady(DynInst *inst); +}; + +#endif //__INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh new file mode 100644 index 000000000..6f1f06858 --- /dev/null +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -0,0 +1,684 @@ +#ifndef __INST_QUEUE_IMPL_HH__ +#define __INST_QUEUE_IMPL_HH__ + +// Todo: Fix up consistency errors about back of the ready list being +// the oldest instructions in the queue. When woken up from the dependency +// graph they will be the oldest, but when they are immediately executable +// newer instructions will mistakenly get inserted onto the back. Also +// current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleRreadyInsts(), gets put onto the +// normal ready queue. This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards. + +#include + +#include "sim/universe.hh" +#include "cpu/beta_cpu/inst_queue.hh" + +// Either compile error or max int due to sign extension. +// Blatant hack to avoid compile warnings. +const InstSeqNum MaxInstSeqNum = 0 - 1; + +template +InstructionQueue::InstructionQueue(Params ¶ms) + : numEntries(params.numIQEntries), + intWidth(params.executeIntWidth), + floatWidth(params.executeFloatWidth), + numPhysIntRegs(params.numPhysIntRegs), + numPhysFloatRegs(params.numPhysFloatRegs), + commitToIEWDelay(params.commitToIEWDelay) +{ + // HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER. + totalWidth = 1; + branchWidth = 1; + DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); + + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + + DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. + dependGraph = new DependencyEntry[numPhysRegs]; + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + + // Initialize all the head pointers to point to NULL, and all the + // entries as unready. + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. + for (int i = 0; i < numPhysRegs; ++i) + { + dependGraph[i].next = NULL; + dependGraph[i].inst = NULL; + regScoreboard[i] = false; + } + +} + +template +void +InstructionQueue::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); +} + +template +void +InstructionQueue::setIssueToExecuteQueue( + TimeBuffer *i2e_ptr) +{ + DPRINTF(IQ, "IQ: Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} + +template +void +InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(IQ, "IQ: Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template +bool +InstructionQueue::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template +unsigned +InstructionQueue::numFreeEntries() +{ + return freeEntries; +} + +template +void +InstructionQueue::insert(DynInst *new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != new_inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); + + assert(freeEntries == (numEntries - countInsts())); +} + +// Slightly hack function to advance the tail iterator in the case that +// the IEW stage issues an instruction that is not added to the IQ. This +// is needed in case a long chain of such instructions occurs. +template +void +InstructionQueue::advanceTail(DynInst *inst) +{ + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + assert(freeEntries <= numEntries); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +// Need to make sure the number of float and integer instructions +// issued does not exceed the total issue bandwidth. Probably should +// have some sort of limit of total number of branches that can be issued +// as well. +template +void +InstructionQueue::scheduleReadyInsts() +{ + DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from " + "the IQ.\n"); + + int int_issued = 0; + int float_issued = 0; + int branch_issued = 0; + int squashed_issued = 0; + int total_issued = 0; + + IssueStruct *i2e_info = issueToExecuteQueue->access(0); + + bool insts_available = !readyBranchInsts.empty() || + !readyIntInsts.empty() || + !readyFloatInsts.empty() || + !squashedInsts.empty(); + + // Note: Requires a globally defined constant. + InstSeqNum oldest_inst = MaxInstSeqNum; + InstList list_with_oldest = None; + + // Temporary values. + DynInst *int_head_inst; + DynInst *float_head_inst; + DynInst *branch_head_inst; + DynInst *squashed_head_inst; + + // Somewhat nasty code to look at all of the lists where issuable + // instructions are located, and choose the oldest instruction among + // those lists. Consider a rewrite in the future. + while (insts_available && total_issued < totalWidth) + { + // Set this to false. Each if-block is required to set it to true + // if there were instructions available this check. This will cause + // this loop to run once more than necessary, but avoids extra calls. + insts_available = false; + + oldest_inst = MaxInstSeqNum; + + list_with_oldest = None; + + if (!readyIntInsts.empty() && + int_issued < intWidth) { + + insts_available = true; + + int_head_inst = readyIntInsts.top().inst; + + if (int_head_inst->isSquashed()) { + readyIntInsts.pop(); + continue; + } + + oldest_inst = int_head_inst->seqNum; + + list_with_oldest = Int; + } + + if (!readyFloatInsts.empty() && + float_issued < floatWidth) { + + insts_available = true; + + float_head_inst = readyFloatInsts.top().inst; + + if (float_head_inst->isSquashed()) { + readyFloatInsts.pop(); + continue; + } else if (float_head_inst->seqNum < oldest_inst) { + oldest_inst = float_head_inst->seqNum; + + list_with_oldest = Float; + } + } + + if (!readyBranchInsts.empty() && + branch_issued < branchWidth) { + + insts_available = true; + + branch_head_inst = readyBranchInsts.top().inst; + + if (branch_head_inst->isSquashed()) { + readyBranchInsts.pop(); + continue; + } else if (branch_head_inst->seqNum < oldest_inst) { + oldest_inst = branch_head_inst->seqNum; + + list_with_oldest = Branch; + } + + } + + if (!squashedInsts.empty()) { + + insts_available = true; + + squashed_head_inst = squashedInsts.top().inst; + + if (squashed_head_inst->seqNum < oldest_inst) { + list_with_oldest = Squashed; + } + + } + + DynInst *issuing_inst = NULL; + + switch (list_with_oldest) { + case None: + DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " + "inst is %#x.\n", issuing_inst); + break; + case Int: + issuing_inst = int_head_inst; + readyIntInsts.pop(); + ++int_issued; + DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Float: + issuing_inst = float_head_inst; + readyFloatInsts.pop(); + ++float_issued; + DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Branch: + issuing_inst = branch_head_inst; + readyBranchInsts.pop(); + ++branch_issued; + DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Squashed: + issuing_inst = squashed_head_inst; + squashedInsts.pop(); + ++squashed_issued; + DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", + issuing_inst->readPC()); + break; + } + + if (list_with_oldest != None) { + i2e_info->insts[total_issued] = issuing_inst; + + issuing_inst->setIssued(); + + ++freeEntries; + ++total_issued; + } + + assert(freeEntries == (numEntries - countInsts())); + } +} + +template +void +InstructionQueue::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInst *squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + // Remove the instruction from the dependency list. + int8_t total_src_regs = squashed_inst->numSrcRegs(); + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx)) { + int8_t src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + dependGraph[src_reg].remove(squashed_inst); + } + } + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + + ReadyEntry temp(squashed_inst); + + squashedInsts.push(temp); + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + squashIt--; + } +} + +template +void +InstructionQueue::squash() +{ + DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n"); + + // Read instruction sequence number of last instruction out of the + // time buffer. + squashedSeqNum = fromCommit->commitInfo.doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt = tail; + + // Call doSquash. + doSquash(); +} + +template +void +InstructionQueue::stopSquash() +{ + // Clear up the squash variables to ensure that squashing doesn't + // get called improperly. + squashedSeqNum = 0; + + squashIt = cpu->instList.end(); +} + +template +int +InstructionQueue::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + count_it++; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. + if (count_it != cpu->instList.end() && + (*count_it) != NULL && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template +void +InstructionQueue::wakeDependents(DynInst *completed_inst) +{ + DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); + //Look at the physical destination register of the DynInst + //and look it up on the dependency graph. Then mark as ready + //any instructions within the instruction queue. + int8_t total_dest_regs = completed_inst->numDestRegs(); + + DependencyEntry *curr; + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle thie. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + while (dependGraph[dest_reg].next != NULL) { + + curr = dependGraph[dest_reg].next; + + DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + dependGraph[dest_reg].next = curr->next; + + delete curr; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +} + +template +bool +InstructionQueue::addToDependents(DynInst *new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template +void +InstructionQueue::createDependency(DynInst *new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + int8_t dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + dependGraph[dest_reg].inst = new_inst; + if (dependGraph[dest_reg].next != NULL) { + panic("Dependency chain is not empty.\n"); + } + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} + +template +void +InstructionQueue::DependencyEntry::insert(DynInst *new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; +} + +template +void +InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while(curr->inst != inst_to_remove) + { + prev = curr; + curr = curr->next; + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + delete curr; +} + +template +void +InstructionQueue::addIfReady(DynInst *inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + ReadyEntry to_add(inst); + //Add the instruction to the proper ready list. + if (inst->isInteger()) { + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(to_add); + } else if (inst->isFloating()) { + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(to_add); + } else if (inst->isControl()) { + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyBranchInsts.push(to_add); + } else { + panic("IQ: Instruction not an expected type.\n"); + } + } +} + +#endif // __INST_QUEUE_IMPL_HH__ diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh new file mode 100644 index 000000000..21e0ce218 --- /dev/null +++ b/cpu/beta_cpu/regfile.hh @@ -0,0 +1,583 @@ +#ifndef __REGFILE_HH__ +#define __REGFILE_HH__ + +// @todo: Destructor + +using namespace std; + +#include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/comm.hh" + +// This really only depends on the ISA, and not the Impl. It might be nicer +// to see if I can make it depend on nothing... +// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, +// and should go in the AlphaFullCPU. + +template +class PhysRegFile +{ + //Note that most of the definitions of the IntReg, FloatReg, etc. exist + //within the Impl class and not within this PhysRegFile class. + + //Will need some way to allow stuff like swap_palshadow to access the + //correct registers. Might require code changes to swap_palshadow and + //other execution contexts. + + //Will make these registers public for now, but they probably should + //be private eventually with some accessor functions. + public: + typedef typename Impl::ISA ISA; + + PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs); + + //Everything below should be pretty well identical to the normal + //register file that exists within AlphaISA class. + //The duplication is unfortunate but it's better than having + //different ways to access certain registers. + + //Add these in later when everything else is in place +// void serialize(std::ostream &os); +// void unserialize(Checkpoint *cp, const std::string §ion); + + uint64_t readIntReg(PhysRegIndex reg_idx) + { + DPRINTF(IEW, "RegFile: Access to int register %i, has data " + "%i\n", int(reg_idx), intRegFile[reg_idx]); + return intRegFile[reg_idx]; + } + + float readFloatRegSingle(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Access to float register %i, has data " + "%f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); + + return (float)floatRegFile[reg_idx].d; + } + + double readFloatRegDouble(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Access to float register %i, has data " + "%f\n", int(reg_idx), floatRegFile[reg_idx].d); + + return floatRegFile[reg_idx].d; + } + + uint64_t readFloatRegInt(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Access to float register %i, has data " + "%f\n", int(reg_idx), floatRegFile[reg_idx].q); + + return floatRegFile[reg_idx].q; + } + + void setIntReg(PhysRegIndex reg_idx, uint64_t val) + { + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", + int(reg_idx), val); + + intRegFile[reg_idx] = val; + } + + void setFloatRegSingle(PhysRegIndex reg_idx, float val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + int(reg_idx), val); + + floatRegFile[reg_idx].d = (double)val; + } + + void setFloatRegDouble(PhysRegIndex reg_idx, double val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + int(reg_idx), val); + + floatRegFile[reg_idx].d = val; + } + + void setFloatRegInt(PhysRegIndex reg_idx, uint64_t val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), val); + + floatRegFile[reg_idx].q = val; + } + + uint64_t readPC() + { + return pc; + } + + void setPC(uint64_t val) + { + pc = val; + } + + void setNextPC(uint64_t val) + { + npc = val; + } + + //Consider leaving this stuff and below in some implementation specific + //file as opposed to the general register file. Or have a derived class. + uint64_t readUniq() + { + return miscRegs.uniq; + } + + void setUniq(uint64_t val) + { + miscRegs.uniq = val; + } + + uint64_t readFpcr() + { + return miscRegs.fpcr; + } + + void setFpcr(uint64_t val) + { + miscRegs.fpcr = val; + } + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + int readIntrFlag() { return intrflag; } + void setIntrFlag(int val) { intrflag = val; } +#endif + + // These should be private eventually, but will be public for now + // so that I can hack around the initregs issue. + public: + /** (signed) integer register file. */ + IntReg *intRegFile; + + /** Floating point register file. */ + FloatReg *floatRegFile; + + /** Miscellaneous register file. */ + MiscRegFile miscRegs; + + Addr pc; // program counter + Addr npc; // next-cycle program counter + + private: + unsigned numPhysicalIntRegs; + unsigned numPhysicalFloatRegs; +}; + +template +PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs) + : numPhysicalIntRegs(_numPhysicalIntRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs) +{ + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; + + memset(intRegFile, 0, sizeof(*intRegFile)); + memset(floatRegFile, 0, sizeof(*floatRegFile)); +} + +#ifdef FULL_SYSTEM + +//Problem: This code doesn't make sense at the RegFile level because it +//needs things such as the itb and dtb. Either put it at the CPU level or +//the DynInst level. +template +uint64_t +PhysRegFile::readIpr(int idx, Fault &fault) +{ + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case ISA::IPR_PALtemp0: + case ISA::IPR_PALtemp1: + case ISA::IPR_PALtemp2: + case ISA::IPR_PALtemp3: + case ISA::IPR_PALtemp4: + case ISA::IPR_PALtemp5: + case ISA::IPR_PALtemp6: + case ISA::IPR_PALtemp7: + case ISA::IPR_PALtemp8: + case ISA::IPR_PALtemp9: + case ISA::IPR_PALtemp10: + case ISA::IPR_PALtemp11: + case ISA::IPR_PALtemp12: + case ISA::IPR_PALtemp13: + case ISA::IPR_PALtemp14: + case ISA::IPR_PALtemp15: + case ISA::IPR_PALtemp16: + case ISA::IPR_PALtemp17: + case ISA::IPR_PALtemp18: + case ISA::IPR_PALtemp19: + case ISA::IPR_PALtemp20: + case ISA::IPR_PALtemp21: + case ISA::IPR_PALtemp22: + case ISA::IPR_PALtemp23: + case ISA::IPR_PAL_BASE: + + case ISA::IPR_IVPTBR: + case ISA::IPR_DC_MODE: + case ISA::IPR_MAF_MODE: + case ISA::IPR_ISR: + case ISA::IPR_EXC_ADDR: + case ISA::IPR_IC_PERR_STAT: + case ISA::IPR_DC_PERR_STAT: + case ISA::IPR_MCSR: + case ISA::IPR_ASTRR: + case ISA::IPR_ASTER: + case ISA::IPR_SIRR: + case ISA::IPR_ICSR: + case ISA::IPR_ICM: + case ISA::IPR_DTB_CM: + case ISA::IPR_IPLR: + case ISA::IPR_INTID: + case ISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case ISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case ISA::IPR_VA: + // SFX: unlocks interrupt status registers + retval = ipr[idx]; + + if (!misspeculating()) + regs.intrlock = false; + break; + + case ISA::IPR_VA_FORM: + case ISA::IPR_MM_STAT: + case ISA::IPR_IFAULT_VA_FORM: + case ISA::IPR_EXC_MASK: + case ISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case ISA::IPR_DTB_PTE: + { + ISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case ISA::IPR_HWINT_CLR: + case ISA::IPR_SL_XMIT: + case ISA::IPR_DC_FLUSH: + case ISA::IPR_IC_FLUSH: + case ISA::IPR_ALT_MODE: + case ISA::IPR_DTB_IA: + case ISA::IPR_DTB_IAP: + case ISA::IPR_ITB_IA: + case ISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +#ifdef DEBUG +// Cause the simulator to break when changing to the following IPL +int break_ipl = -1; +#endif + +template +Fault +PhysRegFile::setIpr(int idx, uint64_t val) +{ + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case ISA::IPR_PALtemp0: + case ISA::IPR_PALtemp1: + case ISA::IPR_PALtemp2: + case ISA::IPR_PALtemp3: + case ISA::IPR_PALtemp4: + case ISA::IPR_PALtemp5: + case ISA::IPR_PALtemp6: + case ISA::IPR_PALtemp7: + case ISA::IPR_PALtemp8: + case ISA::IPR_PALtemp9: + case ISA::IPR_PALtemp10: + case ISA::IPR_PALtemp11: + case ISA::IPR_PALtemp12: + case ISA::IPR_PALtemp13: + case ISA::IPR_PALtemp14: + case ISA::IPR_PALtemp15: + case ISA::IPR_PALtemp16: + case ISA::IPR_PALtemp17: + case ISA::IPR_PALtemp18: + case ISA::IPR_PALtemp19: + case ISA::IPR_PALtemp20: + case ISA::IPR_PALtemp21: + case ISA::IPR_PALtemp22: + case ISA::IPR_PAL_BASE: + case ISA::IPR_IC_PERR_STAT: + case ISA::IPR_DC_PERR_STAT: + case ISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case ISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case ISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). + ipr[idx] = val; + break; + + case ISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case ISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case ISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case ISA::IPR_ASTRR: + case ISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case ISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case ISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + + case ISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case ISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case ISA::IPR_MCSR: + // more here after optimization... + ipr[idx] = val; + break; + + case ISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case ISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case ISA::IPR_IVPTBR: + case ISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case ISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case ISA::IPR_DC_MODE: + case ISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case ISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case ISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case ISA::IPR_EXC_SUM: + case ISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case ISA::IPR_INTID: + case ISA::IPR_SL_RCV: + case ISA::IPR_MM_STAT: + case ISA::IPR_ITB_PTE_TEMP: + case ISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case ISA::IPR_HWINT_CLR: + case ISA::IPR_SL_XMIT: + case ISA::IPR_DC_FLUSH: + case ISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case ISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case ISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case ISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN])); + break; + + case ISA::IPR_DTB_TAG: { + struct ISA::PTE pte; + + // FIXME: granularity hints NYI... + if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case ISA::IPR_ITB_PTE: { + struct ISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + itb->insert(ipr[ISA::IPR_ITB_TAG], pte); + } + break; + + case ISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case ISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case ISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... + return No_Fault; +} + +#endif // #ifdef FULL_SYSTEM + +#endif // __REGFILE_HH__ diff --git a/cpu/beta_cpu/rename.cc b/cpu/beta_cpu/rename.cc new file mode 100644 index 000000000..bcce7ef49 --- /dev/null +++ b/cpu/beta_cpu/rename.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/rename_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleRename; diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh new file mode 100644 index 000000000..cd66ce686 --- /dev/null +++ b/cpu/beta_cpu/rename.hh @@ -0,0 +1,184 @@ +// Todo: +// Figure out rename map for reg vs fp (probably just have one rename map). +// In simple case, there is no renaming, so have this stage do basically +// nothing. +// Fix up trap and barrier handling. Fix up squashing too, as it's too +// dependent upon the iew stage continually telling it to squash. +// Have commit send back information whenever a branch has committed. This +// way the history buffer can be cleared beyond the point where the branch +// was. + +#ifndef __SIMPLE_RENAME_HH__ +#define __SIMPLE_RENAME_HH__ + +//Will want to include: time buffer, structs, free list, rename map +#include + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/free_list.hh" + +using namespace std; + +// Will need rename maps for both the int reg file and fp reg file. +// Or change rename map class to handle both. (RegFile handles both.) +template +class SimpleRename +{ + public: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::FetchStruct FetchStruct; + typedef typename Impl::DecodeStruct DecodeStruct; + typedef typename Impl::RenameStruct RenameStruct; + typedef typename Impl::TimeStruct TimeStruct; + + // Typedefs from the CPUPol + typedef typename CPUPol::FreeList FreeList; + typedef typename CPUPol::RenameMap RenameMap; + + // Typedefs from the ISA. + typedef typename ISA::Addr Addr; + + public: + // Rename will block if ROB becomes full or issue queue becomes full, + // or there are no free registers to rename to. + // Only case where rename squashes is if IEW squashes. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking, + BarrierStall + }; + + private: + Status _status; + + public: + SimpleRename(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setRenameQueue(TimeBuffer *rq_ptr); + + void setDecodeQueue(TimeBuffer *dq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void setFreeList(FreeList *fl_ptr); + + void dumpHistory(); + + void tick(); + + void rename(); + + void squash(); + + private: + void block(); + + inline void unblock(); + + void doSquash(); + + void removeFromHistory(InstSeqNum inst_seq_num); + + /** Holds the previous information for each rename. + * Note that often times the inst may have been deleted, so only access + * the pointer for the address and do not dereference it. + */ + struct RenameHistory { + RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg, + PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg) + : instSeqNum(_instSeqNum), archReg(_archReg), + newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg), + placeHolder(false) + { + } + + /** Constructor used specifically for cases where a place holder + * rename history entry is being made. + */ + RenameHistory(InstSeqNum _instSeqNum) + : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0), + prevPhysReg(0), placeHolder(true) + { + } + + InstSeqNum instSeqNum; + RegIndex archReg; + PhysRegIndex newPhysReg; + PhysRegIndex prevPhysReg; + bool placeHolder; + }; + + list historyBuffer; + + /** CPU interface. */ + FullCPU *cpu; + + // Interfaces to objects outside of rename. + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get IEW's output from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write infromation heading to previous stages. */ + // Might not be the best name as not only decode will read it. + typename TimeBuffer::wire toDecode; + + /** Rename instruction queue. */ + TimeBuffer *renameQueue; + + /** Wire to write any information heading to IEW. */ + typename TimeBuffer::wire toIEW; + + /** Decode instruction queue interface. */ + TimeBuffer *decodeQueue; + + /** Wire to get decode's output from decode queue. */ + typename TimeBuffer::wire fromDecode; + + /** Skid buffer between rename and decode. */ + queue skidBuffer; + + /** Rename map interface. */ + SimpleRenameMap *renameMap; + + /** Free list interface. */ + FreeList *freeList; + + /** Delay between iew and rename, in ticks. */ + int iewToRenameDelay; + + /** Delay between decode and rename, in ticks. */ + int decodeToRenameDelay; + + /** Delay between commit and rename, in ticks. */ + unsigned commitToRenameDelay; + + /** Rename width, in instructions. */ + unsigned renameWidth; + + /** Commit width, in instructions. Used so rename knows how many + * instructions might have freed registers in the previous cycle. + */ + unsigned commitWidth; +}; + +#endif // __SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh new file mode 100644 index 000000000..2b60c2f50 --- /dev/null +++ b/cpu/beta_cpu/rename_impl.hh @@ -0,0 +1,593 @@ +#include + +#include "cpu/beta_cpu/rename.hh" + +template +SimpleRename::SimpleRename(Params ¶ms) + : iewToRenameDelay(params.iewToRenameDelay), + decodeToRenameDelay(params.decodeToRenameDelay), + commitToRenameDelay(params.commitToRenameDelay), + renameWidth(params.renameWidth), + commitWidth(params.commitWidth) +{ + _status = Idle; +} + +template +void +SimpleRename::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Rename, "Rename: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void +SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(Rename, "Rename: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from IEW stage. + fromIEW = timeBuffer->getWire(-iewToRenameDelay); + + // Setup wire to read infromation from time buffer, from commit stage. + fromCommit = timeBuffer->getWire(-commitToRenameDelay); + + // Setup wire to write information to previous stages. + toDecode = timeBuffer->getWire(0); +} + +template +void +SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to write information to future stages. + toIEW = renameQueue->getWire(0); +} + +template +void +SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) +{ + DPRINTF(Rename, "Rename: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to get information from decode. + fromDecode = decodeQueue->getWire(-decodeToRenameDelay); + +} + +template +void +SimpleRename::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template +void +SimpleRename::setFreeList(FreeList *fl_ptr) +{ + DPRINTF(Rename, "Rename: Setting free list pointer.\n"); + freeList = fl_ptr; +} + +template +void +SimpleRename::dumpHistory() +{ + typename list::iterator buf_it = historyBuffer.begin(); + + while (buf_it != historyBuffer.end()) + { + cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " + "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, + (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); + + buf_it++; + } +} + +template +void +SimpleRename::block() +{ + DPRINTF(Rename, "Rename: Blocking.\n"); + // Set status to Blocked. + _status = Blocked; + + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template +inline void +SimpleRename::unblock() +{ + DPRINTF(Rename, "Rename: Reading instructions out of skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toDecode->renameInfo.stall = true; + } else { + DPRINTF(Rename, "Rename: Done unblocking.\n"); + _status = Running; + } +} + +template +void +SimpleRename::doSquash() +{ + typename list::iterator hb_it = historyBuffer.begin(); + typename list::iterator delete_it; + + InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; + +#ifdef FULL_SYSTEM + assert(!historyBuffer.empty()); +#else + // After a syscall squashes everything, the history buffer may be empty + // but the ROB may still be squashing instructions. + if (historyBuffer.empty()) { + return; + } +#endif // FULL_SYSTEM + + // Go through the most recent instructions, undoing the mappings + // they did and freeing up the registers. + while ((*hb_it).instSeqNum > squashed_seq_num) + { + DPRINTF(Rename, "Rename: Removing history entry with sequence " + "number %i.\n", (*hb_it).instSeqNum); + + // If it's not simply a place holder, then add the registers. + if (!(*hb_it).placeHolder) { + // Tell the rename map to set the architected register to the + // previous physical register that it was renamed to. + renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg); + + // Put the renamed physical register back on the free list. + freeList->addReg(hb_it->newPhysReg); + } + + delete_it = hb_it; + + hb_it++; + + historyBuffer.erase(delete_it); + } +} + +template +void +SimpleRename::squash() +{ + DPRINTF(Rename, "Rename: Squashing instructions.\n"); + // Set the status to Squashing. + _status = Squashing; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } + + doSquash(); +} + +// In the future, when a SmartPtr is used for DynInst, then this function +// itself can handle returning the instruction's physical registers to +// the free list. +template +void +SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) +{ + DPRINTF(Rename, "Rename: Removing a committed instruction from the " + "history buffer, sequence number %lli.\n", inst_seq_num); + typename list::iterator hb_it = historyBuffer.end(); + + hb_it--; + + if (hb_it->instSeqNum > inst_seq_num) { + DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure " + "that a syscall happened recently.\n"); + return; + } + + for ( ; hb_it->instSeqNum != inst_seq_num; hb_it--) + { + // Make sure we haven't gone off the end of the list. + assert(hb_it != historyBuffer.end()); + + // In theory instructions at the end of the history buffer + // should be older than the instruction being removed, which + // means they will have a lower sequence number. Also the + // instruction being removed from the history really should + // be the last instruction in the list, as it is the instruction + // that was just committed that is being removed. + assert(hb_it->instSeqNum < inst_seq_num); + DPRINTF(Rename, "Rename: Committed instruction is not the last " + "entry in the history buffer.\n"); + } + + if (!(*hb_it).placeHolder) { + freeList->addReg(hb_it->prevPhysReg); + } + + historyBuffer.erase(hb_it); + +} + +template +void +SimpleRename::tick() +{ + // Rename will need to try to rename as many instructions as it + // has bandwidth, unless it is blocked. + + // Check if _status is BarrierStall. If so, then check if the number + // of free ROB entries is equal to the number of total ROB entries. + // Once equal then wake this stage up. Set status to unblocking maybe. + + if (_status != Blocked && _status != Squashing) { + DPRINTF(Rename, "Rename: Status is not blocked, will attempt to " + "run stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + rename(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + unblock(); + } + } else if (_status == Blocked) { + // If stage is blocked and still receiving valid instructions, + // make sure to store them in the skid buffer. + if (fromDecode->insts[0] != NULL) { + + block(); + + // Continue to tell previous stage to stall. + toDecode->renameInfo.stall = true; + } + + if (!fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall && + fromCommit->commitInfo.freeROBEntries != 0 && + fromIEW->iewInfo.freeIQEntries != 0) { + + // Need to be sure to check all blocking conditions above. + // If they have cleared, then start unblocking. + DPRINTF(Rename, "Rename: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this stage + // is done unblocking. + toDecode->renameInfo.stall = true; + } else { + // Otherwise no conditions have changed. Tell previous + // stage to continue blocking. + toDecode->renameInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + } else if (_status == Squashing) { + if (fromCommit->commitInfo.squash) { + squash(); + } else if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); + _status = Running; + } else { + doSquash(); + } + } + + // Ugly code, revamp all of the tick() functions eventually. + if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { + removeFromHistory(fromCommit->commitInfo.doneSeqNum); + } + + // Perhaps put this outside of this function, since this will + // happen regardless of whether or not the stage is blocked or + // squashing. + // Read from the time buffer any necessary data. + // Read registers that are freed, and add them to the freelist. + // This is unnecessary due to the history buffer (assuming the history + // buffer works properly). +/* + while(!fromCommit->commitInfo.freeRegs.empty()) + { + PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back(); + DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n", + (int)freed_reg); + freeList->addReg(freed_reg); + + fromCommit->commitInfo.freeRegs.pop_back(); + } +*/ + +} + +template +void +SimpleRename::rename() +{ + // Check if any of the stages ahead of rename are telling rename + // to squash. The squash() function will also take care of fixing up + // the rename map and the free list. + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + + // Check if time buffer is telling this stage to stall. + if (fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to " + "stall.\n"); + block(); + return; + } + + // Check if the current status is squashing. If so, set its status + // to running and resume execution the next cycle. + if (_status == Squashing) { + DPRINTF(Rename, "Rename: Done squashing.\n"); + _status = Running; + return; + } + + // Check the decode queue to see if instructions are available. + // If there are no available instructions to rename, then do nothing. + // Or, if the stage is currently unblocking, then go ahead and run it. + if (fromDecode->insts[0] == NULL && _status != Unblocking) { + DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); + // Should I change status to idle? + return; + } + + DynInst *inst; + unsigned num_inst = 0; + + bool insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] != NULL : + fromDecode->insts[num_inst] != NULL; + + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_src_regs; + unsigned num_dest_regs; + + // Will have to do a different calculation for the number of free + // entries. Number of free entries recorded on this cycle - + // renameWidth * renameToDecodeDelay + // Can I avoid a multiply? + unsigned free_rob_entries = + fromCommit->commitInfo.freeROBEntries - iewToRenameDelay; + DPRINTF(Rename, "Rename: ROB has %d free entries.\n", + free_rob_entries); + unsigned free_iq_entries = + fromIEW->iewInfo.freeIQEntries - iewToRenameDelay; + + // Check if there's any space left. + if (free_rob_entries == 0 || free_iq_entries == 0) { + DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " + "entries.\n" + "Rename: ROB has %d free entries.\n" + "Rename: IQ has %d free entries.\n", + free_rob_entries, + free_iq_entries); + block(); + // Tell previous stage to stall. + toDecode->renameInfo.stall = true; + + return; + } + + unsigned min_iq_rob = min(free_rob_entries, free_iq_entries); + unsigned num_insts_to_rename = min(min_iq_rob, renameWidth); + + while (insts_available && + num_inst < num_insts_to_rename) { + DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); + + // Get the next instruction either from the skid buffer or the + // decode queue. + inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : + fromDecode->insts[num_inst]; + + DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", + inst, inst->readPC()); + + // If it's a trap instruction, then it needs to wait here within + // rename until the ROB is empty. Needs a way to detect that the + // ROB is empty. Maybe an event? + // Would be nice if it could be avoided putting this into a + // specific stage and instead just put it into the AlphaFullCPU. + // Might not really be feasible though... + // (EXCB, TRAPB) + if (inst->isSerializing()) { + panic("Rename: Serializing instruction encountered.\n"); + DPRINTF(Rename, "Rename: Serializing instruction " + "encountered.\n"); + block(); + + // Change status over to BarrierStall so that other stages know + // what this is blocked on. + _status = BarrierStall; + + // Tell the previous stage to stall. + toDecode->renameInfo.stall = true; + + break; + } + + // Make sure there's enough room in the ROB and the IQ. + // This doesn't really need to be done dynamically; consider + // moving outside of this function. + if (free_rob_entries == 0 || free_iq_entries == 0) { + DPRINTF(Rename, "Rename: Blocking due to lack of ROB or IQ " + "entries.\n"); + // Call some sort of function to handle all the setup of being + // blocked. + block(); + + // Not really sure how to schedule an event properly, but an + // event must be scheduled such that upon freeing a ROB entry, + // this stage will restart up. Perhaps add in a ptr to an Event + // within the ROB that will be able to execute that Event + // if a free register is added to the freelist. + + // Tell the previous stage to stall. + toDecode->renameInfo.stall = true; + + break; + } + + // Temporary variables to hold number of source and destination regs. + num_src_regs = inst->numSrcRegs(); + num_dest_regs = inst->numDestRegs(); + + // Check here to make sure there are enough destination registers + // to rename to. Otherwise block. + if (renameMap->numFreeEntries() < num_dest_regs) + { + DPRINTF(Rename, "Rename: Blocking due to lack of free " + "physical registers to rename to.\n"); + // Call function to handle blocking. + block(); + + // Need some sort of event based on a register being freed. + + // Tell the previous stage to stall. + toDecode->renameInfo.stall = true; + + // Break out of rename loop. + break; + } + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. + RegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + } + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "register %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", inst->seqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + } + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. + if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + } + + // Put instruction in rename queue. + toIEW->insts[num_inst] = inst; + + // Decrease the number of free ROB and IQ entries. + --free_rob_entries; + --free_iq_entries; + + // Increment which instruction we're on. + ++num_inst; + + // Check whether or not there are instructions available. + // Either need to check within the skid buffer, or the decode + // queue, depending if this stage is unblocking or not. + // Hmm, dangerous check. Can touch memory not allocated. Might + // be better to just do check at beginning of loop. Or better + // yet actually pass the number of instructions issued. + insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] != NULL : + fromDecode->insts[num_inst] != NULL; + } + +} diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc new file mode 100644 index 000000000..c234182f0 --- /dev/null +++ b/cpu/beta_cpu/rename_map.cc @@ -0,0 +1,289 @@ + +#include "cpu/beta_cpu/rename_map.hh" + +// Todo: Consider making functions inline. Avoid having things that are +// using the zero register or misc registers from adding on the registers +// to the free list. + +SimpleRenameMap::RenameEntry::RenameEntry() + : physical_reg(0), valid(false) +{ +} + +SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + intZeroReg(_intZeroReg), + floatZeroReg(_floatZeroReg) +{ + DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: " + "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs, + numLogicalFloatRegs, numPhysicalFloatRegs); + + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Create the rename maps, and their scoreboards. + intRenameMap = new RenameEntry[numLogicalIntRegs]; + floatRenameMap = new RenameEntry[numLogicalFloatRegs]; + + intScoreboard.resize(numPhysicalIntRegs); + floatScoreboard.resize(numPhysicalFloatRegs); + miscScoreboard.resize(numMiscRegs); + + // Initialize the entries in the integer rename map to point to the + // physical registers of the same index, and consider each register + // ready until the first rename occurs. + for (RegIndex index = 0; index < numLogicalIntRegs; ++index) + { + intRenameMap[index].physical_reg = index; + intScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalIntRegs; + index < numPhysicalIntRegs; + ++index) + { + intScoreboard[index] = 0; + } + + // Initialize the entries in the floating point rename map to point to + // the physical registers of the same index, and consider each register + // ready until the first rename occurs. + for (RegIndex index = 0; index < numLogicalFloatRegs; ++index) + { + floatRenameMap[index].physical_reg = index + numPhysicalIntRegs; + floatScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalFloatRegs; + index < numPhysicalFloatRegs; + ++index) + { + floatScoreboard[index] = 0; + } + + // Initialize the entries in the misc register scoreboard to be ready. + for (RegIndex index = 0; index < numMiscRegs; ++index) + { + miscScoreboard[index] = 1; + } +} + +void +SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) +{ + //Setup the interface to the freelist. + freeList = fl_ptr; +} + + +// Don't allow this stage to fault; force that check to the rename stage. +// Simply ask to rename a logical register and get back a new physical +// register index. +SimpleRenameMap::RenameInfo +SimpleRenameMap::rename(RegIndex arch_reg) +{ + PhysRegIndex renamed_reg; + PhysRegIndex prev_reg; + + if (arch_reg < numLogicalIntRegs) { + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = intRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != intZeroReg) { + // Get a free physical register to rename to. + renamed_reg = freeList->getIntReg(); + + // Update the integer rename map. + intRenameMap[arch_reg].physical_reg = renamed_reg; + + // Mark register as not ready. + intScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = intZeroReg; + } + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base offset for floating point registers. + arch_reg = arch_reg - numLogicalIntRegs; + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = floatRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != floatZeroReg) { + // Get a free floating point register to rename to. + renamed_reg = freeList->getFloatReg(); + + // Update the floating point rename map. + floatRenameMap[arch_reg].physical_reg = renamed_reg; + + // Mark register as not ready. + floatScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = floatZeroReg; + } + } else { + // Subtract off the base offset for miscellaneous registers. + arch_reg = arch_reg - numLogicalRegs; + + // No renaming happens to the misc. registers. They are simply the + // registers that come after all the physical registers; thus + // take the base architected register and add the physical registers + // to it. + renamed_reg = arch_reg + numPhysicalRegs; + + // Set the previous register to the same register; mainly it must be + // known that the prev reg was outside the range of normal registers + // so the free list can avoid adding it. + prev_reg = renamed_reg; + + miscScoreboard[renamed_reg] = false; + } + + return RenameInfo(renamed_reg, prev_reg); +} + +//Perhaps give this a pair as a return value, of the physical register +//and whether or not it's ready. +PhysRegIndex +SimpleRenameMap::lookup(RegIndex arch_reg) +{ + if (arch_reg < numLogicalIntRegs) { + return intRenameMap[arch_reg].physical_reg; + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base FP offset. + arch_reg = arch_reg - numLogicalIntRegs; + + return floatRenameMap[arch_reg].physical_reg; + } else { + // Subtract off the misc registers offset. + arch_reg = arch_reg - numLogicalRegs; + + // Misc. regs don't rename, so simply add the base arch reg to + // the number of physical registers. + return numPhysicalRegs + arch_reg; + } +} + +bool +SimpleRenameMap::isReady(PhysRegIndex phys_reg) +{ + if (phys_reg < numPhysicalIntRegs) { + return intScoreboard[phys_reg]; + } else if (phys_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. + phys_reg = phys_reg - numPhysicalIntRegs; + + return floatScoreboard[phys_reg]; + } else { + // Subtract off the misc registers offset. + phys_reg = phys_reg - numPhysicalRegs; + + return miscScoreboard[phys_reg]; + } +} + +// In this implementation the miscellaneous registers do not actually rename, +// so this function does not allow you to try to change their mappings. +void +SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) +{ + if (arch_reg < numLogicalIntRegs) { + DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + intRenameMap[arch_reg].physical_reg = renamed_reg; + } else { +// assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); + + // Subtract off the base FP offset. + arch_reg = arch_reg - numLogicalIntRegs; + + DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + floatRenameMap[arch_reg].physical_reg = renamed_reg; + } +} + +void +SimpleRenameMap::squash(vector freed_regs, + vector unmaps) +{ + // Not sure the rename map should be able to access the free list + // like this. + while (!freed_regs.empty()) { + RegIndex free_register = freed_regs.back(); + + if (free_register < numPhysicalIntRegs) { + freeList->addIntReg(free_register); + } else { + // Subtract off the base FP dependence tag. + free_register = free_register - numPhysicalIntRegs; + freeList->addFloatReg(free_register); + } + + freed_regs.pop_back(); + } + + // Take unmap info and roll back the rename map. +} + +void +SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) +{ + DPRINTF(Rename, "Rename map: Marking register %i as ready.\n", + (int)ready_reg); + + if (ready_reg < numPhysicalIntRegs) { + intScoreboard[ready_reg] = 1; + } else if (ready_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. + ready_reg = ready_reg - numPhysicalIntRegs; + + floatScoreboard[ready_reg] = 1; + } else { + //Subtract off the misc registers offset. + ready_reg = ready_reg - numPhysicalRegs; + + miscScoreboard[ready_reg] = 1; + } +} + +int +SimpleRenameMap::numFreeEntries() +{ + int free_int_regs = freeList->numFreeIntRegs(); + int free_float_regs = freeList->numFreeFloatRegs(); + + if (free_int_regs < free_float_regs) { + return free_int_regs; + } else { + return free_float_regs; + } +} diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh new file mode 100644 index 000000000..05b52bfb2 --- /dev/null +++ b/cpu/beta_cpu/rename_map.hh @@ -0,0 +1,141 @@ +// Todo: Create destructor. +// Make it so that there's a proper separation between int and fp. Also +// have it so that there's a more meaningful name given to the variable +// that marks the beginning of the FP registers. + +#ifndef __RENAME_MAP_HH__ +#define __RENAME_MAP_HH__ + +#include +#include +#include + +//Will want to include faults +#include "cpu/beta_cpu/free_list.hh" + +using namespace std; + +class SimpleRenameMap +{ + public: +// typedef typename Impl::RegIndex RegIndex; + + /** + * Pair of a logical register and a physical register. Tells the + * previous mapping of a logical register to a physical register. + * Used to roll back the rename map to a previous state. + */ + typedef pair UnmapInfo; + + /** + * Pair of a physical register and a physical register. Used to + * return the physical register that a logical register has been + * renamed to, and the previous physical register that the same + * logical register was previously mapped to. + */ + typedef pair RenameInfo; + + public: + //Constructor + SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg); + + void setFreeList(SimpleFreeList *fl_ptr); + + //Tell rename map to get a free physical register for a given + //architected register. Not sure it should have a return value, + //but perhaps it should have some sort of fault in case there are + //no free registers. + RenameInfo rename(RegIndex arch_reg); + + PhysRegIndex lookup(RegIndex phys_reg); + + bool isReady(PhysRegIndex arch_reg); + + /** + * Marks the given register as ready, meaning that its value has been + * calculated and written to the register file. + * @params ready_reg The index of the physical register that is now + * ready. + */ + void markAsReady(PhysRegIndex ready_reg); + + void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); + + void squash(vector freed_regs, + vector unmaps); + + int numFreeEntries(); + + private: + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The integer zero register. This implementation assumes it is always + * zero and never can be anything else. + */ + RegIndex intZeroReg; + + /** The floating point zero register. This implementation assumes it is + * always zero and never can be anything else. + */ + RegIndex floatZeroReg; + + class RenameEntry + { + public: + PhysRegIndex physical_reg; + bool valid; + + RenameEntry(); + }; + + /** Integer rename map. */ + RenameEntry *intRenameMap; + + /** Floating point rename map. */ + RenameEntry *floatRenameMap; + + /** Free list interface. */ + SimpleFreeList *freeList; + + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + vector intScoreboard; + + /** Scoreboard of physical floating registers, saying whether or not they + * are ready. + */ + vector floatScoreboard; + + /** Scoreboard of miscellaneous registers, saying whether or not they + * are ready. + */ + vector miscScoreboard; +}; + +#endif //__RENAME_MAP_HH__ diff --git a/cpu/beta_cpu/rob.cc b/cpu/beta_cpu/rob.cc new file mode 100644 index 000000000..611cca0ba --- /dev/null +++ b/cpu/beta_cpu/rob.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/rob_impl.hh" + +// Force instantiation of InstructionQueue. +template ROB; diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh new file mode 100644 index 000000000..7963d1b01 --- /dev/null +++ b/cpu/beta_cpu/rob.hh @@ -0,0 +1,129 @@ +// Todo: Probably add in support for scheduling events (more than one as +// well) on the case of the ROB being empty or full. Considering tracking +// free entries instead of insts in ROB. Differentiate between squashing +// all instructions after the instruction, and all instructions after *and* +// including that instruction. + +#ifndef __ROB_HH__ +#define __ROB_HH__ + +#include +#include + +#include "arch/alpha/isa_traits.hh" + +using namespace std; + +/** + * ROB class. Uses the instruction list that exists within the CPU to + * represent the ROB. This class doesn't contain that structure, but instead + * a pointer to the CPU to get access to the structure. The ROB has a large + * hand in squashing instructions within the CPU, and is responsible for + * sending out the squash signal as well as what instruction is to be + * squashed. The ROB also controls most of the calls to the CPU to delete + * instructions; the only other call is made in the first stage of the pipe- + * line, which tells the CPU to delete all instructions not in the ROB. + */ +template +class ROB +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInst DynInst; + + typedef pair UnmapInfo; + typedef typename list::iterator InstIt; + + public: + /** ROB constructor. + * @params _numEntries Number of entries in ROB. + * @params _squashWidth Number of instructions that can be squashed in a + * single cycle. + */ + ROB(unsigned _numEntries, unsigned _squashWidth); + + /** Function to set the CPU pointer, necessary due to which object the ROB + * is created within. + * @params cpu_ptr Pointer to the implementation specific full CPU object. + */ + void setCPU(FullCPU *cpu_ptr); + + /** Function to insert an instruction into the ROB. The parameter inst is + * not truly required, but is useful for checking correctness. Note + * that whatever calls this function must ensure that there is enough + * space within the ROB for the new instruction. + * @params inst The instruction being inserted into the ROB. + * @todo Remove the parameter once correctness is ensured. + */ + void insertInst(DynInst *inst); + + /** Returns pointer to the head instruction within the ROB. There is + * no guarantee as to the return value if the ROB is empty. + * @retval Pointer to the DynInst that is at the head of the ROB. + */ + DynInst *readHeadInst() { return cpu->instList.front(); } + + DynInst *readTailInst() { return (*tail); } + + void retireHead(); + + bool isHeadReady(); + + unsigned numFreeEntries(); + + bool isFull() + { return numInstsInROB == numEntries; } + + bool isEmpty() + { return numInstsInROB == 0; } + + void doSquash(); + + void squash(InstSeqNum squash_num); + + uint64_t readHeadPC(); + + uint64_t readHeadNextPC(); + + InstSeqNum readHeadSeqNum(); + + uint64_t readTailPC(); + + InstSeqNum readTailSeqNum(); + + /** Checks if the ROB is still in the process of squashing instructions. + * @retval Whether or not the ROB is done squashing. + */ + bool isDoneSquashing() const { return doneSquashing; } + + /** This is more of a debugging function than anything. Use + * numInstsInROB to get the instructions in the ROB unless you are + * double checking that variable. + */ + int countInsts(); + + private: + + /** Pointer to the CPU. */ + FullCPU *cpu; + + unsigned numEntries; + + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + + InstIt tail; + + InstIt squashIt; + + int numInstsInROB; + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Is the ROB done squashing. */ + bool doneSquashing; +}; + +#endif //__ROB_HH__ diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh new file mode 100644 index 000000000..308a8010f --- /dev/null +++ b/cpu/beta_cpu/rob_impl.hh @@ -0,0 +1,264 @@ +#ifndef __ROB_IMPL_HH__ +#define __ROB_IMPL_HH__ + +#include "cpu/beta_cpu/rob.hh" + +template +ROB::ROB(unsigned _numEntries, unsigned _squashWidth) + : numEntries(_numEntries), + squashWidth(_squashWidth), + numInstsInROB(0), + squashedSeqNum(0) +{ + doneSquashing = true; +} + +template +void +ROB::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); + + squashIt = cpu->instList.end(); +} + +template +int +ROB::countInsts() +{ +/* + int return_val = 0; + + // Iterate through the ROB from the head to the tail, counting the + // entries. + for (InstIt i = cpu->instList.begin(); i != tail; i++) + { + assert(i != cpu->instList.end()); + return_val++; + } + + return return_val; +*/ + // Because the head won't be tracked properly until the ROB gets the + // first instruction, and any time that the ROB is empty and has not + // yet gotten the instruction, this function doesn't work. + return numInstsInROB; +} + +template +void +ROB::insertInst(DynInst *inst) +{ + // Make sure we have the right number of instructions. + assert(numInstsInROB == countInsts()); + // Make sure the instruction is valid. + assert(inst); + + DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC()); + + // If the ROB is full then exit. + assert(numInstsInROB != numEntries); + + ++numInstsInROB; + + // Increment the tail iterator, moving it one instruction back. + // There is a special case if the ROB was empty prior to this insertion, + // in which case the tail will be pointing at instList.end(). If that + // happens, then reset the tail to the beginning of the list. + if (tail != cpu->instList.end()) { + tail++; + } else { + tail = cpu->instList.begin(); + } + + // Make sure the tail iterator is actually pointing at the instruction + // added. + assert((*tail) == inst); + + DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB); + +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. +template +void +ROB::retireHead() +{ + assert(numInstsInROB == countInsts()); + + DynInst *head_inst; + + // Get the head ROB instruction. + head_inst = cpu->instList.front(); + + // Make certain this can retire. + assert(head_inst->readyToCommit()); + + DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, " + "instruction PC %#x, seq num %i\n", head_inst->readPC(), + head_inst->seqNum); + + // Keep track of how many instructions are in the ROB. + --numInstsInROB; + + // Tell CPU to remove the instruction from the list of instructions. + // A special case is needed if the instruction being retired is the + // only instruction in the ROB; otherwise the tail iterator will become + // invalidated. + if (tail == cpu->instList.begin()) { + cpu->removeFrontInst(head_inst); + tail = cpu->instList.end(); + } else { + cpu->removeFrontInst(head_inst); + } +} + +template +bool +ROB::isHeadReady() +{ + if (numInstsInROB != 0) { + DynInst *head_inst = cpu->instList.front(); + + return head_inst->readyToCommit(); + } + + return false; +} + +template +unsigned +ROB::numFreeEntries() +{ + assert(numInstsInROB == countInsts()); + + return numEntries - numInstsInROB; +} + +template +void +ROB::doSquash() +{ + DPRINTF(ROB, "ROB: Squashing instructions.\n"); + + assert(squashIt != cpu->instList.end()); + + for (int numSquashed = 0; + numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum; + ++numSquashed) + { + // Ensure that the instruction is younger. + assert((*squashIt)->seqNum > squashedSeqNum); + + DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n", + (*squashIt)->readPC(), (*squashIt)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*squashIt)->setSquashed(); + + (*squashIt)->setCanCommit(); + +#ifndef FULL_SYSTEM + if (squashIt == cpu->instList.begin()) { + DPRINTF(ROB, "ROB: Reached head of instruction list while " + "squashing.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + + return; + } +#endif + + // Move the tail iterator to the next instruction. + squashIt--; + } + + + // Check if ROB is done squashing. + if ((*squashIt)->seqNum == squashedSeqNum) { + DPRINTF(ROB, "ROB: Done squashing instructions.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + } +} + +template +void +ROB::squash(InstSeqNum squash_num) +{ + DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n"); + doneSquashing = false; + + squashedSeqNum = squash_num; + + assert(tail != cpu->instList.end()); + + squashIt = tail; + + doSquash(); +} + +template +uint64_t +ROB::readHeadPC() +{ + assert(numInstsInROB == countInsts()); + + DynInst *head_inst = cpu->instList.front(); + + return head_inst->readPC(); +} + +template +uint64_t +ROB::readHeadNextPC() +{ + assert(numInstsInROB == countInsts()); + + DynInst *head_inst = cpu->instList.front(); + + return head_inst->readNextPC(); +} + +template +InstSeqNum +ROB::readHeadSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + DynInst *head_inst = cpu->instList.front(); + + return head_inst->seqNum; +} + +template +uint64_t +ROB::readTailPC() +{ + assert(numInstsInROB == countInsts()); + + assert(tail != cpu->instList.end()); + + return (*tail)->readPC(); +} + +template +InstSeqNum +ROB::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + return (*tail)->seqNum; +} + +#endif // __ROB_IMPL_HH__ diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index 0315ab7a9..7a707c86a 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -41,6 +41,7 @@ // forward declarations class ExecContext; +class AlphaDynInst; class DynInst; class FastCPU; class SimpleCPU; @@ -307,20 +308,7 @@ class StaticInst : public StaticInstBase delete cachedDisassembly; } - /** - * Execute this instruction under SimpleCPU model. - */ - virtual Fault execute(SimpleCPU *xc, Trace::InstRecord *traceData) = 0; - - /** - * Execute this instruction under FastCPU model. - */ - virtual Fault execute(FastCPU *xc, Trace::InstRecord *traceData) = 0; - - /** - * Execute this instruction under detailed FullCPU model. - */ - virtual Fault execute(DynInst *xc, Trace::InstRecord *traceData) = 0; +#include "static_inst_impl.hh" /** * Return the target address for a PC-relative branch.