From 04745696b6b523c5e90c335298099600d4a14a76 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 20 Aug 2004 14:54:07 -0400 Subject: [PATCH 1/6] Check in of new CPU. This checkin works under non-Fullsystem mode, with no caches. SConscript: Added new CPU files to build. arch/alpha/isa_desc: Changed rduniq and wruniq to be nonspeculative because the uniq register is not renamed. arch/isa_parser.py: Added new CPU exec method. base/statistics.hh: Minor change for namespace conflict. Probably can change back once the new CPU files are cleaned up. base/traceflags.py: Added new CPU trace flags. cpu/static_inst.hh: Changed static inst to use a file that defines the execute functions. --HG-- extra : convert_revision : bd4ce34361308280168324817fc1258dd253e519 --- SConscript | 15 + arch/alpha/isa_desc | 4 +- arch/isa_parser.py | 3 + base/statistics.hh | 2 +- base/timebuf.hh | 220 ++++++++ base/traceflags.py | 16 +- cpu/base_dyn_inst.cc | 399 ++++++++++++++ cpu/base_dyn_inst.hh | 617 +++++++++++++++++++++ cpu/beta_cpu/alpha_dyn_inst.cc | 102 ++++ cpu/beta_cpu/alpha_dyn_inst.hh | 86 +++ cpu/beta_cpu/alpha_full_cpu.cc | 911 ++++++++++++++++++++++++++++++++ cpu/beta_cpu/alpha_full_cpu.hh | 244 +++++++++ cpu/beta_cpu/alpha_impl.hh | 74 +++ cpu/beta_cpu/alpha_params.hh | 85 +++ cpu/beta_cpu/comm.hh | 110 ++++ cpu/beta_cpu/commit.cc | 6 + cpu/beta_cpu/commit.hh | 149 ++++++ cpu/beta_cpu/commit_impl.hh | 421 +++++++++++++++ cpu/beta_cpu/cpu_policy.hh | 32 ++ cpu/beta_cpu/decode.cc | 6 + cpu/beta_cpu/decode.hh | 129 +++++ cpu/beta_cpu/decode_impl.hh | 325 ++++++++++++ cpu/beta_cpu/fetch.cc | 7 + cpu/beta_cpu/fetch.hh | 160 ++++++ cpu/beta_cpu/fetch_impl.hh | 555 +++++++++++++++++++ cpu/beta_cpu/free_list.cc | 33 ++ cpu/beta_cpu/free_list.hh | 148 ++++++ cpu/beta_cpu/full_cpu.cc | 503 ++++++++++++++++++ cpu/beta_cpu/full_cpu.hh | 323 +++++++++++ cpu/beta_cpu/iew.cc | 8 + cpu/beta_cpu/iew.hh | 166 ++++++ cpu/beta_cpu/iew_impl.hh | 443 ++++++++++++++++ cpu/beta_cpu/inst_queue.cc | 7 + 
cpu/beta_cpu/inst_queue.hh | 243 +++++++++ cpu/beta_cpu/inst_queue_impl.hh | 684 ++++++++++++++++++++++++ cpu/beta_cpu/regfile.hh | 583 ++++++++++++++++++++ cpu/beta_cpu/rename.cc | 6 + cpu/beta_cpu/rename.hh | 184 +++++++ cpu/beta_cpu/rename_impl.hh | 593 +++++++++++++++++++++ cpu/beta_cpu/rename_map.cc | 289 ++++++++++ cpu/beta_cpu/rename_map.hh | 141 +++++ cpu/beta_cpu/rob.cc | 7 + cpu/beta_cpu/rob.hh | 129 +++++ cpu/beta_cpu/rob_impl.hh | 264 +++++++++ cpu/static_inst.hh | 16 +- 45 files changed, 9429 insertions(+), 19 deletions(-) create mode 100644 base/timebuf.hh create mode 100644 cpu/base_dyn_inst.cc create mode 100644 cpu/base_dyn_inst.hh create mode 100644 cpu/beta_cpu/alpha_dyn_inst.cc create mode 100644 cpu/beta_cpu/alpha_dyn_inst.hh create mode 100644 cpu/beta_cpu/alpha_full_cpu.cc create mode 100644 cpu/beta_cpu/alpha_full_cpu.hh create mode 100644 cpu/beta_cpu/alpha_impl.hh create mode 100644 cpu/beta_cpu/alpha_params.hh create mode 100644 cpu/beta_cpu/comm.hh create mode 100644 cpu/beta_cpu/commit.cc create mode 100644 cpu/beta_cpu/commit.hh create mode 100644 cpu/beta_cpu/commit_impl.hh create mode 100644 cpu/beta_cpu/cpu_policy.hh create mode 100644 cpu/beta_cpu/decode.cc create mode 100644 cpu/beta_cpu/decode.hh create mode 100644 cpu/beta_cpu/decode_impl.hh create mode 100644 cpu/beta_cpu/fetch.cc create mode 100644 cpu/beta_cpu/fetch.hh create mode 100644 cpu/beta_cpu/fetch_impl.hh create mode 100644 cpu/beta_cpu/free_list.cc create mode 100644 cpu/beta_cpu/free_list.hh create mode 100644 cpu/beta_cpu/full_cpu.cc create mode 100644 cpu/beta_cpu/full_cpu.hh create mode 100644 cpu/beta_cpu/iew.cc create mode 100644 cpu/beta_cpu/iew.hh create mode 100644 cpu/beta_cpu/iew_impl.hh create mode 100644 cpu/beta_cpu/inst_queue.cc create mode 100644 cpu/beta_cpu/inst_queue.hh create mode 100644 cpu/beta_cpu/inst_queue_impl.hh create mode 100644 cpu/beta_cpu/regfile.hh create mode 100644 cpu/beta_cpu/rename.cc create mode 100644 cpu/beta_cpu/rename.hh 
create mode 100644 cpu/beta_cpu/rename_impl.hh create mode 100644 cpu/beta_cpu/rename_map.cc create mode 100644 cpu/beta_cpu/rename_map.hh create mode 100644 cpu/beta_cpu/rob.cc create mode 100644 cpu/beta_cpu/rob.hh create mode 100644 cpu/beta_cpu/rob_impl.hh diff --git a/SConscript b/SConscript index 10722007a..07cdcfdee 100644 --- a/SConscript +++ b/SConscript @@ -44,6 +44,7 @@ Import('env') # Base sources used by all configurations. base_sources = Split(''' arch/alpha/decoder.cc + arch/alpha/alpha_full_cpu_exec.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/full_cpu_exec.cc @@ -85,10 +86,23 @@ base_sources = Split(''' base/stats/text.cc cpu/base_cpu.cc + cpu/base_dyn_inst.cc cpu/exec_context.cc cpu/exetrace.cc cpu/pc_event.cc cpu/static_inst.cc + cpu/beta_cpu/alpha_dyn_inst.cc + cpu/beta_cpu/alpha_full_cpu.cc + cpu/beta_cpu/commit.cc + cpu/beta_cpu/decode.cc + cpu/beta_cpu/fetch.cc + cpu/beta_cpu/free_list.cc + cpu/beta_cpu/full_cpu.cc + cpu/beta_cpu/iew.cc + cpu/beta_cpu/inst_queue.cc + cpu/beta_cpu/rename.cc + cpu/beta_cpu/rename_map.cc + cpu/beta_cpu/rob.cc cpu/fast_cpu/fast_cpu.cc cpu/full_cpu/bpred.cc cpu/full_cpu/commit.cc @@ -395,6 +409,7 @@ env.Command(Split('base/traceflags.hh base/traceflags.cc'), # several files are generated from arch/$TARGET_ISA/isa_desc. 
env.Command(Split('''arch/alpha/decoder.cc arch/alpha/decoder.hh + arch/alpha/alpha_full_cpu_exec.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/full_cpu_exec.cc'''), diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc index d6b99a8ae..eaf3aa379 100644 --- a/arch/alpha/isa_desc +++ b/arch/alpha/isa_desc @@ -2482,9 +2482,9 @@ decode OPCODE default Unknown::unknown() { xc->syscall(); }}, IsNonSpeculative); // Read uniq reg into ABI return value register (r0) - 0x9e: rduniq({{ R0 = Runiq; }}); + 0x9e: rduniq({{ R0 = Runiq; }}, IsNonSpeculative); // Write uniq reg with value from ABI arg register (r16) - 0x9f: wruniq({{ Runiq = R16; }}); + 0x9f: wruniq({{ Runiq = R16; }}, IsNonSpeculative); } } #endif diff --git a/arch/isa_parser.py b/arch/isa_parser.py index 011ce7623..f7278628b 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -636,6 +636,9 @@ CpuModel('FastCPU', 'fast_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "cpu/full_cpu/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) +CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', + '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', + { 'CPU_exec_context': 'AlphaDynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. The entry key is the model name diff --git a/base/statistics.hh b/base/statistics.hh index f3b8a3922..a0be64ce5 100644 --- a/base/statistics.hh +++ b/base/statistics.hh @@ -407,7 +407,7 @@ class Wrap : public Child public: Wrap() { - map(new Data(*this)); + this->map(new Data(*this)); } /** diff --git a/base/timebuf.hh b/base/timebuf.hh new file mode 100644 index 000000000..ea538212e --- /dev/null +++ b/base/timebuf.hh @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2004 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef __BASE_TIMEBUF_HH__
#define __BASE_TIMEBUF_HH__

#include <algorithm>
#include <cassert>
#include <cstring>
#include <new>
#include <vector>

// Retained from the original header; nothing below depends on it, but
// files that include this header may rely on unqualified std names.
using namespace std;

/**
 * Fixed-size circular buffer of T slots addressed relative to a moving
 * "current" position.  Relative index 0 is the current slot, indices
 * down to -past are older slots and indices up to +future are newer
 * ones.  advance() moves the current position forward by one slot and
 * re-initialises the slot that becomes the farthest-future entry.
 *
 * Every slot is zero-filled and then constructed with T's default
 * constructor, both at creation time and when it is recycled.
 */
template <class T>
class TimeBuffer
{
  protected:
    /** Number of slots kept behind the current position. */
    int past;
    /** Number of slots kept ahead of the current position. */
    int future;
    /** Total slot count, past + future + 1 (0 if default constructed). */
    int size;

    /** Raw storage holding all slots back to back. */
    char *data;
    /** Start address of each slot inside data. */
    std::vector<char *> index;
    /** Position in index that relative index 0 currently refers to. */
    int base;

    /** Asserts that idx lies within [-past, +future]. */
    void valid(int idx) const
    {
        assert (idx >= -past && idx <= future);
    }

    /**
     * Checks the requested time range before any storage is sized from
     * it and returns the number of slots it needs.
     */
    static int slotCount(int p, int f)
    {
        assert(p >= 0 && f >= 0);
        return p + f + 1;
    }

    /** Zero-fills one slot and constructs a fresh T in it. */
    static void construct(char *ptr)
    {
        std::memset(ptr, 0, sizeof(T));
        // Value-initialisation keeps the "starts out zeroed" behaviour
        // well defined for types without a user-provided constructor.
        new (ptr) T();
    }

    /** Maps a relative index onto the start address of its slot. */
    char *slot(int idx) const
    {
        valid(idx);
        // A default-constructed buffer owns no slots at all.
        assert(size > 0);

        int vector_index = idx + base;
        if (vector_index >= size) {
            vector_index -= size;
        } else if (vector_index < 0) {
            vector_index += size;
        }

        return index[vector_index];
    }

    /** Exchanges the complete state of two buffers. */
    void swapWith(TimeBuffer &other)
    {
        std::swap(past, other.past);
        std::swap(future, other.future);
        std::swap(size, other.size);
        std::swap(data, other.data);
        index.swap(other.index);
        std::swap(base, other.base);
    }

  public:
    class wire;
    friend class wire;

    /**
     * Cursor onto one relative position of a TimeBuffer.  The wire
     * stores the relative index, so it keeps pointing at e.g. "one
     * cycle ago" while the buffer advances underneath it.
     */
    class wire
    {
        friend class TimeBuffer<T>;
      protected:
        /** Buffer this wire reads from / writes to. */
        TimeBuffer<T> *buffer;
        /** Relative index inside that buffer. */
        int index;

        /** Moves the wire to idx after range-checking it. */
        void set(int idx)
        {
            buffer->valid(idx);
            index = idx;
        }

        wire(TimeBuffer<T> *buf, int i)
            : buffer(buf), index(i)
        { }

      public:
        /** Creates an unattached wire; assign a real one before use. */
        wire()
            : buffer(NULL), index(0)
        { }

        wire(const wire &i)
            : buffer(i.buffer), index(i.index)
        { }

        const wire &operator=(const wire &i)
        {
            buffer = i.buffer;
            // An unattached source has no buffer to validate against.
            if (buffer)
                buffer->valid(i.index);
            index = i.index;
            return *this;
        }

        const wire &operator=(int idx)
        {
            set(idx);
            return *this;
        }

        const wire &operator+=(int offset)
        {
            set(index + offset);
            return *this;
        }

        const wire &operator-=(int offset)
        {
            set(index - offset);
            return *this;
        }

        wire &operator++()
        {
            set(index + 1);
            return *this;
        }

        /**
         * Post-increment.  Returns the previous position by value; the
         * original returned a reference to a temporary built from the
         * wire's own this pointer, which could not be instantiated.
         */
        wire operator++(int)
        {
            wire previous(buffer, index);
            set(index + 1);
            return previous;
        }

        wire &operator--()
        {
            set(index - 1);
            return *this;
        }

        /** Post-decrement; see operator++(int). */
        wire operator--(int)
        {
            wire previous(buffer, index);
            set(index - 1);
            return previous;
        }

        T &operator*() const { return *buffer->access(index); }
        T *operator->() const { return buffer->access(index); }
    };


  public:
    /**
     * Creates a buffer reaching p slots into the past and f slots into
     * the future, with every slot freshly constructed.
     */
    TimeBuffer(int p, int f)
        : past(p), future(f), size(slotCount(p, f)),
          data(new char[size * sizeof(T)]), index(size), base(0)
    {
        char *ptr = data;
        for (int i = 0; i < size; i++) {
            index[i] = ptr;
            construct(ptr);
            ptr += sizeof(T);
        }
    }

    /**
     * Creates an empty buffer that owns no slots.  All bookkeeping is
     * zeroed so the destructor and advance() are safe to run on it.
     */
    TimeBuffer()
        : past(0), future(0), size(0), data(NULL), index(), base(0)
    {
    }

    /** Deep copy: the new buffer gets its own storage and T objects. */
    TimeBuffer(const TimeBuffer &other)
        : past(other.past), future(other.future), size(other.size),
          data(other.size > 0 ? new char[other.size * sizeof(T)]
                              : static_cast<char *>(0)),
          index(other.size), base(other.base)
    {
        char *ptr = data;
        for (int i = 0; i < size; i++) {
            index[i] = ptr;
            new (ptr) T(*reinterpret_cast<const T *>(other.index[i]));
            ptr += sizeof(T);
        }
    }

    /** Copy assignment via copy-and-swap; wires onto *this stay valid. */
    TimeBuffer &operator=(const TimeBuffer &other)
    {
        if (this != &other) {
            TimeBuffer tmp(other);
            swapWith(tmp);
        }
        return *this;
    }

    ~TimeBuffer()
    {
        for (int i = 0; i < size; ++i)
            (reinterpret_cast<T *>(index[i]))->~T();
        delete [] data;
    }

    /**
     * Moves the current position one slot forward and recycles the slot
     * that now represents the farthest future.  No-op on an empty
     * buffer.
     */
    void
    advance()
    {
        if (size == 0)
            return;

        if (++base >= size)
            base = 0;

        int ptr = base + future;
        if (ptr >= size)
            ptr -= size;
        (reinterpret_cast<T *>(index[ptr]))->~T();
        construct(index[ptr]);
    }

    /** Returns a pointer to the slot at relative index idx. */
    T *access(int idx)
    {
        return reinterpret_cast<T *>(slot(idx));
    }

    /** Returns a reference to the slot at relative index idx. */
    T &operator[](int idx)
    {
        return *reinterpret_cast<T *>(slot(idx));
    }

    /** Returns a wire positioned at relative index idx. */
    wire getWire(int idx)
    {
        valid(idx);

        return wire(this, idx);
    }

    /** Returns a wire positioned at the current slot. */
    wire zero()
    {
        return wire(this, 0);
    }
};

#endif // __BASE_TIMEBUF_HH__
* Copyright (c) 2001-2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __BASE_DYN_INST_CC__ +#define __BASE_DYN_INST_CC__ + +#include +#include +#include + +#include "base/cprintf.hh" + +#include "arch/alpha/faults.hh" +#include "cpu/exetrace.hh" +#include "mem/mem_req.hh" + +#include "cpu/base_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" + +using namespace std; + +#define NOHASH +#ifndef NOHASH + +#include "base/hashmap.hh" + +unsigned int MyHashFunc(const BaseDynInst *addr) +{ + unsigned a = (unsigned)addr; + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; +} + +typedef m5::hash_map my_hash_t; +my_hash_t thishash; +#endif + +/** This may need to be specific to an implementation. */ +//int BaseDynInst::instcount = 0; + +//int break_inst = -1; + +template +BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, + Addr pred_PC, InstSeqNum seq_num, + FullCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu), xc(cpu->xcBase()) +{ + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; + + readyRegs = 0; + + seqNum = seq_num; + + specMemWrite = false; + + canIssue = false; + issued = false; + executed = false; + canCommit = false; + squashed = false; + squashedInIQ = false; + + blockingInst = false; + recoverInst = false; + specMode = false; + btbMissed = false; + // Eventually make this a parameter. + threadNumber = 0; + // Also make this a parameter. + specMode = true; + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be unimplemented opcode. + fault = Unimplemented_Opcode_Fault; + + PC = inst_PC; + nextPC = PC + sizeof(MachInst); + predPC = pred_PC; + + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. 
+ for (int i = 0; i < staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = staticInst->destRegIdx(i); + } + + for (int i = 0; i < staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = staticInst->srcRegIdx(i); + _readySrcRegIdx[i] = 0; + } + + ++instcount; + + DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", + instcount); +} + +template +BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) + : staticInst(_staticInst), traceData(NULL) +{ + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; + + specMemWrite = false; + + blockingInst = false; + recoverInst = false; + specMode = false; + btbMissed = false; + + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = staticInst->destRegIdx(i); + } + + for (int i = 0; i < staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = staticInst->srcRegIdx(i); + } +} + +template +BaseDynInst::~BaseDynInst() +{ +/* + if (specMemWrite) { + // Remove effects of this instruction from speculative memory + xc->spec_mem->erase(effAddr); + } +*/ + --instcount; + DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n", + instcount); +} + +template +FunctionalMemory * +BaseDynInst::getMemory(void) +{ + return xc->mem; +} +/* +template +IntReg * +BaseDynInst::getIntegerRegs(void) +{ + return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile); +} +*/ +template +void +BaseDynInst::prefetch(Addr addr, unsigned flags) +{ + // This is the "functional" implementation of prefetch. Not much + // happens here since prefetches don't affect the architectural + // state. + + // Generate a MemReq so we can translate the effective address. + MemReqPtr req = new MemReq(addr, xc, 1, flags); + req->asid = asid; + + // Prefetches never cause faults. 
+ fault = No_Fault; + + // note this is a local, not BaseDynInst::fault + Fault trans_fault = xc->translateDataReadReq(req); + + if (trans_fault == No_Fault && !(req->flags & UNCACHEABLE)) { + // It's a valid address to cacheable space. Record key MemReq + // parameters so we can generate another one just like it for + // the timing access without calling translate() again (which + // might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // Bogus address (invalid or uncacheable space). Mark it by + // setting the eff_addr to InvalidAddr. + effAddr = physEffAddr = MemReq::inval_addr; + } + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#ifndef FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (traceData) { + traceData->setAddr(addr); + } +} + +template +void +BaseDynInst::writeHint(Addr addr, int size, unsigned flags) +{ + // Need to create a MemReq here so we can do a translation. This + // will casue a TLB miss trap if necessary... not sure whether + // that's the best thing to do or not. We don't really need the + // MemReq otherwise, since wh64 has no functional effect. + MemReqPtr req = new MemReq(addr, xc, size, flags); + req->asid = asid; + + fault = xc->translateDataWriteReq(req); + + if (fault == No_Fault && !(req->flags & UNCACHEABLE)) { + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // ignore faults & accesses to uncacheable space... treat as no-op + effAddr = physEffAddr = MemReq::inval_addr; + } + + storeSize = size; + storeData = 0; +} + +/** + * @todo Need to find a way to get the cache block size here. 
+ */ +template +Fault +BaseDynInst::copySrcTranslate(Addr src) +{ + MemReqPtr req = new MemReq(src, xc, 64); + req->asid = asid; + + // translate to physical address + Fault fault = xc->translateDataReadReq(req); + + if (fault == No_Fault) { + xc->copySrcAddr = src; + xc->copySrcPhysAddr = req->paddr; + } else { + xc->copySrcAddr = 0; + xc->copySrcPhysAddr = 0; + } + return fault; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseDynInst::copy(Addr dest) +{ + uint8_t data[64]; + FunctionalMemory *mem = xc->mem; + assert(xc->copySrcPhysAddr || xc->misspeculating()); + MemReqPtr req = new MemReq(dest, xc, 64); + req->asid = asid; + + // translate to physical address + Fault fault = xc->translateDataWriteReq(req); + + if (fault == No_Fault) { + Addr dest_addr = req->paddr; + // Need to read straight from memory since we have more than 8 bytes. + req->paddr = xc->copySrcPhysAddr; + mem->read(req, data); + req->paddr = dest_addr; + mem->write(req, data); + } + return fault; +} + +template +void +BaseDynInst::dump() +{ + cprintf("T%d : %#08d `", threadNumber, PC); + cout << staticInst->disassemble(PC); + cprintf("'\n"); +} + +template +void +BaseDynInst::dump(std::string &outstring) +{ + std::ostringstream s; + s << "T" << threadNumber << " : 0x" << PC << " " + << staticInst->disassemble(PC); + + outstring = s.str(); +} + + +#if 0 +template +Fault +BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) +{ + Fault fault; + + // check alignments, even speculative this test should always pass + if ((nbytes & nbytes - 1) != 0 || (addr & nbytes - 1) != 0) { + for (int i = 0; i < nbytes; i++) + ((char *) p)[i] = 0; + + // I added the following because according to the comment above, + // we should never get here. The comment lies +#if 0 + panic("unaligned access. 
Cycle = %n", curTick); +#endif + return No_Fault; + } + + MemReqPtr req = new MemReq(addr, thread, nbytes); + switch(cmd) { + case Read: + fault = spec_mem->read(req, (uint8_t *)p); + break; + + case Write: + fault = spec_mem->write(req, (uint8_t *)p); + if (fault != No_Fault) + break; + + specMemWrite = true; + storeSize = nbytes; + switch(nbytes) { + case sizeof(uint8_t): + *(uint8_t)&storeData = (uint8_t *)p; + break; + case sizeof(uint16_t): + *(uint16_t)&storeData = (uint16_t *)p; + break; + case sizeof(uint32_t): + *(uint32_t)&storeData = (uint32_t *)p; + break; + case sizeof(uint64_t): + *(uint64_t)&storeData = (uint64_t *)p; + break; + } + break; + + default: + fault = Machine_Check_Fault; + break; + } + + trace_mem(fault, cmd, addr, p, nbytes); + + return fault; +} + +#endif + +int +BaseDynInst::instcount = 0; + +// Forward declaration... +template BaseDynInst; + +#endif // __BASE_DYN_INST_CC__ diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh new file mode 100644 index 000000000..7651b517e --- /dev/null +++ b/cpu/base_dyn_inst.hh @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2001-2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BASE_DYN_INST_HH__ +#define __BASE_DYN_INST_HH__ + +#include +#include + +#include "base/fast_alloc.hh" +#include "base/trace.hh" + +#include "cpu/static_inst.hh" +#include "cpu/beta_cpu/comm.hh" +#include "cpu/full_cpu/bpred_update.hh" +#include "mem/functional_mem/main_memory.hh" +#include "cpu/full_cpu/spec_memory.hh" +#include "cpu/inst_seq.hh" +#include "cpu/full_cpu/op_class.hh" +#include "cpu/full_cpu/spec_state.hh" + +/** + * @file + * Defines a dynamic instruction context. + */ + +namespace Trace { + class InstRecord; +}; + +class BaseInst +{ +}; + +template +class BaseDynInst : public FastAlloc +{ + public: + // Typedef for the CPU. + typedef typename Impl::FullCPU FullCPU; + + //Typedef to get the ISA. + typedef typename Impl::ISA ISA; + + /// Binary machine instruction type. + typedef typename ISA::MachInst MachInst; + /// Memory address type. + typedef typename ISA::Addr Addr; + /// Logical register index type. + typedef typename ISA::RegIndex RegIndex; + /// Integer register index type. 
+ typedef typename ISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs + }; + + StaticInstPtr staticInst; + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// + Trace::InstRecord *traceData; + +// void setCPSeq(InstSeqNum seq); + + template + Fault read(Addr addr, T &data, unsigned flags); + + template + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res); + + + IntReg *getIntegerRegs(void); + FunctionalMemory *getMemory(void); + + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); + Fault copySrcTranslate(Addr src); + Fault copy(Addr dest); + + public: + /** Is this instruction valid. */ + bool valid; + + /** The sequence number of the instruction. */ + InstSeqNum seqNum; + + /** How many source registers are ready. */ + unsigned readyRegs; + + /** Can this instruction issue. */ + bool canIssue; + + /** Has this instruction issued. */ + bool issued; + + /** Has this instruction executed (or made it through execute) yet. */ + bool executed; + + /** Can this instruction commit. */ + bool canCommit; + + /** Is this instruction squashed. */ + bool squashed; + + /** Is this instruction squashed in the instruction queue. */ + bool squashedInIQ; + + /** Is this a recover instruction. */ + bool recoverInst; + + /** Is this a thread blocking instruction. */ + bool blockingInst; /* this inst has called thread_block() */ + + /** Is this a thread synchronization instruction. */ + bool threadsyncWait; + + /** If the BTB missed. */ + bool btbMissed; + + /** The thread this instruction is from. */ + short threadNumber; + + /** If instruction is speculative. */ + short specMode; + + /** data address space ID, for loads & stores. */ + short asid; + + /** Pointer to the FullCPU object. */ + FullCPU *cpu; + + /** Pointer to the exec context. 
Will not exist in the final version. */ + ExecContext *xc; + + /** The kind of fault this instruction has generated. */ + Fault fault; + + /** The effective virtual address (lds & stores only). */ + Addr effAddr; + + /** The effective physical address. */ + Addr physEffAddr; + + /** Effective virtual address for a copy source. */ + Addr copySrcEffAddr; + + /** Effective physical address for a copy source. */ + Addr copySrcPhysEffAddr; + + /** The memory request flags (from translation). */ + unsigned memReqFlags; + + /** The size of the data to be stored. */ + int storeSize; + + /** The data to be stored. */ + IntReg storeData; + + /** Result of this instruction, if an integer. */ + uint64_t intResult; + + /** Result of this instruction, if a float. */ + float floatResult; + + /** Result of this instruction, if a double. */ + double doubleResult; + + /** PC of this instruction. */ + Addr PC; + + /** Next non-speculative PC. It is not filled in at fetch, but rather + * once the target of the branch is truly known (either decode or + * execute). + */ + Addr nextPC; + + /** Predicted next PC. */ + Addr predPC; + + /** Count of total number of dynamic instructions. */ + static int instcount; + + /** Did this instruction do a spec write? */ + bool specMemWrite; + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Whether or not the source register is ready. */ + bool _readySrcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + /** BaseDynInst constructor given a binary instruction. 
*/ + BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + BaseDynInst(StaticInstPtr &_staticInst); + + /** BaseDynInst destructor. */ + ~BaseDynInst(); + +#if 0 + Fault + mem_access(MemCmd cmd, // Read or Write access cmd + Addr addr, // virtual address of access + void *p, // input/output buffer + int nbytes); // access size +#endif + + void + trace_mem(Fault fault, // last fault + MemCmd cmd, // last command + Addr addr, // virtual address of access + void *p, // memory accessed + int nbytes); // access size + + /** Dumps out contents of this BaseDynInst. */ + void dump(); + + /** Dumps out contents of this BaseDynInst into given string. */ + void dump(std::string &outstring); + + /** Returns the fault type. */ + Fault getFault() { return fault; } + + /** Checks whether or not this instruction has had its branch target + * calculated yet. For now it is not utilized and is hacked to be + * always false. + */ + bool doneTargCalc() { return false; } + + /** Returns the calculated target of the branch. */ + Addr readCalcTarg() { return nextPC; } + + Addr readNextPC() { return nextPC; } + + /** Set the predicted target of this current instruction. */ + void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } + + /** Returns the predicted target of the branch. */ + Addr readPredTarg() { return predPC; } + + /** Returns whether the instruction was predicted taken or not. */ + bool predTaken() { +// DPRINTF(FullCPU, "PC: %08p\n", PC); +// DPRINTF(FullCPU, "predPC: %08p\n", predPC); + + return( predPC != (PC + sizeof(MachInst) ) ); + } + + /** Returns whether the instruction mispredicted. */ + bool mispredicted() { return (predPC != nextPC); } + + // + // Instruction types. Forward checks to StaticInst object. 
+ // + bool isNop() const { return staticInst->isNop(); } + bool isMemRef() const { return staticInst->isMemRef(); } + bool isLoad() const { return staticInst->isLoad(); } + bool isStore() const { return staticInst->isStore(); } + bool isInstPrefetch() const { return staticInst->isInstPrefetch(); } + bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } + bool isCopy() const { return staticInst->isCopy(); } + bool isInteger() const { return staticInst->isInteger(); } + bool isFloating() const { return staticInst->isFloating(); } + bool isControl() const { return staticInst->isControl(); } + bool isCall() const { return staticInst->isCall(); } + bool isReturn() const { return staticInst->isReturn(); } + bool isDirectCtrl() const { return staticInst->isDirectCtrl(); } + bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } + bool isCondCtrl() const { return staticInst->isCondCtrl(); } + bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isThreadSync() const { return staticInst->isThreadSync(); } + bool isSerializing() const { return staticInst->isSerializing(); } + bool isMemBarrier() const { return staticInst->isMemBarrier(); } + bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } + bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } + + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + int8_t numDestRegs() const { return staticInst->numDestRegs(); } + + // the following are used to track physical register usage + // for machines with separate int & FP reg files + int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } + int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + + /** Returns the logical register index of the i'th destination register. */ + RegIndex destRegIdx(int i) const + { + return staticInst->destRegIdx(i); + } + + /** Returns the logical register index of the i'th source register. 
*/ + RegIndex srcRegIdx(int i) const + { + return staticInst->srcRegIdx(i); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + bool isReadySrcRegIdx(int idx) const + { + return _readySrcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + //Push to .cc file. + /** Records that one of the source registers is ready. */ + void markSrcRegReady() + { + ++readyRegs; + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + void markSrcRegReady(RegIndex src_idx) + { + ++readyRegs; + + _readySrcRegIdx[src_idx] = 1; + + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + /** Sets this instruction as ready to issue. */ + void setCanIssue() { canIssue = true; } + + /** Returns whether or not this instruction is ready to issue. */ + bool readyToIssue() const { return canIssue; } + + /** Sets this instruction as issued from the IQ. 
*/ + void setIssued() { issued = true; } + + /** Returns whether or not this instruction has issued. */ + bool isIssued() { return issued; } + + /** Sets this instruction as executed. */ + void setExecuted() { executed = true; } + + /** Returns whether or not this instruction has executed. */ + bool isExecuted() { return executed; } + + /** Sets this instruction as ready to commit. */ + void setCanCommit() { canCommit = true; } + + /** Returns whether or not this instruction is ready to commit. */ + bool readyToCommit() const { return canCommit; } + + /** Sets this instruction as squashed. */ + void setSquashed() { squashed = true; } + + /** Returns whether or not this instruction is squashed. */ + bool isSquashed() const { return squashed; } + + /** Sets this instruction as squashed in the IQ. */ + void setSquashedInIQ() { squashedInIQ = true; } + + /** Returns whether or not this instruction is squashed in the IQ. */ + bool isSquashedInIQ() { return squashedInIQ; } + + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns whether or not the BTB missed. */ + bool btbMiss() const { return btbMissed; } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do).
+ + uint64_t readIntReg(StaticInst *si, int idx) + { + return cpu->readIntReg(_srcRegIdx[idx]); + } + + float readFloatRegSingle(StaticInst *si, int idx) + { + return cpu->readFloatRegSingle(_srcRegIdx[idx]); + } + + double readFloatRegDouble(StaticInst *si, int idx) + { + return cpu->readFloatRegDouble(_srcRegIdx[idx]); + } + + uint64_t readFloatRegInt(StaticInst *si, int idx) + { + return cpu->readFloatRegInt(_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + cpu->setIntReg(_destRegIdx[idx], val); + intResult = val; + } + + void setFloatRegSingle(StaticInst *si, int idx, float val) + { + cpu->setFloatRegSingle(_destRegIdx[idx], val); + floatResult = val; + } + + void setFloatRegDouble(StaticInst *si, int idx, double val) + { + cpu->setFloatRegDouble(_destRegIdx[idx], val); + doubleResult = val; + } + + void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + { + cpu->setFloatRegInt(_destRegIdx[idx], val); + intResult = val; + } + + /** Read the PC of this instruction. */ + Addr readPC() { return PC; } + + /** Set the next PC of this instruction (its actual target). */ + void setNextPC(uint64_t val) { nextPC = val; } + +// bool misspeculating() { return cpu->misspeculating(); } + ExecContext *xcBase() { return xc; } +}; + +template +template +inline Fault +BaseDynInst::read(Addr addr, T &data, unsigned flags) +{ + MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); + req->asid = asid; + + fault = cpu->translateDataReadReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. 
+ */ +#ifndef FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == No_Fault) { + fault = cpu->read(req, data); + } + else { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + data = (T)-1; + } + + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + return fault; +} + +template +template +inline Fault +BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + storeSize = sizeof(T); + storeData = data; + if (specMode) + specMemWrite = true; + + MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); + + req->asid = asid; + + fault = cpu->translateDataWriteReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#ifndef FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == No_Fault) { + fault = cpu->write(req, data); + } + + if (res) { + // always return some result to keep misspeculated paths + // (which will ignore faults) deterministic + *res = (fault == No_Fault) ? 
req->result : 0; + } + + return fault; +} + +#endif // __DYN_INST_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc new file mode 100644 index 000000000..a79d3082c --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -0,0 +1,102 @@ +#ifndef __ALPHA_DYN_INST_CC__ +#define __ALPHA_DYN_INST_CC__ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +// Force instantiation of BaseDynInst +template BaseDynInst; + +AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ + // Initialize these to illegal values. + robIdx = -1; + iqIdx = -1; +} + +AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) + : BaseDynInst(_staticInst) +{ +} + +uint64_t +AlphaDynInst::readUniq() +{ + return cpu->readUniq(); +} + +void +AlphaDynInst::setUniq(uint64_t val) +{ + cpu->setUniq(val); +} + +uint64_t +AlphaDynInst::readFpcr() +{ + return cpu->readFpcr(); +} + +void +AlphaDynInst::setFpcr(uint64_t val) +{ + cpu->setFpcr(val); +} + +#ifdef FULL_SYSTEM +uint64_t +AlphaDynInst::readIpr(int idx, Fault &fault) +{ + return cpu->readIpr(idx, fault); +} +Fault +AlphaDynInst::setIpr(int idx, uint64_t val) +{ + return cpu->setIpr(idx, val); +} + +Fault +AlphaDynInst::hwrei() +{ + return cpu->hwrei(); +} + +int +AlphaDynInst::readIntrFlag() +{ +return cpu->readIntrFlag(); +} + +void +AlphaDynInst::setIntrFlag(int val) +{ + cpu->setIntrFlag(val); +} + +bool +AlphaDynInst::inPalMode() +{ + return cpu->inPalMode(); +} + +void +AlphaDynInst::trap(Fault fault) +{ + cpu->trap(fault); +} + +bool +AlphaDynInst::simPalCheck(int palFunc) +{ + return cpu->simPalCheck(palFunc); +} +#else +void +AlphaDynInst::syscall() +{ + cpu->syscall(); +} +#endif + +#endif // __ALPHA_DYN_INST_CC__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh new file mode 100644 index 000000000..69d145355 --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -0,0 +1,86 @@ +//Todo: + 
+#ifndef __ALPHA_DYN_INST_HH__ +#define __ALPHA_DYN_INST_HH__ + +#include "cpu/base_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/inst_seq.hh" + +using namespace std; + +class AlphaDynInst : public BaseDynInst +{ + public: + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + AlphaDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction. */ + Fault execute() + { + fault = staticInst->execute(this, traceData); + return fault; + } + + /** Location of this instruction within the ROB. Might be somewhat + * implementation specific. + * Might not want this data in the inst as it may be deleted prior to + * execution of the stage that needs it. + */ + int robIdx; + + int getROBEntry() + { + return robIdx; + } + + void setROBEntry(int rob_idx) + { + robIdx = rob_idx; + } + + /** Location of this instruction within the IQ. Might be somewhat + * implementation specific. + * Might not want this data in the inst as it may be deleted prior to + * execution of the stage that needs it. 
+ */ + int iqIdx; + + int getIQEntry() + { + return iqIdx; + } + + void setIQEntry(int iq_idx) + { + iqIdx = iq_idx; + } + + uint64_t readUniq(); + void setUniq(uint64_t val); + + uint64_t readFpcr(); + void setFpcr(uint64_t val); + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + +}; + +#endif // __ALPHA_DYN_INST_HH__ + diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc new file mode 100644 index 000000000..880418146 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu.cc @@ -0,0 +1,911 @@ + +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/full_cpu/dd_queue.hh" +#include "cpu/full_cpu/full_cpu.hh" +#include "cpu/full_cpu/rob_station.hh" +#include "mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_params.hh" +#include "cpu/beta_cpu/comm.hh" + +AlphaFullCPU::AlphaFullCPU(Params &params) + : FullBetaCPU(params) +{ + + fetch.setCPU(this); + decode.setCPU(this); + rename.setCPU(this); + iew.setCPU(this); + commit.setCPU(this); + + rob.setCPU(this); +} + +#ifndef FULL_SYSTEM + +void +AlphaFullCPU::syscall() +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + squashStages(); + + // Copy over all important state to xc once all the unrolling is done. + copyToXC(); + + process->syscall(xc); + + // Copy over all important state back to normal. + copyFromXC(); +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls).
+void +AlphaFullCPU::squashStages() +{ + InstSeqNum rob_head = rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it. + for (int i = 0; i < 10; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + fetch.squash(rob.readHeadNextPC()); + fetchQueue.advance(); + + decode.squash(); + decodeQueue.advance(); + + rename.squash(); + renameQueue.advance(); + renameQueue.advance(); + + iew.squash(); + iewQueue.advance(); + iewQueue.advance(); + + rob.squash(rob_head); + commit.setSquashing(); +} + +#endif // FULL_SYSTEM + +void +AlphaFullCPU::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; + xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; + } + + xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; + xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; + xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; + xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + + xc->regs.pc = rob.readHeadPC(); + xc->regs.npc = xc->regs.pc+4; + + xc->func_exe_inst = funcExeInst; +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +void +AlphaFullCPU::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. 
+ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg], + xc->regs.intRegFile[i]); + + regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. +// regFile.pc = xc->regs.pc; +// regFile.npc = xc->regs.npc; + + funcExeInst = xc->func_exe_inst; +} + +#ifdef FULL_SYSTEM + +uint64_t * +AlphaFullCPU::getIpr() +{ + return regs.ipr; +} + +uint64_t +AlphaFullCPU::readIpr(int idx, Fault &fault) +{ + uint64_t *ipr = getIpr(); + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + 
case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PALtemp23: + case AlphaISA::IPR_PAL_BASE: + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + case AlphaISA::IPR_ISR: + case AlphaISA::IPR_EXC_ADDR: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_MCSR: + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + case AlphaISA::IPR_SIRR: + case AlphaISA::IPR_ICSR: + case AlphaISA::IPR_ICM: + case AlphaISA::IPR_DTB_CM: + case AlphaISA::IPR_IPLR: + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case AlphaISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case AlphaISA::IPR_VA: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_VA_FORM: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_IFAULT_VA_FORM: + case AlphaISA::IPR_EXC_MASK: + case AlphaISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_DTB_PTE: + { + AlphaISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + case AlphaISA::IPR_ALT_MODE: + case AlphaISA::IPR_DTB_IA: + case AlphaISA::IPR_DTB_IAP: + case AlphaISA::IPR_ITB_IA: + case AlphaISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +Fault +AlphaFullCPU::setIpr(int idx, uint64_t val) +{ + 
uint64_t *ipr = getIpr(); + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PAL_BASE: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case AlphaISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case AlphaISA::IPR_CC: + // This IPR only writes the upper 32 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc).
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_PALtemp23: + // write entire quad, then pass old and new values to kernelStats + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case AlphaISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case AlphaISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case AlphaISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case AlphaISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); // no break: falls through to IPR_ICM + + case AlphaISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_MCSR: + // more here after optimization...
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case AlphaISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case AlphaISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case AlphaISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case AlphaISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case AlphaISA::IPR_EXC_SUM: + case AlphaISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_SL_RCV: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_ITB_PTE_TEMP: + case AlphaISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case AlphaISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case AlphaISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case AlphaISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); + break; + + case AlphaISA::IPR_DTB_TAG: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case AlphaISA::IPR_ITB_PTE: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into instruction TLB + itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); + } + break; + + case AlphaISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case AlphaISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case AlphaISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... + return No_Fault; + +} + +int +AlphaFullCPU::readIntrFlag() +{ + return regs.intrflag; +} + +void +AlphaFullCPU::setIntrFlag(int val) +{ + regs.intrflag = val; +} + +// Maybe have this send back from IEW stage to squash and update PC.
+Fault +AlphaFullCPU::hwrei() +{ + uint64_t *ipr = getIpr(); + + if (!PC_PAL(regs.pc)) + return Unimplemented_Opcode_Fault; + + setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); + + if (!misspeculating()) { + kernelStats.hwrei(); + + if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) + AlphaISA::swap_palshadow(&regs, false); + + AlphaISA::check_interrupts = true; + } + + // FIXME: XXX check for interrupts? XXX + return No_Fault; +} + +bool +AlphaFullCPU::inPalMode() +{ + return PC_PAL(readPC()); +} + +bool +AlphaFullCPU::simPalCheck(int palFunc) +{ + kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +void +AlphaFullCPU::trap(Fault fault) +{ + uint64_t PC = commit.readPC(); + + DPRINTF(Fault, "Fault %s\n", FaultName(fault)); + Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); + + assert(!misspeculating()); + kernelStats.fault(fault); + + if (fault == Arithmetic_Fault) + panic("Arithmetic traps are unimplemented!"); + + AlphaISA::InternalProcReg *ipr = getIpr(); + + // exception restart address - Get the commit PC + if (fault != Interrupt_Fault || !PC_PAL(PC)) + ipr[AlphaISA::IPR_EXC_ADDR] = PC; + + if (fault == Pal_Fault || fault == Arithmetic_Fault /* || + fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { + // traps... skip faulting instruction + ipr[AlphaISA::IPR_EXC_ADDR] += 4; + } + + if (!PC_PAL(PC)) + AlphaISA::swap_palshadow(&regs, true); + + setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); + setNextPC(PC + sizeof(MachInst)); // NOTE(review): derived from the faulting PC, not the handler PC set above - confirm intended +} + +void +AlphaFullCPU::processInterrupts() +{ + // Check for interrupts here.
For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +void +AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. + for (int i = 0; i < AlphaISA::NumIntRegs; i++) { + if (reg_redir[i]) { + AlphaISA::IntReg temp = regs->intRegFile[i]; + regs->intRegFile[i] = regs->palregs[i]; + regs->palregs[i] = temp; + } + } +} + +#endif // FULL_SYSTEM + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) + + Param numThreads; + +#ifdef FULL_SYSTEM +SimObjectParam system; +SimObjectParam itb; +SimObjectParam dtb; +Param mult; +#else +SimObjectVectorParam workload; +SimObjectParam process; +Param asid; +#endif // FULL_SYSTEM +SimObjectParam mem; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param defReg; + +END_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) + 
+BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) + + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#ifdef FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), + INIT_PARAM_DFLT(mult, "System clock multiplier", 1), +#else + INIT_PARAM(workload, "Processes to run"), + INIT_PARAM_DFLT(process, "Process to run", NULL), + INIT_PARAM(asid, "Address space ID"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached " + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load " + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch " + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode " + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename " + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), +
INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal " + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(defReg, "Defer registration") + +END_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) + +CREATE_SIM_OBJECT(AlphaFullCPU) +{ + AlphaFullCPU *cpu; + +#ifdef FULL_SYSTEM + if (mult != 1) + panic("Processor clock multiplier must be 1?\n"); + + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? 
numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + Process *actual_process; + + if (process == NULL) { + actual_process = workload[0]; + } else { + actual_process = process; + } + +#endif + + AlphaSimpleParams params; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#ifdef FULL_SYSTEM + params._system = system; + params.itb = itb; + params.dtb = dtb; + params.freq = ticksPerSecond * mult; +#else + params.workload = workload; + params.process = actual_process; + params.asid = asid; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.maxInstsAnyThread = max_insts_any_thread; + params.maxInstsAllThreads = max_insts_all_threads; + params.maxLoadsAnyThread = max_loads_any_thread; + params.maxLoadsAllThreads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + + params.iewToCommitDelay = 
iewToCommitDelay;
+    params.renameToROBDelay = renameToROBDelay;
+    params.commitWidth = commitWidth;
+    params.squashWidth = squashWidth;
+
+    params.numPhysIntRegs = numPhysIntRegs;
+    params.numPhysFloatRegs = numPhysFloatRegs;
+    params.numIQEntries = numIQEntries;
+    params.numROBEntries = numROBEntries;
+
+    params.defReg = defReg;
+
+    cpu = new AlphaFullCPU(params);
+
+    return cpu;
+}
+
+REGISTER_SIM_OBJECT("AlphaFullCPU", AlphaFullCPU)
+
diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh
new file mode 100644
index 000000000..b098aaac1
--- /dev/null
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@@ -0,0 +1,244 @@
+// Todo: Find all the stuff in ExecContext and ev5 that needs to be
+// specifically designed for this CPU.
+// Read and write are horribly hacked up between not being sure where to
+// copy their code from, and Ron's memory changes.
+
+#ifndef __ALPHA_FULL_CPU_HH__
+#define __ALPHA_FULL_CPU_HH__
+
+// To include: comm, impl, full cpu, ITB/DTB if full sys,
+#include "cpu/beta_cpu/comm.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
+#include "cpu/beta_cpu/full_cpu.hh"
+
+using namespace std;
+
+class AlphaFullCPU : public FullBetaCPU<AlphaSimpleImpl>
+{
+  public:
+    typedef AlphaSimpleImpl::ISA AlphaISA;
+    typedef AlphaSimpleImpl::Params Params;
+
+  public:
+    AlphaFullCPU(Params &params);
+
+#ifdef FULL_SYSTEM
+    AlphaITB *itb;
+    AlphaDTB *dtb;
+#endif
+
+  public:
+#ifdef FULL_SYSTEM
+    bool inPalMode();
+
+    //Note that the interrupt stuff from the base CPU might be somewhat
+    //ISA specific (ie NumInterruptLevels). These functions might not
+    //be needed in FullCPU though.
+//    void post_interrupt(int int_num, int index);
+//    void clear_interrupt(int int_num, int index);
+//    void clear_interrupts();
+
+    Fault translateInstReq(MemReqPtr &req)
+    {
+        return itb->translate(req);
+    }
+
+    Fault translateDataReadReq(MemReqPtr &req)
+    {
+        return dtb->translate(req, false);
+    }
+
+    Fault translateDataWriteReq(MemReqPtr &req)
+    {
+        return dtb->translate(req, true);
+    }
+
+#else
+    Fault dummyTranslation(MemReqPtr &req)
+    {
+#if 0
+        assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+
+        // put the asid in the upper 16 bits of the paddr
+        req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
+        req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
+        return No_Fault;
+    }
+    Fault translateInstReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+    Fault translateDataReadReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+    Fault translateDataWriteReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+
+#endif
+
+    template <class T>
+    Fault read(MemReqPtr &req, T &data)
+    {
+#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM)
+        if (req->flags & LOCKED) {
+            MiscRegFile *cregs = &req->xc->regs.miscRegs;
+            cregs->lock_addr = req->paddr;
+            cregs->lock_flag = true;
+        }
+#endif
+
+        Fault error;
+        error = mem->read(req, data);
+        data = htoa(data);
+        return error;
+    }
+
+    template <class T>
+    Fault write(MemReqPtr &req, T &data)
+    {
+#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM)
+
+        MiscRegFile *cregs;
+
+        // If this is a store conditional, act appropriately
+        if (req->flags & LOCKED) {
+            cregs = &xc->regs.miscRegs;
+
+            if (req->flags & UNCACHEABLE) {
+                // Don't update result register (see stq_c in isa_desc)
+                req->result = 2;
+                req->xc->storeCondFailures = 0;//Needed? [RGD]
+            } else {
+                req->result = cregs->lock_flag;
+                if (!cregs->lock_flag ||
+                    ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+                    cregs->lock_flag = false;
+                    if (((++req->xc->storeCondFailures) % 100000) == 0) {
+                        std::cerr << "Warning: "
+                                  << req->xc->storeCondFailures
+                                  << " consecutive store conditional failures "
+                                  << "on cpu " << cpu_id
+                                  << std::endl;
+                    }
+                    return No_Fault;
+                }
+                else req->xc->storeCondFailures = 0;
+            }
+        }
+
+        // Need to clear any locked flags on other processors for
+        // this address. Only do this for successful Store Conditionals
+        // and all other stores (WH64?). Unsuccessful Store
+        // Conditionals would have returned above, and wouldn't fall
+        // through.
+        for (int i = 0; i < system->execContexts.size(); i++){
+            cregs = &system->execContexts[i]->regs.miscRegs;
+            if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) {
+                cregs->lock_flag = false;
+            }
+        }
+
+#endif
+
+        return mem->write(req, (T)htoa(data));
+    }
+
+    // Later on may want to remove this misc stuff from the regfile and
+    // have it handled at this level. Might prove to be an issue when
+    // trying to rename source/destination registers...
+    uint64_t readUniq()
+    {
+        return regFile.readUniq();
+    }
+
+    void setUniq(uint64_t val)
+    {
+        regFile.setUniq(val);
+    }
+
+    uint64_t readFpcr()
+    {
+        return regFile.readFpcr();
+    }
+
+    void setFpcr(uint64_t val)
+    {
+        regFile.setFpcr(val);
+    }
+
+#ifdef FULL_SYSTEM
+    uint64_t *getIPR();
+    uint64_t readIpr(int idx, Fault &fault);
+    Fault setIpr(int idx, uint64_t val);
+    int readIntrFlag();
+    void setIntrFlag(int val);
+    Fault hwrei();
+    // (inPalMode() is declared once, in the FULL_SYSTEM block near the top.)
+    void trap(Fault fault);
+    bool simPalCheck(int palFunc);
+
+    void processInterrupts();
+#endif
+
+
+#ifndef FULL_SYSTEM
+    // Need to change these into regfile calls that directly set a certain
+    // register. Actually, these functions should handle most of this
+    // functionality by themselves; should look up the rename and then
+    // set the register.
+    IntReg getSyscallArg(int i)
+    {
+        return xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i];
+    }
+
+    // used to shift args for indirect syscall
+    void setSyscallArg(int i, IntReg val)
+    {
+        xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val;
+    }
+
+    void setSyscallReturn(int64_t return_value)
+    {
+        // check for error condition. Alpha syscall convention is to
+        // indicate success/failure in reg a3 (r19) and put the
+        // return value itself in the standard return value reg (v0).
+        const int RegA3 = 19; // only place this is used
+        if (return_value >= 0) {
+            // no error
+            xc->regs.intRegFile[RegA3] = 0;
+            xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value;
+        } else {
+            // got an error, return details
+            xc->regs.intRegFile[RegA3] = (IntReg) -1;
+            xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value;
+        }
+    }
+
+    void syscall();
+    void squashStages();
+
+#endif
+
+    void copyToXC();
+    void copyFromXC();
+
+  public:
+#ifdef FULL_SYSTEM
+    bool palShadowEnabled;
+
+    // Not sure this is used anywhere.
+    void intr_post(RegFile *regs, Fault fault, Addr pc);
+    // Actually used within exec files. Implement properly.
+    void swap_palshadow(RegFile *regs, bool use_shadow);
+    // Called by CPU constructor. Can implement as I please.
+    void initCPU(RegFile *regs);
+    // Called by initCPU. Implement as I please.
+    void initIPRs(RegFile *regs);
+#endif
+};
+
+#endif // __ALPHA_FULL_CPU_HH__
diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh
new file mode 100644
index 000000000..a80b116a8
--- /dev/null
+++ b/cpu/beta_cpu/alpha_impl.hh
@@ -0,0 +1,74 @@
+#ifndef __ALPHA_IMPL_HH__
+#define __ALPHA_IMPL_HH__
+
+#include "arch/alpha/isa_traits.hh"
+
+#include "cpu/beta_cpu/comm.hh"
+#include "cpu/beta_cpu/cpu_policy.hh"
+#include "cpu/beta_cpu/alpha_params.hh"
+
+#include "cpu/beta_cpu/commit.hh"
+#include "cpu/beta_cpu/decode.hh"
+#include "cpu/beta_cpu/fetch.hh"
+#include "cpu/beta_cpu/free_list.hh"
+#include "cpu/beta_cpu/iew.hh"
+
+#include "cpu/beta_cpu/inst_queue.hh"
+#include "cpu/beta_cpu/regfile.hh"
+#include "cpu/beta_cpu/rename.hh"
+#include "cpu/beta_cpu/rename_map.hh"
+#include "cpu/beta_cpu/rob.hh"
+
+class AlphaDynInst;
+class AlphaFullCPU;
+
+/** Implementation specific struct that defines several key things to the
+ * CPU, the stages within the CPU, the time buffers, and the DynInst.
+ * The struct defines the ISA, the CPU policy, the specific DynInst, the
+ * specific FullCPU, and all of the structs from the time buffers to do
+ * communication.
+ * This is one of the key things that must be defined for each hardware
+ * specific CPU implementation.
+ */
+struct AlphaSimpleImpl
+{
+    /** The ISA to be used. */
+    typedef AlphaISA ISA;
+
+    /** The type of MachInst. */
+    typedef ISA::MachInst MachInst;
+
+    /** The CPU policy to be used (ie fetch, decode, etc.). */
+    typedef SimpleCPUPolicy<AlphaSimpleImpl> CPUPol;
+
+    /** The DynInst to be used. */
+    typedef AlphaDynInst DynInst;
+
+    /** The FullCPU to be used. */
+    typedef AlphaFullCPU FullCPU;
+
+    /** The Params to be passed to each stage. */
+    typedef AlphaSimpleParams Params;
+
+    /** The struct for communication between fetch and decode. */
+    typedef SimpleFetchSimpleDecode<AlphaSimpleImpl> FetchStruct;
+
+    /** The struct for communication between decode and rename. */
+    typedef SimpleDecodeSimpleRename<AlphaSimpleImpl> DecodeStruct;
+
+    /** The struct for communication between rename and IEW. */
+    typedef SimpleRenameSimpleIEW<AlphaSimpleImpl> RenameStruct;
+
+    /** The struct for communication between IEW and commit. */
+    typedef SimpleIEWSimpleCommit<AlphaSimpleImpl> IEWStruct;
+
+    /** The struct for communication within the IEW stage. */
+    typedef ::IssueStruct<AlphaSimpleImpl> IssueStruct;
+
+    /** The struct for all backwards communication. */
+    typedef TimeBufStruct TimeStruct;
+};
+
+
+
+#endif // __ALPHA_IMPL_HH__
diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh
new file mode 100644
index 000000000..b217ef8e3
--- /dev/null
+++ b/cpu/beta_cpu/alpha_params.hh
@@ -0,0 +1,85 @@
+#ifndef __ALPHA_SIMPLE_PARAMS_HH__
+#define __ALPHA_SIMPLE_PARAMS_HH__
+
+//Forward declarations
+class System;
+class AlphaITB;
+class AlphaDTB;
+class FunctionalMemory;
+class Process;
+class MemInterface;
+
+/**
+ * This file defines the parameters that will be used for the AlphaFullCPU.
+ * This must be defined externally so that the Impl can have a params class
+ * defined that it can pass to all of the individual stages.
+ */
+
+class AlphaSimpleParams
+{
+  public:
+    std::string name;
+    int numberOfThreads;
+
+#ifdef FULL_SYSTEM
+    System *_system;
+    AlphaITB *itb; AlphaDTB *dtb;
+    Tick freq;
+#else
+    std::vector<Process *> workload;
+    Process *process;
+    short asid;
+#endif // FULL_SYSTEM
+
+    FunctionalMemory *mem;
+
+    Counter maxInstsAnyThread;
+    Counter maxInstsAllThreads;
+    Counter maxLoadsAnyThread;
+    Counter maxLoadsAllThreads;
+
+    //
+    // Caches
+    //
+    MemInterface *icacheInterface;
+    MemInterface *dcacheInterface;
+
+    unsigned decodeToFetchDelay;
+    unsigned renameToFetchDelay;
+    unsigned iewToFetchDelay;
+    unsigned commitToFetchDelay;
+    unsigned fetchWidth;
+
+    unsigned renameToDecodeDelay;
+    unsigned iewToDecodeDelay;
+    unsigned commitToDecodeDelay;
+    unsigned fetchToDecodeDelay;
+    unsigned decodeWidth;
+
+    unsigned iewToRenameDelay;
+    unsigned commitToRenameDelay;
+    unsigned decodeToRenameDelay;
+    unsigned renameWidth;
+
+    unsigned commitToIEWDelay;
+    unsigned renameToIEWDelay;
+    unsigned issueToExecuteDelay;
+    unsigned issueWidth;
+    unsigned executeWidth;
+    unsigned executeIntWidth;
+    unsigned executeFloatWidth;
+
+    unsigned iewToCommitDelay;
+    unsigned renameToROBDelay;
+    unsigned commitWidth;
+    unsigned squashWidth;
+
+    unsigned numPhysIntRegs;
+    unsigned numPhysFloatRegs;
+    unsigned numIQEntries;
+    unsigned numROBEntries;
+
+    bool defReg;
+};
+
+#endif
diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh
new file mode 100644
index 000000000..21a530ecf
--- /dev/null
+++ b/cpu/beta_cpu/comm.hh
@@ -0,0 +1,110 @@
+#ifndef __COMM_HH__
+#define __COMM_HH__
+
+#include <vector> // NOTE(review): header name lost in extraction; presumed
+#include "arch/alpha/isa_traits.hh"
+#include "cpu/inst_seq.hh"
+
+using namespace std;
+
+// Find better place to put this typedef.
+typedef short int PhysRegIndex;
+
+// Might want to put constructors/destructors here.
+template <class Impl>
+struct SimpleFetchSimpleDecode {
+    // Consider having a field of how many ready instructions.
+    typename Impl::DynInst *insts[1];
+};
+
+template <class Impl>
+struct SimpleDecodeSimpleRename {
+    // Consider having a field of how many ready instructions.
+    typename Impl::DynInst *insts[1];
+};
+
+template <class Impl>
+struct SimpleRenameSimpleIEW {
+    // Consider having a field of how many ready instructions.
+    typename Impl::DynInst *insts[1];
+};
+
+template <class Impl>
+struct SimpleIEWSimpleCommit {
+    // Consider having a field of how many ready instructions.
+    typename Impl::DynInst *insts[1];
+};
+
+template <class Impl>
+struct IssueStruct {
+    typename Impl::DynInst *insts[1];
+};
+
+struct TimeBufStruct {
+    struct decodeComm {
+        bool squash;
+        bool stall;
+        bool predIncorrect;
+        uint64_t branchAddr;
+
+        //Question, is it worthwhile to have this Addr passed along
+        //by each stage, or just have Fetch look it up in the proper
+        //amount of cycles in the time buffer?
+        //Both might actually be needed because decode can send a different
+        //nextPC if the bpred was wrong.
+        uint64_t nextPC;
+    };
+
+    decodeComm decodeInfo;
+
+    // Rename can't actually tell anything to squash or send a new PC back
+    // because it doesn't do anything along those lines. But maybe leave
+    // these fields in here to keep the stages mostly orthogonal.
+    struct renameComm {
+        bool squash;
+        bool stall;
+
+        uint64_t nextPC;
+    };
+
+    renameComm renameInfo;
+
+    struct iewComm {
+        bool squash;
+        bool stall;
+        bool predIncorrect;
+
+        // Also eventually include skid buffer space.
+        unsigned freeIQEntries;
+
+        uint64_t nextPC;
+        // For now hardcode the type.
+        // Change this to sequence number eventually.
+        InstSeqNum squashedSeqNum;
+    };
+
+    iewComm iewInfo;
+
+    struct commitComm {
+        bool squash;
+        bool stall;
+        unsigned freeROBEntries;
+
+        uint64_t nextPC;
+
+        // Think of better names here.
+        // Will need to be a variety of sizes...
+        // Maybe make it a vector, that way only need one object.
+        vector<PhysRegIndex> freeRegs;
+
+        bool robSquashing;
+        // Represents the instruction that has either been retired or
+        // squashed. Similar to having a single bus that broadcasts the
+        // retired or squashed sequence number.
+        InstSeqNum doneSeqNum;
+    };
+
+    commitComm commitInfo;
+};
+
+#endif //__COMM_HH__
diff --git a/cpu/beta_cpu/commit.cc b/cpu/beta_cpu/commit.cc
new file mode 100644
index 000000000..2efb38976
--- /dev/null
+++ b/cpu/beta_cpu/commit.cc
@@ -0,0 +1,6 @@
+
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+#include "cpu/beta_cpu/commit_impl.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
+
+template class SimpleCommit<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh
new file mode 100644
index 000000000..0e5a96e2a
--- /dev/null
+++ b/cpu/beta_cpu/commit.hh
@@ -0,0 +1,149 @@
+// Todo: Squash properly. Have commit be able to send a squash signal
+// to previous stages; will be needed when trap() is implemented.
+// Maybe have a special method for handling interrupts/traps.
+//
+// Traps: Have IEW send a signal to commit saying that there's a trap to
+// be handled. Have commit send the PC back to the fetch stage, along
+// with the current commit PC. Fetch will directly access the IPR and save
+// off all the proper stuff. Commit can send out a squash, or something
+// close to it.
+// Do the same for hwrei(). However, requires that commit be specifically
+// built to support that kind of stuff. Probably not horrible to have
+// commit support having the CPU tell it to squash the other stages and
+// restart at a given address. The IPR register does become an issue.
+// Probably not a big deal if the IPR stuff isn't cycle accurate. Can just
+// have the original function handle writing to the IPR register.
+
+#ifndef __SIMPLE_COMMIT_HH__
+#define __SIMPLE_COMMIT_HH__
+
+//Includes: ROB, time buffer, structs, memory interface
+#include "arch/alpha/isa_traits.hh"
+#include "base/timebuf.hh"
+#include "cpu/beta_cpu/comm.hh"
+#include "cpu/beta_cpu/rename_map.hh"
+#include "cpu/beta_cpu/rob.hh"
+#include "mem/memory_interface.hh"
+
+template<class Impl>
+class SimpleCommit
+{
+  public:
+    // Typedefs from the Impl.
+    typedef typename Impl::ISA ISA;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::Params Params;
+
+    typedef typename Impl::CPUPol::ROB ROB;
+
+    typedef typename Impl::TimeStruct TimeStruct;
+    typedef typename Impl::IEWStruct IEWStruct;
+    typedef typename Impl::RenameStruct RenameStruct;
+
+  public:
+    // I don't believe commit can block, so it will only have two
+    // statuses for now.
+    // Actually if there's a cache access that needs to block (ie
+    // uncacheable load or just a mem access in commit) then the stage
+    // may have to wait.
+    enum Status {
+        Running,
+        Idle,
+        ROBSquashing,
+        DcacheMissStall,
+        DcacheMissComplete
+    };
+
+  private:
+    Status _status;
+
+  public:
+    SimpleCommit(Params &params);
+
+    void setCPU(FullCPU *cpu_ptr);
+
+    void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+    void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
+
+    void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
+
+    void setROB(ROB *rob_ptr);
+
+    void tick();
+
+    void commit();
+
+    uint64_t readCommitPC();
+
+    void setSquashing() { _status = ROBSquashing; }
+
+  private:
+
+    void commitInsts();
+
+    bool commitHead(DynInst *head_inst, unsigned inst_num);
+
+    void getInsts();
+
+    void markCompletedInsts();
+
+    /** Time buffer interface. */
+    TimeBuffer<TimeStruct> *timeBuffer;
+
+    /** Wire to write information heading to previous stages. */
+    typename TimeBuffer<TimeStruct>::wire toIEW;
+
+    /** Wire to read information from IEW (for ROB). */
+    typename TimeBuffer<TimeStruct>::wire robInfoFromIEW;
+
+    /** IEW instruction queue interface. */
+    TimeBuffer<IEWStruct> *iewQueue;
+
+    /** Wire to read information from IEW queue. */
+    typename TimeBuffer<IEWStruct>::wire fromIEW;
+
+    /** Rename instruction queue interface, for ROB. */
+    TimeBuffer<RenameStruct> *renameQueue;
+
+    /** Wire to read information from rename queue. */
+    typename TimeBuffer<RenameStruct>::wire fromRename;
+
+    /** ROB interface. */
+    ROB *rob;
+
+    /** Pointer to FullCPU. */
+    FullCPU *cpu;
+
+    /** Pointer to the rename map. DO NOT USE if possible. */
+    typename Impl::CPUPol::RenameMap *renameMap;
+
+    //Store buffer interface? Will need to move committed stores to the
+    //store buffer
+
+    /** Memory interface. Used for d-cache accesses. */
+    MemInterface *dcacheInterface;
+
+  private:
+    /** IEW to Commit delay, in ticks. */
+    unsigned iewToCommitDelay;
+
+    /** Rename to ROB delay, in ticks. */
+    unsigned renameToROBDelay;
+
+    /** Rename width, in instructions. Used so ROB knows how many
+     * instructions to get from the rename instruction queue.
+     */
+    unsigned renameWidth;
+
+    /** IEW width, in instructions. Used so ROB knows how many
+     * instructions to get from the IEW instruction queue.
+     */
+    unsigned iewWidth;
+
+    /** Commit width, in instructions. */
+    unsigned commitWidth;
+};
+
+#endif // __SIMPLE_COMMIT_HH__
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh
new file mode 100644
index 000000000..bc8db0ce0
--- /dev/null
+++ b/cpu/beta_cpu/commit_impl.hh
@@ -0,0 +1,421 @@
+// @todo: Bug when something reaches execute, and mispredicts, but is never
+// put into the ROB because the ROB is full. Need rename stage to predict
+// the free ROB entries better.
+
+#ifndef __COMMIT_IMPL_HH__
+#define __COMMIT_IMPL_HH__
+
+#include "base/timebuf.hh"
+#include "cpu/beta_cpu/commit.hh"
+#include "cpu/exetrace.hh"
+
+template<class Impl>
+SimpleCommit<Impl>::SimpleCommit(Params &params)
+    : dcacheInterface(params.dcacheInterface),
+      iewToCommitDelay(params.iewToCommitDelay),
+      renameToROBDelay(params.renameToROBDelay),
+      renameWidth(params.renameWidth),
+      iewWidth(params.executeWidth),
+      commitWidth(params.commitWidth)
+{
+    _status = Idle;
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+    DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
+    cpu = cpu_ptr;
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+    DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
+    timeBuffer = tb_ptr;
+
+    // Setup wire to send information back to IEW.
+    toIEW = timeBuffer->getWire(0);
+
+    // Setup wire to read data from IEW (for the ROB); cast before negating.
+    robInfoFromIEW = timeBuffer->getWire(-static_cast<int>(iewToCommitDelay));
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
+    DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
+    renameQueue = rq_ptr;
+
+    // Setup wire to get instructions from rename (for the ROB).
+    fromRename = renameQueue->getWire(-static_cast<int>(renameToROBDelay));
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+{
+    DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
+    iewQueue = iq_ptr;
+
+    // Setup wire to get instructions from IEW.
+    fromIEW = iewQueue->getWire(-static_cast<int>(iewToCommitDelay));
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::setROB(ROB *rob_ptr)
+{
+    DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
+    rob = rob_ptr;
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::tick()
+{
+    // If the ROB is currently in its squash sequence, then continue
+    // to squash. In this case, commit does not do anything. Otherwise
+    // run commit.
+    if (_status == ROBSquashing) {
+        if (rob->isDoneSquashing()) {
+            _status = Running;
+        } else {
+            rob->doSquash();
+
+            // Send back sequence number of tail of ROB, so other stages
+            // can squash younger instructions. Note that really the only
+            // stage that this is important for is the IEW stage; other
+            // stages can just clear all their state as long as selective
+            // replay isn't used.
+            toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum();
+            toIEW->commitInfo.robSquashing = true;
+        }
+    } else {
+        commit();
+    }
+
+    markCompletedInsts();
+
+    // Writeback number of free ROB entries here.
+    DPRINTF(Commit, "Commit: ROB has %d free entries.\n",
+            rob->numFreeEntries());
+    toIEW->commitInfo.freeROBEntries = rob->numFreeEntries();
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::commit()
+{
+    //////////////////////////////////////
+    // Check for interrupts
+    //////////////////////////////////////
+
+    // Process interrupts if interrupts are enabled and not in PAL mode.
+    // Take the PC from commit and write it to the IPR, then squash. The
+    // interrupt completing will take care of restoring the PC from that value
+    // in the IPR. Look at IPR[EXC_ADDR];
+    // hwrei() is what resets the PC to the place where instruction execution
+    // begins again.
+#ifdef FULL_SYSTEM
+    if (ISA::check_interrupts &&
+        cpu->check_interrupts() &&
+        !cpu->inPalMode()) {
+        // Will need to squash all instructions currently in flight and have
+        // the interrupt handler restart at the last non-committed inst.
+        // Most of that can be handled through the trap() function. The
+        // processInterrupts() function really just checks for interrupts
+        // and then calls trap() if there is an interrupt present.
+
+        // CPU will handle implementation of the interrupt.
+        cpu->processInterrupts();
+    }
+#endif // FULL_SYSTEM
+
+    ////////////////////////////////////
+    // Check for squash signal, handle that first
+    ////////////////////////////////////
+
+    // Want to mainly check if the IEW stage is telling the ROB to squash.
+    // Should I also check if the commit stage is telling the ROB to squash?
+    // This might be necessary to keep the same timing between the IQ and
+    // the ROB...
+    if (robInfoFromIEW->iewInfo.squash) {
+        DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n");
+
+        _status = ROBSquashing;
+
+        InstSeqNum squashed_inst = robInfoFromIEW->iewInfo.squashedSeqNum;
+
+        rob->squash(squashed_inst);
+
+        // Send back the sequence number of the squashed instruction.
+        toIEW->commitInfo.doneSeqNum = squashed_inst;
+        // Send back the squash signal to tell stages that they should squash.
+        toIEW->commitInfo.squash = true;
+        // Send back the rob squashing signal so other stages know that the
+        // ROB is in the process of squashing.
+        toIEW->commitInfo.robSquashing = true;
+        toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC;
+    }
+
+    if (_status != ROBSquashing) {
+        getInsts();
+
+        commitInsts();
+    }
+
+    // If the ROB is empty, we can set this stage to idle. Use this
+    // in the future when the Idle status will actually be utilized.
+#if 0
+    if (rob->isEmpty()) {
+        DPRINTF(Commit, "Commit: ROB is empty. Status changed to idle.\n");
+        _status = Idle;
+        // Schedule an event so that commit will actually wake up
+        // once something gets put in the ROB.
+    }
+#endif
+}
+
+// Loop that goes through as many instructions in the ROB as possible and
+// tries to commit them. The actual work for committing is done by the
+// commitHead() function.
+template<class Impl>
+void
+SimpleCommit<Impl>::commitInsts()
+{
+    ////////////////////////////////////
+    // Handle commit
+    // Note that commit will be handled prior to the ROB so that the ROB
+    // only tries to commit instructions it has in this current cycle, and
+    // not instructions it is writing in during this cycle.
+    // Can't commit and squash things at the same time...
+    ////////////////////////////////////
+
+    DynInst *head_inst = rob->readHeadInst();
+
+    unsigned num_committed = 0;
+
+    // Commit as many instructions as possible until the commit bandwidth
+    // limit is reached, or it becomes impossible to commit any more.
+    while (!rob->isEmpty() &&
+           head_inst->readyToCommit() &&
+           num_committed < commitWidth)
+    {
+        DPRINTF(Commit, "Commit: Trying to commit head instruction.\n");
+
+        // If the head instruction is squashed, it is ready to retire at any
+        // time. However, we need to avoid updating any other state
+        // incorrectly if it's already been squashed.
+        if (head_inst->isSquashed()) {
+            // Hack to avoid the instruction being retired (and deleted) if
+            // it hasn't been through the IEW stage yet.
+            if (!head_inst->isExecuted()) {
+                break;
+            }
+
+            DPRINTF(Commit, "Commit: Retiring squashed instruction from "
+                    "ROB.\n");
+
+            // Tell ROB to retire head instruction. This retires the head
+            // inst in the ROB without affecting any other stages.
+            rob->retireHead();
+
+            ++num_committed;
+        } else {
+            // Increment the total number of non-speculative instructions
+            // executed.
+            // Hack for now: it really shouldn't happen until after the
+            // commit is deemed to be successful.
+            cpu->funcExeInst++;
+            // Copy the seq num first: retiring the head may delete the inst.
+            InstSeqNum head_seq_num = head_inst->seqNum;
+            bool commit_success = commitHead(head_inst, num_committed);
+
+            // Update what instruction we are looking at if the commit worked.
+            if(commit_success) {
+                ++num_committed;
+
+                // Send back which instruction has been committed.
+                // @todo: Update this later when a wider pipeline is used.
+                // A pointer can't be handed back here, so the sequence
+                // number copied before the commit is sent instead.
+                toIEW->commitInfo.doneSeqNum = head_seq_num;
+
+                cpu->instDone();
+            } else {
+                break;
+            }
+        }
+
+        // Update the pointer to read the next instruction in the ROB.
+        head_inst = rob->readHeadInst();
+    }
+}
+
+template<class Impl>
+bool
+SimpleCommit<Impl>::commitHead(DynInst *head_inst, unsigned inst_num)
+{
+    // Make sure instruction is valid
+    assert(head_inst);
+
+    Fault fault = No_Fault;
+
+    // If the head instruction is a store or a load, then execute it
+    // because this simple model does no speculative memory access.
+    // Hopefully this covers all memory references.
+    // Also check if it's nonspeculative. Or a nop. Then it will be
+    // executed only when it reaches the head of the ROB. Actually
+    // executing a nop is a bit overkill...
+    if (head_inst->isStore() ||
+        head_inst->isLoad() ||
+        head_inst->isNonSpeculative() ||
+        head_inst->isNop()) {
+        DPRINTF(Commit, "Commit: Executing a memory reference or "
+                "nonspeculative instruction at commit, inst PC %#x\n",
+                head_inst->PC);
+        fault = head_inst->execute();
+
+        // Tell CPU to tell IEW to tell IQ (nasty chain of calls) that
+        // this instruction has completed. Could predicate this on
+        // whether or not the instruction has a destination.
+        // Slightly unrealistic, but will not really be a factor once
+        // a real load/store queue is added.
+        cpu->wakeDependents(head_inst);
+    }
+
+    // Check if memory access was successful.
+    if (fault != No_Fault) {
+        // Handle data cache miss here. In the future, set the status
+        // to data cache miss, then exit the stage. Have an event
+        // that handles committing the head instruction, then setting
+        // the stage back to running, when the event is run. (just
+        // make sure that event is commit's run for that cycle)
+        panic("Commit: Load/store instruction failed, not sure what "
+              "to do.\n");
+        // Also will want to clear the instruction's fault after being
+        // handled here so it's not handled again below.
+    }
+
+    // Now check if it's one of the special trap or barrier or
+    // serializing instructions.
+    if (head_inst->isThreadSync() ||
+        head_inst->isSerializing() ||
+        head_inst->isMemBarrier() ||
+        head_inst->isWriteBarrier() )
+    {
+        // Not handled for now. Mem barriers and write barriers are safe
+        // to simply let commit as memory accesses only happen once they
+        // reach the head of commit. Not sure about the other two.
+        panic("Serializing or barrier instructions"
+              " are not handled yet.\n");
+    }
+
+    // Check if the instruction caused a fault. If so, trap.
+    if (head_inst->getFault() != No_Fault) {
+#ifdef FULL_SYSTEM
+        cpu->trap(head_inst->getFault());
+#else // !FULL_SYSTEM
+        panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
+              head_inst->PC);
+#endif // FULL_SYSTEM
+    }
+
+    // Check if we're really ready to commit. If not then return false.
+    // I'm pretty sure all instructions should be able to commit if they've
+    // reached this far. For now leave this in as a check.
+    if(!rob->isHeadReady()) {
+        DPRINTF(Commit, "Commit: Unable to commit head instruction!\n");
+        return false;
+    }
+
+    //If it's a branch, then send back branch prediction update info
+    //to the fetch stage.
+    // This should be handled in the iew stage if a mispredict happens...
+#if 0
+    if (head_inst->isControl()) {
+
+        toIEW->nextPC = head_inst->readPC();
+        //Maybe switch over to BTB incorrect.
+        toIEW->btbMissed = head_inst->btbMiss();
+        toIEW->target = head_inst->nextPC;
+        //Maybe also include global history information.
+        //This simple version will have no branch prediction however.
+    }
+#endif
+
+#if 0
+    // Check if the instruction has a destination register.
+    // If so add the previous physical register of its logical register's
+    // destination to the free list through the time buffer.
+    for (int i = 0; i < head_inst->numDestRegs(); i++)
+    {
+        toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i));
+    }
+#endif
+
+    // Now that the instruction is going to be committed, finalize its
+    // trace data.
+    if (head_inst->traceData) {
+        head_inst->traceData->finalize();
+    }
+
+    //Finally clear the head ROB entry.
+    rob->retireHead();
+
+    // Return true to indicate that we have committed an instruction.
+    return true;
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::getInsts()
+{
+    //////////////////////////////////////
+    // Handle ROB functions
+    //////////////////////////////////////
+
+    // Read any issued instructions and place them into the ROB. Do this
+    // prior to squashing to avoid having instructions in the ROB that
+    // don't get squashed properly. The bound is tested before the slot.
+    for (unsigned inst_num = 0;
+         inst_num < renameWidth && fromRename->insts[inst_num] != NULL;
+         ++inst_num)
+    {
+        DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n",
+                fromRename->insts[inst_num]->readPC());
+        rob->insertInst(fromRename->insts[inst_num]);
+    }
+}
+
+template<class Impl>
+void
+SimpleCommit<Impl>::markCompletedInsts()
+{
+    // Grab completed insts out of the IEW instruction queue, and mark
+    // instructions completed within the ROB. Bound is tested before slot.
+    for (unsigned inst_num = 0;
+         inst_num < iewWidth && fromIEW->insts[inst_num] != NULL;
+         ++inst_num)
+    {
+        DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n",
+                fromIEW->insts[inst_num]->readPC(),
+                fromIEW->insts[inst_num]->seqNum);
+
+        // Mark the instruction as ready to commit.
+        fromIEW->insts[inst_num]->setCanCommit();
+    }
+}
+
+template<class Impl>
+uint64_t
+SimpleCommit<Impl>::readCommitPC()
+{
+    return rob->readHeadPC();
+}
+
+#endif // __COMMIT_IMPL_HH__
diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh
new file mode 100644
index 000000000..676334249
--- /dev/null
+++ b/cpu/beta_cpu/cpu_policy.hh
@@ -0,0 +1,32 @@
+#ifndef __CPU_POLICY_HH__
+#define __CPU_POLICY_HH__
+
+#include "cpu/beta_cpu/fetch.hh"
+#include "cpu/beta_cpu/decode.hh"
+#include "cpu/beta_cpu/rename.hh"
+#include "cpu/beta_cpu/iew.hh"
+#include "cpu/beta_cpu/commit.hh"
+
+#include "cpu/beta_cpu/inst_queue.hh"
+#include "cpu/beta_cpu/regfile.hh"
+#include "cpu/beta_cpu/free_list.hh"
+#include "cpu/beta_cpu/rename_map.hh"
+#include "cpu/beta_cpu/rob.hh"
+
+template<class Impl>
+struct SimpleCPUPolicy
+{
+    typedef PhysRegFile<Impl> RegFile;
+    typedef SimpleFreeList FreeList;
+    typedef SimpleRenameMap RenameMap;
+    typedef ::ROB<Impl> ROB;
+    typedef InstructionQueue<Impl> IQ;
+
+    typedef SimpleFetch<Impl> Fetch;
+    typedef SimpleDecode<Impl> Decode;
+    typedef SimpleRename<Impl> Rename;
+    typedef SimpleIEW<Impl> IEW;
+    typedef SimpleCommit<Impl> Commit;
+};
+
+#endif //__CPU_POLICY_HH__
diff --git a/cpu/beta_cpu/decode.cc b/cpu/beta_cpu/decode.cc
new file mode 100644
index 000000000..ffabcf18a
--- /dev/null
+++ b/cpu/beta_cpu/decode.cc
@@ -0,0 +1,6 @@
+
+#include "cpu/beta_cpu/alpha_dyn_inst.hh"
+#include "cpu/beta_cpu/decode_impl.hh"
+#include "cpu/beta_cpu/alpha_impl.hh"
+
+template class SimpleDecode<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh
new file mode 100644
index 000000000..c41955dcb
--- /dev/null
+++ b/cpu/beta_cpu/decode.hh
@@ -0,0 +1,129 @@
+// Todo:
+// Add a couple of the branch fields to DynInst. Figure out where DynInst
+// should try to compute the target of a PC-relative branch. Try to avoid
+// having so many returns within the code.
+// Fix up squashing too, as it's too
+// dependent upon the iew stage continually telling it to squash.
+ +#ifndef __SIMPLE_DECODE_HH__ +#define __SIMPLE_DECODE_HH__ + +#include + +//Will want to include: time buffer, structs, +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" + +using namespace std; + +template +class SimpleDecode +{ + private: + // Typedefs from the Impl. + typedef typename Impl::ISA ISA; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::FetchStruct FetchStruct; + typedef typename Impl::DecodeStruct DecodeStruct; + typedef typename Impl::TimeStruct TimeStruct; + + // Typedefs from the ISA. + typedef typename ISA::Addr Addr; + + public: + // The only time decode will become blocked is if dispatch becomes + // blocked, which means IQ or ROB is probably full. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking + }; + + private: + // May eventually need statuses on a per thread basis. + Status _status; + + public: + SimpleDecode(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setDecodeQueue(TimeBuffer *dq_ptr); + + void setFetchQueue(TimeBuffer *fq_ptr); + + void tick(); + + void decode(); + + // Might want to make squash a friend function. + void squash(); + + private: + void block(); + + inline void unblock(); + + void squash(DynInst *inst); + + // Interfaces to objects outside of decode. + /** CPU interface. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get rename's output from backwards time buffer. */ + typename TimeBuffer::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + // Might not be the best name as not only fetch will read it. 
+ typename TimeBuffer::wire toFetch; + + /** Decode instruction queue. */ + TimeBuffer *decodeQueue; + + /** Wire used to write any information heading to rename. */ + typename TimeBuffer::wire toRename; + + /** Fetch instruction queue interface. */ + TimeBuffer *fetchQueue; + + /** Wire to get fetch's output from fetch queue. */ + typename TimeBuffer::wire fromFetch; + + /** Skid buffer between fetch and decode. */ + queue skidBuffer; + + private: + //Consider making these unsigned to avoid any confusion. + /** Rename to decode delay, in ticks. */ + unsigned renameToDecodeDelay; + + /** IEW to decode delay, in ticks. */ + unsigned iewToDecodeDelay; + + /** Commit to decode delay, in ticks. */ + unsigned commitToDecodeDelay; + + /** Fetch to decode delay, in ticks. */ + unsigned fetchToDecodeDelay; + + /** The width of decode, in instructions. */ + unsigned decodeWidth; +}; + +#endif // __SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh new file mode 100644 index 000000000..ecf19b8ea --- /dev/null +++ b/cpu/beta_cpu/decode_impl.hh @@ -0,0 +1,325 @@ +#ifndef __SIMPLE_DECODE_CC__ +#define __SIMPLE_DECODE_CC__ + +#include "cpu/beta_cpu/decode.hh" + +template +SimpleDecode::SimpleDecode(Params ¶ms) + : renameToDecodeDelay(params.renameToDecodeDelay), + iewToDecodeDelay(params.iewToDecodeDelay), + commitToDecodeDelay(params.commitToDecodeDelay), + fetchToDecodeDelay(params.fetchToDecodeDelay), + decodeWidth(params.decodeWidth) +{ + DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); + _status = Idle; +} + +template +void +SimpleDecode::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Decode, "Decode: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void +SimpleDecode::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(Decode, "Decode: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. 
+ toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. + fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +template +void +SimpleDecode::setDecodeQueue(TimeBuffer *dq_ptr) +{ + DPRINTF(Decode, "Decode: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +template +void +SimpleDecode::setFetchQueue(TimeBuffer *fq_ptr) +{ + DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. + fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + +template +void +SimpleDecode::block() +{ + DPRINTF(Decode, "Decode: Blocking.\n"); + + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template +inline void +SimpleDecode::unblock() +{ + DPRINTF(Decode, "Decode: Unblocking, going to remove " + "instructions from skid buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Finished unblocking.\n"); + _status = Running; + } +} + +// This squash is specifically for when Decode detects a PC-relative branch +// was predicted incorrectly. 
+template +void +SimpleDecode::squash(DynInst *inst) +{ + DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " + "detected at decode.\n"); + Addr new_PC = inst->nextPC; + + toFetch->decodeInfo.predIncorrect = true; + toFetch->decodeInfo.squash = true; + toFetch->decodeInfo.nextPC = new_PC; + + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template +void +SimpleDecode::squash() +{ + DPRINTF(Decode, "Decode: Squashing.\n"); + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template +void +SimpleDecode::tick() +{ + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + if (_status != Blocked && _status != Squashing) { + DPRINTF(Decode, "Decode: Not blocked, so attempting to run " + "stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + decode(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + unblock(); + } + } else if (_status == Blocked) { + if (fromFetch->insts[0] != NULL) { + block(); + } + + if (!fromRename->renameInfo.stall && + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { + DPRINTF(Decode, "Decode: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this + // stage is done unblocking. 
+ toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Still blocked.\n"); + toFetch->decodeInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + } else if (_status == Squashing) { + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + _status = Running; + } else if (fromCommit->commitInfo.squash) { + squash(); + } + } +} + +template +void +SimpleDecode::decode() +{ + // Check time buffer if being told to squash. + if (/* fromRename->renameInfo.squash || */ + /* fromIEW->iewInfo.squash || */ + fromCommit->commitInfo.squash) { + squash(); + return; + } + + // Check time buffer if being told to stall. + if (fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + block(); + return; + } + + // Check fetch queue to see if instructions are available. + // If no available instructions, do nothing, unless this stage is + // currently unblocking. + if (fromFetch->insts[0] == NULL && _status != Unblocking) { + DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); + // Should I change the status to idle? + return; + } + + DynInst *inst; + // Instead have a class member variable that records which instruction + // was the last one that was ended on. At the tick() stage, it can + // check if that's equal to 0. If not, then don't pop stuff off. + unsigned num_inst = 0; + bool insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] != NULL : + fromFetch->insts[num_inst] != NULL; + + // Debug block... 
+#if 0 + if (insts_available) { + DPRINTF(Decode, "Decode: Instructions available.\n"); + } else { + if (_status == Unblocking && skidBuffer.empty()) { + DPRINTF(Decode, "Decode: No instructions available, skid buffer " + "empty.\n"); + } else if (_status != Unblocking && + fromFetch->insts[0] == NULL) { + DPRINTF(Decode, "Decode: No instructions available, fetch queue " + "empty.\n"); + } else { + panic("Decode: No instructions available, unexpected condition!" + "\n"); + } + } +#endif + + // Check to make sure that instructions coming from fetch are valid. + // Normally at this stage the branch target of PC-relative branches + // should be computed here. However in this simple model all + // computation will take place at execute. Hence doneTargCalc() + // will always be false. + while (num_inst < decodeWidth && + insts_available) + { + DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); + // Might create some sort of accessor to get an instruction + // on a per thread basis. Or might be faster to just get + // a pointer to an array or list of instructions and use that + // within this code. + inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : + fromFetch->insts[num_inst]; + DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", + inst, inst->readPC()); + + // This current instruction is valid, so add it into the decode + // queue. The next instruction may not be valid, so check to + // see if branches were predicted correctly. + toRename->insts[num_inst] = inst; + + // Ensure that if it was predicted as a branch, it really is a + // branch. This case should never happen in this model. + if (inst->predTaken() && !inst->isControl()) { + panic("Instruction predicted as a branch!"); + + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + + // Ensure that the predicted branch target is the actual branch + // target if possible (branches that are PC relative). 
+ if (inst->isControl() && inst->doneTargCalc()) { + if (inst->mispredicted()) { + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + } + + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + + // Increment which instruction we're looking at. + ++num_inst; + + // Check whether or not there are instructions available. + // Either need to check within the skid buffer, or the fetch + // queue, depending if this stage is unblocking or not. + insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] == NULL : + fromFetch->insts[num_inst] == NULL; + } +} + +#endif // __SIMPLE_DECODE_CC__ diff --git a/cpu/beta_cpu/fetch.cc b/cpu/beta_cpu/fetch.cc new file mode 100644 index 000000000..4d08754b6 --- /dev/null +++ b/cpu/beta_cpu/fetch.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/fetch_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleFetch; diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh new file mode 100644 index 000000000..5717c65ac --- /dev/null +++ b/cpu/beta_cpu/fetch.hh @@ -0,0 +1,160 @@ +// Todo: add in statistics, only get the MachInst and let decode actually +// decode, think about SMT fetch, +// fix up branch prediction stuff into one thing, +// Figure out where to advance time buffer. Add a way to get a +// stage's current status. 
+ +#ifndef __SIMPLE_FETCH_HH__ +#define __SIMPLE_FETCH_HH__ + +//Will want to include: time buffer, structs, MemInterface, Event, +//whatever class bzero uses, MemReqPtr + +#include "base/timebuf.hh" +#include "sim/eventq.hh" +#include "cpu/pc_event.hh" +#include "cpu/beta_cpu/comm.hh" +#include "mem/mem_interface.hh" + +using namespace std; + +/** + * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch + * will stall if there's an Icache miss, but otherwise assumes a one cycle + * Icache hit. This will be replaced with a more fleshed out class in the + * future. + */ + +template +class SimpleFetch +{ + public: + /** Typedefs from Impl. */ + typedef typename Impl::ISA ISA; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::FetchStruct FetchStruct; + typedef typename Impl::TimeStruct TimeStruct; + + /** Typedefs from ISA. */ + typedef typename ISA::MachInst MachInst; + + public: + enum Status { + Running, + Idle, + Squashing, + Blocked, + IcacheMissStall, + IcacheMissComplete + }; + + // May eventually need statuses on a per thread basis. + Status _status; + + bool stalled; + + public: + /** SimpleFetch constructor. */ + SimpleFetch(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *time_buffer); + + void setFetchQueue(TimeBuffer *fq_ptr); + + void tick(); + + void fetch(); + + void processCacheCompletion(); + +// private: + // Figure out PC vs next PC and how it should be updated + void squash(Addr newPC); + + public: + class CacheCompletionEvent : public Event + { + private: + SimpleFetch *fetch; + + public: + CacheCompletionEvent(SimpleFetch *_fetch); + + virtual void process(); + virtual const char *description(); + }; + + CacheCompletionEvent cacheCompletionEvent; + + private: + /** Pointer to the FullCPU. */ + FullCPU *cpu; + + /** Time buffer interface. 
*/ + TimeBuffer *timeBuffer; + + /** Wire to get decode's information from backwards time buffer. */ + typename TimeBuffer::wire fromDecode; + + /** Wire to get rename's information from backwards time buffer. */ + typename TimeBuffer::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + // Will probably have this sit in the FullCPU and just pass a pointr in. + // Simplifies the constructors of all stages. + /** Internal fetch instruction queue. */ + TimeBuffer *fetchQueue; + + //Might be annoying how this name is different than the queue. + /** Wire used to write any information heading to decode. */ + typename TimeBuffer::wire toDecode; + + /** Icache interface. */ + MemInterface *icacheInterface; + + /** Memory request used to access cache. */ + MemReqPtr memReq; + + /** Decode to fetch delay, in ticks. */ + unsigned decodeToFetchDelay; + + /** Rename to fetch delay, in ticks. */ + unsigned renameToFetchDelay; + + /** IEW to fetch delay, in ticks. */ + unsigned iewToFetchDelay; + + /** Commit to fetch delay, in ticks. */ + unsigned commitToFetchDelay; + + /** The width of fetch in instructions. */ + unsigned fetchWidth; + + /** Cache block size. */ + int blkSize; + + /** Mask to get a cache block's address. */ + Addr cacheBlockMask; + + /** The instruction being fetched. */ + MachInst inst; + + /** Size of instructions. */ + int instSize; + + /** Icache stall statistics. */ +// Stats::Scalar<> icacheStallCycles; +// Counter lastIcacheStall; +}; + +#endif //__SIMPLE_FETCH_HH__ diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh new file mode 100644 index 000000000..918d2dad2 --- /dev/null +++ b/cpu/beta_cpu/fetch_impl.hh @@ -0,0 +1,555 @@ +// Todo: Rewrite this. Add in branch prediction. 
Fix up if squashing comes +// from decode; only the correct instructions should be killed. This will +// probably require changing the CPU's instList functions to take a seqNum +// instead of a dyninst. With probe path, should be able to specify +// size of data to fetch. Will be able to get full cache line. + +// Remove this later. +#define OPCODE(X) (X >> 26) & 0x3f + +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "cpu/beta_cpu/fetch.hh" + +#include "sim/universe.hh" + +template +SimpleFetch::CacheCompletionEvent +::CacheCompletionEvent(SimpleFetch *_fetch) + : Event(&mainEventQueue), + fetch(_fetch) +{ +} + +template +void +SimpleFetch::CacheCompletionEvent::process() +{ + fetch->processCacheCompletion(); +} + +template +const char * +SimpleFetch::CacheCompletionEvent::description() +{ + return "SimpleFetch cache completion event"; +} + +template +SimpleFetch::SimpleFetch(Params ¶ms) + : cacheCompletionEvent(this), + icacheInterface(params.icacheInterface), + decodeToFetchDelay(params.decodeToFetchDelay), + renameToFetchDelay(params.renameToFetchDelay), + iewToFetchDelay(params.iewToFetchDelay), + commitToFetchDelay(params.commitToFetchDelay), + fetchWidth(params.fetchWidth), + inst(0) +{ + // Set status to idle. + _status = Idle; + + // Create a new memory request. + memReq = new MemReq(); + // Not sure of this parameter. I think it should be based on the + // thread number. +#ifndef FULL_SYSTEM + memReq->asid = params.asid; +#else + memReq->asid = 0; +#endif // FULL_SYSTEM + memReq->data = new uint8_t[64]; + + // Size of cache block. + blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. + cacheBlockMask = ~((int)log2(blkSize) - 1); + + // Get the size of an instruction. 
+ instSize = sizeof(MachInst); +} + +template +void +SimpleFetch::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); + cpu = cpu_ptr; + // This line will be removed eventually. + memReq->xc = cpu->xcBase(); +} + +template +void +SimpleFetch::setTimeBuffer(TimeBuffer *time_buffer) +{ + DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template +void +SimpleFetch::setFetchQueue(TimeBuffer *fq_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toDecode = fetchQueue->getWire(0); +} + +template +void +SimpleFetch::processCacheCompletion() +{ + DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); + + // Only change the status if it's still waiting on the icache access + // to return. + // Can keep track of how many cache accesses go unused due to + // misspeculation here. + // How to handle an outstanding miss which gets cancelled due to squash, + // then a new icache miss gets scheduled? + if (_status == IcacheMissStall) + _status = IcacheMissComplete; +} + +// Note that in the SimpleFetch<>, will most likely have to provide the +// template parameters to BP and BTB. +template +void +SimpleFetch::squash(Addr new_PC) +{ + DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); + cpu->setNextPC(new_PC + instSize); + cpu->setPC(new_PC); + + _status = Squashing; + + // Clear out the instructions that are no longer valid. + // Actually maybe slightly unrealistic to kill instructions that are + // in flight like that between stages. 
Perhaps just have next + // stage ignore those instructions or something. In the cycle where it's + // returning from squashing, the other stages can just ignore the inputs + // for that cycle. + + // Tell the CPU to remove any instructions that aren't currently + // in the ROB (instructions in flight that were killed). + cpu->removeInstsNotInROB(); +} + +template +void +SimpleFetch::tick() +{ +#if 0 + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + + // In any case, squash. + squash(fromCommit->commitInfo.nextPC); + return; + } + + if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + + // Squash unless we're already squashing? + squash(fromDecode->decodeInfo.nextPC); + return; + } + + if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); + + // Continue to squash. + _status = Squashing; + return; + } + + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + // What to do if we're already in an icache stall? + } +#endif + + if (_status != Blocked && + _status != Squashing && + _status != IcacheMissStall) { + DPRINTF(Fetch, "Fetch: Running stage.\n"); + + fetch(); + } else if (_status == Blocked) { + // If still being told to stall, do nothing. 
+ if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + } else { + + DPRINTF(Fetch, "Fetch: Done blocking.\n"); + _status = Running; + } + + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + squash(fromCommit->commitInfo.nextPC); + return; + } else if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + squash(fromDecode->decodeInfo.nextPC); + return; + } else if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); + _status = Squashing; + return; + } + } else if (_status == Squashing) { + // If there are no squash signals then change back to running. + // Note that when a squash starts happening, commitInfo.squash will + // be high. But if the squash is still in progress, then only + // commitInfo.robSquashing will be high. + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Fetch, "Fetch: Done squashing.\n"); + _status = Running; + } else if (fromCommit->commitInfo.squash) { + // If there's a new squash, then start squashing again. + squash(fromCommit->commitInfo.nextPC); + } else { + // Purely a debugging statement. + DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); + } + } + +} + +template +void +SimpleFetch::fetch() +{ + ////////////////////////////////////////// + // Check backwards communication + ////////////////////////////////////////// + + // If branch prediction is incorrect, squash any instructions, + // update PC, and do not fetch anything this cycle. 
+ + // Might want to put all the PC changing stuff in one area. + // Normally should also check here to see if there is branch + // misprediction info to update with. + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + squash(fromCommit->commitInfo.nextPC); + return; + } else if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + squash(fromDecode->decodeInfo.nextPC); + return; + } else if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); + _status = Squashing; + return; + } + + // If being told to stall, do nothing. + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + _status = Blocked; + return; + } + + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// + + // If nothing else outstanding, attempt to read instructions. + +#ifdef FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + + // The current PC. + Addr PC = cpu->readPC(); + + // Fault code for memory access. + Fault fault = No_Fault; + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. + if (_status == IcacheMissComplete) { + DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); + + // Reset the completion event to NULL. 
+ memReq->completionEvent = NULL; + + _status = Running; + } else { + DPRINTF(Fetch, "Fetch: Attempting to translate and read " + "instruction, starting at PC %08p.\n", + PC); + + // Otherwise check if the instruction exists within the cache. + // If it does, then proceed on to read the instruction and the rest + // of the instructions in the cache line until either the end of the + // cache line or a predicted taken branch is encountered. + // Note that this simply checks if the first instruction exists + // within the cache, assuming the rest of the cache line also exists + // within the cache. + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq->cmd = Read; + memReq->reset(PC, instSize, flags); + + // Translate the instruction request. + // Should this function be + // in the CPU class ? Probably...ITB/DTB should exist within the + // CPU. + + fault = cpu->translateInstReq(memReq); + + // In the case of faults, the fetch stage may need to stall and wait + // on what caused the fetch (ITB or Icache miss). + + // If translation was successful, attempt to read the first + // instruction. + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); + fault = cpu->mem->read(memReq, inst); + // This read may change when the mem interface changes. + } + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); + memReq->completionEvent = NULL; + + memReq->time = curTick; + + MemAccessResult result = icacheInterface->access(memReq); + + // If the cache missed (in this model functional and timing + // memories are different), then schedule an event to wake + // up this stage once the cache miss completes. 
+ if (result != MA_HIT && icacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; +// lastIcacheStall = curTick; + + // How does current model work as far as individual + // stages scheduling/unscheduling? + // Perhaps have only the main CPU scheduled/unscheduled, + // and have it choose what stages to run appropriately. + + DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); + _status = IcacheMissStall; + return; + } + } + } + + // As far as timing goes, the CPU will need to send an event through + // the MemReq in order to be woken up once the memory access completes. + // Probably have a status on a per thread basis so each thread can + // block independently and be woken up independently. + + Addr next_PC = 0; + InstSeqNum inst_seq; + + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predicted_branch = false; + + // Might want to keep track of various stats. +// numLinesFetched++; + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Because the first instruction was already fetched, create the + // DynInst and put it into the queue to decode. + DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq, + cpu); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + instruction, instruction->readPC()); + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + + cpu->addInst(instruction); + + // Write the instruction to the first slot in the queue + // that heads to decode. 
+ toDecode->insts[0] = instruction; + + // Now update the PC to fetch the next instruction in the cache + // line. + PC = PC + instSize; + + // Obtain the index into the cache line by getting only the low + // order bits. + int line_index = PC & cacheBlockMask; + + // Take instructions and put them into the queue heading to decode. + // Then read the next instruction in the cache line. Continue + // until either all of the fetch bandwidth is used (not an issue for + // non-SMT), or the end of the cache line is reached. Note that + // this assumes standard cachelines, and not something like a trace + // cache where lines might not end at cache-line size aligned + // addresses. + // @todo: Fix the horrible amount of translates/reads that must + // take place due to reading an entire cacheline. Ideally it + // should all take place at once, return an array of binary + // instructions, which can then be used to get all the instructions + // needed. Figure out if I can roll it back into one loop. + for (int fetched = 1; + line_index < blkSize && fetched < fetchWidth; + line_index+=instSize, ++fetched) + { + // Reset the mem request to setup the read of the next + // instruction. + memReq->reset(PC, instSize, flags); + + // Translate the instruction request. + fault = cpu->translateInstReq(memReq); + + // Read instruction. + if (fault == No_Fault) { + fault = cpu->mem->read(memReq, inst); + } + + // Check if there was a fault. + if (fault != No_Fault) { + panic("Fetch: Read of instruction faulted when it should " + "succeed; most likely exceeding cache line.\n"); + } + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Create the actual DynInst. Parameters are: + // DynInst(instruction, PC, predicted PC, CPU pointer). + // Because this simple model has no branch prediction, the + // predicted PC will simply be PC+sizeof(MachInst). + // Update to actually use a branch predictor to predict the + // target in the future. 
+ DynInst *instruction = new DynInst(inst, PC, PC+instSize, + inst_seq, cpu); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + instruction, instruction->readPC()); + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + cpu->addInst(instruction); + + // Write the instruction to the proper slot in the queue + // that heads to decode. + toDecode->insts[fetched] = instruction; + + // Might want to keep track of various stats. +// numInstsFetched++; + + // Now update the PC to fetch the next instruction in the cache + // line. + PC = PC + instSize; + } + + // If no branches predicted taken, then increment PC with + // fall-through path. This simple model always predicts not + // taken. + if (!predicted_branch) { + next_PC = PC; + } + } + + // Now that fetching is completed, update the PC to signify what the next + // cycle will be. Might want to move this to the beginning of this + // function so that the PC updates at the beginning of everything. + // Or might want to leave setting the PC to the main CPU, with fetch + // only changing the nextPC (will require correct determination of + // next PC). + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); + cpu->setPC(next_PC); + cpu->setNextPC(next_PC + instSize); + } else { + // Handle the fault. + // This stage will not be able to continue until all the ROB + // slots are empty, at which point the fault can be handled. + // The only other way it can wake up is if a squash comes along + // and changes the PC. Not sure how to handle that case...perhaps + // have it handled by the upper level CPU class which peeks into the + // time buffer and sees if a squash comes along, in which case it + // changes the status. + + DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); + + _status = Blocked; +#ifdef FULL_SYSTEM + // Trap will probably need a pointer to the CPU to do accessing. + // Or an exec context. --Write ProxyExecContext eventually. 
+ // Avoid using this for now as the xc really shouldn't be in here. + cpu->trap(fault); +#else // !FULL_SYSTEM + fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); +#endif // FULL_SYSTEM + } +} diff --git a/cpu/beta_cpu/free_list.cc b/cpu/beta_cpu/free_list.cc new file mode 100644 index 000000000..006bf4bf7 --- /dev/null +++ b/cpu/beta_cpu/free_list.cc @@ -0,0 +1,33 @@ +#include "cpu/beta_cpu/free_list.hh" + +SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) +{ + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. + for (PhysRegIndex i = numLogicalIntRegs; + i < numPhysicalIntRegs; ++i) + { + freeIntRegs.push(i); + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. Because the + // float registers' indices start where the physical registers end, + // some math must be done to determine where the free registers start. + for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; + i < numPhysicalRegs; ++i) + { + cprintf("Free List: Adding register %i to float list.\n", i); + freeFloatRegs.push(i); + } +} + diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh new file mode 100644 index 000000000..8521ad94c --- /dev/null +++ b/cpu/beta_cpu/free_list.hh @@ -0,0 +1,148 @@ +#ifndef __FREE_LIST_HH__ +#define __FREE_LIST_HH__ + +#include +#include + +#include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/comm.hh" +#include "base/trace.hh" + +using namespace std; + +// Question: Do I even need the number of logical registers? 
+// How to avoid freeing registers instantly? Same with ROB entries. + +/** + * FreeList class that simply holds the list of free integer and floating + * point registers. Can request for a free register of either type, and + * also send back free registers of either type. This is a very simple + * class, but it should be sufficient for most implementations. Like all + * other classes, it assumes that the indices for the floating point + * registers starts after the integer registers end. Hence the variable + * numPhysicalIntRegs is logically equivalent to the baseFP dependency. + * Note that + * while this most likely should be called FreeList, the name "FreeList" + * is used in a typedef within the CPU Policy, and therefore no class + * can be named simply "FreeList". + * @todo: Give a better name to the base FP dependency. + */ +class SimpleFreeList +{ + public: + + private: + /** The list of free integer registers. */ + queue freeIntRegs; + + /** The list of free floating point registers. */ + queue freeFloatRegs; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Total number of physical registers. 
*/ + int numPhysicalRegs; + + public: + SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs); + + PhysRegIndex getIntReg(); + + PhysRegIndex getFloatReg(); + + void addReg(PhysRegIndex freed_reg); + + void addIntReg(PhysRegIndex freed_reg); + + void addFloatReg(PhysRegIndex freed_reg); + + bool hasFreeIntRegs() + { return !freeIntRegs.empty(); } + + bool hasFreeFloatRegs() + { return !freeFloatRegs.empty(); } + + int numFreeIntRegs() + { return freeIntRegs.size(); } + + int numFreeFloatRegs() + { return freeFloatRegs.size(); } +}; + +inline PhysRegIndex +SimpleFreeList::getIntReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free integer register.\n"); + if (freeIntRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeIntRegs.front(); + + freeIntRegs.pop(); + + return(free_reg); +} + +inline PhysRegIndex +SimpleFreeList::getFloatReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free float register.\n"); + if (freeFloatRegs.empty()) { + panic("No free float registers!"); + } + + PhysRegIndex free_reg = freeFloatRegs.front(); + + freeFloatRegs.pop(); + + return(free_reg); +} + +inline void +SimpleFreeList::addReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg); + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + if (freed_reg < numPhysicalIntRegs) { + freeIntRegs.push(freed_reg); + } else if (freed_reg < numPhysicalRegs) { + freeFloatRegs.push(freed_reg); + } +} + +inline void +SimpleFreeList::addIntReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful.
+ freeIntRegs.push(freed_reg); +} + +inline void +SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + freeFloatRegs.push(freed_reg); +} + +#endif // __FREE_LIST_HH__ diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc new file mode 100644 index 000000000..6fbf5d69a --- /dev/null +++ b/cpu/beta_cpu/full_cpu.cc @@ -0,0 +1,503 @@ +#ifndef __SIMPLE_FULL_CPU_CC__ +#define __SIMPLE_FULL_CPU_CC__ + +#ifdef FULL_SYSTEM +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif +#include "sim/universe.hh" + +#include "cpu/exec_context.hh" +#include "cpu/beta_cpu/full_cpu.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +using namespace std; + +#ifdef FULL_SYSTEM +BaseFullCPU::BaseFullCPU(const std::string &_name, + int number_of_threads, + Counter max_insts_any_thread, + Counter max_insts_all_threads, + Counter max_loads_any_thread, + Counter max_loads_all_threads, + System *_system, Tick freq) + : BaseCPU(_name, number_of_threads, + max_insts_any_thread, max_insts_all_threads, + max_loads_any_thread, max_loads_all_threads, + _system, freq) +{ +} +#else +BaseFullCPU::BaseFullCPU(const std::string &_name, + int number_of_threads, + Counter max_insts_any_thread, + Counter max_insts_all_threads, + Counter max_loads_any_thread, + Counter max_loads_all_threads) + : BaseCPU(_name, number_of_threads, + max_insts_any_thread, max_insts_all_threads, + max_loads_any_thread, max_loads_all_threads) +{ +} +#endif // FULL_SYSTEM + +template +FullBetaCPU::TickEvent::TickEvent(FullBetaCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +template +void +FullBetaCPU::TickEvent::process() +{ + cpu->tick(); +} + +template +const char * +FullBetaCPU::TickEvent::description() +{ + return "FullBetaCPU tick 
event"; +} + +//Call constructor to all the pipeline stages here +template +FullBetaCPU::FullBetaCPU(Params &params) +#ifdef FULL_SYSTEM + : BaseFullCPU(params.name, /* number_of_threads */ 1, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads, + params.system, params.freq), +#else + : BaseFullCPU(params.name, /* number_of_threads */ 1, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads), +#endif // FULL_SYSTEM + tickEvent(this), + fetch(params), + decode(params), + rename(params), + iew(params), + commit(params), + + regFile(params.numPhysIntRegs, params.numPhysFloatRegs), + + freeList(Impl::ISA::NumIntRegs, params.numPhysIntRegs, + Impl::ISA::NumFloatRegs, params.numPhysFloatRegs), + + renameMap(Impl::ISA::NumIntRegs, params.numPhysIntRegs, + Impl::ISA::NumFloatRegs, params.numPhysFloatRegs, + Impl::ISA::NumMiscRegs, + Impl::ISA::ZeroReg, Impl::ISA::ZeroReg), + + rob(params.numROBEntries, params.squashWidth), + + // What to pass to these time buffers? + // For now just have these time buffers be pretty big.
+ timeBuffer(20, 20), + fetchQueue(20, 20), + decodeQueue(20, 20), + renameQueue(20, 20), + iewQueue(20, 20), + + xc(NULL), + + globalSeqNum(1), + +#ifdef FULL_SYSTEM + system(params.system), + memCtrl(system->memCtrl), + physmem(system->physmem), + itb(params.itb), + dtb(params.dtb), + mem(params.mem), +#else + process(params.process), + asid(params.asid), + mem(process->getMemory()), +#endif // FULL_SYSTEM + + icacheInterface(params.icacheInterface), + dcacheInterface(params.dcacheInterface), + deferRegistration(params.defReg), + numInsts(0), + funcExeInst(0) +{ + _status = Idle; +#ifdef FULL_SYSTEM + xc = new ExecContext(this, 0, system, itb, dtb, mem); + + // initialize CPU, including PC + TheISA::initCPU(&xc->regs); +#else + xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); + DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x\n", + process->prog_entry, process); + + assert(process->getMemory() != NULL); + assert(mem != NULL); +#endif // !FULL_SYSTEM + execContexts.push_back(xc); + + // The stages also need their CPU pointer setup. However this must be + // done at the upper level CPU because they have pointers to the upper + // level CPU, and not this FullBetaCPU. + + // Give each of the stages the time buffer they will use. + fetch.setTimeBuffer(&timeBuffer); + decode.setTimeBuffer(&timeBuffer); + rename.setTimeBuffer(&timeBuffer); + iew.setTimeBuffer(&timeBuffer); + commit.setTimeBuffer(&timeBuffer); + + // Also setup each of the stages' queues. + fetch.setFetchQueue(&fetchQueue); + decode.setFetchQueue(&fetchQueue); + decode.setDecodeQueue(&decodeQueue); + rename.setDecodeQueue(&decodeQueue); + rename.setRenameQueue(&renameQueue); + iew.setRenameQueue(&renameQueue); + iew.setIEWQueue(&iewQueue); + commit.setIEWQueue(&iewQueue); + commit.setRenameQueue(&renameQueue); + + // Setup the rename map for whichever stages need it.
+ rename.setRenameMap(&renameMap); + iew.setRenameMap(&renameMap); + + // Setup the free list for whichever stages need it. + rename.setFreeList(&freeList); + renameMap.setFreeList(&freeList); + + // Setup the ROB for whichever stages need it. + commit.setROB(&rob); +} + +template +FullBetaCPU::~FullBetaCPU() +{ +} + +template +void +FullBetaCPU::tick() +{ + DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullBetaCPU.\n"); + + //Tick each of the stages if they're actually running. + //Will want to figure out a way to unschedule itself if they're all + //going to be idle for a long time. + fetch.tick(); + + decode.tick(); + + rename.tick(); + + iew.tick(); + + commit.tick(); + + // Now advance the time buffers, unless the stage is stalled. + timeBuffer.advance(); + + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + + if (_status == Running && !tickEvent.scheduled()) + tickEvent.schedule(curTick + 1); +} + +template +void +FullBetaCPU::init() +{ + if(!deferRegistration) + { + this->registerExecContexts(); + + // Need to do a copy of the xc->regs into the CPU's regfile so + // that it can start properly. + + // First loop through the integer registers. + for (int i = 0; i < Impl::ISA::NumIntRegs; ++i) + { + regFile.intRegFile[i] = xc->regs.intRegFile[i]; + } + + // Then loop through the floating point registers. + for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i) + { + regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i]; + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. 
+ regFile.pc = xc->regs.pc; + regFile.npc = xc->regs.npc; + } +} + +template +void +FullBetaCPU::activateContext(int thread_num, int delay) +{ + // Needs to set each stage to running as well. + + scheduleTickEvent(delay); + + _status = Running; +} + +template +void +FullBetaCPU::suspendContext(int thread_num) +{ + panic("suspendContext unimplemented!"); +} + +template +void +FullBetaCPU::deallocateContext(int thread_num) +{ + panic("deallocateContext unimplemented!"); +} + +template +void +FullBetaCPU::haltContext(int thread_num) +{ + panic("haltContext unimplemented!"); +} + +template +void +FullBetaCPU::switchOut() +{ + panic("FullBetaCPU does not have a switch out function.\n"); +} + +template +void +FullBetaCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // Set all status's to active, schedule the + // CPU's tick event. + tickEvent.schedule(curTick); + for (int i = 0; i < execContexts.size(); ++i) { + execContexts[i]->activate(); + } + + // Switch out the other CPU. + oldCPU->switchOut(); +} + +template +InstSeqNum +FullBetaCPU::getAndIncrementInstSeq() +{ + // Hopefully this works right. 
+ return globalSeqNum++; +} + +template +uint64_t +FullBetaCPU::readIntReg(int reg_idx) +{ + return regFile.readIntReg(reg_idx); +} + +template +float +FullBetaCPU::readFloatRegSingle(int reg_idx) +{ + return regFile.readFloatRegSingle(reg_idx); +} + +template +double +FullBetaCPU::readFloatRegDouble(int reg_idx) +{ + return regFile.readFloatRegDouble(reg_idx); +} + +template +uint64_t +FullBetaCPU::readFloatRegInt(int reg_idx) +{ + return regFile.readFloatRegInt(reg_idx); +} + +template +void +FullBetaCPU::setIntReg(int reg_idx, uint64_t val) +{ + regFile.setIntReg(reg_idx, val); +} + +template +void +FullBetaCPU::setFloatRegSingle(int reg_idx, float val) +{ + regFile.setFloatRegSingle(reg_idx, val); +} + +template +void +FullBetaCPU::setFloatRegDouble(int reg_idx, double val) +{ + regFile.setFloatRegDouble(reg_idx, val); +} + +template +void +FullBetaCPU::setFloatRegInt(int reg_idx, uint64_t val) +{ + regFile.setFloatRegInt(reg_idx, val); +} + +template +uint64_t +FullBetaCPU::readPC() +{ + return regFile.readPC(); +} + +template +void +FullBetaCPU::setNextPC(uint64_t val) +{ + regFile.setNextPC(val); +} + +template +void +FullBetaCPU::setPC(Addr new_PC) +{ + regFile.setPC(new_PC); +} + +template +void +FullBetaCPU::addInst(DynInst *inst) +{ + instList.push_back(inst); +} + +template +void +FullBetaCPU::instDone() +{ + // Keep an instruction count. + numInsts++; + + // Check for instruction-count-based events. + comInstEventQueue[0]->serviceEvents(numInsts); +} + +template +void +FullBetaCPU::removeBackInst(DynInst *inst) +{ + DynInst *inst_to_delete; + + // Walk back from the newest instruction, removing everything that was + // inserted after inst; stop early if the list runs empty first. + while (!instList.empty() && instList.back() != inst) + { + assert(!instList.empty()); + + // Obtain the pointer to the instruction.
+ inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Deleting instruction %#x, PC %#x\n", + inst_to_delete, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Delete the instruction itself. + delete inst_to_delete; + } +} + +template +void +FullBetaCPU::removeFrontInst(DynInst *inst) +{ + DynInst *inst_to_delete; + + // The front instruction should be the same one being asked to be deleted. + assert(instList.front() == inst); + + // Remove the front instruction. + inst_to_delete = inst; + instList.pop_front(); + + DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n", + inst_to_delete, inst_to_delete->readPC()); + + delete inst_to_delete; +} + +template +void +FullBetaCPU::removeInstsNotInROB() +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInst *rob_tail = rob.readTailInst(); + + removeBackInst(rob_tail); +} + +template +void +FullBetaCPU::removeAllInsts() +{ + instList.clear(); +} + +template +void +FullBetaCPU::dumpInsts() +{ + int num = 0; + typename list::iterator inst_list_it = instList.begin(); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\nInst:%#x\nPC:%#x\nSN:%lli\n\n", + num, (*inst_list_it), (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum); + inst_list_it++; + ++num; + } +} + +template +void +FullBetaCPU::wakeDependents(DynInst *inst) +{ + iew.wakeDependents(inst); +} + +// Forward declaration of FullBetaCPU. +template FullBetaCPU; + +#endif // __SIMPLE_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh new file mode 100644 index 000000000..00ff1f878 --- /dev/null +++ b/cpu/beta_cpu/full_cpu.hh @@ -0,0 +1,323 @@ +//Todo: Add in a lot of the functions that are ISA specific. Also define +//the functions that currently exist within the base cpu class. 
Define +//everything for the simobject stuff so it can be serialized and +//instantiated, add in debugging statements everywhere. Have CPU schedule +//itself properly. Constructor. Derived alpha class. Threads! +// Avoid running stages and advancing queues if idle/stalled. + +#ifndef __SIMPLE_FULL_CPU_HH__ +#define __SIMPLE_FULL_CPU_HH__ + +#include +#include + +#include "cpu/beta_cpu/comm.hh" + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/base_cpu.hh" +#include "cpu/beta_cpu/cpu_policy.hh" +#include "sim/process.hh" + +using namespace std; + +class FunctionalMemory; +class Process; + +class BaseFullCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. + public: +#ifdef FULL_SYSTEM + BaseFullCPU(const std::string &_name, int _number_of_threads, + Counter max_insts_any_thread, Counter max_insts_all_threads, + Counter max_loads_any_thread, Counter max_loads_all_threads, + System *_system, Tick freq); +#else + BaseFullCPU(const std::string &_name, int _number_of_threads, + Counter max_insts_any_thread = 0, + Counter max_insts_all_threads = 0, + Counter max_loads_any_thread = 0, + Counter max_loads_all_threads = 0); +#endif // FULL_SYSTEM +}; + +template +class FullBetaCPU : public BaseFullCPU +{ + public: + //Put typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + + public: + enum Status { + Running, + Idle, + Halted, + Blocked // ? + }; + + Status _status; + + private: + class TickEvent : public Event + { + private: + FullBetaCPU *cpu; + + public: + TickEvent(FullBetaCPU *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. 
+ void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + public: + void tick(); + + FullBetaCPU(Params &params); + ~FullBetaCPU(); + + void init(); + + void activateContext(int thread_num, int delay); + void suspendContext(int thread_num); + void deallocateContext(int thread_num); + void haltContext(int thread_num); + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. */ + InstSeqNum getAndIncrementInstSeq(); + +#ifdef FULL_SYSTEM + /** Check if this address is a valid instruction address. */ + bool validInstAddr(Addr addr) { return true; } + + /** Check if this address is a valid data address. */ + bool validDataAddr(Addr addr) { return true; } + + /** Get instruction asid. */ + int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); } + + /** Get data asid. */ + int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); } +#else + bool validInstAddr(Addr addr) + { return process->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return process->validDataAddr(addr); } + + int getInstAsid() { return asid; } + int getDataAsid() { return asid; } + +#endif + + // + // New accessors for new decoder.
+ // + uint64_t readIntReg(int reg_idx); + + float readFloatRegSingle(int reg_idx); + + double readFloatRegDouble(int reg_idx); + + uint64_t readFloatRegInt(int reg_idx); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatRegSingle(int reg_idx, float val); + + void setFloatRegDouble(int reg_idx, double val); + + void setFloatRegInt(int reg_idx, uint64_t val); + + uint64_t readPC(); + + void setNextPC(uint64_t val); + + void setPC(Addr new_PC); + + /** Function to add instruction onto the head of the list of the + * instructions. Used when new instructions are fetched. + */ + void addInst(DynInst *inst); + + /** Function to tell the CPU that an instruction has completed. */ + void instDone(); + + /** Remove all instructions in back of the given instruction, but leave + * that instruction in the list. This is useful in a squash, when there + * are instructions in this list that don't exist in structures such as + * the ROB. The instruction doesn't have to be the last instruction in + * the list, but will be once this function completes. + * @todo: Remove only up until that inst? Squashed inst is most likely + * valid. + */ + void removeBackInst(DynInst *inst); + + /** Remove an instruction from the front of the list. It is expected + * that there are no instructions in front of it (that is, none are older + * than the instruction being removed). Used when retiring instructions. + * @todo: Remove the argument to this function, and just have it remove + * last instruction once it's verified that commit has the same ordering + * as the instruction list. + */ + void removeFrontInst(DynInst *inst); + + /** Remove all instructions that are not currently in the ROB. */ + void removeInstsNotInROB(); + + /** Remove all instructions from the list. */ + void removeAllInsts(); + + void dumpInsts(); + + /** Basically a wrapper function so that instructions executed at + * commit can tell the instruction queue that they have completed. 
+ * Eventually this hack should be removed. + */ + void wakeDependents(DynInst *inst); + + public: + /** List of all the instructions in flight. */ + list instList; + + //not sure these should be private. + protected: + /** The fetch stage. */ + typename CPUPolicy::Fetch fetch; + + /** The fetch stage's status. */ + typename CPUPolicy::Fetch::Status fetchStatus; + + /** The decode stage. */ + typename CPUPolicy::Decode decode; + + /** The decode stage's status. */ + typename CPUPolicy::Decode::Status decodeStatus; + + /** The rename stage. */ + typename CPUPolicy::Rename rename; + + /** The rename stage's status. */ + typename CPUPolicy::Rename::Status renameStatus; + + /** The issue/execute/writeback stages. */ + typename CPUPolicy::IEW iew; + + /** The issue/execute/writeback stage's status. */ + typename CPUPolicy::IEW::Status iewStatus; + + /** The commit stage. */ + typename CPUPolicy::Commit commit; + + /** The commit stage's status. */ + typename CPUPolicy::Commit::Status commitStatus; + + //Might want to just pass these objects in to the constructors of the + //appropriate stage. regFile is in iew, freeList in dispatch, renameMap + //in dispatch, and the rob in commit. + /** The register file. */ + typename CPUPolicy::RegFile regFile; + + /** The free list. */ + typename CPUPolicy::FreeList freeList; + + /** The rename map. */ + typename CPUPolicy::RenameMap renameMap; + + /** The re-order buffer. */ + typename CPUPolicy::ROB rob; + + public: + /** Typedefs from the Impl to get the structs that each of the + * time buffers should use. + */ + typedef typename Impl::TimeStruct TimeStruct; + + typedef typename Impl::FetchStruct FetchStruct; + + typedef typename Impl::DecodeStruct DecodeStruct; + + typedef typename Impl::RenameStruct RenameStruct; + + typedef typename Impl::IEWStruct IEWStruct; + + /** The main time buffer to do backwards communication. */ + TimeBuffer timeBuffer; + + /** The fetch stage's instruction queue.
*/ + TimeBuffer fetchQueue; + + /** The decode stage's instruction queue. */ + TimeBuffer decodeQueue; + + /** The rename stage's instruction queue. */ + TimeBuffer renameQueue; + + /** The IEW stage's instruction queue. */ + TimeBuffer iewQueue; + + public: + /** The temporary exec context to support older accessors. */ + ExecContext *xc; + + /** Temporary function to get pointer to exec context. */ + ExecContext *xcBase() { return xc; } + + InstSeqNum globalSeqNum; + +#ifdef FULL_SYSTEM + System *system; + + MemoryController *memCtrl; + PhysicalMemory *physmem; + + AlphaITB *itb; + AlphaDTB *dtb; + +// SWContext *swCtx; +#else + Process *process; + + // Address space ID. Note that this is used for TIMING cache + // simulation only; all functional memory accesses should use + // one of the FunctionalMemory pointers above. + short asid; +#endif + + FunctionalMemory *mem; + + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + bool deferRegistration; + + Counter numInsts; + + Counter funcExeInst; +}; + +#endif diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc new file mode 100644 index 000000000..8abb2f196 --- /dev/null +++ b/cpu/beta_cpu/iew.cc @@ -0,0 +1,8 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/iew_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleIEW; diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh new file mode 100644 index 000000000..52b9ccdb0 --- /dev/null +++ b/cpu/beta_cpu/iew.hh @@ -0,0 +1,166 @@ +//Todo: Update with statuses. Create constructor. Fix up time buffer stuff. +//Will also need a signal heading back at least one stage to rename to say +//how many empty skid buffer entries there are. Perhaps further back even. +//Need to handle delaying writes to the writeback bus if it's full at the +//given time. Squash properly. Load store queue. 
+ +#ifndef __SIMPLE_IEW_HH__ +#define __SIMPLE_IEW_HH__ + +// To include: time buffer, structs, queue, +#include + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" + +//Can IEW even stall? Space should be available/allocated already...maybe +//if there's not enough write ports on the ROB or waiting for CDB +//arbitration. +template +class SimpleIEW +{ + private: + //Typedefs from Impl + typedef typename Impl::ISA ISA; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::CPUPol::RenameMap RenameMap; + + typedef typename Impl::TimeStruct TimeStruct; + typedef typename Impl::IEWStruct IEWStruct; + typedef typename Impl::RenameStruct RenameStruct; + typedef typename Impl::IssueStruct IssueStruct; + + public: + enum Status { + Running, + Blocked, + Idle, + Squashing, + Unblocking + }; + + private: + Status _status; + Status _issueStatus; + Status _exeStatus; + Status _wbStatus; + + public: + void squash(); + + void squash(DynInst *inst); + + void block(); + + inline void unblock(); + + public: + SimpleIEW(Params &params); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setRenameQueue(TimeBuffer *rq_ptr); + + void setIEWQueue(TimeBuffer *iq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void wakeDependents(DynInst *inst); + + void tick(); + + void iew(); + + private: + //Interfaces to objects inside and outside of IEW. + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toRename; + + /** Rename instruction queue interface. */ + TimeBuffer *renameQueue; + + /** Wire to get rename's output from rename queue. */ + typename TimeBuffer::wire fromRename; + + /** Issue stage queue.
*/ + TimeBuffer issueToExecQueue; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + /** + * IEW stage time buffer. Holds ROB indices of instructions that + * can be marked as completed. + */ + TimeBuffer *iewQueue; + + /** Wire to write information heading to commit. */ + typename TimeBuffer::wire toCommit; + + //Will need internal queue to hold onto instructions coming from + //the rename stage in case of a stall. + /** Skid buffer between rename and IEW. */ + queue skidBuffer; + + /** Instruction queue. */ + IQ instQueue; + + /** Pointer to rename map. Might not want this stage to directly + * access this though... + */ + RenameMap *renameMap; + + /** CPU interface. */ + FullCPU *cpu; + + private: + /** Commit to IEW delay, in ticks. */ + unsigned commitToIEWDelay; + + /** Rename to IEW delay, in ticks. */ + unsigned renameToIEWDelay; + + /** + * Issue to execute delay, in ticks. What this actually represents is + * the amount of time it takes for an instruction to wake up, be + * scheduled, and sent to a FU for execution. + */ + unsigned issueToExecuteDelay; + + /** Width of issue's read path, in instructions. The read path is both + * the skid buffer and the rename instruction queue. + * Note to self: is this really different than issueWidth? + */ + unsigned issueReadWidth; + + /** Width of issue, in instructions. */ + unsigned issueWidth; + + /** Width of execute, in instructions. Might make more sense to break + * down into FP vs int. + */ + unsigned executeWidth; + + /** Number of cycles stage has been squashing. Used so that the stage + * knows when it can start unblocking, which is when the previous stage + * has received the stall signal and clears up its outputs.
+ */ + unsigned cyclesSquashing; + + //Will implement later + //Load queue interface (probably one and the same) + //Store queue interface +}; + +#endif diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh new file mode 100644 index 000000000..b198220f5 --- /dev/null +++ b/cpu/beta_cpu/iew_impl.hh @@ -0,0 +1,443 @@ +// @todo: Fix the instantaneous communication among all the stages within +// iew. There's a clear delay between issue and execute, yet backwards +// communication happens simultaneously. Might not be that bad really... +// it might skew stats a bit though. Issue would otherwise try to issue +// instructions that would never be executed if there were a delay; without +// it issue will simply squash. Make this stage block properly. Make this +// stage delay after a squash properly. Update the statuses for each stage. +// Actually read instructions out of the skid buffer. + +#include + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/iew.hh" + +template +SimpleIEW::SimpleIEW(Params &params) + : // Just make this time buffer really big for now + issueToExecQueue(20, 20), + instQueue(params), + commitToIEWDelay(params.commitToIEWDelay), + renameToIEWDelay(params.renameToIEWDelay), + issueToExecuteDelay(params.issueToExecuteDelay), + issueReadWidth(params.issueWidth), + issueWidth(params.issueWidth), + executeWidth(params.executeWidth) +{ + DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth); + _status = Idle; + _issueStatus = Idle; + _exeStatus = Idle; + _wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute.
+ instQueue.setIssueToExecuteQueue(&issueToExecQueue); +} + +template +void +SimpleIEW::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); + cpu = cpu_ptr; + + instQueue.setCPU(cpu_ptr); +} + +template +void +SimpleIEW::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToIEWDelay); + + // Setup wire to write information back to previous stages. + toRename = timeBuffer->getWire(0); + + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template +void +SimpleIEW::setRenameQueue(TimeBuffer *rq_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to read information from rename queue. + fromRename = renameQueue->getWire(-renameToIEWDelay); +} + +template +void +SimpleIEW::setIEWQueue(TimeBuffer *iq_ptr) +{ + DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to write instructions to commit. + toCommit = iewQueue->getWire(0); +} + +template +void +SimpleIEW::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template +void +SimpleIEW::wakeDependents(DynInst *inst) +{ + instQueue.wakeDependents(inst); +} + +template +void +SimpleIEW::block() +{ + DPRINTF(IEW, "IEW: Blocking.\n"); + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromRename); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. 
+} + +template +inline void +SimpleIEW::unblock() +{ + // Check if there's information in the skid buffer. If there is, then + // set status to unblocking, otherwise set it directly to running. + DPRINTF(IEW, "IEW: Reading instructions out of the skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toRename->iewInfo.stall = true; + } else { + DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); + _status = Running; + } +} + +template +void +SimpleIEW::squash() +{ + DPRINTF(IEW, "IEW: Squashing all instructions.\n"); + _status = Squashing; + + // Tell the IQ to start squashing. + instQueue.squash(); + + // Tell rename to squash through the time buffer. + // This communication may be redundant depending upon where squash() + // is called. +// toRename->iewInfo.squash = true; +} + +template +void +SimpleIEW::squash(DynInst *inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC:%#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toRename->iewInfo.squash = true; + // Also send PC update information back to prior stages. + toRename->iewInfo.squashedSeqNum = inst->seqNum; + toRename->iewInfo.nextPC = inst->readCalcTarg(); + toRename->iewInfo.predIncorrect = true; +} + +template +void +SimpleIEW::tick() +{ + // Considering putting all the state-determining stuff in this section. + + // Try to fill up issue queue with as many instructions as bandwidth + // allows. + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + + // Check if the stage is in a running status. 
+ if (_status != Blocked && _status != Squashing) { + DPRINTF(IEW, "IEW: Status is not blocked, attempting to run " + "stage.\n"); + iew(); + + // If it's currently unblocking, check to see if it should switch + // to running. + if (_status == Unblocking) { + unblock(); + } + } else if (_status == Squashing) { + + DPRINTF(IEW, "IEW: Still squashing.\n"); + + // Check if stage should remain squashing. Stop squashing if the + // squash signal clears. + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + DPRINTF(IEW, "IEW: Done squashing, changing status to " + "running.\n"); + + _status = Running; + instQueue.stopSquash(); + } else { + instQueue.doSquash(); + } + + // Also should advance its own time buffers if the stage ran. + // Not sure about this... +// issueToExecQueue.advance(); + } else if (_status == Blocked) { + // Continue to tell previous stage to stall. + toRename->iewInfo.stall = true; + + // Check if possible stall conditions have cleared. + if (!fromCommit->commitInfo.stall && + !instQueue.isFull()) { + DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n"); + _status = Unblocking; + } + + // If there's still instructions coming from rename, continue to + // put them on the skid buffer. + if (fromRename->insts[0] != NULL) { + block(); + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + } + + // @todo: Maybe put these at the beginning, so if it's idle it can + // return early. + // Write back number of free IQ entries here. + toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", + instQueue.numFreeEntries()); +} + +template +void +SimpleIEW::iew() +{ + // Might want to put all state checks in the tick() function. + // Check if being told to stall from commit. 
+ if (fromCommit->commitInfo.stall) { + block(); + return; + } else if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + // Also check if commit is telling this stage to squash. + squash(); + return; + } + + //////////////////////////////////////// + //ISSUE stage + //////////////////////////////////////// + + //Put into its own function? + //Add instructions to IQ if there are any instructions there + + // Check if there are any instructions coming from rename, and we're. + // not squashing. + if (fromRename->insts[0] != NULL && _status != Squashing) { + + // Loop through the instructions, putting them in the instruction + // queue. + for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num) + { + DynInst *inst = fromRename->insts[inst_num]; + + // Make sure there's a valid instruction there. + if (inst == NULL) + break; + + DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", + inst->readPC()); + + // If it's a memory reference, don't put it in the + // instruction queue. These will only be executed at commit. + // Do the same for nonspeculative instructions and nops. + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. 
+ if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Issue: Memory instruction " + "encountered, skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + continue; + } else if (inst->isNonSpeculative()) { + DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " + "encountered, skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + continue; + } else if (inst->isNop()) { + DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " + ", skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + continue; + } else if (instQueue.isFull()) { + DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); + // Call function to start blocking. + block(); + // Tell previous stage to stall. + toRename->iewInfo.stall = true; + break; + } + + // If the instruction queue is not full, then add the + // instruction. + instQueue.insert(fromRename->insts[inst_num]); + } + } + + // Have the instruction queue try to schedule any ready instructions. + instQueue.scheduleReadyInsts(); + + //////////////////////////////////////// + //EXECUTE/WRITEBACK stage + //////////////////////////////////////// + + //Put into its own function? + //Similarly should probably have separate execution for int vs FP. + // Above comment is handled by the issue queue only issuing a valid + // mix of int/fp instructions. + //Actually okay to just have one execution, buuuuuut will need + //somewhere that defines the execution latency of all instructions. + // @todo: Move to the FU pool used in the current full cpu. + + int fu_usage = 0; + + // Execute/writeback any instructions that are available. + for (int inst_num = 0; + fu_usage < executeWidth && /* Haven't exceeded available FU's. */ + inst_num < issueWidth && /* Haven't exceeded issue width. */ + fromIssue->insts[inst_num]; /* There are available instructions. 
*/ + ++inst_num) { + DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); + + // Get instruction from issue's queue. + DynInst *inst = fromIssue->insts[inst_num]; + + DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); + + inst->setExecuted(); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + continue; + } + + // If an instruction is executed, then count it towards FU usage. + ++fu_usage; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + inst->execute(); + + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + // Add finished instruction to queue to commit. The slot is the + // count of instructions executed so far rather than inst_num: + // squashed instructions are skipped above, and indexing by + // inst_num would leave NULL holes that make the wake-up loop + // below (which stops at the first NULL entry) miss every + // instruction after the hole. + toCommit->insts[fu_usage - 1] = inst; + + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + if (inst->mispredicted()) { + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squash(inst); + + // Not sure it really needs to break. +// break; + } + } + + // Loop through the head of the time buffer and wake any dependents. + // These instructions are about to write back. 
In the simple model + // this loop can really happen within the previous loop, but when + // instructions have actual latencies, this loop must be separate. + // Also mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to rename map, or have this as + // part of backwards communication. + for (int inst_num = 0; inst_num < executeWidth && + toCommit->insts[inst_num] != NULL; inst_num++) + { + DynInst *inst = toCommit->insts[inst_num]; + + DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", + inst->readPC()); + + instQueue.wakeDependents(inst); + + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } + } + + // Also should advance its own time buffers if the stage ran. + // Not the best place for it, but this works (hopefully). + issueToExecQueue.advance(); +} diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc new file mode 100644 index 000000000..43b0a4572 --- /dev/null +++ b/cpu/beta_cpu/inst_queue.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template InstructionQueue; diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh new file mode 100644 index 000000000..5741bfcf5 --- /dev/null +++ b/cpu/beta_cpu/inst_queue.hh @@ -0,0 +1,243 @@ +#ifndef __INST_QUEUE_HH__ +#define __INST_QUEUE_HH__ + +#include +#include +#include + +#include "base/timebuf.hh" + +using namespace std; + +//Perhaps have a better separation between the data structure underlying +//and the actual algorithm. +//somewhat nasty to try to have a nice ordering. +// Consider moving to STL list or slist for the LL stuff. + +/** + * A standard instruction queue class. 
It holds instructions in an + * array, holds the ordering of the instructions within a linked list, + * and tracks producer/consumer dependencies within a separate linked + * list. Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. + */ +template +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::Params Params; + + typedef typename Impl::IssueStruct IssueStruct; + typedef typename Impl::TimeStruct TimeStruct; + + // Typedef of iterator through the list of instructions. Might be + // better to untie this from the FullCPU or pass its information to + // the stages. + typedef typename list::iterator ListIt; + + /** + * Class for priority queue entries. Mainly made so that the < operator + * is defined. + */ + struct ReadyEntry { + DynInst *inst; + + ReadyEntry(DynInst *_inst) + : inst(_inst) + { } + + /** Compare(lhs,rhs) checks if rhs is "bigger" than lhs. If so, rhs + * goes higher on the priority queue. The oldest instruction should + * be on the top of the instruction queue, so in this case "bigger" + * has the reverse meaning; the instruction with the lowest + * sequence number is on the top. 
+ */ + bool operator <(const ReadyEntry &rhs) const + { + if (this->inst->seqNum > rhs.inst->seqNum) + return true; + return false; + } + }; + + /** Constructs the IQ from the CPU parameters (number of entries, + * issue widths, physical register counts, commit-to-IEW delay). + */ + InstructionQueue(Params &params); + + /** Sets the CPU pointer and resets the tail iterator to the start of + * the CPU's instruction list. + */ + void setCPU(FullCPU *cpu); + + /** Sets the queue that issued instructions are written into. */ + void setIssueToExecuteQueue(TimeBuffer *i2eQueue); + + /** Sets the backwards time buffer and the wire used to read commit's + * signals from it. + */ + void setTimeBuffer(TimeBuffer *tb_ptr); + + /** Returns the number of free IQ entries. */ + unsigned numFreeEntries(); + + /** Returns true if there are no free IQ entries. */ + bool isFull(); + + /** Inserts an instruction into the IQ and records its register + * dependencies; the instruction is added to a ready list if it is + * already ready. + */ + void insert(DynInst *new_inst); + + /** Advances the tail iterator past an instruction that is not placed + * in the IQ, while still recording it as the producer of its + * destination registers. + */ + void advanceTail(DynInst *inst); + + /** Issues ready instructions, oldest first, into the issue to execute + * queue. + */ + void scheduleReadyInsts(); + + /** Wakes the instructions waiting on the completed instruction's + * destination registers. + */ + void wakeDependents(DynInst *completed_inst); + + /** Walks backwards from the squash iterator, marking un-issued + * instructions younger than the squashed sequence number as squashed. + */ + void doSquash(); + + /** Starts a squash using the sequence number read from commit. */ + void squash(); + + /** Clears the squash sequence number and squash iterator. */ + void stopSquash(); + + private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer *issueToExecuteQueue; + + /** The backwards time buffer. */ + TimeBuffer *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer::wire fromCommit; + + enum InstList { + Int, + Float, + Branch, + Squashed, + None + }; + + /** List of ready int instructions, ordered by sequence number so that + * the oldest instruction is on top. */ + priority_queue readyIntInsts; + + /** List of ready floating point instructions. */ + priority_queue readyFloatInsts; + + /** List of ready branch instructions. */ + priority_queue readyBranchInsts; + + /** List of squashed instructions (which are still valid and in IQ). + * Implemented using a priority queue; the entries must contain both + * the IQ index and sequence number of each instruction so that + * ordering based on sequence numbers can be used. + */ + priority_queue squashedInsts; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. 
*/ + unsigned numEntries; + + /** The number of integer instructions that can be issued in one + * cycle. + */ + unsigned intWidth; + + /** The number of floating point instructions that can be issued + * in one cycle. + */ + unsigned floatWidth; + + /** The number of branches that can be issued in one cycle. */ + unsigned branchWidth; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + + //The number of physical registers in the CPU. + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Iterator that points to the oldest instruction in the IQ. */ + ListIt head; + + /** Iterator that points to the youngest instruction in the IQ. */ + ListIt tail; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt; + + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DynInst *inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. 
So for now it will sit here, within the IQ, until + //a better implementation is decided upon. + // This function probably shouldn't be within the entry... + void insert(DynInst *new_inst); + + void remove(DynInst *inst_to_remove); + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector regScoreboard; + + bool addToDependents(DynInst *new_inst); + void insertDependency(DynInst *new_inst); + void createDependency(DynInst *new_inst); + + void addIfReady(DynInst *inst); +}; + +#endif //__INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh new file mode 100644 index 000000000..6f1f06858 --- /dev/null +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -0,0 +1,684 @@ +#ifndef __INST_QUEUE_IMPL_HH__ +#define __INST_QUEUE_IMPL_HH__ + +// Todo: Fix up consistency errors about back of the ready list being +// the oldest instructions in the queue. When woken up from the dependency +// graph they will be the oldest, but when they are immediately executable +// newer instructions will mistakenly get inserted onto the back. Also +// current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleRreadyInsts(), gets put onto the +// normal ready queue. 
This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards. + +#include + +#include "sim/universe.hh" +#include "cpu/beta_cpu/inst_queue.hh" + +// Either compile error or max int due to sign extension. +// Blatant hack to avoid compile warnings. +const InstSeqNum MaxInstSeqNum = 0 - 1; + +template +InstructionQueue::InstructionQueue(Params ¶ms) + : numEntries(params.numIQEntries), + intWidth(params.executeIntWidth), + floatWidth(params.executeFloatWidth), + numPhysIntRegs(params.numPhysIntRegs), + numPhysFloatRegs(params.numPhysFloatRegs), + commitToIEWDelay(params.commitToIEWDelay) +{ + // HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER. + totalWidth = 1; + branchWidth = 1; + DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); + + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + + DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. + dependGraph = new DependencyEntry[numPhysRegs]; + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + + // Initialize all the head pointers to point to NULL, and all the + // entries as unready. + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. 
+ for (int i = 0; i < numPhysRegs; ++i) + { + dependGraph[i].next = NULL; + dependGraph[i].inst = NULL; + regScoreboard[i] = false; + } + +} + +template +void +InstructionQueue::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); +} + +template +void +InstructionQueue::setIssueToExecuteQueue( + TimeBuffer *i2e_ptr) +{ + DPRINTF(IQ, "IQ: Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} + +template +void +InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(IQ, "IQ: Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template +bool +InstructionQueue::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template +unsigned +InstructionQueue::numFreeEntries() +{ + return freeEntries; +} + +template +void +InstructionQueue::insert(DynInst *new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != new_inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + + // Decrease the number of free entries. 
+ --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); + + assert(freeEntries == (numEntries - countInsts())); +} + +// Slightly hack function to advance the tail iterator in the case that +// the IEW stage issues an instruction that is not added to the IQ. This +// is needed in case a long chain of such instructions occurs. +template +void +InstructionQueue::advanceTail(DynInst *inst) +{ + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + assert(freeEntries <= numEntries); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +// Need to make sure the number of float and integer instructions +// issued does not exceed the total issue bandwidth. Probably should +// have some sort of limit of total number of branches that can be issued +// as well. 
+template +void +InstructionQueue::scheduleReadyInsts() +{ + DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from " + "the IQ.\n"); + + int int_issued = 0; + int float_issued = 0; + int branch_issued = 0; + int squashed_issued = 0; + int total_issued = 0; + + IssueStruct *i2e_info = issueToExecuteQueue->access(0); + + bool insts_available = !readyBranchInsts.empty() || + !readyIntInsts.empty() || + !readyFloatInsts.empty() || + !squashedInsts.empty(); + + // Note: Requires a globally defined constant. + InstSeqNum oldest_inst = MaxInstSeqNum; + InstList list_with_oldest = None; + + // Temporary values. + DynInst *int_head_inst; + DynInst *float_head_inst; + DynInst *branch_head_inst; + DynInst *squashed_head_inst; + + // Somewhat nasty code to look at all of the lists where issuable + // instructions are located, and choose the oldest instruction among + // those lists. Consider a rewrite in the future. + while (insts_available && total_issued < totalWidth) + { + // Set this to false. Each if-block is required to set it to true + // if there were instructions available this check. This will cause + // this loop to run once more than necessary, but avoids extra calls. 
+ insts_available = false; + + oldest_inst = MaxInstSeqNum; + + list_with_oldest = None; + + if (!readyIntInsts.empty() && + int_issued < intWidth) { + + insts_available = true; + + int_head_inst = readyIntInsts.top().inst; + + if (int_head_inst->isSquashed()) { + readyIntInsts.pop(); + continue; + } + + oldest_inst = int_head_inst->seqNum; + + list_with_oldest = Int; + } + + if (!readyFloatInsts.empty() && + float_issued < floatWidth) { + + insts_available = true; + + float_head_inst = readyFloatInsts.top().inst; + + if (float_head_inst->isSquashed()) { + readyFloatInsts.pop(); + continue; + } else if (float_head_inst->seqNum < oldest_inst) { + oldest_inst = float_head_inst->seqNum; + + list_with_oldest = Float; + } + } + + if (!readyBranchInsts.empty() && + branch_issued < branchWidth) { + + insts_available = true; + + branch_head_inst = readyBranchInsts.top().inst; + + if (branch_head_inst->isSquashed()) { + readyBranchInsts.pop(); + continue; + } else if (branch_head_inst->seqNum < oldest_inst) { + oldest_inst = branch_head_inst->seqNum; + + list_with_oldest = Branch; + } + + } + + if (!squashedInsts.empty()) { + + insts_available = true; + + squashed_head_inst = squashedInsts.top().inst; + + if (squashed_head_inst->seqNum < oldest_inst) { + list_with_oldest = Squashed; + } + + } + + DynInst *issuing_inst = NULL; + + switch (list_with_oldest) { + case None: + DPRINTF(IQ, "IQ: Not able to schedule any instructions. 
Issuing " + "inst is %#x.\n", issuing_inst); + break; + case Int: + issuing_inst = int_head_inst; + readyIntInsts.pop(); + ++int_issued; + DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Float: + issuing_inst = float_head_inst; + readyFloatInsts.pop(); + ++float_issued; + DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Branch: + issuing_inst = branch_head_inst; + readyBranchInsts.pop(); + ++branch_issued; + DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Squashed: + issuing_inst = squashed_head_inst; + squashedInsts.pop(); + ++squashed_issued; + DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", + issuing_inst->readPC()); + break; + } + + if (list_with_oldest != None) { + i2e_info->insts[total_issued] = issuing_inst; + + issuing_inst->setIssued(); + + ++freeEntries; + ++total_issued; + } + + assert(freeEntries == (numEntries - countInsts())); + } +} + +template +void +InstructionQueue::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInst *squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + // Remove the instruction from the dependency list. + int8_t total_src_regs = squashed_inst->numSrcRegs(); + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only remove it from the dependency graph if it was + // placed there in the first place. 
+ // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx)) { + // Keep the index at full PhysRegIndex width; an int8_t + // wraps negative for register indices above 127 and + // would index outside the dependency graph. + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Registers at or beyond numPhysRegs have no dependency + // graph entry, so there is nothing to remove for them. + if (src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + } + } + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + + ReadyEntry temp(squashed_inst); + + squashedInsts.push(temp); + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + squashIt--; + } +} + +template +void +InstructionQueue::squash() +{ + DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n"); + + // Read instruction sequence number of last instruction out of the + // time buffer. + squashedSeqNum = fromCommit->commitInfo.doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt = tail; + + // Call doSquash. + doSquash(); +} + +template +void +InstructionQueue::stopSquash() +{ + // Clear up the squash variables to ensure that squashing doesn't + // get called improperly. + squashedSeqNum = 0; + + squashIt = cpu->instList.end(); +} + +template +int +InstructionQueue::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + count_it++; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. + if (count_it != cpu->instList.end() && + (*count_it) != NULL && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template +void +InstructionQueue::wakeDependents(DynInst *completed_inst) +{ + DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); + //Look at the physical destination register of the DynInst + //and look it up on the dependency graph. Then mark as ready + //any instructions within the instruction queue. 
+ int8_t total_dest_regs = completed_inst->numDestRegs(); + + DependencyEntry *curr; + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + while (dependGraph[dest_reg].next != NULL) { + + curr = dependGraph[dest_reg].next; + + DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + // Unlink the entry from the head of the chain before freeing it. + dependGraph[dest_reg].next = curr->next; + + delete curr; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +} + +template +bool +InstructionQueue::addToDependents(DynInst *new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready.
+ if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template +void +InstructionQueue::createDependency(DynInst *new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + // Use PhysRegIndex, as wakeDependents() does; narrowing to int8_t + // turns indices above 127 negative. + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Registers at or above numPhysRegs (uniq or control registers) + // are not handled by the IQ and have no dependency graph entry. + // wakeDependents() skips them too, so it would never mark them + // ready again if they were marked not-ready here. + if (dest_reg >= numPhysRegs) { + continue; + } + + dependGraph[dest_reg].inst = new_inst; + if (dependGraph[dest_reg].next != NULL) { + panic("Dependency chain is not empty.\n"); + } + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} + +template +void +InstructionQueue::DependencyEntry::insert(DynInst *new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain.
+ DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; +} + +template +void +InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) +{ + // Unlinks and frees the chain entry that refers to inst_to_remove. + // Does nothing if the chain is empty or holds no such entry. + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Find the instruction to remove within the dependency linked list, + // stopping at the end of the chain instead of dereferencing NULL. + while (curr != NULL && curr->inst != inst_to_remove) + { + prev = curr; + curr = curr->next; + } + + // Because this instruction is being removed from a dependency list, + // it should have been placed there at an earlier time. The chain + // can still be empty, or not contain it, if the instruction + // dependent upon it is already ready; nothing to unlink then. + if (curr == NULL) { + return; + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + delete curr; +} + +template +void +InstructionQueue::addIfReady(DynInst *inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + ReadyEntry to_add(inst); + //Add the instruction to the proper ready list.
+ if (inst->isInteger()) { + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(to_add); + } else if (inst->isFloating()) { + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(to_add); + } else if (inst->isControl()) { + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyBranchInsts.push(to_add); + } else { + panic("IQ: Instruction not an expected type.\n"); + } + } +} + +#endif // __INST_QUEUE_IMPL_HH__ diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh new file mode 100644 index 000000000..21e0ce218 --- /dev/null +++ b/cpu/beta_cpu/regfile.hh @@ -0,0 +1,583 @@ +#ifndef __REGFILE_HH__ +#define __REGFILE_HH__ + +// @todo: Destructor + +using namespace std; + +#include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/comm.hh" + +// This really only depends on the ISA, and not the Impl. It might be nicer +// to see if I can make it depend on nothing... +// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, +// and should go in the AlphaFullCPU. + +template +class PhysRegFile +{ + //Note that most of the definitions of the IntReg, FloatReg, etc. exist + //within the Impl class and not within this PhysRegFile class. + + //Will need some way to allow stuff like swap_palshadow to access the + //correct registers. Might require code changes to swap_palshadow and + //other execution contexts. + + //Will make these registers public for now, but they probably should + //be private eventually with some accessor functions. + public: + typedef typename Impl::ISA ISA; + + PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs); + + //Everything below should be pretty well identical to the normal + //register file that exists within AlphaISA class. 
+ //The duplication is unfortunate but it's better than having + //different ways to access certain registers. + + //Add these in later when everything else is in place +// void serialize(std::ostream &os); +// void unserialize(Checkpoint *cp, const std::string §ion); + + uint64_t readIntReg(PhysRegIndex reg_idx) + { + DPRINTF(IEW, "RegFile: Access to int register %i, has data " + "%i\n", int(reg_idx), intRegFile[reg_idx]); + return intRegFile[reg_idx]; + } + + float readFloatRegSingle(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Access to float register %i, has data " + "%f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); + + return (float)floatRegFile[reg_idx].d; + } + + double readFloatRegDouble(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Access to float register %i, has data " + "%f\n", int(reg_idx), floatRegFile[reg_idx].d); + + return floatRegFile[reg_idx].d; + } + + uint64_t readFloatRegInt(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Access to float register %i, has data " + "%f\n", int(reg_idx), floatRegFile[reg_idx].q); + + return floatRegFile[reg_idx].q; + } + + void setIntReg(PhysRegIndex reg_idx, uint64_t val) + { + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", + int(reg_idx), val); + + intRegFile[reg_idx] = val; + } + + void setFloatRegSingle(PhysRegIndex reg_idx, float val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + int(reg_idx), val); + + floatRegFile[reg_idx].d = (double)val; + } + + void setFloatRegDouble(PhysRegIndex reg_idx, double val) + { + // Remove the base Float reg dependency. 
+ reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + int(reg_idx), val); + + floatRegFile[reg_idx].d = val; + } + + void setFloatRegInt(PhysRegIndex reg_idx, uint64_t val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), val); + + floatRegFile[reg_idx].q = val; + } + + uint64_t readPC() + { + return pc; + } + + void setPC(uint64_t val) + { + pc = val; + } + + void setNextPC(uint64_t val) + { + npc = val; + } + + //Consider leaving this stuff and below in some implementation specific + //file as opposed to the general register file. Or have a derived class. + uint64_t readUniq() + { + return miscRegs.uniq; + } + + void setUniq(uint64_t val) + { + miscRegs.uniq = val; + } + + uint64_t readFpcr() + { + return miscRegs.fpcr; + } + + void setFpcr(uint64_t val) + { + miscRegs.fpcr = val; + } + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault); + Fault setIpr(int idx, uint64_t val); + int readIntrFlag() { return intrflag; } + void setIntrFlag(int val) { intrflag = val; } +#endif + + // These should be private eventually, but will be public for now + // so that I can hack around the initregs issue. + public: + /** (signed) integer register file. */ + IntReg *intRegFile; + + /** Floating point register file. */ + FloatReg *floatRegFile; + + /** Miscellaneous register file. 
*/ + MiscRegFile miscRegs; + + Addr pc; // program counter + Addr npc; // next-cycle program counter + + private: + unsigned numPhysicalIntRegs; + unsigned numPhysicalFloatRegs; +}; + +// Allocates the integer and floating point physical register arrays +// with the requested number of entries and zeroes every entry. +template +PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs) + : numPhysicalIntRegs(_numPhysicalIntRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs) +{ + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; + + // memset takes a byte count, so scale the element size by the number + // of registers; sizeof(*array) alone only clears the first register. + memset(intRegFile, 0, sizeof(*intRegFile) * numPhysicalIntRegs); + memset(floatRegFile, 0, sizeof(*floatRegFile) * numPhysicalFloatRegs); +} + +#ifdef FULL_SYSTEM + +//Problem: This code doesn't make sense at the RegFile level because it +//needs things such as the itb and dtb. Either put it at the CPU level or +//the DynInst level. +template +uint64_t +PhysRegFile::readIpr(int idx, Fault &fault) +{ + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case ISA::IPR_PALtemp0: + case ISA::IPR_PALtemp1: + case ISA::IPR_PALtemp2: + case ISA::IPR_PALtemp3: + case ISA::IPR_PALtemp4: + case ISA::IPR_PALtemp5: + case ISA::IPR_PALtemp6: + case ISA::IPR_PALtemp7: + case ISA::IPR_PALtemp8: + case ISA::IPR_PALtemp9: + case ISA::IPR_PALtemp10: + case ISA::IPR_PALtemp11: + case ISA::IPR_PALtemp12: + case ISA::IPR_PALtemp13: + case ISA::IPR_PALtemp14: + case ISA::IPR_PALtemp15: + case ISA::IPR_PALtemp16: + case ISA::IPR_PALtemp17: + case ISA::IPR_PALtemp18: + case ISA::IPR_PALtemp19: + case ISA::IPR_PALtemp20: + case ISA::IPR_PALtemp21: + case ISA::IPR_PALtemp22: + case ISA::IPR_PALtemp23: + case ISA::IPR_PAL_BASE: + + case ISA::IPR_IVPTBR: + case ISA::IPR_DC_MODE: + case ISA::IPR_MAF_MODE: + case ISA::IPR_ISR: + case ISA::IPR_EXC_ADDR: + case ISA::IPR_IC_PERR_STAT: + case ISA::IPR_DC_PERR_STAT: + case ISA::IPR_MCSR: + case ISA::IPR_ASTRR: + case ISA::IPR_ASTER: + case ISA::IPR_SIRR: + case ISA::IPR_ICSR: + case ISA::IPR_ICM: + case ISA::IPR_DTB_CM: + case ISA::IPR_IPLR: + case ISA::IPR_INTID: + case ISA::IPR_PMCTR: + // no
side-effect + retval = ipr[idx]; + break; + + case ISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case ISA::IPR_VA: + // SFX: unlocks interrupt status registers + retval = ipr[idx]; + + if (!misspeculating()) + regs.intrlock = false; + break; + + case ISA::IPR_VA_FORM: + case ISA::IPR_MM_STAT: + case ISA::IPR_IFAULT_VA_FORM: + case ISA::IPR_EXC_MASK: + case ISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case ISA::IPR_DTB_PTE: + { + ISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case ISA::IPR_HWINT_CLR: + case ISA::IPR_SL_XMIT: + case ISA::IPR_DC_FLUSH: + case ISA::IPR_IC_FLUSH: + case ISA::IPR_ALT_MODE: + case ISA::IPR_DTB_IA: + case ISA::IPR_DTB_IAP: + case ISA::IPR_ITB_IA: + case ISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +#ifdef DEBUG +// Cause the simulator to break when changing to the following IPL +int break_ipl = -1; +#endif + +template +Fault +PhysRegFile::setIpr(int idx, uint64_t val) +{ + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case ISA::IPR_PALtemp0: + case ISA::IPR_PALtemp1: + case ISA::IPR_PALtemp2: + case ISA::IPR_PALtemp3: + case ISA::IPR_PALtemp4: + case ISA::IPR_PALtemp5: + case ISA::IPR_PALtemp6: + case ISA::IPR_PALtemp7: + case ISA::IPR_PALtemp8: + case ISA::IPR_PALtemp9: + case ISA::IPR_PALtemp10: + case ISA::IPR_PALtemp11: + case ISA::IPR_PALtemp12: + case ISA::IPR_PALtemp13: + case ISA::IPR_PALtemp14: 
+ case ISA::IPR_PALtemp15: + case ISA::IPR_PALtemp16: + case ISA::IPR_PALtemp17: + case ISA::IPR_PALtemp18: + case ISA::IPR_PALtemp19: + case ISA::IPR_PALtemp20: + case ISA::IPR_PALtemp21: + case ISA::IPR_PALtemp22: + case ISA::IPR_PAL_BASE: + case ISA::IPR_IC_PERR_STAT: + case ISA::IPR_DC_PERR_STAT: + case ISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case ISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case ISA::IPR_CC: + // This IPR only writes the upper 32 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). + ipr[idx] = val; + break; + + case ISA::IPR_PALtemp23: + // write entire quad, then report the old and new values to + // kernelStats.context() + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case ISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case ISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case ISA::IPR_ASTRR: + case ISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case ISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case ISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + // NOTE(review): no break here; control falls through to the + // IPR_ICM case below, which stores the two mode bits -- + // presumably intentional, confirm. + + case ISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case ISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case ISA::IPR_MCSR: + // more here after optimization...
+ ipr[idx] = val; + break; + + case ISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case ISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case ISA::IPR_IVPTBR: + case ISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case ISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case ISA::IPR_DC_MODE: + case ISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case ISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case ISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case ISA::IPR_EXC_SUM: + case ISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case ISA::IPR_INTID: + case ISA::IPR_SL_RCV: + case ISA::IPR_MM_STAT: + case ISA::IPR_ITB_PTE_TEMP: + case ISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case ISA::IPR_HWINT_CLR: + case ISA::IPR_SL_XMIT: + case ISA::IPR_DC_FLUSH: + case ISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case ISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case ISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case ISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN])); + break; + + case ISA::IPR_DTB_TAG: { + struct ISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case ISA::IPR_ITB_PTE: { + struct ISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry; the ITB entry never gets write + // enables (xwe is forced to 0) + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into the ITB, tagged with the + // previously written IPR_ITB_TAG value + itb->insert(ipr[ISA::IPR_ITB_TAG], pte); + } + break; + + case ISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case ISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case ISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error...
+ return No_Fault; +} + +#endif // #ifdef FULL_SYSTEM + +#endif // __REGFILE_HH__ diff --git a/cpu/beta_cpu/rename.cc b/cpu/beta_cpu/rename.cc new file mode 100644 index 000000000..bcce7ef49 --- /dev/null +++ b/cpu/beta_cpu/rename.cc @@ -0,0 +1,6 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/rename_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template SimpleRename; diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh new file mode 100644 index 000000000..cd66ce686 --- /dev/null +++ b/cpu/beta_cpu/rename.hh @@ -0,0 +1,184 @@ +// Todo: +// Figure out rename map for reg vs fp (probably just have one rename map). +// In simple case, there is no renaming, so have this stage do basically +// nothing. +// Fix up trap and barrier handling. Fix up squashing too, as it's too +// dependent upon the iew stage continually telling it to squash. +// Have commit send back information whenever a branch has committed. This +// way the history buffer can be cleared beyond the point where the branch +// was. + +#ifndef __SIMPLE_RENAME_HH__ +#define __SIMPLE_RENAME_HH__ + +//Will want to include: time buffer, structs, free list, rename map +#include + +#include "base/timebuf.hh" +#include "cpu/beta_cpu/comm.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/free_list.hh" + +using namespace std; + +// Will need rename maps for both the int reg file and fp reg file. +// Or change rename map class to handle both. (RegFile handles both.) +template +class SimpleRename +{ + public: + // Typedefs from the Impl. 
+ typedef typename Impl::ISA ISA; + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename Impl::FetchStruct FetchStruct; + typedef typename Impl::DecodeStruct DecodeStruct; + typedef typename Impl::RenameStruct RenameStruct; + typedef typename Impl::TimeStruct TimeStruct; + + // Typedefs from the CPUPol + typedef typename CPUPol::FreeList FreeList; + typedef typename CPUPol::RenameMap RenameMap; + + // Typedefs from the ISA. + typedef typename ISA::Addr Addr; + + public: + // Rename will block if ROB becomes full or issue queue becomes full, + // or there are no free registers to rename to. + // Only case where rename squashes is if IEW squashes. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking, + BarrierStall + }; + + private: + Status _status; + + public: + SimpleRename(Params ¶ms); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setRenameQueue(TimeBuffer *rq_ptr); + + void setDecodeQueue(TimeBuffer *dq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void setFreeList(FreeList *fl_ptr); + + void dumpHistory(); + + void tick(); + + void rename(); + + void squash(); + + private: + void block(); + + inline void unblock(); + + void doSquash(); + + void removeFromHistory(InstSeqNum inst_seq_num); + + /** Holds the previous information for each rename. + * Note that often times the inst may have been deleted, so only access + * the pointer for the address and do not dereference it. + */ + struct RenameHistory { + RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg, + PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg) + : instSeqNum(_instSeqNum), archReg(_archReg), + newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg), + placeHolder(false) + { + } + + /** Constructor used specifically for cases where a place holder + * rename history entry is being made. 
+ */ + RenameHistory(InstSeqNum _instSeqNum) + : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0), + prevPhysReg(0), placeHolder(true) + { + } + + InstSeqNum instSeqNum; + RegIndex archReg; + PhysRegIndex newPhysReg; + PhysRegIndex prevPhysReg; + bool placeHolder; + }; + + list historyBuffer; + + /** CPU interface. */ + FullCPU *cpu; + + // Interfaces to objects outside of rename. + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get IEW's output from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write infromation heading to previous stages. */ + // Might not be the best name as not only decode will read it. + typename TimeBuffer::wire toDecode; + + /** Rename instruction queue. */ + TimeBuffer *renameQueue; + + /** Wire to write any information heading to IEW. */ + typename TimeBuffer::wire toIEW; + + /** Decode instruction queue interface. */ + TimeBuffer *decodeQueue; + + /** Wire to get decode's output from decode queue. */ + typename TimeBuffer::wire fromDecode; + + /** Skid buffer between rename and decode. */ + queue skidBuffer; + + /** Rename map interface. */ + SimpleRenameMap *renameMap; + + /** Free list interface. */ + FreeList *freeList; + + /** Delay between iew and rename, in ticks. */ + int iewToRenameDelay; + + /** Delay between decode and rename, in ticks. */ + int decodeToRenameDelay; + + /** Delay between commit and rename, in ticks. */ + unsigned commitToRenameDelay; + + /** Rename width, in instructions. */ + unsigned renameWidth; + + /** Commit width, in instructions. Used so rename knows how many + * instructions might have freed registers in the previous cycle. 
+ */ + unsigned commitWidth; +}; + +#endif // __SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh new file mode 100644 index 000000000..2b60c2f50 --- /dev/null +++ b/cpu/beta_cpu/rename_impl.hh @@ -0,0 +1,593 @@ +#include + +#include "cpu/beta_cpu/rename.hh" + +template +SimpleRename::SimpleRename(Params ¶ms) + : iewToRenameDelay(params.iewToRenameDelay), + decodeToRenameDelay(params.decodeToRenameDelay), + commitToRenameDelay(params.commitToRenameDelay), + renameWidth(params.renameWidth), + commitWidth(params.commitWidth) +{ + _status = Idle; +} + +template +void +SimpleRename::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Rename, "Rename: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void +SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(Rename, "Rename: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from IEW stage. + fromIEW = timeBuffer->getWire(-iewToRenameDelay); + + // Setup wire to read infromation from time buffer, from commit stage. + fromCommit = timeBuffer->getWire(-commitToRenameDelay); + + // Setup wire to write information to previous stages. + toDecode = timeBuffer->getWire(0); +} + +template +void +SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to write information to future stages. + toIEW = renameQueue->getWire(0); +} + +template +void +SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) +{ + DPRINTF(Rename, "Rename: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to get information from decode. 
+ fromDecode = decodeQueue->getWire(-decodeToRenameDelay); + +} + +template +void +SimpleRename::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template +void +SimpleRename::setFreeList(FreeList *fl_ptr) +{ + DPRINTF(Rename, "Rename: Setting free list pointer.\n"); + freeList = fl_ptr; +} + +template +void +SimpleRename::dumpHistory() +{ + typename list::iterator buf_it = historyBuffer.begin(); + + while (buf_it != historyBuffer.end()) + { + cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " + "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, + (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); + + buf_it++; + } +} + +template +void +SimpleRename::block() +{ + DPRINTF(Rename, "Rename: Blocking.\n"); + // Set status to Blocked. + _status = Blocked; + + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template +inline void +SimpleRename::unblock() +{ + DPRINTF(Rename, "Rename: Reading instructions out of skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. 
+ if (!skidBuffer.empty()) { + toDecode->renameInfo.stall = true; + } else { + DPRINTF(Rename, "Rename: Done unblocking.\n"); + _status = Running; + } +} + +template +void +SimpleRename::doSquash() +{ + typename list::iterator hb_it = historyBuffer.begin(); + typename list::iterator delete_it; + + InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; + +#ifdef FULL_SYSTEM + assert(!historyBuffer.empty()); +#else + // After a syscall squashes everything, the history buffer may be empty + // but the ROB may still be squashing instructions. + if (historyBuffer.empty()) { + return; + } +#endif // FULL_SYSTEM + + // Go through the most recent instructions, undoing the mappings + // they did and freeing up the registers. + while ((*hb_it).instSeqNum > squashed_seq_num) + { + DPRINTF(Rename, "Rename: Removing history entry with sequence " + "number %i.\n", (*hb_it).instSeqNum); + + // If it's not simply a place holder, then add the registers. + if (!(*hb_it).placeHolder) { + // Tell the rename map to set the architected register to the + // previous physical register that it was renamed to. + renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg); + + // Put the renamed physical register back on the free list. + freeList->addReg(hb_it->newPhysReg); + } + + delete_it = hb_it; + + hb_it++; + + historyBuffer.erase(delete_it); + } +} + +template +void +SimpleRename::squash() +{ + DPRINTF(Rename, "Rename: Squashing instructions.\n"); + // Set the status to Squashing. + _status = Squashing; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } + + doSquash(); +} + +// In the future, when a SmartPtr is used for DynInst, then this function +// itself can handle returning the instruction's physical registers to +// the free list. 
+template +void +SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) +{ + DPRINTF(Rename, "Rename: Removing a committed instruction from the " + "history buffer, sequence number %lli.\n", inst_seq_num); + typename list::iterator hb_it = historyBuffer.end(); + + hb_it--; + + if (hb_it->instSeqNum > inst_seq_num) { + DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure " + "that a syscall happened recently.\n"); + return; + } + + for ( ; hb_it->instSeqNum != inst_seq_num; hb_it--) + { + // Make sure we haven't gone off the end of the list. + assert(hb_it != historyBuffer.end()); + + // In theory instructions at the end of the history buffer + // should be older than the instruction being removed, which + // means they will have a lower sequence number. Also the + // instruction being removed from the history really should + // be the last instruction in the list, as it is the instruction + // that was just committed that is being removed. + assert(hb_it->instSeqNum < inst_seq_num); + DPRINTF(Rename, "Rename: Committed instruction is not the last " + "entry in the history buffer.\n"); + } + + if (!(*hb_it).placeHolder) { + freeList->addReg(hb_it->prevPhysReg); + } + + historyBuffer.erase(hb_it); + +} + +template +void +SimpleRename::tick() +{ + // Rename will need to try to rename as many instructions as it + // has bandwidth, unless it is blocked. + + // Check if _status is BarrierStall. If so, then check if the number + // of free ROB entries is equal to the number of total ROB entries. + // Once equal then wake this stage up. Set status to unblocking maybe. + + if (_status != Blocked && _status != Squashing) { + DPRINTF(Rename, "Rename: Status is not blocked, will attempt to " + "run stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + rename(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. 
Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + unblock(); + } + } else if (_status == Blocked) { + // If stage is blocked and still receiving valid instructions, + // make sure to store them in the skid buffer. + if (fromDecode->insts[0] != NULL) { + + block(); + + // Continue to tell previous stage to stall. + toDecode->renameInfo.stall = true; + } + + if (!fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall && + fromCommit->commitInfo.freeROBEntries != 0 && + fromIEW->iewInfo.freeIQEntries != 0) { + + // Need to be sure to check all blocking conditions above. + // If they have cleared, then start unblocking. + DPRINTF(Rename, "Rename: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this stage + // is done unblocking. + toDecode->renameInfo.stall = true; + } else { + // Otherwise no conditions have changed. Tell previous + // stage to continue blocking. + toDecode->renameInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + } else if (_status == Squashing) { + if (fromCommit->commitInfo.squash) { + squash(); + } else if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); + _status = Running; + } else { + doSquash(); + } + } + + // Ugly code, revamp all of the tick() functions eventually. + if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { + removeFromHistory(fromCommit->commitInfo.doneSeqNum); + } + + // Perhaps put this outside of this function, since this will + // happen regardless of whether or not the stage is blocked or + // squashing. + // Read from the time buffer any necessary data. + // Read registers that are freed, and add them to the freelist. 
+ // This is unnecessary due to the history buffer (assuming the history + // buffer works properly). +/* + while(!fromCommit->commitInfo.freeRegs.empty()) + { + PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back(); + DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n", + (int)freed_reg); + freeList->addReg(freed_reg); + + fromCommit->commitInfo.freeRegs.pop_back(); + } +*/ + +} + +template +void +SimpleRename::rename() +{ + // Check if any of the stages ahead of rename are telling rename + // to squash. The squash() function will also take care of fixing up + // the rename map and the free list. + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + + // Check if time buffer is telling this stage to stall. + if (fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to " + "stall.\n"); + block(); + return; + } + + // Check if the current status is squashing. If so, set its status + // to running and resume execution the next cycle. + if (_status == Squashing) { + DPRINTF(Rename, "Rename: Done squashing.\n"); + _status = Running; + return; + } + + // Check the decode queue to see if instructions are available. + // If there are no available instructions to rename, then do nothing. + // Or, if the stage is currently unblocking, then go ahead and run it. + if (fromDecode->insts[0] == NULL && _status != Unblocking) { + DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); + // Should I change status to idle? + return; + } + + DynInst *inst; + unsigned num_inst = 0; + + bool insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] != NULL : + fromDecode->insts[num_inst] != NULL; + + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_src_regs; + unsigned num_dest_regs; + + // Will have to do a different calculation for the number of free + // entries. 
Number of free entries recorded on this cycle - + // renameWidth * renameToDecodeDelay + // Can I avoid a multiply? + unsigned free_rob_entries = + fromCommit->commitInfo.freeROBEntries - iewToRenameDelay; + DPRINTF(Rename, "Rename: ROB has %d free entries.\n", + free_rob_entries); + unsigned free_iq_entries = + fromIEW->iewInfo.freeIQEntries - iewToRenameDelay; + + // Check if there's any space left. + if (free_rob_entries == 0 || free_iq_entries == 0) { + DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " + "entries.\n" + "Rename: ROB has %d free entries.\n" + "Rename: IQ has %d free entries.\n", + free_rob_entries, + free_iq_entries); + block(); + // Tell previous stage to stall. + toDecode->renameInfo.stall = true; + + return; + } + + unsigned min_iq_rob = min(free_rob_entries, free_iq_entries); + unsigned num_insts_to_rename = min(min_iq_rob, renameWidth); + + while (insts_available && + num_inst < num_insts_to_rename) { + DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); + + // Get the next instruction either from the skid buffer or the + // decode queue. + inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : + fromDecode->insts[num_inst]; + + DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", + inst, inst->readPC()); + + // If it's a trap instruction, then it needs to wait here within + // rename until the ROB is empty. Needs a way to detect that the + // ROB is empty. Maybe an event? + // Would be nice if it could be avoided putting this into a + // specific stage and instead just put it into the AlphaFullCPU. + // Might not really be feasible though... + // (EXCB, TRAPB) + if (inst->isSerializing()) { + panic("Rename: Serializing instruction encountered.\n"); + DPRINTF(Rename, "Rename: Serializing instruction " + "encountered.\n"); + block(); + + // Change status over to BarrierStall so that other stages know + // what this is blocked on. + _status = BarrierStall; + + // Tell the previous stage to stall. 
+ toDecode->renameInfo.stall = true; + + break; + } + + // Make sure there's enough room in the ROB and the IQ. + // This doesn't really need to be done dynamically; consider + // moving outside of this function. + if (free_rob_entries == 0 || free_iq_entries == 0) { + DPRINTF(Rename, "Rename: Blocking due to lack of ROB or IQ " + "entries.\n"); + // Call some sort of function to handle all the setup of being + // blocked. + block(); + + // Not really sure how to schedule an event properly, but an + // event must be scheduled such that upon freeing a ROB entry, + // this stage will restart up. Perhaps add in a ptr to an Event + // within the ROB that will be able to execute that Event + // if a free register is added to the freelist. + + // Tell the previous stage to stall. + toDecode->renameInfo.stall = true; + + break; + } + + // Temporary variables to hold number of source and destination regs. + num_src_regs = inst->numSrcRegs(); + num_dest_regs = inst->numDestRegs(); + + // Check here to make sure there are enough destination registers + // to rename to. Otherwise block. + if (renameMap->numFreeEntries() < num_dest_regs) + { + DPRINTF(Rename, "Rename: Blocking due to lack of free " + "physical registers to rename to.\n"); + // Call function to handle blocking. + block(); + + // Need some sort of event based on a register being freed. + + // Tell the previous stage to stall. + toDecode->renameInfo.stall = true; + + // Break out of rename loop. + break; + } + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. 
+ RegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + } + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "register %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", inst->seqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + } + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. 
+ if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + } + + // Put instruction in rename queue. + toIEW->insts[num_inst] = inst; + + // Decrease the number of free ROB and IQ entries. + --free_rob_entries; + --free_iq_entries; + + // Increment which instruction we're on. + ++num_inst; + + // Check whether or not there are instructions available. + // Either need to check within the skid buffer, or the decode + // queue, depending if this stage is unblocking or not. + // Hmm, dangerous check. Can touch memory not allocated. Might + // be better to just do check at beginning of loop. Or better + // yet actually pass the number of instructions issued. + insts_available = _status == Unblocking ? + skidBuffer.front().insts[num_inst] != NULL : + fromDecode->insts[num_inst] != NULL; + } + +} diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc new file mode 100644 index 000000000..c234182f0 --- /dev/null +++ b/cpu/beta_cpu/rename_map.cc @@ -0,0 +1,289 @@ + +#include "cpu/beta_cpu/rename_map.hh" + +// Todo: Consider making functions inline. Avoid having things that are +// using the zero register or misc registers from adding on the registers +// to the free list. 
+ +SimpleRenameMap::RenameEntry::RenameEntry() + : physical_reg(0), valid(false) +{ +} + +SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + intZeroReg(_intZeroReg), + floatZeroReg(_floatZeroReg) +{ + DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: " + "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs, + numLogicalFloatRegs, numPhysicalFloatRegs); + + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Create the rename maps, and their scoreboards. + intRenameMap = new RenameEntry[numLogicalIntRegs]; + floatRenameMap = new RenameEntry[numLogicalFloatRegs]; + + intScoreboard.resize(numPhysicalIntRegs); + floatScoreboard.resize(numPhysicalFloatRegs); + miscScoreboard.resize(numMiscRegs); + + // Initialize the entries in the integer rename map to point to the + // physical registers of the same index, and consider each register + // ready until the first rename occurs. + for (RegIndex index = 0; index < numLogicalIntRegs; ++index) + { + intRenameMap[index].physical_reg = index; + intScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalIntRegs; + index < numPhysicalIntRegs; + ++index) + { + intScoreboard[index] = 0; + } + + // Initialize the entries in the floating point rename map to point to + // the physical registers of the same index, and consider each register + // ready until the first rename occurs. 
+ for (RegIndex index = 0; index < numLogicalFloatRegs; ++index) + { + floatRenameMap[index].physical_reg = index + numPhysicalIntRegs; + floatScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalFloatRegs; + index < numPhysicalFloatRegs; + ++index) + { + floatScoreboard[index] = 0; + } + + // Initialize the entries in the misc register scoreboard to be ready. + for (RegIndex index = 0; index < numMiscRegs; ++index) + { + miscScoreboard[index] = 1; + } +} + +void +SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) +{ + //Setup the interface to the freelist. + freeList = fl_ptr; +} + + +// Don't allow this stage to fault; force that check to the rename stage. +// Simply ask to rename a logical register and get back a new physical +// register index. +SimpleRenameMap::RenameInfo +SimpleRenameMap::rename(RegIndex arch_reg) +{ + PhysRegIndex renamed_reg; + PhysRegIndex prev_reg; + + if (arch_reg < numLogicalIntRegs) { + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = intRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != intZeroReg) { + // Get a free physical register to rename to. + renamed_reg = freeList->getIntReg(); + + // Update the integer rename map. + intRenameMap[arch_reg].physical_reg = renamed_reg; + + // Mark register as not ready. + intScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = intZeroReg; + } + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base offset for floating point registers. + arch_reg = arch_reg - numLogicalIntRegs; + + // Record the current physical register that is renamed to the + // requested architected register. 
+ prev_reg = floatRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != floatZeroReg) { + // Get a free floating point register to rename to. + renamed_reg = freeList->getFloatReg(); + + // Update the floating point rename map. + floatRenameMap[arch_reg].physical_reg = renamed_reg; + + // Mark register as not ready. + floatScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = floatZeroReg; + } + } else { + // Subtract off the base offset for miscellaneous registers. + arch_reg = arch_reg - numLogicalRegs; + + // No renaming happens to the misc. registers. They are simply the + // registers that come after all the physical registers; thus + // take the base architected register and add the physical registers + // to it. + renamed_reg = arch_reg + numPhysicalRegs; + + // Set the previous register to the same register; mainly it must be + // known that the prev reg was outside the range of normal registers + // so the free list can avoid adding it. + prev_reg = renamed_reg; + + miscScoreboard[renamed_reg] = false; + } + + return RenameInfo(renamed_reg, prev_reg); +} + +//Perhaps give this a pair as a return value, of the physical register +//and whether or not it's ready. +PhysRegIndex +SimpleRenameMap::lookup(RegIndex arch_reg) +{ + if (arch_reg < numLogicalIntRegs) { + return intRenameMap[arch_reg].physical_reg; + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base FP offset. + arch_reg = arch_reg - numLogicalIntRegs; + + return floatRenameMap[arch_reg].physical_reg; + } else { + // Subtract off the misc registers offset. + arch_reg = arch_reg - numLogicalRegs; + + // Misc. regs don't rename, so simply add the base arch reg to + // the number of physical registers. 
+ return numPhysicalRegs + arch_reg; + } +} + +bool +SimpleRenameMap::isReady(PhysRegIndex phys_reg) +{ + if (phys_reg < numPhysicalIntRegs) { + return intScoreboard[phys_reg]; + } else if (phys_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. + phys_reg = phys_reg - numPhysicalIntRegs; + + return floatScoreboard[phys_reg]; + } else { + // Subtract off the misc registers offset. + phys_reg = phys_reg - numPhysicalRegs; + + return miscScoreboard[phys_reg]; + } +} + +// In this implementation the miscellaneous registers do not actually rename, +// so this function does not allow you to try to change their mappings. +void +SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) +{ + if (arch_reg < numLogicalIntRegs) { + DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + intRenameMap[arch_reg].physical_reg = renamed_reg; + } else { +// assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); + + // Subtract off the base FP offset. + arch_reg = arch_reg - numLogicalIntRegs; + + DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + floatRenameMap[arch_reg].physical_reg = renamed_reg; + } +} + +void +SimpleRenameMap::squash(vector freed_regs, + vector unmaps) +{ + // Not sure the rename map should be able to access the free list + // like this. + while (!freed_regs.empty()) { + RegIndex free_register = freed_regs.back(); + + if (free_register < numPhysicalIntRegs) { + freeList->addIntReg(free_register); + } else { + // Subtract off the base FP dependence tag. + free_register = free_register - numPhysicalIntRegs; + freeList->addFloatReg(free_register); + } + + freed_regs.pop_back(); + } + + // Take unmap info and roll back the rename map. 
+} + +void +SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) +{ + DPRINTF(Rename, "Rename map: Marking register %i as ready.\n", + (int)ready_reg); + + if (ready_reg < numPhysicalIntRegs) { + intScoreboard[ready_reg] = 1; + } else if (ready_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. + ready_reg = ready_reg - numPhysicalIntRegs; + + floatScoreboard[ready_reg] = 1; + } else { + //Subtract off the misc registers offset. + ready_reg = ready_reg - numPhysicalRegs; + + miscScoreboard[ready_reg] = 1; + } +} + +int +SimpleRenameMap::numFreeEntries() +{ + int free_int_regs = freeList->numFreeIntRegs(); + int free_float_regs = freeList->numFreeFloatRegs(); + + if (free_int_regs < free_float_regs) { + return free_int_regs; + } else { + return free_float_regs; + } +} diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh new file mode 100644 index 000000000..05b52bfb2 --- /dev/null +++ b/cpu/beta_cpu/rename_map.hh @@ -0,0 +1,141 @@ +// Todo: Create destructor. +// Make it so that there's a proper separation between int and fp. Also +// have it so that there's a more meaningful name given to the variable +// that marks the beginning of the FP registers. + +#ifndef __RENAME_MAP_HH__ +#define __RENAME_MAP_HH__ + +#include +#include +#include + +//Will want to include faults +#include "cpu/beta_cpu/free_list.hh" + +using namespace std; + +class SimpleRenameMap +{ + public: +// typedef typename Impl::RegIndex RegIndex; + + /** + * Pair of a logical register and a physical register. Tells the + * previous mapping of a logical register to a physical register. + * Used to roll back the rename map to a previous state. + */ + typedef pair UnmapInfo; + + /** + * Pair of a physical register and a physical register. Used to + * return the physical register that a logical register has been + * renamed to, and the previous physical register that the same + * logical register was previously mapped to. 
+ */ + typedef pair RenameInfo; + + public: + //Constructor + SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg); + + void setFreeList(SimpleFreeList *fl_ptr); + + //Tell rename map to get a free physical register for a given + //architected register. Not sure it should have a return value, + //but perhaps it should have some sort of fault in case there are + //no free registers. + RenameInfo rename(RegIndex arch_reg); + + PhysRegIndex lookup(RegIndex phys_reg); + + bool isReady(PhysRegIndex arch_reg); + + /** + * Marks the given register as ready, meaning that its value has been + * calculated and written to the register file. + * @params ready_reg The index of the physical register that is now + * ready. + */ + void markAsReady(PhysRegIndex ready_reg); + + void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); + + void squash(vector freed_regs, + vector unmaps); + + int numFreeEntries(); + + private: + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The integer zero register. This implementation assumes it is always + * zero and never can be anything else. + */ + RegIndex intZeroReg; + + /** The floating point zero register. This implementation assumes it is + * always zero and never can be anything else. 
+ */ + RegIndex floatZeroReg; + + class RenameEntry + { + public: + PhysRegIndex physical_reg; + bool valid; + + RenameEntry(); + }; + + /** Integer rename map. */ + RenameEntry *intRenameMap; + + /** Floating point rename map. */ + RenameEntry *floatRenameMap; + + /** Free list interface. */ + SimpleFreeList *freeList; + + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + vector intScoreboard; + + /** Scoreboard of physical floating registers, saying whether or not they + * are ready. + */ + vector floatScoreboard; + + /** Scoreboard of miscellaneous registers, saying whether or not they + * are ready. + */ + vector miscScoreboard; +}; + +#endif //__RENAME_MAP_HH__ diff --git a/cpu/beta_cpu/rob.cc b/cpu/beta_cpu/rob.cc new file mode 100644 index 000000000..611cca0ba --- /dev/null +++ b/cpu/beta_cpu/rob.cc @@ -0,0 +1,7 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/rob_impl.hh" + +// Force instantiation of InstructionQueue. +template ROB; diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh new file mode 100644 index 000000000..7963d1b01 --- /dev/null +++ b/cpu/beta_cpu/rob.hh @@ -0,0 +1,129 @@ +// Todo: Probably add in support for scheduling events (more than one as +// well) on the case of the ROB being empty or full. Considering tracking +// free entries instead of insts in ROB. Differentiate between squashing +// all instructions after the instruction, and all instructions after *and* +// including that instruction. + +#ifndef __ROB_HH__ +#define __ROB_HH__ + +#include +#include + +#include "arch/alpha/isa_traits.hh" + +using namespace std; + +/** + * ROB class. Uses the instruction list that exists within the CPU to + * represent the ROB. This class doesn't contain that structure, but instead + * a pointer to the CPU to get access to the structure. 
The ROB has a large + * hand in squashing instructions within the CPU, and is responsible for + * sending out the squash signal as well as what instruction is to be + * squashed. The ROB also controls most of the calls to the CPU to delete + * instructions; the only other call is made in the first stage of the pipe- + * line, which tells the CPU to delete all instructions not in the ROB. + */ +template +class ROB +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInst DynInst; + + typedef pair UnmapInfo; + typedef typename list::iterator InstIt; + + public: + /** ROB constructor. + * @params _numEntries Number of entries in ROB. + * @params _squashWidth Number of instructions that can be squashed in a + * single cycle. + */ + ROB(unsigned _numEntries, unsigned _squashWidth); + + /** Function to set the CPU pointer, necessary due to which object the ROB + * is created within. + * @params cpu_ptr Pointer to the implementation specific full CPU object. + */ + void setCPU(FullCPU *cpu_ptr); + + /** Function to insert an instruction into the ROB. The parameter inst is + * not truly required, but is useful for checking correctness. Note + * that whatever calls this function must ensure that there is enough + * space within the ROB for the new instruction. + * @params inst The instruction being inserted into the ROB. + * @todo Remove the parameter once correctness is ensured. + */ + void insertInst(DynInst *inst); + + /** Returns pointer to the head instruction within the ROB. There is + * no guarantee as to the return value if the ROB is empty. + * @retval Pointer to the DynInst that is at the head of the ROB. 
+ */ + DynInst *readHeadInst() { return cpu->instList.front(); } + + DynInst *readTailInst() { return (*tail); } + + void retireHead(); + + bool isHeadReady(); + + unsigned numFreeEntries(); + + bool isFull() + { return numInstsInROB == numEntries; } + + bool isEmpty() + { return numInstsInROB == 0; } + + void doSquash(); + + void squash(InstSeqNum squash_num); + + uint64_t readHeadPC(); + + uint64_t readHeadNextPC(); + + InstSeqNum readHeadSeqNum(); + + uint64_t readTailPC(); + + InstSeqNum readTailSeqNum(); + + /** Checks if the ROB is still in the process of squashing instructions. + * @retval Whether or not the ROB is done squashing. + */ + bool isDoneSquashing() const { return doneSquashing; } + + /** This is more of a debugging function than anything. Use + * numInstsInROB to get the instructions in the ROB unless you are + * double checking that variable. + */ + int countInsts(); + + private: + + /** Pointer to the CPU. */ + FullCPU *cpu; + + unsigned numEntries; + + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + + InstIt tail; + + InstIt squashIt; + + int numInstsInROB; + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Is the ROB done squashing. 
*/ + bool doneSquashing; +}; + +#endif //__ROB_HH__ diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh new file mode 100644 index 000000000..308a8010f --- /dev/null +++ b/cpu/beta_cpu/rob_impl.hh @@ -0,0 +1,264 @@ +#ifndef __ROB_IMPL_HH__ +#define __ROB_IMPL_HH__ + +#include "cpu/beta_cpu/rob.hh" + +template +ROB::ROB(unsigned _numEntries, unsigned _squashWidth) + : numEntries(_numEntries), + squashWidth(_squashWidth), + numInstsInROB(0), + squashedSeqNum(0) +{ + doneSquashing = true; +} + +template +void +ROB::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); + + squashIt = cpu->instList.end(); +} + +template +int +ROB::countInsts() +{ +/* + int return_val = 0; + + // Iterate through the ROB from the head to the tail, counting the + // entries. + for (InstIt i = cpu->instList.begin(); i != tail; i++) + { + assert(i != cpu->instList.end()); + return_val++; + } + + return return_val; +*/ + // Because the head won't be tracked properly until the ROB gets the + // first instruction, and any time that the ROB is empty and has not + // yet gotten the instruction, this function doesn't work. + return numInstsInROB; +} + +template +void +ROB::insertInst(DynInst *inst) +{ + // Make sure we have the right number of instructions. + assert(numInstsInROB == countInsts()); + // Make sure the instruction is valid. + assert(inst); + + DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC()); + + // If the ROB is full then exit. + assert(numInstsInROB != numEntries); + + ++numInstsInROB; + + // Increment the tail iterator, moving it one instruction back. + // There is a special case if the ROB was empty prior to this insertion, + // in which case the tail will be pointing at instList.end(). If that + // happens, then reset the tail to the beginning of the list. 
+ if (tail != cpu->instList.end()) { + tail++; + } else { + tail = cpu->instList.begin(); + } + + // Make sure the tail iterator is actually pointing at the instruction + // added. + assert((*tail) == inst); + + DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB); + +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. +template +void +ROB::retireHead() +{ + assert(numInstsInROB == countInsts()); + + DynInst *head_inst; + + // Get the head ROB instruction. + head_inst = cpu->instList.front(); + + // Make certain this can retire. + assert(head_inst->readyToCommit()); + + DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, " + "instruction PC %#x, seq num %i\n", head_inst->readPC(), + head_inst->seqNum); + + // Keep track of how many instructions are in the ROB. + --numInstsInROB; + + // Tell CPU to remove the instruction from the list of instructions. + // A special case is needed if the instruction being retired is the + // only instruction in the ROB; otherwise the tail iterator will become + // invalidated. + if (tail == cpu->instList.begin()) { + cpu->removeFrontInst(head_inst); + tail = cpu->instList.end(); + } else { + cpu->removeFrontInst(head_inst); + } +} + +template +bool +ROB::isHeadReady() +{ + if (numInstsInROB != 0) { + DynInst *head_inst = cpu->instList.front(); + + return head_inst->readyToCommit(); + } + + return false; +} + +template +unsigned +ROB::numFreeEntries() +{ + assert(numInstsInROB == countInsts()); + + return numEntries - numInstsInROB; +} + +template +void +ROB::doSquash() +{ + DPRINTF(ROB, "ROB: Squashing instructions.\n"); + + assert(squashIt != cpu->instList.end()); + + for (int numSquashed = 0; + numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum; + ++numSquashed) + { + // Ensure that the instruction is younger. 
+ assert((*squashIt)->seqNum > squashedSeqNum); + + DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n", + (*squashIt)->readPC(), (*squashIt)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*squashIt)->setSquashed(); + + (*squashIt)->setCanCommit(); + +#ifndef FULL_SYSTEM + if (squashIt == cpu->instList.begin()) { + DPRINTF(ROB, "ROB: Reached head of instruction list while " + "squashing.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + + return; + } +#endif + + // Move the tail iterator to the next instruction. + squashIt--; + } + + + // Check if ROB is done squashing. + if ((*squashIt)->seqNum == squashedSeqNum) { + DPRINTF(ROB, "ROB: Done squashing instructions.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + } +} + +template +void +ROB::squash(InstSeqNum squash_num) +{ + DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n"); + doneSquashing = false; + + squashedSeqNum = squash_num; + + assert(tail != cpu->instList.end()); + + squashIt = tail; + + doSquash(); +} + +template +uint64_t +ROB::readHeadPC() +{ + assert(numInstsInROB == countInsts()); + + DynInst *head_inst = cpu->instList.front(); + + return head_inst->readPC(); +} + +template +uint64_t +ROB::readHeadNextPC() +{ + assert(numInstsInROB == countInsts()); + + DynInst *head_inst = cpu->instList.front(); + + return head_inst->readNextPC(); +} + +template +InstSeqNum +ROB::readHeadSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. 
+ DynInst *head_inst = cpu->instList.front(); + + return head_inst->seqNum; +} + +template +uint64_t +ROB::readTailPC() +{ + assert(numInstsInROB == countInsts()); + + assert(tail != cpu->instList.end()); + + return (*tail)->readPC(); +} + +template +InstSeqNum +ROB::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + return (*tail)->seqNum; +} + +#endif // __ROB_IMPL_HH__ diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index 0315ab7a9..7a707c86a 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -41,6 +41,7 @@ // forward declarations class ExecContext; +class AlphaDynInst; class DynInst; class FastCPU; class SimpleCPU; @@ -307,20 +308,7 @@ class StaticInst : public StaticInstBase delete cachedDisassembly; } - /** - * Execute this instruction under SimpleCPU model. - */ - virtual Fault execute(SimpleCPU *xc, Trace::InstRecord *traceData) = 0; - - /** - * Execute this instruction under FastCPU model. - */ - virtual Fault execute(FastCPU *xc, Trace::InstRecord *traceData) = 0; - - /** - * Execute this instruction under detailed FullCPU model. - */ - virtual Fault execute(DynInst *xc, Trace::InstRecord *traceData) = 0; +#include "static_inst_impl.hh" /** * Return the target address for a PC-relative branch. From e3fb9afa79e37cb8c60a48b9ff3976665c2c7675 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 23 Sep 2004 14:06:03 -0400 Subject: [PATCH 2/6] Update to make multiple instruction issue and different latencies work. Also change to ref counted DynInst. SConscript: Add branch predictor, BTB, load store queue, and storesets. arch/isa_parser.py: Specify the template parameter for AlphaDynInst base/traceflags.py: Add load store queue, store set, and mem dependence unit to the list of trace flags. cpu/base_dyn_inst.cc: Change formating, add in debug statement. 
cpu/base_dyn_inst.hh: Change DynInst to be RefCounted, add flag to clear whether or not this instruction can commit. This is likely to be removed in the future. cpu/beta_cpu/alpha_dyn_inst.cc: AlphaDynInst has been changed to be templated, so now this CC file is just used to force instantiations of AlphaDynInst. cpu/beta_cpu/alpha_dyn_inst.hh: Changed AlphaDynInst to be templated on Impl. Removed some unnecessary functions. cpu/beta_cpu/alpha_full_cpu.cc: AlphaFullCPU has been changed to be templated, so this CC file is now just used to force instantiation of AlphaFullCPU. cpu/beta_cpu/alpha_full_cpu.hh: Change AlphaFullCPU to be templated on Impl. cpu/beta_cpu/alpha_impl.hh: Update it to reflect AlphaDynInst and AlphaFullCPU being templated on Impl. Also removed time buffers from here, as they are really a part of the CPU and are thus in the CPU policy now. cpu/beta_cpu/alpha_params.hh: Make AlphaSimpleParams inherit from the BaseFullCPU so that it doesn't need to specifically declare any parameters that are already in the BaseFullCPU. cpu/beta_cpu/comm.hh: Changed the structure of the time buffer communication structs. Now they include the size of the packet of instructions it is sending. Added some parameters to the backwards communication struct, mainly for squashing. cpu/beta_cpu/commit.hh: Update typenames to reflect change in location of time buffer structs. Update DynInst to DynInstPtr (it is refcounted now). cpu/beta_cpu/commit_impl.hh: Formatting changes mainly. Also sends back proper information on branch mispredicts so that the bpred unit can update itself. Updated behavior for non-speculative instructions (stores, any other non-spec instructions): once they reach the head of the ROB, the ROB signals back to the IQ that it can go ahead and issue the non-speculative instruction. The instruction itself is updated so that commit won't try to commit it again until it is done executing.
cpu/beta_cpu/cpu_policy.hh: Added branch prediction unit, mem dependence prediction unit, load store queue. Moved time buffer structs from AlphaSimpleImpl to here. cpu/beta_cpu/decode.hh: Changed typedefs to reflect change in location of time buffer structs and also the change from DynInst to ref counted DynInstPtr. cpu/beta_cpu/decode_impl.hh: Continues to buffer instructions even while unblocking now. Changed how it loops through groups of instructions so it can properly block during the middle of a group of instructions. cpu/beta_cpu/fetch.hh: Changed typedefs to reflect change in location of time buffer structs and the change to ref counted DynInsts. Also added in branch prediction unit. cpu/beta_cpu/fetch_impl.hh: Add in branch prediction. Changed how fetch checks inputs and its current state to make for easier logic. cpu/beta_cpu/free_list.cc: Changed int regs and float regs to logically use one flat namespace. Future change will be moving them to a single scoreboard to conserve space. cpu/beta_cpu/free_list.hh: Mostly debugging statements. Might be removed for performance in future. cpu/beta_cpu/full_cpu.cc: Added in some debugging statements. Updated BaseFullCPU to take a params object. cpu/beta_cpu/full_cpu.hh: Added params class within BaseCPU that other param classes will be able to inherit from. Updated typedefs to reflect change in location of time buffer structs and ref counted DynInst. cpu/beta_cpu/iew.hh: Updated typedefs to reflect change in location of time buffer structs and use of ref counted DynInsts. cpu/beta_cpu/iew_impl.hh: Added in load store queue, updated iew to be able to execute non-speculative instructions, instead of having them execute in commit. cpu/beta_cpu/inst_queue.hh: Updated change to ref counted DynInsts. Changed inst queue to hold non-speculative instructions as well, which are issued only when commit signals backwards that a nonspeculative instruction is at the head of the ROB.
cpu/beta_cpu/inst_queue_impl.hh: Updated to allow for non-speculative instructions to be in the inst queue. Also added some debug functions. cpu/beta_cpu/regfile.hh: Added debugging statements, changed formatting. cpu/beta_cpu/rename.hh: Updated typedefs, added some functions to clean up code. cpu/beta_cpu/rename_impl.hh: Moved some code into functions to make it easier to read. cpu/beta_cpu/rename_map.cc: Changed int and float reg behavior to use a single flat namespace. In the future, the rename maps can be combined to a single rename map to save space. cpu/beta_cpu/rename_map.hh: Added destructor. cpu/beta_cpu/rob.hh: Updated it with change from DynInst to ref counted DynInst. cpu/beta_cpu/rob_impl.hh: Formatting, updated to use ref counted DynInst. cpu/static_inst.hh: Updated forward declaration for AlphaDynInst now that it is templated. --HG-- extra : convert_revision : 1045f240ee9b6a4bd368e1806aca029ebbdc6dd3 --- SConscript | 7 + arch/isa_parser.py | 2 +- base/traceflags.py | 5 +- cpu/base_dyn_inst.cc | 29 +- cpu/base_dyn_inst.hh | 13 +- cpu/beta_cpu/2bit_local_pred.cc | 110 +++ cpu/beta_cpu/2bit_local_pred.hh | 99 +++ cpu/beta_cpu/alpha_dyn_inst.cc | 105 +-- cpu/beta_cpu/alpha_dyn_inst.hh | 65 +- cpu/beta_cpu/alpha_dyn_inst_impl.hh | 109 +++ cpu/beta_cpu/alpha_full_cpu.cc | 918 +------------------------ cpu/beta_cpu/alpha_full_cpu.hh | 13 +- cpu/beta_cpu/alpha_full_cpu_builder.cc | 306 +++++++++ cpu/beta_cpu/alpha_full_cpu_impl.hh | 690 +++++++++++++++++++ cpu/beta_cpu/alpha_impl.hh | 48 +- cpu/beta_cpu/alpha_params.hh | 49 +- cpu/beta_cpu/bpred_unit.cc | 5 + cpu/beta_cpu/bpred_unit.hh | 51 ++ cpu/beta_cpu/bpred_unit_impl.hh | 13 + cpu/beta_cpu/btb.cc | 85 +++ cpu/beta_cpu/btb.hh | 52 ++ cpu/beta_cpu/comm.hh | 65 +- cpu/beta_cpu/commit.hh | 28 +- cpu/beta_cpu/commit_impl.hh | 118 ++-- cpu/beta_cpu/cpu_policy.hh | 38 +- cpu/beta_cpu/decode.hh | 24 +- cpu/beta_cpu/decode_impl.hh | 77 ++- cpu/beta_cpu/fetch.hh | 29 +- cpu/beta_cpu/fetch_impl.hh | 263 ++++--- 
cpu/beta_cpu/free_list.cc | 23 +- cpu/beta_cpu/free_list.hh | 35 +- cpu/beta_cpu/full_cpu.cc | 85 +-- cpu/beta_cpu/full_cpu.hh | 56 +- cpu/beta_cpu/iew.hh | 29 +- cpu/beta_cpu/iew_impl.hh | 156 +++-- cpu/beta_cpu/inst_queue.hh | 133 ++-- cpu/beta_cpu/inst_queue_impl.hh | 537 +++++++++++---- cpu/beta_cpu/mem_dep_unit.cc | 9 + cpu/beta_cpu/mem_dep_unit.hh | 70 ++ cpu/beta_cpu/mem_dep_unit_impl.hh | 166 +++++ cpu/beta_cpu/regfile.hh | 42 +- cpu/beta_cpu/rename.hh | 45 +- cpu/beta_cpu/rename_impl.hh | 395 ++++++----- cpu/beta_cpu/rename_map.cc | 76 +- cpu/beta_cpu/rename_map.hh | 15 +- cpu/beta_cpu/rob.hh | 41 +- cpu/beta_cpu/rob_impl.hh | 78 ++- cpu/beta_cpu/store_set.cc | 192 ++++++ cpu/beta_cpu/store_set.hh | 58 ++ cpu/static_inst.hh | 5 +- 50 files changed, 3726 insertions(+), 1936 deletions(-) create mode 100644 cpu/beta_cpu/2bit_local_pred.cc create mode 100644 cpu/beta_cpu/2bit_local_pred.hh create mode 100644 cpu/beta_cpu/alpha_dyn_inst_impl.hh create mode 100644 cpu/beta_cpu/alpha_full_cpu_builder.cc create mode 100644 cpu/beta_cpu/alpha_full_cpu_impl.hh create mode 100644 cpu/beta_cpu/bpred_unit.cc create mode 100644 cpu/beta_cpu/bpred_unit.hh create mode 100644 cpu/beta_cpu/bpred_unit_impl.hh create mode 100644 cpu/beta_cpu/btb.cc create mode 100644 cpu/beta_cpu/btb.hh create mode 100644 cpu/beta_cpu/mem_dep_unit.cc create mode 100644 cpu/beta_cpu/mem_dep_unit.hh create mode 100644 cpu/beta_cpu/mem_dep_unit_impl.hh create mode 100644 cpu/beta_cpu/store_set.cc create mode 100644 cpu/beta_cpu/store_set.hh diff --git a/SConscript b/SConscript index 07cdcfdee..fb2b40325 100644 --- a/SConscript +++ b/SConscript @@ -91,8 +91,12 @@ base_sources = Split(''' cpu/exetrace.cc cpu/pc_event.cc cpu/static_inst.cc + cpu/beta_cpu/2bit_local_pred.cc cpu/beta_cpu/alpha_dyn_inst.cc cpu/beta_cpu/alpha_full_cpu.cc + cpu/beta_cpu/alpha_full_cpu_builder.cc + cpu/beta_cpu/bpred_unit.cc + cpu/beta_cpu/btb.cc cpu/beta_cpu/commit.cc cpu/beta_cpu/decode.cc cpu/beta_cpu/fetch.cc @@ 
-100,9 +104,12 @@ base_sources = Split(''' cpu/beta_cpu/full_cpu.cc cpu/beta_cpu/iew.cc cpu/beta_cpu/inst_queue.cc + cpu/beta_cpu/ldstq.cc + cpu/beta_cpu/mem_dep_unit.cc cpu/beta_cpu/rename.cc cpu/beta_cpu/rename_map.cc cpu/beta_cpu/rob.cc + cpu/beta_cpu/store_set.cc cpu/fast_cpu/fast_cpu.cc cpu/full_cpu/bpred.cc cpu/full_cpu/commit.cc diff --git a/arch/isa_parser.py b/arch/isa_parser.py index f7278628b..f86e6193d 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -638,7 +638,7 @@ CpuModel('FullCPU', 'full_cpu_exec.cc', { 'CPU_exec_context': 'DynInst' }) CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst' }) + { 'CPU_exec_context': 'AlphaDynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. The entry key is the model name diff --git a/base/traceflags.py b/base/traceflags.py index 8b4208660..a1fb45177 100644 --- a/base/traceflags.py +++ b/base/traceflags.py @@ -132,6 +132,9 @@ baseFlags = [ 'ROB', 'FreeList', 'RenameMap', + 'LDSTQ', + 'StoreSet', + 'MemDepUnit', 'DynInst', 'FullCPU' ] @@ -150,7 +153,7 @@ compoundFlagMap = { 'DiskImageAll' : [ 'DiskImage', 'DiskImageRead', 'DiskImageWrite' ], 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'DynInst', 'FullCPU'] + 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LDSTQ', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU'] } ############################################################# diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index bd681e1dc..c527eb08b 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -34,6 +34,7 @@ #include #include "base/cprintf.hh" 
+#include "base/trace.hh" #include "arch/alpha/faults.hh" #include "cpu/exetrace.hh" @@ -67,12 +68,14 @@ my_hash_t thishash; //int break_inst = -1; -template +template BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, FullCPU *cpu) : staticInst(machInst), traceData(NULL), cpu(cpu), xc(cpu->xcBase()) { + DPRINTF(FullCPU, "DynInst: Creating new DynInst.\n"); + effAddr = MemReq::inval_addr; physEffAddr = MemReq::inval_addr; @@ -123,11 +126,13 @@ BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, ++instcount; +// assert(instcount < 50); + DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", instcount); } -template +template BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) : staticInst(_staticInst), traceData(NULL) { @@ -155,7 +160,7 @@ BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) } } -template +template BaseDynInst::~BaseDynInst() { /* @@ -169,21 +174,21 @@ BaseDynInst::~BaseDynInst() instcount); } -template +template FunctionalMemory * BaseDynInst::getMemory(void) { return xc->mem; } /* -template +template IntReg * BaseDynInst::getIntegerRegs(void) { return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile); } */ -template +template void BaseDynInst::prefetch(Addr addr, unsigned flags) { @@ -229,7 +234,7 @@ BaseDynInst::prefetch(Addr addr, unsigned flags) } } -template +template void BaseDynInst::writeHint(Addr addr, int size, unsigned flags) { @@ -261,7 +266,7 @@ BaseDynInst::writeHint(Addr addr, int size, unsigned flags) /** * @todo Need to find a way to get the cache block size here. */ -template +template Fault BaseDynInst::copySrcTranslate(Addr src) { @@ -284,7 +289,7 @@ BaseDynInst::copySrcTranslate(Addr src) /** * @todo Need to find a way to get the cache block size here. 
*/ -template +template Fault BaseDynInst::copy(Addr dest) { @@ -308,7 +313,7 @@ BaseDynInst::copy(Addr dest) return fault; } -template +template void BaseDynInst::dump() { @@ -317,7 +322,7 @@ BaseDynInst::dump() cprintf("'\n"); } -template +template void BaseDynInst::dump(std::string &outstring) { @@ -330,7 +335,7 @@ BaseDynInst::dump(std::string &outstring) #if 0 -template +template Fault BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) { diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 7651b517e..fe30b5195 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -53,12 +53,12 @@ namespace Trace { class InstRecord; }; -class BaseInst -{ -}; +// Forward declaration. +template +class StaticInstPtr; template -class BaseDynInst : public FastAlloc +class BaseDynInst : public FastAlloc, public RefCounted { public: // Typedef for the CPU. @@ -74,7 +74,7 @@ class BaseDynInst : public FastAlloc /// Logical register index type. typedef typename ISA::RegIndex RegIndex; /// Integer register index type. - typedef typename ISA::IntReg IntReg; + typedef typename ISA::IntReg IntReg; enum { MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs @@ -430,6 +430,9 @@ class BaseDynInst : public FastAlloc /** Sets this instruction as ready to commit. */ void setCanCommit() { canCommit = true; } + /** Clears this instruction as being ready to commit. */ + void clearCanCommit() { canCommit = false; } + /** Returns whether or not this instruction is ready to commit. 
*/ bool readyToCommit() const { return canCommit; } diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc new file mode 100644 index 000000000..88c39a9b0 --- /dev/null +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -0,0 +1,110 @@ +#include "base/trace.hh" +#include "cpu/beta_cpu/2bit_local_pred.hh" + +DefaultBP::SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +DefaultBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +DefaultBP::SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +DefaultBP::SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} + +DefaultBP::DefaultBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) + : localPredictorSize(_localPredictorSize), + localCtrBits(_localCtrBits), + instShiftAmt(_instShiftAmt) +{ + // Should do checks here to make sure sizes are correct (powers of 2). + + // Setup the index mask. + indexMask = localPredictorSize - 1; + + DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + + // Setup the array of counters for the local predictor. 
+ localCtrs = new SatCounter[localPredictorSize](localCtrBits); + + DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + localPredictorSize); + + DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + + DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + instShiftAmt); +} + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} + +bool +DefaultBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + local_prediction = localCtrs[local_predictor_idx].read(); + + DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + (int)local_prediction); + + return getPrediction(local_prediction); +} + +void +DefaultBP::update(Addr &branch_addr, bool taken) +{ + unsigned local_predictor_idx; + + // Update the local predictor. + local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +} diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh new file mode 100644 index 000000000..32a7972d0 --- /dev/null +++ b/cpu/beta_cpu/2bit_local_pred.hh @@ -0,0 +1,99 @@ +#ifndef __2BIT_LOCAL_PRED_HH__ +#define __2BIT_LOCAL_PRED_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +class DefaultBP +{ + public: + /** + * Default branch predictor constructor. 
+ */ + DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, bool taken); + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned getLocalIndex(Addr &PC); + + /** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ + class SatCounter + { + public: + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + uint8_t read() + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; + }; + + /** Array of counters that make up the local predictor. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Number of bits to shift the PC when calculating index. 
*/ + unsigned instShiftAmt; + + /** Mask to get index bits. */ + unsigned indexMask; +}; + +#endif // __2BIT_LOCAL_PRED_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc index a79d3082c..1bfcb8420 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.cc +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -1,102 +1,7 @@ -#ifndef __ALPHA_DYN_INST_CC__ -#define __ALPHA_DYN_INST_CC__ -#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_dyn_inst_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" -// Force instantiation of BaseDynInst -template BaseDynInst; - -AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) - : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) -{ - // Initialize these to illegal values. - robIdx = -1; - iqIdx = -1; -} - -AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) - : BaseDynInst(_staticInst) -{ -} - -uint64_t -AlphaDynInst::readUniq() -{ - return cpu->readUniq(); -} - -void -AlphaDynInst::setUniq(uint64_t val) -{ - cpu->setUniq(val); -} - -uint64_t -AlphaDynInst::readFpcr() -{ - return cpu->readFpcr(); -} - -void -AlphaDynInst::setFpcr(uint64_t val) -{ - cpu->setFpcr(val); -} - -#ifdef FULL_SYSTEM -uint64_t -AlphaDynInst::readIpr(int idx, Fault &fault) -{ - return cpu->readIpr(idx, fault); -} -Fault -AlphaDynInst::setIpr(int idx, uint64_t val) -{ - return cpu->setIpr(idx, val); -} - -Fault -AlphaDynInst::hwrei() -{ - return cpu->hwrei(); -} - -int -AlphaDynInst::readIntrFlag() -{ -return cpu->readIntrFlag(); -} - -void -AlphaDynInst::setIntrFlag(int val) -{ - cpu->setIntrFlag(val); -} - -bool -AlphaDynInst::inPalMode() -{ - return cpu->inPalMode(); -} - -void -AlphaDynInst::trap(Fault fault) -{ - cpu->trap(fault); -} - -bool -AlphaDynInst::simPalCheck(int palFunc) -{ - return cpu->simPalCheck(palFunc); -} -#else -void -AlphaDynInst::syscall() -{ - cpu->syscall(); -} -#endif - -#endif // __ALPHA_DYN_INST_CC__ +// Force instantiation of AlphaDynInst for all the 
implementations that +// are needed. +template AlphaDynInst; diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index 69d145355..4e1cebd11 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -8,10 +8,37 @@ #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/inst_seq.hh" -using namespace std; +/** + * Mostly implementation specific AlphaDynInst. It is templated in case there + * are other implementations that are similar enough to be able to use this + * class without changes. This is mainly useful if there are multiple similar + * CPU implementations of the same ISA. + */ -class AlphaDynInst : public BaseDynInst +template +class AlphaDynInst : public BaseDynInst { + public: + // Typedef for the CPU. + typedef typename Impl::FullCPU FullCPU; + + //Typedef to get the ISA. + typedef typename Impl::ISA ISA; + + /// Binary machine instruction type. + typedef typename ISA::MachInst MachInst; + /// Memory address type. + typedef typename ISA::Addr Addr; + /// Logical register index type. + typedef typename ISA::RegIndex RegIndex; + /// Integer register index type. + typedef typename ISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = ISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs + }; + public: /** BaseDynInst constructor given a binary instruction. */ AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, @@ -27,40 +54,6 @@ class AlphaDynInst : public BaseDynInst return fault; } - /** Location of this instruction within the ROB. Might be somewhat - * implementation specific. - * Might not want this data in the inst as it may be deleted prior to - * execution of the stage that needs it. - */ - int robIdx; - - int getROBEntry() - { - return robIdx; - } - - void setROBEntry(int rob_idx) - { - robIdx = rob_idx; - } - - /** Location of this instruction within the IQ. Might be somewhat - * implementation specific. 
- * Might not want this data in the inst as it may be deleted prior to - * execution of the stage that needs it. - */ - int iqIdx; - - int getIQEntry() - { - return iqIdx; - } - - void setIQEntry(int iq_idx) - { - iqIdx = iq_idx; - } - uint64_t readUniq(); void setUniq(uint64_t val); diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh new file mode 100644 index 000000000..8311067db --- /dev/null +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -0,0 +1,109 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +template +AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ +} + +template +AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) + : BaseDynInst(_staticInst) +{ +} + +template +uint64_t +AlphaDynInst::readUniq() +{ + return cpu->readUniq(); +} + +template +void +AlphaDynInst::setUniq(uint64_t val) +{ + cpu->setUniq(val); +} + +template +uint64_t +AlphaDynInst::readFpcr() +{ + return cpu->readFpcr(); +} + +template +void +AlphaDynInst::setFpcr(uint64_t val) +{ + cpu->setFpcr(val); +} + +#ifdef FULL_SYSTEM +template +uint64_t +AlphaDynInst::readIpr(int idx, Fault &fault) +{ + return cpu->readIpr(idx, fault); +} + +template +Fault +AlphaDynInst::setIpr(int idx, uint64_t val) +{ + return cpu->setIpr(idx, val); +} + +template +Fault +AlphaDynInst::hwrei() +{ + return cpu->hwrei(); +} + +template +int +AlphaDynInst::readIntrFlag() +{ +return cpu->readIntrFlag(); +} + +template +void +AlphaDynInst::setIntrFlag(int val) +{ + cpu->setIntrFlag(val); +} + +template +bool +AlphaDynInst::inPalMode() +{ + return cpu->inPalMode(); +} + +template +void +AlphaDynInst::trap(Fault fault) +{ + cpu->trap(fault); +} + +template +bool +AlphaDynInst::simPalCheck(int palFunc) +{ + return cpu->simPalCheck(palFunc); +} +#else +template +void +AlphaDynInst::syscall() +{ + cpu->syscall(); +} +#endif + diff --git a/cpu/beta_cpu/alpha_full_cpu.cc 
b/cpu/beta_cpu/alpha_full_cpu.cc index 880418146..80c4bdec8 100644 --- a/cpu/beta_cpu/alpha_full_cpu.cc +++ b/cpu/beta_cpu/alpha_full_cpu.cc @@ -1,911 +1,9 @@ -#include "base/cprintf.hh" -#include "base/statistics.hh" -#include "base/timebuf.hh" -#include "cpu/full_cpu/dd_queue.hh" -#include "cpu/full_cpu/full_cpu.hh" -#include "cpu/full_cpu/rob_station.hh" -#include "mem/cache/cache.hh" // for dynamic cast -#include "mem/mem_interface.hh" -#include "sim/builder.hh" -#include "sim/sim_events.hh" -#include "sim/stats.hh" - -#include "cpu/beta_cpu/alpha_full_cpu.hh" -#include "cpu/beta_cpu/alpha_params.hh" -#include "cpu/beta_cpu/comm.hh" - -AlphaFullCPU::AlphaFullCPU(Params ¶ms) - : FullBetaCPU(params) -{ - - fetch.setCPU(this); - decode.setCPU(this); - rename.setCPU(this); - iew.setCPU(this); - commit.setCPU(this); - - rob.setCPU(this); -} - -#ifndef FULL_SYSTEM - -void -AlphaFullCPU::syscall() -{ - DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); - - squashStages(); - - // Copy over all important state to xc once all the unrolling is done. - copyToXC(); - - process->syscall(xc); - - // Copy over all important state back to normal. - copyFromXC(); -} - -// This is not a pretty function, and should only be used if it is necessary -// to fake having everything squash all at once (ie for non-full system -// syscalls). -void -AlphaFullCPU::squashStages() -{ - InstSeqNum rob_head = rob.readHeadSeqNum(); - - // Now hack the time buffer to put this sequence number in the places - // where the stages might read it. 
- for (int i = 0; i < 10; ++i) - { - timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; - } - - fetch.squash(rob.readHeadNextPC()); - fetchQueue.advance(); - - decode.squash(); - decodeQueue.advance(); - - rename.squash(); - renameQueue.advance(); - renameQueue.advance(); - - iew.squash(); - iewQueue.advance(); - iewQueue.advance(); - - rob.squash(rob_head); - commit.setSquashing(); -} - -#endif // FULL_SYSTEM - -void -AlphaFullCPU::copyToXC() -{ - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) - { - renamed_reg = renameMap.lookup(i); - xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; - DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg]); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; - xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; - } - - xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; - xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; - xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; - xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; - - xc->regs.pc = rob.readHeadPC(); - xc->regs.npc = xc->regs.pc+4; - - xc->func_exe_inst = funcExeInst; -} - -// This function will probably mess things up unless the ROB is empty and -// there are no instructions in the pipeline. -void -AlphaFullCPU::copyFromXC() -{ - PhysRegIndex renamed_reg; - - // First loop through the integer registers. 
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i) - { - renamed_reg = renameMap.lookup(i); - - DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg], - xc->regs.intRegFile[i]); - - regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; - regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; - } - - // Then loop through the misc registers. - regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; - - // Then finally set the PC and the next PC. -// regFile.pc = xc->regs.pc; -// regFile.npc = xc->regs.npc; - - funcExeInst = xc->func_exe_inst; -} - -#ifdef FULL_SYSTEM - -uint64_t * -AlphaFullCPU::getIpr() -{ - return regs.ipr; -} - -uint64_t -AlphaFullCPU::readIpr(int idx, Fault &fault) -{ - uint64_t *ipr = getIpr(); - uint64_t retval = 0; // return value, default 0 - - switch (idx) { - case AlphaISA::IPR_PALtemp0: - case AlphaISA::IPR_PALtemp1: - case AlphaISA::IPR_PALtemp2: - case AlphaISA::IPR_PALtemp3: - case AlphaISA::IPR_PALtemp4: - case AlphaISA::IPR_PALtemp5: - case AlphaISA::IPR_PALtemp6: - case AlphaISA::IPR_PALtemp7: - case AlphaISA::IPR_PALtemp8: - case AlphaISA::IPR_PALtemp9: - case AlphaISA::IPR_PALtemp10: - case AlphaISA::IPR_PALtemp11: - case AlphaISA::IPR_PALtemp12: - case AlphaISA::IPR_PALtemp13: - case AlphaISA::IPR_PALtemp14: - case AlphaISA::IPR_PALtemp15: - case AlphaISA::IPR_PALtemp16: - case AlphaISA::IPR_PALtemp17: - case AlphaISA::IPR_PALtemp18: - case AlphaISA::IPR_PALtemp19: - case AlphaISA::IPR_PALtemp20: - case AlphaISA::IPR_PALtemp21: - 
case AlphaISA::IPR_PALtemp22: - case AlphaISA::IPR_PALtemp23: - case AlphaISA::IPR_PAL_BASE: - - case AlphaISA::IPR_IVPTBR: - case AlphaISA::IPR_DC_MODE: - case AlphaISA::IPR_MAF_MODE: - case AlphaISA::IPR_ISR: - case AlphaISA::IPR_EXC_ADDR: - case AlphaISA::IPR_IC_PERR_STAT: - case AlphaISA::IPR_DC_PERR_STAT: - case AlphaISA::IPR_MCSR: - case AlphaISA::IPR_ASTRR: - case AlphaISA::IPR_ASTER: - case AlphaISA::IPR_SIRR: - case AlphaISA::IPR_ICSR: - case AlphaISA::IPR_ICM: - case AlphaISA::IPR_DTB_CM: - case AlphaISA::IPR_IPLR: - case AlphaISA::IPR_INTID: - case AlphaISA::IPR_PMCTR: - // no side-effect - retval = ipr[idx]; - break; - - case AlphaISA::IPR_CC: - retval |= ipr[idx] & ULL(0xffffffff00000000); - retval |= curTick & ULL(0x00000000ffffffff); - break; - - case AlphaISA::IPR_VA: - retval = ipr[idx]; - break; - - case AlphaISA::IPR_VA_FORM: - case AlphaISA::IPR_MM_STAT: - case AlphaISA::IPR_IFAULT_VA_FORM: - case AlphaISA::IPR_EXC_MASK: - case AlphaISA::IPR_EXC_SUM: - retval = ipr[idx]; - break; - - case AlphaISA::IPR_DTB_PTE: - { - AlphaISA::PTE &pte = dtb->index(!misspeculating()); - - retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; - retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; - retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; - retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; - retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; - retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; - retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; - } - break; - - // write only registers - case AlphaISA::IPR_HWINT_CLR: - case AlphaISA::IPR_SL_XMIT: - case AlphaISA::IPR_DC_FLUSH: - case AlphaISA::IPR_IC_FLUSH: - case AlphaISA::IPR_ALT_MODE: - case AlphaISA::IPR_DTB_IA: - case AlphaISA::IPR_DTB_IAP: - case AlphaISA::IPR_ITB_IA: - case AlphaISA::IPR_ITB_IAP: - fault = Unimplemented_Opcode_Fault; - break; - - default: - // invalid IPR - fault = Unimplemented_Opcode_Fault; - break; - } - - return retval; -} - -Fault -AlphaFullCPU::setIpr(int idx, uint64_t val) -{ - 
uint64_t *ipr = getIpr(); - uint64_t old; - - if (misspeculating()) - return No_Fault; - - switch (idx) { - case AlphaISA::IPR_PALtemp0: - case AlphaISA::IPR_PALtemp1: - case AlphaISA::IPR_PALtemp2: - case AlphaISA::IPR_PALtemp3: - case AlphaISA::IPR_PALtemp4: - case AlphaISA::IPR_PALtemp5: - case AlphaISA::IPR_PALtemp6: - case AlphaISA::IPR_PALtemp7: - case AlphaISA::IPR_PALtemp8: - case AlphaISA::IPR_PALtemp9: - case AlphaISA::IPR_PALtemp10: - case AlphaISA::IPR_PALtemp11: - case AlphaISA::IPR_PALtemp12: - case AlphaISA::IPR_PALtemp13: - case AlphaISA::IPR_PALtemp14: - case AlphaISA::IPR_PALtemp15: - case AlphaISA::IPR_PALtemp16: - case AlphaISA::IPR_PALtemp17: - case AlphaISA::IPR_PALtemp18: - case AlphaISA::IPR_PALtemp19: - case AlphaISA::IPR_PALtemp20: - case AlphaISA::IPR_PALtemp21: - case AlphaISA::IPR_PALtemp22: - case AlphaISA::IPR_PAL_BASE: - case AlphaISA::IPR_IC_PERR_STAT: - case AlphaISA::IPR_DC_PERR_STAT: - case AlphaISA::IPR_PMCTR: - // write entire quad w/ no side-effect - ipr[idx] = val; - break; - - case AlphaISA::IPR_CC_CTL: - // This IPR resets the cycle counter. We assume this only - // happens once... let's verify that. - assert(ipr[idx] == 0); - ipr[idx] = 1; - break; - - case AlphaISA::IPR_CC: - // This IPR only writes the upper 64 bits. It's ok to write - // all 64 here since we mask out the lower 32 in rpcc (see - // isa_desc). 
- ipr[idx] = val; - break; - - case AlphaISA::IPR_PALtemp23: - // write entire quad w/ no side-effect - old = ipr[idx]; - ipr[idx] = val; - kernelStats.context(old, val); - break; - - case AlphaISA::IPR_DTB_PTE: - // write entire quad w/ no side-effect, tag is forthcoming - ipr[idx] = val; - break; - - case AlphaISA::IPR_EXC_ADDR: - // second least significant bit in PC is always zero - ipr[idx] = val & ~2; - break; - - case AlphaISA::IPR_ASTRR: - case AlphaISA::IPR_ASTER: - // only write least significant four bits - privilege mask - ipr[idx] = val & 0xf; - break; - - case AlphaISA::IPR_IPLR: -#ifdef DEBUG - if (break_ipl != -1 && break_ipl == (val & 0x1f)) - debug_break(); -#endif - - // only write least significant five bits - interrupt level - ipr[idx] = val & 0x1f; - kernelStats.swpipl(ipr[idx]); - break; - - case AlphaISA::IPR_DTB_CM: - kernelStats.mode((val & 0x18) != 0); - - case AlphaISA::IPR_ICM: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; - - case AlphaISA::IPR_ALT_MODE: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; - - case AlphaISA::IPR_MCSR: - // more here after optimization... 
- ipr[idx] = val; - break; - - case AlphaISA::IPR_SIRR: - // only write software interrupt mask - ipr[idx] = val & 0x7fff0; - break; - - case AlphaISA::IPR_ICSR: - ipr[idx] = val & ULL(0xffffff0300); - break; - - case AlphaISA::IPR_IVPTBR: - case AlphaISA::IPR_MVPTBR: - ipr[idx] = val & ULL(0xffffffffc0000000); - break; - - case AlphaISA::IPR_DC_TEST_CTL: - ipr[idx] = val & 0x1ffb; - break; - - case AlphaISA::IPR_DC_MODE: - case AlphaISA::IPR_MAF_MODE: - ipr[idx] = val & 0x3f; - break; - - case AlphaISA::IPR_ITB_ASN: - ipr[idx] = val & 0x7f0; - break; - - case AlphaISA::IPR_DTB_ASN: - ipr[idx] = val & ULL(0xfe00000000000000); - break; - - case AlphaISA::IPR_EXC_SUM: - case AlphaISA::IPR_EXC_MASK: - // any write to this register clears it - ipr[idx] = 0; - break; - - case AlphaISA::IPR_INTID: - case AlphaISA::IPR_SL_RCV: - case AlphaISA::IPR_MM_STAT: - case AlphaISA::IPR_ITB_PTE_TEMP: - case AlphaISA::IPR_DTB_PTE_TEMP: - // read-only registers - return Unimplemented_Opcode_Fault; - - case AlphaISA::IPR_HWINT_CLR: - case AlphaISA::IPR_SL_XMIT: - case AlphaISA::IPR_DC_FLUSH: - case AlphaISA::IPR_IC_FLUSH: - // the following are write only - ipr[idx] = val; - break; - - case AlphaISA::IPR_DTB_IA: - // really a control write - ipr[idx] = 0; - - dtb->flushAll(); - break; - - case AlphaISA::IPR_DTB_IAP: - // really a control write - ipr[idx] = 0; - - dtb->flushProcesses(); - break; - - case AlphaISA::IPR_DTB_IS: - // really a control write - ipr[idx] = val; - - dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); - break; - - case AlphaISA::IPR_DTB_TAG: { - struct AlphaISA::PTE pte; - - // FIXME: granularity hints NYI... 
- if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) - panic("PTE GH field != 0"); - - // write entire quad - ipr[idx] = val; - - // construct PTE for new entry - pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); - pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); - pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); - pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); - pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); - pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); - pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); - - // insert new TAG/PTE value into data TLB - dtb->insert(val, pte); - } - break; - - case AlphaISA::IPR_ITB_PTE: { - struct AlphaISA::PTE pte; - - // FIXME: granularity hints NYI... - if (ITB_PTE_GH(val) != 0) - panic("PTE GH field != 0"); - - // write entire quad - ipr[idx] = val; - - // construct PTE for new entry - pte.ppn = ITB_PTE_PPN(val); - pte.xre = ITB_PTE_XRE(val); - pte.xwe = 0; - pte.fonr = ITB_PTE_FONR(val); - pte.fonw = ITB_PTE_FONW(val); - pte.asma = ITB_PTE_ASMA(val); - pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); - - // insert new TAG/PTE value into data TLB - itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); - } - break; - - case AlphaISA::IPR_ITB_IA: - // really a control write - ipr[idx] = 0; - - itb->flushAll(); - break; - - case AlphaISA::IPR_ITB_IAP: - // really a control write - ipr[idx] = 0; - - itb->flushProcesses(); - break; - - case AlphaISA::IPR_ITB_IS: - // really a control write - ipr[idx] = val; - - itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); - break; - - default: - // invalid IPR - return Unimplemented_Opcode_Fault; - } - - // no error... - return No_Fault; - -} - -int -AlphaFullCPU::readIntrFlag() -{ - return regs.intrflag; -} - -void -AlphaFullCPU::setIntrFlag(int val) -{ - regs.intrflag = val; -} - -// Maybe have this send back from IEW stage to squash and update PC. 
-Fault -AlphaFullCPU::hwrei() -{ - uint64_t *ipr = getIpr(); - - if (!PC_PAL(regs.pc)) - return Unimplemented_Opcode_Fault; - - setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); - - if (!misspeculating()) { - kernelStats.hwrei(); - - if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) - AlphaISA::swap_palshadow(&regs, false); - - AlphaISA::check_interrupts = true; - } - - // FIXME: XXX check for interrupts? XXX - return No_Fault; -} - -bool -AlphaFullCPU::inPalMode() -{ - return PC_PAL(readPC()); -} - -bool -AlphaFullCPU::simPalCheck(int palFunc) -{ - kernelStats.callpal(palFunc); - - switch (palFunc) { - case PAL::halt: - halt(); - if (--System::numSystemsRunning == 0) - new SimExitEvent("all cpus halted"); - break; - - case PAL::bpt: - case PAL::bugchk: - if (system->breakpoint()) - return false; - break; - } - - return true; -} - -// Probably shouldn't be able to switch to the trap handler as quickly as -// this. Also needs to get the exception restart address from the commit -// stage. -void -AlphaFullCPU::trap(Fault fault) -{ - uint64_t PC = commit.readPC(); - - DPRINTF(Fault, "Fault %s\n", FaultName(fault)); - Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); - - assert(!misspeculating()); - kernelStats.fault(fault); - - if (fault == Arithmetic_Fault) - panic("Arithmetic traps are unimplemented!"); - - AlphaISA::InternalProcReg *ipr = getIpr(); - - // exception restart address - Get the commit PC - if (fault != Interrupt_Fault || !PC_PAL(PC)) - ipr[AlphaISA::IPR_EXC_ADDR] = PC; - - if (fault == Pal_Fault || fault == Arithmetic_Fault /* || - fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { - // traps... skip faulting instruction - ipr[AlphaISA::IPR_EXC_ADDR] += 4; - } - - if (!PC_PAL(PC)) - AlphaISA::swap_palshadow(&regs, true); - - setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); - setNextPC(PC + sizeof(MachInst)); -} - -void -AlphaFullCPU::processInterrupts() -{ - // Check for interrupts here.
For now can copy the code that exists - // within isa_fullsys_traits.hh. -} - -// swap_palshadow swaps in the values of the shadow registers and -// swaps them with the values of the physical registers that map to the -// same logical index. -void -AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) -{ - if (palShadowEnabled == use_shadow) - panic("swap_palshadow: wrong PAL shadow state"); - - palShadowEnabled = use_shadow; - - // Will have to lookup in rename map to get physical registers, then - // swap. - for (int i = 0; i < AlphaISA::NumIntRegs; i++) { - if (reg_redir[i]) { - AlphaISA::IntReg temp = regs->intRegFile[i]; - regs->intRegFile[i] = regs->palregs[i]; - regs->palregs[i] = temp; - } - } -} - -#endif // FULL_SYSTEM - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) - - Param numThreads; - -#ifdef FULL_SYSTEM -SimObjectParam system; -SimObjectParam itb; -SimObjectParam dtb; -Param mult; -#else -SimObjectVectorParam workload; -SimObjectParam process; -Param asid; -#endif // FULL_SYSTEM -SimObjectParam mem; - -Param max_insts_any_thread; -Param max_insts_all_threads; -Param max_loads_any_thread; -Param max_loads_all_threads; - -SimObjectParam icache; -SimObjectParam dcache; - -Param decodeToFetchDelay; -Param renameToFetchDelay; -Param iewToFetchDelay; -Param commitToFetchDelay; -Param fetchWidth; - -Param renameToDecodeDelay; -Param iewToDecodeDelay; -Param commitToDecodeDelay; -Param fetchToDecodeDelay; -Param decodeWidth; - -Param iewToRenameDelay; -Param commitToRenameDelay; -Param decodeToRenameDelay; -Param renameWidth; - -Param commitToIEWDelay; -Param renameToIEWDelay; -Param issueToExecuteDelay; -Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; - -Param iewToCommitDelay; -Param renameToROBDelay; -Param commitWidth; -Param squashWidth; - -Param numPhysIntRegs; -Param numPhysFloatRegs; -Param numIQEntries; -Param numROBEntries; - -Param defReg; - -END_DECLARE_SIM_OBJECT_PARAMS(AlphaFullCPU) - 
-BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) - - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#ifdef FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data translation buffer"), - INIT_PARAM_DFLT(mult, "System clock multiplier", 1), -#else - INIT_PARAM(workload, "Processes to run"), - INIT_PARAM_DFLT(process, "Process to run", NULL), - INIT_PARAM(asid, "Address space ID"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - "this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - 
INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, "Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM(defReg, "Defer registration") - -END_INIT_SIM_OBJECT_PARAMS(AlphaFullCPU) - -CREATE_SIM_OBJECT(AlphaFullCPU) -{ - AlphaFullCPU *cpu; - -#ifdef FULL_SYSTEM - if (mult != 1) - panic("Processor clock multiplier must be 1?\n"); - - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? 
numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - - Process *actual_process; - - if (process == NULL) { - actual_process = workload[0]; - } else { - actual_process = process; - } - -#endif - - AlphaSimpleParams params; - - params.name = getInstanceName(); - params.numberOfThreads = actual_num_threads; - -#ifdef FULL_SYSTEM - params._system = system; - params.itb = itb; - params.dtb = dtb; - params.freq = ticksPerSecond * mult; -#else - params.workload = workload; - params.process = actual_process; - params.asid = asid; -#endif // FULL_SYSTEM - - params.mem = mem; - - params.maxInstsAnyThread = max_insts_any_thread; - params.maxInstsAllThreads = max_insts_all_threads; - params.maxLoadsAnyThread = max_loads_any_thread; - params.maxLoadsAllThreads = max_loads_all_threads; - - // - // Caches - // - params.icacheInterface = icache ? icache->getInterface() : NULL; - params.dcacheInterface = dcache ? dcache->getInterface() : NULL; - - params.decodeToFetchDelay = decodeToFetchDelay; - params.renameToFetchDelay = renameToFetchDelay; - params.iewToFetchDelay = iewToFetchDelay; - params.commitToFetchDelay = commitToFetchDelay; - params.fetchWidth = fetchWidth; - - params.renameToDecodeDelay = renameToDecodeDelay; - params.iewToDecodeDelay = iewToDecodeDelay; - params.commitToDecodeDelay = commitToDecodeDelay; - params.fetchToDecodeDelay = fetchToDecodeDelay; - params.decodeWidth = decodeWidth; - - params.iewToRenameDelay = iewToRenameDelay; - params.commitToRenameDelay = commitToRenameDelay; - params.decodeToRenameDelay = decodeToRenameDelay; - params.renameWidth = renameWidth; - - params.commitToIEWDelay = commitToIEWDelay; - params.renameToIEWDelay = renameToIEWDelay; - params.issueToExecuteDelay = issueToExecuteDelay; - params.issueWidth = issueWidth; - params.executeWidth = executeWidth; - params.executeIntWidth = executeIntWidth; - params.executeFloatWidth = executeFloatWidth; - - params.iewToCommitDelay = 
iewToCommitDelay; - params.renameToROBDelay = renameToROBDelay; - params.commitWidth = commitWidth; - params.squashWidth = squashWidth; - - params.numPhysIntRegs = numPhysIntRegs; - params.numPhysFloatRegs = numPhysFloatRegs; - params.numIQEntries = numIQEntries; - params.numROBEntries = numROBEntries; - - params.defReg = defReg; - - cpu = new AlphaFullCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("AlphaFullCPU", AlphaFullCPU) - +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" + +// Force instantiation of AlphaFullCPU for all the implementations that are +// needed. Consider merging this and alpha_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (alpha_impl.cc?). +template AlphaFullCPU<AlphaSimpleImpl>; diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index b098aaac1..0e094b122 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -6,18 +6,19 @@ #ifndef __ALPHA_FULL_CPU_HH__ #define __ALPHA_FULL_CPU_HH__ -// To include: comm, impl, full cpu, ITB/DTB if full sys, -#include "cpu/beta_cpu/comm.hh" -#include "cpu/beta_cpu/alpha_impl.hh" +// To include: comm, full cpu, ITB/DTB if full sys, +//#include "cpu/beta_cpu/comm.hh" +//#include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/full_cpu.hh" using namespace std; -class AlphaFullCPU : public FullBetaCPU<AlphaSimpleImpl> +template <class Impl> +class AlphaFullCPU : public FullBetaCPU<Impl> { public: - typedef AlphaSimpleImpl::ISA AlphaISA; - typedef AlphaSimpleImpl::Params Params; + typedef typename Impl::ISA AlphaISA; + typedef typename Impl::Params Params; public: AlphaFullCPU(Params &params); diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc new file mode 100644 index 000000000..5fe96d656 --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -0,0 +1,306 @@ +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_full_cpu.hh" +
+#include "mem/cache/base_cache.hh" + +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#ifdef FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "dev/alpha_access.h" +#include "dev/pciareg.h" +#include "mem/functional_mem/memory_control.hh" +#include "mem/functional_mem/physical_memory.hh" +#include "sim/system.hh" +#include "targetarch/alpha_memory.hh" +#include "targetarch/vtophys.hh" +#else // !FULL_SYSTEM +#include "eio/eio.hh" +#include "mem/functional_mem/functional_memory.hh" +#endif // FULL_SYSTEM + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) + + Param numThreads; + +#ifdef FULL_SYSTEM +SimObjectParam system; +SimObjectParam itb; +SimObjectParam dtb; +Param mult; +#else +SimObjectVectorParam workload; +SimObjectParam process; +Param asid; +#endif // FULL_SYSTEM +SimObjectParam mem; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param 
localPredictorSize; +Param localPredictorCtrBits; +Param BTBEntries; +Param BTBTagSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param instShiftAmt; + +Param defReg; + +END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) + + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#ifdef FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), + INIT_PARAM_DFLT(mult, "System clock multiplier", 1), +#else + INIT_PARAM(workload, "Processes to run"), + INIT_PARAM_DFLT(process, "Process to run", NULL), + INIT_PARAM(asid, "Address space ID"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to 
rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. " + "Must be a power of 2."), + INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + + INIT_PARAM(defReg, "Defer registration") + +END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) + +CREATE_SIM_OBJECT(BaseFullCPU) +{ + AlphaFullCPU *cpu; + +#ifdef FULL_SYSTEM + if (mult != 1) + panic("Processor clock multiplier must be 1?\n"); + + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. 
+ int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + Process *actual_process; + + if (process == NULL) { + actual_process = workload[0]; + } else { + actual_process = process; + } + +#endif + + AlphaSimpleParams params; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#ifdef FULL_SYSTEM + params._system = system; + params.itb = itb; + params.dtb = dtb; + params.freq = ticksPerSecond * mult; +#else + params.workload = workload; + params.process = actual_process; + params.asid = asid; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.maxInstsAnyThread = max_insts_any_thread; + params.maxInstsAllThreads = max_insts_all_threads; + params.maxLoadsAnyThread = max_loads_any_thread; + params.maxLoadsAllThreads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? 
dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + + params.iewToCommitDelay = iewToCommitDelay; + params.renameToROBDelay = renameToROBDelay; + params.commitWidth = commitWidth; + params.squashWidth = squashWidth; + + params.localPredictorSize = localPredictorSize; + params.localPredictorCtrBits = localPredictorCtrBits; + params.BTBEntries = BTBEntries; + params.BTBTagSize = BTBTagSize; + + params.numPhysIntRegs = numPhysIntRegs; + params.numPhysFloatRegs = numPhysFloatRegs; + params.numIQEntries = numIQEntries; + params.numROBEntries = numROBEntries; + + params.instShiftAmt = 2; + + params.defReg = defReg; + + cpu = new AlphaFullCPU<AlphaSimpleImpl>(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU) + diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh new file mode 100644 index 000000000..8bfc0777e --- /dev/null +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -0,0 +1,690 @@ + +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include
"mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/beta_cpu/alpha_params.hh" +#include "cpu/beta_cpu/comm.hh" + +template <class Impl> +AlphaFullCPU<Impl>::AlphaFullCPU(Params &params) + : FullBetaCPU<Impl>(params) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + + fetch.setCPU(this); + decode.setCPU(this); + rename.setCPU(this); + iew.setCPU(this); + commit.setCPU(this); + + rob.setCPU(this); +} + +#ifndef FULL_SYSTEM + +template <class Impl> +void +AlphaFullCPU<Impl>::syscall() +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + // Commit stage needs to run as well. + commit.tick(); + + squashStages(); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++funcExeInst; + + // Copy over all important state to xc once all the unrolling is done. + copyToXC(); + + process->syscall(xc); + + // Copy over all important state back to CPU. + copyFromXC(); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --funcExeInst; +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls). Maybe put this at the FullCPU level? +template <class Impl> +void +AlphaFullCPU<Impl>::squashStages() +{ + InstSeqNum rob_head = rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it.
+ for (int i = 0; i < 5; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + fetch.squash(rob.readHeadNextPC()); + fetchQueue.advance(); + + decode.squash(); + decodeQueue.advance(); + + rename.squash(); + renameQueue.advance(); + renameQueue.advance(); + + // Be sure to advance the IEW queues so that the commit stage doesn't + // try to set an instruction as completed at the same time that it + // might be deleting it. + iew.squash(); + iewQueue.advance(); + iewQueue.advance(); + + rob.squash(rob_head); + commit.setSquashing(); +} + +#endif // FULL_SYSTEM + +template +void +AlphaFullCPU::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; + xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; + } + + xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; + xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; + xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; + xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + + xc->regs.pc = rob.readHeadPC(); + xc->regs.npc = xc->regs.pc+4; + + xc->func_exe_inst = funcExeInst; +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +template +void +AlphaFullCPU::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. 
+ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, regFile.intRegFile[renamed_reg], + xc->regs.intRegFile[i]); + + regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; + } + + // Then loop through the misc registers. + regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + + // Then finally set the PC and the next PC. +// regFile.pc = xc->regs.pc; +// regFile.npc = xc->regs.npc; + + funcExeInst = xc->func_exe_inst; +} + +#ifdef FULL_SYSTEM + +template +uint64_t * +AlphaFullCPU::getIpr() +{ + return regs.ipr; +} + +template +uint64_t +AlphaFullCPU::readIpr(int idx, Fault &fault) +{ + uint64_t *ipr = getIpr(); + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case 
AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PALtemp23: + case AlphaISA::IPR_PAL_BASE: + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + case AlphaISA::IPR_ISR: + case AlphaISA::IPR_EXC_ADDR: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_MCSR: + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + case AlphaISA::IPR_SIRR: + case AlphaISA::IPR_ICSR: + case AlphaISA::IPR_ICM: + case AlphaISA::IPR_DTB_CM: + case AlphaISA::IPR_IPLR: + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case AlphaISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; + + case AlphaISA::IPR_VA: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_VA_FORM: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_IFAULT_VA_FORM: + case AlphaISA::IPR_EXC_MASK: + case AlphaISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_DTB_PTE: + { + AlphaISA::PTE &pte = dtb->index(!misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + case AlphaISA::IPR_ALT_MODE: + case AlphaISA::IPR_DTB_IA: + case AlphaISA::IPR_DTB_IAP: + case AlphaISA::IPR_ITB_IA: + case AlphaISA::IPR_ITB_IAP: + fault = Unimplemented_Opcode_Fault; + break; + + default: + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; + } + + return retval; +} + +template +Fault 
+AlphaFullCPU::setIpr(int idx, uint64_t val) +{ + uint64_t *ipr = getIpr(); + uint64_t old; + + if (misspeculating()) + return No_Fault; + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PAL_BASE: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case AlphaISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case AlphaISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; + kernelStats.context(old, val); + break; + + case AlphaISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case AlphaISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case AlphaISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; + kernelStats.swpipl(ipr[idx]); + break; + + case AlphaISA::IPR_DTB_CM: + kernelStats.mode((val & 0x18) != 0); + + case AlphaISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_MCSR: + // more here after optimization... 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case AlphaISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case AlphaISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case AlphaISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case AlphaISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case AlphaISA::IPR_EXC_SUM: + case AlphaISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_SL_RCV: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_ITB_PTE_TEMP: + case AlphaISA::IPR_DTB_PTE_TEMP: + // read-only registers + return Unimplemented_Opcode_Fault; + + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case AlphaISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + dtb->flushAll(); + break; + + case AlphaISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + dtb->flushProcesses(); + break; + + case AlphaISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); + break; + + case AlphaISA::IPR_DTB_TAG: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + dtb->insert(val, pte); + } + break; + + case AlphaISA::IPR_ITB_PTE: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); + } + break; + + case AlphaISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + itb->flushAll(); + break; + + case AlphaISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + itb->flushProcesses(); + break; + + case AlphaISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return Unimplemented_Opcode_Fault; + } + + // no error... + return No_Fault; + +} + +template +int +AlphaFullCPU::readIntrFlag() +{ + return regs.intrflag; +} + +template +void +AlphaFullCPU::setIntrFlag(int val) +{ + regs.intrflag = val; +} + +// Maybe have this send back from IEW stage to squash and update PC. 
+template +Fault +AlphaFullCPU::hwrei() +{ + uint64_t *ipr = getIpr(); + + if (!PC_PAL(regs.pc)) + return Unimplemented_Opcode_Fault; + + setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); + + if (!misspeculating()) { + kernelStats.hwrei(); + + if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) + AlphaISA::swap_palshadow(&regs, false); + + AlphaISA::check_interrupts = true; + } + + // FIXME: XXX check for interrupts? XXX + return No_Fault; +} + +template +bool +AlphaFullCPU::inPalMode() +{ + return PC_PAL(readPC()); +} + +template +bool +AlphaFullCPU::simPalCheck(int palFunc) +{ + kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +template +void +AlphaFullCPU::trap(Fault fault) +{ + uint64_t PC = commit.readPC(); + + DPRINTF(Fault, "Fault %s\n", FaultName(fault)); + Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); + + assert(!misspeculating()); + kernelStats.fault(fault); + + if (fault == Arithmetic_Fault) + panic("Arithmetic traps are unimplemented!"); + + AlphaISA::InternalProcReg *ipr = getIpr(); + + // exception restart address - Get the commit PC + if (fault != Interrupt_Fault || !PC_PAL(PC)) + ipr[AlphaISA::IPR_EXC_ADDR] = PC; + + if (fault == Pal_Fault || fault == Arithmetic_Fault /* || + fault == Interrupt_Fault && !PC_PAL(regs.pc) */) { + // traps... skip faulting instruction + ipr[AlphaISA::IPR_EXC_ADDR] += 4; + } + + if (!PC_PAL(PC)) + AlphaISA::swap_palshadow(&regs, true); + + setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); + setNextPC(PC + sizeof(MachInst)); +} + +template +void +AlphaFullCPU::processInterrupts() +{ + // Check for interrupts here.
For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +template +void +AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. + for (int i = 0; i < AlphaISA::NumIntRegs; i++) { + if (reg_redir[i]) { + AlphaISA::IntReg temp = regs->intRegFile[i]; + regs->intRegFile[i] = regs->palregs[i]; + regs->palregs[i] = temp; + } + } +} + +#endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh index a80b116a8..fc86dacd7 100644 --- a/cpu/beta_cpu/alpha_impl.hh +++ b/cpu/beta_cpu/alpha_impl.hh @@ -3,23 +3,14 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/beta_cpu/comm.hh" #include "cpu/beta_cpu/cpu_policy.hh" #include "cpu/beta_cpu/alpha_params.hh" -#include "cpu/beta_cpu/commit.hh" -#include "cpu/beta_cpu/decode.hh" -#include "cpu/beta_cpu/fetch.hh" -#include "cpu/beta_cpu/free_list.hh" -#include "cpu/beta_cpu/iew.hh" - -#include "cpu/beta_cpu/inst_queue.hh" -#include "cpu/beta_cpu/regfile.hh" -#include "cpu/beta_cpu/rename.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/rob.hh" - +// Forward declarations. +template class AlphaDynInst; + +template class AlphaFullCPU; /** Implementation specific struct that defines several key things to the @@ -42,33 +33,22 @@ struct AlphaSimpleImpl typedef SimpleCPUPolicy CPUPol; /** The DynInst to be used. */ - typedef AlphaDynInst DynInst; + typedef AlphaDynInst DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr DynInstPtr; /** The FullCPU to be used. 
*/ - typedef AlphaFullCPU FullCPU; + typedef AlphaFullCPU FullCPU; /** The Params to be passed to each stage. */ typedef AlphaSimpleParams Params; - /** The struct for communication between fetch and decode. */ - typedef SimpleFetchSimpleDecode FetchStruct; - - /** The struct for communication between decode and rename. */ - typedef SimpleDecodeSimpleRename DecodeStruct; - - /** The struct for communication between rename and IEW. */ - typedef SimpleRenameSimpleIEW RenameStruct; - - /** The struct for communication between IEW and commit. */ - typedef SimpleIEWSimpleCommit IEWStruct; - - /** The struct for communication within the IEW stage. */ - typedef IssueStruct IssueStruct; - - /** The struct for all backwards communication. */ - typedef TimeBufStruct TimeStruct; + enum { + MaxWidth = 8 + }; }; - - #endif // __ALPHA_IMPL_HH__ diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh index b217ef8e3..92dfd35f5 100644 --- a/cpu/beta_cpu/alpha_params.hh +++ b/cpu/beta_cpu/alpha_params.hh @@ -1,6 +1,8 @@ #ifndef __ALPHA_SIMPLE_PARAMS_HH__ #define __ALPHA_SIMPLE_PARAMS_HH__ +#include "cpu/beta_cpu/full_cpu.hh" + //Forward declarations class System; class AlphaITB; @@ -15,16 +17,11 @@ class MemInterface; * defined that it can pass to all of the individual stages. 
*/ -class AlphaSimpleParams +class AlphaSimpleParams : public BaseFullCPU::Params { public: - std::string name; - int numberOfThreads; - #ifdef FULL_SYSTEM - System *_system; AlphaITB *itb; AlphaDTB *dtb; - Tick freq; #else std::vector workload; Process *process; @@ -33,34 +30,41 @@ class AlphaSimpleParams FunctionalMemory *mem; - Counter maxInstsAnyThread; - Counter maxInstsAllThreads; - Counter maxLoadsAnyThread; - Counter maxLoadsAllThreads; - // // Caches // MemInterface *icacheInterface; MemInterface *dcacheInterface; + // + // Fetch + // unsigned decodeToFetchDelay; unsigned renameToFetchDelay; unsigned iewToFetchDelay; unsigned commitToFetchDelay; unsigned fetchWidth; + // + // Decode + // unsigned renameToDecodeDelay; unsigned iewToDecodeDelay; unsigned commitToDecodeDelay; unsigned fetchToDecodeDelay; unsigned decodeWidth; + // + // Rename + // unsigned iewToRenameDelay; unsigned commitToRenameDelay; unsigned decodeToRenameDelay; unsigned renameWidth; + // + // IEW + // unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; @@ -69,16 +73,39 @@ class AlphaSimpleParams unsigned executeIntWidth; unsigned executeFloatWidth; + // + // Commit + // unsigned iewToCommitDelay; unsigned renameToROBDelay; unsigned commitWidth; unsigned squashWidth; + // + // Branch predictor (BP & BTB) + // + unsigned localPredictorSize; + unsigned localPredictorCtrBits; + unsigned BTBEntries; + unsigned BTBTagSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Miscellaneous + // unsigned numPhysIntRegs; unsigned numPhysFloatRegs; unsigned numIQEntries; unsigned numROBEntries; + // Probably can get this from somewhere. 
+ unsigned instShiftAmt; + bool defReg; }; diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc new file mode 100644 index 000000000..6de2def44 --- /dev/null +++ b/cpu/beta_cpu/bpred_unit.cc @@ -0,0 +1,5 @@ + +#include "cpu/beta_cpu/bpred_unit_impl.hh" +#include "cpu/beta_cpu/alpha_impl.hh" + +template DefaultBPredUnit; diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh new file mode 100644 index 000000000..71191f5b7 --- /dev/null +++ b/cpu/beta_cpu/bpred_unit.hh @@ -0,0 +1,51 @@ + +#ifndef __BPRED_UNIT_HH__ +#define __BPRED_UNIT_HH__ + +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +#include "cpu/beta_cpu/2bit_local_pred.hh" +#include "cpu/beta_cpu/btb.hh" + +/** + * Basically a wrapper class to hold both the branch predictor + * and the BTB. Right now I'm unsure of the implementation; it would + * be nicer to have something closer to the CPUPolicy or the Impl where + * this is just typedefs, but it forces the upper level stages to be + * aware of the constructors of the BP and the BTB. The nicer thing + * to do is have this templated on the Impl, accept the usual Params + * object, and be able to call the constructors on the BP and BTB. 
+ */ +template +class DefaultBPredUnit +{ + public: + typedef typename Impl::Params Params; + + DefaultBPredUnit(Params &params); + + bool BPLookup(Addr &inst_PC) + { return BP.lookup(inst_PC); } + + bool BTBValid(Addr &inst_PC) + { return BTB.valid(inst_PC); } + + Addr BTBLookup(Addr &inst_PC) + { return BTB.lookup(inst_PC); } + + void BPUpdate(Addr &inst_PC, bool taken) + { BP.update(inst_PC, taken); } + + void BTBUpdate(Addr &inst_PC, Addr &target_PC) + { BTB.update(inst_PC, target_PC); } + + private: + + DefaultBP BP; + + DefaultBTB BTB; + +}; + +#endif // __BPRED_UNIT_HH__ diff --git a/cpu/beta_cpu/bpred_unit_impl.hh b/cpu/beta_cpu/bpred_unit_impl.hh new file mode 100644 index 000000000..47415ce9b --- /dev/null +++ b/cpu/beta_cpu/bpred_unit_impl.hh @@ -0,0 +1,13 @@ + +#include "cpu/beta_cpu/bpred_unit.hh" + +template +DefaultBPredUnit::DefaultBPredUnit(Params &params) + : BP(params.localPredictorSize, + params.localPredictorCtrBits, + params.instShiftAmt), + BTB(params.BTBEntries, + params.BTBTagSize, + params.instShiftAmt) +{ +} diff --git a/cpu/beta_cpu/btb.cc b/cpu/beta_cpu/btb.cc new file mode 100644 index 000000000..b49f30482 --- /dev/null +++ b/cpu/beta_cpu/btb.cc @@ -0,0 +1,85 @@ +#include + +#include "cpu/beta_cpu/btb.hh" +#include "base/trace.hh" + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _tagBits, + unsigned _instShiftAmt) + : numEntries(_numEntries), + tagBits(_tagBits), + instShiftAmt(_instShiftAmt) +{ + // @todo Check to make sure num_entries is valid (a power of 2) + + DPRINTF(Fetch, "BTB: Creating BTB object.\n"); + + btb = new BTBEntry[numEntries]; + + for (int i = 0; i < numEntries; ++i) + { + btb[i].valid = false; + } + + idxMask = numEntries - 1; + + tagMask = (1 << tagBits) - 1; + + tagShiftAmt = instShiftAmt + (int)log2(numEntries); +} + +inline +unsigned +DefaultBTB::getIndex(const Addr &inst_PC) +{ + // Need to shift PC over by the word offset.
+ return (inst_PC >> instShiftAmt) & idxMask; +} + +inline +Addr +DefaultBTB::getTag(const Addr &inst_PC) +{ + return (inst_PC >> tagShiftAmt) & tagMask; +} + +bool +DefaultBTB::valid(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return true; + } else { + return false; + } +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr +DefaultBTB::lookup(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return btb[btb_idx].target; + } else { + return 0; + } +} + +void +DefaultBTB::update(const Addr &inst_PC, const Addr &target) +{ + unsigned btb_idx = getIndex(inst_PC); + + btb[btb_idx].valid = true; + btb[btb_idx].target = target; + btb[btb_idx].tag = getTag(inst_PC); +} diff --git a/cpu/beta_cpu/btb.hh b/cpu/beta_cpu/btb.hh new file mode 100644 index 000000000..81069eabe --- /dev/null +++ b/cpu/beta_cpu/btb.hh @@ -0,0 +1,52 @@ +#ifndef __BTB_HH__ +#define __BTB_HH__ + +// For Addr type. 
+#include "arch/alpha/isa_traits.hh" + +class DefaultBTB +{ + private: + struct BTBEntry + { + BTBEntry() + : tag(0), target(0), valid(false) + { + } + + Addr tag; + Addr target; + bool valid; + }; + + public: + DefaultBTB(unsigned numEntries, unsigned tagBits, + unsigned instShiftAmt); + + Addr lookup(const Addr &inst_PC); + + bool valid(const Addr &inst_PC); + + void update(const Addr &inst_PC, const Addr &target_PC); + + private: + inline unsigned getIndex(const Addr &inst_PC); + + inline Addr getTag(const Addr &inst_PC); + + BTBEntry *btb; + + unsigned numEntries; + + unsigned idxMask; + + unsigned tagBits; + + unsigned tagMask; + + unsigned instShiftAmt; + + unsigned tagShiftAmt; +}; + +#endif // __BTB_HH__ diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh index 21a530ecf..849a6c797 100644 --- a/cpu/beta_cpu/comm.hh +++ b/cpu/beta_cpu/comm.hh @@ -2,6 +2,7 @@ #define __COMM_HH__ #include +#include #include "arch/alpha/isa_traits.hh" #include "cpu/inst_seq.hh" @@ -10,34 +11,49 @@ using namespace std; // Find better place to put this typedef. typedef short int PhysRegIndex; -// Might want to put constructors/destructors here. template struct SimpleFetchSimpleDecode { - // Consider having a field of how many ready instructions. - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct SimpleDecodeSimpleRename { - // Consider having a field of how many ready instructions. - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct SimpleRenameSimpleIEW { - // Consider having a field of how many ready instructions. - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct SimpleIEWSimpleCommit { - // Consider having a field of how many ready instructions. 
- typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; template struct IssueStruct { - typename Impl::DynInst *insts[1]; + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth + 1]; }; struct TimeBufStruct { @@ -47,11 +63,9 @@ struct TimeBufStruct { bool predIncorrect; uint64_t branchAddr; - //Question, is it worthwhile to have this Addr passed along - //by each stage, or just have Fetch look it up in the proper - //amount of cycles in the time buffer? - //Both might actually be needed because decode can send a different - //nextPC if the bpred was wrong. + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; uint64_t nextPC; }; @@ -72,14 +86,14 @@ struct TimeBufStruct { struct iewComm { bool squash; bool stall; - bool predIncorrect; // Also eventually include skid buffer space. unsigned freeIQEntries; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; uint64_t nextPC; - // For now hardcode the type. - // Change this to sequence number eventually. InstSeqNum squashedSeqNum; }; @@ -90,18 +104,31 @@ struct TimeBufStruct { bool stall; unsigned freeROBEntries; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; uint64_t nextPC; // Think of better names here. // Will need to be a variety of sizes... // Maybe make it a vector, that way only need one object. - vector freeRegs; + std::vector freeRegs; bool robSquashing; + // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the // retired or squashed sequence number. InstSeqNum doneSeqNum; + + // Extra bits of information so that the LDSTQ only updates when it + // needs to. + bool commitIsStore; + bool commitIsLoad; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. 
+ InstSeqNum nonSpecSeqNum; }; commitComm commitInfo; diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh index 0e5a96e2a..981d9e78f 100644 --- a/cpu/beta_cpu/commit.hh +++ b/cpu/beta_cpu/commit.hh @@ -1,6 +1,4 @@ -// Todo: Squash properly. Have commit be able to send a squash signal -// to previous stages; will be needed when trap() is implemented. -// Maybe have a special method for handling interrupts/traps. +// Todo: Maybe have a special method for handling interrupts/traps. // // Traps: Have IEW send a signal to commit saying that there's a trap to // be handled. Have commit send the PC back to the fetch stage, along @@ -17,12 +15,11 @@ #ifndef __SIMPLE_COMMIT_HH__ #define __SIMPLE_COMMIT_HH__ -//Includes: ROB, time buffer, structs, memory interface -#include "arch/alpha/isa_traits.hh" +//#include "arch/alpha/isa_traits.hh" #include "base/timebuf.hh" -#include "cpu/beta_cpu/comm.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/rob.hh" +//#include "cpu/beta_cpu/comm.hh" +//#include "cpu/beta_cpu/rename_map.hh" +//#include "cpu/beta_cpu/rob.hh" #include "mem/memory_interface.hh" template @@ -32,14 +29,15 @@ class SimpleCommit // Typedefs from the Impl. 
typedef typename Impl::ISA ISA; typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; - typedef typename Impl::CPUPol::ROB ROB; + typedef typename CPUPol::ROB ROB; - typedef typename Impl::TimeStruct TimeStruct; - typedef typename Impl::IEWStruct IEWStruct; - typedef typename Impl::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; public: // I don't believe commit can block, so it will only have two @@ -83,7 +81,7 @@ class SimpleCommit void commitInsts(); - bool commitHead(DynInst *head_inst, unsigned inst_num); + bool commitHead(DynInstPtr &head_inst, unsigned inst_num); void getInsts(); @@ -117,7 +115,7 @@ class SimpleCommit FullCPU *cpu; /** Pointer to the rename map. DO NOT USE if possible. */ - typename Impl::CPUPol::RenameMap *renameMap; +// typename Impl::CPUPol::RenameMap *renameMap; //Store buffer interface? 
Will need to move committed stores to the //store buffer diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh index bc8db0ce0..45b8bc7de 100644 --- a/cpu/beta_cpu/commit_impl.hh +++ b/cpu/beta_cpu/commit_impl.hh @@ -9,7 +9,7 @@ #include "cpu/beta_cpu/commit.hh" #include "cpu/exetrace.hh" -template +template SimpleCommit::SimpleCommit(Params &params) : dcacheInterface(params.dcacheInterface), iewToCommitDelay(params.iewToCommitDelay), @@ -21,7 +21,7 @@ SimpleCommit::SimpleCommit(Params &params) _status = Idle; } -template +template void SimpleCommit::setCPU(FullCPU *cpu_ptr) { @@ -29,7 +29,7 @@ SimpleCommit::setCPU(FullCPU *cpu_ptr) cpu = cpu_ptr; } -template +template void SimpleCommit::setTimeBuffer(TimeBuffer *tb_ptr) { @@ -43,7 +43,7 @@ SimpleCommit::setTimeBuffer(TimeBuffer *tb_ptr) robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); } -template +template void SimpleCommit::setRenameQueue(TimeBuffer *rq_ptr) { @@ -54,7 +54,7 @@ SimpleCommit::setRenameQueue(TimeBuffer *rq_ptr) fromRename = renameQueue->getWire(-renameToROBDelay); } -template +template void SimpleCommit::setIEWQueue(TimeBuffer *iq_ptr) { @@ -65,7 +65,7 @@ SimpleCommit::setIEWQueue(TimeBuffer *iq_ptr) fromIEW = iewQueue->getWire(-iewToCommitDelay); } -template +template void SimpleCommit::setROB(ROB *rob_ptr) { @@ -73,7 +73,7 @@ SimpleCommit::setROB(ROB *rob_ptr) rob = rob_ptr; } -template +template void SimpleCommit::tick() { @@ -106,7 +106,7 @@ SimpleCommit::tick() toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); } -template +template void SimpleCommit::commit() { @@ -154,17 +154,30 @@ SimpleCommit::commit() // Send back the sequence number of the squashed instruction. toIEW->commitInfo.doneSeqNum = squashed_inst; + // Send back the squash signal to tell stages that they should squash. toIEW->commitInfo.squash = true; + // Send back the rob squashing signal so other stages know that the // ROB is in the process of squashing.
toIEW->commitInfo.robSquashing = true; + + toIEW->commitInfo.branchMispredict = + robInfoFromIEW->iewInfo.branchMispredict; + + toIEW->commitInfo.branchTaken = + robInfoFromIEW->iewInfo.branchTaken; + toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC; + + toIEW->commitInfo.mispredPC = robInfoFromIEW->iewInfo.mispredPC; } if (_status != ROBSquashing) { + // If we're not currently squashing, then get instructions. getInsts(); + // Try to commit any instructions. commitInsts(); } @@ -183,7 +196,7 @@ SimpleCommit::commit() // Loop that goes through as many instructions in the ROB as possible and // tries to commit them. The actual work for committing is done by the // commitHead() function. -template +template void SimpleCommit::commitInsts() { @@ -195,7 +208,7 @@ SimpleCommit::commitInsts() // Can't commit and squash things at the same time... //////////////////////////////////// - DynInst *head_inst = rob->readHeadInst(); + DynInstPtr head_inst = rob->readHeadInst(); unsigned num_committed = 0; @@ -224,12 +237,12 @@ SimpleCommit::commitInsts() // inst in the ROB without affecting any other stages. rob->retireHead(); - ++num_committed; } else { // Increment the total number of non-speculative instructions // executed. // Hack for now: it really shouldn't happen until after the - // commit is deemed to be successful. + // commit is deemed to be successful, but this count is needed + // for syscalls. cpu->funcExeInst++; // Try to commit the head instruction. @@ -256,9 +269,9 @@ SimpleCommit::commitInsts() } } -template +template bool -SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) +SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) { // Make sure instruction is valid assert(head_inst); @@ -271,21 +284,26 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) // Also check if it's nonspeculative. Or a nop. Then it will be // executed only when it reaches the head of the ROB. 
Actually // executing a nop is a bit overkill... - if (head_inst->isStore() || - head_inst->isLoad() || - head_inst->isNonSpeculative() || - head_inst->isNop()) { - DPRINTF(Commit, "Commit: Executing a memory reference or " - "nonspeculative instruction at commit, inst PC %#x\n", - head_inst->PC); - fault = head_inst->execute(); + if (!head_inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. + cpu->funcExeInst--; + if (head_inst->isStore() || head_inst->isNonSpeculative()) { + DPRINTF(Commit, "Commit: Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + head_inst->readPC()); - // Tell CPU to tell IEW to tell IQ (nasty chain of calls) that - // this instruction has completed. Could predicate this on - // whether or not the instruction has a destination. - // Slightly unrealistic, but will not really be a factor once - // a real load/store queue is added. - cpu->wakeDependents(head_inst); + toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + head_inst->clearCanCommit(); + + return false; + } else { + panic("Commit: Trying to commit un-executed instruction " + "of unknown type!\n"); + } } // Check if memory access was successful. @@ -320,8 +338,10 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) #ifdef FULL_SYSTEM cpu->trap(fault); #else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", head_inst->getFault(), - head_inst->PC); + if (!head_inst->isNop()) { + panic("fault (%d) detected @ PC %08p", head_inst->getFault(), + head_inst->PC); + } #endif // FULL_SYSTEM } @@ -333,8 +353,8 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) return false; } - //If it's a branch, then send back branch prediction update info - //to the fetch stage. + // If it's a branch, then send back branch prediction update info + // to the fetch stage. 
// This should be handled in the iew stage if a mispredict happens... #if 0 if (head_inst->isControl()) { @@ -358,6 +378,15 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) } #endif + // Explicit communication back to the LDSTQ that a load has been committed + // and can be removed from the LDSTQ. Stores don't need this because + // the LDSTQ will already have been told that a store has reached the head + // of the ROB. Consider including communication if it's a store as well + // to keep things orthagonal. + if (head_inst->isLoad()) { + toIEW->commitInfo.commitIsLoad = true; + } + // Now that the instruction is going to be committed, finalize its // trace data. if (head_inst->traceData) { @@ -371,7 +400,7 @@ SimpleCommit::commitHead(DynInst *head_inst, unsigned inst_num) return true; } -template +template void SimpleCommit::getInsts() { @@ -382,24 +411,33 @@ SimpleCommit::getInsts() // Read any issued instructions and place them into the ROB. Do this // prior to squashing to avoid having instructions in the ROB that // don't get squashed properly. + int insts_to_process = min((int)renameWidth, fromRename->size); + for (int inst_num = 0; - fromRename->insts[inst_num] != NULL && inst_num < renameWidth; + inst_num < insts_to_process; ++inst_num) { - DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", - fromRename->insts[inst_num]->readPC()); - rob->insertInst(fromRename->insts[inst_num]); + if (!fromRename->insts[inst_num]->isSquashed()) { + DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", + fromRename->insts[inst_num]->readPC()); + rob->insertInst(fromRename->insts[inst_num]); + } else { + DPRINTF(Commit, "Commit: Instruction %i PC %#x was " + "squashed, skipping.\n", + fromRename->insts[inst_num]->seqNum, + fromRename->insts[inst_num]->readPC()); + } } } -template +template void SimpleCommit::markCompletedInsts() { // Grab completed insts out of the IEW instruction queue, and mark // instructions completed within the ROB. 
for (int inst_num = 0; - fromIEW->insts[inst_num] != NULL && inst_num < iewWidth; + inst_num < iewWidth && fromIEW->insts[inst_num]; ++inst_num) { DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", @@ -411,7 +449,7 @@ SimpleCommit::markCompletedInsts() } } -template +template uint64_t SimpleCommit::readCommitPC() { diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh index 676334249..ec8460b77 100644 --- a/cpu/beta_cpu/cpu_policy.hh +++ b/cpu/beta_cpu/cpu_policy.hh @@ -1,32 +1,60 @@ #ifndef __CPU_POLICY_HH__ #define __CPU_POLICY_HH__ +#include "cpu/beta_cpu/bpred_unit.hh" +#include "cpu/beta_cpu/inst_queue.hh" +#include "cpu/beta_cpu/regfile.hh" +#include "cpu/beta_cpu/free_list.hh" +#include "cpu/beta_cpu/rename_map.hh" +#include "cpu/beta_cpu/rob.hh" +#include "cpu/beta_cpu/store_set.hh" +#include "cpu/beta_cpu/mem_dep_unit.hh" +#include "cpu/beta_cpu/ldstq.hh" + #include "cpu/beta_cpu/fetch.hh" #include "cpu/beta_cpu/decode.hh" #include "cpu/beta_cpu/rename.hh" #include "cpu/beta_cpu/iew.hh" #include "cpu/beta_cpu/commit.hh" -#include "cpu/beta_cpu/inst_queue.hh" -#include "cpu/beta_cpu/regfile.hh" -#include "cpu/beta_cpu/free_list.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/rob.hh" +#include "cpu/beta_cpu/comm.hh" template struct SimpleCPUPolicy { + typedef DefaultBPredUnit BPredUnit; typedef PhysRegFile RegFile; typedef SimpleFreeList FreeList; typedef SimpleRenameMap RenameMap; typedef ROB ROB; typedef InstructionQueue IQ; + typedef MemDepUnit MemDepUnit; + typedef LDSTQ LDSTQ; typedef SimpleFetch Fetch; typedef SimpleDecode Decode; typedef SimpleRename Rename; typedef SimpleIEW IEW; typedef SimpleCommit Commit; + + /** The struct for communication between fetch and decode. */ + typedef SimpleFetchSimpleDecode FetchStruct; + + /** The struct for communication between decode and rename. */ + typedef SimpleDecodeSimpleRename DecodeStruct; + + /** The struct for communication between rename and IEW. 
*/ + typedef SimpleRenameSimpleIEW RenameStruct; + + /** The struct for communication between IEW and commit. */ + typedef SimpleIEWSimpleCommit IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct IssueStruct; + + /** The struct for all backwards communication. */ + typedef TimeBufStruct TimeStruct; + }; #endif //__CPU_POLICY_HH__ diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh index c41955dcb..be88a4b36 100644 --- a/cpu/beta_cpu/decode.hh +++ b/cpu/beta_cpu/decode.hh @@ -10,11 +10,7 @@ #include -//Will want to include: time buffer, structs, #include "base/timebuf.hh" -#include "cpu/beta_cpu/comm.hh" - -using namespace std; template class SimpleDecode @@ -22,13 +18,15 @@ class SimpleDecode private: // Typedefs from the Impl. typedef typename Impl::ISA ISA; - typedef typename Impl::DynInst DynInst; typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; - typedef typename Impl::FetchStruct FetchStruct; - typedef typename Impl::DecodeStruct DecodeStruct; - typedef typename Impl::TimeStruct TimeStruct; + // Typedefs from the CPU policy. + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::TimeStruct TimeStruct; // Typedefs from the ISA. typedef typename ISA::Addr Addr; @@ -71,7 +69,7 @@ class SimpleDecode inline void unblock(); - void squash(DynInst *inst); + void squash(DynInstPtr &inst); // Interfaces to objects outside of decode. /** CPU interface. */ @@ -106,7 +104,7 @@ class SimpleDecode typename TimeBuffer::wire fromFetch; /** Skid buffer between fetch and decode. */ - queue skidBuffer; + std::queue skidBuffer; private: //Consider making these unsigned to avoid any confusion. @@ -124,6 +122,12 @@ class SimpleDecode /** The width of decode, in instructions. 
*/ unsigned decodeWidth; + + /** The instruction that decode is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; }; #endif // __SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh index ecf19b8ea..d0f46eaa5 100644 --- a/cpu/beta_cpu/decode_impl.hh +++ b/cpu/beta_cpu/decode_impl.hh @@ -9,7 +9,8 @@ SimpleDecode::SimpleDecode(Params ¶ms) iewToDecodeDelay(params.iewToDecodeDelay), commitToDecodeDelay(params.commitToDecodeDelay), fetchToDecodeDelay(params.fetchToDecodeDelay), - decodeWidth(params.decodeWidth) + decodeWidth(params.decodeWidth), + numInst(0) { DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); _status = Idle; @@ -103,7 +104,7 @@ SimpleDecode::unblock() // was predicted incorrectly. template void -SimpleDecode::squash(DynInst *inst) +SimpleDecode::squash(DynInstPtr &inst) { DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " "detected at decode.\n"); @@ -163,16 +164,22 @@ SimpleDecode::tick() // buffer were used. Remove those instructions and handle // the rest of unblocking. if (_status == Unblocking) { + if (fromFetch->size > 0) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + } + unblock(); } } else if (_status == Blocked) { - if (fromFetch->insts[0] != NULL) { + if (fromFetch->size > 0) { block(); } if (!fromRename->renameInfo.stall && - !fromIEW->iewInfo.stall && - !fromCommit->commitInfo.stall) { + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { DPRINTF(Decode, "Decode: Stall signals cleared, going to " "unblock.\n"); _status = Unblocking; @@ -204,9 +211,7 @@ void SimpleDecode::decode() { // Check time buffer if being told to squash. 
- if (/* fromRename->renameInfo.squash || */ - /* fromIEW->iewInfo.squash || */ - fromCommit->commitInfo.squash) { + if (fromCommit->commitInfo.squash) { squash(); return; } @@ -223,20 +228,22 @@ SimpleDecode::decode() // Check fetch queue to see if instructions are available. // If no available instructions, do nothing, unless this stage is // currently unblocking. - if (fromFetch->insts[0] == NULL && _status != Unblocking) { + if (!fromFetch->insts[0] && _status != Unblocking) { DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); // Should I change the status to idle? return; } - DynInst *inst; + DynInstPtr inst; + // Instead have a class member variable that records which instruction // was the last one that was ended on. At the tick() stage, it can // check if that's equal to 0. If not, then don't pop stuff off. - unsigned num_inst = 0; - bool insts_available = _status == Unblocking ? - skidBuffer.front().insts[num_inst] != NULL : - fromFetch->insts[num_inst] != NULL; + unsigned to_rename_index = 0; + + int insts_available = _status == Unblocking ? + skidBuffer.front().size : + fromFetch->size; // Debug block... #if 0 @@ -247,7 +254,7 @@ SimpleDecode::decode() DPRINTF(Decode, "Decode: No instructions available, skid buffer " "empty.\n"); } else if (_status != Unblocking && - fromFetch->insts[0] == NULL) { + !fromFetch->insts[0]) { DPRINTF(Decode, "Decode: No instructions available, fetch queue " "empty.\n"); } else { @@ -262,26 +269,39 @@ SimpleDecode::decode() // should be computed here. However in this simple model all // computation will take place at execute. Hence doneTargCalc() // will always be false. - while (num_inst < decodeWidth && - insts_available) + while (insts_available > 0) { DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); // Might create some sort of accessor to get an instruction // on a per thread basis. Or might be faster to just get // a pointer to an array or list of instructions and use that // within this code. 
- inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : - fromFetch->insts[num_inst]; + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromFetch->insts[numInst]; + DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", - inst, inst->readPC()); + inst->seqNum, inst->readPC()); + + if (inst->isSquashed()) { + DPRINTF(Decode, "Decode: Instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + ++numInst; + --insts_available; + + continue; + } // This current instruction is valid, so add it into the decode // queue. The next instruction may not be valid, so check to // see if branches were predicted correctly. - toRename->insts[num_inst] = inst; + toRename->insts[to_rename_index] = inst; + + ++(toRename->size); // Ensure that if it was predicted as a branch, it really is a - // branch. This case should never happen in this model. + // branch. if (inst->predTaken() && !inst->isControl()) { panic("Instruction predicted as a branch!"); @@ -306,20 +326,19 @@ SimpleDecode::decode() // them as ready to issue at any time. Not sure if this check // should exist here or at a later stage; however it doesn't matter // too much for function correctness. + // Isn't this handled by the inst queue? if (inst->numSrcRegs() == 0) { inst->setCanIssue(); } // Increment which instruction we're looking at. - ++num_inst; + ++numInst; + ++to_rename_index; - // Check whether or not there are instructions available. - // Either need to check within the skid buffer, or the fetch - // queue, depending if this stage is unblocking or not. - insts_available = _status == Unblocking ? 
- skidBuffer.front().insts[num_inst] == NULL : - fromFetch->insts[num_inst] == NULL; + --insts_available; } + + numInst = 0; } #endif // __SIMPLE_DECODE_CC__ diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh index 5717c65ac..e59a9df7f 100644 --- a/cpu/beta_cpu/fetch.hh +++ b/cpu/beta_cpu/fetch.hh @@ -13,16 +13,12 @@ #include "base/timebuf.hh" #include "sim/eventq.hh" #include "cpu/pc_event.hh" -#include "cpu/beta_cpu/comm.hh" #include "mem/mem_interface.hh" -using namespace std; - /** * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch * will stall if there's an Icache miss, but otherwise assumes a one cycle - * Icache hit. This will be replaced with a more fleshed out class in the - * future. + * Icache hit. */ template @@ -31,12 +27,15 @@ class SimpleFetch public: /** Typedefs from Impl. */ typedef typename Impl::ISA ISA; + typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; - typedef typename Impl::FetchStruct FetchStruct; - typedef typename Impl::TimeStruct TimeStruct; + typedef typename CPUPol::BPredUnit BPredUnit; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::TimeStruct TimeStruct; /** Typedefs from ISA. */ typedef typename ISA::MachInst MachInst; @@ -76,6 +75,17 @@ class SimpleFetch // Figure out PC vs next PC and how it should be updated void squash(Addr newPC); + private: + /** + * Looks up in the branch predictor to see if the next PC should be + * either next PC+=MachInst or a branch target. + * @params next_PC Next PC variable passed in by reference. It is + * expected to be set to the current PC; it will be updated with what + * the next PC will be. + * @return Whether or not a branch was predicted as taken. 
+ */ + bool lookupAndUpdateNextPC(Addr &next_PC); + public: class CacheCompletionEvent : public Event { @@ -110,8 +120,6 @@ class SimpleFetch /** Wire to get commit's information from backwards time buffer. */ typename TimeBuffer::wire fromCommit; - // Will probably have this sit in the FullCPU and just pass a pointr in. - // Simplifies the constructors of all stages. /** Internal fetch instruction queue. */ TimeBuffer *fetchQueue; @@ -122,6 +130,9 @@ class SimpleFetch /** Icache interface. */ MemInterface *icacheInterface; + /** BPredUnit. */ + BPredUnit branchPred; + /** Memory request used to access cache. */ MemReqPtr memReq; diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh index 918d2dad2..93f7bf6d2 100644 --- a/cpu/beta_cpu/fetch_impl.hh +++ b/cpu/beta_cpu/fetch_impl.hh @@ -1,7 +1,5 @@ -// Todo: Rewrite this. Add in branch prediction. Fix up if squashing comes -// from decode; only the correct instructions should be killed. This will -// probably require changing the CPU's instList functions to take a seqNum -// instead of a dyninst. With probe path, should be able to specify +// Todo: Add in branch prediction. With probe path, should +// be able to specify // size of data to fetch. Will be able to get full cache line. // Remove this later. @@ -41,6 +39,7 @@ template SimpleFetch::SimpleFetch(Params ¶ms) : cacheCompletionEvent(this), icacheInterface(params.icacheInterface), + branchPred(params), decodeToFetchDelay(params.decodeToFetchDelay), renameToFetchDelay(params.renameToFetchDelay), iewToFetchDelay(params.iewToFetchDelay), @@ -66,7 +65,7 @@ SimpleFetch::SimpleFetch(Params ¶ms) blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; // Create mask to get rid of offset bits. - cacheBlockMask = ~((int)log2(blkSize) - 1); + cacheBlockMask = (blkSize - 1); // Get the size of an instruction. 
instSize = sizeof(MachInst); @@ -123,24 +122,59 @@ SimpleFetch::processCacheCompletion() _status = IcacheMissComplete; } -// Note that in the SimpleFetch<>, will most likely have to provide the -// template parameters to BP and BTB. +template +bool +SimpleFetch::lookupAndUpdateNextPC(Addr &next_PC) +{ +#if 1 + // Do branch prediction check here. + bool predict_taken = branchPred.BPLookup(next_PC); + Addr predict_target; + + DPRINTF(Fetch, "Fetch: Branch predictor predicts taken? %i\n", + predict_taken); + + if (branchPred.BTBValid(next_PC)) { + predict_target = branchPred.BTBLookup(next_PC); + DPRINTF(Fetch, "Fetch: BTB target is %#x.\n", predict_target); + } else { + predict_taken = false; + DPRINTF(Fetch, "Fetch: BTB does not have a valid entry.\n"); + } + + // Now update the PC to fetch the next instruction in the cache + // line. + if (!predict_taken) { + next_PC = next_PC + instSize; + return false; + } else { + next_PC = predict_target; + return true; + } +#endif + +#if 0 + next_PC = next_PC + instSize; + return false; +#endif +} + template void SimpleFetch::squash(Addr new_PC) { DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); + cpu->setNextPC(new_PC + instSize); cpu->setPC(new_PC); _status = Squashing; - // Clear out the instructions that are no longer valid. - // Actually maybe slightly unrealistic to kill instructions that are - // in flight like that between stages. Perhaps just have next - // stage ignore those instructions or something. In the cycle where it's - // returning from squashing, the other stages can just ignore the inputs - // for that cycle. + // Clear the icache miss if it's outstanding. + if (_status == IcacheMissStall && icacheInterface) { + // @todo: Use an actual thread number here. + icacheInterface->squash(0); + } // Tell the CPU to remove any instructions that aren't currently // in the ROB (instructions in flight that were killed). 
@@ -151,25 +185,27 @@ template void SimpleFetch::tick() { -#if 0 +#if 1 + // Check squash signals from commit. if (fromCommit->commitInfo.squash) { DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " "from commit.\n"); // In any case, squash. squash(fromCommit->commitInfo.nextPC); + + // Also check if there's a mispredict that happened. + if (fromCommit->commitInfo.branchMispredict) { + branchPred.BPUpdate(fromCommit->commitInfo.mispredPC, + fromCommit->commitInfo.branchTaken); + branchPred.BTBUpdate(fromCommit->commitInfo.mispredPC, + fromCommit->commitInfo.nextPC); + } + return; } - if (fromDecode->decodeInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from decode.\n"); - - // Squash unless we're already squashing? - squash(fromDecode->decodeInfo.nextPC); - return; - } - + // Check ROB squash signals from commit. if (fromCommit->commitInfo.robSquashing) { DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); @@ -178,11 +214,36 @@ SimpleFetch::tick() return; } + // Check squash signals from decode. + if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + + // Update the branch predictor. + if (fromCommit->decodeInfo.branchMispredict) { + branchPred.BPUpdate(fromDecode->decodeInfo.mispredPC, + fromDecode->decodeInfo.branchTaken); + branchPred.BTBUpdate(fromDecode->decodeInfo.mispredPC, + fromDecode->decodeInfo.nextPC); + } + + if (_status != Squashing) { + // Squash unless we're already squashing? + squash(fromDecode->decodeInfo.nextPC); + return; + } + } + + + + // Check if any of the stall signals are high. if (fromDecode->decodeInfo.stall || fromRename->renameInfo.stall || fromIEW->iewInfo.stall || fromCommit->commitInfo.stall) { + // Block stage, regardless of current status. 
+ DPRINTF(Fetch, "Fetch: Stalling stage.\n"); DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " "Commit: %i\n", @@ -190,10 +251,36 @@ SimpleFetch::tick() fromRename->renameInfo.stall, fromIEW->iewInfo.stall, fromCommit->commitInfo.stall); - // What to do if we're already in an icache stall? + + _status = Blocked; + return; + } else if (_status == Blocked) { + // Unblock stage if status is currently blocked and none of the + // stall signals are being held high. + _status = Running; + + return; + } + + // If fetch has reached this point, then there are no squash signals + // still being held high. Check if fetch is in the squashing state; + // if so, fetch can switch to running. + // Similarly, there are no blocked signals still being held high. + // Check if fetch is in the blocked state; if so, fetch can switch to + // running. + if (_status == Squashing) { + DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n"); + + // Switch status to running + _status = Running; + } else if (_status != IcacheMissStall) { + DPRINTF(Fetch, "Fetch: Running stage.\n"); + + fetch(); } #endif +#if 0 if (_status != Blocked && _status != Squashing && _status != IcacheMissStall) { @@ -253,62 +340,17 @@ SimpleFetch::tick() DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); } } - +#endif } template void SimpleFetch::fetch() { - ////////////////////////////////////////// - // Check backwards communication - ////////////////////////////////////////// - - // If branch prediction is incorrect, squash any instructions, - // update PC, and do not fetch anything this cycle. - - // Might want to put all the PC changing stuff in one area. - // Normally should also check here to see if there is branch - // misprediction info to update with. 
- if (fromCommit->commitInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from commit.\n"); - squash(fromCommit->commitInfo.nextPC); - return; - } else if (fromDecode->decodeInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from decode.\n"); - squash(fromDecode->decodeInfo.nextPC); - return; - } else if (fromCommit->commitInfo.robSquashing) { - DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); - _status = Squashing; - return; - } - - // If being told to stall, do nothing. - if (fromDecode->decodeInfo.stall || - fromRename->renameInfo.stall || - fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) - { - DPRINTF(Fetch, "Fetch: Stalling stage.\n"); - DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " - "Commit: %i\n", - fromDecode->decodeInfo.stall, - fromRename->renameInfo.stall, - fromIEW->iewInfo.stall, - fromCommit->commitInfo.stall); - _status = Blocked; - return; - } - ////////////////////////////////////////// // Start actual fetch ////////////////////////////////////////// - // If nothing else outstanding, attempt to read instructions. - #ifdef FULL_SYSTEM // Flag to say whether or not address is physical addr. unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; @@ -317,13 +359,14 @@ SimpleFetch::fetch() #endif // FULL_SYSTEM // The current PC. - Addr PC = cpu->readPC(); + Addr fetch_PC = cpu->readPC(); // Fault code for memory access. Fault fault = No_Fault; // If returning from the delay of a cache miss, then update the status - // to running, otherwise do the cache access. + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. 
if (_status == IcacheMissComplete) { DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); @@ -334,7 +377,7 @@ SimpleFetch::fetch() } else { DPRINTF(Fetch, "Fetch: Attempting to translate and read " "instruction, starting at PC %08p.\n", - PC); + fetch_PC); // Otherwise check if the instruction exists within the cache. // If it does, then proceed on to read the instruction and the rest @@ -347,7 +390,7 @@ SimpleFetch::fetch() // Setup the memReq to do a read of the first isntruction's address. // Set the appropriate read size and flags as well. memReq->cmd = Read; - memReq->reset(PC, instSize, flags); + memReq->reset(fetch_PC, instSize, flags); // Translate the instruction request. // Should this function be @@ -401,7 +444,7 @@ SimpleFetch::fetch() // Probably have a status on a per thread basis so each thread can // block independently and be woken up independently. - Addr next_PC = 0; + Addr next_PC = fetch_PC; InstSeqNum inst_seq; // If the read of the first instruction was successful, then grab the @@ -410,6 +453,10 @@ SimpleFetch::fetch() if (fault == No_Fault) { DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + ////////////////////////// + // Fetch first instruction + ////////////////////////// + // Need to keep track of whether or not a predicted branch // ended this fetch block. bool predicted_branch = false; @@ -420,12 +467,17 @@ SimpleFetch::fetch() // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); + // Update the next PC; it either is PC+sizeof(MachInst), or + // branch_target. Check whether or not a branch was taken. + predicted_branch = lookupAndUpdateNextPC(next_PC); + // Because the first instruction was already fetched, create the // DynInst and put it into the queue to decode. 
- DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq, - cpu); + DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, + inst_seq, cpu); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - instruction, instruction->readPC()); + inst_seq, instruction->readPC()); DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", OPCODE(inst)); @@ -440,13 +492,17 @@ SimpleFetch::fetch() // that heads to decode. toDecode->insts[0] = instruction; - // Now update the PC to fetch the next instruction in the cache - // line. - PC = PC + instSize; + toDecode->size++; + + fetch_PC = next_PC; + + ////////////////////////// + // Fetch other instructions + ////////////////////////// // Obtain the index into the cache line by getting only the low - // order bits. - int line_index = PC & cacheBlockMask; + // order bits. Will need to do shifting as well. + int line_index = fetch_PC & cacheBlockMask; // Take instructions and put them into the queue heading to decode. // Then read the next instruction in the cache line. Continue @@ -461,12 +517,14 @@ SimpleFetch::fetch() // instructions, which can then be used to get all the instructions // needed. Figure out if I can roll it back into one loop. for (int fetched = 1; - line_index < blkSize && fetched < fetchWidth; + line_index < blkSize && + fetched < fetchWidth && + !predicted_branch; line_index+=instSize, ++fetched) { // Reset the mem request to setup the read of the next // instruction. - memReq->reset(PC, instSize, flags); + memReq->reset(fetch_PC, instSize, flags); // Translate the instruction request. fault = cpu->translateInstReq(memReq); @@ -485,16 +543,24 @@ SimpleFetch::fetch() // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); + predicted_branch = lookupAndUpdateNextPC(next_PC); + // Create the actual DynInst. Parameters are: // DynInst(instruction, PC, predicted PC, CPU pointer). 
// Because this simple model has no branch prediction, the // predicted PC will simply be PC+sizeof(MachInst). // Update to actually use a branch predictor to predict the // target in the future. - DynInst *instruction = new DynInst(inst, PC, PC+instSize, - inst_seq, cpu); + DynInstPtr instruction = + new DynInst(inst, fetch_PC, next_PC, inst_seq, cpu); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - instruction, instruction->readPC()); + inst_seq, instruction->readPC()); DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", OPCODE(inst)); @@ -504,20 +570,15 @@ SimpleFetch::fetch() // that heads to decode. toDecode->insts[fetched] = instruction; + toDecode->size++; + // Might want to keep track of various stats. // numInstsFetched++; - // Now update the PC to fetch the next instruction in the cache - // line. - PC = PC + instSize; + // Update the PC with the next PC. + fetch_PC = next_PC; } - // If no branches predicted taken, then increment PC with - // fall-through path. This simple model always predicts not - // taken. - if (!predicted_branch) { - next_PC = PC; - } } // Now that fetching is completed, update the PC to signify what the next @@ -544,10 +605,10 @@ SimpleFetch::fetch() _status = Blocked; #ifdef FULL_SYSTEM - // Trap will probably need a pointer to the CPU to do accessing. - // Or an exec context. --Write ProxyExecContext eventually. - // Avoid using this for now as the xc really shouldn't be in here. - cpu->trap(fault); +// cpu->trap(fault); + // Send a signal to the ROB indicating that there's a trap from the + // fetch stage that needs to be handled. Need to indicate that + // there's a fault, and the fault type. 
#else // !FULL_SYSTEM fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); #endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/free_list.cc b/cpu/beta_cpu/free_list.cc index 006bf4bf7..542b87471 100644 --- a/cpu/beta_cpu/free_list.cc +++ b/cpu/beta_cpu/free_list.cc @@ -1,3 +1,5 @@ +#include "base/trace.hh" + #include "cpu/beta_cpu/free_list.hh" SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, @@ -10,6 +12,16 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, numPhysicalFloatRegs(_numPhysicalFloatRegs), numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) { + DPRINTF(FreeList, "FreeList: Creating new free list object.\n"); + + // DEBUG stuff. + freeIntRegsScoreboard.resize(numPhysicalIntRegs); + + freeFloatRegsScoreboard.resize(numPhysicalRegs); + + for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) { + freeIntRegsScoreboard[i] = 0; + } // Put all of the extra physical registers onto the free list. This // means excluding all of the base logical registers. @@ -17,6 +29,14 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, i < numPhysicalIntRegs; ++i) { freeIntRegs.push(i); + + freeIntRegsScoreboard[i] = 1; + } + + for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs; + ++i) + { + freeFloatRegsScoreboard[i] = 0; } // Put all of the extra physical registers onto the free list. This @@ -26,8 +46,9 @@ SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; i < numPhysicalRegs; ++i) { - cprintf("Free List: Adding register %i to float list.\n", i); freeFloatRegs.push(i); + + freeFloatRegsScoreboard[i] = 1; } } diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh index 8521ad94c..0d2b2c421 100644 --- a/cpu/beta_cpu/free_list.hh +++ b/cpu/beta_cpu/free_list.hh @@ -8,8 +8,6 @@ #include "cpu/beta_cpu/comm.hh" #include "base/trace.hh" -using namespace std; - // Question: Do I even need the number of logical registers? 
// How to avoid freeing registers instantly? Same with ROB entries. @@ -33,10 +31,10 @@ class SimpleFreeList private: /** The list of free integer registers. */ - queue freeIntRegs; + std::queue freeIntRegs; /** The list of free floating point registers. */ - queue freeFloatRegs; + std::queue freeFloatRegs; /** Number of logical integer registers. */ int numLogicalIntRegs; @@ -53,6 +51,11 @@ class SimpleFreeList /** Total number of physical registers. */ int numPhysicalRegs; + /** DEBUG stuff below. */ + std::vector freeIntRegsScoreboard; + + std::vector freeFloatRegsScoreboard; + public: SimpleFreeList(unsigned _numLogicalIntRegs, unsigned _numPhysicalIntRegs, @@ -94,6 +97,10 @@ SimpleFreeList::getIntReg() freeIntRegs.pop(); + // DEBUG + assert(freeIntRegsScoreboard[free_reg]); + freeIntRegsScoreboard[free_reg] = 0; + return(free_reg); } @@ -109,6 +116,10 @@ SimpleFreeList::getFloatReg() freeFloatRegs.pop(); + // DEBUG + assert(freeFloatRegsScoreboard[free_reg]); + freeFloatRegsScoreboard[free_reg] = 0; + return(free_reg); } @@ -120,8 +131,16 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg) //already in there. A bit vector or something similar would be useful. if (freed_reg < numPhysicalIntRegs) { freeIntRegs.push(freed_reg); + + // DEBUG + assert(freeIntRegsScoreboard[freed_reg] == false); + freeIntRegsScoreboard[freed_reg] = 1; } else if (freed_reg < numPhysicalRegs) { freeFloatRegs.push(freed_reg); + + // DEBUG + assert(freeFloatRegsScoreboard[freed_reg] == false); + freeFloatRegsScoreboard[freed_reg] = 1; } } @@ -130,6 +149,10 @@ SimpleFreeList::addIntReg(PhysRegIndex freed_reg) { DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + // DEBUG + assert(!freeIntRegsScoreboard[freed_reg]); + freeIntRegsScoreboard[freed_reg] = 1; + //Might want to add in a check for whether or not this register is //already in there. A bit vector or something similar would be useful. 
freeIntRegs.push(freed_reg); @@ -140,6 +163,10 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) { DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + // DEBUG + assert(!freeFloatRegsScoreboard[freed_reg]); + freeFloatRegsScoreboard[freed_reg] = 1; + //Might want to add in a check for whether or not this register is //already in there. A bit vector or something similar would be useful. freeFloatRegs.push(freed_reg); diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index 6fbf5d69a..abeb4cb87 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -16,29 +16,18 @@ using namespace std; #ifdef FULL_SYSTEM -BaseFullCPU::BaseFullCPU(const std::string &_name, - int number_of_threads, - Counter max_insts_any_thread, - Counter max_insts_all_threads, - Counter max_loads_any_thread, - Counter max_loads_all_threads, - System *_system, Tick freq) - : BaseCPU(_name, number_of_threads, - max_insts_any_thread, max_insts_all_threads, - max_loads_any_thread, max_loads_all_threads, - _system, freq) +BaseFullCPU::BaseFullCPU(Params &params) + : BaseCPU(params.name, params.numberOfThreads, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads, + params._system, params.freq) { } #else -BaseFullCPU::BaseFullCPU(const std::string &_name, - int number_of_threads, - Counter max_insts_any_thread, - Counter max_insts_all_threads, - Counter max_loads_any_thread, - Counter max_loads_all_threads) - : BaseCPU(_name, number_of_threads, - max_insts_any_thread, max_insts_all_threads, - max_loads_any_thread, max_loads_all_threads) +BaseFullCPU::BaseFullCPU(Params &params) + : BaseCPU(params.name, params.numberOfThreads, + params.maxInstsAnyThread, params.maxInstsAllThreads, + params.maxLoadsAnyThread, params.maxLoadsAllThreads) { } #endif // FULL_SYSTEM @@ -67,14 +56,9 @@ FullBetaCPU::TickEvent::description() template FullBetaCPU::FullBetaCPU(Params &params) #ifdef FULL_SYSTEM - : 
BaseFullCPU(params.name, /* number_of_threads */ 1, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads, - params.system, params.freq), + : BaseFullCPU(params), #else - : BaseFullCPU(params.name, /* number_of_threads */ 1, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads), + : BaseFullCPU(params), #endif // FULL_SYSTEM tickEvent(this), fetch(params), @@ -91,17 +75,18 @@ FullBetaCPU::FullBetaCPU(Params &params) renameMap(Impl::ISA::NumIntRegs, params.numPhysIntRegs, Impl::ISA::NumFloatRegs, params.numPhysFloatRegs, Impl::ISA::NumMiscRegs, - Impl::ISA::ZeroReg, Impl::ISA::ZeroReg), + Impl::ISA::ZeroReg, + Impl::ISA::ZeroReg + Impl::ISA::NumIntRegs), rob(params.numROBEntries, params.squashWidth), // What to pass to these time buffers? // For now just have these time buffers be pretty big. - timeBuffer(20, 20), - fetchQueue(20, 20), - decodeQueue(20, 20), - renameQueue(20, 20), - iewQueue(20, 20), + timeBuffer(5, 5), + fetchQueue(5, 5), + decodeQueue(5, 5), + renameQueue(5, 5), + iewQueue(5, 5), xc(NULL), @@ -133,9 +118,9 @@ FullBetaCPU::FullBetaCPU(Params &params) // initialize CPU, including PC TheISA::initCPU(&xc->regs); #else - xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x", process->prog_entry, process); + xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); assert(process->getMemory() != NULL); assert(mem != NULL); @@ -393,7 +378,7 @@ FullBetaCPU::setPC(Addr new_PC) template void -FullBetaCPU::addInst(DynInst *inst) +FullBetaCPU::addInst(DynInstPtr &inst) { instList.push_back(inst); } @@ -411,9 +396,9 @@ FullBetaCPU::instDone() template void -FullBetaCPU::removeBackInst(DynInst *inst) +FullBetaCPU::removeBackInst(DynInstPtr &inst) { - DynInst *inst_to_delete; + DynInstPtr inst_to_delete; // Walk through the instruction list, removing any 
instructions // that were inserted after the given instruction, inst. @@ -424,22 +409,22 @@ FullBetaCPU::removeBackInst(DynInst *inst) // Obtain the pointer to the instruction. inst_to_delete = instList.back(); - DPRINTF(FullCPU, "FullCPU: Deleting instruction %#x, PC %#x\n", - inst_to_delete, inst_to_delete->readPC()); + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); // Remove the instruction from the list. instList.pop_back(); - // Delete the instruction itself. - delete inst_to_delete; + // Mark it as squashed. + inst_to_delete->setSquashed(); } } template void -FullBetaCPU::removeFrontInst(DynInst *inst) +FullBetaCPU::removeFrontInst(DynInstPtr &inst) { - DynInst *inst_to_delete; + DynInstPtr inst_to_delete; // The front instruction should be the same one being asked to be deleted. assert(instList.front() == inst); @@ -451,7 +436,7 @@ FullBetaCPU::removeFrontInst(DynInst *inst) DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n", inst_to_delete, inst_to_delete->readPC()); - delete inst_to_delete; +// delete inst_to_delete; } template @@ -461,7 +446,7 @@ FullBetaCPU::removeInstsNotInROB() DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " "list.\n"); - DynInst *rob_tail = rob.readTailInst(); + DynInstPtr rob_tail = rob.readTailInst(); removeBackInst(rob_tail); } @@ -478,13 +463,13 @@ void FullBetaCPU::dumpInsts() { int num = 0; - typename list::iterator inst_list_it = instList.begin(); + typename list::iterator inst_list_it = instList.begin(); while (inst_list_it != instList.end()) { - cprintf("Instruction:%i\nInst:%#x\nPC:%#x\nSN:%lli\n\n", - num, (*inst_list_it), (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum); + cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", + num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, + (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); inst_list_it++; ++num; } @@ -492,7 +477,7 @@ 
FullBetaCPU::dumpInsts() template void -FullBetaCPU::wakeDependents(DynInst *inst) +FullBetaCPU::wakeDependents(DynInstPtr &inst) { iew.wakeDependents(inst); } diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index 00ff1f878..cf753ad67 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -16,6 +16,7 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" #include "cpu/beta_cpu/cpu_policy.hh" #include "sim/process.hh" @@ -28,17 +29,32 @@ class BaseFullCPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. public: + class Params + { + public: #ifdef FULL_SYSTEM - BaseFullCPU(const std::string &_name, int _number_of_threads, - Counter max_insts_any_thread, Counter max_insts_all_threads, - Counter max_loads_any_thread, Counter max_loads_all_threads, - System *_system, Tick freq); + std::string name; + int numberOfThreads; + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; + System *_system; + Tick freq; #else - BaseFullCPU(const std::string &_name, int _number_of_threads, - Counter max_insts_any_thread = 0, - Counter max_insts_all_threads = 0, - Counter max_loads_any_thread = 0, - Counter max_loads_all_threads = 0); + std::string name; + int numberOfThreads; + Counter maxInstsAnyThread; + Counter maxInstsAllThreads; + Counter maxLoadsAnyThread; + Counter maxLoadsAllThreads; +#endif // FULL_SYSTEM + }; + +#ifdef FULL_SYSTEM + BaseFullCPU(Params &params); +#else + BaseFullCPU(Params &params); #endif // FULL_SYSTEM }; @@ -49,7 +65,7 @@ class FullBetaCPU : public BaseFullCPU //Put typedefs from the Impl here. 
typedef typename Impl::CPUPol CPUPolicy; typedef typename Impl::Params Params; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; public: enum Status { @@ -162,7 +178,7 @@ class FullBetaCPU : public BaseFullCPU /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ - void addInst(DynInst *inst); + void addInst(DynInstPtr &inst); /** Function to tell the CPU that an instruction has completed. */ void instDone(); @@ -175,7 +191,7 @@ class FullBetaCPU : public BaseFullCPU * @todo: Remove only up until that inst? Squashed inst is most likely * valid. */ - void removeBackInst(DynInst *inst); + void removeBackInst(DynInstPtr &inst); /** Remove an instruction from the front of the list. It is expected * that there are no instructions in front of it (that is, none are older @@ -184,7 +200,7 @@ class FullBetaCPU : public BaseFullCPU * last instruction once it's verified that commit has the same ordering * as the instruction list. */ - void removeFrontInst(DynInst *inst); + void removeFrontInst(DynInstPtr &inst); /** Remove all instructions that are not currently in the ROB. */ void removeInstsNotInROB(); @@ -198,11 +214,11 @@ class FullBetaCPU : public BaseFullCPU * commit can tell the instruction queue that they have completed. * Eventually this hack should be removed. */ - void wakeDependents(DynInst *inst); + void wakeDependents(DynInstPtr &inst); public: /** List of all the instructions in flight. */ - list instList; + list instList; //not sure these should be private. protected: @@ -255,15 +271,15 @@ class FullBetaCPU : public BaseFullCPU /** Typedefs from the Impl to get the structs that each of the * time buffers should use. 
*/ - typedef typename Impl::TimeStruct TimeStruct; + typedef typename CPUPolicy::TimeStruct TimeStruct; - typedef typename Impl::FetchStruct FetchStruct; + typedef typename CPUPolicy::FetchStruct FetchStruct; - typedef typename Impl::DecodeStruct DecodeStruct; + typedef typename CPUPolicy::DecodeStruct DecodeStruct; - typedef typename Impl::RenameStruct RenameStruct; + typedef typename CPUPolicy::RenameStruct RenameStruct; - typedef typename Impl::IEWStruct IEWStruct; + typedef typename CPUPolicy::IEWStruct IEWStruct; /** The main time buffer to do backwards communication. */ TimeBuffer timeBuffer; diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh index 52b9ccdb0..de408ef0c 100644 --- a/cpu/beta_cpu/iew.hh +++ b/cpu/beta_cpu/iew.hh @@ -1,13 +1,10 @@ -//Todo: Update with statuses. Create constructor. Fix up time buffer stuff. -//Will also need a signal heading back at least one stage to rename to say -//how many empty skid buffer entries there are. Perhaps further back even. +//Todo: Update with statuses. //Need to handle delaying writes to the writeback bus if it's full at the -//given time. Squash properly. Load store queue. +//given time. Load store queue. 
#ifndef __SIMPLE_IEW_HH__ #define __SIMPLE_IEW_HH__ -// To include: time buffer, structs, queue, #include #include "base/timebuf.hh" @@ -22,16 +19,18 @@ class SimpleIEW private: //Typedefs from Impl typedef typename Impl::ISA ISA; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; - typedef typename Impl::CPUPol::RenameMap RenameMap; + typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::LDSTQ LDSTQ; - typedef typename Impl::TimeStruct TimeStruct; - typedef typename Impl::IEWStruct IEWStruct; - typedef typename Impl::RenameStruct RenameStruct; - typedef typename Impl::IssueStruct IssueStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::IssueStruct IssueStruct; public: enum Status { @@ -51,7 +50,7 @@ class SimpleIEW public: void squash(); - void squash(DynInst *inst); + void squash(DynInstPtr &inst); void block(); @@ -70,7 +69,7 @@ class SimpleIEW void setRenameMap(RenameMap *rm_ptr); - void wakeDependents(DynInst *inst); + void wakeDependents(DynInstPtr &inst); void tick(); @@ -111,11 +110,13 @@ class SimpleIEW //Will need internal queue to hold onto instructions coming from //the rename stage in case of a stall. /** Skid buffer between rename and IEW. */ - queue skidBuffer; + std::queue skidBuffer; /** Instruction queue. */ IQ instQueue; + LDSTQ ldstQueue; + /** Pointer to rename map. Might not want this stage to directly * access this though... */ diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index b198220f5..521ce77f6 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -3,8 +3,8 @@ // communication happens simultaneously. Might not be that bad really... // it might skew stats a bit though. 
Issue would otherwise try to issue // instructions that would never be executed if there were a delay; without -// it issue will simply squash. Make this stage block properly. Make this -// stage delay after a squash properly. Update the statuses for each stage. +// it issue will simply squash. Make this stage block properly. +// Update the statuses for each stage. // Actually read instructions out of the skid buffer. #include @@ -15,8 +15,9 @@ template SimpleIEW::SimpleIEW(Params &params) : // Just make this time buffer really big for now - issueToExecQueue(20, 20), + issueToExecQueue(5, 5), instQueue(params), + ldstQueue(params), commitToIEWDelay(params.commitToIEWDelay), renameToIEWDelay(params.renameToIEWDelay), issueToExecuteDelay(params.issueToExecuteDelay), @@ -45,6 +46,7 @@ SimpleIEW::setCPU(FullCPU *cpu_ptr) cpu = cpu_ptr; instQueue.setCPU(cpu_ptr); + ldstQueue.setCPU(cpu_ptr); } template @@ -96,7 +98,7 @@ SimpleIEW::setRenameMap(RenameMap *rm_ptr) template void -SimpleIEW::wakeDependents(DynInst *inst) +SimpleIEW::wakeDependents(DynInstPtr &inst) { instQueue.wakeDependents(inst); } @@ -150,17 +152,15 @@ SimpleIEW::squash() // Tell the IQ to start squashing. instQueue.squash(); - // Tell rename to squash through the time buffer. - // This communication may be redundant depending upon where squash() - // is called. -// toRename->iewInfo.squash = true; + // Tell the LDSTQ to start squashing. + ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); } template void -SimpleIEW::squash(DynInst *inst) +SimpleIEW::squash(DynInstPtr &inst) { - DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC:%#x.\n", + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", inst->PC); // Perhaps leave the squashing up to the ROB stage to tell it when to // squash? @@ -170,8 +170,11 @@ SimpleIEW::squash(DynInst *inst) toRename->iewInfo.squash = true; // Also send PC update information back to prior stages. 
toRename->iewInfo.squashedSeqNum = inst->seqNum; + toRename->iewInfo.mispredPC = inst->readPC(); toRename->iewInfo.nextPC = inst->readCalcTarg(); - toRename->iewInfo.predIncorrect = true; + toRename->iewInfo.branchMispredict = true; + // Prediction was incorrect, so send back inverse. + toRename->iewInfo.branchTaken = !(inst->predTaken()); } template @@ -229,7 +232,7 @@ SimpleIEW::tick() // If there's still instructions coming from rename, continue to // put them on the skid buffer. - if (fromRename->insts[0] != NULL) { + if (fromRename->insts[0]) { block(); } @@ -244,6 +247,19 @@ SimpleIEW::tick() // Write back number of free IQ entries here. toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + if (fromCommit->commitInfo.commitIsStore) { + ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); + } else if (fromCommit->commitInfo.commitIsLoad) { + ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); + } + + if (fromCommit->commitInfo.nonSpecSeqNum != 0) { + instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); + } + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", instQueue.numFreeEntries()); } @@ -265,7 +281,7 @@ SimpleIEW::iew() } //////////////////////////////////////// - //ISSUE stage + // DISPATCH/ISSUE stage //////////////////////////////////////// //Put into its own function? @@ -273,16 +289,16 @@ SimpleIEW::iew() // Check if there are any instructions coming from rename, and we're. // not squashing. - if (fromRename->insts[0] != NULL && _status != Squashing) { + if (fromRename->insts[0] && _status != Squashing) { // Loop through the instructions, putting them in the instruction // queue. 
for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num) { - DynInst *inst = fromRename->insts[inst_num]; + DynInstPtr inst = fromRename->insts[inst_num]; // Make sure there's a valid instruction there. - if (inst == NULL) + if (!inst) break; DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", @@ -294,25 +310,38 @@ SimpleIEW::iew() // Be sure to mark these instructions as ready so that the // commit stage can go ahead and execute them, and mark // them as issued so the IQ doesn't reprocess them. - if (inst->isMemRef()) { + if (inst->isSquashed()) { + continue; + } else if (inst->isLoad()) { DPRINTF(IEW, "IEW: Issue: Memory instruction " - "encountered, skipping.\n"); + "encountered, adding to LDSTQ.\n"); - inst->setIssued(); - inst->setExecuted(); + // Reserve a spot in the load store queue for this + // memory access. + ldstQueue.insertLoad(inst); + + } else if (inst->isStore()) { + ldstQueue.insertStore(inst); + + // A bit of a hack. Set that it can commit so that + // the commit stage will try committing it, and then + // once commit realizes it's a store it will send back + // a signal to this stage to issue and execute that + // store. inst->setCanCommit(); - instQueue.advanceTail(inst); + instQueue.insertNonSpec(inst); continue; } else if (inst->isNonSpeculative()) { DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " "encountered, skipping.\n"); - inst->setIssued(); - inst->setExecuted(); + // Same hack as with stores. inst->setCanCommit(); - instQueue.advanceTail(inst); + // Specifically insert it as nonspeculative. + instQueue.insertNonSpec(inst); + continue; } else if (inst->isNop()) { DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " @@ -355,6 +384,7 @@ SimpleIEW::iew() // @todo: Move to the FU pool used in the current full cpu. int fu_usage = 0; + bool fetch_redirect = false; // Execute/writeback any instructions that are available. 
for (int inst_num = 0; @@ -365,26 +395,48 @@ SimpleIEW::iew() DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); // Get instruction from issue's queue. - DynInst *inst = fromIssue->insts[inst_num]; + DynInstPtr inst = fromIssue->insts[inst_num]; DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); - inst->setExecuted(); - // Check if the instruction is squashed; if so then skip it // and don't count it towards the FU usage. if (inst->isSquashed()) { DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + toCommit->insts[inst_num] = inst; + continue; } + inst->setExecuted(); + // If an instruction is executed, then count it towards FU usage. ++fu_usage; // Execute instruction. // Note that if the instruction faults, it will be handled // at the commit stage. - inst->execute(); + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + ldstQueue.executeLoad(inst); + } else if (inst->isStore()) { + ldstQueue.executeStore(); + } else { + panic("IEW: Unexpected memory type!\n"); + } + + } else { + inst->execute(); + } // First check the time slot that this instruction will write // to. If there are free write ports at the time, then go ahead @@ -401,16 +453,34 @@ SimpleIEW::iew() // Check if branch was correct. This check happens after the // instruction is added to the queue because even if the branch // is mispredicted, the branch instruction itself is still valid. - if (inst->mispredicted()) { - DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); - DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", - inst->nextPC); + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. 
+ if (!fetch_redirect) { + if (inst->mispredicted()) { + fetch_redirect = true; - // If incorrect, then signal the ROB that it must be squashed. - squash(inst); + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); - // Not sure it really needs to break. -// break; + // If incorrect, then signal the ROB that it must be squashed. + squash(inst); + } else if (ldstQueue.violation()) { + fetch_redirect = true; + + DynInstPtr violator = ldstQueue.getMemDepViolator(); + + DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + instQueue.violation(inst, violator); + + squash(inst); + // Otherwise check if there was a memory ordering violation. + // If there was, then signal ROB that it must be squashed. Also + // signal IQ that there was a violation. + } } } @@ -422,18 +492,20 @@ SimpleIEW::iew() // Either have IEW have direct access to rename map, or have this as // part of backwards communication. 
for (int inst_num = 0; inst_num < executeWidth && - toCommit->insts[inst_num] != NULL; inst_num++) + toCommit->insts[inst_num]; inst_num++) { - DynInst *inst = toCommit->insts[inst_num]; + DynInstPtr inst = toCommit->insts[inst_num]; DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", inst->readPC()); - instQueue.wakeDependents(inst); + if(!inst->isSquashed()) { + instQueue.wakeDependents(inst); - for (int i = 0; i < inst->numDestRegs(); i++) - { - renameMap->markAsReady(inst->renamedDestRegIdx(i)); + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } } } diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh index 5741bfcf5..a170979cb 100644 --- a/cpu/beta_cpu/inst_queue.hh +++ b/cpu/beta_cpu/inst_queue.hh @@ -2,12 +2,13 @@ #define __INST_QUEUE_HH__ #include +#include #include #include +#include #include "base/timebuf.hh" - -using namespace std; +#include "cpu/inst_seq.hh" //Perhaps have a better separation between the data structure underlying //and the actual algorithm. @@ -24,48 +25,53 @@ using namespace std; * and 96-191 are fp). This remains true even for both logical and * physical register indices. */ -template +template class InstructionQueue { public: //Typedefs from the Impl. typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; - typedef typename Impl::IssueStruct IssueStruct; - typedef typename Impl::TimeStruct TimeStruct; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; // Typedef of iterator through the list of instructions. Might be // better to untie this from the FullCPU or pass its information to // the stages. 
- typedef typename list::iterator ListIt; + typedef typename std::list::iterator ListIt; /** - * Class for priority queue entries. Mainly made so that the < operator - * is defined. + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. */ - struct ReadyEntry { - DynInst *inst; - - ReadyEntry(DynInst *_inst) - : inst(_inst) - { } - - /** Compare(lhs,rhs) checks if rhs is "bigger" than lhs. If so, rhs - * goes higher on the priority queue. The oldest instruction should - * be on the top of the instruction queue, so in this case "bigger" - * has the reverse meaning; the instruction with the lowest - * sequence number is on the top. - */ - bool operator <(const ReadyEntry &rhs) const + struct pqCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const { - if (this->inst->seqNum > rhs.inst->seqNum) - return true; - return false; + return lhs->seqNum > rhs->seqNum; } }; + /** + * Struct for comparing entries to be added to the set. This gives + * standard ordering in terms of sequence numbers. 
+ */ + struct setCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum < rhs->seqNum; + } + }; + + typedef std::priority_queue, pqCompare> + ReadyInstQueue; + InstructionQueue(Params &params); void setCPU(FullCPU *cpu); @@ -78,20 +84,32 @@ class InstructionQueue bool isFull(); - void insert(DynInst *new_inst); + void insert(DynInstPtr &new_inst); - void advanceTail(DynInst *inst); + void insertNonSpec(DynInstPtr &new_inst); + + void advanceTail(DynInstPtr &inst); void scheduleReadyInsts(); - void wakeDependents(DynInst *completed_inst); + void scheduleNonSpec(const InstSeqNum &inst); - void doSquash(); + void wakeDependents(DynInstPtr &completed_inst); + + void violation(DynInstPtr &store, DynInstPtr &faulting_load); void squash(); + void doSquash(); + void stopSquash(); + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful side effects. + */ + void dumpLists(); + private: /** Debugging function to count how many entries are in the IQ. It does * a linear walk through the instructions, so do not call this function @@ -103,6 +121,11 @@ class InstructionQueue /** Pointer to the CPU. */ FullCPU *cpu; + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit; + /** The queue to the execute stage. Issued instructions will be written * into it. */ @@ -118,26 +141,46 @@ class InstructionQueue Int, Float, Branch, + Memory, + Misc, Squashed, None }; /** List of ready int instructions. Used to keep track of the order in - * which */ - priority_queue readyIntInsts; + * which instructions should issue. + */ + ReadyInstQueue readyIntInsts; /** List of ready floating point instructions. */ - priority_queue readyFloatInsts; + ReadyInstQueue readyFloatInsts; /** List of ready branch instructions. 
*/ - priority_queue readyBranchInsts; + ReadyInstQueue readyBranchInsts; + + /** List of ready memory instructions. */ + ReadyInstQueue readyMemInsts; + + /** List of ready miscellaneous instructions. */ + ReadyInstQueue readyMiscInsts; /** List of squashed instructions (which are still valid and in IQ). * Implemented using a priority queue; the entries must contain both * the IQ index and sequence number of each instruction so that * ordering based on sequence numbers can be used. */ - priority_queue squashedInsts; + ReadyInstQueue squashedInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is necessary to be + * able to search by the sequence number alone. + */ + std::map nonSpecInsts; + + typedef typename std::map::iterator non_spec_it_t; /** Number of free IQ entries left. */ unsigned freeEntries; @@ -158,6 +201,9 @@ class InstructionQueue /** The number of branches that can be issued in one cycle. */ unsigned branchWidth; + /** The number of memory instructions that can be issued in one cycle. */ + unsigned memoryWidth; + /** The total number of instructions that can be issued in one cycle. */ unsigned totalWidth; @@ -183,7 +229,7 @@ class InstructionQueue InstSeqNum squashedSeqNum; /** Iterator that points to the oldest instruction in the IQ. */ - ListIt head; +// ListIt head; /** Iterator that points to the youngest instruction in the IQ. */ ListIt tail; @@ -200,7 +246,7 @@ class InstructionQueue class DependencyEntry { public: - DynInst *inst; + DynInstPtr inst; //Might want to include data about what arch. register the //dependence is waiting on. DependencyEntry *next; @@ -212,9 +258,9 @@ class InstructionQueue //away. 
So for now it will sit here, within the IQ, until //a better implementation is decided upon. // This function probably shouldn't be within the entry... - void insert(DynInst *new_inst); + void insert(DynInstPtr &new_inst); - void remove(DynInst *inst_to_remove); + void remove(DynInstPtr &inst_to_remove); }; /** Array of linked lists. Each linked list is a list of all the @@ -233,11 +279,12 @@ class InstructionQueue */ vector regScoreboard; - bool addToDependents(DynInst *new_inst); - void insertDependency(DynInst *new_inst); - void createDependency(DynInst *new_inst); + bool addToDependents(DynInstPtr &new_inst); + void insertDependency(DynInstPtr &new_inst); + void createDependency(DynInstPtr &new_inst); + void dumpDependGraph(); - void addIfReady(DynInst *inst); + void addIfReady(DynInstPtr &inst); }; #endif //__INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh index 6f1f06858..03e3fed33 100644 --- a/cpu/beta_cpu/inst_queue_impl.hh +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -1,11 +1,8 @@ #ifndef __INST_QUEUE_IMPL_HH__ #define __INST_QUEUE_IMPL_HH__ -// Todo: Fix up consistency errors about back of the ready list being -// the oldest instructions in the queue. When woken up from the dependency -// graph they will be the oldest, but when they are immediately executable -// newer instructions will mistakenly get inserted onto the back. Also -// current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake // it; either do in reverse order, or have added instructions put into a // different ready queue that, in scheduleRreadyInsts(), gets put onto the // normal ready queue. This would however give only a one cycle delay, @@ -21,18 +18,21 @@ // Blatant hack to avoid compile warnings. 
const InstSeqNum MaxInstSeqNum = 0 - 1; -template +template InstructionQueue::InstructionQueue(Params ¶ms) - : numEntries(params.numIQEntries), + : memDepUnit(params), + numEntries(params.numIQEntries), intWidth(params.executeIntWidth), floatWidth(params.executeFloatWidth), + totalWidth(params.issueWidth), numPhysIntRegs(params.numPhysIntRegs), numPhysFloatRegs(params.numPhysFloatRegs), commitToIEWDelay(params.commitToIEWDelay) { // HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER. - totalWidth = 1; branchWidth = 1; + memoryWidth = 1; + DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); // Initialize the number of free IQ entries. @@ -66,7 +66,7 @@ InstructionQueue::InstructionQueue(Params ¶ms) } -template +template void InstructionQueue::setCPU(FullCPU *cpu_ptr) { @@ -75,7 +75,7 @@ InstructionQueue::setCPU(FullCPU *cpu_ptr) tail = cpu->instList.begin(); } -template +template void InstructionQueue::setIssueToExecuteQueue( TimeBuffer *i2e_ptr) @@ -84,7 +84,7 @@ InstructionQueue::setIssueToExecuteQueue( issueToExecuteQueue = i2e_ptr; } -template +template void InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) { @@ -96,7 +96,7 @@ InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) // Might want to do something more complex if it knows how many instructions // will be issued this cycle. -template +template bool InstructionQueue::isFull() { @@ -107,16 +107,16 @@ InstructionQueue::isFull() } } -template +template unsigned InstructionQueue::numFreeEntries() { return freeEntries; } -template +template void -InstructionQueue::insert(DynInst *new_inst) +InstructionQueue::insert(DynInstPtr &new_inst) { // Make sure the instruction is valid assert(new_inst); @@ -157,18 +157,78 @@ InstructionQueue::insert(DynInst *new_inst) // register(s). createDependency(new_inst); + // If it's a memory instruction, add it to the memory dependency + // unit. 
+ if (new_inst->isMemRef()) { + memDepUnit.insert(new_inst); + } + // If the instruction is ready then add it to the ready list. addIfReady(new_inst); assert(freeEntries == (numEntries - countInsts())); } +template +void +InstructionQueue::insertNonSpec(DynInstPtr &inst) +{ + nonSpecInsts[inst->seqNum] = inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. +// addToDependents(inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (inst->isMemRef()) { + memDepUnit.insert(inst); + } +} + // Slightly hack function to advance the tail iterator in the case that // the IEW stage issues an instruction that is not added to the IQ. This // is needed in case a long chain of such instructions occurs. 
-template +template void -InstructionQueue::advanceTail(DynInst *inst) +InstructionQueue::advanceTail(DynInstPtr &inst) { // Make sure the instruction is valid assert(inst); @@ -205,10 +265,11 @@ InstructionQueue::advanceTail(DynInst *inst) } // Need to make sure the number of float and integer instructions -// issued does not exceed the total issue bandwidth. Probably should -// have some sort of limit of total number of branches that can be issued -// as well. -template +// issued does not exceed the total issue bandwidth. +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template void InstructionQueue::scheduleReadyInsts() { @@ -218,6 +279,7 @@ InstructionQueue::scheduleReadyInsts() int int_issued = 0; int float_issued = 0; int branch_issued = 0; + int memory_issued = 0; int squashed_issued = 0; int total_issued = 0; @@ -226,6 +288,8 @@ InstructionQueue::scheduleReadyInsts() bool insts_available = !readyBranchInsts.empty() || !readyIntInsts.empty() || !readyFloatInsts.empty() || + !readyMemInsts.empty() || + !readyMiscInsts.empty() || !squashedInsts.empty(); // Note: Requires a globally defined constant. @@ -233,10 +297,12 @@ InstructionQueue::scheduleReadyInsts() InstList list_with_oldest = None; // Temporary values. 
- DynInst *int_head_inst; - DynInst *float_head_inst; - DynInst *branch_head_inst; - DynInst *squashed_head_inst; + DynInstPtr int_head_inst; + DynInstPtr float_head_inst; + DynInstPtr branch_head_inst; + DynInstPtr mem_head_inst; + DynInstPtr misc_head_inst; + DynInstPtr squashed_head_inst; // Somewhat nasty code to look at all of the lists where issuable // instructions are located, and choose the oldest instruction among @@ -257,7 +323,7 @@ InstructionQueue::scheduleReadyInsts() insts_available = true; - int_head_inst = readyIntInsts.top().inst; + int_head_inst = readyIntInsts.top(); if (int_head_inst->isSquashed()) { readyIntInsts.pop(); @@ -274,7 +340,7 @@ InstructionQueue::scheduleReadyInsts() insts_available = true; - float_head_inst = readyFloatInsts.top().inst; + float_head_inst = readyFloatInsts.top(); if (float_head_inst->isSquashed()) { readyFloatInsts.pop(); @@ -291,7 +357,7 @@ InstructionQueue::scheduleReadyInsts() insts_available = true; - branch_head_inst = readyBranchInsts.top().inst; + branch_head_inst = readyBranchInsts.top(); if (branch_head_inst->isSquashed()) { readyBranchInsts.pop(); @@ -304,11 +370,44 @@ InstructionQueue::scheduleReadyInsts() } + if (!readyMemInsts.empty() && + memory_issued < memoryWidth) { + + insts_available = true; + + mem_head_inst = readyMemInsts.top(); + + if (mem_head_inst->isSquashed()) { + readyMemInsts.pop(); + continue; + } else if (mem_head_inst->seqNum < oldest_inst) { + oldest_inst = mem_head_inst->seqNum; + + list_with_oldest = Memory; + } + } + + if (!readyMiscInsts.empty()) { + + insts_available = true; + + misc_head_inst = readyMiscInsts.top(); + + if (misc_head_inst->isSquashed()) { + readyMiscInsts.pop(); + continue; + } else if (misc_head_inst->seqNum < oldest_inst) { + oldest_inst = misc_head_inst->seqNum; + + list_with_oldest = Misc; + } + } + if (!squashedInsts.empty()) { insts_available = true; - squashed_head_inst = squashedInsts.top().inst; + squashed_head_inst = squashedInsts.top(); if 
(squashed_head_inst->seqNum < oldest_inst) { list_with_oldest = Squashed; @@ -316,13 +415,14 @@ InstructionQueue::scheduleReadyInsts() } - DynInst *issuing_inst = NULL; + DynInstPtr issuing_inst = NULL; switch (list_with_oldest) { case None: DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " "inst is %#x.\n", issuing_inst); break; + case Int: issuing_inst = int_head_inst; readyIntInsts.pop(); @@ -330,6 +430,7 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", issuing_inst->readPC()); break; + case Float: issuing_inst = float_head_inst; readyFloatInsts.pop(); @@ -337,6 +438,7 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", issuing_inst->readPC()); break; + case Branch: issuing_inst = branch_head_inst; readyBranchInsts.pop(); @@ -344,6 +446,25 @@ InstructionQueue::scheduleReadyInsts() DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", issuing_inst->readPC()); break; + + case Memory: + issuing_inst = mem_head_inst; + + memDepUnit.issue(mem_head_inst); + + readyMemInsts.pop(); + ++memory_issued; + DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Misc: + issuing_inst = misc_head_inst; + readyMiscInsts.pop(); + DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", + issuing_inst->readPC()); + break; + case Squashed: issuing_inst = squashed_head_inst; squashedInsts.pop(); @@ -366,61 +487,32 @@ InstructionQueue::scheduleReadyInsts() } } -template +template void -InstructionQueue::doSquash() +InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) { - // Make sure the squash iterator isn't pointing to nothing. - assert(squashIt != cpu->instList.end()); - // Make sure the squashed sequence number is valid. 
- assert(squashedSeqNum != 0); + non_spec_it_t inst_it = nonSpecInsts.find(inst); - DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + assert(inst_it != nonSpecInsts.end()); - // Squash any instructions younger than the squashed sequence number - // given. - while ((*squashIt)->seqNum > squashedSeqNum) { - DynInst *squashed_inst = (*squashIt); + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); - // Only handle the instruction if it actually is in the IQ and - // hasn't already been squashed in the IQ. - if (!squashed_inst->isIssued() && - !squashed_inst->isSquashedInIQ()) { - // Remove the instruction from the dependency list. - int8_t total_src_regs = squashed_inst->numSrcRegs(); + // Now schedule the instruction. + addIfReady((*inst_it).second); - for (int src_reg_idx = 0; - src_reg_idx < total_src_regs; - src_reg_idx++) - { - // Only remove it from the dependency graph if it was - // placed there in the first place. - // HACK: This assumes that instructions woken up from the - // dependency chain aren't informed that a specific src - // register has become ready. This may not always be true - // in the future. - if (!squashed_inst->isReadySrcRegIdx(src_reg_idx)) { - int8_t src_reg = - squashed_inst->renamedSrcRegIdx(src_reg_idx); - dependGraph[src_reg].remove(squashed_inst); - } - } - - // Mark it as squashed within the IQ. - squashed_inst->setSquashedInIQ(); - - ReadyEntry temp(squashed_inst); - - squashedInsts.push(temp); - - DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", - squashed_inst->readPC()); - } - squashIt--; - } + nonSpecInsts.erase(inst_it); } -template +template +void +InstructionQueue::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit.violation(store, faulting_load); +} + +template void InstructionQueue::squash() { @@ -435,9 +527,78 @@ InstructionQueue::squash() // Call doSquash. doSquash(); + + // Also tell the memory dependence unit to squash. 
+ memDepUnit.squash(squashedSeqNum); } -template +template +void +InstructionQueue::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInstPtr squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + // Remove the instruction from the dependency list. + // Hack for now: These below don't add themselves to the + // dependency list, so don't try to remove them. + if (!squashed_inst->isNonSpeculative() && + !squashed_inst->isStore()) { + int8_t total_src_regs = squashed_inst->numSrcRegs(); + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + } + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. 
+ squashed_inst->setSquashedInIQ(); + + squashedInsts.push(squashed_inst); + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + + if (squashed_inst->isNonSpeculative() || squashed_inst->isStore()) { + nonSpecInsts.erase(squashed_inst->seqNum); + } + + --squashIt; + } +} + +template void InstructionQueue::stopSquash() { @@ -448,36 +609,9 @@ InstructionQueue::stopSquash() squashIt = cpu->instList.end(); } -template -int -InstructionQueue::countInsts() -{ - ListIt count_it = cpu->instList.begin(); - int total_insts = 0; - - while (count_it != tail) { - if (!(*count_it)->isIssued()) { - ++total_insts; - } - - count_it++; - - assert(count_it != cpu->instList.end()); - } - - // Need to count the tail iterator as well. - if (count_it != cpu->instList.end() && - (*count_it) != NULL && - !(*count_it)->isIssued()) { - ++total_insts; - } - - return total_insts; -} - -template +template void -InstructionQueue::wakeDependents(DynInst *completed_inst) +InstructionQueue::wakeDependents(DynInstPtr &completed_inst) { DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); //Look at the physical destination register of the DynInst @@ -487,6 +621,13 @@ InstructionQueue::wakeDependents(DynInst *completed_inst) DependencyEntry *curr; + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. + + if (completed_inst->isMemRef()) { + memDepUnit.wakeDependents(completed_inst); + } + for (int dest_reg_idx = 0; dest_reg_idx < total_dest_regs; dest_reg_idx++) @@ -507,7 +648,7 @@ InstructionQueue::wakeDependents(DynInst *completed_inst) //Maybe abstract this part into a function. //Go through the dependency chain, marking the registers as ready //within the waiting instructions. 
- while (dependGraph[dest_reg].next != NULL) { + while (dependGraph[dest_reg].next) { curr = dependGraph[dest_reg].next; @@ -537,9 +678,9 @@ InstructionQueue::wakeDependents(DynInst *completed_inst) } } -template +template bool -InstructionQueue::addToDependents(DynInst *new_inst) +InstructionQueue::addToDependents(DynInstPtr &new_inst) { // Loop through the instruction's source registers, adding // them to the dependency list if they are not ready. @@ -558,7 +699,9 @@ InstructionQueue::addToDependents(DynInst *new_inst) // hasn't become ready while the instruction was in flight // between stages. Only if it really isn't ready should // it be added to the dependency graph. - if (regScoreboard[src_reg] == false) { + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " "is being added to the dependency chain.\n", new_inst->readPC(), src_reg); @@ -581,9 +724,9 @@ InstructionQueue::addToDependents(DynInst *new_inst) return return_val; } -template +template void -InstructionQueue::createDependency(DynInst *new_inst) +InstructionQueue::createDependency(DynInstPtr &new_inst) { //Actually nothing really needs to be marked when an //instruction becomes the producer of a register's value, @@ -595,20 +738,32 @@ InstructionQueue::createDependency(DynInst *new_inst) dest_reg_idx < total_dest_regs; dest_reg_idx++) { - int8_t dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); - dependGraph[dest_reg].inst = new_inst; - if (dependGraph[dest_reg].next != NULL) { - panic("Dependency chain is not empty.\n"); + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. 
+ if (dest_reg >= numPhysRegs) { + continue; } + dependGraph[dest_reg].inst = new_inst; +#if 0 + if (dependGraph[dest_reg].next) { + panic("Dependency chain of dest reg %i is not empty.\n", + dest_reg); + } +#endif + assert(!dependGraph[dest_reg].next); // Mark the scoreboard to say it's not yet ready. regScoreboard[dest_reg] = false; } } -template +template void -InstructionQueue::DependencyEntry::insert(DynInst *new_inst) +InstructionQueue::DependencyEntry::insert(DynInstPtr &new_inst) { //Add this new, dependent instruction at the head of the dependency //chain. @@ -623,9 +778,9 @@ InstructionQueue::DependencyEntry::insert(DynInst *new_inst) this->next = new_entry; } -template +template void -InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) +InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) { DependencyEntry *prev = this; DependencyEntry *curr = this->next; @@ -643,6 +798,8 @@ InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) { prev = curr; curr = curr->next; + + assert(curr != NULL); } // Now remove this instruction from the list. @@ -651,34 +808,140 @@ InstructionQueue::DependencyEntry::remove(DynInst *inst_to_remove) delete curr; } -template +template void -InstructionQueue::addIfReady(DynInst *inst) +InstructionQueue::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x consumer: ", i, + curr->inst->readPC()); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x ", curr->inst->readPC()); + } + + cprintf("\n"); + } +} + +template +void +InstructionQueue::addIfReady(DynInstPtr &inst) { //If the instruction now has all of its source registers // available, then add it to the list of ready instructions. 
if (inst->readyToIssue()) { - ReadyEntry to_add(inst); + //Add the instruction to the proper ready list. - if (inst->isInteger()) { - DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyIntInsts.push(to_add); - } else if (inst->isFloating()) { - DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyFloatInsts.push(to_add); - } else if (inst->isControl()) { + if (inst->isControl()) { + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " "putting it onto the ready list, PC %#x.\n", inst->readPC()); - readyBranchInsts.push(to_add); + readyBranchInsts.push(inst); + + } else if (inst->isMemRef()) { + + DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); + + if (memDepUnit.readyToIssue(inst)) { + DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyMemInsts.push(inst); + } + + } else if (inst->isInteger()) { + + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(inst); + + } else if (inst->isFloating()) { + + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(inst); + } else { - panic("IQ: Instruction not an expected type.\n"); + DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, " + "putting it onto the ready list, PC %#x..\n", + inst->readPC()); + + readyMiscInsts.push(inst); } } } +template +int +InstructionQueue::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + ++count_it; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. 
+ if (count_it != cpu->instList.end() && + (*count_it) && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template +void +InstructionQueue::dumpLists() +{ + cprintf("Ready integer list size: %i\n", readyIntInsts.size()); + + cprintf("Ready float list size: %i\n", readyFloatInsts.size()); + + cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); + + cprintf("Ready memory list size: %i\n", readyMemInsts.size()); + + cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); + + cprintf("Squashed list size: %i\n", squashedInsts.size()); + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + non_spec_it_t non_spec_it = nonSpecInsts.begin(); + + cprintf("Non speculative list: "); + + while (non_spec_it != nonSpecInsts.end()) { + cprintf("%#x ", (*non_spec_it).second->readPC()); + ++non_spec_it; + } + + cprintf("\n"); + +} + #endif // __INST_QUEUE_IMPL_HH__ diff --git a/cpu/beta_cpu/mem_dep_unit.cc b/cpu/beta_cpu/mem_dep_unit.cc new file mode 100644 index 000000000..3175997f6 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit.cc @@ -0,0 +1,9 @@ + +#include "cpu/beta_cpu/alpha_dyn_inst.hh" +#include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/store_set.hh" +#include "cpu/beta_cpu/mem_dep_unit_impl.hh" + +// Force instantation of memory dependency unit using store sets and +// AlphaSimpleImpl. +template MemDepUnit; diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh new file mode 100644 index 000000000..4821c63b7 --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit.hh @@ -0,0 +1,70 @@ + +#ifndef __MEM_DEP_UNIT_HH__ +#define __MEM_DEP_UNIT_HH__ + +#include +#include + +#include "cpu/inst_seq.hh" + +/** + * Memory dependency unit class. This holds the memory dependence predictor. + * As memory operations are issued to the IQ, they are also issued to this + * unit, which then looks up the prediction as to what they are dependent + * upon. 
This unit must be checked prior to a memory operation being able + * to issue. Although this is templated, it's somewhat hard to make a generic + * memory dependence unit. This one is mostly for store sets; it will be + * quite limited in what other memory dependence predictions it can also + * utilize. Thus this class should be most likely be rewritten for other + * dependence prediction schemes. + */ +template +class MemDepUnit { + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + typedef typename std::set::iterator sn_it_t; + typedef typename std::map >::iterator + dep_it_t; + + public: + MemDepUnit(Params ¶ms); + + void insert(DynInstPtr &inst); + + bool readyToIssue(DynInstPtr &inst); + + void issue(DynInstPtr &inst); + + void wakeDependents(DynInstPtr &inst); + + void squash(const InstSeqNum &squashed_num); + + void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + + private: + /** List of instructions that have passed through rename, yet are still + * waiting on a memory dependence to resolve before they can issue. + */ + std::set renamedInsts; + + /** List of instructions that have all their predicted memory dependences + * resolved. They are ready in terms of being free of memory + * dependences; however they may still have to wait on source registers. + */ + std::set readyInsts; + + std::map > dependencies; + + /** The memory dependence predictor. It is accessed upon new + * instructions being added to the IQ, and responds by telling + * this unit what instruction the newly added instruction is dependent + * upon. + */ + MemDepPred depPred; + +}; + +#endif diff --git a/cpu/beta_cpu/mem_dep_unit_impl.hh b/cpu/beta_cpu/mem_dep_unit_impl.hh new file mode 100644 index 000000000..4299acb7a --- /dev/null +++ b/cpu/beta_cpu/mem_dep_unit_impl.hh @@ -0,0 +1,166 @@ + +#include + +#include "cpu/beta_cpu/mem_dep_unit.hh" + +// Hack: dependence predictor sizes are hardcoded. 
+template +MemDepUnit::MemDepUnit(Params ¶ms) + : depPred(4028, 128) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); +} + +template +void +MemDepUnit::insert(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + + InstSeqNum producing_store = depPred.checkInst(inst->readPC()); + + if (producing_store == 0 || + dependencies.find(producing_store) == dependencies.end()) { + readyInsts.insert(inst_seq_num); + } else { + // If it's not already ready, then add it to the renamed + // list and the dependencies. + renamedInsts.insert(inst_seq_num); + + dependencies[producing_store].push_back(inst_seq_num); + } + + if (inst->isStore()) { + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(dependencies.find(inst_seq_num) == dependencies.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + dependencies[inst_seq_num]; + } else if (!inst->isLoad()) { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } +} + +template +bool +MemDepUnit::readyToIssue(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + if (readyInsts.find(inst_seq_num) == readyInsts.end()) { + return false; + } else { + return true; + } +} + +template +void +MemDepUnit::issue(DynInstPtr &inst) +{ + assert(readyInsts.find(inst->seqNum) != readyInsts.end()); + + // Remove the instruction from the ready list. + readyInsts.erase(inst->seqNum); +} + +template +void +MemDepUnit::wakeDependents(DynInstPtr &inst) +{ + // Wake any dependencies. + dep_it_t dep_it = dependencies.find(inst); + + // If there's no entry, then return. Really there should only be + // no entry if the instruction is a load. 
+ if (dep_it == dependencies.end()) { + return; + } + + assert(inst->isStore()); + + for(int i = 0; i < (*dep_it).second.size(); ++i ) { + InstSeqNum woken_inst = (*dep_it).second[i]; + + // Should we have reached instructions that are actually squashed, + // there will be no more useful instructions in this dependency + // list. Break out early. + if (renamedInsts.find(woken_inst) == renamedInsts.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " + "are squashed, starting at SN %i. Breaking early.\n", + inst->readPC(), woken_inst); + break; + } + + // Remove it from the renamed instructions. + renamedInsts.erase(woken_inst); + + // Add it to the ready list. + readyInsts.insert(woken_inst); + } + + dependencies.erase(dep_it); +} + +template +void +MemDepUnit::squash(const InstSeqNum &squashed_num) +{ + + if (!renamedInsts.empty()) { + sn_it_t renamed_it = renamedInsts.end(); + + --renamed_it; + + // Remove entries from the renamed list as long as we haven't reached + // the end and the entries continue to be younger than the squashed. + while (!renamedInsts.empty() && + (*renamed_it) > squashed_num) + { + renamedInsts.erase(renamed_it--); + } + } + + if (!readyInsts.empty()) { + sn_it_t ready_it = readyInsts.end(); + + --ready_it; + + // Same for the ready list. + while (!readyInsts.empty() && + (*ready_it) > squashed_num) + { + readyInsts.erase(ready_it--); + } + } + + if (!dependencies.empty()) { + dep_it_t dep_it = dependencies.end(); + + --dep_it; + + // Same for the dependencies list. + while (!dependencies.empty() && + (*dep_it).first > squashed_num) + { + dependencies.erase(dep_it--); + } + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num); +} + +template +void +MemDepUnit::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ + // Tell the memory dependence unit of the violation. 
+ depPred.violation(violating_load->readPC(), store_inst->readPC()); +} diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index 21e0ce218..aba897fdc 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -13,11 +13,11 @@ using namespace std; // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, // and should go in the AlphaFullCPU. -template +template class PhysRegFile { //Note that most of the definitions of the IntReg, FloatReg, etc. exist - //within the Impl class and not within this PhysRegFile class. + //within the Impl/ISA class and not within this PhysRegFile class. //Will need some way to allow stuff like swap_palshadow to access the //correct registers. Might require code changes to swap_palshadow and @@ -42,6 +42,8 @@ class PhysRegFile uint64_t readIntReg(PhysRegIndex reg_idx) { + assert(reg_idx < numPhysicalIntRegs); + DPRINTF(IEW, "RegFile: Access to int register %i, has data " "%i\n", int(reg_idx), intRegFile[reg_idx]); return intRegFile[reg_idx]; @@ -52,8 +54,10 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Access to float register %i, has data " - "%f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as single, has " + "data %8.8f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); return (float)floatRegFile[reg_idx].d; } @@ -63,8 +67,10 @@ class PhysRegFile // Remove the base Float reg dependency. 
reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Access to float register %i, has data " - "%f\n", int(reg_idx), floatRegFile[reg_idx].d); + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as double, has " + " data %8.8f\n", int(reg_idx), floatRegFile[reg_idx].d); return floatRegFile[reg_idx].d; } @@ -74,14 +80,18 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Access to float register %i, has data " - "%f\n", int(reg_idx), floatRegFile[reg_idx].q); + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Access to float register %i as int, has data " + "%lli\n", int(reg_idx), floatRegFile[reg_idx].q); return floatRegFile[reg_idx].q; } void setIntReg(PhysRegIndex reg_idx, uint64_t val) { + assert(reg_idx < numPhysicalIntRegs); + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", int(reg_idx), val); @@ -93,7 +103,9 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", int(reg_idx), val); floatRegFile[reg_idx].d = (double)val; @@ -104,7 +116,9 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - DPRINTF(IEW, "RegFile: Setting float register %i to %f\n", + assert(reg_idx < numPhysicalFloatRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", int(reg_idx), val); floatRegFile[reg_idx].d = val; @@ -115,6 +129,8 @@ class PhysRegFile // Remove the base Float reg dependency. 
reg_idx = reg_idx - numPhysicalIntRegs; + assert(reg_idx < numPhysicalFloatRegs); + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), val); @@ -185,7 +201,7 @@ class PhysRegFile unsigned numPhysicalFloatRegs; }; -template +template PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs) : numPhysicalIntRegs(_numPhysicalIntRegs), @@ -203,7 +219,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, //Problem: This code doesn't make sense at the RegFile level because it //needs things such as the itb and dtb. Either put it at the CPU level or //the DynInst level. -template +template uint64_t PhysRegFile::readIpr(int idx, Fault &fault) { @@ -319,7 +335,7 @@ PhysRegFile::readIpr(int idx, Fault &fault) int break_ipl = -1; #endif -template +template Fault PhysRegFile::setIpr(int idx, uint64_t val) { diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh index cd66ce686..9f031012a 100644 --- a/cpu/beta_cpu/rename.hh +++ b/cpu/beta_cpu/rename.hh @@ -1,25 +1,14 @@ // Todo: -// Figure out rename map for reg vs fp (probably just have one rename map). -// In simple case, there is no renaming, so have this stage do basically -// nothing. -// Fix up trap and barrier handling. Fix up squashing too, as it's too -// dependent upon the iew stage continually telling it to squash. -// Have commit send back information whenever a branch has committed. This -// way the history buffer can be cleared beyond the point where the branch -// was. +// Fix up trap and barrier handling. +// May want to have different statuses to differentiate the different stall +// conditions. 
#ifndef __SIMPLE_RENAME_HH__ #define __SIMPLE_RENAME_HH__ -//Will want to include: time buffer, structs, free list, rename map #include #include "base/timebuf.hh" -#include "cpu/beta_cpu/comm.hh" -#include "cpu/beta_cpu/rename_map.hh" -#include "cpu/beta_cpu/free_list.hh" - -using namespace std; // Will need rename maps for both the int reg file and fp reg file. // Or change rename map class to handle both. (RegFile handles both.) @@ -30,14 +19,14 @@ class SimpleRename // Typedefs from the Impl. typedef typename Impl::ISA ISA; typedef typename Impl::CPUPol CPUPol; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; - typedef typename Impl::FetchStruct FetchStruct; - typedef typename Impl::DecodeStruct DecodeStruct; - typedef typename Impl::RenameStruct RenameStruct; - typedef typename Impl::TimeStruct TimeStruct; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; // Typedefs from the CPUPol typedef typename CPUPol::FreeList FreeList; @@ -94,6 +83,14 @@ class SimpleRename void removeFromHistory(InstSeqNum inst_seq_num); + inline void renameSrcRegs(DynInstPtr &inst); + + inline void renameDestRegs(DynInstPtr &inst); + + inline int calcFreeROBEntries(); + + inline int calcFreeIQEntries(); + /** Holds the previous information for each rename. * Note that often times the inst may have been deleted, so only access * the pointer for the address and do not dereference it. @@ -123,7 +120,7 @@ class SimpleRename bool placeHolder; }; - list historyBuffer; + std::list historyBuffer; /** CPU interface. */ FullCPU *cpu; @@ -155,7 +152,7 @@ class SimpleRename typename TimeBuffer::wire fromDecode; /** Skid buffer between rename and decode. 
*/ - queue skidBuffer; + std::queue skidBuffer; /** Rename map interface. */ SimpleRenameMap *renameMap; @@ -179,6 +176,12 @@ class SimpleRename * instructions might have freed registers in the previous cycle. */ unsigned commitWidth; + + /** The instruction that rename is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; }; #endif // __SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh index 2b60c2f50..47464d961 100644 --- a/cpu/beta_cpu/rename_impl.hh +++ b/cpu/beta_cpu/rename_impl.hh @@ -2,18 +2,19 @@ #include "cpu/beta_cpu/rename.hh" -template +template SimpleRename::SimpleRename(Params ¶ms) : iewToRenameDelay(params.iewToRenameDelay), decodeToRenameDelay(params.decodeToRenameDelay), commitToRenameDelay(params.commitToRenameDelay), renameWidth(params.renameWidth), - commitWidth(params.commitWidth) + commitWidth(params.commitWidth), + numInst(0) { _status = Idle; } -template +template void SimpleRename::setCPU(FullCPU *cpu_ptr) { @@ -21,7 +22,7 @@ SimpleRename::setCPU(FullCPU *cpu_ptr) cpu = cpu_ptr; } -template +template void SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) { @@ -38,7 +39,7 @@ SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) toDecode = timeBuffer->getWire(0); } -template +template void SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) { @@ -49,7 +50,7 @@ SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) toIEW = renameQueue->getWire(0); } -template +template void SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) { @@ -61,7 +62,7 @@ SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) } -template +template void SimpleRename::setRenameMap(RenameMap *rm_ptr) { @@ -69,7 +70,7 @@ SimpleRename::setRenameMap(RenameMap *rm_ptr) renameMap = rm_ptr; } -template +template void SimpleRename::setFreeList(FreeList *fl_ptr) { @@ -77,7 +78,7 @@ SimpleRename::setFreeList(FreeList *fl_ptr) 
freeList = fl_ptr; } -template +template void SimpleRename::dumpHistory() { @@ -93,7 +94,7 @@ SimpleRename::dumpHistory() } } -template +template void SimpleRename::block() { @@ -110,12 +111,12 @@ SimpleRename::block() // the previous stages are expected to check all possible stall signals. } -template +template inline void SimpleRename::unblock() { - DPRINTF(Rename, "Rename: Reading instructions out of skid " - "buffer.\n"); + DPRINTF(Rename, "Rename: Read instructions out of skid buffer this " + "cycle.\n"); // Remove the now processed instructions from the skid buffer. skidBuffer.pop(); @@ -130,12 +131,12 @@ SimpleRename::unblock() } } -template +template void SimpleRename::doSquash() { typename list::iterator hb_it = historyBuffer.begin(); - typename list::iterator delete_it; +// typename list::iterator delete_it; InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; @@ -166,15 +167,17 @@ SimpleRename::doSquash() freeList->addReg(hb_it->newPhysReg); } - delete_it = hb_it; +// delete_it = hb_it; - hb_it++; +// hb_it++; - historyBuffer.erase(delete_it); + historyBuffer.erase(hb_it++); + + assert(hb_it != historyBuffer.end()); } } -template +template void SimpleRename::squash() { @@ -182,6 +185,8 @@ SimpleRename::squash() // Set the status to Squashing. _status = Squashing; + numInst = 0; + // Clear the skid buffer in case it has any data in it. while (!skidBuffer.empty()) { @@ -199,10 +204,10 @@ void SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) { DPRINTF(Rename, "Rename: Removing a committed instruction from the " - "history buffer, sequence number %lli.\n", inst_seq_num); + "history buffer, until sequence number %lli.\n", inst_seq_num); typename list::iterator hb_it = historyBuffer.end(); - hb_it--; + --hb_it; if (hb_it->instSeqNum > inst_seq_num) { DPRINTF(Rename, "Rename: Old sequence number encountered. 
Ensure " @@ -210,7 +215,7 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) return; } - for ( ; hb_it->instSeqNum != inst_seq_num; hb_it--) + while ((*hb_it).instSeqNum != inst_seq_num) { // Make sure we haven't gone off the end of the list. assert(hb_it != historyBuffer.end()); @@ -222,10 +227,19 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) // be the last instruction in the list, as it is the instruction // that was just committed that is being removed. assert(hb_it->instSeqNum < inst_seq_num); - DPRINTF(Rename, "Rename: Committed instruction is not the last " - "entry in the history buffer.\n"); + DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence" + " number %i.\n", + (*hb_it).prevPhysReg, (*hb_it).instSeqNum); + + if (!(*hb_it).placeHolder) { + freeList->addReg((*hb_it).prevPhysReg); + } + + historyBuffer.erase(hb_it--); } + // Finally free up the previous register of the squashed instruction + // itself. if (!(*hb_it).placeHolder) { freeList->addReg(hb_it->prevPhysReg); } @@ -234,6 +248,113 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) } +template +inline void +SimpleRename::renameSrcRegs(DynInstPtr &inst) +{ + unsigned num_src_regs = inst->numSrcRegs(); + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. 
+ RegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + } +} + +template +inline void +SimpleRename::renameDestRegs(DynInstPtr &inst) +{ + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_dest_regs = inst->numDestRegs(); + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "reg %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", + (*historyBuffer.begin()).instSeqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + } + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. 
It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. + if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + } +} + +template +inline int +SimpleRename::calcFreeROBEntries() +{ + return fromCommit->commitInfo.freeROBEntries - + renameWidth * iewToRenameDelay; +} + +template +inline int +SimpleRename::calcFreeIQEntries() +{ + return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay; +} + template void SimpleRename::tick() @@ -258,12 +379,18 @@ SimpleRename::tick() // buffer were used. Remove those instructions and handle // the rest of unblocking. if (_status == Unblocking) { + if (fromDecode->size > 0) { + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + } + unblock(); } } else if (_status == Blocked) { // If stage is blocked and still receiving valid instructions, // make sure to store them in the skid buffer. - if (fromDecode->insts[0] != NULL) { + if (fromDecode->size > 0) { block(); @@ -273,8 +400,9 @@ SimpleRename::tick() if (!fromIEW->iewInfo.stall && !fromCommit->commitInfo.stall && - fromCommit->commitInfo.freeROBEntries != 0 && - fromIEW->iewInfo.freeIQEntries != 0) { + calcFreeROBEntries() > 0 && + calcFreeIQEntries() > 0 && + renameMap->numFreeEntries() > 0) { // Need to be sure to check all blocking conditions above. // If they have cleared, then start unblocking. @@ -344,6 +472,7 @@ SimpleRename::rename() // the rename map and the free list. 
if (fromCommit->commitInfo.squash || fromCommit->commitInfo.robSquashing) { + DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n"); squash(); return; } @@ -368,37 +497,38 @@ SimpleRename::rename() // Check the decode queue to see if instructions are available. // If there are no available instructions to rename, then do nothing. // Or, if the stage is currently unblocking, then go ahead and run it. - if (fromDecode->insts[0] == NULL && _status != Unblocking) { + if (fromDecode->size == 0 && _status != Unblocking) { DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); // Should I change status to idle? return; } - DynInst *inst; - unsigned num_inst = 0; + //////////////////////////////////// + // Actual rename part. + //////////////////////////////////// - bool insts_available = _status == Unblocking ? - skidBuffer.front().insts[num_inst] != NULL : - fromDecode->insts[num_inst] != NULL; + DynInstPtr inst; - typename SimpleRenameMap::RenameInfo rename_result; + // If we're unblocking, then we may be in the middle of an instruction + // group. Subtract off numInst to get the proper number of instructions + // left. + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromDecode->size; - unsigned num_src_regs; - unsigned num_dest_regs; + bool block_this_cycle = false; // Will have to do a different calculation for the number of free // entries. Number of free entries recorded on this cycle - // renameWidth * renameToDecodeDelay - // Can I avoid a multiply? 
- unsigned free_rob_entries = - fromCommit->commitInfo.freeROBEntries - iewToRenameDelay; - DPRINTF(Rename, "Rename: ROB has %d free entries.\n", - free_rob_entries); - unsigned free_iq_entries = - fromIEW->iewInfo.freeIQEntries - iewToRenameDelay; + int free_rob_entries = calcFreeROBEntries(); + int free_iq_entries = calcFreeIQEntries(); + int min_iq_rob = min(free_rob_entries, free_iq_entries); + + unsigned to_iew_index = 0; // Check if there's any space left. - if (free_rob_entries == 0 || free_iq_entries == 0) { + if (min_iq_rob <= 0) { DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " "entries.\n" "Rename: ROB has %d free entries.\n" @@ -410,22 +540,40 @@ SimpleRename::rename() toDecode->renameInfo.stall = true; return; + } else if (min_iq_rob < insts_available) { + DPRINTF(Rename, "Rename: Will have to block this cycle. Only " + "%i insts can be renamed due to IQ/ROB limits.\n", + min_iq_rob); + + insts_available = min_iq_rob; + + block_this_cycle = true; } - unsigned min_iq_rob = min(free_rob_entries, free_iq_entries); - unsigned num_insts_to_rename = min(min_iq_rob, renameWidth); - - while (insts_available && - num_inst < num_insts_to_rename) { + while (insts_available > 0) { DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); // Get the next instruction either from the skid buffer or the // decode queue. - inst = _status == Unblocking ? skidBuffer.front().insts[num_inst] : - fromDecode->insts[num_inst]; + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromDecode->insts[numInst]; + + if (inst->isSquashed()) { + DPRINTF(Rename, "Rename: instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + // Go to the next instruction. + ++numInst; + + // Decrement how many instructions are available. 
+ --insts_available; + + continue; + } DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", - inst, inst->readPC()); + inst->seqNum, inst->readPC()); // If it's a trap instruction, then it needs to wait here within // rename until the ROB is empty. Needs a way to detect that the @@ -438,156 +586,59 @@ SimpleRename::rename() panic("Rename: Serializing instruction encountered.\n"); DPRINTF(Rename, "Rename: Serializing instruction " "encountered.\n"); - block(); // Change status over to BarrierStall so that other stages know // what this is blocked on. _status = BarrierStall; - // Tell the previous stage to stall. - toDecode->renameInfo.stall = true; + block_this_cycle = true; break; } - // Make sure there's enough room in the ROB and the IQ. - // This doesn't really need to be done dynamically; consider - // moving outside of this function. - if (free_rob_entries == 0 || free_iq_entries == 0) { - DPRINTF(Rename, "Rename: Blocking due to lack of ROB or IQ " - "entries.\n"); - // Call some sort of function to handle all the setup of being - // blocked. - block(); - - // Not really sure how to schedule an event properly, but an - // event must be scheduled such that upon freeing a ROB entry, - // this stage will restart up. Perhaps add in a ptr to an Event - // within the ROB that will be able to execute that Event - // if a free register is added to the freelist. - - // Tell the previous stage to stall. - toDecode->renameInfo.stall = true; - - break; - } - - // Temporary variables to hold number of source and destination regs. - num_src_regs = inst->numSrcRegs(); - num_dest_regs = inst->numDestRegs(); - // Check here to make sure there are enough destination registers // to rename to. Otherwise block. - if (renameMap->numFreeEntries() < num_dest_regs) + if (renameMap->numFreeEntries() < inst->numDestRegs()) { DPRINTF(Rename, "Rename: Blocking due to lack of free " "physical registers to rename to.\n"); - // Call function to handle blocking. 
- block(); - // Need some sort of event based on a register being freed. - // Tell the previous stage to stall. - toDecode->renameInfo.stall = true; + block_this_cycle = true; - // Break out of rename loop. break; } - // Get the architectual register numbers from the source and - // destination operands, and redirect them to the right register. - // Will need to mark dependencies though. - for (int src_idx = 0; src_idx < num_src_regs; src_idx++) - { - RegIndex src_reg = inst->srcRegIdx(src_idx); + renameSrcRegs(inst); - // Look up the source registers to get the phys. register they've - // been renamed to, and set the sources to those registers. - RegIndex renamed_reg = renameMap->lookup(src_reg); - - DPRINTF(Rename, "Rename: Looking up arch reg %i, got " - "physical reg %i.\n", (int)src_reg, (int)renamed_reg); - - inst->renameSrcReg(src_idx, renamed_reg); - - // Either incorporate it into the info passed back, - // or make another function call to see if that register is - // ready or not. - if (renameMap->isReady(renamed_reg)) { - DPRINTF(Rename, "Rename: Register is ready.\n"); - - inst->markSrcRegReady(src_idx); - } - } - - // Rename the destination registers. - for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) - { - RegIndex dest_reg = inst->destRegIdx(dest_idx); - - // Get the physical register that the destination will be - // renamed to. - rename_result = renameMap->rename(dest_reg); - - DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " - "register %i.\n", (int)dest_reg, - (int)rename_result.first); - - // Record the rename information so that a history can be kept. 
- RenameHistory hb_entry(inst->seqNum, dest_reg, - rename_result.first, - rename_result.second); - - historyBuffer.push_front(hb_entry); - - DPRINTF(Rename, "Rename: Adding instruction to history buffer, " - "sequence number %lli.\n", inst->seqNum); - - // Tell the instruction to rename the appropriate destination - // register (dest_idx) to the new physical register - // (rename_result.first), and record the previous physical - // register that the same logical register was renamed to - // (rename_result.second). - inst->renameDestReg(dest_idx, - rename_result.first, - rename_result.second); - } - - // If it's an instruction with no destination registers, then put - // a placeholder within the history buffer. It might be better - // to not put it in the history buffer at all (other than branches, - // which always need at least a place holder), and differentiate - // between instructions with and without destination registers - // when getting from commit the instructions that committed. - if (num_dest_regs == 0) { - RenameHistory hb_entry(inst->seqNum); - - historyBuffer.push_front(hb_entry); - - DPRINTF(Rename, "Rename: Adding placeholder instruction to " - "history buffer, sequence number %lli.\n", - inst->seqNum); - } + renameDestRegs(inst); // Put instruction in rename queue. - toIEW->insts[num_inst] = inst; + toIEW->insts[to_iew_index] = inst; + ++(toIEW->size); // Decrease the number of free ROB and IQ entries. --free_rob_entries; --free_iq_entries; // Increment which instruction we're on. - ++num_inst; + ++to_iew_index; + ++numInst; - // Check whether or not there are instructions available. - // Either need to check within the skid buffer, or the decode - // queue, depending if this stage is unblocking or not. - // Hmm, dangerous check. Can touch memory not allocated. Might - // be better to just do check at beginning of loop. Or better - // yet actually pass the number of instructions issued. - insts_available = _status == Unblocking ? 
- skidBuffer.front().insts[num_inst] != NULL : - fromDecode->insts[num_inst] != NULL; + // Decrement how many instructions are available. + --insts_available; } + // Check if there's any instructions left that haven't yet been renamed. + // If so then block. + if (block_this_cycle) { + block(); + + toDecode->renameInfo.stall = true; + } else { + // If we had a successful rename and didn't have to exit early, then + // reset numInst so it will refer to the correct instruction on next + // run. + numInst = 0; + } } diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc index c234182f0..cb9720d28 100644 --- a/cpu/beta_cpu/rename_map.cc +++ b/cpu/beta_cpu/rename_map.cc @@ -3,12 +3,10 @@ // Todo: Consider making functions inline. Avoid having things that are // using the zero register or misc registers from adding on the registers -// to the free list. - -SimpleRenameMap::RenameEntry::RenameEntry() - : physical_reg(0), valid(false) -{ -} +// to the free list. Possibly remove the direct communication between +// this and the freelist. Considering making inline bool functions that +// determine if the register is a logical int, logical fp, physical int, +// physical fp, etc. SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, unsigned _numPhysicalIntRegs, @@ -35,11 +33,12 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, //Create the rename maps, and their scoreboards. intRenameMap = new RenameEntry[numLogicalIntRegs]; - floatRenameMap = new RenameEntry[numLogicalFloatRegs]; + floatRenameMap = new RenameEntry[numLogicalRegs]; + // Should combine this into one scoreboard. 
intScoreboard.resize(numPhysicalIntRegs); - floatScoreboard.resize(numPhysicalFloatRegs); - miscScoreboard.resize(numMiscRegs); + floatScoreboard.resize(numPhysicalRegs); + miscScoreboard.resize(numPhysicalRegs + numMiscRegs); // Initialize the entries in the integer rename map to point to the // physical registers of the same index, and consider each register @@ -59,31 +58,50 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, intScoreboard[index] = 0; } + int float_reg_idx = numPhysicalIntRegs; + // Initialize the entries in the floating point rename map to point to // the physical registers of the same index, and consider each register // ready until the first rename occurs. - for (RegIndex index = 0; index < numLogicalFloatRegs; ++index) + // Although the index refers purely to architected registers, because + // the floating reg indices come after the integer reg indices, they + // may exceed the size of a normal RegIndex (short). + for (PhysRegIndex index = numLogicalIntRegs; + index < numLogicalRegs; ++index) + { + floatRenameMap[index].physical_reg = float_reg_idx++; + } + + for (RegIndex index = numPhysicalIntRegs; + index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) { - floatRenameMap[index].physical_reg = index + numPhysicalIntRegs; floatScoreboard[index] = 1; } // Initialize the rest of the physical registers (the ones that don't // directly map to a logical register) as unready. - for (PhysRegIndex index = numLogicalFloatRegs; - index < numPhysicalFloatRegs; + for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs; + index < numPhysicalRegs; ++index) { floatScoreboard[index] = 0; } // Initialize the entries in the misc register scoreboard to be ready. 
- for (RegIndex index = 0; index < numMiscRegs; ++index) + for (RegIndex index = numPhysicalRegs; + index < numPhysicalRegs + numMiscRegs; ++index) { miscScoreboard[index] = 1; } } +SimpleRenameMap::~SimpleRenameMap() +{ + // Delete the rename maps as they were allocated with new. + delete [] intRenameMap; + delete [] floatRenameMap; +} + void SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) { @@ -116,6 +134,8 @@ SimpleRenameMap::rename(RegIndex arch_reg) // Update the integer rename map. intRenameMap[arch_reg].physical_reg = renamed_reg; + assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); + // Mark register as not ready. intScoreboard[renamed_reg] = false; } else { @@ -124,7 +144,7 @@ SimpleRenameMap::rename(RegIndex arch_reg) } } else if (arch_reg < numLogicalRegs) { // Subtract off the base offset for floating point registers. - arch_reg = arch_reg - numLogicalIntRegs; +// arch_reg = arch_reg - numLogicalIntRegs; // Record the current physical register that is renamed to the // requested architected register. @@ -139,6 +159,9 @@ SimpleRenameMap::rename(RegIndex arch_reg) // Update the floating point rename map. floatRenameMap[arch_reg].physical_reg = renamed_reg; + assert(renamed_reg < numPhysicalRegs && + renamed_reg >= numPhysicalIntRegs); + // Mark register as not ready. floatScoreboard[renamed_reg] = false; } else { @@ -160,6 +183,8 @@ SimpleRenameMap::rename(RegIndex arch_reg) // so the free list can avoid adding it. prev_reg = renamed_reg; + assert(renamed_reg < numPhysicalRegs + numMiscRegs); + miscScoreboard[renamed_reg] = false; } @@ -175,7 +200,7 @@ SimpleRenameMap::lookup(RegIndex arch_reg) return intRenameMap[arch_reg].physical_reg; } else if (arch_reg < numLogicalRegs) { // Subtract off the base FP offset. 
- arch_reg = arch_reg - numLogicalIntRegs; +// arch_reg = arch_reg - numLogicalIntRegs; return floatRenameMap[arch_reg].physical_reg; } else { @@ -196,12 +221,12 @@ SimpleRenameMap::isReady(PhysRegIndex phys_reg) } else if (phys_reg < numPhysicalRegs) { // Subtract off the base FP offset. - phys_reg = phys_reg - numPhysicalIntRegs; +// phys_reg = phys_reg - numPhysicalIntRegs; return floatScoreboard[phys_reg]; } else { // Subtract off the misc registers offset. - phys_reg = phys_reg - numPhysicalRegs; +// phys_reg = phys_reg - numPhysicalRegs; return miscScoreboard[phys_reg]; } @@ -218,13 +243,10 @@ SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) intRenameMap[arch_reg].physical_reg = renamed_reg; } else { -// assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); - - // Subtract off the base FP offset. - arch_reg = arch_reg - numLogicalIntRegs; + assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", - (int)arch_reg, renamed_reg); + (int)arch_reg - numLogicalIntRegs, renamed_reg); floatRenameMap[arch_reg].physical_reg = renamed_reg; } @@ -234,6 +256,8 @@ void SimpleRenameMap::squash(vector freed_regs, vector unmaps) { + panic("Not sure this function should be called."); + // Not sure the rename map should be able to access the free list // like this. while (!freed_regs.empty()) { @@ -260,16 +284,18 @@ SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) (int)ready_reg); if (ready_reg < numPhysicalIntRegs) { + assert(ready_reg >= 0); + intScoreboard[ready_reg] = 1; } else if (ready_reg < numPhysicalRegs) { // Subtract off the base FP offset. - ready_reg = ready_reg - numPhysicalIntRegs; +// ready_reg = ready_reg - numPhysicalIntRegs; floatScoreboard[ready_reg] = 1; } else { //Subtract off the misc registers offset. 
- ready_reg = ready_reg - numPhysicalRegs; +// ready_reg = ready_reg - numPhysicalRegs; miscScoreboard[ready_reg] = 1; } diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh index 05b52bfb2..e68fa05a8 100644 --- a/cpu/beta_cpu/rename_map.hh +++ b/cpu/beta_cpu/rename_map.hh @@ -1,6 +1,5 @@ // Todo: Create destructor. -// Make it so that there's a proper separation between int and fp. Also -// have it so that there's a more meaningful name given to the variable +// Have it so that there's a more meaningful name given to the variable // that marks the beginning of the FP registers. #ifndef __RENAME_MAP_HH__ @@ -10,7 +9,6 @@ #include #include -//Will want to include faults #include "cpu/beta_cpu/free_list.hh" using namespace std; @@ -18,8 +16,6 @@ using namespace std; class SimpleRenameMap { public: -// typedef typename Impl::RegIndex RegIndex; - /** * Pair of a logical register and a physical register. Tells the * previous mapping of a logical register to a physical register. @@ -45,6 +41,9 @@ class SimpleRenameMap RegIndex _intZeroReg, RegIndex _floatZeroReg); + /** Destructor. */ + ~SimpleRenameMap(); + void setFreeList(SimpleFreeList *fl_ptr); //Tell rename map to get a free physical register for a given @@ -110,7 +109,9 @@ class SimpleRenameMap PhysRegIndex physical_reg; bool valid; - RenameEntry(); + RenameEntry() + : physical_reg(0), valid(false) + { } }; /** Integer rename map. */ @@ -122,6 +123,8 @@ class SimpleRenameMap /** Free list interface. */ SimpleFreeList *freeList; + // Might want to make all these scoreboards into one large scoreboard. + /** Scoreboard of physical integer registers, saying whether or not they * are ready. */ diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh index 7963d1b01..c921c0619 100644 --- a/cpu/beta_cpu/rob.hh +++ b/cpu/beta_cpu/rob.hh @@ -16,24 +16,20 @@ using namespace std; /** * ROB class. Uses the instruction list that exists within the CPU to - * represent the ROB. 
This class doesn't contain that structure, but instead - * a pointer to the CPU to get access to the structure. The ROB has a large - * hand in squashing instructions within the CPU, and is responsible for - * sending out the squash signal as well as what instruction is to be - * squashed. The ROB also controls most of the calls to the CPU to delete - * instructions; the only other call is made in the first stage of the pipe- - * line, which tells the CPU to delete all instructions not in the ROB. + * represent the ROB. This class doesn't contain that list, but instead + * a pointer to the CPU to get access to the list. The ROB, in this first + * implementation, is largely what drives squashing. */ -template +template class ROB { public: //Typedefs from the Impl. typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; - typedef pair UnmapInfo; - typedef typename list::iterator InstIt; + typedef pair UnmapInfo_t; + typedef typename list::iterator InstIt_t; public: /** ROB constructor. @@ -56,15 +52,15 @@ class ROB * @params inst The instruction being inserted into the ROB. * @todo Remove the parameter once correctness is ensured. */ - void insertInst(DynInst *inst); + void insertInst(DynInstPtr &inst); /** Returns pointer to the head instruction within the ROB. There is * no guarantee as to the return value if the ROB is empty. * @retval Pointer to the DynInst that is at the head of the ROB. */ - DynInst *readHeadInst() { return cpu->instList.front(); } + DynInstPtr readHeadInst() { return cpu->instList.front(); } - DynInst *readTailInst() { return (*tail); } + DynInstPtr readTailInst() { return (*tail); } void retireHead(); @@ -108,15 +104,28 @@ class ROB /** Pointer to the CPU. */ FullCPU *cpu; + /** Number of instructions in the ROB. */ unsigned numEntries; /** Number of instructions that can be squashed in a single cycle. 
*/ unsigned squashWidth; - InstIt tail; + /** Iterator pointing to the instruction which is the last instruction + * in the ROB. This may at times be invalid (ie when the ROB is empty), + * however it should never be incorrect. + */ + InstIt_t tail; - InstIt squashIt; + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. + */ + InstIt_t squashIt; + /** Number of instructions in the ROB. */ int numInstsInROB; /** The sequence number of the squashed instruction. */ diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh index 308a8010f..862008429 100644 --- a/cpu/beta_cpu/rob_impl.hh +++ b/cpu/beta_cpu/rob_impl.hh @@ -3,7 +3,7 @@ #include "cpu/beta_cpu/rob.hh" -template +template ROB::ROB(unsigned _numEntries, unsigned _squashWidth) : numEntries(_numEntries), squashWidth(_squashWidth), @@ -13,43 +13,60 @@ ROB::ROB(unsigned _numEntries, unsigned _squashWidth) doneSquashing = true; } -template +template void ROB::setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; + // Set the tail to the beginning of the CPU instruction list so that + // upon the first instruction being inserted into the ROB, the tail + // iterator can simply be incremented. tail = cpu->instList.begin(); + // Set the squash iterator to the end of the instruction list. squashIt = cpu->instList.end(); } -template +template int ROB::countInsts() { -/* - int return_val = 0; + // Start at 1; if the tail matches cpu->instList.begin(), then there is + // one inst in the ROB. + int return_val = 1; + + // There are quite a few special cases. Do not use this function other + // than for debugging purposes. 
+ if (cpu->instList.begin() == cpu->instList.end()) { + // In this case there are no instructions in the list. The ROB + // must be empty. + return 0; + } else if (tail == cpu->instList.end()) { + // In this case, the tail is not yet pointing to anything valid. + // The ROB must be empty. + return 0; + } // Iterate through the ROB from the head to the tail, counting the // entries. - for (InstIt i = cpu->instList.begin(); i != tail; i++) + for (InstIt_t i = cpu->instList.begin(); i != tail; ++i) { assert(i != cpu->instList.end()); - return_val++; + ++return_val; } return return_val; -*/ + // Because the head won't be tracked properly until the ROB gets the // first instruction, and any time that the ROB is empty and has not // yet gotten the instruction, this function doesn't work. - return numInstsInROB; +// return numInstsInROB; } -template +template void -ROB::insertInst(DynInst *inst) +ROB::insertInst(DynInstPtr &inst) { // Make sure we have the right number of instructions. assert(numInstsInROB == countInsts()); @@ -68,7 +85,7 @@ ROB::insertInst(DynInst *inst) // in which case the tail will be pointing at instList.end(). If that // happens, then reset the tail to the beginning of the list. if (tail != cpu->instList.end()) { - tail++; + ++tail; } else { tail = cpu->instList.begin(); } @@ -83,13 +100,14 @@ ROB::insertInst(DynInst *inst) // Whatever calls this function needs to ensure that it properly frees up // registers prior to this function. -template +template void ROB::retireHead() { assert(numInstsInROB == countInsts()); + assert(numInstsInROB > 0); - DynInst *head_inst; + DynInstPtr head_inst; // Get the head ROB instruction. 
head_inst = cpu->instList.front(); @@ -116,12 +134,12 @@ ROB::retireHead() } } -template +template bool ROB::isHeadReady() { if (numInstsInROB != 0) { - DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->readyToCommit(); } @@ -129,7 +147,7 @@ ROB::isHeadReady() return false; } -template +template unsigned ROB::numFreeEntries() { @@ -138,7 +156,7 @@ ROB::numFreeEntries() return numEntries - numInstsInROB; } -template +template void ROB::doSquash() { @@ -162,6 +180,12 @@ ROB::doSquash() (*squashIt)->setCanCommit(); + // Special case for when squashing due to a syscall. It's possible + // that the squash happened after the head instruction was already + // committed, meaning that (*squashIt)->seqNum != squashedSeqNum + // will never be false. Normally the squash would never be able + // to go past the head of the ROB; in this case it might, so it + // must be handled otherwise it will segfault. #ifndef FULL_SYSTEM if (squashIt == cpu->instList.begin()) { DPRINTF(ROB, "ROB: Reached head of instruction list while " @@ -190,7 +214,7 @@ ROB::doSquash() } } -template +template void ROB::squash(InstSeqNum squash_num) { @@ -206,41 +230,41 @@ ROB::squash(InstSeqNum squash_num) doSquash(); } -template +template uint64_t ROB::readHeadPC() { assert(numInstsInROB == countInsts()); - DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->readPC(); } -template +template uint64_t ROB::readHeadNextPC() { assert(numInstsInROB == countInsts()); - DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->readNextPC(); } -template +template InstSeqNum ROB::readHeadSeqNum() { // Return the last sequence number that has not been squashed. Other // stages can use it to squash any instructions younger than the current // tail. 
- DynInst *head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); return head_inst->seqNum; } -template +template uint64_t ROB::readTailPC() { @@ -251,7 +275,7 @@ ROB::readTailPC() return (*tail)->readPC(); } -template +template InstSeqNum ROB::readTailSeqNum() { diff --git a/cpu/beta_cpu/store_set.cc b/cpu/beta_cpu/store_set.cc new file mode 100644 index 000000000..46d763d37 --- /dev/null +++ b/cpu/beta_cpu/store_set.cc @@ -0,0 +1,192 @@ +#include "cpu/beta_cpu/store_set.hh" +#include "base/trace.hh" + +StoreSet::StoreSet(int _SSIT_size, int _LFST_size) + : SSIT_size(_SSIT_size), LFST_size(_LFST_size) +{ + DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + + SSIT = new SSID[SSIT_size]; + + validSSIT.resize(SSIT_size); + + for (int i = 0; i < SSIT_size; ++i) + validSSIT[i] = false; + + LFST = new InstSeqNum[LFST_size]; + + validLFST.resize(LFST_size); + + SSCounters = new int[LFST_size]; + + for (int i = 0; i < LFST_size; ++i) + { + validLFST[i] = false; + SSCounters[i] = 0; + } + + index_mask = SSIT_size - 1; + + offset_bits = 2; +} + +void +StoreSet::violation(Addr load_PC, Addr store_PC) +{ + int load_index = calcIndex(load_PC); + int store_index = calcIndex(store_PC); + + bool valid_load_SSID = validSSIT[load_index]; + bool valid_store_SSID = validSSIT[store_index]; + + if (!valid_load_SSID && !valid_store_SSID) { + // Calculate a new SSID here. 
+ SSID new_set = calcSSID(load_PC); + + validSSIT[load_index] = true; + + SSIT[load_index] = new_set; + + validSSIT[store_index] = true; + + SSIT[store_index] = new_set; + + SSCounters[new_set]++; + } else if (valid_load_SSID && !valid_store_SSID) { + SSID load_SSID = SSIT[load_index]; + + validSSIT[store_index] = true; + + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + } else if (!valid_load_SSID && valid_store_SSID) { + SSID store_SSID = SSIT[store_index]; + + validSSIT[load_index] = true; + + SSIT[load_index] = store_SSID; + + // Because we are having a load point to an already existing set, + // the size of the store set is not incremented. + } else { + SSID load_SSID = SSIT[load_index]; + SSID store_SSID = SSIT[store_index]; + + int load_SS_size = SSCounters[load_SSID]; + int store_SS_size = SSCounters[store_SSID]; + + // If the load has the bigger store set, then assign the store + // to the same store set as the load. Otherwise vice-versa. + if (load_SS_size > store_SS_size) { + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + SSCounters[store_SSID]--; + } else { + SSIT[load_index] = store_SSID; + + SSCounters[store_SSID]++; + SSCounters[load_SSID]--; + } + } +} + +void +StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) +{ + // Does nothing. + return; +} + +void +StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) +{ + int index = calcIndex(store_PC); + + int store_SSID; + + if (!validSSIT[index]) { + // Do nothing if there's no valid entry. + return; + } else { + store_SSID = SSIT[index]; + assert(store_SSID < LFST_size); + // Record this store as the last one fetched for its store set and + // mark the entry valid; checkInst() skips entries that are not valid. + LFST[store_SSID] = store_seq_num; + validLFST[store_SSID] = true; + } +} + +InstSeqNum +StoreSet::checkInst(Addr PC) +{ + int index = calcIndex(PC); + + int inst_SSID; + + if (!validSSIT[index]) { + // Return 0 if there's no valid entry.
+ return 0; + } else { + inst_SSID = SSIT[index]; + + assert(inst_SSID < LFST_size); + + if (!validLFST[inst_SSID]) { + return 0; + } else { + return LFST[inst_SSID]; + } + } +} + +void +StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) +{ + // This only is updated upon a store being issued. + if (!is_store) { + return; + } + + int index = calcIndex(issued_PC); + + int store_SSID; + + // Make sure the SSIT still has a valid entry for the issued store. + assert(validSSIT[index]); + + store_SSID = SSIT[index]; + + // If the last fetched store in the store set refers to the store that + // was just issued, then invalidate the entry. + if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + validLFST[store_SSID] = false; + } +} + +void +StoreSet::squash(InstSeqNum squashed_num) +{ + // Not really sure how to do this well. + + for (int i = 0; i < LFST_size; ++i) { + if (LFST[i] < squashed_num) { + validLFST[i] = false; + } + } +} + +void +StoreSet::clear() +{ + for (int i = 0; i < SSIT_size; ++i) { + validSSIT[i] = false; + } + + for (int i = 0; i < LFST_size; ++i) { + validLFST[i] = false; + } +} + diff --git a/cpu/beta_cpu/store_set.hh b/cpu/beta_cpu/store_set.hh new file mode 100644 index 000000000..701c60a2d --- /dev/null +++ b/cpu/beta_cpu/store_set.hh @@ -0,0 +1,58 @@ +#ifndef __STORE_SET_HH__ +#define __STORE_SET_HH__ + +#include + +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +class StoreSet +{ + public: + typedef unsigned SSID; + + public: + StoreSet(int SSIT_size, int LFST_size); + + void violation(Addr load_PC, Addr store_PC); + + void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + + void insertStore(Addr store_PC, InstSeqNum store_seq_num); + + InstSeqNum checkInst(Addr PC); + + void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + + void squash(InstSeqNum squashed_num); + + void clear(); + + private: + inline int calcIndex(Addr PC) + { return (PC >> offset_bits) & 
index_mask; } + + inline SSID calcSSID(Addr PC) + { return ((PC ^ (PC >> 10)) % LFST_size); } + + SSID *SSIT; + + std::vector validSSIT; + + InstSeqNum *LFST; + + std::vector validLFST; + + int *SSCounters; + + int SSIT_size; + + int LFST_size; + + int index_mask; + + // HACK: Hardcoded for now. + int offset_bits; +}; + +#endif // __STORE_SET_HH__ diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index 7a707c86a..71e9ef441 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -40,9 +40,12 @@ #include "targetarch/isa_traits.hh" // forward declarations +struct AlphaSimpleImpl; class ExecContext; -class AlphaDynInst; class DynInst; +template +class AlphaDynInst; + class FastCPU; class SimpleCPU; class SymbolTable; From 2fb632dbda1b5db9163322541676cef52a55029f Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 21 Oct 2004 18:02:36 -0400 Subject: [PATCH 3/6] Check in of various updates to the CPU. Mainly adds in stats, improves branch prediction, and makes memory dependence work properly. SConscript: Added return address stack, tournament predictor. cpu/base_cpu.cc: Added debug break and print statements. cpu/base_dyn_inst.cc: cpu/base_dyn_inst.hh: Comment out possibly unneeded variables. cpu/beta_cpu/2bit_local_pred.cc: 2bit predictor no longer speculatively updates itself. cpu/beta_cpu/alpha_dyn_inst.hh: Comment formatting. cpu/beta_cpu/alpha_full_cpu.hh: Formatting cpu/beta_cpu/alpha_full_cpu_builder.cc: Added new parameters for branch predictors, and IQ parameters. cpu/beta_cpu/alpha_full_cpu_impl.hh: Register stats. cpu/beta_cpu/alpha_params.hh: Added parameters for IQ, branch predictors, and store sets. cpu/beta_cpu/bpred_unit.cc: Removed one class. cpu/beta_cpu/bpred_unit.hh: Add in RAS, stats. Changed branch predictor unit functionality so that it holds a history of past branches so it can update, and also hold a proper history of the RAS so it can be restored on branch mispredicts. 
cpu/beta_cpu/bpred_unit_impl.hh: Added in stats, history of branches, RAS. Now bpred unit actually modifies the instruction's predicted next PC. cpu/beta_cpu/btb.cc: Add in sanity checks. cpu/beta_cpu/comm.hh: Add in communication where needed, remove it where it's not. cpu/beta_cpu/commit.hh: cpu/beta_cpu/rename.hh: cpu/beta_cpu/rename_impl.hh: Add in stats. cpu/beta_cpu/commit_impl.hh: Stats, update what is sent back on branch mispredict. cpu/beta_cpu/cpu_policy.hh: Change the bpred unit being used. cpu/beta_cpu/decode.hh: cpu/beta_cpu/decode_impl.hh: Stats. cpu/beta_cpu/fetch.hh: Stats, change squash so it can handle squashes from decode differently than squashes from commit. cpu/beta_cpu/fetch_impl.hh: Add in stats. Change how a cache line is fetched. Update to work with caches. Also have separate functions for different behavior if squash is coming from decode vs commit. cpu/beta_cpu/free_list.hh: Remove some old comments. cpu/beta_cpu/full_cpu.cc: cpu/beta_cpu/full_cpu.hh: Added function to remove instructions from back of instruction list until a certain sequence number. cpu/beta_cpu/iew.hh: Stats, separate squashing behavior due to branches vs memory. cpu/beta_cpu/iew_impl.hh: Stats, separate squashing behavior for branches vs memory. cpu/beta_cpu/inst_queue.cc: Debug stuff cpu/beta_cpu/inst_queue.hh: Stats, change how mem dep unit works, debug stuff cpu/beta_cpu/inst_queue_impl.hh: Stats, change how mem dep unit works, debug stuff. Also add in parameters that used to be hardcoded. cpu/beta_cpu/mem_dep_unit.hh: cpu/beta_cpu/mem_dep_unit_impl.hh: Add in stats, change how memory dependence unit works. It now holds the memory instructions that are waiting for their memory dependences to resolve. It provides which instructions are ready directly to the IQ. cpu/beta_cpu/regfile.hh: Fix up sanity checks. cpu/beta_cpu/rename_map.cc: Fix loop variable type. 
cpu/beta_cpu/rob_impl.hh: Remove intermediate DynInstPtr cpu/beta_cpu/store_set.cc: Add in debugging statements. cpu/beta_cpu/store_set.hh: Reorder function arguments to match the rest of the calls. --HG-- extra : convert_revision : aabf9b1fecd1d743265dfc3b174d6159937c6f44 --- SConscript | 4 +- cpu/base_cpu.cc | 10 + cpu/base_dyn_inst.cc | 8 +- cpu/base_dyn_inst.hh | 26 +- cpu/beta_cpu/2bit_local_pred.cc | 24 +- cpu/beta_cpu/alpha_dyn_inst.hh | 13 +- cpu/beta_cpu/alpha_full_cpu.hh | 142 ++++--- cpu/beta_cpu/alpha_full_cpu_builder.cc | 63 ++- cpu/beta_cpu/alpha_full_cpu_impl.hh | 25 +- cpu/beta_cpu/alpha_params.hh | 23 + cpu/beta_cpu/bpred_unit.cc | 3 +- cpu/beta_cpu/bpred_unit.hh | 64 ++- cpu/beta_cpu/bpred_unit_impl.hh | 242 ++++++++++- cpu/beta_cpu/btb.cc | 6 + cpu/beta_cpu/comm.hh | 22 +- cpu/beta_cpu/commit.hh | 13 + cpu/beta_cpu/commit_impl.hh | 129 ++++-- cpu/beta_cpu/cpu_policy.hh | 2 +- cpu/beta_cpu/decode.hh | 11 + cpu/beta_cpu/decode_impl.hh | 87 +++- cpu/beta_cpu/fetch.hh | 55 ++- cpu/beta_cpu/fetch_impl.hh | 541 ++++++++++++----------- cpu/beta_cpu/free_list.hh | 8 +- cpu/beta_cpu/full_cpu.cc | 46 +- cpu/beta_cpu/full_cpu.hh | 5 + cpu/beta_cpu/iew.hh | 31 +- cpu/beta_cpu/iew_impl.hh | 566 ++++++++++++++++--------- cpu/beta_cpu/inst_queue.cc | 3 + cpu/beta_cpu/inst_queue.hh | 36 +- cpu/beta_cpu/inst_queue_impl.hh | 190 +++++++-- cpu/beta_cpu/mem_dep_unit.hh | 92 +++- cpu/beta_cpu/mem_dep_unit_impl.hh | 310 ++++++++++++-- cpu/beta_cpu/ras.cc | 42 ++ cpu/beta_cpu/ras.hh | 40 ++ cpu/beta_cpu/regfile.hh | 12 +- cpu/beta_cpu/rename.hh | 18 + cpu/beta_cpu/rename_impl.hh | 195 ++++++--- cpu/beta_cpu/rename_map.cc | 4 +- cpu/beta_cpu/rob_impl.hh | 4 +- cpu/beta_cpu/store_set.cc | 68 ++- cpu/beta_cpu/store_set.hh | 2 +- cpu/beta_cpu/tournament_pred.cc | 243 +++++++++++ cpu/beta_cpu/tournament_pred.hh | 160 +++++++ 43 files changed, 2769 insertions(+), 819 deletions(-) create mode 100644 cpu/beta_cpu/ras.cc create mode 100644 cpu/beta_cpu/ras.hh create mode 100644 
cpu/beta_cpu/tournament_pred.cc create mode 100644 cpu/beta_cpu/tournament_pred.hh diff --git a/SConscript b/SConscript index fb2b40325..8a9b99cb5 100644 --- a/SConscript +++ b/SConscript @@ -106,10 +106,12 @@ base_sources = Split(''' cpu/beta_cpu/inst_queue.cc cpu/beta_cpu/ldstq.cc cpu/beta_cpu/mem_dep_unit.cc + cpu/beta_cpu/ras.cc cpu/beta_cpu/rename.cc cpu/beta_cpu/rename_map.cc cpu/beta_cpu/rob.cc cpu/beta_cpu/store_set.cc + cpu/beta_cpu/tournament_pred.cc cpu/fast_cpu/fast_cpu.cc cpu/full_cpu/bpred.cc cpu/full_cpu/commit.cc @@ -481,7 +483,7 @@ env.Append(CPPPATH='.') # Debug binary debug = env.Copy(OBJSUFFIX='.do') -debug.Append(CCFLAGS=Split('-g -gstabs+ -O0')) +debug.Append(CCFLAGS=Split('-g -gstabs+ -O0 -lefence')) debug.Append(CPPDEFINES='DEBUG') debug.Program(target = 'm5.debug', source = make_objs(sources, debug)) diff --git a/cpu/base_cpu.cc b/cpu/base_cpu.cc index 3ee7a3892..988c7a602 100644 --- a/cpu/base_cpu.cc +++ b/cpu/base_cpu.cc @@ -37,6 +37,8 @@ #include "sim/param.hh" #include "sim/sim_events.hh" +#include "base/trace.hh" + using namespace std; vector BaseCPU::cpuList; @@ -46,6 +48,7 @@ vector BaseCPU::cpuList; // been initialized int maxThreadsPerCPU = 1; +extern void debug_break(); #ifdef FULL_SYSTEM BaseCPU::BaseCPU(const string &_name, int _number_of_threads, Counter max_insts_any_thread, @@ -64,9 +67,16 @@ BaseCPU::BaseCPU(const string &_name, int _number_of_threads, : SimObject(_name), number_of_threads(_number_of_threads) #endif { + DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + + debug_break(); + // add self to global list of CPUs cpuList.push_back(this); + DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + this); + if (number_of_threads > maxThreadsPerCPU) maxThreadsPerCPU = number_of_threads; diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index c527eb08b..74f6b8a6c 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -83,7 +83,7 @@ BaseDynInst::BaseDynInst(MachInst 
machInst, Addr inst_PC, seqNum = seq_num; - specMemWrite = false; +// specMemWrite = false; canIssue = false; issued = false; @@ -95,7 +95,7 @@ BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, blockingInst = false; recoverInst = false; specMode = false; - btbMissed = false; +// btbMissed = false; // Eventually make this a parameter. threadNumber = 0; // Also make this a parameter. @@ -139,12 +139,12 @@ BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) effAddr = MemReq::inval_addr; physEffAddr = MemReq::inval_addr; - specMemWrite = false; +// specMemWrite = false; blockingInst = false; recoverInst = false; specMode = false; - btbMissed = false; +// btbMissed = false; // Make sure to have the renamed register entries set to the same // as the normal register entries. It will allow the IQ to work diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index fe30b5195..171721e61 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -146,7 +146,10 @@ class BaseDynInst : public FastAlloc, public RefCounted bool threadsyncWait; /** If the BTB missed. */ - bool btbMissed; +// bool btbMissed; + + /** The global history of this instruction (branch). */ +// unsigned globalHistory; /** The thread this instruction is from. */ short threadNumber; @@ -212,7 +215,7 @@ class BaseDynInst : public FastAlloc, public RefCounted static int instcount; /** Did this instruction do a spec write? */ - bool specMemWrite; +// bool specMemWrite; private: /** Physical register index of the destination registers of this @@ -287,15 +290,22 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether the instruction was predicted taken or not. */ bool predTaken() { -// DPRINTF(FullCPU, "PC: %08p\n", PC); -// DPRINTF(FullCPU, "predPC: %08p\n", predPC); - return( predPC != (PC + sizeof(MachInst) ) ); } /** Returns whether the instruction mispredicted. 
*/ bool mispredicted() { return (predPC != nextPC); } +/* + unsigned readGlobalHist() { + return globalHistory; + } + + void setGlobalHist(unsigned history) { + globalHistory = history; + } +*/ + // // Instruction types. Forward checks to StaticInst object. // @@ -452,7 +462,7 @@ class BaseDynInst : public FastAlloc, public RefCounted OpClass opClass() const { return staticInst->opClass(); } /** Returns whether or not the BTB missed. */ - bool btbMiss() const { return btbMissed; } +// bool btbMiss() const { return btbMissed; } /** Returns the branch target address. */ Addr branchTarget() const { return staticInst->branchTarget(PC); } @@ -579,8 +589,8 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) storeSize = sizeof(T); storeData = data; - if (specMode) - specMemWrite = true; +// if (specMode) +// specMemWrite = true; MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc index 88c39a9b0..ef7f23d49 100644 --- a/cpu/beta_cpu/2bit_local_pred.cc +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -75,18 +75,34 @@ DefaultBP::getLocalIndex(Addr &branch_addr) bool DefaultBP::lookup(Addr &branch_addr) { + bool taken; uint8_t local_prediction; unsigned local_predictor_idx = getLocalIndex(branch_addr); DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", local_predictor_idx); + assert(local_predictor_idx < localPredictorSize); + local_prediction = localCtrs[local_predictor_idx].read(); DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", (int)local_prediction); - return getPrediction(local_prediction); + taken = getPrediction(local_prediction); + +#if 0 + // Speculative update. 
+ if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +#endif + + return taken; } void @@ -100,11 +116,17 @@ DefaultBP::update(Addr &branch_addr, bool taken) DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", local_predictor_idx); + assert(local_predictor_idx < localPredictorSize); + + // Increment or decrement twice to undo speculative update, then + // properly update if (taken) { DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); localCtrs[local_predictor_idx].increment(); +// localCtrs[local_predictor_idx].increment(); } else { DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); localCtrs[local_predictor_idx].decrement(); +// localCtrs[local_predictor_idx].decrement(); } } diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index 4e1cebd11..c964762db 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -19,19 +19,19 @@ template class AlphaDynInst : public BaseDynInst { public: - // Typedef for the CPU. + /** Typedef for the CPU. */ typedef typename Impl::FullCPU FullCPU; - //Typedef to get the ISA. + /** Typedef to get the ISA. */ typedef typename Impl::ISA ISA; - /// Binary machine instruction type. + /** Binary machine instruction type. */ typedef typename ISA::MachInst MachInst; - /// Memory address type. + /** Memory address type. */ typedef typename ISA::Addr Addr; - /// Logical register index type. + /** Logical register index type. */ typedef typename ISA::RegIndex RegIndex; - /// Integer register index type. + /** Integer register index type. 
*/ typedef typename ISA::IntReg IntReg; enum { @@ -54,6 +54,7 @@ class AlphaDynInst : public BaseDynInst return fault; } + public: uint64_t readUniq(); void setUniq(uint64_t val); diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index 0e094b122..e01eba3bf 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -29,6 +29,8 @@ class AlphaFullCPU : public FullBetaCPU #endif public: + void regStats(); + #ifdef FULL_SYSTEM bool inPalMode(); @@ -66,14 +68,17 @@ class AlphaFullCPU : public FullBetaCPU req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; return No_Fault; } + Fault translateInstReq(MemReqPtr &req) { return dummyTranslation(req); } + Fault translateDataReadReq(MemReqPtr &req) { return dummyTranslation(req); } + Fault translateDataWriteReq(MemReqPtr &req) { return dummyTranslation(req); @@ -81,73 +86,6 @@ class AlphaFullCPU : public FullBetaCPU #endif - template - Fault read(MemReqPtr &req, T &data) - { -#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) - if (req->flags & LOCKED) { - MiscRegFile *cregs = &req->xc->regs.miscRegs; - cregs->lock_addr = req->paddr; - cregs->lock_flag = true; - } -#endif - - Fault error; - error = mem->read(req, data); - data = htoa(data); - return error; - } - - template - Fault write(MemReqPtr &req, T &data) - { -#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) - - MiscRegFile *cregs; - - // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { - cregs = &xc->regs.miscRegs; - - if (req->flags & UNCACHEABLE) { - // Don't update result register (see stq_c in isa_desc) - req->result = 2; - req->xc->storeCondFailures = 0;//Needed? 
[RGD] - } else { - req->result = cregs->lock_flag; - if (!cregs->lock_flag || - ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) { - cregs->lock_flag = false; - if (((++req->xc->storeCondFailures) % 100000) == 0) { - std::cerr << "Warning: " - << req->xc->storeCondFailures - << " consecutive store conditional failures " - << "on cpu " << cpu_id - << std::endl; - } - return No_Fault; - } - else req->xc->storeCondFailures = 0; - } - } - - // Need to clear any locked flags on other proccessors for - // this address. Only do this for succsful Store Conditionals - // and all other stores (WH64?). Unsuccessful Store - // Conditionals would have returned above, and wouldn't fall - // through. - for (int i = 0; i < system->execContexts.size(); i++){ - cregs = &system->execContexts[i]->regs.miscRegs; - if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) { - cregs->lock_flag = false; - } - } - -#endif - - return mem->write(req, (T)htoa(data)); - } - // Later on may want to remove this misc stuff from the regfile and // have it handled at this level. Might prove to be an issue when // trying to rename source/destination registers... @@ -240,6 +178,76 @@ class AlphaFullCPU : public FullBetaCPU // Called by initCPU. Implement as I please. 
void initIPRs(RegFile *regs); #endif + + + template + Fault read(MemReqPtr &req, T &data) + { +#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) + if (req->flags & LOCKED) { + MiscRegFile *cregs = &req->xc->regs.miscRegs; + cregs->lock_addr = req->paddr; + cregs->lock_flag = true; + } +#endif + + Fault error; + error = mem->read(req, data); + data = htoa(data); + return error; + } + + + template + Fault write(MemReqPtr &req, T &data) + { +#if defined(TARGET_ALPHA) && defined(FULL_SYSTEM) + + MiscRegFile *cregs; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + cregs = &xc->regs.miscRegs; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + req->xc->storeCondFailures = 0;//Needed? [RGD] + } else { + req->result = cregs->lock_flag; + if (!cregs->lock_flag || + ((cregs->lock_addr & ~0xf) != (req->paddr & ~0xf))) { + cregs->lock_flag = false; + if (((++req->xc->storeCondFailures) % 100000) == 0) { + std::cerr << "Warning: " + << req->xc->storeCondFailures + << " consecutive store conditional failures " + << "on cpu " << cpu_id + << std::endl; + } + return No_Fault; + } + else req->xc->storeCondFailures = 0; + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. 
+ for (int i = 0; i < system->execContexts.size(); i++){ + cregs = &system->execContexts[i]->regs.miscRegs; + if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) { + cregs->lock_flag = false; + } + } + +#endif + + return mem->write(req, (T)htoa(data)); + } + }; #endif // __ALPHA_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc index 5fe96d656..f37081232 100644 --- a/cpu/beta_cpu/alpha_full_cpu_builder.cc +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -81,17 +81,38 @@ Param issueWidth; Param executeWidth; Param executeIntWidth; Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; Param iewToCommitDelay; Param renameToROBDelay; Param commitWidth; Param squashWidth; +#if 0 Param localPredictorSize; Param localPredictorCtrBits; +#endif +Param local_predictor_size; +Param local_ctr_bits; +Param local_history_table_size; +Param local_history_bits; +Param global_predictor_size; +Param global_ctr_bits; +Param global_history_bits; +Param choice_predictor_size; +Param choice_ctr_bits; + Param BTBEntries; Param BTBTagSize; +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + Param numPhysIntRegs; Param numPhysFloatRegs; Param numIQEntries; @@ -168,6 +189,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) INIT_PARAM(executeWidth, "Execute width"), INIT_PARAM(executeIntWidth, "Integer execute width"), INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " "delay"), @@ -175,12 +198,30 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), +#if 0 INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. 
" "Must be a power of 2."), INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), +#endif + INIT_PARAM(local_predictor_size, "Size of local predictor"), + INIT_PARAM(local_ctr_bits, "Bits per counter"), + INIT_PARAM(local_history_table_size, "Size of local history table"), + INIT_PARAM(local_history_bits, "Bits for the local history"), + INIT_PARAM(global_predictor_size, "Size of global predictor"), + INIT_PARAM(global_ctr_bits, "Bits per counter"), + INIT_PARAM(global_history_bits, "Bits of history"), + INIT_PARAM(choice_predictor_size, "Size of choice predictor"), + INIT_PARAM(choice_ctr_bits, "Bits of choice counters"), + INIT_PARAM(BTBEntries, "Number of BTB entries"), INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " @@ -277,17 +318,37 @@ CREATE_SIM_OBJECT(BaseFullCPU) params.executeWidth = executeWidth; params.executeIntWidth = executeIntWidth; params.executeFloatWidth = executeFloatWidth; + params.executeBranchWidth = executeBranchWidth; + params.executeMemoryWidth = executeMemoryWidth; params.iewToCommitDelay = iewToCommitDelay; params.renameToROBDelay = renameToROBDelay; params.commitWidth = commitWidth; params.squashWidth = squashWidth; - +#if 0 params.localPredictorSize = localPredictorSize; params.localPredictorCtrBits = localPredictorCtrBits; +#endif + params.local_predictor_size = local_predictor_size; + params.local_ctr_bits = local_ctr_bits; + params.local_history_table_size = local_history_table_size; + params.local_history_bits = local_history_bits; + params.global_predictor_size = global_predictor_size; + params.global_ctr_bits = 
global_ctr_bits; + params.global_history_bits = global_history_bits; + params.choice_predictor_size = choice_predictor_size; + params.choice_ctr_bits = choice_ctr_bits; + params.BTBEntries = BTBEntries; params.BTBTagSize = BTBTagSize; + params.RASSize = RASSize; + + params.LQEntries = LQEntries; + params.SQEntries = SQEntries; + params.SSITSize = SSITSize; + params.LFSTSize = LFSTSize; + params.numPhysIntRegs = numPhysIntRegs; params.numPhysFloatRegs = numPhysFloatRegs; params.numIQEntries = numIQEntries; diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh index 8bfc0777e..ee8f9f33b 100644 --- a/cpu/beta_cpu/alpha_full_cpu_impl.hh +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -27,6 +27,19 @@ AlphaFullCPU::AlphaFullCPU(Params &params) rob.setCPU(this); } +template +void +AlphaFullCPU::regStats() +{ + // Register stats for everything that has stats. + fullCPURegStats(); + fetch.regStats(); + decode.regStats(); + rename.regStats(); + iew.regStats(); + commit.regStats(); +} + #ifndef FULL_SYSTEM template @@ -92,6 +105,14 @@ AlphaFullCPU::squashStages() rob.squash(rob_head); commit.setSquashing(); + + // Now hack the time buffer to clear the sequence numbers in the places + // where the stages might read it.? + for (int i = 0; i < 5; ++i) + { + timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; + } + } #endif // FULL_SYSTEM @@ -178,7 +199,7 @@ template uint64_t * AlphaFullCPU::getIpr() { - return regs.ipr; + return regFile.getIpr(); } template @@ -564,7 +585,7 @@ AlphaFullCPU::setIntrFlag(int val) regs.intrflag = val; } -// Maybe have this send back from IEW stage to squash and update PC. +// Can force commit stage to squash and stuff.
template Fault AlphaFullCPU::hwrei() diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh index 92dfd35f5..ecde4b016 100644 --- a/cpu/beta_cpu/alpha_params.hh +++ b/cpu/beta_cpu/alpha_params.hh @@ -72,6 +72,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned executeWidth; unsigned executeIntWidth; unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; // // Commit @@ -84,17 +86,38 @@ class AlphaSimpleParams : public BaseFullCPU::Params // // Branch predictor (BP & BTB) // +/* unsigned localPredictorSize; unsigned localPredictorCtrBits; +*/ + + unsigned local_predictor_size; + unsigned local_ctr_bits; + unsigned local_history_table_size; + unsigned local_history_bits; + unsigned global_predictor_size; + unsigned global_ctr_bits; + unsigned global_history_bits; + unsigned choice_predictor_size; + unsigned choice_ctr_bits; + unsigned BTBEntries; unsigned BTBTagSize; + unsigned RASSize; + // // Load store queue // unsigned LQEntries; unsigned SQEntries; + // + // Memory dependence + // + unsigned SSITSize; + unsigned LFSTSize; + // // Miscellaneous // diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc index 6de2def44..c4a79fbbe 100644 --- a/cpu/beta_cpu/bpred_unit.cc +++ b/cpu/beta_cpu/bpred_unit.cc @@ -1,5 +1,6 @@ #include "cpu/beta_cpu/bpred_unit_impl.hh" #include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/alpha_dyn_inst.hh" -template DefaultBPredUnit; +template TwobitBPredUnit; diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh index 71191f5b7..53c7146c5 100644 --- a/cpu/beta_cpu/bpred_unit.hh +++ b/cpu/beta_cpu/bpred_unit.hh @@ -4,9 +4,15 @@ // For Addr type. 
#include "arch/alpha/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" #include "cpu/beta_cpu/2bit_local_pred.hh" +#include "cpu/beta_cpu/tournament_pred.hh" #include "cpu/beta_cpu/btb.hh" +#include "cpu/beta_cpu/ras.hh" + +#include /** * Basically a wrapper class to hold both the branch predictor @@ -18,34 +24,86 @@ * object, and be able to call the constructors on the BP and BTB. */ template -class DefaultBPredUnit +class TwobitBPredUnit { public: typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; - DefaultBPredUnit(Params ¶ms); + TwobitBPredUnit(Params ¶ms); + + void regStats(); + + bool predict(DynInstPtr &inst, Addr &PC); + + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken); + + void squash(const InstSeqNum &squashed_sn); + + void update(const InstSeqNum &done_sn); bool BPLookup(Addr &inst_PC) { return BP.lookup(inst_PC); } + unsigned BPReadGlobalHist() + { return 0; } + bool BTBValid(Addr &inst_PC) { return BTB.valid(inst_PC); } Addr BTBLookup(Addr &inst_PC) { return BTB.lookup(inst_PC); } - void BPUpdate(Addr &inst_PC, bool taken) + // Will want to include global history. 
+ void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken) { BP.update(inst_PC, taken); } void BTBUpdate(Addr &inst_PC, Addr &target_PC) { BTB.update(inst_PC, target_PC); } private: + struct PredictorHistory { + PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, + const bool pred_taken) + : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken), + globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0), + RASTarget(0) + { } + + InstSeqNum seqNum; + + Addr PC; + + bool predTaken; + + unsigned globalHistory; + + bool usedRAS; + + bool wasCall; + + unsigned RASIndex; + + Addr RASTarget; + }; + + std::list predHist; DefaultBP BP; DefaultBTB BTB; + ReturnAddrStack RAS; + + Stats::Scalar<> lookups; + Stats::Scalar<> condPredicted; + Stats::Scalar<> condIncorrect; + Stats::Scalar<> BTBLookups; + Stats::Scalar<> BTBHits; + Stats::Scalar<> BTBCorrect; + Stats::Scalar<> usedRAS; + Stats::Scalar<> RASIncorrect; }; #endif // __BPRED_UNIT_HH__ diff --git a/cpu/beta_cpu/bpred_unit_impl.hh b/cpu/beta_cpu/bpred_unit_impl.hh index 47415ce9b..02c613d34 100644 --- a/cpu/beta_cpu/bpred_unit_impl.hh +++ b/cpu/beta_cpu/bpred_unit_impl.hh @@ -1,13 +1,247 @@ #include "cpu/beta_cpu/bpred_unit.hh" +#include "base/traceflags.hh" +#include "base/trace.hh" template -DefaultBPredUnit::DefaultBPredUnit(Params ¶ms) - : BP(params.localPredictorSize, - params.localPredictorCtrBits, +TwobitBPredUnit::TwobitBPredUnit(Params ¶ms) + : BP(params.local_predictor_size, + params.local_ctr_bits, params.instShiftAmt), BTB(params.BTBEntries, params.BTBTagSize, - params.instShiftAmt) + params.instShiftAmt), + RAS(params.RASSize) { } + +template +void +TwobitBPredUnit::regStats() +{ + lookups + .name(name() + ".BPredUnit.lookups") + .desc("Number of BP lookups") + ; + + condPredicted + .name(name() + ".BPredUnit.condPredicted") + .desc("Number of conditional branches predicted") + ; + + condIncorrect + .name(name() + ".BPredUnit.condIncorrect") + .desc("Number of conditional branches 
incorrect") + ; + + BTBLookups + .name(name() + ".BPredUnit.BTBLookups") + .desc("Number of BTB lookups") + ; + + BTBHits + .name(name() + ".BPredUnit.BTBHits") + .desc("Number of BTB hits") + ; + + BTBCorrect + .name(name() + ".BPredUnit.BTBCorrect") + .desc("Number of correct BTB predictions (this stat may not " + "work properly.") + ; + + usedRAS + .name(name() + ".BPredUnit.usedRAS") + .desc("Number of times the RAS was used.") + ; + + RASIncorrect + .name(name() + ".BPredUnit.RASInCorrect") + .desc("Number of incorrect RAS predictions.") + ; +} + +template +bool +TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Once that's done, speculatively update the predictor? + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. + + bool pred_taken = false; + Addr target; + + ++lookups; + + if (inst->isUncondCtrl()) { + DPRINTF(Fetch, "BranchPred: Unconditional control.\n"); + pred_taken = true; + } else { + ++condPredicted; + + pred_taken = BPLookup(PC); + + DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x" + "\n", pred_taken, inst->readPC()); + } + + PredictorHistory predict_record(inst->seqNum, PC, pred_taken); + + // Now lookup in the BTB or RAS. + if (pred_taken) { + if (inst->isReturn()) { + ++usedRAS; + + // If it's a function return call, then look up the address + // in the RAS. + target = RAS.top(); + + // Record the top entry of the RAS, and its index. 
+ predict_record.usedRAS = true; + predict_record.RASIndex = RAS.topIdx(); + predict_record.RASTarget = target; + + RAS.pop(); + + DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS " + "predicted target: %#x, RAS index: %i.\n", + inst->readPC(), target, predict_record.RASIndex); + } else { + ++BTBLookups; + + if (inst->isCall()) { + RAS.push(PC+sizeof(MachInst)); + + // Record that it was a call so that the top RAS entry can + // be popped off if the speculation is incorrect. + predict_record.wasCall = true; + + DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, " + "adding %#x to the RAS.\n", + inst->readPC(), PC+sizeof(MachInst)); + } + + if (BTB.valid(PC)) { + ++BTBHits; + + //If it's anything else, use the BTB to get the target addr. + target = BTB.lookup(PC); + + DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target " + "is %#x.\n", inst->readPC(), target); + + } else { + DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry." + "\n"); + pred_taken = false; + + // The history record was built before the BTB lookup; keep + // it in sync with the direction actually returned so that + // update() does not train the predictor as taken. + predict_record.predTaken = false; + } + + } + } + + if (pred_taken) { + // Set the PC and the instruction's predicted target. + PC = target; + inst->setPredTarg(target); + } else { + PC = PC + sizeof(MachInst); + inst->setPredTarg(PC); + } + + predHist.push_front(predict_record); + + assert(!predHist.empty()); + + return pred_taken; +} + +template +void +TwobitBPredUnit::update(const InstSeqNum &done_sn) +{ + DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number " + "%i.\n", done_sn); + + while (!predHist.empty() && predHist.back().seqNum <= done_sn) { + assert(!predHist.empty()); + + // Update the branch predictor with the correct results of branches. 
+ BP.update(predHist.back().PC, predHist.back().predTaken); + + predHist.pop_back(); + } +} + +// Discards every recorded prediction younger than squashed_sn, undoing +// the RAS pop or push that each discarded prediction performed. +template +void +TwobitBPredUnit::squash(const InstSeqNum &squashed_sn) +{ + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } +} + +// Squash for a mispredicted branch: discards the younger predictions, +// then retrains the predictor and the BTB with the resolved direction +// (actually_taken) and target (corr_target). +template +void +TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, + const bool actually_taken) +{ + // Now that we know that a branch was mispredicted, we need to undo + // all the branches that have been seen up until this branch and + // fix up everything. + + ++condIncorrect; + + DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, " + "setting target to %#x.\n", + squashed_sn, corr_target); + + // Unwind the younger predictions with the single-argument overload + // rather than repeating its loop here. + squash(squashed_sn); + + // The history may hold no entry at or before squashed_sn; front() on + // an empty list is undefined, so there is nothing left to fix up. + if (predHist.empty()) { + return; + } + + predHist.front().predTaken = actually_taken; + + if (predHist.front().usedRAS) { + ++RASIncorrect; + } + + BP.update(predHist.front().PC, actually_taken); + + BTB.update(predHist.front().PC, corr_target); +} diff --git a/cpu/beta_cpu/btb.cc b/cpu/beta_cpu/btb.cc index b49f30482..bceaa66d1 100644 --- a/cpu/beta_cpu/btb.cc +++ b/cpu/beta_cpu/btb.cc @@ -50,6 +50,8 @@ 
DefaultBTB::valid(const Addr &inst_PC) Addr inst_tag = getTag(inst_PC); + assert(btb_idx < numEntries); + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { return true; } else { @@ -67,6 +69,8 @@ DefaultBTB::lookup(const Addr &inst_PC) Addr inst_tag = getTag(inst_PC); + assert(btb_idx < numEntries); + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { return btb[btb_idx].target; } else { @@ -79,6 +83,8 @@ DefaultBTB::update(const Addr &inst_PC, const Addr &target) { unsigned btb_idx = getIndex(inst_PC); + assert(btb_idx < numEntries); + btb[btb_idx].valid = true; btb[btb_idx].target = target; btb[btb_idx].tag = getTag(inst_PC); diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh index 849a6c797..e327a83b9 100644 --- a/cpu/beta_cpu/comm.hh +++ b/cpu/beta_cpu/comm.hh @@ -9,6 +9,7 @@ using namespace std; // Find better place to put this typedef. +// The impl might be the best place for this. typedef short int PhysRegIndex; template @@ -45,6 +46,14 @@ struct SimpleIEWSimpleCommit { int size; DynInstPtr insts[Impl::MaxWidth + 1]; + + bool squash; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + unsigned globalHist; + InstSeqNum squashedSeqNum; }; template @@ -63,10 +72,15 @@ struct TimeBufStruct { bool predIncorrect; uint64_t branchAddr; + InstSeqNum doneSeqNum; + + // Might want to package this kind of branch stuff into a single + // struct as it is used pretty frequently. bool branchMispredict; bool branchTaken; uint64_t mispredPC; uint64_t nextPC; + unsigned globalHist; }; decodeComm decodeInfo; @@ -84,17 +98,10 @@ struct TimeBufStruct { renameComm renameInfo; struct iewComm { - bool squash; bool stall; // Also eventually include skid buffer space. 
unsigned freeIQEntries; - - bool branchMispredict; - bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - InstSeqNum squashedSeqNum; }; iewComm iewInfo; @@ -108,6 +115,7 @@ struct TimeBufStruct { bool branchTaken; uint64_t mispredPC; uint64_t nextPC; + unsigned globalHist; // Think of better names here. // Will need to be a variety of sizes... diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh index 981d9e78f..f1a185143 100644 --- a/cpu/beta_cpu/commit.hh +++ b/cpu/beta_cpu/commit.hh @@ -59,6 +59,8 @@ class SimpleCommit public: SimpleCommit(Params ¶ms); + void regStats(); + void setCPU(FullCPU *cpu_ptr); void setTimeBuffer(TimeBuffer *tb_ptr); @@ -142,6 +144,17 @@ class SimpleCommit /** Commit width, in instructions. */ unsigned commitWidth; + + Stats::Scalar<> commitCommittedInsts; + Stats::Scalar<> commitSquashedInsts; + Stats::Scalar<> commitSquashEvents; + Stats::Scalar<> commitNonSpecStalls; + Stats::Scalar<> commitCommittedBranches; + Stats::Scalar<> commitCommittedLoads; + Stats::Scalar<> commitCommittedMemRefs; + Stats::Scalar<> branchMispredicts; + + Stats::Distribution<> n_committed_dist; }; #endif // __SIMPLE_COMMIT_HH__ diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh index 45b8bc7de..9a69c9259 100644 --- a/cpu/beta_cpu/commit_impl.hh +++ b/cpu/beta_cpu/commit_impl.hh @@ -21,6 +21,51 @@ SimpleCommit::SimpleCommit(Params ¶ms) _status = Idle; } +template +void +SimpleCommit::regStats() +{ + commitCommittedInsts + .name(name() + ".commitCommittedInsts") + .desc("The number of committed instructions") + .prereq(commitCommittedInsts); + commitSquashedInsts + .name(name() + ".commitSquashedInsts") + .desc("The number of squashed insts skipped by commit") + .prereq(commitSquashedInsts); + commitSquashEvents + .name(name() + ".commitSquashEvents") + .desc("The number of times commit is told to squash") + .prereq(commitSquashEvents); + commitNonSpecStalls + .name(name() + ".commitNonSpecStalls") + .desc("The number of 
times commit has been forced to stall to " + "communicate backwards") + .prereq(commitNonSpecStalls); + commitCommittedBranches + .name(name() + ".commitCommittedBranches") + .desc("The number of committed branches") + .prereq(commitCommittedBranches); + commitCommittedLoads + .name(name() + ".commitCommittedLoads") + .desc("The number of committed loads") + .prereq(commitCommittedLoads); + commitCommittedMemRefs + .name(name() + ".commitCommittedMemRefs") + .desc("The number of committed memory references") + .prereq(commitCommittedMemRefs); + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("The number of times a branch was mispredicted") + .prereq(branchMispredicts); + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(Stats::pdf) + ; +} + template void SimpleCommit::setCPU(FullCPU *cpu_ptr) @@ -143,12 +188,12 @@ SimpleCommit::commit() // Should I also check if the commit stage is telling the ROB to squah? // This might be necessary to keep the same timing between the IQ and // the ROB... - if (robInfoFromIEW->iewInfo.squash) { + if (fromIEW->squash) { DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n"); _status = ROBSquashing; - InstSeqNum squashed_inst = robInfoFromIEW->iewInfo.squashedSeqNum; + InstSeqNum squashed_inst = fromIEW->squashedSeqNum; rob->squash(squashed_inst); @@ -162,15 +207,19 @@ SimpleCommit::commit() // ROB is in the process of squashing. 
toIEW->commitInfo.robSquashing = true; - toIEW->commitInfo.branchMispredict = - robInfoFromIEW->iewInfo.branchMispredict; + toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict; - toIEW->commitInfo.branchTaken = - robInfoFromIEW->iewInfo.branchTaken; + toIEW->commitInfo.branchTaken = fromIEW->branchTaken; - toIEW->commitInfo.nextPC = robInfoFromIEW->iewInfo.nextPC; + toIEW->commitInfo.nextPC = fromIEW->nextPC; - toIEW->commitInfo.mispredPC = robInfoFromIEW->iewInfo.mispredPC; + toIEW->commitInfo.mispredPC = fromIEW->mispredPC; + + toIEW->commitInfo.globalHist = fromIEW->globalHist; + + if (toIEW->commitInfo.branchMispredict) { + ++branchMispredicts; + } } if (_status != ROBSquashing) { @@ -237,6 +286,8 @@ SimpleCommit::commitInsts() // inst in the ROB without affecting any other stages. rob->retireHead(); + ++commitSquashedInsts; + } else { // Increment the total number of non-speculative instructions // executed. @@ -249,7 +300,7 @@ SimpleCommit::commitInsts() bool commit_success = commitHead(head_inst, num_committed); // Update what instruction we are looking at if the commit worked. - if(commit_success) { + if (commit_success) { ++num_committed; // Send back which instruction has been committed. @@ -258,7 +309,11 @@ SimpleCommit::commitInsts() // sequence number instead (copy). toIEW->commitInfo.doneSeqNum = head_inst->seqNum; - cpu->instDone(); + ++commitCommittedInsts; + + if (!head_inst->isNop()) { + cpu->instDone(); + } } else { break; } @@ -267,6 +322,8 @@ SimpleCommit::commitInsts() // Update the pointer to read the next instruction in the ROB. head_inst = rob->readHeadInst(); } + + n_committed_dist.sample(num_committed); } template @@ -276,18 +333,13 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Make sure instruction is valid assert(head_inst); - Fault fault = No_Fault; - - // If the head instruction is a store or a load, then execute it - // because this simple model does no speculative memory access. 
- // Hopefully this covers all memory references. - // Also check if it's nonspeculative. Or a nop. Then it will be - // executed only when it reaches the head of the ROB. Actually - // executing a nop is a bit overkill... + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. if (!head_inst->isExecuted()) { // Keep this number correct. We have not yet actually executed // and committed this instruction. cpu->funcExeInst--; + if (head_inst->isStore() || head_inst->isNonSpeculative()) { DPRINTF(Commit, "Commit: Encountered a store or non-speculative " "instruction at the head of the ROB, PC %#x.\n", @@ -299,6 +351,8 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // it is executed. head_inst->clearCanCommit(); + ++commitNonSpecStalls; + return false; } else { panic("Commit: Trying to commit un-executed instruction " @@ -306,19 +360,6 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) } } - // Check if memory access was successful. - if (fault != No_Fault) { - // Handle data cache miss here. In the future, set the status - // to data cache miss, then exit the stage. Have an event - // that handles commiting the head instruction, then setting - // the stage back to running, when the event is run. (just - // make sure that event is commit's run for that cycle) - panic("Commit: Load/store instruction failed, not sure what " - "to do.\n"); - // Also will want to clear the instruction's fault after being - // handled here so it's not handled again below. - } - // Now check if it's one of the special trap or barrier or // serializing instructions. if (head_inst->isThreadSync() || @@ -335,39 +376,43 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Check if the instruction caused a fault. If so, trap. 
if (head_inst->getFault() != No_Fault) { -#ifdef FULL_SYSTEM - cpu->trap(fault); -#else // !FULL_SYSTEM if (!head_inst->isNop()) { +#ifdef FULL_SYSTEM + // Trap on the fault carried by the instruction; the local 'fault' + // variable no longer exists in this function. + cpu->trap(head_inst->getFault()); +#else // !FULL_SYSTEM panic("fault (%d) detected @ PC %08p", head_inst->getFault(), head_inst->PC); - } #endif // FULL_SYSTEM + } } // Check if we're really ready to commit. If not then return false. // I'm pretty sure all instructions should be able to commit if they've // reached this far. For now leave this in as a check. if(!rob->isHeadReady()) { - DPRINTF(Commit, "Commit: Unable to commit head instruction!\n"); + panic("Commit: Unable to commit head instruction!\n"); return false; } // If it's a branch, then send back branch prediction update info // to the fetch stage. // This should be handled in the iew stage if a mispredict happens... -#if 0 + if (head_inst->isControl()) { +#if 0 toIEW->nextPC = head_inst->readPC(); //Maybe switch over to BTB incorrect. toIEW->btbMissed = head_inst->btbMiss(); toIEW->target = head_inst->nextPC; //Maybe also include global history information. //This simple version will have no branch prediction however. - } #endif + ++commitCommittedBranches; + } + + #if 0 // Check if the instruction has a destination register. // If so add the previous physical register of its logical register's @@ -383,8 +428,12 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // the LDSTQ will already have been told that a store has reached the head // of the ROB. Consider including communication if it's a store as well // to keep things orthagonal. 
- if (head_inst->isLoad()) { - toIEW->commitInfo.commitIsLoad = true; + if (head_inst->isMemRef()) { + ++commitCommittedMemRefs; + if (head_inst->isLoad()) { + toIEW->commitInfo.commitIsLoad = true; + ++commitCommittedLoads; + } } // Now that the instruction is going to be committed, finalize its diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh index ec8460b77..1479eb191 100644 --- a/cpu/beta_cpu/cpu_policy.hh +++ b/cpu/beta_cpu/cpu_policy.hh @@ -22,7 +22,7 @@ template struct SimpleCPUPolicy { - typedef DefaultBPredUnit BPredUnit; + typedef TwobitBPredUnit BPredUnit; typedef PhysRegFile RegFile; typedef SimpleFreeList FreeList; typedef SimpleRenameMap RenameMap; diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh index be88a4b36..64e87290e 100644 --- a/cpu/beta_cpu/decode.hh +++ b/cpu/beta_cpu/decode.hh @@ -49,6 +49,8 @@ class SimpleDecode public: SimpleDecode(Params ¶ms); + void regStats(); + void setCPU(FullCPU *cpu_ptr); void setTimeBuffer(TimeBuffer *tb_ptr); @@ -128,6 +130,15 @@ class SimpleDecode * group of instructions, it can restart at the proper instruction. 
*/ unsigned numInst; + + Stats::Scalar<> decodeIdleCycles; + Stats::Scalar<> decodeBlockedCycles; + Stats::Scalar<> decodeUnblockCycles; + Stats::Scalar<> decodeSquashCycles; + Stats::Scalar<> decodeBranchMispred; + Stats::Scalar<> decodeControlMispred; + Stats::Scalar<> decodeDecodedInsts; + Stats::Scalar<> decodeSquashedInsts; }; #endif // __SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh index d0f46eaa5..8b20bf8bc 100644 --- a/cpu/beta_cpu/decode_impl.hh +++ b/cpu/beta_cpu/decode_impl.hh @@ -16,6 +16,45 @@ SimpleDecode::SimpleDecode(Params ¶ms) _status = Idle; } +template +void +SimpleDecode::regStats() +{ + decodeIdleCycles + .name(name() + ".decodeIdleCycles") + .desc("Number of cycles decode is idle") + .prereq(decodeIdleCycles); + decodeBlockedCycles + .name(name() + ".decodeBlockedCycles") + .desc("Number of cycles decode is blocked") + .prereq(decodeBlockedCycles); + decodeUnblockCycles + .name(name() + ".decodeUnblockCycles") + .desc("Number of cycles decode is unblocking") + .prereq(decodeUnblockCycles); + decodeSquashCycles + .name(name() + ".decodeSquashCycles") + .desc("Number of cycles decode is squashing") + .prereq(decodeSquashCycles); + decodeBranchMispred + .name(name() + ".decodeBranchMispred") + .desc("Number of times decode detected a branch misprediction") + .prereq(decodeBranchMispred); + decodeControlMispred + .name(name() + ".decodeControlMispred") + .desc("Number of times decode detected an instruction incorrectly" + " predicted as a control") + .prereq(decodeControlMispred); + decodeDecodedInsts + .name(name() + ".decodeDecodedInsts") + .desc("Number of instructions handled by decode") + .prereq(decodeDecodedInsts); + decodeSquashedInsts + .name(name() + ".decodeSquashedInsts") + .desc("Number of squashed instructions handled by decode") + .prereq(decodeSquashedInsts); +} + template void SimpleDecode::setCPU(FullCPU *cpu_ptr) @@ -91,7 +130,7 @@ SimpleDecode::unblock() // If there's still 
information in the skid buffer, then // continue to tell previous stages to stall. They will be - // able to restart once the skid buffer is empty. + // able to restart once the skid buffer is empty. if (!skidBuffer.empty()) { toFetch->decodeInfo.stall = true; } else { @@ -110,9 +149,12 @@ SimpleDecode::squash(DynInstPtr &inst) "detected at decode.\n"); Addr new_PC = inst->nextPC; + toFetch->decodeInfo.branchMispredict = true; + toFetch->decodeInfo.doneSeqNum = inst->seqNum; toFetch->decodeInfo.predIncorrect = true; toFetch->decodeInfo.squash = true; toFetch->decodeInfo.nextPC = new_PC; + toFetch->decodeInfo.branchTaken = true; // Set status to squashing. _status = Squashing; @@ -164,6 +206,8 @@ SimpleDecode::tick() // buffer were used. Remove those instructions and handle // the rest of unblocking. if (_status == Unblocking) { + ++decodeUnblockCycles; + if (fromFetch->size > 0) { // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. @@ -173,6 +217,8 @@ SimpleDecode::tick() unblock(); } } else if (_status == Blocked) { + ++decodeBlockedCycles; + if (fromFetch->size > 0) { block(); } @@ -197,6 +243,8 @@ SimpleDecode::tick() squash(); } } else if (_status == Squashing) { + ++decodeSquashCycles; + if (!fromCommit->commitInfo.squash && !fromCommit->commitInfo.robSquashing) { _status = Running; @@ -228,17 +276,16 @@ SimpleDecode::decode() // Check fetch queue to see if instructions are available. // If no available instructions, do nothing, unless this stage is // currently unblocking. - if (!fromFetch->insts[0] && _status != Unblocking) { + if (fromFetch->size == 0 && _status != Unblocking) { DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); // Should I change the status to idle? + ++decodeIdleCycles; return; } + // Might be better to use a base DynInst * instead? DynInstPtr inst; - // Instead have a class member variable that records which instruction - // was the last one that was ended on. 
At the tick() stage, it can - // check if that's equal to 0. If not, then don't pop stuff off. unsigned to_rename_index = 0; int insts_available = _status == Unblocking ? @@ -264,18 +311,10 @@ SimpleDecode::decode() } #endif - // Check to make sure that instructions coming from fetch are valid. - // Normally at this stage the branch target of PC-relative branches - // should be computed here. However in this simple model all - // computation will take place at execute. Hence doneTargCalc() - // will always be false. while (insts_available > 0) { DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); - // Might create some sort of accessor to get an instruction - // on a per thread basis. Or might be faster to just get - // a pointer to an array or list of instructions and use that - // within this code. + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : fromFetch->insts[numInst]; @@ -287,6 +326,8 @@ SimpleDecode::decode() "squashed, skipping.\n", inst->seqNum, inst->readPC()); + ++decodeSquashedInsts; + ++numInst; --insts_available; @@ -305,16 +346,22 @@ SimpleDecode::decode() if (inst->predTaken() && !inst->isControl()) { panic("Instruction predicted as a branch!"); + ++decodeControlMispred; // Might want to set some sort of boolean and just do // a check at the end squash(inst); break; } - // Ensure that the predicted branch target is the actual branch - // target if possible (branches that are PC relative). - if (inst->isControl() && inst->doneTargCalc()) { + // Go ahead and compute any PC-relative branches. 
+ + if (inst->isDirectCtrl() && inst->isUncondCtrl() && + inst->numDestRegs() == 0 && inst->numSrcRegs() == 0) { + inst->execute(); + inst->setExecuted(); + if (inst->mispredicted()) { + ++decodeBranchMispred; // Might want to set some sort of boolean and just do // a check at the end squash(inst); @@ -322,6 +369,11 @@ SimpleDecode::decode() } } + // Normally can check if a direct branch has the right target + // addr (either the immediate, or the branch PC + 4) and redirect + // fetch if it's incorrect. + + // Also check if instructions have no source registers. Mark // them as ready to issue at any time. Not sure if this check // should exist here or at a later stage; however it doesn't matter @@ -334,6 +386,7 @@ SimpleDecode::decode() // Increment which instruction we're looking at. ++numInst; ++to_rename_index; + ++decodeDecodedInsts; --insts_available; } diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh index e59a9df7f..4cfc2f167 100644 --- a/cpu/beta_cpu/fetch.hh +++ b/cpu/beta_cpu/fetch.hh @@ -14,6 +14,7 @@ #include "sim/eventq.hh" #include "cpu/pc_event.hh" #include "mem/mem_interface.hh" +#include "base/statistics.hh" /** * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch @@ -59,6 +60,8 @@ class SimpleFetch /** SimpleFetch constructor. */ SimpleFetch(Params ¶ms); + void regStats(); + void setCPU(FullCPU *cpu_ptr); void setTimeBuffer(TimeBuffer *time_buffer); @@ -73,9 +76,13 @@ class SimpleFetch // private: // Figure out PC vs next PC and how it should be updated - void squash(Addr newPC); + void squash(const Addr &new_PC); private: + inline void doSquash(const Addr &new_PC); + + void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); + /** * Looks up in the branch predictor to see if the next PC should be * either next PC+=MachInst or a branch target. @@ -84,7 +91,27 @@ class SimpleFetch * the next PC will be. * @return Whether or not a branch was predicted as taken. 
*/ - bool lookupAndUpdateNextPC(Addr &next_PC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + + // Might not want this function... +// inline void recordGlobalHist(DynInstPtr &inst); + + /** + * Fetches the cache line that contains fetch_PC. Returns any + * fault that happened. Puts the data into the class variable + * cacheData. + * @params fetch_PC The PC address that is being fetched from. + * @return Any fault that occured. + */ + Fault fetchCacheLine(Addr fetch_PC); + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = ISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } public: class CacheCompletionEvent : public Event @@ -99,7 +126,7 @@ class SimpleFetch virtual const char *description(); }; - CacheCompletionEvent cacheCompletionEvent; +// CacheCompletionEvent cacheCompletionEvent; private: /** Pointer to the FullCPU. */ @@ -152,20 +179,32 @@ class SimpleFetch unsigned fetchWidth; /** Cache block size. */ - int blkSize; + int cacheBlkSize; /** Mask to get a cache block's address. */ - Addr cacheBlockMask; + Addr cacheBlkMask; /** The instruction being fetched. */ - MachInst inst; +// MachInst inst; + + /** The cache line being fetched. */ + uint8_t *cacheData; /** Size of instructions. */ int instSize; /** Icache stall statistics. 
*/ -// Stats::Scalar<> icacheStallCycles; -// Counter lastIcacheStall; + Counter lastIcacheStall; + + Stats::Scalar<> icacheStallCycles; + Stats::Scalar<> fetchedInsts; + Stats::Scalar<> predictedBranches; + Stats::Scalar<> fetchCycles; + Stats::Scalar<> fetchSquashCycles; + Stats::Scalar<> fetchBlockedCycles; + Stats::Scalar<> fetchedCacheLines; + + Stats::Distribution<> fetch_nisn_dist; }; #endif //__SIMPLE_FETCH_HH__ diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh index 93f7bf6d2..8c9cf9f41 100644 --- a/cpu/beta_cpu/fetch_impl.hh +++ b/cpu/beta_cpu/fetch_impl.hh @@ -1,10 +1,8 @@ -// Todo: Add in branch prediction. With probe path, should -// be able to specify -// size of data to fetch. Will be able to get full cache line. - -// Remove this later. +// Remove this later; used only for debugging. #define OPCODE(X) (X >> 26) & 0x3f + +#include "arch/alpha/byte_swap.hh" #include "cpu/exetrace.hh" #include "mem/base_mem.hh" #include "mem/mem_interface.hh" @@ -37,15 +35,14 @@ SimpleFetch::CacheCompletionEvent::description() template SimpleFetch::SimpleFetch(Params ¶ms) - : cacheCompletionEvent(this), + : //cacheCompletionEvent(this), icacheInterface(params.icacheInterface), branchPred(params), decodeToFetchDelay(params.decodeToFetchDelay), renameToFetchDelay(params.renameToFetchDelay), iewToFetchDelay(params.iewToFetchDelay), commitToFetchDelay(params.commitToFetchDelay), - fetchWidth(params.fetchWidth), - inst(0) + fetchWidth(params.fetchWidth) { // Set status to idle. _status = Idle; @@ -62,13 +59,63 @@ SimpleFetch::SimpleFetch(Params ¶ms) memReq->data = new uint8_t[64]; // Size of cache block. - blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; // Create mask to get rid of offset bits. - cacheBlockMask = (blkSize - 1); + cacheBlkMask = (cacheBlkSize - 1); // Get the size of an instruction. 
instSize = sizeof(MachInst); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; +} + +template +void +SimpleFetch::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetch_nisn_dist + .init(/* base value */ 0, + /* last value */ fetchWidth, + /* bucket size */ 1) + .name(name() + ".FETCH:rate_dist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf) + ; + + branchPred.regStats(); } template @@ -122,19 +169,40 @@ SimpleFetch::processCacheCompletion() _status = IcacheMissComplete; } -template -bool -SimpleFetch::lookupAndUpdateNextPC(Addr &next_PC) +#if 0 +template +inline void +SimpleFetch::recordGlobalHist(DynInstPtr &inst) +{ + inst->setGlobalHist(branchPred.BPReadGlobalHist()); +} +#endif + +template +bool +SimpleFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) { -#if 1 // Do branch prediction check here. 
- bool predict_taken = branchPred.BPLookup(next_PC); - Addr predict_target; + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. + bool predict_taken; + + if (!inst->isControl()) { + next_PC = next_PC + instSize; + inst->setPredTarg(next_PC); + return false; + } + + predict_taken = branchPred.predict(inst, next_PC); + +#if 0 + predict_taken = branchPred.BPLookup(next_PC) DPRINTF(Fetch, "Fetch: Branch predictor predicts taken? %i\n", predict_taken); - if (branchPred.BTBValid(next_PC)) { + // Only check the BTB if the BP has predicted taken. + if (predict_taken && branchPred.BTBValid(next_PC)) { predict_target = branchPred.BTBLookup(next_PC); DPRINTF(Fetch, "Fetch: BTB target is %#x.\n", predict_target); } else { @@ -142,42 +210,135 @@ SimpleFetch::lookupAndUpdateNextPC(Addr &next_PC) DPRINTF(Fetch, "Fetch: BTB does not have a valid entry.\n"); } - // Now update the PC to fetch the next instruction in the cache - // line. - if (!predict_taken) { - next_PC = next_PC + instSize; - return false; - } else { - next_PC = predict_target; - return true; +#endif + if (predict_taken) { + ++predictedBranches; } -#endif -#if 0 - next_PC = next_PC + instSize; - return false; -#endif + return predict_taken; } -template -void -SimpleFetch::squash(Addr new_PC) +template +Fault +SimpleFetch::fetchCacheLine(Addr fetch_PC) +{ + // Check if the instruction exists within the cache. + // If it does, then proceed on to read the instruction and the rest + // of the instructions in the cache line until either the end of the + // cache line or a predicted taken branch is encountered. + +#ifdef FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + + Fault fault = No_Fault; + + // Align the fetch PC so it's at the start of a cache block. 
+ fetch_PC = icacheBlockAlignPC(fetch_PC); + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq->cmd = Read; + memReq->reset(fetch_PC, cacheBlkSize, flags); + + // Translate the instruction request. + // Should this function be + // in the CPU class ? Probably...ITB/DTB should exist within the + // CPU. + + fault = cpu->translateInstReq(memReq); + + // In the case of faults, the fetch stage may need to stall and wait + // on what caused the fetch (ITB or Icache miss). + + // If translation was successful, attempt to read the first + // instruction. + if (fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); + fault = cpu->mem->read(memReq, cacheData); + // This read may change when the mem interface changes. + + fetchedCacheLines++; + } + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == No_Fault) { + DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); + memReq->completionEvent = NULL; + + memReq->time = curTick; + + MemAccessResult result = icacheInterface->access(memReq); + + // If the cache missed (in this model functional and timing + // memories are different), then schedule an event to wake + // up this stage once the cache miss completes. + if (result != MA_HIT && icacheInterface->doEvents()) { + memReq->completionEvent = new CacheCompletionEvent(this); +// lastIcacheStall = curTick; + + // How does current model work as far as individual + // stages scheduling/unscheduling? + // Perhaps have only the main CPU scheduled/unscheduled, + // and have it choose what stages to run appropriately. 
+ + DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); + _status = IcacheMissStall; + } + } + + return fault; +} + +template +inline void +SimpleFetch::doSquash(const Addr &new_PC) { DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); cpu->setNextPC(new_PC + instSize); cpu->setPC(new_PC); - _status = Squashing; - // Clear the icache miss if it's outstanding. if (_status == IcacheMissStall && icacheInterface) { + DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n"); // @todo: Use an actual thread number here. icacheInterface->squash(0); } - // Tell the CPU to remove any instructions that aren't currently - // in the ROB (instructions in flight that were killed). + _status = Squashing; + + ++fetchSquashCycles; +} + +template +void +SimpleFetch::squashFromDecode(const Addr &new_PC, + const InstSeqNum &seq_num) +{ + DPRINTF(Fetch, "Fetch: Squashing from decode.\n"); + + doSquash(new_PC); + + // Tell the CPU to remove any instructions that are in flight between + // fetch and decode. + cpu->removeInstsUntil(seq_num); + +} + +template +void +SimpleFetch::squash(const Addr &new_PC) +{ + DPRINTF(Fetch, "Fetch: Squash from commit.\n"); + + doSquash(new_PC); + + // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(); } @@ -185,7 +346,6 @@ template void SimpleFetch::tick() { -#if 1 // Check squash signals from commit. if (fromCommit->commitInfo.squash) { DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " @@ -196,13 +356,18 @@ SimpleFetch::tick() // Also check if there's a mispredict that happened. 
if (fromCommit->commitInfo.branchMispredict) { - branchPred.BPUpdate(fromCommit->commitInfo.mispredPC, - fromCommit->commitInfo.branchTaken); - branchPred.BTBUpdate(fromCommit->commitInfo.mispredPC, - fromCommit->commitInfo.nextPC); + branchPred.squash(fromCommit->commitInfo.doneSeqNum, + fromCommit->commitInfo.nextPC, + fromCommit->commitInfo.branchTaken); + } else { + branchPred.squash(fromCommit->commitInfo.doneSeqNum); } return; + } else if (fromCommit->commitInfo.doneSeqNum) { + // Update the branch predictor if it wasn't a squashed instruction + // that was braodcasted. + branchPred.update(fromCommit->commitInfo.doneSeqNum); } // Check ROB squash signals from commit. @@ -211,6 +376,8 @@ SimpleFetch::tick() // Continue to squash. _status = Squashing; + + ++fetchSquashCycles; return; } @@ -220,22 +387,22 @@ SimpleFetch::tick() "from decode.\n"); // Update the branch predictor. - if (fromCommit->decodeInfo.branchMispredict) { - branchPred.BPUpdate(fromDecode->decodeInfo.mispredPC, - fromDecode->decodeInfo.branchTaken); - branchPred.BTBUpdate(fromDecode->decodeInfo.mispredPC, - fromDecode->decodeInfo.nextPC); + if (fromDecode->decodeInfo.branchMispredict) { + branchPred.squash(fromDecode->decodeInfo.doneSeqNum, + fromDecode->decodeInfo.nextPC, + fromDecode->decodeInfo.branchTaken); + } else { + branchPred.squash(fromDecode->decodeInfo.doneSeqNum); } if (_status != Squashing) { // Squash unless we're already squashing? - squash(fromDecode->decodeInfo.nextPC); + squashFromDecode(fromDecode->decodeInfo.nextPC, + fromDecode->decodeInfo.doneSeqNum); return; } } - - // Check if any of the stall signals are high. if (fromDecode->decodeInfo.stall || fromRename->renameInfo.stall || @@ -253,12 +420,15 @@ SimpleFetch::tick() fromCommit->commitInfo.stall); _status = Blocked; + + ++fetchBlockedCycles; return; } else if (_status == Blocked) { // Unblock stage if status is currently blocked and none of the // stall signals are being held high. 
_status = Running; + ++fetchBlockedCycles; return; } @@ -273,74 +443,15 @@ SimpleFetch::tick() // Switch status to running _status = Running; + + ++fetchSquashCycles; } else if (_status != IcacheMissStall) { DPRINTF(Fetch, "Fetch: Running stage.\n"); - fetch(); - } -#endif - -#if 0 - if (_status != Blocked && - _status != Squashing && - _status != IcacheMissStall) { - DPRINTF(Fetch, "Fetch: Running stage.\n"); + ++fetchCycles; fetch(); - } else if (_status == Blocked) { - // If still being told to stall, do nothing. - if (fromDecode->decodeInfo.stall || - fromRename->renameInfo.stall || - fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) - { - DPRINTF(Fetch, "Fetch: Stalling stage.\n"); - DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " - "Commit: %i\n", - fromDecode->decodeInfo.stall, - fromRename->renameInfo.stall, - fromIEW->iewInfo.stall, - fromCommit->commitInfo.stall); - } else { - - DPRINTF(Fetch, "Fetch: Done blocking.\n"); - _status = Running; - } - - if (fromCommit->commitInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from commit.\n"); - squash(fromCommit->commitInfo.nextPC); - return; - } else if (fromDecode->decodeInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from decode.\n"); - squash(fromDecode->decodeInfo.nextPC); - return; - } else if (fromCommit->commitInfo.robSquashing) { - DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); - _status = Squashing; - return; - } - } else if (_status == Squashing) { - // If there are no squash signals then change back to running. - // Note that when a squash starts happening, commitInfo.squash will - // be high. But if the squash is still in progress, then only - // commitInfo.robSquashing will be high. 
- if (!fromCommit->commitInfo.squash && - !fromCommit->commitInfo.robSquashing) { - - DPRINTF(Fetch, "Fetch: Done squashing.\n"); - _status = Running; - } else if (fromCommit->commitInfo.squash) { - // If there's a new squash, then start squashing again. - squash(fromCommit->commitInfo.nextPC); - } else { - // Purely a debugging statement. - DPRINTF(Fetch, "Fetch: ROB still squashing.\n"); - } } -#endif } template @@ -351,13 +462,6 @@ SimpleFetch::fetch() // Start actual fetch ////////////////////////////////////////// -#ifdef FULL_SYSTEM - // Flag to say whether or not address is physical addr. - unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; -#else - unsigned flags = 0; -#endif // FULL_SYSTEM - // The current PC. Addr fetch_PC = cpu->readPC(); @@ -379,64 +483,14 @@ SimpleFetch::fetch() "instruction, starting at PC %08p.\n", fetch_PC); - // Otherwise check if the instruction exists within the cache. - // If it does, then proceed on to read the instruction and the rest - // of the instructions in the cache line until either the end of the - // cache line or a predicted taken branch is encountered. - // Note that this simply checks if the first instruction exists - // within the cache, assuming the rest of the cache line also exists - // within the cache. + fault = fetchCacheLine(fetch_PC); + } - // Setup the memReq to do a read of the first isntruction's address. - // Set the appropriate read size and flags as well. - memReq->cmd = Read; - memReq->reset(fetch_PC, instSize, flags); - - // Translate the instruction request. - // Should this function be - // in the CPU class ? Probably...ITB/DTB should exist within the - // CPU. - - fault = cpu->translateInstReq(memReq); - - // In the case of faults, the fetch stage may need to stall and wait - // on what caused the fetch (ITB or Icache miss). - - // If translation was successful, attempt to read the first - // instruction. 
- if (fault == No_Fault) { - DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); - fault = cpu->mem->read(memReq, inst); - // This read may change when the mem interface changes. - } - - // Now do the timing access to see whether or not the instruction - // exists within the cache. - if (icacheInterface && fault == No_Fault) { - DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); - memReq->completionEvent = NULL; - - memReq->time = curTick; - - MemAccessResult result = icacheInterface->access(memReq); - - // If the cache missed (in this model functional and timing - // memories are different), then schedule an event to wake - // up this stage once the cache miss completes. - if (result != MA_HIT && icacheInterface->doEvents()) { - memReq->completionEvent = &cacheCompletionEvent; -// lastIcacheStall = curTick; - - // How does current model work as far as individual - // stages scheduling/unscheduling? - // Perhaps have only the main CPU scheduled/unscheduled, - // and have it choose what stages to run appropriately. - - DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); - _status = IcacheMissStall; - return; - } - } + // If we had a stall due to an icache miss, then return. It'd + // be nicer if this were handled through the kind of fault that + // is returned by the function. + if (_status == IcacheMissStall) { + return; } // As far as timing goes, the CPU will need to send an event through @@ -446,11 +500,15 @@ SimpleFetch::fetch() Addr next_PC = fetch_PC; InstSeqNum inst_seq; + MachInst inst; + unsigned offset = fetch_PC & cacheBlkMask; + unsigned fetched; - // If the read of the first instruction was successful, then grab the - // instructions from the rest of the cache line and put them into the - // queue heading to decode. if (fault == No_Fault) { + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. 
+ DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); ////////////////////////// @@ -461,124 +519,59 @@ SimpleFetch::fetch() // ended this fetch block. bool predicted_branch = false; - // Might want to keep track of various stats. -// numLinesFetched++; - - // Get a sequence number. - inst_seq = cpu->getAndIncrementInstSeq(); - - // Update the next PC; it either is PC+sizeof(MachInst), or - // branch_target. Check whether or not a branch was taken. - predicted_branch = lookupAndUpdateNextPC(next_PC); - - // Because the first instruction was already fetched, create the - // DynInst and put it into the queue to decode. - DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, - inst_seq, cpu); - - DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - inst_seq, instruction->readPC()); - DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", - OPCODE(inst)); - - instruction->traceData = - Trace::getInstRecord(curTick, cpu->xcBase(), cpu, - instruction->staticInst, - instruction->readPC(), 0); - - cpu->addInst(instruction); - - // Write the instruction to the first slot in the queue - // that heads to decode. - toDecode->insts[0] = instruction; - - toDecode->size++; - - fetch_PC = next_PC; - - ////////////////////////// - // Fetch other instructions - ////////////////////////// - - // Obtain the index into the cache line by getting only the low - // order bits. Will need to do shifting as well. - int line_index = fetch_PC & cacheBlockMask; - - // Take instructions and put them into the queue heading to decode. - // Then read the next instruction in the cache line. Continue - // until either all of the fetch bandwidth is used (not an issue for - // non-SMT), or the end of the cache line is reached. Note that - // this assumes standard cachelines, and not something like a trace - // cache where lines might not end at cache-line size aligned - // addresses. 
- // @todo: Fix the horrible amount of translates/reads that must - // take place due to reading an entire cacheline. Ideally it - // should all take place at once, return an array of binary - // instructions, which can then be used to get all the instructions - // needed. Figure out if I can roll it back into one loop. - for (int fetched = 1; - line_index < blkSize && + for (fetched = 0; + offset < cacheBlkSize && fetched < fetchWidth && !predicted_branch; - line_index+=instSize, ++fetched) + ++fetched) { - // Reset the mem request to setup the read of the next - // instruction. - memReq->reset(fetch_PC, instSize, flags); - - // Translate the instruction request. - fault = cpu->translateInstReq(memReq); - - // Read instruction. - if (fault == No_Fault) { - fault = cpu->mem->read(memReq, inst); - } - - // Check if there was a fault. - if (fault != No_Fault) { - panic("Fetch: Read of instruction faulted when it should " - "succeed; most likely exceeding cache line.\n"); - } // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); - predicted_branch = lookupAndUpdateNextPC(next_PC); + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - instSize); - // Create the actual DynInst. Parameters are: - // DynInst(instruction, PC, predicted PC, CPU pointer). - // Because this simple model has no branch prediction, the - // predicted PC will simply be PC+sizeof(MachInst). - // Update to actually use a branch predictor to predict the - // target in the future. - DynInstPtr instruction = - new DynInst(inst, fetch_PC, next_PC, inst_seq, cpu); + // Get the instruction from the array of the cache line. + inst = htoa(*reinterpret_cast + (&cacheData[offset])); + + // Create a new DynInst from the instruction fetched. 
+ DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, + inst_seq, cpu); + + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + inst_seq, instruction->readPC()); + + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); instruction->traceData = Trace::getInstRecord(curTick, cpu->xcBase(), cpu, instruction->staticInst, instruction->readPC(), 0); - DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - inst_seq, instruction->readPC()); - DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", - OPCODE(inst)); + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + // Add instruction to the CPU's list of instructions. cpu->addInst(instruction); - // Write the instruction to the proper slot in the queue + // Write the instruction to the first slot in the queue // that heads to decode. toDecode->insts[fetched] = instruction; toDecode->size++; - // Might want to keep track of various stats. -// numInstsFetched++; + // Increment stat of fetched instructions. + ++fetchedInsts; - // Update the PC with the next PC. + // Move to the next instruction, unless we have a branch. fetch_PC = next_PC; + + offset+= instSize; } + fetch_nisn_dist.sample(fetched); } // Now that fetching is completed, update the PC to signify what the next @@ -592,6 +585,12 @@ SimpleFetch::fetch() cpu->setPC(next_PC); cpu->setNextPC(next_PC + instSize); } else { + // If the issue was an icache miss, then we can just return and + // wait until it is handled. + if (_status == IcacheMissStall) { + return; + } + // Handle the fault. // This stage will not be able to continue until all the ROB // slots are empty, at which point the fault can be handled. 
diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh index 0d2b2c421..e8e75f7ec 100644 --- a/cpu/beta_cpu/free_list.hh +++ b/cpu/beta_cpu/free_list.hh @@ -6,11 +6,9 @@ #include "arch/alpha/isa_traits.hh" #include "cpu/beta_cpu/comm.hh" +#include "base/traceflags.hh" #include "base/trace.hh" -// Question: Do I even need the number of logical registers? -// How to avoid freeing registers instantly? Same with ROB entries. - /** * FreeList class that simply holds the list of free integer and floating * point registers. Can request for a free register of either type, and @@ -153,8 +151,6 @@ SimpleFreeList::addIntReg(PhysRegIndex freed_reg) assert(!freeIntRegsScoreboard[freed_reg]); freeIntRegsScoreboard[freed_reg] = 1; - //Might want to add in a check for whether or not this register is - //already in there. A bit vector or something similar would be useful. freeIntRegs.push(freed_reg); } @@ -167,8 +163,6 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) assert(!freeFloatRegsScoreboard[freed_reg]); freeFloatRegsScoreboard[freed_reg] = 1; - //Might want to add in a check for whether or not this register is - //already in there. A bit vector or something similar would be useful. freeFloatRegs.push(freed_reg); } diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index abeb4cb87..d5228601c 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -166,6 +166,13 @@ FullBetaCPU::~FullBetaCPU() { } +template +void +FullBetaCPU::fullCPURegStats() +{ + // Register any of the FullCPU's stats here. +} + template void FullBetaCPU::tick() @@ -424,19 +431,17 @@ template void FullBetaCPU::removeFrontInst(DynInstPtr &inst) { - DynInstPtr inst_to_delete; + DynInstPtr inst_to_remove; - // The front instruction should be the same one being asked to be deleted. + // The front instruction should be the same one being asked to be removed. assert(instList.front() == inst); // Remove the front instruction. 
- inst_to_delete = inst; + inst_to_remove = inst; instList.pop_front(); - DPRINTF(FullCPU, "FullCPU: Deleting committed instruction %#x, PC %#x\n", - inst_to_delete, inst_to_delete->readPC()); - -// delete inst_to_delete; + DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n", + inst_to_remove, inst_to_remove->readPC()); } template @@ -451,6 +456,33 @@ FullBetaCPU::removeInstsNotInROB() removeBackInst(rob_tail); } +template +void +FullBetaCPU::removeInstsUntil(const InstSeqNum &seq_num) +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr inst_to_delete; + + while (instList.back()->seqNum > seq_num) { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Mark it as squashed. + inst_to_delete->setSquashed(); + } + +} + template void FullBetaCPU::removeAllInsts() diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index cf753ad67..bde7e5bbf 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -115,6 +115,8 @@ class FullBetaCPU : public BaseFullCPU void init(); + void fullCPURegStats(); + void activateContext(int thread_num, int delay); void suspendContext(int thread_num); void deallocateContext(int thread_num); @@ -205,6 +207,9 @@ class FullBetaCPU : public BaseFullCPU /** Remove all instructions that are not currently in the ROB. */ void removeInstsNotInROB(); + /** Remove all instructions younger than the given sequence number. */ + void removeInstsUntil(const InstSeqNum &seq_num); + /** Remove all instructions from the list. 
*/ void removeAllInsts(); diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh index de408ef0c..90bd39e7f 100644 --- a/cpu/beta_cpu/iew.hh +++ b/cpu/beta_cpu/iew.hh @@ -9,6 +9,7 @@ #include "base/timebuf.hh" #include "cpu/beta_cpu/comm.hh" +#include "base/statistics.hh" //Can IEW even stall? Space should be available/allocated already...maybe //if there's not enough write ports on the ROB or waiting for CDB @@ -50,7 +51,9 @@ class SimpleIEW public: void squash(); - void squash(DynInstPtr &inst); + void squashDueToBranch(DynInstPtr &inst); + + void squashDueToMem(DynInstPtr &inst); void block(); @@ -59,6 +62,8 @@ class SimpleIEW public: SimpleIEW(Params ¶ms); + void regStats(); + void setCPU(FullCPU *cpu_ptr); void setTimeBuffer(TimeBuffer *tb_ptr); @@ -76,6 +81,10 @@ class SimpleIEW void iew(); private: + void dispatchInsts(); + + void executeInsts(); + //Interfaces to objects inside and outside of IEW. /** Time buffer interface. */ TimeBuffer *timeBuffer; @@ -159,9 +168,23 @@ class SimpleIEW */ unsigned cyclesSquashing; - //Will implement later - //Load queue interface (probably one and the same) - //Store queue interface + Stats::Scalar<> iewIdleCycles; + Stats::Scalar<> iewSquashCycles; + Stats::Scalar<> iewBlockCycles; + Stats::Scalar<> iewUnblockCycles; +// Stats::Scalar<> iewWBInsts; + Stats::Scalar<> iewDispatchedInsts; + Stats::Scalar<> iewDispSquashedInsts; + Stats::Scalar<> iewDispLoadInsts; + Stats::Scalar<> iewDispStoreInsts; + Stats::Scalar<> iewDispNonSpecInsts; + Stats::Scalar<> iewIQFullEvents; + Stats::Scalar<> iewExecutedInsts; + Stats::Scalar<> iewExecLoadInsts; + Stats::Scalar<> iewExecStoreInsts; + Stats::Scalar<> iewExecSquashedInsts; + Stats::Scalar<> memOrderViolationEvents; + Stats::Scalar<> predictedTakenIncorrect; }; #endif diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index 521ce77f6..2bfd6bae9 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -38,6 +38,79 @@ SimpleIEW::SimpleIEW(Params ¶ms) 
instQueue.setIssueToExecuteQueue(&issueToExecQueue); } +template +void +SimpleIEW::regStats() +{ + instQueue.regStats(); + + iewIdleCycles + .name(name() + ".iewIdleCycles") + .desc("Number of cycles IEW is idle"); + + iewSquashCycles + .name(name() + ".iewSquashCycles") + .desc("Number of cycles IEW is squashing"); + + iewBlockCycles + .name(name() + ".iewBlockCycles") + .desc("Number of cycles IEW is blocking"); + + iewUnblockCycles + .name(name() + ".iewUnblockCycles") + .desc("Number of cycles IEW is unblocking"); + +// iewWBInsts; + + iewDispatchedInsts + .name(name() + ".iewDispatchedInsts") + .desc("Number of instructions dispatched to IQ"); + + iewDispSquashedInsts + .name(name() + ".iewDispSquashedInsts") + .desc("Number of squashed instructions skipped by dispatch"); + + iewDispLoadInsts + .name(name() + ".iewDispLoadInsts") + .desc("Number of dispatched load instructions"); + + iewDispStoreInsts + .name(name() + ".iewDispStoreInsts") + .desc("Number of dispatched store instructions"); + + iewDispNonSpecInsts + .name(name() + ".iewDispNonSpecInsts") + .desc("Number of dispatched non-speculative instructions"); + + iewIQFullEvents + .name(name() + ".iewIQFullEvents") + .desc("Number of times the IQ has become full, causing a stall"); + + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .name(name() + ".iewExecLoadInsts") + .desc("Number of load instructions executed"); + + iewExecStoreInsts + .name(name() + ".iewExecStoreInsts") + .desc("Number of store instructions executed"); + + iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + memOrderViolationEvents + .name(name() + ".memOrderViolationEvents") + .desc("Number of memory order violations"); + + predictedTakenIncorrect + .name(name() + ".predictedTakenIncorrect") + .desc("Number of branches that were predicted taken incorrectly"); +} + template void 
SimpleIEW::setCPU(FullCPU *cpu_ptr) @@ -158,7 +231,7 @@ SimpleIEW::squash() template void -SimpleIEW::squash(DynInstPtr &inst) +SimpleIEW::squashDueToBranch(DynInstPtr &inst) { DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", inst->PC); @@ -167,14 +240,282 @@ SimpleIEW::squash(DynInstPtr &inst) _status = Squashing; // Tell rename to squash through the time buffer. - toRename->iewInfo.squash = true; + toCommit->squash = true; // Also send PC update information back to prior stages. - toRename->iewInfo.squashedSeqNum = inst->seqNum; - toRename->iewInfo.mispredPC = inst->readPC(); - toRename->iewInfo.nextPC = inst->readCalcTarg(); - toRename->iewInfo.branchMispredict = true; + toCommit->squashedSeqNum = inst->seqNum; + toCommit->mispredPC = inst->readPC(); + toCommit->nextPC = inst->readCalcTarg(); + toCommit->branchMispredict = true; // Prediction was incorrect, so send back inverse. - toRename->iewInfo.branchTaken = !(inst->predTaken()); + toCommit->branchTaken = inst->readCalcTarg() != + (inst->readPC() + sizeof(MachInst)); +// toCommit->globalHist = inst->readGlobalHist(); +} + +template +void +SimpleIEW::squashDueToMem(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->nextPC = inst->readCalcTarg(); +} + +template +void +SimpleIEW::dispatchInsts() +{ + //////////////////////////////////////// + // DISPATCH/ISSUE stage + //////////////////////////////////////// + + //Put into its own function? + //Add instructions to IQ if there are any instructions there + + // Check if there are any instructions coming from rename, and we're. + // not squashing. 
+ if (fromRename->size > 0) { + int insts_to_add = fromRename->size; + + // Loop through the instructions, putting them in the instruction + // queue. + for (int inst_num = 0; inst_num < insts_to_add; ++inst_num) + { + DynInstPtr inst = fromRename->insts[inst_num]; + + // Make sure there's a valid instruction there. + assert(inst); + + DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", + inst->readPC()); + + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. + if (inst->isSquashed()) { + ++iewDispSquashedInsts; + continue; + } else if (instQueue.isFull()) { + DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); + // Call function to start blocking. + block(); + // Tell previous stage to stall. + toRename->iewInfo.stall = true; + + ++iewIQFullEvents; + break; + } else if (inst->isLoad()) { + DPRINTF(IEW, "IEW: Issue: Memory instruction " + "encountered, adding to LDSTQ.\n"); + + // Reserve a spot in the load store queue for this + // memory access. + ldstQueue.insertLoad(inst); + + ++iewDispLoadInsts; + } else if (inst->isStore()) { + ldstQueue.insertStore(inst); + + // A bit of a hack. Set that it can commit so that + // the commit stage will try committing it, and then + // once commit realizes it's a store it will send back + // a signal to this stage to issue and execute that + // store. Change to be a bit that says the instruction + // has extra work to do at commit. + inst->setCanCommit(); + + instQueue.insertNonSpec(inst); + + ++iewDispStoreInsts; + ++iewDispNonSpecInsts; + + continue; + } else if (inst->isNonSpeculative()) { + DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " + "encountered, skipping.\n"); + + // Same hack as with stores. + inst->setCanCommit(); + + // Specificall insert it as nonspeculative. 
+ instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + continue; + } else if (inst->isNop()) { + DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " + ", skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } else if (inst->isExecuted()) { + DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " + "skipping.\n"); + + assert(inst->isDirectCtrl()); + + inst->setIssued(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } + + // If the instruction queue is not full, then add the + // instruction. + instQueue.insert(fromRename->insts[inst_num]); + + ++iewDispatchedInsts; + } + } +} + +template +void +SimpleIEW::executeInsts() +{ + //////////////////////////////////////// + //EXECUTE/WRITEBACK stage + //////////////////////////////////////// + + //Put into its own function? + //Similarly should probably have separate execution for int vs FP. + // Above comment is handled by the issue queue only issuing a valid + // mix of int/fp instructions. + //Actually okay to just have one execution, buuuuuut will need + //somewhere that defines the execution latency of all instructions. + // @todo: Move to the FU pool used in the current full cpu. + + int fu_usage = 0; + bool fetch_redirect = false; + + // Execute/writeback any instructions that are available. + for (int inst_num = 0; + fu_usage < executeWidth && /* Haven't exceeded available FU's. */ + inst_num < issueWidth && + fromIssue->insts[inst_num]; + ++inst_num) { + + DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); + + // Get instruction from issue's queue. + DynInstPtr inst = fromIssue->insts[inst_num]; + + DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. 
+ if (inst->isSquashed()) { + DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + toCommit->insts[inst_num] = inst; + + ++iewExecSquashedInsts; + + continue; + } + + inst->setExecuted(); + + // If an instruction is executed, then count it towards FU usage. + ++fu_usage; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + ldstQueue.executeLoad(inst); + + ++iewExecLoadInsts; + } else if (inst->isStore()) { + ldstQueue.executeStore(); + + ++iewExecStoreInsts; + } else { + panic("IEW: Unexpected memory type!\n"); + } + + } else { + inst->execute(); + + ++iewExecutedInsts; + } + + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + // Add finished instruction to queue to commit. + toCommit->insts[inst_num] = inst; + + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. 
+ if (!fetch_redirect) { + if (inst->mispredicted()) { + fetch_redirect = true; + + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { + predictedTakenIncorrect++; + } + } else if (ldstQueue.violation()) { + fetch_redirect = true; + + // Get the DynInst that caused the violation. + DynInstPtr violator = ldstQueue.getMemDepViolator(); + + DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. + instQueue.violation(inst, violator); + + // Squash. + squashDueToMem(inst); + + ++memOrderViolationEvents; + } + } + } } template @@ -198,6 +539,8 @@ SimpleIEW::tick() // to running. if (_status == Unblocking) { unblock(); + + ++iewUnblockCycles; } } else if (_status == Squashing) { @@ -216,6 +559,8 @@ SimpleIEW::tick() instQueue.doSquash(); } + ++iewSquashCycles; + // Also should advance its own time buffers if the stage ran. // Not sure about this... // issueToExecQueue.advance(); @@ -232,7 +577,7 @@ SimpleIEW::tick() // If there's still instructions coming from rename, continue to // put them on the skid buffer. - if (fromRename->insts[0]) { + if (fromRename->size == 0) { block(); } @@ -240,6 +585,8 @@ SimpleIEW::tick() fromCommit->commitInfo.robSquashing) { squash(); } + + ++iewBlockCycles; } // @todo: Maybe put these at the beginning, so if it's idle it can @@ -280,209 +627,12 @@ SimpleIEW::iew() return; } - //////////////////////////////////////// - // DISPATCH/ISSUE stage - //////////////////////////////////////// - - //Put into its own function? - //Add instructions to IQ if there are any instructions there - - // Check if there are any instructions coming from rename, and we're. 
- // not squashing. - if (fromRename->insts[0] && _status != Squashing) { - - // Loop through the instructions, putting them in the instruction - // queue. - for (int inst_num = 0; inst_num < issueReadWidth; ++inst_num) - { - DynInstPtr inst = fromRename->insts[inst_num]; - - // Make sure there's a valid instruction there. - if (!inst) - break; - - DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", - inst->readPC()); - - // If it's a memory reference, don't put it in the - // instruction queue. These will only be executed at commit. - // Do the same for nonspeculative instructions and nops. - // Be sure to mark these instructions as ready so that the - // commit stage can go ahead and execute them, and mark - // them as issued so the IQ doesn't reprocess them. - if (inst->isSquashed()) { - continue; - } else if (inst->isLoad()) { - DPRINTF(IEW, "IEW: Issue: Memory instruction " - "encountered, adding to LDSTQ.\n"); - - // Reserve a spot in the load store queue for this - // memory access. - ldstQueue.insertLoad(inst); - - } else if (inst->isStore()) { - ldstQueue.insertStore(inst); - - // A bit of a hack. Set that it can commit so that - // the commit stage will try committing it, and then - // once commit realizes it's a store it will send back - // a signal to this stage to issue and execute that - // store. - inst->setCanCommit(); - - instQueue.insertNonSpec(inst); - continue; - } else if (inst->isNonSpeculative()) { - DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " - "encountered, skipping.\n"); - - // Same hack as with stores. - inst->setCanCommit(); - - // Specificall insert it as nonspeculative. 
- instQueue.insertNonSpec(inst); - - continue; - } else if (inst->isNop()) { - DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " - ", skipping.\n"); - - inst->setIssued(); - inst->setExecuted(); - inst->setCanCommit(); - - instQueue.advanceTail(inst); - continue; - } else if (instQueue.isFull()) { - DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); - // Call function to start blocking. - block(); - // Tell previous stage to stall. - toRename->iewInfo.stall = true; - break; - } - - // If the instruction queue is not full, then add the - // instruction. - instQueue.insert(fromRename->insts[inst_num]); - } - } + dispatchInsts(); // Have the instruction queue try to schedule any ready instructions. instQueue.scheduleReadyInsts(); - //////////////////////////////////////// - //EXECUTE/WRITEBACK stage - //////////////////////////////////////// - - //Put into its own function? - //Similarly should probably have separate execution for int vs FP. - // Above comment is handled by the issue queue only issuing a valid - // mix of int/fp instructions. - //Actually okay to just have one execution, buuuuuut will need - //somewhere that defines the execution latency of all instructions. - // @todo: Move to the FU pool used in the current full cpu. - - int fu_usage = 0; - bool fetch_redirect = false; - - // Execute/writeback any instructions that are available. - for (int inst_num = 0; - fu_usage < executeWidth && /* Haven't exceeded available FU's. */ - inst_num < issueWidth && /* Haven't exceeded issue width. */ - fromIssue->insts[inst_num]; /* There are available instructions. */ - ++inst_num) { - DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); - - // Get instruction from issue's queue. - DynInstPtr inst = fromIssue->insts[inst_num]; - - DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); - - // Check if the instruction is squashed; if so then skip it - // and don't count it towards the FU usage. 
- if (inst->isSquashed()) { - DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); - - // Consider this instruction executed so that commit can go - // ahead and retire the instruction. - inst->setExecuted(); - - toCommit->insts[inst_num] = inst; - - continue; - } - - inst->setExecuted(); - - // If an instruction is executed, then count it towards FU usage. - ++fu_usage; - - // Execute instruction. - // Note that if the instruction faults, it will be handled - // at the commit stage. - if (inst->isMemRef()) { - DPRINTF(IEW, "IEW: Execute: Calculating address for memory " - "reference.\n"); - - // Tell the LDSTQ to execute this instruction (if it is a load). - if (inst->isLoad()) { - ldstQueue.executeLoad(inst); - } else if (inst->isStore()) { - ldstQueue.executeStore(); - } else { - panic("IEW: Unexpected memory type!\n"); - } - - } else { - inst->execute(); - } - - // First check the time slot that this instruction will write - // to. If there are free write ports at the time, then go ahead - // and write the instruction to that time. If there are not, - // keep looking back to see where's the first time there's a - // free slot. What happens if you run out of free spaces? - // For now naively assume that all instructions take one cycle. - // Otherwise would have to look into the time buffer based on the - // latency of the instruction. - - // Add finished instruction to queue to commit. - toCommit->insts[inst_num] = inst; - - // Check if branch was correct. This check happens after the - // instruction is added to the queue because even if the branch - // is mispredicted, the branch instruction itself is still valid. - // Only handle this if there hasn't already been something that - // redirects fetch in this group of instructions. 
- if (!fetch_redirect) { - if (inst->mispredicted()) { - fetch_redirect = true; - - DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); - DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", - inst->nextPC); - - // If incorrect, then signal the ROB that it must be squashed. - squash(inst); - } else if (ldstQueue.violation()) { - fetch_redirect = true; - - DynInstPtr violator = ldstQueue.getMemDepViolator(); - - DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " - "%#x, inst PC: %#x. Addr is: %#x.\n", - violator->readPC(), inst->readPC(), inst->physEffAddr); - - instQueue.violation(inst, violator); - - squash(inst); - // Otherwise check if there was a memory ordering violation. - // If there was, then signal ROB that it must be squashed. Also - // signal IQ that there was a violation. - } - } - } + executeInsts(); // Loop through the head of the time buffer and wake any dependents. // These instructions are about to write back. In the simple model @@ -491,7 +641,7 @@ SimpleIEW::iew() // Also mark scoreboard that this instruction is finally complete. // Either have IEW have direct access to rename map, or have this as // part of backwards communication. - for (int inst_num = 0; inst_num < executeWidth && + for (int inst_num = 0; inst_num < issueWidth && toCommit->insts[inst_num]; inst_num++) { DynInstPtr inst = toCommit->insts[inst_num]; diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc index 43b0a4572..c4fd077bc 100644 --- a/cpu/beta_cpu/inst_queue.cc +++ b/cpu/beta_cpu/inst_queue.cc @@ -5,3 +5,6 @@ // Force instantiation of InstructionQueue. 
template InstructionQueue; + +unsigned +InstructionQueue::DependencyEntry::mem_alloc_counter = 0; diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh index a170979cb..6fcce70a4 100644 --- a/cpu/beta_cpu/inst_queue.hh +++ b/cpu/beta_cpu/inst_queue.hh @@ -7,14 +7,10 @@ #include #include +#include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/inst_seq.hh" -//Perhaps have a better separation between the data structure underlying -//and the actual algorithm. -//somewhat nasty to try to have a nice ordering. -// Consider moving to STL list or slist for the LL stuff. - /** * A standard instruction queue class. It holds instructions in an * array, holds the ordering of the instructions within a linked list, @@ -74,6 +70,8 @@ class InstructionQueue InstructionQueue(Params ¶ms); + void regStats(); + void setCPU(FullCPU *cpu); void setIssueToExecuteQueue(TimeBuffer *i2eQueue); @@ -98,6 +96,7 @@ class InstructionQueue void violation(DynInstPtr &store, DynInstPtr &faulting_load); + // Change this to take in the sequence number void squash(); void doSquash(); @@ -159,7 +158,7 @@ class InstructionQueue ReadyInstQueue readyBranchInsts; /** List of ready memory instructions. */ - ReadyInstQueue readyMemInsts; +// ReadyInstQueue readyMemInsts; /** List of ready miscellaneous instructions. */ ReadyInstQueue readyMiscInsts; @@ -228,9 +227,6 @@ class InstructionQueue /** The sequence number of the squashed instruction. */ InstSeqNum squashedSeqNum; - /** Iterator that points to the oldest instruction in the IQ. */ -// ListIt head; - /** Iterator that points to the youngest instruction in the IQ. */ ListIt tail; @@ -261,6 +257,9 @@ class InstructionQueue void insert(DynInstPtr &new_inst); void remove(DynInstPtr &inst_to_remove); + + // Debug variable, remove when done testing. + static unsigned mem_alloc_counter; }; /** Array of linked lists. 
Each linked list is a list of all the @@ -285,6 +284,25 @@ class InstructionQueue void dumpDependGraph(); void addIfReady(DynInstPtr &inst); + + Stats::Scalar<> iqInstsAdded; + Stats::Scalar<> iqNonSpecInstsAdded; +// Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqIntInstsIssued; +// Stats::Scalar<> iqFloatInstsAdded; + Stats::Scalar<> iqFloatInstsIssued; +// Stats::Scalar<> iqBranchInstsAdded; + Stats::Scalar<> iqBranchInstsIssued; +// Stats::Scalar<> iqMemInstsAdded; + Stats::Scalar<> iqMemInstsIssued; +// Stats::Scalar<> iqMiscInstsAdded; + Stats::Scalar<> iqMiscInstsIssued; + Stats::Scalar<> iqSquashedInstsIssued; + Stats::Scalar<> iqLoopSquashStalls; + Stats::Scalar<> iqSquashedInstsExamined; + Stats::Scalar<> iqSquashedOperandsExamined; + Stats::Scalar<> iqSquashedNonSpecRemoved; + }; #endif //__INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh index 03e3fed33..c688181ed 100644 --- a/cpu/beta_cpu/inst_queue_impl.hh +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -24,15 +24,13 @@ InstructionQueue::InstructionQueue(Params ¶ms) numEntries(params.numIQEntries), intWidth(params.executeIntWidth), floatWidth(params.executeFloatWidth), + branchWidth(params.executeBranchWidth), + memoryWidth(params.executeMemoryWidth), totalWidth(params.issueWidth), numPhysIntRegs(params.numPhysIntRegs), numPhysFloatRegs(params.numPhysFloatRegs), commitToIEWDelay(params.commitToIEWDelay) { - // HACK: HARDCODED NUMBER. REMOVE LATER AND ADD TO PARAMETER. - branchWidth = 1; - memoryWidth = 1; - DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); // Initialize the number of free IQ entries. 
@@ -66,6 +64,87 @@ InstructionQueue::InstructionQueue(Params ¶ms) } +template +void +InstructionQueue::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqLoopSquashStalls + .name(name() + ".iqLoopSquashStalls") + .desc("Number of times issue loop had to restart due to squashed " + "inst; mainly for profiling") + .prereq(iqLoopSquashStalls); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + 
.prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); + + // Tell mem dependence unit to reg stats as well. + memDepUnit.regStats(); +} + template void InstructionQueue::setCPU(FullCPU *cpu_ptr) @@ -161,10 +240,14 @@ InstructionQueue::insert(DynInstPtr &new_inst) // unit. if (new_inst->isMemRef()) { memDepUnit.insert(new_inst); + // Uh..forgot to look it up and put it on the proper dependency list + // if the instruction should not go yet. + } else { + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); } - // If the instruction is ready then add it to the ready list. - addIfReady(new_inst); + ++iqInstsAdded; assert(freeEntries == (numEntries - countInsts())); } @@ -219,13 +302,16 @@ InstructionQueue::insertNonSpec(DynInstPtr &inst) // If it's a memory instruction, add it to the memory dependency // unit. if (inst->isMemRef()) { - memDepUnit.insert(inst); + memDepUnit.insertNonSpec(inst); } + + ++iqNonSpecInstsAdded; } // Slightly hack function to advance the tail iterator in the case that // the IEW stage issues an instruction that is not added to the IQ. This // is needed in case a long chain of such instructions occurs. +// I don't think this is used anymore. 
template void InstructionQueue::advanceTail(DynInstPtr &inst) @@ -288,7 +374,7 @@ InstructionQueue::scheduleReadyInsts() bool insts_available = !readyBranchInsts.empty() || !readyIntInsts.empty() || !readyFloatInsts.empty() || - !readyMemInsts.empty() || + !memDepUnit.empty() || !readyMiscInsts.empty() || !squashedInsts.empty(); @@ -327,6 +413,9 @@ InstructionQueue::scheduleReadyInsts() if (int_head_inst->isSquashed()) { readyIntInsts.pop(); + + ++iqLoopSquashStalls; + continue; } @@ -344,6 +433,9 @@ InstructionQueue::scheduleReadyInsts() if (float_head_inst->isSquashed()) { readyFloatInsts.pop(); + + ++iqLoopSquashStalls; + continue; } else if (float_head_inst->seqNum < oldest_inst) { oldest_inst = float_head_inst->seqNum; @@ -361,6 +453,9 @@ InstructionQueue::scheduleReadyInsts() if (branch_head_inst->isSquashed()) { readyBranchInsts.pop(); + + ++iqLoopSquashStalls; + continue; } else if (branch_head_inst->seqNum < oldest_inst) { oldest_inst = branch_head_inst->seqNum; @@ -370,15 +465,18 @@ InstructionQueue::scheduleReadyInsts() } - if (!readyMemInsts.empty() && + if (!memDepUnit.empty() && memory_issued < memoryWidth) { insts_available = true; - mem_head_inst = readyMemInsts.top(); + mem_head_inst = memDepUnit.top(); if (mem_head_inst->isSquashed()) { - readyMemInsts.pop(); + memDepUnit.pop(); + + ++iqLoopSquashStalls; + continue; } else if (mem_head_inst->seqNum < oldest_inst) { oldest_inst = mem_head_inst->seqNum; @@ -395,6 +493,9 @@ InstructionQueue::scheduleReadyInsts() if (misc_head_inst->isSquashed()) { readyMiscInsts.pop(); + + ++iqLoopSquashStalls; + continue; } else if (misc_head_inst->seqNum < oldest_inst) { oldest_inst = misc_head_inst->seqNum; @@ -450,9 +551,7 @@ InstructionQueue::scheduleReadyInsts() case Memory: issuing_inst = mem_head_inst; - memDepUnit.issue(mem_head_inst); - - readyMemInsts.pop(); + memDepUnit.pop(); ++memory_issued; DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", issuing_inst->readPC()); @@ -461,6 +560,9 @@ 
InstructionQueue::scheduleReadyInsts() case Misc: issuing_inst = misc_head_inst; readyMiscInsts.pop(); + + ++iqMiscInstsIssued; + DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", issuing_inst->readPC()); break; @@ -476,6 +578,7 @@ InstructionQueue::scheduleReadyInsts() if (list_with_oldest != None) { i2e_info->insts[total_issued] = issuing_inst; + i2e_info->size++; issuing_inst->setIssued(); @@ -485,12 +588,21 @@ InstructionQueue::scheduleReadyInsts() assert(freeEntries == (numEntries - countInsts())); } + + iqIntInstsIssued += int_issued; + iqFloatInstsIssued += float_issued; + iqBranchInstsIssued += branch_issued; + iqMemInstsIssued += memory_issued; + iqSquashedInstsIssued += squashed_issued; } template void InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) { + DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + non_spec_it_t inst_it = nonSpecInsts.find(inst); assert(inst_it != nonSpecInsts.end()); @@ -499,7 +611,11 @@ InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) (*inst_it).second->setCanIssue(); // Now schedule the instruction. - addIfReady((*inst_it).second); + if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); + } else { + memDepUnit.nonSpecInstReady((*inst_it).second); + } nonSpecInsts.erase(inst_it); } @@ -552,6 +668,7 @@ InstructionQueue::doSquash() // hasn't already been squashed in the IQ. if (!squashed_inst->isIssued() && !squashed_inst->isSquashedInIQ()) { + // Remove the instruction from the dependency list. // Hack for now: These below don't add themselves to the // dependency list, so don't try to remove them. @@ -576,7 +693,15 @@ InstructionQueue::doSquash() src_reg < numPhysRegs) { dependGraph[src_reg].remove(squashed_inst); } + + ++iqSquashedOperandsExamined; } + + // Might want to remove producers as well. 
+ } else { + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; } // Might want to also clear out the head of the dependency graph. @@ -590,11 +715,8 @@ InstructionQueue::doSquash() squashed_inst->readPC()); } - if (squashed_inst->isNonSpeculative() || squashed_inst->isStore()) { - nonSpecInsts.erase(squashed_inst->seqNum); - } - --squashIt; + ++iqSquashedInstsExamined; } } @@ -665,6 +787,8 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) dependGraph[dest_reg].next = curr->next; + DependencyEntry::mem_alloc_counter--; + delete curr; } @@ -749,13 +873,9 @@ InstructionQueue::createDependency(DynInstPtr &new_inst) } dependGraph[dest_reg].inst = new_inst; -#if 0 - if (dependGraph[dest_reg].next) { - panic("Dependency chain of dest reg %i is not empty.\n", - dest_reg); - } -#endif + assert(!dependGraph[dest_reg].next); + // Mark the scoreboard to say it's not yet ready. regScoreboard[dest_reg] = false; } @@ -776,6 +896,8 @@ InstructionQueue::DependencyEntry::insert(DynInstPtr &new_inst) // Then actually add it to the chain. this->next = new_entry; + + ++mem_alloc_counter; } template @@ -805,6 +927,8 @@ InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) // Now remove this instruction from the list. prev->next = curr->next; + --mem_alloc_counter; + delete curr; } @@ -855,12 +979,26 @@ InstructionQueue::addIfReady(DynInstPtr &inst) DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); + // Message to the mem dependence unit that this instruction has + // its registers ready. + + memDepUnit.regsReady(inst); + +#if 0 if (memDepUnit.readyToIssue(inst)) { DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " "putting it onto the ready list, PC %#x.\n", inst->readPC()); readyMemInsts.push(inst); + } else { + // Make dependent on the store. + // Will need some way to get the store instruction it should + // be dependent upon; then when the store issues it can + // put the instruction on the ready list. 
+ // Yet another tree? + assert(0 && "Instruction has no way to actually issue"); } +#endif } else if (inst->isInteger()) { @@ -923,7 +1061,7 @@ InstructionQueue::dumpLists() cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); - cprintf("Ready memory list size: %i\n", readyMemInsts.size()); +// cprintf("Ready memory list size: %i\n", readyMemInsts.size()); cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh index 4821c63b7..e43543e09 100644 --- a/cpu/beta_cpu/mem_dep_unit.hh +++ b/cpu/beta_cpu/mem_dep_unit.hh @@ -6,6 +6,7 @@ #include #include "cpu/inst_seq.hh" +#include "base/statistics.hh" /** * Memory dependency unit class. This holds the memory dependence predictor. @@ -24,17 +25,18 @@ class MemDepUnit { typedef typename Impl::Params Params; typedef typename Impl::DynInstPtr DynInstPtr; - public: - typedef typename std::set::iterator sn_it_t; - typedef typename std::map >::iterator - dep_it_t; - public: MemDepUnit(Params ¶ms); + void regStats(); + void insert(DynInstPtr &inst); - bool readyToIssue(DynInstPtr &inst); + void insertNonSpec(DynInstPtr &inst); + + void regsReady(DynInstPtr &inst); + + void nonSpecInstReady(DynInstPtr &inst); void issue(DynInstPtr &inst); @@ -44,19 +46,83 @@ class MemDepUnit { void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + // Will want to make this operation relatively fast. Right now it + // kind of sucks. + DynInstPtr &top(); + + void pop(); + + inline bool empty() + { return readyInsts.empty(); } + + private: + typedef typename std::set::iterator sn_it_t; + typedef typename std::map::iterator dyn_it_t; + + // Forward declarations so that the following two typedefs work. 
+ struct Dependency; + struct ltDependency; + + typedef typename std::set::iterator dep_it_t; + typedef typename std::map >::iterator + sd_it_t; + /** Entry for one waiting memory instruction; ordered by seqNum only, so the mutable flags may change while it sits in waitingInsts. */ + struct Dependency { + Dependency(const InstSeqNum &_seqNum) + : seqNum(_seqNum), regsReady(0), memDepReady(0) + { } + + Dependency(const InstSeqNum &_seqNum, bool _regsReady, + bool _memDepReady) + : seqNum(_seqNum), regsReady(_regsReady), + memDepReady(_memDepReady) + { } + + InstSeqNum seqNum; + mutable bool regsReady; + mutable bool memDepReady; + mutable sd_it_t storeDep; + }; + /** Orders Dependency entries by ascending sequence number; const-qualified because ordered containers call the comparator through a const object. */ + struct ltDependency { + bool operator() (const Dependency &lhs, const Dependency &rhs) const + { + return lhs.seqNum < rhs.seqNum; + } + }; + + + private: + inline void moveToReady(dep_it_t &woken_inst); + private: /** List of instructions that have passed through rename, yet are still - * waiting on a memory dependence to resolve before they can issue. + * waiting on either a memory dependence to resolve or source registers to + * become available before they can issue. */ - std::set renamedInsts; + std::set waitingInsts; /** List of instructions that have all their predicted memory dependences - * resolved. They are ready in terms of being free of memory - * dependences; however they may still have to wait on source registers. + * resolved and their source registers ready. */ std::set readyInsts; - std::map > dependencies; + // Change this to hold a vector of iterators, which will point to the + // entry of the waiting instructions. + /** List of stores' sequence numbers, each of which has a vector of + * iterators. The iterators point to the appropriate node within + * waitingInsts that has the dependent instruction. + */ + std::map > storeDependents; + + // For now will implement this as a map...hash table might not be too + // bad, or could move to something that mimics the current dependency + // graph. + std::map memInsts; + + // Iterator pointer to the top instruction which is ready. + // Is set by the top() call. 
+ dyn_it_t topInst; /** The memory dependence predictor. It is accessed upon new * instructions being added to the IQ, and responds by telling @@ -65,6 +131,10 @@ class MemDepUnit { */ MemDepPred depPred; + Stats::Scalar<> insertedLoads; + Stats::Scalar<> insertedStores; + Stats::Scalar<> conflictingLoads; + Stats::Scalar<> conflictingStores; }; #endif diff --git a/cpu/beta_cpu/mem_dep_unit_impl.hh b/cpu/beta_cpu/mem_dep_unit_impl.hh index 4299acb7a..4161ac2a8 100644 --- a/cpu/beta_cpu/mem_dep_unit_impl.hh +++ b/cpu/beta_cpu/mem_dep_unit_impl.hh @@ -3,60 +3,236 @@ #include "cpu/beta_cpu/mem_dep_unit.hh" -// Hack: dependence predictor sizes are hardcoded. template MemDepUnit::MemDepUnit(Params ¶ms) - : depPred(4028, 128) + : depPred(params.SSITSize, params.LFSTSize) { DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); } +template +void +MemDepUnit::regStats() +{ + insertedLoads + .name(name() + ".memDep.insertedLoads") + .desc("Number of loads inserted to the mem dependence unit."); + + insertedStores + .name(name() + ".memDep.insertedStores") + .desc("Number of stores inserted to the mem dependence unit."); + + conflictingLoads + .name(name() + ".memDep.conflictingLoads") + .desc("Number of conflicting loads."); + + conflictingStores + .name(name() + ".memDep.conflictingStores") + .desc("Number of conflicting stores."); +} + template void MemDepUnit::insert(DynInstPtr &inst) { InstSeqNum inst_seq_num = inst->seqNum; + Dependency unresolved_dependencies(inst_seq_num); InstSeqNum producing_store = depPred.checkInst(inst->readPC()); if (producing_store == 0 || - dependencies.find(producing_store) == dependencies.end()) { - readyInsts.insert(inst_seq_num); + storeDependents.find(producing_store) == storeDependents.end()) { + + DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC " + "%#x.\n", inst->readPC()); + + unresolved_dependencies.storeDep = storeDependents.end(); + + if (inst->readyToIssue()) { + readyInsts.insert(inst_seq_num); + } else { + 
unresolved_dependencies.memDepReady = true; + + waitingInsts.insert(unresolved_dependencies); + } } else { + DPRINTF(MemDepUnit, "MemDepUnit: Adding to dependency list; " + "inst PC %#x is dependent on seq num %i.\n", + inst->readPC(), producing_store); + + if (inst->readyToIssue()) { + unresolved_dependencies.regsReady = true; + } + + // Find the store that this instruction is dependent on. + sd_it_t store_loc = storeDependents.find(producing_store); + + assert(store_loc != storeDependents.end()); + + // Record the location of the store that this instruction is + // dependent on. + unresolved_dependencies.storeDep = store_loc; + // If it's not already ready, then add it to the renamed // list and the dependencies. - renamedInsts.insert(inst_seq_num); + dep_it_t inst_loc = + (waitingInsts.insert(unresolved_dependencies)).first; - dependencies[producing_store].push_back(inst_seq_num); + // Add this instruction to the list of dependents. + (*store_loc).second.push_back(inst_loc); + + assert(!(*store_loc).second.empty()); + + if (inst->isLoad()) { + ++conflictingLoads; + } else { + ++conflictingStores; + } } if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + depPred.insertStore(inst->readPC(), inst_seq_num); // Make sure this store isn't already in this list. - assert(dependencies.find(inst_seq_num) == dependencies.end()); + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); // Put a dependency entry in at the store's sequence number. // Uh, not sure how this works...I want to create an entry but // I don't have anything to put into the value yet. - dependencies[inst_seq_num]; - } else if (!inst->isLoad()) { + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { panic("MemDepUnit: Unknown type! 
(most likely a barrier)."); } + + memInsts[inst_seq_num] = inst; +} + +template +void +MemDepUnit::insertNonSpec(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency non_spec_inst(inst_seq_num); + + non_spec_inst.storeDep = storeDependents.end(); + + waitingInsts.insert(non_spec_inst); + + // Might want to turn this part into an inline function or something. + // It's shared between both insert functions. + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! 
(most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template +typename Impl::DynInstPtr & +MemDepUnit::top() +{ + topInst = memInsts.find( (*readyInsts.begin()) ); + + DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n", + (*topInst).second->readPC()); + + return (*topInst).second; +} + +template +void +MemDepUnit::pop() +{ + DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n", + (*topInst).second->readPC()); + + wakeDependents((*topInst).second); + + issue((*topInst).second); + + memInsts.erase(topInst); + + topInst = memInsts.end(); +} + +template +void +MemDepUnit::regsReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for " + "instruction PC %#x.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + if ((*waiting_inst).memDepReady) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory " + "dependencies resolved, adding it to the ready list.\n"); + + moveToReady(waiting_inst); + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on " + "memory dependency.\n"); + + (*waiting_inst).regsReady = true; + } } template -bool -MemDepUnit::readyToIssue(DynInstPtr &inst) +void +MemDepUnit::nonSpecInstReady(DynInstPtr &inst) { + DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative " + "instruction PC %#x as ready.\n", + inst->readPC()); + InstSeqNum inst_seq_num = inst->seqNum; - if (readyInsts.find(inst_seq_num) == readyInsts.end()) { - return false; - } else { - return true; - } + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + moveToReady(waiting_inst); } template @@ -65,46 +241,63 @@ MemDepUnit::issue(DynInstPtr &inst) { assert(readyInsts.find(inst->seqNum) != readyInsts.end()); + 
DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n", + inst->readPC()); + // Remove the instruction from the ready list. readyInsts.erase(inst->seqNum); + + depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); } template void MemDepUnit::wakeDependents(DynInstPtr &inst) { - // Wake any dependencies. - dep_it_t dep_it = dependencies.find(inst); - - // If there's no entry, then return. Really there should only be - // no entry if the instruction is a load. - if (dep_it == dependencies.end()) { + // Only stores have dependents. + if (!inst->isStore()) { return; } - assert(inst->isStore()); + // Wake any dependencies. + sd_it_t sd_it = storeDependents.find(inst->seqNum); - for(int i = 0; i < (*dep_it).second.size(); ++i ) { - InstSeqNum woken_inst = (*dep_it).second[i]; + // If there's no entry, then return. Really there should only be + // no entry if the instruction is a load. + if (sd_it == storeDependents.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence " + "number %i has no dependents.\n", + inst->readPC(), inst->seqNum); + return; + } + + for (int i = 0; i < (*sd_it).second.size(); ++i ) { + dep_it_t woken_inst = (*sd_it).second[i]; + + DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, " + "sequence number %i.\n", + (*woken_inst).seqNum); +#if 0 // Should we have reached instructions that are actually squashed, // there will be no more useful instructions in this dependency // list. Break out early. - if (renamedInsts.find(woken_inst) == renamedInsts.end()) { + if (waitingInsts.find(woken_inst) == waitingInsts.end()) { DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " "are squashed, starting at SN %i. Breaking early.\n", inst->readPC(), woken_inst); break; } +#endif - // Remove it from the renamed instructions. - renamedInsts.erase(woken_inst); - - // Add it to the ready list. 
- readyInsts.insert(woken_inst); + if ((*woken_inst).regsReady) { + moveToReady(woken_inst); + } else { + (*woken_inst).memDepReady = true; + } } - dependencies.erase(dep_it); + storeDependents.erase(sd_it); } template @@ -112,17 +305,30 @@ void MemDepUnit::squash(const InstSeqNum &squashed_num) { - if (!renamedInsts.empty()) { - sn_it_t renamed_it = renamedInsts.end(); + if (!waitingInsts.empty()) { + dep_it_t waiting_it = waitingInsts.end(); - --renamed_it; + --waiting_it; // Remove entries from the renamed list as long as we haven't reached // the end and the entries continue to be younger than the squashed. - while (!renamedInsts.empty() && - (*renamed_it) > squashed_num) + while (!waitingInsts.empty() && + (*waiting_it).seqNum > squashed_num) { - renamedInsts.erase(renamed_it--); + if (!(*waiting_it).memDepReady && + (*waiting_it).storeDep != storeDependents.end()) { + sd_it_t sd_it = (*waiting_it).storeDep; + + // Make sure the iterator that the store has pointing + // back is actually to this instruction. + assert((*sd_it).second.back() == waiting_it); + + // Now remove this from the store's list of dependent + // instructions. + (*sd_it).second.pop_back(); + } + + waitingInsts.erase(waiting_it--); } } @@ -139,16 +345,19 @@ MemDepUnit::squash(const InstSeqNum &squashed_num) } } - if (!dependencies.empty()) { - dep_it_t dep_it = dependencies.end(); + if (!storeDependents.empty()) { + sd_it_t dep_it = storeDependents.end(); --dep_it; // Same for the dependencies list. - while (!dependencies.empty() && + while (!storeDependents.empty() && (*dep_it).first > squashed_num) { - dependencies.erase(dep_it--); + // This store's list of dependent instructions should be empty. 
+ assert((*dep_it).second.empty()); + + storeDependents.erase(dep_it--); } } @@ -161,6 +370,23 @@ void MemDepUnit::violation(DynInstPtr &store_inst, DynInstPtr &violating_load) { + DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->readPC(), + store_inst->readPC()); // Tell the memory dependence unit of the violation. depPred.violation(violating_load->readPC(), store_inst->readPC()); } + +template +inline void +MemDepUnit::moveToReady(dep_it_t &woken_inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i " + "to the ready list.\n", (*woken_inst).seqNum); + + // Add it to the ready list. + readyInsts.insert((*woken_inst).seqNum); + + // Remove it from the waiting instructions. + waitingInsts.erase(woken_inst); +} diff --git a/cpu/beta_cpu/ras.cc b/cpu/beta_cpu/ras.cc new file mode 100644 index 000000000..ca05f5a0d --- /dev/null +++ b/cpu/beta_cpu/ras.cc @@ -0,0 +1,42 @@ +#include "cpu/beta_cpu/ras.hh" + +ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) + : numEntries(_numEntries), usedEntries(0), + tos(0) +{ + addrStack = new Addr[numEntries](0); +} + +void +ReturnAddrStack::push(const Addr &return_addr) +{ + incrTos(); + + addrStack[tos] = return_addr; + + if (usedEntries != numEntries) { + ++usedEntries; + } +} + +void +ReturnAddrStack::pop() +{ + // Not sure it's possible to really track usedEntries properly. +// assert(usedEntries > 0); + + if (usedEntries > 0) { + --usedEntries; + } + + decrTos(); +} + +void +ReturnAddrStack::restore(unsigned top_entry_idx, + const Addr &restored_target) +{ + tos = top_entry_idx; + + addrStack[tos] = restored_target; +} diff --git a/cpu/beta_cpu/ras.hh b/cpu/beta_cpu/ras.hh new file mode 100644 index 000000000..7666f825f --- /dev/null +++ b/cpu/beta_cpu/ras.hh @@ -0,0 +1,40 @@ +#ifndef __RAS_HH__ +#define __RAS_HH__ + +// For Addr type. 
+#include "arch/alpha/isa_traits.hh" + +class ReturnAddrStack +{ + public: + ReturnAddrStack(unsigned numEntries); + + Addr top() + { return addrStack[tos]; } + + unsigned topIdx() + { return tos; } + + void push(const Addr &return_addr); + + void pop(); + + void restore(unsigned top_entry_idx, const Addr &restored_target); + + private: + inline void incrTos() + { tos = (tos + 1) % numEntries; } + + inline void decrTos() + { tos = (tos == 0 ? numEntries - 1 : tos - 1); } + + Addr *addrStack; + + unsigned numEntries; + + unsigned usedEntries; + + unsigned tos; +}; + +#endif // __RAS_HH__ diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index aba897fdc..148d9408a 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -54,7 +54,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs); + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); DPRINTF(IEW, "RegFile: Access to float register %i as single, has " "data %8.8f\n", int(reg_idx), (float)floatRegFile[reg_idx].d); @@ -67,7 +67,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs); + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); DPRINTF(IEW, "RegFile: Access to float register %i as double, has " " data %8.8f\n", int(reg_idx), floatRegFile[reg_idx].d); @@ -80,7 +80,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs); + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); DPRINTF(IEW, "RegFile: Access to float register %i as int, has data " "%lli\n", int(reg_idx), floatRegFile[reg_idx].q); @@ -103,7 +103,7 @@ class PhysRegFile // Remove the base Float reg dependency. 
reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs); + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", int(reg_idx), val); @@ -116,7 +116,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs); + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %8.8f\n", int(reg_idx), val); @@ -129,7 +129,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs); + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), val); diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh index 9f031012a..3e6b873ae 100644 --- a/cpu/beta_cpu/rename.hh +++ b/cpu/beta_cpu/rename.hh @@ -54,6 +54,8 @@ class SimpleRename public: SimpleRename(Params ¶ms); + void regStats(); + void setCPU(FullCPU *cpu_ptr); void setTimeBuffer(TimeBuffer *tb_ptr); @@ -182,6 +184,22 @@ class SimpleRename * group of instructions, it can restart at the proper instruction. 
*/ unsigned numInst; + + Stats::Scalar<> renameSquashCycles; + Stats::Scalar<> renameIdleCycles; + Stats::Scalar<> renameBlockCycles; + Stats::Scalar<> renameUnblockCycles; + Stats::Scalar<> renameRenamedInsts; + Stats::Scalar<> renameSquashedInsts; + Stats::Scalar<> renameROBFullEvents; + Stats::Scalar<> renameIQFullEvents; + Stats::Scalar<> renameFullRegistersEvents; + Stats::Scalar<> renameRenamedOperands; + Stats::Scalar<> renameRenameLookups; + Stats::Scalar<> renameHBPlaceHolders; + Stats::Scalar<> renameCommittedMaps; + Stats::Scalar<> renameUndoneMaps; + Stats::Scalar<> renameValidUndoneMaps; }; #endif // __SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh index 47464d961..5a8e499e9 100644 --- a/cpu/beta_cpu/rename_impl.hh +++ b/cpu/beta_cpu/rename_impl.hh @@ -14,6 +14,72 @@ SimpleRename::SimpleRename(Params ¶ms) _status = Idle; } +template +void +SimpleRename::regStats() +{ + renameSquashCycles + .name(name() + ".renameSquashCycles") + .desc("Number of cycles rename is squashing") + .prereq(renameSquashCycles); + renameIdleCycles + .name(name() + ".renameIdleCycles") + .desc("Number of cycles rename is idle") + .prereq(renameIdleCycles); + renameBlockCycles + .name(name() + ".renameBlockCycles") + .desc("Number of cycles rename is blocking") + .prereq(renameBlockCycles); + renameUnblockCycles + .name(name() + ".renameUnblockCycles") + .desc("Number of cycles rename is unblocking") + .prereq(renameUnblockCycles); + renameRenamedInsts + .name(name() + ".renameRenamedInsts") + .desc("Number of instructions processed by rename") + .prereq(renameRenamedInsts); + renameSquashedInsts + .name(name() + ".renameSquashedInsts") + .desc("Number of squashed instructions processed by rename") + .prereq(renameSquashedInsts); + renameROBFullEvents + .name(name() + ".renameROBFullEvents") + .desc("Number of times rename has considered the ROB 'full'") + .prereq(renameROBFullEvents); + renameIQFullEvents + .name(name() + 
".renameIQFullEvents") + .desc("Number of times rename has considered the IQ 'full'") + .prereq(renameIQFullEvents); + renameFullRegistersEvents + .name(name() + ".renameFullRegisterEvents") + .desc("Number of times there has been no free registers") + .prereq(renameFullRegistersEvents); + renameRenamedOperands + .name(name() + ".renameRenamedOperands") + .desc("Number of destination operands rename has renamed") + .prereq(renameRenamedOperands); + renameRenameLookups + .name(name() + ".renameRenameLookups") + .desc("Number of register rename lookups that rename has made") + .prereq(renameRenameLookups); + renameHBPlaceHolders + .name(name() + ".renameHBPlaceHolders") + .desc("Number of place holders added to the history buffer") + .prereq(renameHBPlaceHolders); + renameCommittedMaps + .name(name() + ".renameCommittedMaps") + .desc("Number of HB maps that are committed") + .prereq(renameCommittedMaps); + renameUndoneMaps + .name(name() + ".renameUndoneMaps") + .desc("Number of HB maps that are undone due to squashing") + .prereq(renameUndoneMaps); + renameValidUndoneMaps + .name(name() + ".renameValidUndoneMaps") + .desc("Number of HB maps that are undone, and are not place holders") + .prereq(renameValidUndoneMaps); +} + template void SimpleRename::setCPU(FullCPU *cpu_ptr) @@ -59,7 +125,6 @@ SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) // Setup wire to get information from decode. fromDecode = decodeQueue->getWire(-decodeToRenameDelay); - } template @@ -124,7 +189,7 @@ SimpleRename::unblock() // continue to tell previous stages to stall. They will be // able to restart once the skid buffer is empty. 
if (!skidBuffer.empty()) { - toDecode->renameInfo.stall = true; + toDecode->renameInfo.stall = true; } else { DPRINTF(Rename, "Rename: Done unblocking.\n"); _status = Running; @@ -136,7 +201,6 @@ void SimpleRename::doSquash() { typename list::iterator hb_it = historyBuffer.begin(); -// typename list::iterator delete_it; InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; @@ -154,6 +218,8 @@ SimpleRename::doSquash() // they did and freeing up the registers. while ((*hb_it).instSeqNum > squashed_seq_num) { + assert(hb_it != historyBuffer.end()); + DPRINTF(Rename, "Rename: Removing history entry with sequence " "number %i.\n", (*hb_it).instSeqNum); @@ -165,15 +231,13 @@ SimpleRename::doSquash() // Put the renamed physical register back on the free list. freeList->addReg(hb_it->newPhysReg); + + ++renameValidUndoneMaps; } -// delete_it = hb_it; - -// hb_it++; - historyBuffer.erase(hb_it++); - assert(hb_it != historyBuffer.end()); + ++renameUndoneMaps; } } @@ -196,9 +260,6 @@ SimpleRename::squash() doSquash(); } -// In the future, when a SmartPtr is used for DynInst, then this function -// itself can handle returning the instruction's physical registers to -// the free list. template void SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) @@ -233,19 +294,20 @@ SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) if (!(*hb_it).placeHolder) { freeList->addReg((*hb_it).prevPhysReg); + ++renameCommittedMaps; } historyBuffer.erase(hb_it--); } - // Finally free up the previous register of the squashed instruction + // Finally free up the previous register of the finished instruction // itself. if (!(*hb_it).placeHolder) { freeList->addReg(hb_it->prevPhysReg); + ++renameCommittedMaps; } historyBuffer.erase(hb_it); - } template @@ -263,7 +325,7 @@ SimpleRename::renameSrcRegs(DynInstPtr &inst) // Look up the source registers to get the phys. register they've // been renamed to, and set the sources to those registers. 
- RegIndex renamed_reg = renameMap->lookup(src_reg); + PhysRegIndex renamed_reg = renameMap->lookup(src_reg); DPRINTF(Rename, "Rename: Looking up arch reg %i, got " "physical reg %i.\n", (int)src_reg, (int)renamed_reg); @@ -278,6 +340,8 @@ SimpleRename::renameSrcRegs(DynInstPtr &inst) inst->markSrcRegReady(src_idx); } + + ++renameRenameLookups; } } @@ -289,40 +353,6 @@ SimpleRename::renameDestRegs(DynInstPtr &inst) unsigned num_dest_regs = inst->numDestRegs(); - // Rename the destination registers. - for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) - { - RegIndex dest_reg = inst->destRegIdx(dest_idx); - - // Get the physical register that the destination will be - // renamed to. - rename_result = renameMap->rename(dest_reg); - - DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " - "reg %i.\n", (int)dest_reg, - (int)rename_result.first); - - // Record the rename information so that a history can be kept. - RenameHistory hb_entry(inst->seqNum, dest_reg, - rename_result.first, - rename_result.second); - - historyBuffer.push_front(hb_entry); - - DPRINTF(Rename, "Rename: Adding instruction to history buffer, " - "sequence number %lli.\n", - (*historyBuffer.begin()).instSeqNum); - - // Tell the instruction to rename the appropriate destination - // register (dest_idx) to the new physical register - // (rename_result.first), and record the previous physical - // register that the same logical register was renamed to - // (rename_result.second). - inst->renameDestReg(dest_idx, - rename_result.first, - rename_result.second); - } - // If it's an instruction with no destination registers, then put // a placeholder within the history buffer. 
It might be better // to not put it in the history buffer at all (other than branches, @@ -337,6 +367,45 @@ SimpleRename::renameDestRegs(DynInstPtr &inst) DPRINTF(Rename, "Rename: Adding placeholder instruction to " "history buffer, sequence number %lli.\n", inst->seqNum); + + ++renameHBPlaceHolders; + } else { + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "reg %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", + (*historyBuffer.begin()).instSeqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + + ++renameRenamedOperands; + } } } @@ -379,6 +448,8 @@ SimpleRename::tick() // buffer were used. Remove those instructions and handle // the rest of unblocking. if (_status == Unblocking) { + ++renameUnblockCycles; + if (fromDecode->size > 0) { // Add the current inputs onto the skid buffer, so they can be // reprocessed when this stage unblocks. @@ -388,6 +459,8 @@ SimpleRename::tick() unblock(); } } else if (_status == Blocked) { + ++renameBlockCycles; + // If stage is blocked and still receiving valid instructions, // make sure to store them in the skid buffer. 
if (fromDecode->size > 0) { @@ -425,6 +498,8 @@ SimpleRename::tick() return; } } else if (_status == Squashing) { + ++renameSquashCycles; + if (fromCommit->commitInfo.squash) { squash(); } else if (!fromCommit->commitInfo.squash && @@ -439,7 +514,13 @@ SimpleRename::tick() // Ugly code, revamp all of the tick() functions eventually. if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { +#ifndef FULL_SYSTEM + if (!fromCommit->commitInfo.squash) { + removeFromHistory(fromCommit->commitInfo.doneSeqNum); + } +#else removeFromHistory(fromCommit->commitInfo.doneSeqNum); +#endif } // Perhaps put this outside of this function, since this will @@ -539,6 +620,12 @@ SimpleRename::rename() // Tell previous stage to stall. toDecode->renameInfo.stall = true; + if (free_rob_entries <= 0) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + return; } else if (min_iq_rob < insts_available) { DPRINTF(Rename, "Rename: Will have to block this cycle. Only " @@ -548,6 +635,12 @@ SimpleRename::rename() insts_available = min_iq_rob; block_this_cycle = true; + + if (free_rob_entries < free_iq_entries) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } } while (insts_available > 0) { @@ -566,6 +659,8 @@ SimpleRename::rename() // Go to the next instruction. ++numInst; + ++renameSquashedInsts; + // Decrement how many instructions are available. --insts_available; @@ -606,6 +701,8 @@ SimpleRename::rename() block_this_cycle = true; + ++renameFullRegistersEvents; + break; } @@ -625,6 +722,8 @@ SimpleRename::rename() ++to_iew_index; ++numInst; + ++renameRenamedInsts; + // Decrement how many instructions are available. 
--insts_available; } diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc index cb9720d28..1301202f2 100644 --- a/cpu/beta_cpu/rename_map.cc +++ b/cpu/beta_cpu/rename_map.cc @@ -72,7 +72,7 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, floatRenameMap[index].physical_reg = float_reg_idx++; } - for (RegIndex index = numPhysicalIntRegs; + for (PhysRegIndex index = numPhysicalIntRegs; index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) { floatScoreboard[index] = 1; @@ -88,7 +88,7 @@ SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, } // Initialize the entries in the misc register scoreboard to be ready. - for (RegIndex index = numPhysicalRegs; + for (PhysRegIndex index = numPhysicalRegs; index < numPhysicalRegs + numMiscRegs; ++index) { miscScoreboard[index] = 1; diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh index 862008429..86c4e2db1 100644 --- a/cpu/beta_cpu/rob_impl.hh +++ b/cpu/beta_cpu/rob_impl.hh @@ -139,9 +139,7 @@ bool ROB::isHeadReady() { if (numInstsInROB != 0) { - DynInstPtr head_inst = cpu->instList.front(); - - return head_inst->readyToCommit(); + return cpu->instList.front()->readyToCommit(); } return false; diff --git a/cpu/beta_cpu/store_set.cc b/cpu/beta_cpu/store_set.cc index 46d763d37..a5458685d 100644 --- a/cpu/beta_cpu/store_set.cc +++ b/cpu/beta_cpu/store_set.cc @@ -5,6 +5,8 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) : SSIT_size(_SSIT_size), LFST_size(_LFST_size) { DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", + SSIT_size, LFST_size); SSIT = new SSID[SSIT_size]; @@ -31,11 +33,13 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) } void -StoreSet::violation(Addr load_PC, Addr store_PC) +StoreSet::violation(Addr store_PC, Addr load_PC) { int load_index = calcIndex(load_PC); int store_index = calcIndex(store_PC); + assert(load_index < SSIT_size && store_index < SSIT_size); + bool 
valid_load_SSID = validSSIT[load_index]; bool valid_store_SSID = validSSIT[store_index]; @@ -51,7 +55,14 @@ StoreSet::violation(Addr load_PC, Addr store_PC) SSIT[store_index] = new_set; + assert(new_set < LFST_size); + SSCounters[new_set]++; + + + DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid " + "storeset, creating a new one: %i for load %#x, store %#x\n", + new_set, load_PC, store_PC); } else if (valid_load_SSID && !valid_store_SSID) { SSID load_SSID = SSIT[load_index]; @@ -59,7 +70,13 @@ StoreSet::violation(Addr load_PC, Addr store_PC) SSIT[store_index] = load_SSID; + assert(load_SSID < LFST_size); + SSCounters[load_SSID]++; + + DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding " + "store to that set: %i for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); } else if (!valid_load_SSID && valid_store_SSID) { SSID store_SSID = SSIT[store_index]; @@ -69,10 +86,16 @@ StoreSet::violation(Addr load_PC, Addr store_PC) // Because we are having a load point to an already existing set, // the size of the store set is not incremented. 
+ + DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for " + "load %#x, store %#x\n", + store_SSID, load_PC, store_PC); } else { SSID load_SSID = SSIT[load_index]; SSID store_SSID = SSIT[store_index]; + assert(load_SSID < LFST_size && store_SSID < LFST_size); + int load_SS_size = SSCounters[load_SSID]; int store_SS_size = SSCounters[store_SSID]; @@ -83,11 +106,19 @@ StoreSet::violation(Addr load_PC, Addr store_PC) SSCounters[load_SSID]++; SSCounters[store_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; " + "for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); } else { SSIT[load_index] = store_SSID; SSCounters[store_SSID]++; SSCounters[load_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; " + "for load %#x, store %#x\n", + store_SSID, load_PC, store_PC); } } } @@ -106,6 +137,8 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) int store_SSID; + assert(index < SSIT_size); + if (!validSSIT[index]) { // Do nothing if there's no valid entry. return; @@ -116,6 +149,11 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) // Update the last store that was fetched with the current one. LFST[store_SSID] = store_seq_num; + + validLFST[store_SSID] = 1; + + DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n", + store_PC, store_SSID); } } @@ -126,7 +164,12 @@ StoreSet::checkInst(Addr PC) int inst_SSID; + assert(index < SSIT_size); + if (!validSSIT[index]) { + DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n", + PC, index); + // Return 0 if there's no valid entry. 
return 0; } else { @@ -135,8 +178,15 @@ StoreSet::checkInst(Addr PC) assert(inst_SSID < LFST_size); if (!validLFST[inst_SSID]) { + + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no " + "dependency\n", PC, index, inst_SSID); + return 0; } else { + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST " + "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]); + return LFST[inst_SSID]; } } @@ -154,14 +204,21 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) int store_SSID; + assert(index < SSIT_size); + // Make sure the SSIT still has a valid entry for the issued store. - assert(validSSIT[index]); + if (!validSSIT[index]) { + return; + } store_SSID = SSIT[index]; + assert(store_SSID < LFST_size); + // If the last fetched store in the store set refers to the store that // was just issued, then invalidate the entry. if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n"); validLFST[store_SSID] = false; } } @@ -170,9 +227,14 @@ void StoreSet::squash(InstSeqNum squashed_num) { // Not really sure how to do this well. + // Generally this is small enough that it should be okay; short circuit + // evaluation should take care of invalid entries. 
+ + DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n", + squashed_num); for (int i = 0; i < LFST_size; ++i) { - if (LFST[i] < squashed_num) { + if (validLFST[i] && LFST[i] < squashed_num) { validLFST[i] = false; } } diff --git a/cpu/beta_cpu/store_set.hh b/cpu/beta_cpu/store_set.hh index 701c60a2d..b634a180d 100644 --- a/cpu/beta_cpu/store_set.hh +++ b/cpu/beta_cpu/store_set.hh @@ -14,7 +14,7 @@ class StoreSet public: StoreSet(int SSIT_size, int LFST_size); - void violation(Addr load_PC, Addr store_PC); + void violation(Addr store_PC, Addr load_PC); void insertLoad(Addr load_PC, InstSeqNum load_seq_num); diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc new file mode 100644 index 000000000..53a11326a --- /dev/null +++ b/cpu/beta_cpu/tournament_pred.cc @@ -0,0 +1,243 @@ +#include "cpu/beta_cpu/tournament_pred.hh" + +TournamentBP::SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +TournamentBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. 
+ if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +TournamentBP::SatCounter::increment() +{ + if (counter < maxVal) { + ++counter; + } +} + +void +TournamentBP::SatCounter::decrement() +{ + if (counter > 0) { + --counter; + } +} + +TournamentBP::TournamentBP(unsigned _local_predictor_size, + unsigned _local_ctr_bits, + unsigned _local_history_table_size, + unsigned _local_history_bits, + unsigned _global_predictor_size, + unsigned _global_ctr_bits, + unsigned _global_history_bits, + unsigned _choice_predictor_size, + unsigned _choice_ctr_bits, + unsigned _instShiftAmt) + : local_predictor_size(_local_predictor_size), + local_ctr_bits(_local_ctr_bits), + local_history_table_size(_local_history_table_size), + local_history_bits(_local_history_bits), + global_predictor_size(_global_predictor_size), + global_ctr_bits(_global_ctr_bits), + global_history_bits(_global_history_bits), + choice_predictor_size(_global_predictor_size), + choice_ctr_bits(_choice_ctr_bits), + instShiftAmt(_instShiftAmt) +{ + //Should do checks here to make sure sizes are correct (powers of 2) + + //Setup the array of counters for the local predictor + local_ctrs = new SatCounter[local_predictor_size](local_ctr_bits); + //Setup the history table for the local table + local_history_table = new unsigned[local_history_table_size](0); + // Setup the local history mask + localHistoryMask = (1 << local_history_bits) - 1; + + //Setup the array of counters for the global predictor + global_ctrs = new SatCounter[global_predictor_size](global_ctr_bits); + //Clear the global history + global_history = 0; + // Setup the global history mask + globalHistoryMask = (1 << global_history_bits) - 1; + + //Setup the array of counters for the choice predictor + choice_ctrs = new SatCounter[choice_predictor_size](choice_ctr_bits); + + threshold = (1 << (local_ctr_bits - 1)) - 1; + threshold = threshold / 2; +} + +inline +unsigned +TournamentBP::calcLocHistIdx(Addr 
&branch_addr) +{ + return (branch_addr >> instShiftAmt) & (local_history_table_size - 1); +} + +inline +void +TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +{ + global_history = (global_history << 1) | 1; + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] = + (local_history_table[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +{ + global_history = (global_history << 1); + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] = + (local_history_table[local_history_idx] << 1); +} + +bool +TournamentBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + + uint8_t global_prediction; + uint8_t choice_prediction; + + //Lookup in the local predictor to get its branch prediction + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = local_history_table[local_history_idx] + & localHistoryMask; + local_prediction = local_ctrs[local_predictor_idx].read(); + + //Lookup in the global predictor to get its branch prediction + global_prediction = global_ctrs[global_history].read(); + + //Lookup in the choice predictor to see which one to use + choice_prediction = choice_ctrs[global_history].read(); + + //@todo Put a threshold value in for the three predictors that can + // be set through the constructor (so this isn't hard coded). + //Also should put some of this code into functions. 
+ if (choice_prediction > threshold) { + if (global_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_history_idx < local_predictor_size); + + global_ctrs[global_history].increment(); + local_ctrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_history_idx < local_predictor_size); + + global_ctrs[global_history].decrement(); + local_ctrs[local_history_idx].decrement(); + + return false; + } + } else { + if (local_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_history_idx < local_predictor_size); + + global_ctrs[global_history].increment(); + local_ctrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(global_history < global_predictor_size && + local_history_idx < local_predictor_size); + + global_ctrs[global_history].decrement(); + local_ctrs[local_history_idx].decrement(); + + return false; + } + } +} + +// Update the branch predictor if it predicted a branch wrong. +void +TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +{ + + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + bool local_pred_taken; + + uint8_t global_prediction; + bool global_pred_taken; + + // Load the correct global history into the register. 
+ global_history = correct_gh; + + // Get the local predictor's current prediction, remove the incorrect + // update, and update the local predictor + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = local_history_table[local_history_idx]; + local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; + + local_prediction = local_ctrs[local_predictor_idx].read(); + local_pred_taken = local_prediction > threshold; + + //Get the global predictor's current prediction, and update the + //global predictor + global_prediction = global_ctrs[global_history].read(); + global_pred_taken = global_prediction > threshold; + + //Update the choice predictor to tell it which one was correct + if (local_pred_taken != global_pred_taken) { + //If the local prediction matches the actual outcome, decerement + //the counter. Otherwise increment the counter. + if (local_pred_taken == taken) { + choice_ctrs[global_history].decrement(); + } else { + choice_ctrs[global_history].increment(); + } + } + + if (taken) { + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + local_ctrs[local_predictor_idx].increment(); + global_ctrs[global_history].increment(); + + global_history = (global_history << 1) | 1; + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] |= 1; + } + else { + assert(global_history < global_predictor_size && + local_predictor_idx < local_predictor_size); + + local_ctrs[local_predictor_idx].decrement(); + global_ctrs[global_history].decrement(); + + global_history = (global_history << 1); + global_history = global_history & globalHistoryMask; + + local_history_table[local_history_idx] &= ~1; + } +} diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh new file mode 100644 index 000000000..bf87d753b --- /dev/null +++ b/cpu/beta_cpu/tournament_pred.hh @@ -0,0 +1,160 @@ +#ifndef __TOURNAMENT_PRED_HH__ +#define __TOURNAMENT_PRED_HH__ 
+ +// For Addr type. +#include "arch/alpha/isa_traits.hh" + +class TournamentBP +{ + public: + /** + * Default branch predictor constructor. + */ + TournamentBP(unsigned local_predictor_size, + unsigned local_ctr_bits, + unsigned local_history_table_size, + unsigned local_history_bits, + unsigned global_predictor_size, + unsigned global_history_bits, + unsigned global_ctr_bits, + unsigned choice_predictor_size, + unsigned choice_ctr_bits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, unsigned global_history, bool taken); + + inline unsigned readGlobalHist() { return global_history; } + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned calcLocHistIdx(Addr &branch_addr); + + inline void updateHistoriesTaken(unsigned local_history_idx); + + inline void updateHistoriesNotTaken(unsigned local_history_idx); + + /** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ + class SatCounter + { + public: + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. 
+ */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + uint8_t read() + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; + }; + + /** Local counters. */ + SatCounter *local_ctrs; + + /** Size of the local predictor. */ + unsigned local_predictor_size; + + /** Number of bits of the local predictor's counters. */ + unsigned local_ctr_bits; + + /** Array of local history table entries. */ + unsigned *local_history_table; + + /** Size of the local history table. */ + unsigned local_history_table_size; + + /** Number of bits for each entry of the local history table. + * @todo Doesn't this come from the size of the local predictor? + */ + unsigned local_history_bits; + + /** Mask to get the proper local history. */ + unsigned localHistoryMask; + + + /** Array of counters that make up the global predictor. */ + SatCounter *global_ctrs; + + /** Size of the global predictor. */ + unsigned global_predictor_size; + + /** Number of bits of the global predictor's counters. */ + unsigned global_ctr_bits; + + /** Global history register. */ + unsigned global_history; + + /** Number of bits for the global history. */ + unsigned global_history_bits; + + /** Mask to get the proper global history. */ + unsigned globalHistoryMask; + + + /** Array of counters that make up the choice predictor. */ + SatCounter *choice_ctrs; + + /** Size of the choice predictor (identical to the global predictor). */ + unsigned choice_predictor_size; + + /** Number of bits of the choice predictor's counters. */ + unsigned choice_ctr_bits; + + /** Number of bits to shift the instruction over to get rid of the word + * offset. + */ + unsigned instShiftAmt; + + /** Threshold for the counter value; above the threshold is taken, + * equal to or below the threshold is not taken. 
+ */ + unsigned threshold; +}; + +#endif // __TOURNAMENT_PRED_HH__ From 90d4436351620bd3861013333aabd152d5492df7 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 11 Jan 2005 18:52:29 -0500 Subject: [PATCH 4/6] Slight fixes, add in commit trace flag. base/traceflags.py: Add new commit rate trace flag. build/SConstruct: Add extra option for efence. cpu/beta_cpu/alpha_full_cpu_impl.hh: Use function calls instead of direct indexing (avoids confusion). cpu/beta_cpu/commit_impl.hh: Add commit rate trace output (might not be worthwhile in the future). cpu/beta_cpu/decode_impl.hh: Remove some older hacks. Fix it so that the instruction properly sets its next PC to the one calculated by the branch. cpu/beta_cpu/fetch_impl.hh: Remove old commented code. cpu/beta_cpu/iew_impl.hh: Add extra check to ensure that the instruction is valid. cpu/beta_cpu/regfile.hh: Include trace file. --HG-- extra : convert_revision : 4ee1dc88f8a5ed9b65486c6c111a3718a8040e42 --- base/traceflags.py | 3 ++- build/SConstruct | 7 ++++++- cpu/beta_cpu/alpha_full_cpu_impl.hh | 12 ++++++------ cpu/beta_cpu/commit_impl.hh | 1 + cpu/beta_cpu/decode_impl.hh | 9 ++++----- cpu/beta_cpu/fetch_impl.hh | 16 ---------------- cpu/beta_cpu/iew_impl.hh | 4 ++-- cpu/beta_cpu/regfile.hh | 2 ++ 8 files changed, 23 insertions(+), 31 deletions(-) diff --git a/base/traceflags.py b/base/traceflags.py index a1fb45177..eb404fa54 100644 --- a/base/traceflags.py +++ b/base/traceflags.py @@ -136,7 +136,8 @@ baseFlags = [ 'StoreSet', 'MemDepUnit', 'DynInst', - 'FullCPU' + 'FullCPU', + 'CommitRate' ] # diff --git a/build/SConstruct b/build/SConstruct index 381b6ecda..22f39b72c 100644 --- a/build/SConstruct +++ b/build/SConstruct @@ -114,11 +114,16 @@ def MySqlOpt(env): def NoFastAllocOpt(env): env.Append(CPPDEFINES = 'NO_FAST_ALLOC') +# Enable efence +def EfenceOpt(env): + env.Append(LIBS=['efence']) + # Configuration options map. 
options_map = { 'MEASURE' : MeasureOpt, 'MYSQL' : MySqlOpt, - 'NO_FAST_ALLOC' : NoFastAllocOpt + 'NO_FAST_ALLOC' : NoFastAllocOpt, + 'EFENCE' : EfenceOpt } # The 'local_configs' file can be used to define additional base diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh index ee8f9f33b..611a0d80d 100644 --- a/cpu/beta_cpu/alpha_full_cpu_impl.hh +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -127,7 +127,7 @@ AlphaFullCPU::copyToXC() for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { renamed_reg = renameMap.lookup(i); - xc->regs.intRegFile[i] = regFile.intRegFile[renamed_reg]; + xc->regs.intRegFile[i] = regFile.readIntReg(renamed_reg); DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", renamed_reg, regFile.intRegFile[renamed_reg]); } @@ -136,8 +136,8 @@ AlphaFullCPU::copyToXC() for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - xc->regs.floatRegFile.d[i] = regFile.floatRegFile[renamed_reg].d; - xc->regs.floatRegFile.q[i] = regFile.floatRegFile[renamed_reg].q; + xc->regs.floatRegFile.d[i] = regFile.readFloatRegDouble(renamed_reg); + xc->regs.floatRegFile.q[i] = regFile.readFloatRegInt(renamed_reg); } xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; @@ -169,15 +169,15 @@ AlphaFullCPU::copyFromXC() renamed_reg, regFile.intRegFile[renamed_reg], xc->regs.intRegFile[i]); - regFile.intRegFile[renamed_reg] = xc->regs.intRegFile[i]; + regFile.setIntReg(renamed_reg, xc->regs.intRegFile[i]); } // Then loop through the floating point registers. for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - regFile.floatRegFile[renamed_reg].d = xc->regs.floatRegFile.d[i]; - regFile.floatRegFile[renamed_reg].q = xc->regs.floatRegFile.q[i] ; + regFile.setFloatRegDouble(renamed_reg, xc->regs.floatRegFile.d[i]); + regFile.setFloatRegInt(renamed_reg, xc->regs.floatRegFile.q[i]); } // Then loop through the misc registers. 
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh index 9a69c9259..3e97b980c 100644 --- a/cpu/beta_cpu/commit_impl.hh +++ b/cpu/beta_cpu/commit_impl.hh @@ -323,6 +323,7 @@ SimpleCommit::commitInsts() head_inst = rob->readHeadInst(); } + DPRINTF(CommitRate, "%i\n", num_committed); n_committed_dist.sample(num_committed); } diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh index 8b20bf8bc..dd51f564d 100644 --- a/cpu/beta_cpu/decode_impl.hh +++ b/cpu/beta_cpu/decode_impl.hh @@ -147,7 +147,7 @@ SimpleDecode::squash(DynInstPtr &inst) { DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " "detected at decode.\n"); - Addr new_PC = inst->nextPC; + Addr new_PC = inst->readNextPC(); toFetch->decodeInfo.branchMispredict = true; toFetch->decodeInfo.doneSeqNum = inst->seqNum; @@ -355,10 +355,9 @@ SimpleDecode::decode() // Go ahead and compute any PC-relative branches. - if (inst->isDirectCtrl() && inst->isUncondCtrl() && - inst->numDestRegs() == 0 && inst->numSrcRegs() == 0) { - inst->execute(); - inst->setExecuted(); + if (inst->isDirectCtrl() && inst->isUncondCtrl()) { + + inst->setNextPC(inst->branchTarget()); if (inst->mispredicted()) { ++decodeBranchMispred; diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh index 8c9cf9f41..90caf9ffe 100644 --- a/cpu/beta_cpu/fetch_impl.hh +++ b/cpu/beta_cpu/fetch_impl.hh @@ -195,22 +195,6 @@ SimpleFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) predict_taken = branchPred.predict(inst, next_PC); -#if 0 - predict_taken = branchPred.BPLookup(next_PC) - - DPRINTF(Fetch, "Fetch: Branch predictor predicts taken? %i\n", - predict_taken); - - // Only check the BTB if the BP has predicted taken. 
- if (predict_taken && branchPred.BTBValid(next_PC)) { - predict_target = branchPred.BTBLookup(next_PC); - DPRINTF(Fetch, "Fetch: BTB target is %#x.\n", predict_target); - } else { - predict_taken = false; - DPRINTF(Fetch, "Fetch: BTB does not have a valid entry.\n"); - } - -#endif if (predict_taken) { ++predictedBranches; } diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index 2bfd6bae9..b718e6aa0 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -249,7 +249,6 @@ SimpleIEW::squashDueToBranch(DynInstPtr &inst) // Prediction was incorrect, so send back inverse. toCommit->branchTaken = inst->readCalcTarg() != (inst->readPC() + sizeof(MachInst)); -// toCommit->globalHist = inst->readGlobalHist(); } template @@ -363,10 +362,11 @@ SimpleIEW::dispatchInsts() continue; } else if (inst->isExecuted()) { + assert(0 && "Instruction shouldn't be executed.\n"); DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " "skipping.\n"); - assert(inst->isDirectCtrl()); +// assert(inst->isDirectCtrl()); inst->setIssued(); inst->setCanCommit(); diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index 148d9408a..f6fb917ba 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -8,6 +8,8 @@ using namespace std; #include "arch/alpha/isa_traits.hh" #include "cpu/beta_cpu/comm.hh" +#include "base/trace.hh" + // This really only depends on the ISA, and not the Impl. It might be nicer // to see if I can make it depend on nothing... // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, From c4d0ebd25cc5f0657b99543ff2df30d1a86f3ad5 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 11 Feb 2005 17:54:33 -0500 Subject: [PATCH 5/6] Fix up #defines to use full path; fix up code for g++ 3.4 SConscript: Remove efence option from automatically being used. 
--HG-- extra : convert_revision : 466bb8077aa341db0b409720e2a73535b1fa6b69 --- SConscript | 2 +- cpu/beta_cpu/alpha_dyn_inst.hh | 6 +++--- cpu/beta_cpu/alpha_full_cpu.hh | 10 +++------- cpu/beta_cpu/alpha_impl.hh | 6 +++--- cpu/beta_cpu/comm.hh | 18 ++++++++---------- cpu/beta_cpu/commit.cc | 4 ++-- cpu/beta_cpu/commit.hh | 11 ++++------- cpu/beta_cpu/cpu_policy.hh | 6 +++--- cpu/beta_cpu/decode.cc | 4 ++-- cpu/beta_cpu/decode.hh | 7 ++++--- cpu/beta_cpu/decode_impl.hh | 5 ----- cpu/beta_cpu/fetch.cc | 5 ++--- cpu/beta_cpu/fetch.hh | 10 +++++----- cpu/beta_cpu/full_cpu.hh | 2 -- cpu/beta_cpu/iew.cc | 7 +++---- cpu/beta_cpu/iew.hh | 8 ++++---- cpu/beta_cpu/regfile.hh | 2 -- cpu/beta_cpu/rename.cc | 4 ++-- cpu/beta_cpu/rename.hh | 7 ++++--- cpu/beta_cpu/rename_map.hh | 20 +++++++++----------- cpu/beta_cpu/rob.hh | 16 +++++++--------- 21 files changed, 69 insertions(+), 91 deletions(-) diff --git a/SConscript b/SConscript index 6d7ed172f..54f16fce6 100644 --- a/SConscript +++ b/SConscript @@ -432,7 +432,7 @@ env.Append(CPPPATH='.') # Debug binary debug = env.Copy(OBJSUFFIX='.do') -debug.Append(CCFLAGS=Split('-g -gstabs+ -O0 -lefence')) +debug.Append(CCFLAGS=Split('-g -gstabs+ -O0')) debug.Append(CPPDEFINES='DEBUG') debug.Program(target = 'm5.debug', source = make_objs(sources, debug)) diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index c964762db..584e027d7 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -1,7 +1,7 @@ //Todo: -#ifndef __ALPHA_DYN_INST_HH__ -#define __ALPHA_DYN_INST_HH__ +#ifndef __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ +#define __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ #include "cpu/base_dyn_inst.hh" #include "cpu/beta_cpu/alpha_full_cpu.hh" @@ -76,5 +76,5 @@ class AlphaDynInst : public BaseDynInst }; -#endif // __ALPHA_DYN_INST_HH__ +#endif // __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index e01eba3bf..92eebc82a 100644 --- 
a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -3,16 +3,12 @@ // Read and write are horribly hacked up between not being sure where to // copy their code from, and Ron's memory changes. -#ifndef __ALPHA_FULL_CPU_HH__ -#define __ALPHA_FULL_CPU_HH__ +#ifndef __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ +#define __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ // To include: comm, full cpu, ITB/DTB if full sys, -//#include "cpu/beta_cpu/comm.hh" -//#include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/full_cpu.hh" -using namespace std; - template class AlphaFullCPU : public FullBetaCPU { @@ -250,4 +246,4 @@ class AlphaFullCPU : public FullBetaCPU }; -#endif // __ALPHA_FULL_CPU_HH__ +#endif // __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh index fc86dacd7..81a1aba9b 100644 --- a/cpu/beta_cpu/alpha_impl.hh +++ b/cpu/beta_cpu/alpha_impl.hh @@ -1,5 +1,5 @@ -#ifndef __ALPHA_IMPL_HH__ -#define __ALPHA_IMPL_HH__ +#ifndef __CPU_BETA_CPU_ALPHA_IMPL_HH__ +#define __CPU_BETA_CPU_ALPHA_IMPL_HH__ #include "arch/alpha/isa_traits.hh" @@ -51,4 +51,4 @@ struct AlphaSimpleImpl }; }; -#endif // __ALPHA_IMPL_HH__ +#endif // __CPU_BETA_CPU_ALPHA_IMPL_HH__ diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh index e327a83b9..c0afe3d1b 100644 --- a/cpu/beta_cpu/comm.hh +++ b/cpu/beta_cpu/comm.hh @@ -1,13 +1,11 @@ -#ifndef __COMM_HH__ -#define __COMM_HH__ +#ifndef __CPU_BETA_CPU_COMM_HH__ +#define __CPU_BETA_CPU_COMM_HH__ #include #include #include "arch/alpha/isa_traits.hh" #include "cpu/inst_seq.hh" -using namespace std; - // Find better place to put this typedef. // The impl might be the best place for this. 
typedef short int PhysRegIndex; @@ -18,7 +16,7 @@ struct SimpleFetchSimpleDecode { int size; - DynInstPtr insts[Impl::MaxWidth + 1]; + DynInstPtr insts[Impl::MaxWidth]; }; template @@ -27,7 +25,7 @@ struct SimpleDecodeSimpleRename { int size; - DynInstPtr insts[Impl::MaxWidth + 1]; + DynInstPtr insts[Impl::MaxWidth]; }; template @@ -36,7 +34,7 @@ struct SimpleRenameSimpleIEW { int size; - DynInstPtr insts[Impl::MaxWidth + 1]; + DynInstPtr insts[Impl::MaxWidth]; }; template @@ -45,7 +43,7 @@ struct SimpleIEWSimpleCommit { int size; - DynInstPtr insts[Impl::MaxWidth + 1]; + DynInstPtr insts[Impl::MaxWidth]; bool squash; bool branchMispredict; @@ -62,7 +60,7 @@ struct IssueStruct { int size; - DynInstPtr insts[Impl::MaxWidth + 1]; + DynInstPtr insts[Impl::MaxWidth]; }; struct TimeBufStruct { @@ -142,4 +140,4 @@ struct TimeBufStruct { commitComm commitInfo; }; -#endif //__COMM_HH__ +#endif //__CPU_BETA_CPU_COMM_HH__ diff --git a/cpu/beta_cpu/commit.cc b/cpu/beta_cpu/commit.cc index 2efb38976..9e8fa2781 100644 --- a/cpu/beta_cpu/commit.cc +++ b/cpu/beta_cpu/commit.cc @@ -1,6 +1,6 @@ #include "cpu/beta_cpu/alpha_dyn_inst.hh" -#include "cpu/beta_cpu/commit_impl.hh" #include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/commit_impl.hh" -template SimpleCommit; +template class SimpleCommit; diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh index f1a185143..731307bf7 100644 --- a/cpu/beta_cpu/commit.hh +++ b/cpu/beta_cpu/commit.hh @@ -12,14 +12,11 @@ // Probably not a big deal if the IPR stuff isn't cycle accurate. Can just // have the original function handle writing to the IPR register. 
-#ifndef __SIMPLE_COMMIT_HH__ -#define __SIMPLE_COMMIT_HH__ +#ifndef __CPU_BETA_CPU_SIMPLE_COMMIT_HH__ +#define __CPU_BETA_CPU_SIMPLE_COMMIT_HH__ -//#include "arch/alpha/isa_traits.hh" +#include "base/statistics.hh" #include "base/timebuf.hh" -//#include "cpu/beta_cpu/comm.hh" -//#include "cpu/beta_cpu/rename_map.hh" -//#include "cpu/beta_cpu/rob.hh" #include "mem/memory_interface.hh" template @@ -157,4 +154,4 @@ class SimpleCommit Stats::Distribution<> n_committed_dist; }; -#endif // __SIMPLE_COMMIT_HH__ +#endif // __CPU_BETA_CPU_SIMPLE_COMMIT_HH__ diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh index 1479eb191..6606aba29 100644 --- a/cpu/beta_cpu/cpu_policy.hh +++ b/cpu/beta_cpu/cpu_policy.hh @@ -1,5 +1,5 @@ -#ifndef __CPU_POLICY_HH__ -#define __CPU_POLICY_HH__ +#ifndef __CPU_BETA_CPU_CPU_POLICY_HH__ +#define __CPU_BETA_CPU_CPU_POLICY_HH__ #include "cpu/beta_cpu/bpred_unit.hh" #include "cpu/beta_cpu/inst_queue.hh" @@ -57,4 +57,4 @@ struct SimpleCPUPolicy }; -#endif //__CPU_POLICY_HH__ +#endif //__CPU_BETA_CPU_CPU_POLICY_HH__ diff --git a/cpu/beta_cpu/decode.cc b/cpu/beta_cpu/decode.cc index ffabcf18a..177293bca 100644 --- a/cpu/beta_cpu/decode.cc +++ b/cpu/beta_cpu/decode.cc @@ -1,6 +1,6 @@ #include "cpu/beta_cpu/alpha_dyn_inst.hh" -#include "cpu/beta_cpu/decode_impl.hh" #include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/decode_impl.hh" -template SimpleDecode; +template class SimpleDecode; diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh index 64e87290e..dd18cf176 100644 --- a/cpu/beta_cpu/decode.hh +++ b/cpu/beta_cpu/decode.hh @@ -5,11 +5,12 @@ // Fix up squashing too, as it's too // dependent upon the iew stage continually telling it to squash. 
-#ifndef __SIMPLE_DECODE_HH__ -#define __SIMPLE_DECODE_HH__ +#ifndef __CPU_BETA_CPU_SIMPLE_DECODE_HH__ +#define __CPU_BETA_CPU_SIMPLE_DECODE_HH__ #include +#include "base/statistics.hh" #include "base/timebuf.hh" template @@ -141,4 +142,4 @@ class SimpleDecode Stats::Scalar<> decodeSquashedInsts; }; -#endif // __SIMPLE_DECODE_HH__ +#endif // __CPU_BETA_CPU_SIMPLE_DECODE_HH__ diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh index dd51f564d..9d88f94ac 100644 --- a/cpu/beta_cpu/decode_impl.hh +++ b/cpu/beta_cpu/decode_impl.hh @@ -1,6 +1,3 @@ -#ifndef __SIMPLE_DECODE_CC__ -#define __SIMPLE_DECODE_CC__ - #include "cpu/beta_cpu/decode.hh" template @@ -392,5 +389,3 @@ SimpleDecode::decode() numInst = 0; } - -#endif // __SIMPLE_DECODE_CC__ diff --git a/cpu/beta_cpu/fetch.cc b/cpu/beta_cpu/fetch.cc index 4d08754b6..877262750 100644 --- a/cpu/beta_cpu/fetch.cc +++ b/cpu/beta_cpu/fetch.cc @@ -1,7 +1,6 @@ #include "cpu/beta_cpu/alpha_dyn_inst.hh" -#include "cpu/beta_cpu/alpha_full_cpu.hh" -#include "cpu/beta_cpu/fetch_impl.hh" #include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/fetch_impl.hh" -template SimpleFetch; +template class SimpleFetch; diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh index 4cfc2f167..7a3893708 100644 --- a/cpu/beta_cpu/fetch.hh +++ b/cpu/beta_cpu/fetch.hh @@ -4,17 +4,17 @@ // Figure out where to advance time buffer. Add a way to get a // stage's current status. -#ifndef __SIMPLE_FETCH_HH__ -#define __SIMPLE_FETCH_HH__ +#ifndef __CPU_BETA_CPU_SIMPLE_FETCH_HH__ +#define __CPU_BETA_CPU_SIMPLE_FETCH_HH__ //Will want to include: time buffer, structs, MemInterface, Event, //whatever class bzero uses, MemReqPtr +#include "base/statistics.hh" #include "base/timebuf.hh" -#include "sim/eventq.hh" #include "cpu/pc_event.hh" #include "mem/mem_interface.hh" -#include "base/statistics.hh" +#include "sim/eventq.hh" /** * SimpleFetch class to fetch a single instruction each cycle. 
SimpleFetch @@ -207,4 +207,4 @@ class SimpleFetch Stats::Distribution<> fetch_nisn_dist; }; -#endif //__SIMPLE_FETCH_HH__ +#endif //__CPU_BETA_CPU_SIMPLE_FETCH_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index bde7e5bbf..19eb972d9 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -20,8 +20,6 @@ #include "cpu/beta_cpu/cpu_policy.hh" #include "sim/process.hh" -using namespace std; - class FunctionalMemory; class Process; diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc index 8abb2f196..a90d64434 100644 --- a/cpu/beta_cpu/iew.cc +++ b/cpu/beta_cpu/iew.cc @@ -1,8 +1,7 @@ #include "cpu/beta_cpu/alpha_dyn_inst.hh" -#include "cpu/beta_cpu/inst_queue.hh" -#include "cpu/beta_cpu/iew_impl.hh" #include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/iew_impl.hh" +#include "cpu/beta_cpu/inst_queue.hh" -template SimpleIEW; +template class SimpleIEW; diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh index 90bd39e7f..e3e7c6db5 100644 --- a/cpu/beta_cpu/iew.hh +++ b/cpu/beta_cpu/iew.hh @@ -2,14 +2,14 @@ //Need to handle delaying writes to the writeback bus if it's full at the //given time. Load store queue. -#ifndef __SIMPLE_IEW_HH__ -#define __SIMPLE_IEW_HH__ +#ifndef __CPU_BETA_CPU_SIMPLE_IEW_HH__ +#define __CPU_BETA_CPU_SIMPLE_IEW_HH__ #include +#include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/beta_cpu/comm.hh" -#include "base/statistics.hh" //Can IEW even stall? 
Space should be available/allocated already...maybe //if there's not enough write ports on the ROB or waiting for CDB @@ -187,4 +187,4 @@ class SimpleIEW Stats::Scalar<> predictedTakenIncorrect; }; -#endif +#endif // __CPU_BETA_CPU_IEW_HH__ diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index f6fb917ba..a81ed63bc 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -3,8 +3,6 @@ // @todo: Destructor -using namespace std; - #include "arch/alpha/isa_traits.hh" #include "cpu/beta_cpu/comm.hh" diff --git a/cpu/beta_cpu/rename.cc b/cpu/beta_cpu/rename.cc index bcce7ef49..1feec4342 100644 --- a/cpu/beta_cpu/rename.cc +++ b/cpu/beta_cpu/rename.cc @@ -1,6 +1,6 @@ #include "cpu/beta_cpu/alpha_dyn_inst.hh" -#include "cpu/beta_cpu/rename_impl.hh" #include "cpu/beta_cpu/alpha_impl.hh" +#include "cpu/beta_cpu/rename_impl.hh" -template SimpleRename; +template class SimpleRename; diff --git a/cpu/beta_cpu/rename.hh b/cpu/beta_cpu/rename.hh index 3e6b873ae..3e9899718 100644 --- a/cpu/beta_cpu/rename.hh +++ b/cpu/beta_cpu/rename.hh @@ -3,11 +3,12 @@ // May want to have different statuses to differentiate the different stall // conditions. -#ifndef __SIMPLE_RENAME_HH__ -#define __SIMPLE_RENAME_HH__ +#ifndef __CPU_BETA_CPU_SIMPLE_RENAME_HH__ +#define __CPU_BETA_CPU_SIMPLE_RENAME_HH__ #include +#include "base/statistics.hh" #include "base/timebuf.hh" // Will need rename maps for both the int reg file and fp reg file. @@ -202,4 +203,4 @@ class SimpleRename Stats::Scalar<> renameValidUndoneMaps; }; -#endif // __SIMPLE_RENAME_HH__ +#endif // __CPU_BETA_CPU_SIMPLE_RENAME_HH__ diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh index e68fa05a8..44a7eefb1 100644 --- a/cpu/beta_cpu/rename_map.hh +++ b/cpu/beta_cpu/rename_map.hh @@ -2,17 +2,15 @@ // Have it so that there's a more meaningful name given to the variable // that marks the beginning of the FP registers. 
-#ifndef __RENAME_MAP_HH__ -#define __RENAME_MAP_HH__ +#ifndef __CPU_BETA_CPU_RENAME_MAP_HH__ +#define __CPU_BETA_CPU_RENAME_MAP_HH__ #include -#include #include +#include #include "cpu/beta_cpu/free_list.hh" -using namespace std; - class SimpleRenameMap { public: @@ -21,7 +19,7 @@ class SimpleRenameMap * previous mapping of a logical register to a physical register. * Used to roll back the rename map to a previous state. */ - typedef pair UnmapInfo; + typedef std::pair UnmapInfo; /** * Pair of a physical register and a physical register. Used to @@ -29,7 +27,7 @@ class SimpleRenameMap * renamed to, and the previous physical register that the same * logical register was previously mapped to. */ - typedef pair RenameInfo; + typedef std::pair RenameInfo; public: //Constructor @@ -128,17 +126,17 @@ class SimpleRenameMap /** Scoreboard of physical integer registers, saying whether or not they * are ready. */ - vector intScoreboard; + std::vector intScoreboard; /** Scoreboard of physical floating registers, saying whether or not they * are ready. */ - vector floatScoreboard; + std::vector floatScoreboard; /** Scoreboard of miscellaneous registers, saying whether or not they * are ready. */ - vector miscScoreboard; + std::vector miscScoreboard; }; -#endif //__RENAME_MAP_HH__ +#endif //__CPU_BETA_CPU_RENAME_MAP_HH__ diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh index c921c0619..da6b5232a 100644 --- a/cpu/beta_cpu/rob.hh +++ b/cpu/beta_cpu/rob.hh @@ -4,15 +4,13 @@ // all instructions after the instruction, and all instructions after *and* // including that instruction. -#ifndef __ROB_HH__ -#define __ROB_HH__ +#ifndef __CPU_BETA_CPU_ROB_HH__ +#define __CPU_BETA_CPU_ROB_HH__ -#include -#include +#include +#include -#include "arch/alpha/isa_traits.hh" - -using namespace std; +//#include "arch/alpha/isa_traits.hh" /** * ROB class. 
Uses the instruction list that exists within the CPU to @@ -28,7 +26,7 @@ class ROB typedef typename Impl::FullCPU FullCPU; typedef typename Impl::DynInstPtr DynInstPtr; - typedef pair UnmapInfo_t; + typedef std::pair UnmapInfo_t; typedef typename list::iterator InstIt_t; public: @@ -135,4 +133,4 @@ class ROB bool doneSquashing; }; -#endif //__ROB_HH__ +#endif //__CPU_BETA_CPU_ROB_HH__ From 5c4714c1a91680a0253f866958a9db80cd8decb2 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 25 Feb 2005 18:00:49 -0500 Subject: [PATCH 6/6] Initial light-weight OoO CPU checkin, along with gcc-3.4 fixes. SConscript: Include new files. arch/alpha/isa_desc: Make the eaCompPtr and memAccPtr non-const so that execute() can be called on them. arch/alpha/isa_traits.hh: Add enum for total number of data registers. arch/isa_parser.py: base/traceflags.py: Include new light-weight OoO CPU model. cpu/base_dyn_inst.cc: cpu/base_dyn_inst.hh: Changes to abstract more away from the base dyn inst class. cpu/beta_cpu/2bit_local_pred.cc: cpu/beta_cpu/2bit_local_pred.hh: cpu/beta_cpu/tournament_pred.cc: cpu/beta_cpu/tournament_pred.hh: Remove redundant SatCounter class. cpu/beta_cpu/alpha_dyn_inst.cc: cpu/beta_cpu/alpha_full_cpu.cc: cpu/beta_cpu/alpha_full_cpu.hh: cpu/beta_cpu/bpred_unit.cc: cpu/beta_cpu/inst_queue.cc: cpu/beta_cpu/mem_dep_unit.cc: cpu/beta_cpu/ras.cc: cpu/beta_cpu/rename_map.cc: cpu/beta_cpu/rename_map.hh: cpu/beta_cpu/rob.cc: Fix for gcc-3.4 cpu/beta_cpu/alpha_dyn_inst.hh: cpu/beta_cpu/alpha_dyn_inst_impl.hh: Fixes for gcc-3.4. Include more variables and functions that are specific to AlphaDynInst which were once in BaseDynInst. cpu/beta_cpu/alpha_full_cpu_builder.cc: Make params match the current params inherited from BaseCPU. cpu/beta_cpu/alpha_full_cpu_impl.hh: Fixes for gcc-3.4 cpu/beta_cpu/full_cpu.cc: Use new params pointer in BaseCPU. Fix for gcc-3.4. cpu/beta_cpu/full_cpu.hh: Use new params class from BaseCPU. cpu/beta_cpu/iew_impl.hh: Remove unused function. 
cpu/simple_cpu/simple_cpu.cc: Remove unused global variable. cpu/static_inst.hh: Include OoODynInst for new lightweight OoO CPU --HG-- extra : convert_revision : 34d9f2e64ca0313377391e0d059bf09c040286fa --- SConscript | 57 ++- arch/alpha/isa_desc | 8 +- arch/alpha/isa_traits.hh | 4 + arch/isa_parser.py | 3 + base/traceflags.py | 3 +- cpu/base_dyn_inst.cc | 135 +++--- cpu/base_dyn_inst.hh | 252 +++------- cpu/beta_cpu/2bit_local_pred.cc | 35 +- cpu/beta_cpu/2bit_local_pred.hh | 53 +-- cpu/beta_cpu/alpha_dyn_inst.cc | 2 +- cpu/beta_cpu/alpha_dyn_inst.hh | 135 +++++- cpu/beta_cpu/alpha_dyn_inst_impl.hh | 56 ++- cpu/beta_cpu/alpha_full_cpu.cc | 2 +- cpu/beta_cpu/alpha_full_cpu.hh | 26 +- cpu/beta_cpu/alpha_full_cpu_builder.cc | 8 +- cpu/beta_cpu/alpha_full_cpu_impl.hh | 118 ++--- cpu/beta_cpu/bpred_unit.cc | 2 +- cpu/beta_cpu/full_cpu.cc | 16 +- cpu/beta_cpu/full_cpu.hh | 22 +- cpu/beta_cpu/iew_impl.hh | 6 +- cpu/beta_cpu/inst_queue.cc | 3 +- cpu/beta_cpu/mem_dep_unit.cc | 2 +- cpu/beta_cpu/ras.cc | 5 +- cpu/beta_cpu/rename_map.cc | 4 + cpu/beta_cpu/rename_map.hh | 4 +- cpu/beta_cpu/rob.cc | 2 +- cpu/beta_cpu/sat_counter.cc | 43 ++ cpu/beta_cpu/sat_counter.hh | 62 +++ cpu/beta_cpu/tournament_pred.cc | 53 +-- cpu/beta_cpu/tournament_pred.hh | 53 +-- cpu/ooo_cpu/ea_list.cc | 50 ++ cpu/ooo_cpu/ea_list.hh | 44 ++ cpu/ooo_cpu/ooo_cpu.cc | 6 + cpu/ooo_cpu/ooo_cpu.hh | 613 +++++++++++++++++++++++++ cpu/ooo_cpu/ooo_impl.hh | 21 + cpu/simple_cpu/simple_cpu.cc | 3 - cpu/static_inst.hh | 15 +- 37 files changed, 1324 insertions(+), 602 deletions(-) create mode 100644 cpu/beta_cpu/sat_counter.cc create mode 100644 cpu/beta_cpu/sat_counter.hh create mode 100644 cpu/ooo_cpu/ea_list.cc create mode 100644 cpu/ooo_cpu/ea_list.hh create mode 100644 cpu/ooo_cpu/ooo_cpu.cc create mode 100644 cpu/ooo_cpu/ooo_cpu.hh create mode 100644 cpu/ooo_cpu/ooo_impl.hh diff --git a/SConscript b/SConscript index 4e4cb8727..94e788ce9 100644 --- a/SConscript +++ b/SConscript @@ -51,6 +51,7 @@ base_sources 
= Split(''' arch/alpha/full_cpu_exec.cc arch/alpha/faults.cc arch/alpha/isa_traits.cc + arch/alpha/ooo_cpu_exec.cc base/circlebuf.cc base/copyright.cc @@ -114,6 +115,7 @@ base_sources = Split(''' cpu/beta_cpu/rename.cc cpu/beta_cpu/rename_map.cc cpu/beta_cpu/rob.cc + cpu/beta_cpu/sat_counter.cc cpu/beta_cpu/store_set.cc cpu/beta_cpu/tournament_pred.cc cpu/fast_cpu/fast_cpu.cc @@ -136,30 +138,34 @@ base_sources = Split(''' cpu/full_cpu/ls_queue.cc cpu/full_cpu/machine_queue.cc cpu/full_cpu/pc_sample_profile.cc - cpu/full_cpu/pipetrace.cc - cpu/full_cpu/readyq.cc - cpu/full_cpu/reg_info.cc - cpu/full_cpu/rob_station.cc - cpu/full_cpu/spec_memory.cc - cpu/full_cpu/spec_state.cc - cpu/full_cpu/storebuffer.cc - cpu/full_cpu/writeback.cc - cpu/full_cpu/iq/iq_station.cc - cpu/full_cpu/iq/iqueue.cc - cpu/full_cpu/iq/segmented/chain_info.cc - cpu/full_cpu/iq/segmented/chain_wire.cc - cpu/full_cpu/iq/segmented/iq_seg.cc - cpu/full_cpu/iq/segmented/iq_segmented.cc - cpu/full_cpu/iq/segmented/seg_chain.cc - cpu/full_cpu/iq/seznec/iq_seznec.cc - cpu/full_cpu/iq/standard/iq_standard.cc - cpu/sampling_cpu/sampling_cpu.cc - cpu/simple_cpu/simple_cpu.cc - cpu/inorder_cpu/inorder_cpu.cc - cpu/trace/reader/mem_trace_reader.cc - cpu/trace/reader/ibm_reader.cc - cpu/trace/reader/itx_reader.cc - cpu/trace/reader/m5_reader.cc + cpu/full_cpu/pipetrace.cc + cpu/full_cpu/readyq.cc + cpu/full_cpu/reg_info.cc + cpu/full_cpu/rob_station.cc + cpu/full_cpu/spec_memory.cc + cpu/full_cpu/spec_state.cc + cpu/full_cpu/storebuffer.cc + cpu/full_cpu/writeback.cc + cpu/full_cpu/iq/iq_station.cc + cpu/full_cpu/iq/iqueue.cc + cpu/full_cpu/iq/segmented/chain_info.cc + cpu/full_cpu/iq/segmented/chain_wire.cc + cpu/full_cpu/iq/segmented/iq_seg.cc + cpu/full_cpu/iq/segmented/iq_segmented.cc + cpu/full_cpu/iq/segmented/seg_chain.cc + cpu/full_cpu/iq/seznec/iq_seznec.cc + cpu/full_cpu/iq/standard/iq_standard.cc + cpu/inorder_cpu/inorder_cpu.cc + cpu/ooo_cpu/ea_list.cc + cpu/ooo_cpu/ooo_cpu.cc + 
cpu/ooo_cpu/ooo_dyn_inst.cc + cpu/ooo_cpu/ooo_sim_obj.cc + cpu/sampling_cpu/sampling_cpu.cc + cpu/simple_cpu/simple_cpu.cc + cpu/trace/reader/mem_trace_reader.cc + cpu/trace/reader/ibm_reader.cc + cpu/trace/reader/itx_reader.cc + cpu/trace/reader/m5_reader.cc mem/base_hier.cc mem/base_mem.cc @@ -390,7 +396,8 @@ env.Command(Split('''arch/alpha/decoder.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/inorder_cpu_exec.cc - arch/alpha/full_cpu_exec.cc'''), + arch/alpha/full_cpu_exec.cc + arch/alpha/ooo_cpu_exec.cc'''), Split('''arch/alpha/isa_desc arch/isa_parser.py'''), '$SRCDIR/arch/isa_parser.py $SOURCE $TARGET.dir arch/alpha') diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc index e7d0d68ce..1e92033dc 100644 --- a/arch/alpha/isa_desc +++ b/arch/alpha/isa_desc @@ -738,9 +738,9 @@ output header {{ /// Memory request flags. See mem_req_base.hh. unsigned memAccessFlags; /// Pointer to EAComp object. - const StaticInstPtr eaCompPtr; + StaticInstPtr eaCompPtr; /// Pointer to MemAcc object. 
- const StaticInstPtr memAccPtr; + StaticInstPtr memAccPtr; /// Constructor Memory(const char *mnem, MachInst _machInst, OpClass __opClass, @@ -755,8 +755,8 @@ output header {{ public: - const StaticInstPtr &eaCompInst() const { return eaCompPtr; } - const StaticInstPtr &memAccInst() const { return memAccPtr; } + StaticInstPtr &eaCompInst() { return eaCompPtr; } + StaticInstPtr &memAccInst() { return memAccPtr; } }; /** diff --git a/arch/alpha/isa_traits.hh b/arch/alpha/isa_traits.hh index ff3da1502..bf184b875 100644 --- a/arch/alpha/isa_traits.hh +++ b/arch/alpha/isa_traits.hh @@ -148,6 +148,10 @@ static const Addr PageOffset = PageBytes - 1; NumIntRegs + NumFloatRegs + NumMiscRegs + NumInternalProcRegs }; + enum { + TotalDataRegs = NumIntRegs + NumFloatRegs + }; + typedef union { IntReg intreg; FloatReg fpreg; diff --git a/arch/isa_parser.py b/arch/isa_parser.py index 8187cf188..5e0267c9e 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -642,6 +642,9 @@ CpuModel('FullCPU', 'full_cpu_exec.cc', CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', { 'CPU_exec_context': 'AlphaDynInst' }) +CpuModel('OoOCPU', 'ooo_cpu_exec.cc', + '#include "cpu/ooo_cpu/ooo_dyn_inst.hh"', + { 'CPU_exec_context': 'OoODynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. The entry key is the model name diff --git a/base/traceflags.py b/base/traceflags.py index d1eb4478a..496647116 100644 --- a/base/traceflags.py +++ b/base/traceflags.py @@ -138,7 +138,8 @@ baseFlags = [ 'MemDepUnit', 'DynInst', 'FullCPU', - 'CommitRate' + 'CommitRate', + 'OoOCPU' ] # diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index 74f6b8a6c..b8424f576 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef __BASE_DYN_INST_CC__ -#define __BASE_DYN_INST_CC__ +#ifndef __CPU_BASE_DYN_INST_CC__ +#define __CPU_BASE_DYN_INST_CC__ #include #include @@ -43,6 +43,8 @@ #include "cpu/base_dyn_inst.hh" #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/ooo_cpu/ooo_impl.hh" +#include "cpu/ooo_cpu/ooo_cpu.hh" using namespace std; @@ -74,90 +76,56 @@ BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, FullCPU *cpu) : staticInst(machInst), traceData(NULL), cpu(cpu), xc(cpu->xcBase()) { - DPRINTF(FullCPU, "DynInst: Creating new DynInst.\n"); - - effAddr = MemReq::inval_addr; - physEffAddr = MemReq::inval_addr; - - readyRegs = 0; - seqNum = seq_num; -// specMemWrite = false; - - canIssue = false; - issued = false; - executed = false; - canCommit = false; - squashed = false; - squashedInIQ = false; - - blockingInst = false; - recoverInst = false; - specMode = false; -// btbMissed = false; - // Eventually make this a parameter. - threadNumber = 0; - // Also make this a parameter. - specMode = true; - // Also make this a parameter, or perhaps get it from xc or cpu. - asid = 0; - - // Initialize the fault to be unimplemented opcode. - fault = Unimplemented_Opcode_Fault; - PC = inst_PC; nextPC = PC + sizeof(MachInst); predPC = pred_PC; - // Make sure to have the renamed register entries set to the same - // as the normal register entries. It will allow the IQ to work - // without any modifications. - for (int i = 0; i < staticInst->numDestRegs(); i++) - { - _destRegIdx[i] = staticInst->destRegIdx(i); - } - - for (int i = 0; i < staticInst->numSrcRegs(); i++) - { - _srcRegIdx[i] = staticInst->srcRegIdx(i); - _readySrcRegIdx[i] = 0; - } - - ++instcount; - -// assert(instcount < 50); - - DPRINTF(FullCPU, "DynInst: Instruction created. 
Instcount=%i\n", - instcount); + initVars(); } template BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) : staticInst(_staticInst), traceData(NULL) +{ + initVars(); +} + +template +void +BaseDynInst::initVars() { effAddr = MemReq::inval_addr; physEffAddr = MemReq::inval_addr; -// specMemWrite = false; + readyRegs = 0; + + completed = false; + canIssue = false; + issued = false; + executed = false; + canCommit = false; + squashed = false; + squashedInIQ = false; + eaCalcDone = false; blockingInst = false; recoverInst = false; - specMode = false; -// btbMissed = false; - // Make sure to have the renamed register entries set to the same - // as the normal register entries. It will allow the IQ to work - // without any modifications. - for (int i = 0; i < staticInst->numDestRegs(); i++) - { - _destRegIdx[i] = staticInst->destRegIdx(i); - } + // Eventually make this a parameter. + threadNumber = 0; - for (int i = 0; i < staticInst->numSrcRegs(); i++) - { - _srcRegIdx[i] = staticInst->srcRegIdx(i); - } + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be unimplemented opcode. + fault = Unimplemented_Opcode_Fault; + + ++instcount; + + DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", + instcount); } template @@ -173,14 +141,14 @@ BaseDynInst::~BaseDynInst() DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n", instcount); } - +/* template FunctionalMemory * BaseDynInst::getMemory(void) { return xc->mem; } -/* + template IntReg * BaseDynInst::getIntegerRegs(void) @@ -395,10 +363,35 @@ BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) #endif +template +bool +BaseDynInst::eaSrcsReady() +{ + // For now I am assuming that src registers 1..n-1 are the ones that the + // EA calc depends on. (i.e. 
src reg 0 is the source of the data to be + // stored) + +// StaticInstPtr eaInst = staticInst->eaCompInst(); + + for (int i = 1; i < numSrcRegs(); ++i) + { + if (!_readySrcRegIdx[i]) + return false; + } + + return true; +} + +// Forward declaration... +template class BaseDynInst; +template class BaseDynInst; + +template <> int BaseDynInst::instcount = 0; -// Forward declaration... -template BaseDynInst; +template <> +int +BaseDynInst::instcount = 0; -#endif // __BASE_DYN_INST_CC__ +#endif // __CPU_BASE_DYN_INST_CC__ diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 171721e61..943293b25 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -26,35 +26,32 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __BASE_DYN_INST_HH__ -#define __BASE_DYN_INST_HH__ +#ifndef __CPU_BASE_DYN_INST_HH__ +#define __CPU_BASE_DYN_INST_HH__ -#include #include +#include #include "base/fast_alloc.hh" #include "base/trace.hh" -#include "cpu/static_inst.hh" #include "cpu/beta_cpu/comm.hh" +#include "cpu/exetrace.hh" #include "cpu/full_cpu/bpred_update.hh" -#include "mem/functional_mem/main_memory.hh" -#include "cpu/full_cpu/spec_memory.hh" -#include "cpu/inst_seq.hh" #include "cpu/full_cpu/op_class.hh" +#include "cpu/full_cpu/spec_memory.hh" #include "cpu/full_cpu/spec_state.hh" +#include "cpu/inst_seq.hh" +#include "cpu/static_inst.hh" +#include "mem/functional_mem/main_memory.hh" /** * @file * Defines a dynamic instruction context. */ -namespace Trace { - class InstRecord; -}; - // Forward declaration. 
-template +template class StaticInstPtr; template @@ -90,8 +87,6 @@ class BaseDynInst : public FastAlloc, public RefCounted //////////////////////////////////////////// Trace::InstRecord *traceData; -// void setCPSeq(InstSeqNum seq); - template Fault read(Addr addr, T &data, unsigned flags); @@ -99,15 +94,12 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - - IntReg *getIntegerRegs(void); - FunctionalMemory *getMemory(void); - void prefetch(Addr addr, unsigned flags); void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); Fault copy(Addr dest); + // Probably should be private... public: /** Is this instruction valid. */ bool valid; @@ -118,6 +110,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** How many source registers are ready. */ unsigned readyRegs; + /** Is the instruction completed. */ + bool completed; + /** Can this instruction issue. */ bool canIssue; @@ -145,18 +140,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Is this a thread syncrhonization instruction. */ bool threadsyncWait; - /** If the BTB missed. */ -// bool btbMissed; - - /** The global history of this instruction (branch). */ -// unsigned globalHistory; - /** The thread this instruction is from. */ short threadNumber; - /** If instruction is speculative. */ - short specMode; - /** data address space ID, for loads & stores. */ short asid; @@ -190,14 +176,16 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The data to be stored. */ IntReg storeData; - /** Result of this instruction, if an integer. */ - uint64_t intResult; + union Result { + uint64_t integer; + float fp; + double dbl; + }; - /** Result of this instruction, if a float. */ - float floatResult; - - /** Result of this instruction, if a double. */ - double doubleResult; + /** The result of the instruction; assumes for now that there's only one + * destination register. 
+ */ + Result instResult; /** PC of this instruction. */ Addr PC; @@ -214,28 +202,11 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Count of total number of dynamic instructions. */ static int instcount; - /** Did this instruction do a spec write? */ -// bool specMemWrite; - - private: - /** Physical register index of the destination registers of this - * instruction. + /** Whether or not the source register is ready. Not sure this should be + * here vs. the derived class. */ - PhysRegIndex _destRegIdx[MaxInstDestRegs]; - - /** Physical register index of the source registers of this - * instruction. - */ - PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; - - /** Whether or not the source register is ready. */ bool _readySrcRegIdx[MaxInstSrcRegs]; - /** Physical register index of the previous producers of the - * architected destinations. - */ - PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; - public: /** BaseDynInst constructor given a binary instruction. */ BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, @@ -247,14 +218,10 @@ class BaseDynInst : public FastAlloc, public RefCounted /** BaseDynInst destructor. */ ~BaseDynInst(); -#if 0 - Fault - mem_access(MemCmd cmd, // Read or Write access cmd - Addr addr, // virtual address of access - void *p, // input/output buffer - int nbytes); // access size -#endif + private: + void initVars(); + public: void trace_mem(Fault fault, // last fault MemCmd cmd, // last command @@ -278,7 +245,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool doneTargCalc() { return false; } /** Returns the calculated target of the branch. */ - Addr readCalcTarg() { return nextPC; } +// Addr readCalcTarg() { return nextPC; } Addr readNextPC() { return nextPC; } @@ -296,16 +263,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether the instruction mispredicted. 
*/ bool mispredicted() { return (predPC != nextPC); } -/* - unsigned readGlobalHist() { - return globalHistory; - } - - void setGlobalHist(unsigned history) { - globalHistory = history; - } -*/ - // // Instruction types. Forward checks to StaticInst object. // @@ -331,6 +288,12 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } int8_t numDestRegs() const { return staticInst->numDestRegs(); } @@ -351,52 +314,9 @@ class BaseDynInst : public FastAlloc, public RefCounted return staticInst->srcRegIdx(i); } - /** Returns the physical register index of the i'th destination - * register. - */ - PhysRegIndex renamedDestRegIdx(int idx) const - { - return _destRegIdx[idx]; - } - - /** Returns the physical register index of the i'th source register. */ - PhysRegIndex renamedSrcRegIdx(int idx) const - { - return _srcRegIdx[idx]; - } - - bool isReadySrcRegIdx(int idx) const - { - return _readySrcRegIdx[idx]; - } - - /** Returns the physical register index of the previous physical register - * that remapped to the same logical register index. - */ - PhysRegIndex prevDestRegIdx(int idx) const - { - return _prevDestRegIdx[idx]; - } - - /** Renames a destination register to a physical register. Also records - * the previous physical register that the logical register mapped to. 
- */ - void renameDestReg(int idx, - PhysRegIndex renamed_dest, - PhysRegIndex previous_rename) - { - _destRegIdx[idx] = renamed_dest; - _prevDestRegIdx[idx] = previous_rename; - } - - /** Renames a source logical register to the physical register which - * has/will produce that logical register's result. - * @todo: add in whether or not the source register is ready. - */ - void renameSrcReg(int idx, PhysRegIndex renamed_src) - { - _srcRegIdx[idx] = renamed_src; - } + uint64_t readIntResult() { return instResult.integer; } + float readFloatResult() { return instResult.fp; } + double readDoubleResult() { return instResult.dbl; } //Push to .cc file. /** Records that one of the source registers is ready. */ @@ -419,6 +339,15 @@ class BaseDynInst : public FastAlloc, public RefCounted } } + bool isReadySrcRegIdx(int idx) const + { + return this->_readySrcRegIdx[idx]; + } + + void setCompleted() { completed = true; } + + bool isCompleted() const { return completed; } + /** Sets this instruction as ready to issue. */ void setCanIssue() { canIssue = true; } @@ -429,13 +358,13 @@ class BaseDynInst : public FastAlloc, public RefCounted void setIssued() { issued = true; } /** Returns whether or not this instruction has issued. */ - bool isIssued() { return issued; } + bool isIssued() const { return issued; } /** Sets this instruction as executed. */ void setExecuted() { executed = true; } /** Returns whether or not this instruction has executed. */ - bool isExecuted() { return executed; } + bool isExecuted() const { return executed; } /** Sets this instruction as ready to commit. */ void setCanCommit() { canCommit = true; } @@ -456,82 +385,25 @@ class BaseDynInst : public FastAlloc, public RefCounted void setSquashedInIQ() { squashedInIQ = true; } /** Returns whether or not this instruction is squashed in the IQ. */ - bool isSquashedInIQ() { return squashedInIQ; } - - /** Returns the opclass of this instruction. 
*/ - OpClass opClass() const { return staticInst->opClass(); } - - /** Returns whether or not the BTB missed. */ -// bool btbMiss() const { return btbMissed; } - - /** Returns the branch target address. */ - Addr branchTarget() const { return staticInst->branchTarget(PC); } - - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). - - uint64_t readIntReg(StaticInst *si, int idx) - { - return cpu->readIntReg(_srcRegIdx[idx]); - } - - float readFloatRegSingle(StaticInst *si, int idx) - { - return cpu->readFloatRegSingle(_srcRegIdx[idx]); - } - - double readFloatRegDouble(StaticInst *si, int idx) - { - return cpu->readFloatRegDouble(_srcRegIdx[idx]); - } - - uint64_t readFloatRegInt(StaticInst *si, int idx) - { - return cpu->readFloatRegInt(_srcRegIdx[idx]); - } - /** @todo: Make results into arrays so they can handle multiple dest - * registers. 
- */ - void setIntReg(StaticInst *si, int idx, uint64_t val) - { - cpu->setIntReg(_destRegIdx[idx], val); - intResult = val; - } - - void setFloatRegSingle(StaticInst *si, int idx, float val) - { - cpu->setFloatRegSingle(_destRegIdx[idx], val); - floatResult = val; - } - - void setFloatRegDouble(StaticInst *si, int idx, double val) - { - cpu->setFloatRegDouble(_destRegIdx[idx], val); - doubleResult = val; - } - - void setFloatRegInt(StaticInst *si, int idx, uint64_t val) - { - cpu->setFloatRegInt(_destRegIdx[idx], val); - intResult = val; - } + bool isSquashedInIQ() const { return squashedInIQ; } /** Read the PC of this instruction. */ - Addr readPC() { return PC; } + const Addr readPC() const { return PC; } /** Set the next PC of this instruction (its actual target). */ void setNextPC(uint64_t val) { nextPC = val; } -// bool misspeculating() { return cpu->misspeculating(); } ExecContext *xcBase() { return xc; } + + private: + Addr instEffAddr; + bool eaCalcDone; + + public: + void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + const Addr &getEA() const { return instEffAddr; } + bool doneEACalc() { return eaCalcDone; } + bool eaSrcsReady(); }; template @@ -589,8 +461,6 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) storeSize = sizeof(T); storeData = data; -// if (specMode) -// specMemWrite = true; MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); @@ -627,4 +497,4 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) return fault; } -#endif // __DYN_INST_HH__ +#endif // __CPU_BASE_DYN_INST_HH__ diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc index ef7f23d49..e5bf9647f 100644 --- a/cpu/beta_cpu/2bit_local_pred.cc +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -1,36 +1,6 @@ #include "base/trace.hh" #include "cpu/beta_cpu/2bit_local_pred.hh" -DefaultBP::SatCounter::SatCounter(unsigned bits) - : maxVal((1 << bits) - 1), counter(0) -{ -} - -DefaultBP::SatCounter::SatCounter(unsigned 
bits, unsigned initial_val) - : maxVal((1 << bits) - 1), counter(initial_val) -{ - // Check to make sure initial value doesn't exceed the max counter value. - if (initial_val > maxVal) { - panic("BP: Initial counter value exceeds max size."); - } -} - -void -DefaultBP::SatCounter::increment() -{ - if(counter < maxVal) { - ++counter; - } -} - -void -DefaultBP::SatCounter::decrement() -{ - if(counter > 0) { - --counter; - } -} - DefaultBP::DefaultBP(unsigned _localPredictorSize, unsigned _localCtrBits, unsigned _instShiftAmt) @@ -46,7 +16,10 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize, DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); // Setup the array of counters for the local predictor. - localCtrs = new SatCounter[localPredictorSize](localCtrBits); + localCtrs = new SatCounter[localPredictorSize]; + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(_localCtrBits); DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", localPredictorSize); diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh index 32a7972d0..cda7d3e65 100644 --- a/cpu/beta_cpu/2bit_local_pred.hh +++ b/cpu/beta_cpu/2bit_local_pred.hh @@ -1,8 +1,9 @@ -#ifndef __2BIT_LOCAL_PRED_HH__ -#define __2BIT_LOCAL_PRED_HH__ +#ifndef __CPU_BETA_CPU_2BIT_LOCAL_PRED_HH__ +#define __CPU_BETA_CPU_2BIT_LOCAL_PRED_HH__ // For Addr type. #include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/sat_counter.hh" class DefaultBP { @@ -34,52 +35,6 @@ class DefaultBP inline unsigned getLocalIndex(Addr &PC); - /** - * Private counter class for the internal saturating counters. - * Implements an n bit saturating counter and provides methods to - * increment, decrement, and read it. - * @todo Consider making this something that more closely mimics a - * built in class so you can use ++ or --. - */ - class SatCounter - { - public: - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. 
- */ - SatCounter(unsigned bits); - - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. - * @param initial_val Starting value for each counter. - */ - SatCounter(unsigned bits, unsigned initial_val); - - /** - * Increments the counter's current value. - */ - void increment(); - - /** - * Decrements the counter's current value. - */ - void decrement(); - - /** - * Read the counter's value. - */ - uint8_t read() - { - return counter; - } - - private: - uint8_t maxVal; - uint8_t counter; - }; - /** Array of counters that make up the local predictor. */ SatCounter *localCtrs; @@ -96,4 +51,4 @@ class DefaultBP unsigned indexMask; }; -#endif // __2BIT_LOCAL_PRED_HH__ +#endif // __CPU_BETA_CPU_2BIT_LOCAL_PRED_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc index 1bfcb8420..d929da1cf 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.cc +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -4,4 +4,4 @@ // Force instantiation of AlphaDynInst for all the implementations that // are needed. -template AlphaDynInst; +template class AlphaDynInst; diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index 584e027d7..b2f0d703e 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -47,11 +47,11 @@ class AlphaDynInst : public BaseDynInst /** BaseDynInst constructor given a static inst pointer. */ AlphaDynInst(StaticInstPtr &_staticInst); - /** Executes the instruction. */ + /** Executes the instruction. Why the hell did I put this here? */ Fault execute() { - fault = staticInst->execute(this, traceData); - return fault; + this->fault = this->staticInst->execute(this, this->traceData); + return this->fault; } public: @@ -74,6 +74,135 @@ class AlphaDynInst : public BaseDynInst void syscall(); #endif + + + private: + /** Physical register index of the destination registers of this + * instruction. 
+ */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + float readFloatRegSingle(StaticInst *si, int idx) + { + return this->cpu->readFloatRegSingle(_srcRegIdx[idx]); + } + + double readFloatRegDouble(StaticInst *si, int idx) + { + return this->cpu->readFloatRegDouble(_srcRegIdx[idx]); + } + + uint64_t readFloatRegInt(StaticInst *si, int idx) + { + return this->cpu->readFloatRegInt(_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest + * registers. 
+ */ + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + void setFloatRegSingle(StaticInst *si, int idx, float val) + { + this->cpu->setFloatRegSingle(_destRegIdx[idx], val); + this->instResult.fp = val; + } + + void setFloatRegDouble(StaticInst *si, int idx, double val) + { + this->cpu->setFloatRegDouble(_destRegIdx[idx], val); + this->instResult.dbl = val; + } + + void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + { + this->cpu->setFloatRegInt(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. 
+ */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } }; #endif // __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh index 8311067db..4a3ae99d4 100644 --- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -4,42 +4,68 @@ template AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, FullCPU *cpu) - : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) { + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } + } template AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) - : BaseDynInst(_staticInst) + : BaseDynInst(_staticInst) { + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. 
+ for (int i = 0; i < _staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = _staticInst->destRegIdx(i); + } + + for (int i = 0; i < _staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = _staticInst->srcRegIdx(i); + } } template uint64_t AlphaDynInst::readUniq() { - return cpu->readUniq(); + return this->cpu->readUniq(); } template void AlphaDynInst::setUniq(uint64_t val) { - cpu->setUniq(val); + this->cpu->setUniq(val); } template uint64_t AlphaDynInst::readFpcr() { - return cpu->readFpcr(); + return this->cpu->readFpcr(); } template void AlphaDynInst::setFpcr(uint64_t val) { - cpu->setFpcr(val); + this->cpu->setFpcr(val); } #ifdef FULL_SYSTEM @@ -47,63 +73,63 @@ template uint64_t AlphaDynInst::readIpr(int idx, Fault &fault) { - return cpu->readIpr(idx, fault); + return this->cpu->readIpr(idx, fault); } template Fault AlphaDynInst::setIpr(int idx, uint64_t val) { - return cpu->setIpr(idx, val); + return this->cpu->setIpr(idx, val); } template Fault AlphaDynInst::hwrei() { - return cpu->hwrei(); + return this->cpu->hwrei(); } template int AlphaDynInst::readIntrFlag() { -return cpu->readIntrFlag(); +return this->cpu->readIntrFlag(); } template void AlphaDynInst::setIntrFlag(int val) { - cpu->setIntrFlag(val); + this->cpu->setIntrFlag(val); } template bool AlphaDynInst::inPalMode() { - return cpu->inPalMode(); + return this->cpu->inPalMode(); } template void AlphaDynInst::trap(Fault fault) { - cpu->trap(fault); + this->cpu->trap(fault); } template bool AlphaDynInst::simPalCheck(int palFunc) { - return cpu->simPalCheck(palFunc); + return this->cpu->simPalCheck(palFunc); } #else template void AlphaDynInst::syscall() { - cpu->syscall(); + this->cpu->syscall(); } #endif diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc index 80c4bdec8..ee461eb13 100644 --- a/cpu/beta_cpu/alpha_full_cpu.cc +++ b/cpu/beta_cpu/alpha_full_cpu.cc @@ -6,4 +6,4 @@ // Force instantiation of AlphaFullCPU for all the implemntations that are // needed. 
Consider merging this and alpha_dyn_inst.cc, and maybe all // classes that depend on a certain impl, into one file (alpha_impl.cc?). -template AlphaFullCPU; +template class AlphaFullCPU; diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index 92eebc82a..3c29dd277 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -87,22 +87,22 @@ class AlphaFullCPU : public FullBetaCPU // trying to rename source/destination registers... uint64_t readUniq() { - return regFile.readUniq(); + return this->regFile.readUniq(); } void setUniq(uint64_t val) { - regFile.setUniq(val); + this->regFile.setUniq(val); } uint64_t readFpcr() { - return regFile.readFpcr(); + return this->regFile.readFpcr(); } void setFpcr(uint64_t val) { - regFile.setFpcr(val); + this->regFile.setFpcr(val); } #ifdef FULL_SYSTEM @@ -127,13 +127,13 @@ class AlphaFullCPU : public FullBetaCPU // set the register. IntReg getSyscallArg(int i) { - return xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i]; + return this->xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i]; } // used to shift args for indirect syscall void setSyscallArg(int i, IntReg val) { - xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val; + this->xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val; } void setSyscallReturn(int64_t return_value) @@ -144,12 +144,12 @@ class AlphaFullCPU : public FullBetaCPU const int RegA3 = 19; // only place this is used if (return_value >= 0) { // no error - xc->regs.intRegFile[RegA3] = 0; - xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value; + this->xc->regs.intRegFile[RegA3] = 0; + this->xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value; } else { // got an error, return details - xc->regs.intRegFile[RegA3] = (IntReg) -1; - xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value; + this->xc->regs.intRegFile[RegA3] = (IntReg) -1; + this->xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value; } } @@ -188,7 +188,7 @@ class 
AlphaFullCPU : public FullBetaCPU #endif Fault error; - error = mem->read(req, data); + error = this->mem->read(req, data); data = htoa(data); return error; } @@ -203,7 +203,7 @@ class AlphaFullCPU : public FullBetaCPU // If this is a store conditional, act appropriately if (req->flags & LOCKED) { - cregs = &xc->regs.miscRegs; + cregs = &this->xc->regs.miscRegs; if (req->flags & UNCACHEABLE) { // Don't update result register (see stq_c in isa_desc) @@ -241,7 +241,7 @@ class AlphaFullCPU : public FullBetaCPU #endif - return mem->write(req, (T)htoa(data)); + return this->mem->write(req, (T)htoa(data)); } }; diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc index f37081232..cf9536cb8 100644 --- a/cpu/beta_cpu/alpha_full_cpu_builder.cc +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -283,10 +283,10 @@ CREATE_SIM_OBJECT(BaseFullCPU) params.mem = mem; - params.maxInstsAnyThread = max_insts_any_thread; - params.maxInstsAllThreads = max_insts_all_threads; - params.maxLoadsAnyThread = max_loads_any_thread; - params.maxLoadsAllThreads = max_loads_all_threads; + params.max_insts_any_thread = max_insts_any_thread; + params.max_insts_all_threads = max_insts_all_threads; + params.max_loads_any_thread = max_loads_any_thread; + params.max_loads_all_threads = max_loads_all_threads; // // Caches diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh index 611a0d80d..fccded193 100644 --- a/cpu/beta_cpu/alpha_full_cpu_impl.hh +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -14,17 +14,17 @@ template AlphaFullCPU::AlphaFullCPU(Params &params) - : FullBetaCPU(params) + : FullBetaCPU(params) { DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); - fetch.setCPU(this); - decode.setCPU(this); - rename.setCPU(this); - iew.setCPU(this); - commit.setCPU(this); + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); -
rob.setCPU(this); + this->rob.setCPU(this); } template @@ -32,12 +32,12 @@ void AlphaFullCPU::regStats() { // Register stats for everything that has stats. - fullCPURegStats(); - fetch.regStats(); - decode.regStats(); - rename.regStats(); - iew.regStats(); - commit.regStats(); + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); } #ifndef FULL_SYSTEM @@ -49,25 +49,25 @@ AlphaFullCPU::syscall() DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); // Commit stage needs to run as well. - commit.tick(); + this->commit.tick(); squashStages(); // Temporarily increase this by one to account for the syscall // instruction. - ++funcExeInst; + ++(this->funcExeInst); // Copy over all important state to xc once all the unrolling is done. copyToXC(); - process->syscall(xc); + this->process->syscall(this->xc); // Copy over all important state back to CPU. copyFromXC(); // Decrease funcExeInst by one as the normal commit will handle // incrememnting it. - --funcExeInst; + --(this->funcExeInst); } // This is not a pretty function, and should only be used if it is necessary @@ -77,40 +77,40 @@ template void AlphaFullCPU::squashStages() { - InstSeqNum rob_head = rob.readHeadSeqNum(); + InstSeqNum rob_head = this->rob.readHeadSeqNum(); // Now hack the time buffer to put this sequence number in the places // where the stages might read it. 
for (int i = 0; i < 5; ++i) { - timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; } - fetch.squash(rob.readHeadNextPC()); - fetchQueue.advance(); + this->fetch.squash(this->rob.readHeadNextPC()); + this->fetchQueue.advance(); - decode.squash(); - decodeQueue.advance(); + this->decode.squash(); + this->decodeQueue.advance(); - rename.squash(); - renameQueue.advance(); - renameQueue.advance(); + this->rename.squash(); + this->renameQueue.advance(); + this->renameQueue.advance(); // Be sure to advance the IEW queues so that the commit stage doesn't // try to set an instruction as completed at the same time that it // might be deleting it. - iew.squash(); - iewQueue.advance(); - iewQueue.advance(); + this->iew.squash(); + this->iewQueue.advance(); + this->iewQueue.advance(); - rob.squash(rob_head); - commit.setSquashing(); + this->rob.squash(rob_head); + this->commit.setSquashing(); // Now hack the time buffer to clear the sequence numbers in the places // where the stages might read it.? for (int i = 0; i < 5; ++i) { - timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; } } @@ -126,29 +126,31 @@ AlphaFullCPU::copyToXC() // First loop through the integer registers. for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = renameMap.lookup(i); - xc->regs.intRegFile[i] = regFile.readIntReg(renamed_reg); + renamed_reg = this->renameMap.lookup(i); + this->xc->regs.intRegFile[i] = this->regFile.readIntReg(renamed_reg); DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg]); + renamed_reg, this->regFile.intRegFile[renamed_reg]); } // Then loop through the floating point registers. 
for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - xc->regs.floatRegFile.d[i] = regFile.readFloatRegDouble(renamed_reg); - xc->regs.floatRegFile.q[i] = regFile.readFloatRegInt(renamed_reg); + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->xc->regs.floatRegFile.d[i] = + this->regFile.readFloatRegDouble(renamed_reg); + this->xc->regs.floatRegFile.q[i] = + this->regFile.readFloatRegInt(renamed_reg); } - xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; - xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; - xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; - xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + this->xc->regs.miscRegs.fpcr = this->regFile.miscRegs.fpcr; + this->xc->regs.miscRegs.uniq = this->regFile.miscRegs.uniq; + this->xc->regs.miscRegs.lock_flag = this->regFile.miscRegs.lock_flag; + this->xc->regs.miscRegs.lock_addr = this->regFile.miscRegs.lock_addr; - xc->regs.pc = rob.readHeadPC(); - xc->regs.npc = xc->regs.pc+4; + this->xc->regs.pc = this->rob.readHeadPC(); + this->xc->regs.npc = this->xc->regs.pc+4; - xc->func_exe_inst = funcExeInst; + this->xc->func_exe_inst = this->funcExeInst; } // This function will probably mess things up unless the ROB is empty and @@ -162,35 +164,37 @@ AlphaFullCPU::copyFromXC() // First loop through the integer registers. for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = renameMap.lookup(i); + renamed_reg = this->renameMap.lookup(i); DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " "now has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg], - xc->regs.intRegFile[i]); + renamed_reg, this->regFile.intRegFile[renamed_reg], + this->xc->regs.intRegFile[i]); - regFile.setIntReg(renamed_reg, xc->regs.intRegFile[i]); + this->regFile.setIntReg(renamed_reg, this->xc->regs.intRegFile[i]); } // Then loop through the floating point registers. 
for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - regFile.setFloatRegDouble(renamed_reg, xc->regs.floatRegFile.d[i]); - regFile.setFloatRegInt(renamed_reg, xc->regs.floatRegFile.q[i]); + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->regFile.setFloatRegDouble(renamed_reg, + this->xc->regs.floatRegFile.d[i]); + this->regFile.setFloatRegInt(renamed_reg, + this->xc->regs.floatRegFile.q[i]); } // Then loop through the misc registers. - regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + this->regFile.miscRegs.fpcr = this->xc->regs.miscRegs.fpcr; + this->regFile.miscRegs.uniq = this->xc->regs.miscRegs.uniq; + this->regFile.miscRegs.lock_flag = this->xc->regs.miscRegs.lock_flag; + this->regFile.miscRegs.lock_addr = this->xc->regs.miscRegs.lock_addr; // Then finally set the PC and the next PC. 
// regFile.pc = xc->regs.pc; // regFile.npc = xc->regs.npc; - funcExeInst = xc->func_exe_inst; + this->funcExeInst = this->xc->func_exe_inst; } #ifdef FULL_SYSTEM diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc index c4a79fbbe..c1b0f54b2 100644 --- a/cpu/beta_cpu/bpred_unit.cc +++ b/cpu/beta_cpu/bpred_unit.cc @@ -3,4 +3,4 @@ #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/alpha_dyn_inst.hh" -template TwobitBPredUnit; +template class TwobitBPredUnit; diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index d5228601c..04c74393b 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -15,22 +15,10 @@ using namespace std; -#ifdef FULL_SYSTEM BaseFullCPU::BaseFullCPU(Params &params) - : BaseCPU(params.name, params.numberOfThreads, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads, - params._system, params.freq) + : BaseCPU(&params) { } -#else -BaseFullCPU::BaseFullCPU(Params &params) - : BaseCPU(params.name, params.numberOfThreads, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads) -{ -} -#endif // FULL_SYSTEM template FullBetaCPU::TickEvent::TickEvent(FullBetaCPU *c) @@ -515,6 +503,6 @@ FullBetaCPU::wakeDependents(DynInstPtr &inst) } // Forward declaration of FullBetaCPU. -template FullBetaCPU; +template class FullBetaCPU; #endif // __SIMPLE_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index 19eb972d9..8ce32b7c7 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -27,27 +27,7 @@ class BaseFullCPU : public BaseCPU { //Stuff that's pretty ISA independent will go here.
public: - class Params - { - public: -#ifdef FULL_SYSTEM - std::string name; - int numberOfThreads; - Counter maxInstsAnyThread; - Counter maxInstsAllThreads; - Counter maxLoadsAnyThread; - Counter maxLoadsAllThreads; - System *_system; - Tick freq; -#else - std::string name; - int numberOfThreads; - Counter maxInstsAnyThread; - Counter maxInstsAllThreads; - Counter maxLoadsAnyThread; - Counter maxLoadsAllThreads; -#endif // FULL_SYSTEM - }; + typedef BaseCPU::Params Params; #ifdef FULL_SYSTEM BaseFullCPU(Params &params); diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index b718e6aa0..1d072ab33 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -244,10 +244,10 @@ SimpleIEW::squashDueToBranch(DynInstPtr &inst) // Also send PC update information back to prior stages. toCommit->squashedSeqNum = inst->seqNum; toCommit->mispredPC = inst->readPC(); - toCommit->nextPC = inst->readCalcTarg(); + toCommit->nextPC = inst->readNextPC(); toCommit->branchMispredict = true; // Prediction was incorrect, so send back inverse. - toCommit->branchTaken = inst->readCalcTarg() != + toCommit->branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(MachInst)); } @@ -265,7 +265,7 @@ SimpleIEW::squashDueToMem(DynInstPtr &inst) toCommit->squash = true; // Also send PC update information back to prior stages. toCommit->squashedSeqNum = inst->seqNum; - toCommit->nextPC = inst->readCalcTarg(); + toCommit->nextPC = inst->readNextPC(); } template diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc index c4fd077bc..cd660ac79 100644 --- a/cpu/beta_cpu/inst_queue.cc +++ b/cpu/beta_cpu/inst_queue.cc @@ -4,7 +4,8 @@ #include "cpu/beta_cpu/inst_queue_impl.hh" // Force instantiation of InstructionQueue.
-template InstructionQueue; +template class InstructionQueue; +template<> unsigned InstructionQueue::DependencyEntry::mem_alloc_counter = 0; diff --git a/cpu/beta_cpu/mem_dep_unit.cc b/cpu/beta_cpu/mem_dep_unit.cc index 3175997f6..d8b5a80eb 100644 --- a/cpu/beta_cpu/mem_dep_unit.cc +++ b/cpu/beta_cpu/mem_dep_unit.cc @@ -6,4 +6,4 @@ // Force instantation of memory dependency unit using store sets and // AlphaSimpleImpl. -template MemDepUnit; +template class MemDepUnit; diff --git a/cpu/beta_cpu/ras.cc b/cpu/beta_cpu/ras.cc index ca05f5a0d..23ca45b3a 100644 --- a/cpu/beta_cpu/ras.cc +++ b/cpu/beta_cpu/ras.cc @@ -4,7 +4,10 @@ ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) : numEntries(_numEntries), usedEntries(0), tos(0) { - addrStack = new Addr[numEntries](0); + addrStack = new Addr[numEntries]; + + for (int i = 0; i < numEntries; ++i) + addrStack[i] = 0; } void diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc index 1301202f2..45b8084de 100644 --- a/cpu/beta_cpu/rename_map.cc +++ b/cpu/beta_cpu/rename_map.cc @@ -1,6 +1,10 @@ +#include + #include "cpu/beta_cpu/rename_map.hh" +using namespace std; + // Todo: Consider making functions inline. Avoid having things that are // using the zero register or misc registers from adding on the registers // to the free list. Possibly remove the direct communication between diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh index 44a7eefb1..198cfc536 100644 --- a/cpu/beta_cpu/rename_map.hh +++ b/cpu/beta_cpu/rename_map.hh @@ -64,8 +64,8 @@ class SimpleRenameMap void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); - void squash(vector freed_regs, - vector unmaps); + void squash(std::vector freed_regs, + std::vector unmaps); int numFreeEntries(); diff --git a/cpu/beta_cpu/rob.cc b/cpu/beta_cpu/rob.cc index 611cca0ba..ad45c022f 100644 --- a/cpu/beta_cpu/rob.cc +++ b/cpu/beta_cpu/rob.cc @@ -4,4 +4,4 @@ #include "cpu/beta_cpu/rob_impl.hh" // Force instantiation of InstructionQueue. 
-template ROB; +template class ROB; diff --git a/cpu/beta_cpu/sat_counter.cc b/cpu/beta_cpu/sat_counter.cc new file mode 100644 index 000000000..da095c3e1 --- /dev/null +++ b/cpu/beta_cpu/sat_counter.cc @@ -0,0 +1,43 @@ +#include "base/misc.hh" +#include "cpu/beta_cpu/sat_counter.hh" + +SatCounter::SatCounter() + : maxVal(0), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +SatCounter::setBits(unsigned bits) +{ + maxVal = (1 << bits) - 1; +} + +void +SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} diff --git a/cpu/beta_cpu/sat_counter.hh b/cpu/beta_cpu/sat_counter.hh new file mode 100644 index 000000000..e0f23e13e --- /dev/null +++ b/cpu/beta_cpu/sat_counter.hh @@ -0,0 +1,62 @@ +#ifndef __CPU_BETA_CPU_SAT_COUNTER_HH__ +#define __CPU_BETA_CPU_SAT_COUNTER_HH__ + +#include + +/** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ +class SatCounter +{ + public: + /** + * Constructor for the counter. + */ + SatCounter(); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Sets the number of bits. 
+ */ + void setBits(unsigned bits); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + const uint8_t read() const + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; +}; + +#endif // __CPU_BETA_CPU_SAT_COUNTER_HH__ diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc index 53a11326a..5a22278eb 100644 --- a/cpu/beta_cpu/tournament_pred.cc +++ b/cpu/beta_cpu/tournament_pred.cc @@ -1,35 +1,5 @@ #include "cpu/beta_cpu/tournament_pred.hh" -TournamentBP::SatCounter::SatCounter(unsigned bits) - : maxVal((1 << bits) - 1), counter(0) -{ -} - -TournamentBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) - : maxVal((1 << bits) - 1), counter(initial_val) -{ - // Check to make sure initial value doesn't exceed the max counter value. - if (initial_val > maxVal) { - panic("BP: Initial counter value exceeds max size."); - } -} - -void -TournamentBP::SatCounter::increment() -{ - if (counter < maxVal) { - ++counter; - } -} - -void -TournamentBP::SatCounter::decrement() -{ - if (counter > 0) { - --counter; - } -} - TournamentBP::TournamentBP(unsigned _local_predictor_size, unsigned _local_ctr_bits, unsigned _local_history_table_size, @@ -54,21 +24,36 @@ TournamentBP::TournamentBP(unsigned _local_predictor_size, //Should do checks here to make sure sizes are correct (powers of 2) //Setup the array of counters for the local predictor - local_ctrs = new SatCounter[local_predictor_size](local_ctr_bits); + local_ctrs = new SatCounter[local_predictor_size]; + + for (int i = 0; i < local_predictor_size; ++i) + local_ctrs[i].setBits(local_ctr_bits); + //Setup the history table for the local table - local_history_table = new unsigned[local_history_table_size](0); + local_history_table = new unsigned[local_history_table_size]; + + for (int i = 0; i < local_history_table_size; ++i) + 
local_history_table[i] = 0; + // Setup the local history mask localHistoryMask = (1 << local_history_bits) - 1; //Setup the array of counters for the global predictor - global_ctrs = new SatCounter[global_predictor_size](global_ctr_bits); + global_ctrs = new SatCounter[global_predictor_size]; + + for (int i = 0; i < global_predictor_size; ++i) + global_ctrs[i].setBits(global_ctr_bits); + //Clear the global history global_history = 0; // Setup the global history mask globalHistoryMask = (1 << global_history_bits) - 1; //Setup the array of counters for the choice predictor - choice_ctrs = new SatCounter[choice_predictor_size](choice_ctr_bits); + choice_ctrs = new SatCounter[choice_predictor_size]; + + for (int i = 0; i < choice_predictor_size; ++i) + choice_ctrs[i].setBits(choice_ctr_bits); threshold = (1 << (local_ctr_bits - 1)) - 1; threshold = threshold / 2; diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh index bf87d753b..1512abc78 100644 --- a/cpu/beta_cpu/tournament_pred.hh +++ b/cpu/beta_cpu/tournament_pred.hh @@ -1,8 +1,9 @@ -#ifndef __TOURNAMENT_PRED_HH__ -#define __TOURNAMENT_PRED_HH__ +#ifndef __CPU_BETA_CPU_TOURNAMENT_PRED_HH__ +#define __CPU_BETA_CPU_TOURNAMENT_PRED_HH__ // For Addr type. #include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/sat_counter.hh" class TournamentBP { @@ -48,52 +49,6 @@ class TournamentBP inline void updateHistoriesNotTaken(unsigned local_history_idx); - /** - * Private counter class for the internal saturating counters. - * Implements an n bit saturating counter and provides methods to - * increment, decrement, and read it. - * @todo Consider making this something that more closely mimics a - * built in class so you can use ++ or --. - */ - class SatCounter - { - public: - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. - */ - SatCounter(unsigned bits); - - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. 
- * @param initial_val Starting value for each counter. - */ - SatCounter(unsigned bits, unsigned initial_val); - - /** - * Increments the counter's current value. - */ - void increment(); - - /** - * Decrements the counter's current value. - */ - void decrement(); - - /** - * Read the counter's value. - */ - uint8_t read() - { - return counter; - } - - private: - uint8_t maxVal; - uint8_t counter; - }; - /** Local counters. */ SatCounter *local_ctrs; @@ -157,4 +112,4 @@ class TournamentBP unsigned threshold; }; -#endif // __TOURNAMENT_PRED_HH__ +#endif // __CPU_BETA_CPU_TOURNAMENT_PRED_HH__ diff --git a/cpu/ooo_cpu/ea_list.cc b/cpu/ooo_cpu/ea_list.cc new file mode 100644 index 000000000..4142e7f5e --- /dev/null +++ b/cpu/ooo_cpu/ea_list.cc @@ -0,0 +1,50 @@ + +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ooo_cpu/ea_list.hh" + +void +EAList::addAddr(const InstSeqNum &new_sn, const Addr &new_ea) +{ + instEA newEA(new_sn, new_ea); + + eaList.push_back(newEA); +} + +void +EAList::clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear) +{ + eaListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first != sn_to_clear) { + assert((*list_it).second == ea_to_clear); + } +} + +bool +EAList::checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const +{ + const constEAListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first < check_sn) { + if ((*list_it).second == check_ea) { + return true; + } + } + + return false; +} + +void +EAList::clear() +{ + eaList.clear(); +} + +void +EAList::commit(const InstSeqNum &commit_sn) +{ + while (!eaList.empty() && eaList.front().first <= commit_sn) { + eaList.pop_front(); + } +} diff --git a/cpu/ooo_cpu/ea_list.hh b/cpu/ooo_cpu/ea_list.hh new file mode 100644 index 000000000..bc099d7f3 --- /dev/null +++ b/cpu/ooo_cpu/ea_list.hh @@ -0,0 +1,44 @@ +#ifndef __CPU_EA_LIST_HH__ +#define __CPU_EA_LIST_HH__ + +#include +#include + 
+#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +/** + * Simple class to hold onto a list of pairs, each pair having a memory + * instruction's sequence number and effective addr. This list can be used + * for memory disambiguation. However, if I ever want to forward results, I + * may have to use a list that holds DynInstPtrs. Hence this may change in + * the future. + */ +class EAList { + private: + typedef std::pair instEA; + typedef std::list::iterator eaListIt; + typedef std::list::const_iterator constEAListIt; + + std::list eaList; + + public: + EAList() { } + ~EAList() { } + + void addAddr(const InstSeqNum &new_sn, const Addr &new_ea); + + void clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear); + + /** Checks if any instructions older than check_sn have a conflicting + * address with check_ea. Note that this function does not handle the + * sequence number rolling over. + */ + bool checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const; + + void clear(); + + void commit(const InstSeqNum &commit_sn); +}; + +#endif // __CPU_EA_LIST_HH__ diff --git a/cpu/ooo_cpu/ooo_cpu.cc b/cpu/ooo_cpu/ooo_cpu.cc new file mode 100644 index 000000000..255070de4 --- /dev/null +++ b/cpu/ooo_cpu/ooo_cpu.cc @@ -0,0 +1,6 @@ + +#include "cpu/ooo_cpu/ooo_cpu_impl.hh" +#include "cpu/ooo_cpu/ooo_dyn_inst.hh" +#include "cpu/ooo_cpu/ooo_impl.hh" + +template class OoOCPU; diff --git a/cpu/ooo_cpu/ooo_cpu.hh b/cpu/ooo_cpu/ooo_cpu.hh new file mode 100644 index 000000000..25fdb39b6 --- /dev/null +++ b/cpu/ooo_cpu/ooo_cpu.hh @@ -0,0 +1,613 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_OOO_CPU_OOO_CPU_HH__ +#define __CPU_OOO_CPU_OOO_CPU_HH__ + +#include "base/statistics.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "cpu/full_cpu/fu_pool.hh" +#include "cpu/ooo_cpu/ea_list.hh" +#include "cpu/pc_event.hh" +#include "cpu/static_inst.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +// forward declarations +#ifdef FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class PhysicalMemory; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM + +class Checkpoint; +class MemInterface; + +namespace Trace { + class InstRecord; +} + +/** + * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with + * simple out-of-order capabilities added to it. It is still a 1 CPI machine + * (?), but is capable of handling cache misses. Basically it models having + * a ROB/IQ by only allowing a certain amount of instructions to execute while + * the cache miss is outstanding. + */ + +template +class OoOCPU : public BaseCPU +{ + private: + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::ISA ISA; + + public: + // main simulation loop (one cycle) + void tick(); + + private: + struct TickEvent : public Event + { + OoOCPU *cpu; + int width; + + TickEvent(OoOCPU *c, int w); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. 
+ void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + private: + Trace::InstRecord *traceData; + + template + void trace_data(T data); + + public: + // + enum Status { + Running, + Idle, + IcacheMissStall, + IcacheMissComplete, + DcacheMissStall, + SwitchedOut + }; + + private: + Status _status; + + public: + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + + struct Params : public BaseCPU::Params + { + MemInterface *icache_interface; + MemInterface *dcache_interface; + int width; +#ifdef FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + FunctionalMemory *mem; +#else + Process *process; +#endif + int issueWidth; + }; + + OoOCPU(Params *params); + + virtual ~OoOCPU(); + + private: + void copyFromXC(); + + public: + // execution context + ExecContext *xc; + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + +#ifdef FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; +#endif + + // L1 instruction cache + MemInterface *icacheInterface; + + // L1 data cache + MemInterface *dcacheInterface; + + FuncUnitPool *fuPool; + + // Refcounted pointer to the one memory request. + MemReqPtr cacheMemReq; + + class ICacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + + public: + ICacheCompletionEvent(OoOCPU *_cpu); + + virtual void process(); + virtual const char *description(); + }; + + // Will need to create a cache completion event upon any memory miss. 
+ ICacheCompletionEvent iCacheCompletionEvent; + + class DCacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + DynInstPtr inst; + + public: + DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + Status status() const { return _status; } + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated memory references + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; + + // number of cycles stalled for I-cache misses + Stats::Scalar<> icacheStallCycles; + Counter lastIcacheStall; + + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + void processICacheCompletion(); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string &section); + +#ifdef FULL_SYSTEM + bool validInstAddr(Addr addr) { return true; } + bool validDataAddr(Addr addr) { return true; } + int getInstAsid() { return xc->regs.instAsid(); } + int getDataAsid() { return xc->regs.dataAsid(); } + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + bool validInstAddr(Addr addr)
+ { return xc->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return xc->validDataAddr(addr); } + + int getInstAsid() { return xc->asid; } + int getDataAsid() { return xc->asid; } + + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return No_Fault; + } + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + template + Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); + + template + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst); + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + private: + bool executeInst(DynInstPtr &inst); + + void renameInst(DynInstPtr &inst); + + void addInst(DynInstPtr &inst); + + void commitHeadInst(); + + bool grabInst(); + + Fault fetchCacheLine(); + + InstSeqNum getAndIncrementInstSeq(); + + bool ambigMemAddr; + + private: + InstSeqNum globalSeqNum; + + DynInstPtr renameTable[ISA::TotalNumRegs]; + DynInstPtr commitTable[ISA::TotalNumRegs]; + + // Might need a table of the shadow registers as well. +#ifdef FULL_SYSTEM + DynInstPtr palShadowTable[ISA::NumIntRegs]; +#endif + + public: + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. 
We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + // In the OoO case these shouldn't read from the XC but rather from the + // rename table of DynInsts. Also these likely shouldn't be called very + // often, other than when adding things into the xc during say a syscall. + + uint64_t readIntReg(StaticInst *si, int idx) + { + return xc->readIntReg(si->srcRegIdx(idx)); + } + + float readFloatRegSingle(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegSingle(reg_idx); + } + + double readFloatRegDouble(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegDouble(reg_idx); + } + + uint64_t readFloatRegInt(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegInt(reg_idx); + } + + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + xc->setIntReg(si->destRegIdx(idx), val); + } + + void setFloatRegSingle(StaticInst *si, int idx, float val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegSingle(reg_idx, val); + } + + void setFloatRegDouble(StaticInst *si, int idx, double val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegDouble(reg_idx, val); + } + + void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegInt(reg_idx, val); + } + + uint64_t readPC() { return PC; } + void setNextPC(Addr val) { nextPC = val; } + + private: + Addr PC; + Addr nextPC; + + unsigned issueWidth; + + bool
fetchRedirExcp; + bool fetchRedirBranch; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + unsigned cacheBlkSize; + + Addr cacheBlkPC; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + protected: + bool cacheBlkValid; + + private: + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = ISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + unsigned instSize; + + // ROB tracking stuff. + DynInstPtr robHeadPtr; + DynInstPtr robTailPtr; + unsigned robInsts; + + // List of outstanding EA instructions. + protected: + EAList eaList; + + public: + void branchToTarget(Addr val) + { + if (!fetchRedirExcp) { + fetchRedirBranch = true; + PC = val; + } + } + + // ISA stuff: + uint64_t readUniq() { return xc->readUniq(); } + void setUniq(uint64_t val) { xc->setUniq(val); } + + uint64_t readFpcr() { return xc->readFpcr(); } + void setFpcr(uint64_t val) { xc->setFpcr(val); } + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } + Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } + Fault hwrei() { return xc->hwrei(); } + int readIntrFlag() { return xc->readIntrFlag(); } + void setIntrFlag(int val) { xc->setIntrFlag(val); } + bool inPalMode() { return xc->inPalMode(); } + void ev5_trap(Fault fault) { xc->ev5_trap(fault); } + bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } +#else + void syscall() { xc->syscall(); } +#endif + + ExecContext *xcBase() { return xc; } +}; + + +// precise architected memory state accessor macros +template +template +Fault +OoOCPU::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) +{ + MemReqPtr readReq = new MemReq(); + readReq->xc = xc; + readReq->asid = 0; + readReq->data = new uint8_t[64]; + + readReq->reset(addr, sizeof(T), flags); + + // translate to physical address - This 
might be an ISA impl call + Fault fault = translateDataReadReq(readReq); + + // do functional access + if (fault == No_Fault) + fault = xc->mem->read(readReq, data); +#if 0 + if (traceData) { + traceData->setAddr(addr); + if (fault == No_Fault) + traceData->setData(data); + } +#endif + + // if we have a cache, do cache access too + if (fault == No_Fault && dcacheInterface) { + readReq->cmd = Read; + readReq->completionEvent = NULL; + readReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(readReq); + + if (dcacheInterface->doEvents()) { + readReq->completionEvent = new DCacheCompletionEvent(this, inst); + lastDcacheStall = curTick; + unscheduleTickEvent(); + _status = DcacheMissStall; + } + } + + if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); + + return fault; +} + +template +template +Fault +OoOCPU::write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst) +{ + MemReqPtr writeReq = new MemReq(); + writeReq->xc = xc; + writeReq->asid = 0; + writeReq->data = new uint8_t[64]; + +#if 0 + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } +#endif + + writeReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = xc->translateDataWriteReq(writeReq); + + // do functional access + if (fault == No_Fault) + fault = xc->write(writeReq, data); + + if (fault == No_Fault && dcacheInterface) { + writeReq->cmd = Write; + memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); + writeReq->completionEvent = NULL; + writeReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(writeReq); + + if (dcacheInterface->doEvents()) { + writeReq->completionEvent = new DCacheCompletionEvent(this, inst); + lastDcacheStall = curTick; + unscheduleTickEvent(); + _status = DcacheMissStall; + } + } + + if (res && (fault == No_Fault)) + *res = writeReq->result; + + if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) + recordEvent("Uncached 
Write"); + + return fault; +} + + +#endif // __CPU_OOO_CPU_OOO_CPU_HH__ diff --git a/cpu/ooo_cpu/ooo_impl.hh b/cpu/ooo_cpu/ooo_impl.hh new file mode 100644 index 000000000..9e6df9214 --- /dev/null +++ b/cpu/ooo_cpu/ooo_impl.hh @@ -0,0 +1,21 @@ + +#ifndef __CPU_OOO_CPU_OOO_IMPL_HH__ +#define __CPU_OOO_CPU_OOO_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +template +class OoOCPU; + +template +class OoODynInst; + +struct OoOImpl { + typedef AlphaISA ISA; + typedef OoOCPU OoOCPU; + typedef OoOCPU FullCPU; + typedef OoODynInst DynInst; + typedef RefCountingPtr DynInstPtr; +}; + +#endif // __CPU_OOO_CPU_OOO_IMPL_HH__ diff --git a/cpu/simple_cpu/simple_cpu.cc b/cpu/simple_cpu/simple_cpu.cc index 044ee9b9d..df78eb9a9 100644 --- a/cpu/simple_cpu/simple_cpu.cc +++ b/cpu/simple_cpu/simple_cpu.cc @@ -562,9 +562,6 @@ SimpleCPU::dbg_vtophys(Addr addr) } #endif // FULL_SYSTEM -Tick save_cycle = 0; - - void SimpleCPU::processCacheCompletion() { diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index b5022af5b..25c98b12a 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __STATIC_INST_HH__ -#define __STATIC_INST_HH__ +#ifndef __CPU_STATIC_INST_HH__ +#define __CPU_STATIC_INST_HH__ #include #include @@ -41,11 +41,16 @@ // forward declarations struct AlphaSimpleImpl; +struct OoOImpl; class ExecContext; class DynInst; + template class AlphaDynInst; +template +class OoODynInst; + class FastCPU; class SimpleCPU; class InorderCPU; @@ -255,7 +260,7 @@ class StaticInst : public StaticInstBase * obtain the dependence info (numSrcRegs and srcRegIdx[]) for * just the EA computation. */ - virtual const + virtual StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; } /** @@ -264,7 +269,7 @@ class StaticInst : public StaticInstBase * obtain the dependence info (numSrcRegs and srcRegIdx[]) for * just the memory access (not the EA computation). 
*/ - virtual const + virtual StaticInstPtr &memAccInst() const { return nullStaticInstPtr; } /// The binary machine instruction. @@ -445,4 +450,4 @@ class StaticInstPtr : public RefCountingPtr > } }; -#endif // __STATIC_INST_HH__ +#endif // __CPU_STATIC_INST_HH__