From e7f442d5273bec95f3412cdc5a82742fe32f8cf3 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 2 Mar 2006 10:31:48 -0500 Subject: [PATCH] Simple program runs with sendAtomic! Ignoring returned latency for now. Refactored loadSections in ObjectFile hierarchy. base/loader/aout_object.cc: base/loader/aout_object.hh: base/loader/ecoff_object.cc: base/loader/ecoff_object.hh: base/loader/elf_object.cc: base/loader/elf_object.hh: base/loader/object_file.hh: Have each section record a pointer to image data. This allows us to move common loadSections code into ObjectFile. base/loader/object_file.cc: Have each section record a pointer to image data. This allows us to move common loadSections code into ObjectFile. Also explicitly load BSS now since we need to allocate the translations for it in syscall emulation. cpu/base.hh: Don't need memPort (just pass port in to ExecContext constructor). cpu/exec_context.cc: cpu/exec_context.hh: mem/port.cc: mem/translating_port.cc: mem/translating_port.hh: Pass syscall emulation Port into constructor instead of getting it from BaseCPU. cpu/simple/cpu.cc: Explicitly choose one of three timing models. Statically allocate request and packet objects when possible. Several more minor bug fixes. Works for simple program with SIMPLE_CPU_MEM_IMMEDIATE model now. Probably have memory leaks with SIMPLE_CPU_MEM_TIMING (if it works at all). Pass syscall emulation Port into constructor instead of getting it from BaseCPU. cpu/simple/cpu.hh: Explicitly choose one of three timing models. Statically allocate request and packet objects when possible. Pass syscall emulation Port into constructor instead of getting it from BaseCPU. mem/physical.cc: Set packet result field. 
--HG-- extra : convert_revision : 359d0ebe4b4665867f4e26e7394ec0f1d17cfc26 --- base/loader/aout_object.cc | 28 +---- base/loader/aout_object.hh | 1 - base/loader/ecoff_object.cc | 27 +---- base/loader/ecoff_object.hh | 1 - base/loader/elf_object.cc | 31 +----- base/loader/elf_object.hh | 4 - base/loader/object_file.cc | 34 ++++++ base/loader/object_file.hh | 9 +- cpu/base.hh | 8 -- cpu/exec_context.cc | 6 +- cpu/exec_context.hh | 4 +- cpu/simple/cpu.cc | 206 ++++++++++++++++++++++++------------ cpu/simple/cpu.hh | 26 ++++- mem/physical.cc | 2 + mem/port.cc | 9 ++ mem/translating_port.cc | 20 ++-- mem/translating_port.hh | 3 +- 17 files changed, 241 insertions(+), 178 deletions(-) diff --git a/base/loader/aout_object.cc b/base/loader/aout_object.cc index d1b27120d..564898ca3 100644 --- a/base/loader/aout_object.cc +++ b/base/loader/aout_object.cc @@ -30,7 +30,6 @@ #include "base/loader/aout_object.hh" -#include "mem/translating_port.hh" #include "base/loader/symtab.hh" #include "base/trace.hh" // for DPRINTF @@ -64,12 +63,15 @@ AoutObject::AoutObject(const string &_filename, int _fd, text.baseAddr = N_TXTADDR(*execHdr); text.size = execHdr->tsize; + text.fileImage = fileData + N_TXTOFF(*execHdr); data.baseAddr = N_DATADDR(*execHdr); data.size = execHdr->dsize; + data.fileImage = fileData + N_DATOFF(*execHdr); bss.baseAddr = N_BSSADDR(*execHdr); bss.size = execHdr->bsize; + bss.fileImage = NULL; DPRINTFR(Loader, "text: 0x%x %d\ndata: 0x%x %d\nbss: 0x%x %d\n", text.baseAddr, text.size, data.baseAddr, data.size, @@ -77,30 +79,6 @@ AoutObject::AoutObject(const string &_filename, int _fd, } -bool -AoutObject::loadSections(TranslatingPort *memPort, bool loadPhys) -{ - Addr textAddr = text.baseAddr; - Addr dataAddr = data.baseAddr; - - if (loadPhys) { - textAddr &= (ULL(1) << 40) - 1; - dataAddr &= (ULL(1) << 40) - 1; - } - - // Since we don't really have an MMU and all memory is - // zero-filled, there's no need to set up the BSS segment. 
- if (text.size != 0) - memPort->writeBlobFunctional(textAddr, fileData + N_TXTOFF(*execHdr), - text.size, true); - if (data.size != 0) - memPort->writeBlobFunctional(dataAddr, fileData + N_DATOFF(*execHdr), - data.size, true); - - return true; -} - - bool AoutObject::loadGlobalSymbols(SymbolTable *symtab) { diff --git a/base/loader/aout_object.hh b/base/loader/aout_object.hh index 359866dc5..aeb710427 100644 --- a/base/loader/aout_object.hh +++ b/base/loader/aout_object.hh @@ -46,7 +46,6 @@ class AoutObject : public ObjectFile public: virtual ~AoutObject() {} - virtual bool loadSections(TranslatingPort *memPort, bool loadPhys = false); virtual bool loadGlobalSymbols(SymbolTable *symtab); virtual bool loadLocalSymbols(SymbolTable *symtab); diff --git a/base/loader/ecoff_object.cc b/base/loader/ecoff_object.cc index a4b8c8713..cd37abaa7 100644 --- a/base/loader/ecoff_object.cc +++ b/base/loader/ecoff_object.cc @@ -29,8 +29,6 @@ #include <string> #include "base/loader/ecoff_object.hh" - -#include "mem/translating_port.hh" #include "base/loader/symtab.hh" #include "base/trace.hh" // for DPRINTF @@ -68,12 +66,15 @@ EcoffObject::EcoffObject(const string &_filename, int _fd, text.baseAddr = aoutHdr->text_start; text.size = aoutHdr->tsize; + text.fileImage = fileData + ECOFF_TXTOFF(execHdr); data.baseAddr = aoutHdr->data_start; data.size = aoutHdr->dsize; + data.fileImage = fileData + ECOFF_DATOFF(execHdr); bss.baseAddr = aoutHdr->bss_start; bss.size = aoutHdr->bsize; + bss.fileImage = NULL; DPRINTFR(Loader, "text: 0x%x %d\ndata: 0x%x %d\nbss: 0x%x %d\n", text.baseAddr, text.size, data.baseAddr, data.size, @@ -81,28 +82,6 @@ EcoffObject::EcoffObject(const string &_filename, int _fd, } -bool -EcoffObject::loadSections(TranslatingPort *memPort, bool loadPhys) -{ - Addr textAddr = text.baseAddr; - Addr dataAddr = data.baseAddr; - - if (loadPhys) { - textAddr &= (ULL(1) << 40) - 1; - dataAddr &= (ULL(1) << 40) - 1; - } - - // Since we don't really have an MMU and all memory is - //
zero-filled, there's no need to set up the BSS segment. - memPort->writeBlobFunctional(textAddr, fileData + ECOFF_TXTOFF(execHdr), - text.size, true); - memPort->writeBlobFunctional(dataAddr, fileData + ECOFF_DATOFF(execHdr), - data.size, true); - - return true; -} - - bool EcoffObject::loadGlobalSymbols(SymbolTable *symtab) { diff --git a/base/loader/ecoff_object.hh b/base/loader/ecoff_object.hh index 39b161bfc..603c70bec 100644 --- a/base/loader/ecoff_object.hh +++ b/base/loader/ecoff_object.hh @@ -50,7 +50,6 @@ class EcoffObject : public ObjectFile public: virtual ~EcoffObject() {} - virtual bool loadSections(TranslatingPort *memPort, bool loadPhys = false); virtual bool loadGlobalSymbols(SymbolTable *symtab); virtual bool loadLocalSymbols(SymbolTable *symtab); diff --git a/base/loader/elf_object.cc b/base/loader/elf_object.cc index 11c94d651..52f236fef 100644 --- a/base/loader/elf_object.cc +++ b/base/loader/elf_object.cc @@ -43,7 +43,6 @@ #include "base/loader/elf_object.hh" -#include "mem/translating_port.hh" #include "base/loader/symtab.hh" #include "base/trace.hh" // for DPRINTF @@ -131,20 +130,19 @@ ElfObject::ElfObject(const string &_filename, int _fd, if (text.size == 0) { // haven't seen text segment yet text.baseAddr = phdr.p_vaddr; text.size = phdr.p_filesz; - // remember where the data is for loadSections() - fileTextBits = fileData + phdr.p_offset; + text.fileImage = fileData + phdr.p_offset; // if there's any padding at the end that's not in the // file, call it the bss. This happens in the "text" // segment if there's only one loadable segment (as for // kernel images). 
bss.size = phdr.p_memsz - phdr.p_filesz; bss.baseAddr = phdr.p_vaddr + phdr.p_filesz; + bss.fileImage = NULL; } else if (data.size == 0) { // have text, this must be data data.baseAddr = phdr.p_vaddr; data.size = phdr.p_filesz; - // remember where the data is for loadSections() - fileDataBits = fileData + phdr.p_offset; + data.fileImage = fileData + phdr.p_offset; // if there's any padding at the end that's not in the // file, call it the bss. Warn if this happens for both // the text & data segments (should only have one bss). @@ -153,6 +151,7 @@ ElfObject::ElfObject(const string &_filename, int _fd, } bss.size = phdr.p_memsz - phdr.p_filesz; bss.baseAddr = phdr.p_vaddr + phdr.p_filesz; + bss.fileImage = NULL; } } @@ -169,28 +168,6 @@ ElfObject::ElfObject(const string &_filename, int _fd, } -bool -ElfObject::loadSections(TranslatingPort *memPort, bool loadPhys) -{ - Addr textAddr = text.baseAddr; - Addr dataAddr = data.baseAddr; - - if (loadPhys) { - textAddr &= (ULL(1) << 40) - 1; - dataAddr &= (ULL(1) << 40) - 1; - } - - // Since we don't really have an MMU and all memory is - // zero-filled, there's no need to set up the BSS segment. - if (text.size != 0) - memPort->writeBlobFunctional(textAddr, fileTextBits, text.size, true); - if (data.size != 0) - memPort->writeBlobFunctional(dataAddr, fileDataBits, data.size, true); - - return true; -} - - bool ElfObject::loadSomeSymbols(SymbolTable *symtab, int binding) { diff --git a/base/loader/elf_object.hh b/base/loader/elf_object.hh index 3e93c30b7..72c265edd 100644 --- a/base/loader/elf_object.hh +++ b/base/loader/elf_object.hh @@ -35,9 +35,6 @@ class ElfObject : public ObjectFile { protected: - uint8_t *fileTextBits; //!< Pointer to file's text segment image - uint8_t *fileDataBits; //!< Pointer to file's data segment image - /// Helper functions for loadGlobalSymbols() and loadLocalSymbols(). 
bool loadSomeSymbols(SymbolTable *symtab, int binding); @@ -48,7 +45,6 @@ class ElfObject : public ObjectFile public: virtual ~ElfObject() {} - virtual bool loadSections(TranslatingPort *memPort, bool loadPhys = false); virtual bool loadGlobalSymbols(SymbolTable *symtab); virtual bool loadLocalSymbols(SymbolTable *symtab); diff --git a/base/loader/object_file.cc b/base/loader/object_file.cc index 1410d05b8..f33957269 100644 --- a/base/loader/object_file.cc +++ b/base/loader/object_file.cc @@ -43,6 +43,8 @@ #include "base/loader/aout_object.hh" #include "base/loader/elf_object.hh" +#include "mem/translating_port.hh" + using namespace std; ObjectFile::ObjectFile(const string &_filename, int _fd, @@ -60,6 +62,38 @@ ObjectFile::~ObjectFile() } +bool +ObjectFile::loadSection(Section *sec, TranslatingPort *memPort, bool loadPhys) +{ + if (sec->size != 0) { + Addr addr = sec->baseAddr; + if (loadPhys) { + // this is Alpha-specific... going to have to fix this + // for other architectures + addr &= (ULL(1) << 40) - 1; + } + + if (sec->fileImage) { + memPort->writeBlobFunctional(addr, sec->fileImage, sec->size, true); + } + else { + // no image: must be bss + memPort->memsetBlobFunctional(addr, 0, sec->size, true); + } + } + return true; +} + + +bool +ObjectFile::loadSections(TranslatingPort *memPort, bool loadPhys) +{ + return (loadSection(&text, memPort, loadPhys) + && loadSection(&data, memPort, loadPhys) + && loadSection(&bss, memPort, loadPhys)); +} + + void ObjectFile::close() { diff --git a/base/loader/object_file.hh b/base/loader/object_file.hh index 1d750e341..35ea11b54 100644 --- a/base/loader/object_file.hh +++ b/base/loader/object_file.hh @@ -67,7 +67,7 @@ class ObjectFile void close(); - virtual bool loadSections(TranslatingPort *memPort, bool loadPhys = false) = 0; + virtual bool loadSections(TranslatingPort *memPort, bool loadPhys = false); virtual bool loadGlobalSymbols(SymbolTable *symtab) = 0; virtual bool loadLocalSymbols(SymbolTable *symtab) = 0; @@ 
-77,8 +77,9 @@ class ObjectFile protected: struct Section { - Addr baseAddr; - size_t size; + Addr baseAddr; + uint8_t *fileImage; + size_t size; }; Addr entry; @@ -88,6 +89,8 @@ class ObjectFile Section data; Section bss; + bool loadSection(Section *sec, TranslatingPort *memPort, bool loadPhys); + public: Addr entryPoint() const { return entry; } Addr globalPointer() const { return globalPtr; } diff --git a/cpu/base.hh b/cpu/base.hh index 870e26a39..c0e087f42 100644 --- a/cpu/base.hh +++ b/cpu/base.hh @@ -154,14 +154,6 @@ class BaseCPU : public SimObject */ int number_of_threads; - /** - * A pointer to the port into the memory system to be used by syscall - * emulation. This way the data being accessed via syscalls looks in - * the memory heirachy for any changes that haven't been written back - * to main memory yet. - */ - Port* memPort; - /** * Vector of per-thread instruction-based event queues. Used for * scheduling events based on number of instructions committed by diff --git a/cpu/exec_context.cc b/cpu/exec_context.cc index fa91eb672..0b91992cf 100644 --- a/cpu/exec_context.cc +++ b/cpu/exec_context.cc @@ -37,13 +37,13 @@ #include "base/output.hh" #include "cpu/profile.hh" #include "kern/kernel_stats.hh" -#include "mem/translating_port.hh" #include "sim/serialize.hh" #include "sim/sim_exit.hh" #include "sim/system.hh" #include "targetarch/stacktrace.hh" #else #include "sim/process.hh" +#include "mem/translating_port.hh" #endif using namespace std; @@ -78,14 +78,14 @@ ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num, System *_sys, } #else ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num, - Process *_process, int _asid) + Process *_process, int _asid, Port *mem_port) : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num), cpu_id(-1), process(_process), asid(_asid), func_exe_inst(0), storeCondFailures(0) { - port = new TranslatingPort(cpu->memPort, process->pTable); + port = new TranslatingPort(mem_port, process->pTable); 
memset(&regs, 0, sizeof(RegFile)); } #endif diff --git a/cpu/exec_context.hh b/cpu/exec_context.hh index 8e9e5f2fd..f55b45de1 100644 --- a/cpu/exec_context.hh +++ b/cpu/exec_context.hh @@ -35,7 +35,6 @@ #include "sim/host.hh" #include "sim/serialize.hh" #include "targetarch/byte_swap.hh" -#include "mem/translating_port.hh" class BaseCPU; @@ -52,6 +51,7 @@ namespace Kernel { class Binning; class Statistics; } #else // !FULL_SYSTEM #include "sim/process.hh" +class TranslatingPort; #endif // FULL_SYSTEM @@ -187,7 +187,7 @@ class ExecContext AlphaITB *_itb, AlphaDTB *_dtb, FunctionalMemory *_dem); #else ExecContext(BaseCPU *_cpu, int _thread_num, - Process *_process, int _asid); + Process *_process, int _asid, Port *mem_port); #endif virtual ~ExecContext(); diff --git a/cpu/simple/cpu.cc b/cpu/simple/cpu.cc index 41acd2456..d8d4c3644 100644 --- a/cpu/simple/cpu.cc +++ b/cpu/simple/cpu.cc @@ -132,16 +132,6 @@ SimpleCPU::SimpleCPU(Params *p) dcachePort(this), tickEvent(this, p->width), xc(NULL) { _status = Idle; -#if FULL_SYSTEM - xc = new ExecContext(this, 0, p->system, p->itb, p->dtb, p->mem); - - // initialize CPU, including PC - TheISA::initCPU(&xc->regs); -#else - xc = new ExecContext(this, /* thread_num */ 0, p->process, /* asid */ 0); -#endif // !FULL_SYSTEM - - memPort = &dcachePort; //Create Memory Ports (conect them up) p->mem->addPort("DCACHE"); @@ -152,11 +142,39 @@ SimpleCPU::SimpleCPU(Params *p) icachePort.setPeer(p->mem->getPort("ICACHE")); (p->mem->getPort("ICACHE"))->setPeer(&icachePort); +#if FULL_SYSTEM + xc = new ExecContext(this, 0, p->system, p->itb, p->dtb, p->mem); + // initialize CPU, including PC + TheISA::initCPU(&xc->regs); +#else + xc = new ExecContext(this, /* thread_num */ 0, p->process, /* asid */ 0, + &dcachePort); +#endif // !FULL_SYSTEM - req = new CpuRequest; +#if SIMPLE_CPU_MEM_ATOMIC || SIMPLE_CPU_MEM_IMMEDIATE + ifetch_req = new CpuRequest; + ifetch_req->asid = 0; + ifetch_req->size = sizeof(MachInst); + ifetch_pkt = new Packet; +
ifetch_pkt->cmd = Read; + ifetch_pkt->data = (uint8_t *)&inst; + ifetch_pkt->req = ifetch_req; + ifetch_pkt->size = sizeof(MachInst); - req->asid = 0; + data_read_req = new CpuRequest; + data_read_req->asid = 0; + data_read_pkt = new Packet; + data_read_pkt->cmd = Read; + data_read_pkt->data = new uint8_t[8]; + data_read_pkt->req = data_read_req; + + data_write_req = new CpuRequest; + data_write_req->asid = 0; + data_write_pkt = new Packet; + data_write_pkt->cmd = Write; + data_write_pkt->req = data_write_req; +#endif numInst = 0; startNumInst = 0; @@ -425,6 +443,7 @@ SimpleCPU::copy(Addr dest) } return fault; #else + panic("copy not implemented"); return No_Fault; #endif } @@ -437,8 +456,8 @@ SimpleCPU::read(Addr addr, T &data, unsigned flags) if (status() == DcacheWaitResponse || status() == DcacheWaitSwitch) { // Fault fault = xc->read(memReq,data); // Not sure what to check for no fault... - if (pkt->result == Success) { - memcpy(&data, pkt->data, sizeof(T)); + if (data_read_pkt->result == Success) { + memcpy(&data, data_read_pkt->data, sizeof(T)); } if (traceData) { @@ -451,19 +470,45 @@ SimpleCPU::read(Addr addr, T &data, unsigned flags) // memReq->reset(addr, sizeof(T), flags); +#if SIMPLE_CPU_MEM_TIMING + CpuRequest *data_read_req = new CpuRequest; +#endif + + data_read_req->vaddr = addr; + data_read_req->size = sizeof(T); + data_read_req->flags = flags; + data_read_req->time = curTick; + // translate to physical address - // NEED NEW TRANSLATION HERE - Fault fault = xc->translateDataReadReq(req); + Fault fault = xc->translateDataReadReq(data_read_req); // Now do the access. 
if (fault == No_Fault) { - pkt = new Packet; - pkt->cmd = Read; - req->paddr = addr; - pkt->size = sizeof(T); - pkt->req = req; +#if SIMPLE_CPU_MEM_TIMING + data_read_pkt = new Packet; + data_read_pkt->cmd = Read; + data_read_pkt->req = data_read_req; + data_read_pkt->data = new uint8_t[8]; +#endif + data_read_pkt->addr = data_read_req->paddr; + data_read_pkt->size = sizeof(T); - sendDcacheRequest(); + sendDcacheRequest(data_read_pkt); + +#if SIMPLE_CPU_MEM_IMMEDIATE + // Need to find a way to not duplicate code above. + + if (data_read_pkt->result == Success) { + memcpy(&data, data_read_pkt->data, sizeof(T)); + } + + if (traceData) { + traceData->setAddr(addr); + } + + // @todo: Figure out a way to create a Fault from the packet result. + return No_Fault; +#endif } /* memReq->cmd = Read; @@ -493,7 +538,7 @@ SimpleCPU::read(Addr addr, T &data, unsigned flags) } */ // This will need a new way to tell if it has a dcache attached. - if (/*!dcacheInterface && */(req->flags & UNCACHEABLE)) + if (data_read_req->flags & UNCACHEABLE) recordEvent("Uncached Read"); return fault; @@ -546,27 +591,29 @@ template <class T> Fault SimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { -// memReq->reset(addr, sizeof(T), flags); - req->vaddr = addr; - req->time = curTick; - req->size = sizeof(T); + data_write_req->vaddr = addr; + data_write_req->time = curTick; + data_write_req->size = sizeof(T); + data_write_req->flags = flags; // translate to physical address - // NEED NEW TRANSLATION HERE - Fault fault = xc->translateDataWriteReq(req); + Fault fault = xc->translateDataWriteReq(data_write_req); // Now do the access.
if (fault == No_Fault) { - pkt = new Packet; - pkt->cmd = Write; - pkt->size = sizeof(T); - pkt->req = req; +#if SIMPLE_CPU_MEM_TIMING + data_write_pkt = new Packet; + data_write_pkt->cmd = Write; + data_write_pkt->req = data_write_req; + data_write_pkt->data = new uint8_t[64]; + memcpy(data_write_pkt->data, &data, sizeof(T)); +#else + data_write_pkt->data = (uint8_t *)&data; +#endif + data_write_pkt->addr = data_write_req->paddr; + data_write_pkt->size = sizeof(T); - // Copy data into the packet. - pkt->data = new uint8_t[64]; - memcpy(pkt->data, &data, sizeof(T)); - - sendDcacheRequest(); + sendDcacheRequest(data_write_pkt); } /* @@ -594,10 +641,10 @@ SimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } */ if (res && (fault == No_Fault)) - *res = pkt->result; + *res = data_write_pkt->result; // This will need a new way to tell if it's hooked up to a cache or not. - if (/*!dcacheInterface && */(req->flags & UNCACHEABLE)) + if (data_write_req->flags & UNCACHEABLE) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -657,9 +704,11 @@ SimpleCPU::dbg_vtophys(Addr addr) #endif // FULL_SYSTEM void -SimpleCPU::sendIcacheRequest() +SimpleCPU::sendIcacheRequest(Packet *pkt) { -#if 0 + assert(!tickEvent.scheduled()); +#if SIMPLE_CPU_MEM_TIMING + retry_pkt = pkt; bool success = icachePort.sendTiming(*pkt); unscheduleTickEvent(); @@ -673,7 +722,7 @@ SimpleCPU::sendIcacheRequest() // Need to wait for cache to respond _status = IcacheWaitResponse; } -#else +#elif SIMPLE_CPU_MEM_ATOMIC Tick latency = icachePort.sendAtomic(*pkt); unscheduleTickEvent(); @@ -685,17 +734,21 @@ SimpleCPU::sendIcacheRequest() icacheStallCycles += latency; _status = IcacheAccessComplete; - - delete pkt; +#elif SIMPLE_CPU_MEM_IMMEDIATE + icachePort.sendAtomic(*pkt); +#else +#error "SimpleCPU has no mem model set" #endif } void -SimpleCPU::sendDcacheRequest() +SimpleCPU::sendDcacheRequest(Packet *pkt) { + assert(!tickEvent.scheduled()); 
+#if SIMPLE_CPU_MEM_TIMING unscheduleTickEvent(); -#if 0 + retry_pkt = pkt; bool success = dcachePort.sendTiming(*pkt); lastDcacheStall = curTick; @@ -705,7 +758,9 @@ SimpleCPU::sendDcacheRequest() } else { _status = DcacheWaitResponse; } -#else +#elif SIMPLE_CPU_MEM_ATOMIC + unscheduleTickEvent(); + Tick latency = dcachePort.sendAtomic(*pkt); scheduleTickEvent(latency); @@ -714,19 +769,22 @@ SimpleCPU::sendDcacheRequest() // we check the status of the packet sent (is this valid?), // we won't know if the latency is a hit or a miss. dcacheStallCycles += latency; - - // Delete the packet right here? - delete pkt; +#elif SIMPLE_CPU_MEM_IMMEDIATE + dcachePort.sendAtomic(*pkt); +#else +#error "SimpleCPU has no mem model set" #endif } void SimpleCPU::processResponse(Packet &response) { + assert(SIMPLE_CPU_MEM_TIMING); + // For what things is the CPU the consumer of the packet it sent // out? This may create a memory leak if that's the case and it's // expected of the SimpleCPU to delete its own packet. 
- pkt = &response; + Packet *pkt = &response; switch (status()) { case IcacheWaitResponse: @@ -780,19 +838,23 @@ SimpleCPU::processResponse(Packet &response) Packet * SimpleCPU::processRetry() { +#if SIMPLE_CPU_MEM_TIMING switch(status()) { case IcacheRetry: icacheRetryCycles += curTick - lastIcacheStall; - return pkt; + return retry_pkt; break; case DcacheRetry: dcacheRetryCycles += curTick - lastDcacheStall; - return pkt; + return retry_pkt; break; default: panic("SimpleCPU::processRetry: bad state"); break; } +#else + panic("shouldn't be here"); +#endif } #if FULL_SYSTEM @@ -884,28 +946,35 @@ SimpleCPU::tick() #define IFETCH_FLAGS(pc) 0 #endif - req->vaddr = xc->regs.pc & ~3; - req->time = curTick; - req->size = sizeof(MachInst); +#if SIMPLE_CPU_MEM_TIMING + CpuRequest *ifetch_req = new CpuRequest(); + ifetch_req->size = sizeof(MachInst); +#endif + + ifetch_req->vaddr = xc->regs.pc & ~3; + ifetch_req->time = curTick; /* memReq->reset(xc->regs.pc & ~3, sizeof(uint32_t), IFETCH_FLAGS(xc->regs.pc)); */ - fault = xc->translateInstReq(req); + fault = xc->translateInstReq(ifetch_req); if (fault == No_Fault) { - pkt = new Packet; - pkt->cmd = Read; - pkt->addr = req->paddr; - pkt->size = sizeof(MachInst); - pkt->req = req; - pkt->data = (uint8_t *)&inst; +#if SIMPLE_CPU_MEM_TIMING + Packet *ifetch_pkt = new Packet; + ifetch_pkt->cmd = Read; + ifetch_pkt->data = (uint8_t *)&inst; + ifetch_pkt->req = ifetch_req; + ifetch_pkt->size = sizeof(MachInst); +#endif + ifetch_pkt->addr = ifetch_req->paddr; - sendIcacheRequest(); + sendIcacheRequest(ifetch_pkt); +#if SIMPLE_CPU_MEM_TIMING || SIMPLE_CPU_MEM_ATOMIC return; -/* fault = xc->mem->read(memReq, inst); - +#endif +/* if (icacheInterface && fault == No_Fault) { memReq->completionEvent = NULL; @@ -980,8 +1049,7 @@ SimpleCPU::tick() // If we have a dcache miss, then we can't finialize the instruction // trace yet because we want to populate it with the data later - if (traceData && - !(status() == DcacheWaitResponse && 
pkt->cmd == Read)) { + if (traceData && (status() != DcacheWaitResponse)) { traceData->finalize(); } diff --git a/cpu/simple/cpu.hh b/cpu/simple/cpu.hh index 3354166cc..ca10134f7 100644 --- a/cpu/simple/cpu.hh +++ b/cpu/simple/cpu.hh @@ -64,6 +64,16 @@ namespace Trace { class InstRecord; } + +// Set exactly one of these symbols to 1 to set the memory access +// model. Probably should make these template parameters, or even +// just fork the CPU models. +// +#define SIMPLE_CPU_MEM_TIMING 0 +#define SIMPLE_CPU_MEM_ATOMIC 0 +#define SIMPLE_CPU_MEM_IMMEDIATE 1 + + class SimpleCPU : public BaseCPU { class CpuPort : public Port @@ -188,8 +198,16 @@ class SimpleCPU : public BaseCPU // current instruction MachInst inst; - CpuRequest *req; - Packet *pkt; +#if SIMPLE_CPU_MEM_TIMING + Packet *retry_pkt; +#elif SIMPLE_CPU_MEM_ATOMIC || SIMPLE_CPU_MEM_IMMEDIATE + CpuRequest *ifetch_req; + Packet *ifetch_pkt; + CpuRequest *data_read_req; + Packet *data_read_pkt; + CpuRequest *data_write_req; + Packet *data_write_pkt; +#endif // Pointer to the sampler that is telling us to switchover. 
// Used to signal the completion of the pipe drain and schedule @@ -246,8 +264,8 @@ class SimpleCPU : public BaseCPU Stats::Scalar<> dcacheRetryCycles; Counter lastDcacheRetry; - void sendIcacheRequest(); - void sendDcacheRequest(); + void sendIcacheRequest(Packet *pkt); + void sendDcacheRequest(Packet *pkt); void processResponse(Packet &response); Packet * processRetry(); diff --git a/mem/physical.cc b/mem/physical.cc index fea4b6ec5..69544c8fe 100644 --- a/mem/physical.cc +++ b/mem/physical.cc @@ -174,6 +174,8 @@ PhysicalMemory::doFunctionalAccess(Packet &pkt) default: panic("unimplemented"); } + + pkt.result = Success; } Port * diff --git a/mem/port.cc b/mem/port.cc index 8c4b3810c..e080f8b81 100644 --- a/mem/port.cc +++ b/mem/port.cc @@ -63,3 +63,12 @@ Port::readBlobFunctional(Addr addr, uint8_t *p, int size) blobHelper(addr, p, size, Read); } +void +Port::memsetBlobFunctional(Addr addr, uint8_t val, int size) +{ + // quick and dirty... + uint8_t *buf = new uint8_t[size]; + + memset(buf, val, size); + blobHelper(addr, buf, size, Write); +} diff --git a/mem/translating_port.cc b/mem/translating_port.cc index e385a74b6..f4f2ca737 100644 --- a/mem/translating_port.cc +++ b/mem/translating_port.cc @@ -83,23 +83,31 @@ TranslatingPort::writeBlobFunctional(Addr addr, uint8_t *p, int size, return No_Fault; } -/* + Fault -TranslatingPort::memsetBlobFunctional(Addr addr, uint8_t val, int size) +TranslatingPort::memsetBlobFunctional(Addr addr, uint8_t val, int size, + bool alloc) { Addr paddr; for (ChunkGenerator gen(addr, size, VMPageSize); !gen.done(); gen.next()) { - if (!pTable->translate(gen.addr(),paddr)) - return Machine_Check_Fault; + if (!pTable->translate(gen.addr(), paddr)) { + if (alloc) { + pTable->allocate(roundDown(gen.addr(), VMPageSize), + VMPageSize); + pTable->translate(gen.addr(), paddr); + } else { + return Machine_Check_Fault; + } + } - port->memsetBlobFunctional(paddr, val, gen.size()); + port->memsetBlobFunctional(paddr, val, gen.size()); } return 
No_Fault; } -*/ + Fault TranslatingPort::writeStringFunctional(Addr addr, const char *str) diff --git a/mem/translating_port.hh b/mem/translating_port.hh index 3d77b2c2b..1a334c103 100644 --- a/mem/translating_port.hh +++ b/mem/translating_port.hh @@ -51,7 +51,8 @@ class TranslatingPort Fault readBlobFunctional(Addr addr, uint8_t *p, int size); Fault writeBlobFunctional(Addr addr, uint8_t *p, int size, bool alloc = false); - // Fault memsetBlobFunctional(Addr addr, uint8_t val, int size); + Fault memsetBlobFunctional(Addr addr, uint8_t val, int size, + bool alloc = false); Fault writeStringFunctional(Addr addr, const char *str); Fault readStringFunctional(std::string &str, Addr addr);