inorder-tlb-cunit: merge the TLB as implicit to any memory access

TLBUnit is no longer used, and we also get rid of the memAccSize and memAccFlags functions that were added to the ISA and StaticInst, since the TLB is no longer a separate resource to acquire. Instead, a TLB access is done before any read/write to memory, and its result is checked before the request is sent out to memory. * * *
2009-05-12 15:01:16 -04:00 · 2009-05-12 15:01:16 -04:00 · db2b721380
commit db2b721380
parent 3a057bdbb1
16 changed files with 516 additions and 195 deletions
--- a/src/arch/alpha/isa/mem.isa
+++ b/src/arch/alpha/isa/mem.isa
@ -53,10 +53,6 @@ output header {{
        std::string
        generateDisassembly(Addr pc, const SymbolTable *symtab) const;
       public:
        Request::Flags memAccFlags() { return memAccessFlags; }
    };
    /**
@ -140,8 +136,6 @@ def template LoadStoreDeclare {{
        %(InitiateAccDeclare)s
        %(CompleteAccDeclare)s
        %(MemAccSizeDeclare)s
    };
 }};
@ -160,19 +154,6 @@ def template CompleteAccDeclare {{
                      Trace::InstRecord *) const;
 }};
 def template MemAccSizeDeclare {{
    int memAccSize(%(CPU_exec_context)s *xc);
 }};
 def template LoadStoreMemAccSize {{
    int %(class_name)s::memAccSize(%(CPU_exec_context)s *xc)
    {
        // Return the memory access size in bytes
        return (%(mem_acc_size)d / 8);
    }
 }};
 def template LoadStoreConstructor {{
    inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
         : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
@ -462,15 +443,6 @@ def template MiscCompleteAcc {{
    }
 }};
 def template MiscMemAccSize {{
    int %(class_name)s::memAccSize(%(CPU_exec_context)s *xc)
    {
        return (%(mem_acc_size)d / 8);
        panic("memAccSize undefined: Misc instruction does not support split "
              "access method!");
        return 0;
    }
 }};
 // load instructions use Ra as dest, so check for
 // Ra == 31 to detect nops
@ -541,11 +513,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
    initiateAccTemplate = eval(exec_template_base + 'InitiateAcc')
    completeAccTemplate = eval(exec_template_base + 'CompleteAcc')
    if (exec_template_base == 'Load' or exec_template_base == 'Store'):
      memAccSizeTemplate = eval('LoadStoreMemAccSize')
    else:
      memAccSizeTemplate = eval('MiscMemAccSize')
    # (header_output, decoder_output, decode_block, exec_output)
    return (LoadStoreDeclare.subst(iop),
            LoadStoreConstructor.subst(iop),
@ -553,8 +520,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
            fullExecTemplate.subst(iop)
            + EACompExecute.subst(iop)
            + initiateAccTemplate.subst(iop)
-            + completeAccTemplate.subst(iop)
+            + completeAccTemplate.subst(iop))
            + memAccSizeTemplate.subst(memacc_iop))
 }};
 def format LoadOrNop(memacc_code, ea_code = {{ EA = Rb + disp; }},
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@ -56,8 +56,6 @@ virtual Fault initiateAcc(%(type)s *xc, Trace::InstRecord *traceData) const
 virtual Fault completeAcc(Packet *pkt, %(type)s *xc,
                          Trace::InstRecord *traceData) const
 { panic("completeAcc not defined!"); M5_DUMMY_RETURN };
 virtual int memAccSize(%(type)s *xc)
 { panic("memAccSize not defined!"); M5_DUMMY_RETURN };
 '''
 mem_ini_sig_template = '''
--- a/src/cpu/inorder/SConscript
+++ b/src/cpu/inorder/SConscript
@ -75,7 +75,6 @@ if 'InOrderCPU' in env['CPU_MODELS']:
 	Source('resources/decode_unit.cc')
 	Source('resources/inst_buffer.cc')
 	Source('resources/graduation_unit.cc')
 	Source('resources/tlb_unit.cc')
 	Source('resources/fetch_seq_unit.cc')
 	Source('resources/mult_div_unit.cc')
 	Source('resource_pool.cc')
--- a/src/cpu/inorder/cpu.cc
+++ b/src/cpu/inorder/cpu.cc
@ -204,18 +204,6 @@ InOrderCPU::InOrderCPU(Params *params)
        fatal("Unable to find port for data.\n");
    }
    // Hard-Code Bindings to ITB & DTB
    itbIdx = resPool->getResIdx(name() + "."  + "I-TLB");
    if (itbIdx == 0) {
        fatal("Unable to find ITB resource.\n");
    }
    dtbIdx = resPool->getResIdx(name() + "."  + "D-TLB");
    if (dtbIdx == 0) {
        fatal("Unable to find DTB resource.\n");
    }
    for (int i = 0; i < numThreads; ++i) {
        if (i < params->workload.size()) {
            DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
@ -486,6 +474,7 @@ InOrderCPU::getPort(const std::string &if_name, int idx)
 void
 InOrderCPU::trap(Fault fault, unsigned tid, int delay)
 {
    //@ Squash Pipeline during TRAP
    scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay);
 }
@ -502,7 +491,7 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
    CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe);
    if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event Type #%s for cycle %i.\n",
+        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n",
                eventNames[c_event], curTick + delay);
        mainEventQueue.schedule(cpu_event,curTick + delay);
    } else {
@ -1266,20 +1255,6 @@ InOrderCPU::syscall(int64_t callnum, int tid)
    nonSpecInstActive[tid] = false;
 }
 Fault
 InOrderCPU::read(DynInstPtr inst)
 {
    Resource *mem_res = resPool->getResource(dataPortIdx);
    return mem_res->doDataAccess(inst);
 }
 Fault
 InOrderCPU::write(DynInstPtr inst, uint64_t *res)
 {
    Resource *mem_res = resPool->getResource(dataPortIdx);
    return mem_res->doDataAccess(inst, res);
 }
 void
 InOrderCPU::prefetch(DynInstPtr inst)
 {
@ -1298,7 +1273,8 @@ InOrderCPU::writeHint(DynInstPtr inst)
 TheISA::TLB*
 InOrderCPU::getITBPtr()
 {
-    TLBUnit *itb_res = dynamic_cast<TLBUnit*>(resPool->getResource(itbIdx));
+    CacheUnit *itb_res =
        dynamic_cast<CacheUnit*>(resPool->getResource(fetchPortIdx));
    return itb_res->tlb();
 }
@ -1306,6 +1282,136 @@ InOrderCPU::getITBPtr()
 TheISA::TLB*
 InOrderCPU::getDTBPtr()
 {
-    TLBUnit *dtb_res = dynamic_cast<TLBUnit*>(resPool->getResource(dtbIdx));
+    CacheUnit *dtb_res =
        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
    return dtb_res->tlb();
 }
 /**
  * Forward a read issued by an instruction to the resource found at
  * dataPortIdx in the resource pool, treating it as a CacheUnit.
  */
 template <class T>
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
 {
    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
    //       you want to run w/out caches?
    Resource *data_res = resPool->getResource(dataPortIdx);
    CacheUnit *data_unit = dynamic_cast<CacheUnit*>(data_res);
    return data_unit->read(inst, addr, data, flags);
 }
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 template
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags);
 template
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags);
 template
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags);
 template
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags);
 template
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags);
 template
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags);
 #endif //DOXYGEN_SHOULD_SKIP_THIS
 /** double specialization: forwards the storage viewed as a uint64_t. */
 template<>
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, double &data, unsigned flags)
 {
    uint64_t &raw_bits = reinterpret_cast<uint64_t&>(data);
    return read(inst, addr, raw_bits, flags);
 }
 /** float specialization: forwards the storage viewed as a uint32_t. */
 template<>
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, float &data, unsigned flags)
 {
    uint32_t &raw_bits = reinterpret_cast<uint32_t&>(data);
    return read(inst, addr, raw_bits, flags);
 }
 /** int32_t specialization: forwards the storage viewed as a uint32_t. */
 template<>
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags)
 {
    uint32_t &unsigned_view = reinterpret_cast<uint32_t&>(data);
    return read(inst, addr, unsigned_view, flags);
 }
 /**
  * Forward a write issued by an instruction to the resource found at
  * dataPortIdx in the resource pool, treating it as a CacheUnit.
  */
 template <class T>
 Fault
 InOrderCPU::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
                   uint64_t *write_res)
 {
    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
    //       you want to run w/out caches?
    Resource *data_res = resPool->getResource(dataPortIdx);
    CacheUnit *data_unit = dynamic_cast<CacheUnit*>(data_res);
    return data_unit->write(inst, data, addr, flags, write_res);
 }
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 template
 Fault
 InOrderCPU::write(DynInstPtr inst, Twin32_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 InOrderCPU::write(DynInstPtr inst, Twin64_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 InOrderCPU::write(DynInstPtr inst, uint64_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 InOrderCPU::write(DynInstPtr inst, uint32_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 InOrderCPU::write(DynInstPtr inst, uint16_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 #endif //DOXYGEN_SHOULD_SKIP_THIS
 /** double specialization: forwards the value's storage as a uint64_t. */
 template<>
 Fault
 InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags,
                   uint64_t *res)
 {
    uint64_t raw_bits = reinterpret_cast<uint64_t&>(data);
    return write(inst, raw_bits, addr, flags, res);
 }
 /** float specialization: forwards the value's storage as a uint32_t. */
 template<>
 Fault
 InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags,
                   uint64_t *res)
 {
    uint32_t raw_bits = reinterpret_cast<uint32_t&>(data);
    return write(inst, raw_bits, addr, flags, res);
 }
 /** int32_t specialization: forwards the value converted to uint32_t. */
 template<>
 Fault
 InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags,
                   uint64_t *res)
 {
    uint32_t unsigned_val = static_cast<uint32_t>(data);
    return write(inst, unsigned_val, addr, flags, res);
 }
--- a/src/cpu/inorder/cpu.hh
+++ b/src/cpu/inorder/cpu.hh
@ -492,12 +492,15 @@ class InOrderCPU : public BaseCPU
    /** Forwards an instruction read to the appropriate data
     *  resource (indexes into Resource Pool thru "dataPortIdx")
     */
-    Fault read(DynInstPtr inst);
+    template <class T>
    Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags);
    /** Forwards an instruction write. to the appropriate data
     *  resource (indexes into Resource Pool thru "dataPortIdx")
     */
-    Fault write(DynInstPtr inst, uint64_t *res = NULL);
+    template <class T>
    Fault write(DynInstPtr inst, T data, Addr addr, unsigned flags,
                uint64_t *write_res = NULL);
    /** Forwards an instruction prefetch to the appropriate data
     *  resource (indexes into Resource Pool thru "dataPortIdx")
--- a/src/cpu/inorder/inorder_dyn_inst.cc
+++ b/src/cpu/inorder/inorder_dyn_inst.cc
@ -604,7 +604,7 @@ template<class T>
 inline Fault
 InOrderDynInst::read(Addr addr, T &data, unsigned flags)
 {
-    return cpu->read(this);
+    return cpu->read(this, addr, data, flags);
 }
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
@ -657,7 +657,7 @@ InOrderDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res)
    DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] Setting store data to %#x.\n",
            threadNumber, seqNum, memData);
-    return cpu->write(this, res);
+    return cpu->write(this, data, addr, flags, res);
 }
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@ -652,10 +652,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
    Addr getMemAddr()
    { return memAddr; }
    int getMemAccSize() { return staticInst->memAccSize(this); }
    int getMemFlags() { return staticInst->memAccFlags(); }
    /** Sets the effective address. */
    void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
--- a/src/cpu/inorder/pipeline_traits.cc
+++ b/src/cpu/inorder/pipeline_traits.cc
@ -69,7 +69,6 @@ void createFrontEndSchedule(DynInstPtr &inst)
    InstStage *E = inst->addStage();
    I->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
    I->needs(ITLB, TLBUnit::FetchLookup);
    I->needs(ICache, CacheUnit::InitiateFetch);
    E->needs(ICache, CacheUnit::CompleteFetch);
@ -101,14 +100,10 @@ bool createBackEndSchedule(DynInstPtr &inst)
    } else if ( inst->isMemRef() ) {
        if ( inst->isLoad() ) {
            E->needs(AGEN, AGENUnit::GenerateAddr);
            E->needs(DTLB, TLBUnit::DataReadLookup);
            E->needs(DCache, CacheUnit::InitiateReadData);
        }
    } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
        E->needs(MDU, MultDivUnit::StartMultDiv);
        // ZERO-LATENCY Multiply:
        // E->needs(MDU, MultDivUnit::MultDiv);
    } else {
        E->needs(ExecUnit, ExecutionUnit::ExecuteInst);
    }
@ -122,7 +117,6 @@ bool createBackEndSchedule(DynInstPtr &inst)
    } else if ( inst->isStore() ) {
        M->needs(RegManager, UseDefUnit::ReadSrcReg, 1);
        M->needs(AGEN, AGENUnit::GenerateAddr);
        M->needs(DTLB, TLBUnit::DataWriteLookup);
        M->needs(DCache, CacheUnit::InitiateWriteData);
    }
--- a/src/cpu/inorder/pipeline_traits.hh
+++ b/src/cpu/inorder/pipeline_traits.hh
@ -56,7 +56,6 @@ namespace ThePipeline {
    // Enumerated List of Resources The Pipeline Uses
    enum ResourceList {
       FetchSeq = 0,
       ITLB,
       ICache,
       Decode,
       BPred,
@ -65,7 +64,6 @@ namespace ThePipeline {
       AGEN,
       ExecUnit,
       MDU,
       DTLB,
       DCache,
       Grad,
       FetchBuff2
--- a/src/cpu/inorder/resource.hh
+++ b/src/cpu/inorder/resource.hh
@ -140,15 +140,14 @@ class Resource {
     *  if instruction is actually in resource before
     *  trying to do access.Needs to be defined for derived units.
     */
-    virtual Fault doDataAccess(DynInstPtr inst, uint64_t *res=NULL)
+    virtual Fault doCacheAccess(DynInstPtr inst, uint64_t *res=NULL)
-    { panic("doDataAccess undefined for %s", name()); return NoFault; }
+    { panic("doCacheAccess undefined for %s", name()); return NoFault; }
    virtual void prefetch(DynInstPtr inst)
    { panic("prefetch undefined for %s", name()); }
    virtual void writeHint(DynInstPtr inst)
-    { panic("doDataAccess undefined for %s", name()); }
+    { panic("writeHint undefined for %s", name()); }
    /** Squash All Requests After This Seq Num */
    virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, unsigned tid);
--- a/src/cpu/inorder/resource_pool.cc
+++ b/src/cpu/inorder/resource_pool.cc
@ -50,8 +50,6 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
    // --------------------------------------------------
    resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params));
    resources.push_back(new TLBUnit("I-TLB", ITLB, StageWidth, 0, _cpu, params));
    memObjects.push_back(ICache);
    resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params));
@ -69,8 +67,6 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
    resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params));
    resources.push_back(new TLBUnit("D-TLB", DTLB, StageWidth, 0, _cpu, params));
    memObjects.push_back(DCache);
    resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params));
@ -205,7 +201,6 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
                                     inst->bdelaySeqNum,
                                     inst->readTid());
            mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
        }
        break;
@ -256,7 +251,7 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
        break;
      default:
-        DPRINTF(Resource, "Ignoring Unrecognized CPU Event Type #%s.\n", InOrderCPU::eventNames[e_type]);
+        DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]);
        ; // If Resource Pool doesnt recognize event, we ignore it.
    }
 }
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@ -86,6 +86,25 @@ CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
      predecoder(NULL)
 {
    cachePort = new CachePort(this);
    // Hard-Code Selection For Now
    if (res_name == "icache_port")
        _tlb = params->itb;
    else if (res_name == "dcache_port")
        _tlb = params->dtb;
    else
        fatal("Unrecognized TLB name passed by user");
    for (int i=0; i < MaxThreads; i++) {
        tlbBlocked[i] = false;
    }
 }
 /** Accessor for the TLB pointer held in _tlb by this cache unit. */
 TheISA::TLB*
 CacheUnit::tlb()
 {
    return _tlb;
 }
 Port *
@ -97,9 +116,23 @@ CacheUnit::getPort(const string &if_name, int idx)
        return NULL;
 }
 /**
  * Allocate one CacheUnitEvent per unit of `width` into resourceEvent,
  * then call initSlots().
  */
 void
 CacheUnit::init()
 {
    // Currently Used to Model TLB Latency. Eventually
    // Switch to Timing TLB translations.
    resourceEvent = new CacheUnitEvent[width];
    initSlots();
 }
 int
 CacheUnit::getSlot(DynInstPtr inst)
 {
    if (tlbBlocked[inst->threadNumber]) {
        return -1;
    }
    if (!inst->validMemAddr()) {
        panic("Mem. Addr. must be set before requesting cache access\n");
    }
@ -156,45 +189,47 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
        panic("Mem. Addr. must be set before requesting cache access\n");
    }
    int req_size = 0;
    MemCmd::Command pkt_cmd;
-    if (sched_entry->cmd == InitiateReadData) {
+    switch (sched_entry->cmd)
    {
      case InitiateReadData:
        pkt_cmd = MemCmd::ReadReq;
        req_size = inst->getMemAccSize();
        DPRINTF(InOrderCachePort,
-                "[tid:%i]: %i byte Read request from [sn:%i] for addr %08p\n",
+                "[tid:%i]: Read request from [sn:%i] for addr %08p\n",
-                inst->readTid(), req_size, inst->seqNum, inst->getMemAddr());
+                inst->readTid(), inst->seqNum, inst->getMemAddr());
-    } else if (sched_entry->cmd == InitiateWriteData) {
+        break;
      case InitiateWriteData:
        pkt_cmd = MemCmd::WriteReq;
        req_size = inst->getMemAccSize();
        DPRINTF(InOrderCachePort,
-                "[tid:%i]: %i byte Write request from [sn:%i] for addr %08p\n",
+                "[tid:%i]: Write request from [sn:%i] for addr %08p\n",
-                inst->readTid(), req_size, inst->seqNum, inst->getMemAddr());
+                inst->readTid(), inst->seqNum, inst->getMemAddr());
-    } else if (sched_entry->cmd == InitiateFetch){
+        break;
      case InitiateFetch:
        pkt_cmd = MemCmd::ReadReq;
        req_size = sizeof(MachInst);
        DPRINTF(InOrderCachePort,
-                "[tid:%i]: %i byte Fetch request from [sn:%i] for addr %08p\n",
+                "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n",
-                inst->readTid(), req_size, inst->seqNum, inst->getMemAddr());
+                inst->readTid(), inst->seqNum, inst->getMemAddr());
-    } else {
+        break;
      default:
        panic("%i: Unexpected request type (%i) to %s", curTick,
              sched_entry->cmd, name());
    }
    return new CacheRequest(this, inst, stage_num, id, slot_num,
-                            sched_entry->cmd, req_size, pkt_cmd,
+                            sched_entry->cmd, 0, pkt_cmd,
                            0/*flags*/, this->cpu->readCpuId());
 }
 void
 CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
 {
    //service_request = false;
    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
    assert(cache_req);
@ -204,7 +239,7 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
        // If different, then update command in the request
        cache_req->cmd = inst->resSched.top()->cmd;
        DPRINTF(InOrderCachePort,
-                "[tid:%i]: [sn:%i]: the command for this instruction\n",
+                "[tid:%i]: [sn:%i]: Updating the command for this instruction\n",
                inst->readTid(), inst->seqNum);
        service_request = true;
@ -219,6 +254,101 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
    }
 }
 /**
  * Build the memory request for an access and translate its address.
  *
  * A new Request is allocated for the instruction (kept in fetchMemReq
  * when tlb_mode is TLB::Execute, otherwise in dataMemReq), attached to
  * the cache request, and translated with translateAtomic(). The
  * resulting fault is stored in cache_req->fault.
  *
  * On a fault the issuing pipeline stage is stalled on this request, the
  * thread is marked TLB-blocked, an event is scheduled on the request's
  * slot with a delay of 1, and the fault is passed to the CPU's trap().
  *
  * @param inst Instruction performing the access.
  * @param cache_req Cache request that receives the Request and the fault.
  * @param acc_size Access size handed to the Request constructor.
  * @param flags Request flags handed to the Request constructor.
  * @param tlb_mode Translation mode; Execute selects the fetch request.
  * @return The fault produced by the translation (NoFault on success).
  */
 Fault
 CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
                        int flags, TheISA::TLB::Mode tlb_mode)
 {
    int tid = inst->readTid();
    // Sequence numbers are InstSeqNum elsewhere in this file; holding the
    // value in an int could truncate what the trace messages report.
    InstSeqNum seq_num = inst->seqNum;
    Addr aligned_addr = inst->getMemAddr();
    unsigned stage_num = cache_req->getStageNum();
    unsigned slot_idx = cache_req->getSlot();

    if (tlb_mode == TheISA::TLB::Execute) {
        inst->fetchMemReq = new Request(inst->readTid(), aligned_addr,
                                        acc_size, flags, inst->readPC(),
                                        cpu->readCpuId(), inst->readTid());
        cache_req->memReq = inst->fetchMemReq;
    } else {
        inst->dataMemReq = new Request(inst->readTid(), aligned_addr,
                                       acc_size, flags, inst->readPC(),
                                       cpu->readCpuId(), inst->readTid());
        cache_req->memReq = inst->dataMemReq;
    }

    cache_req->fault =
        _tlb->translateAtomic(cache_req->memReq,
                              cpu->thread[tid]->getTC(), tlb_mode);

    if (cache_req->fault != NoFault) {
        DPRINTF(InOrderTLB, "[tid:%i]: %s encountered while translating "
                "addr:%08p for [sn:%i].\n", tid, cache_req->fault->name(),
                cache_req->memReq->getVaddr(), seq_num);

        // Hold the stage and the thread until the scheduled event fires
        cpu->pipelineStage[stage_num]->setResStall(cache_req, tid);
        tlbBlocked[tid] = true;
        cache_req->tlbStall = true;
        scheduleEvent(slot_idx, 1);

        cpu->trap(cache_req->fault, tid);
    } else {
        DPRINTF(InOrderTLB, "[tid:%i]: [sn:%i] virt. addr %08p translated "
                "to phys. addr:%08p.\n", tid, seq_num,
                cache_req->memReq->getVaddr(),
                cache_req->memReq->getPaddr());
    }

    return cache_req->fault;
 }
 /**
  * Read on behalf of an instruction: translate the address through the
  * TLB first and only start the cache access when no fault was recorded.
  *
  * @return The fault stored on the instruction's cache request.
  */
 template <class T>
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
 {
    CacheReqPtr req = dynamic_cast<CacheReqPtr>(findRequest(inst));
    assert(req);

    int num_bytes = sizeof(T);
    doTLBAccess(inst, req, num_bytes, flags, TheISA::TLB::Read);

    // Translation faulted: report it without touching the cache
    if (req->fault != NoFault)
        return req->fault;

    req->reqData = new uint8_t[num_bytes];
    doCacheAccess(inst, NULL);
    return req->fault;
 }
 /**
  * Write on behalf of an instruction: translate the address through the
  * TLB first and only start the cache access when no fault was recorded.
  *
  * @return The fault stored on the instruction's cache request.
  */
 template <class T>
 Fault
 CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
            uint64_t *write_res)
 {
    CacheReqPtr req = dynamic_cast<CacheReqPtr>(findRequest(inst));
    assert(req);

    int num_bytes = sizeof(T);
    doTLBAccess(inst, req, num_bytes, flags, TheISA::TLB::Write);

    // Translation faulted: report it without touching the cache
    if (req->fault != NoFault)
        return req->fault;

    req->reqData = new uint8_t[num_bytes];
    doCacheAccess(inst, write_res);
    return req->fault;
 }
 void
 CacheUnit::execute(int slot_num)
 {
@ -241,21 +371,46 @@ CacheUnit::execute(int slot_num)
    switch (cache_req->cmd)
    {
      case InitiateFetch:
        {
            //@TODO: Switch to size of full cache block. Store in fetch buffer
            int acc_size =  sizeof(TheISA::MachInst);
            doTLBAccess(inst, cache_req, acc_size, 0, TheISA::TLB::Execute);
            // Only Do Access if no fault from TLB
            if (cache_req->fault == NoFault) {
                DPRINTF(InOrderCachePort,
                        "[tid:%u]: Initiating fetch access to %s for addr. %08p\n",
                        tid, name(), cache_req->inst->getMemAddr());
                cache_req->reqData = new uint8_t[acc_size];
                inst->setCurResSlot(slot_num);
                doCacheAccess(inst);
            }
            break;
        }
      case InitiateReadData:
      case InitiateWriteData:
        DPRINTF(InOrderCachePort,
-                "[tid:%u]: Initiating fetch access to %s for addr. %08p\n",
+                "[tid:%u]: Initiating data access to %s for addr. %08p\n",
                tid, name(), cache_req->inst->getMemAddr());
        DPRINTF(InOrderCachePort,
                "[tid:%u]: Fetching new cache block from addr: %08p\n",
                tid, cache_req->memReq->getVaddr());
        inst->setCurResSlot(slot_num);
-        doDataAccess(inst);
+
        if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
            inst->execute();
        } else {
            inst->initiateAcc();
        }
        break;
      case CompleteFetch:
        // @TODO: MOVE Functionality of handling fetched data into 'fetch unit'
        //        let cache-unit just be responsible for transferring data.
        if (cache_req->isMemAccComplete()) {
            DPRINTF(InOrderCachePort,
                    "[tid:%i]: Completing Fetch Access for [sn:%i]\n",
@ -278,22 +433,6 @@ CacheUnit::execute(int slot_num)
        }
        break;
      case InitiateReadData:
      case InitiateWriteData:
        DPRINTF(InOrderCachePort,
                "[tid:%u]: Initiating data access to %s for addr. %08p\n",
                tid, name(), cache_req->inst->getMemAddr());
        inst->setCurResSlot(slot_num);
        if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
            inst->execute();
        } else {
            inst->initiateAcc();
        }
        break;
      case CompleteReadData:
      case CompleteWriteData:
        DPRINTF(InOrderCachePort,
@ -355,8 +494,9 @@ CacheUnit::writeHint(DynInstPtr inst)
    inst->unsetMemAddr();
 }
 // @TODO: Split into doCacheRead() and doCacheWrite()
 Fault
-CacheUnit::doDataAccess(DynInstPtr inst, uint64_t *write_res)
+CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
 {
    Fault fault = NoFault;
    int tid = 0;
@ -603,6 +743,35 @@ CacheUnit::recvRetry()
    }
 }
 /** Default-constructs the event; all state comes from ResourceEvent(). */
 CacheUnitEvent::CacheUnitEvent()
    : ResourceEvent()
 { }
 /**
  * Release a request that was stalled by a TLB fault.
  *
  * Clears the thread's tlbBlocked flag on the owning CacheUnit, removes
  * the resource stall from the stage the request was issued in, clears
  * the request's tlbStall flag, and marks the request done if it has
  * been squashed.
  */
 void
 CacheUnitEvent::process()
 {
    DynInstPtr inst = resource->reqMap[slotIdx]->inst;
    int stage_num = resource->reqMap[slotIdx]->getStageNum();
    int tid = inst->threadNumber;

    CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqMap[slotIdx]);
    // Check the downcast before use, as the other CacheReqPtr casts in
    // this file do.
    assert(req_ptr);

    DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n",
            inst->seqNum);

    CacheUnit* tlb_res = dynamic_cast<CacheUnit*>(resource);
    assert(tlb_res);

    tlb_res->tlbBlocked[tid] = false;
    tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(
        tlb_res->reqMap[slotIdx], tid);
    req_ptr->tlbStall = false;

    if (req_ptr->isSquashed()) {
        req_ptr->done();
    }
 }
 void
 CacheUnit::squash(DynInstPtr inst, int stage_num,
                  InstSeqNum squash_seq_num, unsigned tid)
@ -630,7 +799,17 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
            CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(req_ptr);
            assert(cache_req);
-            if (!cache_req->isMemAccPending()) {
+            int req_slot_num = req_ptr->getSlot();
            if (cache_req->tlbStall) {
                tlbBlocked[tid] = false;
                int stall_stage = reqMap[req_slot_num]->getStageNum();
                cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid);
            }
            if (!cache_req->tlbStall && !cache_req->isMemAccPending()) {
                // Mark request for later removal
                cpu->reqRemoveList.push(req_ptr);
@ -669,3 +848,109 @@ CacheUnit::getMemData(Packet *packet)
    }
 }
 // Extra Template Definitions
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 template
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags);
 template
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags);
 template
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags);
 template
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags);
 template
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags);
 template
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags);
 #endif //DOXYGEN_SHOULD_SKIP_THIS
 /** double specialization: forwards the storage viewed as a uint64_t. */
 template<>
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, double &data, unsigned flags)
 {
    uint64_t &raw_bits = reinterpret_cast<uint64_t&>(data);
    return read(inst, addr, raw_bits, flags);
 }
 /** float specialization: forwards the storage viewed as a uint32_t. */
 template<>
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, float &data, unsigned flags)
 {
    uint32_t &raw_bits = reinterpret_cast<uint32_t&>(data);
    return read(inst, addr, raw_bits, flags);
 }
 /** int32_t specialization: forwards the storage viewed as a uint32_t. */
 template<>
 Fault
 CacheUnit::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags)
 {
    uint32_t &unsigned_view = reinterpret_cast<uint32_t&>(data);
    return read(inst, addr, unsigned_view, flags);
 }
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 template
 Fault
 CacheUnit::write(DynInstPtr inst, Twin32_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 CacheUnit::write(DynInstPtr inst, Twin64_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 CacheUnit::write(DynInstPtr inst, uint64_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 CacheUnit::write(DynInstPtr inst, uint32_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 CacheUnit::write(DynInstPtr inst, uint16_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 template
 Fault
 CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr,
                       unsigned flags, uint64_t *res);
 #endif //DOXYGEN_SHOULD_SKIP_THIS
 /** double specialization: forwards the value's storage as a uint64_t. */
 template<>
 Fault
 CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags,
                  uint64_t *res)
 {
    uint64_t raw_bits = reinterpret_cast<uint64_t&>(data);
    return write(inst, raw_bits, addr, flags, res);
 }
 /** float specialization: forwards the value's storage as a uint32_t. */
 template<>
 Fault
 CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags,
                  uint64_t *res)
 {
    uint32_t raw_bits = reinterpret_cast<uint32_t&>(data);
    return write(inst, raw_bits, addr, flags, res);
 }
 /** int32_t specialization: forwards the value converted to uint32_t. */
 template<>
 Fault
 CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags,
                  uint64_t *res)
 {
    uint32_t unsigned_val = static_cast<uint32_t>(data);
    return write(inst, unsigned_val, addr, flags, res);
 }
--- a/src/cpu/inorder/resources/cache_unit.hh
+++ b/src/cpu/inorder/resources/cache_unit.hh
@ -36,6 +36,7 @@
 #include <list>
 #include <string>
 #include "arch/tlb.hh"
 #include "arch/predecoder.hh"
 #include "cpu/inorder/resource.hh"
 #include "cpu/inorder/inorder_dyn_inst.hh"
@ -124,7 +125,7 @@ class CacheUnit : public Resource
        cacheAccessComplete
    };
-    ///virtual void init();
+    void init();
    virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
                                        int res_idx, int slot_num,
@ -159,10 +160,20 @@ class CacheUnit : public Resource
    /** Returns a specific port. */
    Port *getPort(const std::string &if_name, int idx);
    template <class T>
    Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags);
    template <class T>
    Fault write(DynInstPtr inst, T data, Addr addr, unsigned flags,
                        uint64_t *res);
    Fault doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
                      int flags,  TheISA::TLB::Mode tlb_mode);
    /** Read/Write on behalf of an instruction.
     *  curResSlot needs to be a valid value in instruction.
     */
-    Fault doDataAccess(DynInstPtr inst, uint64_t *write_result=NULL);
+    Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL);
    void prefetch(DynInstPtr inst);
@ -209,23 +220,28 @@ class CacheUnit : public Resource
    //unsigned fetchOffset[ThePipeline::MaxThreads];
    TheISA::Predecoder predecoder;
    bool tlbBlocked[ThePipeline::MaxThreads];
    TheISA::TLB* tlb();
    TheISA::TLB *_tlb;
 };
-struct CacheSchedEntry : public ThePipeline::ScheduleEntry
+class CacheUnitEvent : public ResourceEvent {
-{
+  public:
-    enum EntryType {
+    const std::string name() const
-        FetchAccess,
+    {
-        DataAccess
+        return "CacheUnitEvent";
-    };
+    }
    CacheSchedEntry(int stage_num, int _priority, int res_num,
                    MemCmd::Command pkt_cmd, EntryType _type = FetchAccess)
        : ScheduleEntry(stage_num, _priority, res_num), pktCmd(pkt_cmd),
          type(_type)
    { }
-    MemCmd::Command pktCmd;
+    /** Constructs a resource event. */
-    EntryType type;
+    CacheUnitEvent();
    virtual ~CacheUnitEvent() {}
    /** Processes a resource event. */
    virtual void process();
 };
 class CacheRequest : public ResourceRequest
@ -235,43 +251,17 @@ class CacheRequest : public ResourceRequest
                 int slot_num, unsigned cmd, int req_size,
                 MemCmd::Command pkt_cmd, unsigned flags, int cpu_id)
        : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd),
-          pktCmd(pkt_cmd), memAccComplete(false), memAccPending(false)
+          pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
-    {
+          retryPkt(NULL), memAccComplete(false), memAccPending(false),
-        if (cmd == CacheUnit::InitiateFetch ||
+          tlbStall(false)
-            cmd == CacheUnit::CompleteFetch ||
+    { }
            cmd == CacheUnit::Fetch) {
            memReq = inst->fetchMemReq;
        } else {
            memReq = inst->dataMemReq;
        }
        //@ Only matters for Fetch / Read requests
        //  Don't allocate for Writes!
        reqData = new uint8_t[req_size];
        retryPkt = NULL;
    }
    virtual ~CacheRequest()
    {
-#if 0
+        if (reqData) {
-        delete reqData;
+            delete [] reqData;
        // Can get rid of packet and packet request now
        if (*dataPkt) {
            if (*dataPkt->req) {
                delete dataPkt->req;
            }
            delete dataPkt;
        }
        // Can get rid of packet and packet request now
        if (retryPkt) {
            if (retryPkt->req) {
                delete retryPkt->req;
            }
            delete retryPkt;
        }
 #endif
    }
    virtual PacketDataPtr getData()
@ -297,6 +287,7 @@ class CacheRequest : public ResourceRequest
    bool memAccComplete;
    bool memAccPending;
    bool tlbStall;
 };
 class CacheReqPacket : public Packet
--- a/src/cpu/inorder/resources/tlb_unit.hh
+++ b/src/cpu/inorder/resources/tlb_unit.hh
@ -114,8 +114,8 @@ class TLBUnitRequest : public ResourceRequest {
            memReq = inst->fetchMemReq;
        } else {
            aligned_addr = inst->getMemAddr();;
-            req_size = inst->getMemAccSize();
+            req_size = 0; //inst->getMemAccSize();
-            flags = inst->getMemFlags();
+            flags = 0; //inst->getMemFlags();
            if (req_size == 0 && (inst->isDataPrefetch() || inst->isInstPrefetch())) {
                req_size = 8;
--- a/src/cpu/static_inst.cc
+++ b/src/cpu/static_inst.cc
@ -106,13 +106,6 @@ StaticInst::branchTarget(ThreadContext *tc) const
    M5_DUMMY_RETURN;
 }
 Request::Flags
 StaticInst::memAccFlags()
 {
    panic("StaticInst::memAccFlags called on non-memory instruction");
    return 0;
 }
 const string &
 StaticInst::disassemble(Addr pc, const SymbolTable *symtab) const
 {
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@ -419,8 +419,6 @@ class StaticInst : public StaticInstBase
     */
    bool hasBranchTarget(Addr pc, ThreadContext *tc, Addr &tgt) const;
    virtual Request::Flags memAccFlags();
    /**
     * Return string representation of disassembled instruction.
     * The default version of this function will call the internal