ARM/Alpha/Cpu: Change prefetchs to be more like normal loads.

This change modifies the way prefetches work. They are now like normal loads
that don't writeback a register. Previously prefetches were supposed to call
prefetch() on the execution context, so they executed with execute() methods
instead of initiateAcc() completeAcc(). The prefetch() methods for all the CPUs
are blank, meaning that they get executed, but don't actually do anything.

On Alpha dead cache copy code was removed and prefetches are now normal ops.
They count as executed operations, but still don't do anything and IsMemRef is
no longer set on them.

On ARM IsDataPrefetch or IsInstructionPrefetch is now set on all prefetch
instructions. The timing simple CPU doesn't try to do anything special for
prefetches now and they execute with the normal memory code path.
This commit is contained in:
Ali Saidi 2010-11-08 13:58:22 -06:00
parent f4f5d03ed2
commit cdacbe734a
22 changed files with 41 additions and 472 deletions

View file

@ -47,11 +47,6 @@ decode OPCODE default Unknown::unknown() {
0x23: ldt({{ Fa = Mem.df; }});
0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LLSC);
0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LLSC);
#ifdef USE_COPY
0x20: MiscPrefetch::copy_load({{ EA = Ra; }},
{{ fault = xc->copySrcTranslate(EA); }},
inst_flags = [IsMemRef, IsLoad, IsCopy]);
#endif
}
format LoadOrPrefetch {
@ -71,11 +66,6 @@ decode OPCODE default Unknown::unknown() {
0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }});
0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }});
0x27: stt({{ Mem.df = Fa; }});
#ifdef USE_COPY
0x24: MiscPrefetch::copy_store({{ EA = Rb; }},
{{ fault = xc->copy(EA); }},
inst_flags = [IsMemRef, IsStore, IsCopy]);
#endif
}
format StoreCond {
@ -788,10 +778,8 @@ decode OPCODE default Unknown::unknown() {
format MiscPrefetch {
0xf800: wh64({{ EA = Rb & ~ULL(63); }},
{{ xc->writeHint(EA, 64, memAccessFlags); }},
mem_flags = PREFETCH,
inst_flags = [IsMemRef, IsDataPrefetch,
IsStore, MemWriteOp]);
{{ ; }},
mem_flags = PREFETCH);
}
format BasicOperate {

View file

@ -396,6 +396,7 @@ def template MiscExecute {{
%(op_rd)s;
%(ea_code)s;
warn_once("Prefetch instrutions is Alpha do not do anything\n");
if (fault == NoFault) {
%(memacc_code)s;
}
@ -404,6 +405,8 @@ def template MiscExecute {{
}
}};
// Prefetches in Alpha don't actually do anything
// They just build an effective address and complete
def template MiscInitiateAcc {{
Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@ -530,12 +533,10 @@ def format LoadOrPrefetch(memacc_code, ea_code = {{ EA = Rb + disp; }},
inst_flags = makeList(inst_flags)
pf_mem_flags = mem_flags + pf_flags + ['PREFETCH']
pf_inst_flags = inst_flags + ['IsMemRef', 'IsLoad',
'IsDataPrefetch', 'MemReadOp']
pf_inst_flags = inst_flags
(pf_header_output, pf_decoder_output, _, pf_exec_output) = \
LoadStoreBase(name, Name + 'Prefetch', ea_code,
'xc->prefetch(EA, memAccessFlags);',
LoadStoreBase(name, Name + 'Prefetch', ea_code, ';',
pf_mem_flags, pf_inst_flags, exec_template_base = 'Misc')
header_output += pf_header_output

View file

@ -161,8 +161,13 @@ let {{
if self.user:
self.memFlags.append("ArmISA::TLB::UserMode")
if self.flavor == "prefetch":
self.instFlags = []
if self.flavor == "dprefetch":
self.memFlags.append("Request::PREFETCH")
self.instFlags = ['IsDataPrefetch']
elif self.flavor == "iprefetch":
self.memFlags.append("Request::PREFETCH")
self.instFlags = ['IsInstPrefetch']
elif self.flavor == "exclusive":
self.memFlags.append("Request::LLSC")
elif self.flavor == "normal":
@ -185,7 +190,7 @@ let {{
self.codeBlobs["ea_code"] = eaCode
# Code that actually handles the access
if self.flavor == "prefetch":
if self.flavor == "dprefetch" or self.flavor == "iprefetch":
accCode = 'uint64_t temp = Mem%s; temp = temp;'
elif self.flavor == "fp":
accCode = "FpDest.uw = cSwap(Mem%s, ((CPSR)Cpsr).e);\n"
@ -200,7 +205,7 @@ let {{
wbDecl = None
if self.writeback:
wbDecl = self.wbDecl
self.emitHelper(base, wbDecl)
self.emitHelper(base, wbDecl, self.instFlags)
def loadImmClassName(post, add, writeback, size=4, sign=False, user=False):
return memClassName("LOAD_IMM", post, add, writeback, size, sign, user)
@ -325,11 +330,11 @@ let {{
RfeInst(mnem, False, False, True).emit()
RfeInst(mnem, False, False, False).emit()
def buildPrefetches(mnem):
LoadReg(mnem, False, False, False, size=1, flavor="prefetch").emit()
LoadImm(mnem, False, False, False, size=1, flavor="prefetch").emit()
LoadReg(mnem, False, True, False, size=1, flavor="prefetch").emit()
LoadImm(mnem, False, True, False, size=1, flavor="prefetch").emit()
def buildPrefetches(mnem, type):
LoadReg(mnem, False, False, False, size=1, flavor=type).emit()
LoadImm(mnem, False, False, False, size=1, flavor=type).emit()
LoadReg(mnem, False, True, False, size=1, flavor=type).emit()
LoadImm(mnem, False, True, False, size=1, flavor=type).emit()
buildLoads("ldr")
buildLoads("ldrt", user=True)
@ -346,9 +351,9 @@ let {{
buildRfeLoads("rfe")
buildPrefetches("pld")
buildPrefetches("pldw")
buildPrefetches("pli")
buildPrefetches("pld", "dprefetch")
buildPrefetches("pldw", "dprefetch")
buildPrefetches("pli", "iprefetch")
LoadImm("ldrex", False, True, False, size=4, flavor="exclusive").emit()
LoadImm("ldrexh", False, True, False, size=2, flavor="exclusive").emit()

View file

@ -452,7 +452,7 @@ def template MiscExecute {{
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Addr EA M5_VAR_USED = 0;
Fault fault = NoFault;
%(fp_enable_check)s;
@ -577,12 +577,11 @@ def format StoreUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3;
def format Prefetch(ea_code = {{ EA = Rs + disp; }},
mem_flags = [], pf_flags = [], inst_flags = []) {{
pf_mem_flags = mem_flags + pf_flags + ['PREFETCH']
pf_inst_flags = inst_flags + ['IsMemRef', 'IsLoad',
'IsDataPrefetch', 'MemReadOp']
pf_inst_flags = inst_flags
(header_output, decoder_output, decode_block, exec_output) = \
LoadStoreBase(name, Name, ea_code,
'xc->prefetch(EA, memAccessFlags);',
'warn_once("Prefetching not implemented for MIPS\\n");',
pf_mem_flags, pf_inst_flags, exec_template_base = 'Misc')
}};

View file

@ -150,11 +150,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Finish a DTB address translation. */
void finishTranslation(WholeTranslationState *state);
void prefetch(Addr addr, unsigned flags);
void writeHint(Addr addr, int size, unsigned flags);
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
/** @todo: Consider making this private. */
public:
/** The sequence number of the instruction. */

View file

@ -194,73 +194,6 @@ BaseDynInst<Impl>::dumpSNList()
}
#endif
template <class Impl>
void
BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
{
// This is the "functional" implementation of prefetch. Not much
// happens here since prefetches don't affect the architectural
// state.
/*
// Generate a MemReq so we can translate the effective address.
MemReqPtr req = new MemReq(addr, thread->getXCProxy(), 1, flags);
req->asid = asid;
// Prefetches never cause faults.
fault = NoFault;
// note this is a local, not BaseDynInst::fault
Fault trans_fault = cpu->translateDataReadReq(req);
if (trans_fault == NoFault && !(req->isUncacheable())) {
// It's a valid address to cacheable space. Record key MemReq
// parameters so we can generate another one just like it for
// the timing access without calling translate() again (which
// might mess up the TLB).
effAddr = req->vaddr;
physEffAddr = req->paddr;
memReqFlags = req->flags;
} else {
// Bogus address (invalid or uncacheable space). Mark it by
// setting the eff_addr to InvalidAddr.
effAddr = physEffAddr = MemReq::inval_addr;
}
if (traceData) {
traceData->setAddr(addr);
}
*/
}
template <class Impl>
void
BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags)
{
// Not currently supported.
}
/**
* @todo Need to find a way to get the cache block size here.
*/
template <class Impl>
Fault
BaseDynInst<Impl>::copySrcTranslate(Addr src)
{
// Not currently supported.
return NoFault;
}
/**
* @todo Need to find a way to get the cache block size here.
*/
template <class Impl>
Fault
BaseDynInst<Impl>::copy(Addr dest)
{
// Not currently supported.
return NoFault;
}
template <class Impl>
void
BaseDynInst<Impl>::dump()

View file

@ -134,18 +134,6 @@ CheckerCPU::unserialize(Checkpoint *cp, const string &section)
*/
}
Fault
CheckerCPU::copySrcTranslate(Addr src)
{
panic("Unimplemented!");
}
Fault
CheckerCPU::copy(Addr dest)
{
panic("Unimplemented!");
}
template <class T>
Fault
CheckerCPU::read(Addr addr, T &data, unsigned flags)

View file

@ -178,20 +178,6 @@ class CheckerCPU : public BaseCPU
void setEA(Addr EA) { panic("SimpleCPU::setEA() not implemented\n"); }
Addr getEA() { panic("SimpleCPU::getEA() not implemented\n"); }
void prefetch(Addr addr, unsigned flags)
{
// need to do this...
}
void writeHint(Addr addr, int size, unsigned flags)
{
// need to do this...
}
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
// The register accessor methods provide the index of the
// instruction's operand (e.g., 0 or 1), not the architectural
// register index, to simplify the implementation of register

View file

@ -122,15 +122,6 @@ class ExecContext {
Fault writeBytes(uint8_t *data, unsigned size,
Addr addr, unsigned flags, uint64_t *res);
/** Prefetches an address, creating a memory request with the
* given flags. */
void prefetch(Addr addr, unsigned flags);
/** Hints to the memory system that an address will be written to
* soon, with the given size. Creates a memory request with the
* given flags. */
void writeHint(Addr addr, int size, unsigned flags);
#if FULL_SYSTEM
/** Somewhat Alpha-specific function that handles returning from
* an error or interrupt. */

View file

@ -1445,21 +1445,6 @@ InOrderCPU::syscall(int64_t callnum, ThreadID tid)
}
#endif
void
InOrderCPU::prefetch(DynInstPtr inst)
{
Resource *mem_res = resPool->getResource(dataPortIdx);
return mem_res->prefetch(inst);
}
void
InOrderCPU::writeHint(DynInstPtr inst)
{
Resource *mem_res = resPool->getResource(dataPortIdx);
return mem_res->writeHint(inst);
}
TheISA::TLB*
InOrderCPU::getITBPtr()
{

View file

@ -530,16 +530,6 @@ class InOrderCPU : public BaseCPU
Fault write(DynInstPtr inst, uint8_t *data, unsigned size,
Addr addr, unsigned flags, uint64_t *write_res = NULL);
/** Forwards an instruction prefetch to the appropriate data
* resource (indexes into Resource Pool thru "dataPortIdx")
*/
void prefetch(DynInstPtr inst);
/** Forwards an instruction writeHint to the appropriate data
* resource (indexes into Resource Pool thru "dataPortIdx")
*/
void writeHint(DynInstPtr inst);
/** Executes a syscall.*/
void syscall(int64_t callnum, ThreadID tid);

View file

@ -345,38 +345,6 @@ InOrderDynInst::syscall(int64_t callnum)
}
#endif
void
InOrderDynInst::prefetch(Addr addr, unsigned flags)
{
cpu->prefetch(this);
}
void
InOrderDynInst::writeHint(Addr addr, int size, unsigned flags)
{
cpu->writeHint(this);
}
/**
* @todo Need to find a way to get the cache block size here.
*/
Fault
InOrderDynInst::copySrcTranslate(Addr src)
{
// Not currently supported.
return NoFault;
}
/**
* @todo Need to find a way to get the cache block size here.
*/
Fault
InOrderDynInst::copy(Addr dest)
{
// Not currently supported.
return NoFault;
}
void
InOrderDynInst::releaseReq(ResourceRequest* req)
{

View file

@ -506,10 +506,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
/** Calls a syscall. */
void syscall(int64_t callnum);
#endif
void prefetch(Addr addr, unsigned flags);
void writeHint(Addr addr, int size, unsigned flags);
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
////////////////////////////////////////////////////////////
//

View file

@ -154,12 +154,6 @@ class Resource {
virtual Fault doCacheAccess(DynInstPtr inst, uint64_t *res=NULL)
{ panic("doCacheAccess undefined for %s", name()); return NoFault; }
virtual void prefetch(DynInstPtr inst)
{ panic("prefetch undefined for %s", name()); }
virtual void writeHint(DynInstPtr inst)
{ panic("writeHint undefined for %s", name()); }
/** Squash All Requests After This Seq Num */
virtual void squash(DynInstPtr inst, int stage_num,
InstSeqNum squash_seq_num, ThreadID tid);

View file

@ -842,43 +842,6 @@ CacheUnit::execute(int slot_num)
}
}
void
CacheUnit::prefetch(DynInstPtr inst)
{
warn_once("Prefetching currently unimplemented");
CacheReqPtr cache_req
= dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
assert(cache_req);
// Clean-Up cache resource request so
// other memory insts. can use them
cache_req->setCompleted();
cachePortBlocked = false;
cache_req->setMemAccPending(false);
cache_req->setMemAccCompleted();
inst->unsetMemAddr();
}
void
CacheUnit::writeHint(DynInstPtr inst)
{
warn_once("Write Hints currently unimplemented");
CacheReqPtr cache_req
= dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
assert(cache_req);
// Clean-Up cache resource request so
// other memory insts. can use them
cache_req->setCompleted();
cachePortBlocked = false;
cache_req->setMemAccPending(false);
cache_req->setMemAccCompleted();
inst->unsetMemAddr();
}
// @TODO: Split into doCacheRead() and doCacheWrite()
Fault
CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,

View file

@ -176,10 +176,6 @@ class CacheUnit : public Resource
Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL,
CacheReqPtr split_req=NULL);
void prefetch(DynInstPtr inst);
void writeHint(DynInstPtr inst);
uint64_t getMemData(Packet *packet);
void setAddrDependency(DynInstPtr inst);

View file

@ -409,20 +409,6 @@ class OzoneCPU : public BaseCPU
return backEnd->write(req, data, store_idx);
}
void prefetch(Addr addr, unsigned flags)
{
// need to do this...
}
void writeHint(Addr addr, int size, unsigned flags)
{
// need to do this...
}
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
public:
void squashFromTC();

View file

@ -481,95 +481,6 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
thread.getTC()->copyArchRegs(temp.getTC());
}
template <class Impl>
Fault
OzoneCPU<Impl>::copySrcTranslate(Addr src)
{
panic("Copy not implemented!\n");
return NoFault;
#if 0
static bool no_warn = true;
unsigned blk_size = dcacheInterface ? dcacheInterface->getBlockSize() : 64;
// Only support block sizes of 64 atm.
assert(blk_size == 64);
int offset = src & (blk_size - 1);
// Make sure block doesn't span page
if (no_warn &&
(src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) &&
(src >> 40) != 0xfffffc) {
warn("Copied block source spans pages %x.", src);
no_warn = false;
}
memReq->reset(src & ~(blk_size - 1), blk_size);
// translate to physical address
Fault fault = tc->translateDataReadReq(memReq);
assert(fault != Alignment_Fault);
if (fault == NoFault) {
tc->copySrcAddr = src;
tc->copySrcPhysAddr = memReq->paddr + offset;
} else {
tc->copySrcAddr = 0;
tc->copySrcPhysAddr = 0;
}
return fault;
#endif
}
template <class Impl>
Fault
OzoneCPU<Impl>::copy(Addr dest)
{
panic("Copy not implemented!\n");
return NoFault;
#if 0
static bool no_warn = true;
unsigned blk_size = dcacheInterface ? dcacheInterface->getBlockSize() : 64;
// Only support block sizes of 64 atm.
assert(blk_size == 64);
uint8_t data[blk_size];
//assert(tc->copySrcAddr);
int offset = dest & (blk_size - 1);
// Make sure block doesn't span page
if (no_warn &&
(dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) &&
(dest >> 40) != 0xfffffc) {
no_warn = false;
warn("Copied block destination spans pages %x. ", dest);
}
memReq->reset(dest & ~(blk_size -1), blk_size);
// translate to physical address
Fault fault = tc->translateDataWriteReq(memReq);
assert(fault != Alignment_Fault);
if (fault == NoFault) {
Addr dest_addr = memReq->paddr + offset;
// Need to read straight from memory since we have more than 8 bytes.
memReq->paddr = tc->copySrcPhysAddr;
tc->mem->read(memReq, data);
memReq->paddr = dest_addr;
tc->mem->write(memReq, data);
if (dcacheInterface) {
memReq->cmd = Copy;
memReq->completionEvent = NULL;
memReq->paddr = tc->copySrcPhysAddr;
memReq->dest = dest_addr;
memReq->size = 64;
memReq->time = curTick;
dcacheInterface->access(memReq);
}
}
return fault;
#endif
}
#if FULL_SYSTEM
template <class Impl>
Addr

View file

@ -215,118 +215,6 @@ change_thread_state(ThreadID tid, int activate, int priority)
{
}
void
BaseSimpleCPU::prefetch(Addr addr, unsigned flags)
{
if (traceData) {
traceData->setAddr(addr);
}
// need to do this...
}
void
BaseSimpleCPU::writeHint(Addr addr, int size, unsigned flags)
{
if (traceData) {
traceData->setAddr(addr);
}
// need to do this...
}
Fault
BaseSimpleCPU::copySrcTranslate(Addr src)
{
#if 0
static bool no_warn = true;
unsigned blk_size =
(dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
// Only support block sizes of 64 atm.
assert(blk_size == 64);
int offset = src & (blk_size - 1);
// Make sure block doesn't span page
if (no_warn &&
(src & PageMask) != ((src + blk_size) & PageMask) &&
(src >> 40) != 0xfffffc) {
warn("Copied block source spans pages %x.", src);
no_warn = false;
}
memReq->reset(src & ~(blk_size - 1), blk_size);
// translate to physical address
Fault fault = thread->translateDataReadReq(req);
if (fault == NoFault) {
thread->copySrcAddr = src;
thread->copySrcPhysAddr = memReq->paddr + offset;
} else {
assert(!fault->isAlignmentFault());
thread->copySrcAddr = 0;
thread->copySrcPhysAddr = 0;
}
return fault;
#else
return NoFault;
#endif
}
Fault
BaseSimpleCPU::copy(Addr dest)
{
#if 0
static bool no_warn = true;
unsigned blk_size =
(dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
// Only support block sizes of 64 atm.
assert(blk_size == 64);
uint8_t data[blk_size];
//assert(thread->copySrcAddr);
int offset = dest & (blk_size - 1);
// Make sure block doesn't span page
if (no_warn &&
(dest & PageMask) != ((dest + blk_size) & PageMask) &&
(dest >> 40) != 0xfffffc) {
no_warn = false;
warn("Copied block destination spans pages %x. ", dest);
}
memReq->reset(dest & ~(blk_size -1), blk_size);
// translate to physical address
Fault fault = thread->translateDataWriteReq(req);
if (fault == NoFault) {
Addr dest_addr = memReq->paddr + offset;
// Need to read straight from memory since we have more than 8 bytes.
memReq->paddr = thread->copySrcPhysAddr;
thread->mem->read(memReq, data);
memReq->paddr = dest_addr;
thread->mem->write(memReq, data);
if (dcacheInterface) {
memReq->cmd = Copy;
memReq->completionEvent = NULL;
memReq->paddr = thread->copySrcPhysAddr;
memReq->dest = dest_addr;
memReq->size = 64;
memReq->time = curTick;
dcacheInterface->access(memReq);
}
}
else
assert(!fault->isAlignmentFault());
return fault;
#else
panic("copy not implemented");
return NoFault;
#endif
}
#if FULL_SYSTEM
Addr
BaseSimpleCPU::dbg_vtophys(Addr addr)

View file

@ -230,13 +230,6 @@ class BaseSimpleCPU : public BaseCPU
Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n");
M5_DUMMY_RETURN}
void prefetch(Addr addr, unsigned flags);
void writeHint(Addr addr, int size, unsigned flags);
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
// The register accessor methods provide the index of the
// instruction's operand (e.g., 0 or 1), not the architectural
// register index, to simplify the implementation of register

View file

@ -1,4 +1,16 @@
/*
* Copyright (c) 2010 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2002-2005 The Regents of The University of Michigan
* All rights reserved.
*
@ -789,8 +801,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
}
preExecute();
if (curStaticInst &&
curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) {
if (curStaticInst && curStaticInst->isMemRef()) {
// load or store: just send to dcache
Fault fault = curStaticInst->initiateAcc(this, traceData);
if (_status != Running) {

View file

@ -226,6 +226,8 @@ class StaticInstBase : public RefCounted
bool isStoreConditional() const { return flags[IsStoreConditional]; }
bool isInstPrefetch() const { return flags[IsInstPrefetch]; }
bool isDataPrefetch() const { return flags[IsDataPrefetch]; }
bool isPrefetch() const { return isInstPrefetch() ||
isDataPrefetch(); }
bool isCopy() const { return flags[IsCopy];}
bool isInteger() const { return flags[IsInteger]; }