LSQ: Only trigger a memory violation with a load/load if the value changes.
Only create a memory ordering violation when the value could have changed between two subsequent loads, instead of just when loads go out-of-order to the same address. While not very common in the case of Alpha, with an architecture with a hardware table walker this can happen reasonably frequently, because a translation will miss and start a table walk, and before the CPU re-schedules the faulting instruction another one will pass it to the same address (or cache block, depending on the dependency checking). This patch has been tested with a couple of self-checking, hand-crafted programs to stress ordering between two cores. The performance improvement on SPEC benchmarks can be substantial (2-10%).
This commit is contained in:
parent
bb921b1459
commit
649c239cee
9 changed files with 150 additions and 41 deletions
|
@ -75,9 +75,6 @@ template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals =
|
||||||
template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals =
|
template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals =
|
||||||
{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
|
{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
|
||||||
|
|
||||||
template<> ArmFault::FaultVals ArmFaultVals<ReExec>::vals =
|
|
||||||
{"ReExec Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
|
|
||||||
|
|
||||||
template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals =
|
template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals =
|
||||||
{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
|
{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true}; // some dummy values
|
||||||
Addr
|
Addr
|
||||||
|
@ -240,17 +237,6 @@ FlushPipe::invoke(ThreadContext *tc, StaticInstPtr inst) {
|
||||||
tc->pcState(pc);
|
tc->pcState(pc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
ReExec::invoke(ThreadContext *tc, StaticInstPtr inst) {
|
|
||||||
DPRINTF(Faults, "Invoking ReExec Fault\n");
|
|
||||||
|
|
||||||
// Set the PC to then the faulting instruction.
|
|
||||||
// Net effect is simply squashing all instructions including this
|
|
||||||
// instruction and refetching/rexecuting current instruction
|
|
||||||
PCState pc = tc->pcState();
|
|
||||||
tc->pcState(pc);
|
|
||||||
}
|
|
||||||
|
|
||||||
template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc,
|
template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc,
|
||||||
StaticInstPtr inst);
|
StaticInstPtr inst);
|
||||||
template void AbortFault<DataAbort>::invoke(ThreadContext *tc,
|
template void AbortFault<DataAbort>::invoke(ThreadContext *tc,
|
||||||
|
|
|
@ -242,16 +242,6 @@ class FlushPipe : public ArmFaultVals<FlushPipe>
|
||||||
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
|
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
|
||||||
};
|
};
|
||||||
|
|
||||||
// A fault that flushes the pipe, including the faulting instructions
|
|
||||||
class ReExec : public ArmFaultVals<ReExec>
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ReExec() {}
|
|
||||||
void invoke(ThreadContext *tc,
|
|
||||||
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
static inline Fault genMachineCheckFault()
|
static inline Fault genMachineCheckFault()
|
||||||
{
|
{
|
||||||
return new Reset();
|
return new Reset();
|
||||||
|
|
|
@ -146,6 +146,19 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
||||||
/** True if the DTB address translation has completed. */
|
/** True if the DTB address translation has completed. */
|
||||||
bool translationCompleted;
|
bool translationCompleted;
|
||||||
|
|
||||||
|
/** True if this address was found to match a previous load and they issued
|
||||||
|
* out of order. If that happened, then it's only a problem if an incoming
|
||||||
|
* snoop invalidate modifies the line, in which case we need to squash.
|
||||||
|
* If nothing modified the line the order doesn't matter.
|
||||||
|
*/
|
||||||
|
bool possibleLoadViolation;
|
||||||
|
|
||||||
|
/** True if the address hit an external snoop while sitting in the LSQ.
|
||||||
|
* If this is true and an older instruction sees it, this instruction must
|
||||||
|
* reexecute
|
||||||
|
*/
|
||||||
|
bool hitExternalSnoop;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the DTB address translation is being delayed due to a hw
|
* Returns true if the DTB address translation is being delayed due to a hw
|
||||||
* page table walk.
|
* page table walk.
|
||||||
|
|
|
@ -110,6 +110,8 @@ BaseDynInst<Impl>::initVars()
|
||||||
|
|
||||||
translationStarted = false;
|
translationStarted = false;
|
||||||
translationCompleted = false;
|
translationCompleted = false;
|
||||||
|
possibleLoadViolation = false;
|
||||||
|
hitExternalSnoop = false;
|
||||||
|
|
||||||
isUncacheable = false;
|
isUncacheable = false;
|
||||||
reqMade = false;
|
reqMade = false;
|
||||||
|
|
|
@ -90,11 +90,17 @@ LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
|
||||||
DPRINTF(LSQ, "Got error packet back for address: %#X\n", pkt->getAddr());
|
DPRINTF(LSQ, "Got error packet back for address: %#X\n", pkt->getAddr());
|
||||||
if (pkt->isResponse()) {
|
if (pkt->isResponse()) {
|
||||||
lsq->thread[pkt->req->threadId()].completeDataAccess(pkt);
|
lsq->thread[pkt->req->threadId()].completeDataAccess(pkt);
|
||||||
}
|
} else {
|
||||||
else {
|
DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
|
||||||
// must be a snoop
|
pkt->cmdString());
|
||||||
|
|
||||||
// @TODO someday may need to process invalidations in LSQ here
|
// must be a snoop
|
||||||
|
if (pkt->isInvalidate()) {
|
||||||
|
DPRINTF(LSQ, "received invalidation for addr:%#x\n", pkt->getAddr());
|
||||||
|
for (ThreadID tid = 0; tid < lsq->numThreads; tid++) {
|
||||||
|
lsq->thread[tid].checkSnoop(pkt);
|
||||||
|
}
|
||||||
|
}
|
||||||
// to provide stronger consistency model
|
// to provide stronger consistency model
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -115,12 +115,20 @@ class LSQUnit {
|
||||||
/** Inserts a store instruction. */
|
/** Inserts a store instruction. */
|
||||||
void insertStore(DynInstPtr &store_inst);
|
void insertStore(DynInstPtr &store_inst);
|
||||||
|
|
||||||
/** Check for ordering violations in the LSQ
|
/** Check for ordering violations in the LSQ. For a store squash if we
|
||||||
|
* ever find a conflicting load. For a load, only squash if
|
||||||
|
* an external snoop invalidate has been seen for that load address
|
||||||
* @param load_idx index to start checking at
|
* @param load_idx index to start checking at
|
||||||
* @param inst the instruction to check
|
* @param inst the instruction to check
|
||||||
*/
|
*/
|
||||||
Fault checkViolations(int load_idx, DynInstPtr &inst);
|
Fault checkViolations(int load_idx, DynInstPtr &inst);
|
||||||
|
|
||||||
|
/** Check if an incoming invalidate hits in the lsq on a load
|
||||||
|
* that might have issued out of order wrt another load because
|
||||||
|
* of the intermediate invalidate.
|
||||||
|
*/
|
||||||
|
void checkSnoop(PacketPtr pkt);
|
||||||
|
|
||||||
/** Executes a load instruction. */
|
/** Executes a load instruction. */
|
||||||
Fault executeLoad(DynInstPtr &inst);
|
Fault executeLoad(DynInstPtr &inst);
|
||||||
|
|
||||||
|
@ -417,6 +425,9 @@ class LSQUnit {
|
||||||
|
|
||||||
//list<InstSeqNum> mshrSeqNums;
|
//list<InstSeqNum> mshrSeqNums;
|
||||||
|
|
||||||
|
/** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
|
||||||
|
Addr cacheBlockMask;
|
||||||
|
|
||||||
/** Wire to read information from the issue stage time queue. */
|
/** Wire to read information from the issue stage time queue. */
|
||||||
typename TimeBuffer<IssueStruct>::wire fromIssue;
|
typename TimeBuffer<IssueStruct>::wire fromIssue;
|
||||||
|
|
||||||
|
|
|
@ -135,7 +135,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
LSQUnit<Impl>::LSQUnit()
|
LSQUnit<Impl>::LSQUnit()
|
||||||
: loads(0), stores(0), storesToWB(0), stalled(false),
|
: loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
|
||||||
isStoreBlocked(false), isLoadBlocked(false),
|
isStoreBlocked(false), isLoadBlocked(false),
|
||||||
loadBlockedHandled(false), hasPendingPkt(false)
|
loadBlockedHandled(false), hasPendingPkt(false)
|
||||||
{
|
{
|
||||||
|
@ -154,6 +154,8 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
|
||||||
|
|
||||||
switchedOut = false;
|
switchedOut = false;
|
||||||
|
|
||||||
|
cacheBlockMask = 0;
|
||||||
|
|
||||||
lsq = lsq_ptr;
|
lsq = lsq_ptr;
|
||||||
|
|
||||||
lsqID = id;
|
lsqID = id;
|
||||||
|
@ -297,6 +299,9 @@ LSQUnit<Impl>::takeOverFrom()
|
||||||
stalled = false;
|
stalled = false;
|
||||||
isLoadBlocked = false;
|
isLoadBlocked = false;
|
||||||
loadBlockedHandled = false;
|
loadBlockedHandled = false;
|
||||||
|
|
||||||
|
// Just in case the memory system changed out from under us
|
||||||
|
cacheBlockMask = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
@ -442,6 +447,60 @@ LSQUnit<Impl>::numLoadsReady()
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
|
||||||
|
{
|
||||||
|
int load_idx = loadHead;
|
||||||
|
|
||||||
|
if (!cacheBlockMask) {
|
||||||
|
assert(dcachePort);
|
||||||
|
Addr bs = dcachePort->peerBlockSize();
|
||||||
|
|
||||||
|
// Make sure we actually got a size
|
||||||
|
assert(bs != 0);
|
||||||
|
|
||||||
|
cacheBlockMask = ~(bs - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is the only load in the LSQ we don't care
|
||||||
|
if (load_idx == loadTail)
|
||||||
|
return;
|
||||||
|
incrLdIdx(load_idx);
|
||||||
|
|
||||||
|
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
|
||||||
|
Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
|
||||||
|
while (load_idx != loadTail) {
|
||||||
|
DynInstPtr ld_inst = loadQueue[load_idx];
|
||||||
|
|
||||||
|
if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
|
||||||
|
incrLdIdx(load_idx);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Addr load_addr = ld_inst->physEffAddr & cacheBlockMask;
|
||||||
|
DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n",
|
||||||
|
ld_inst->seqNum, load_addr, invalidate_addr);
|
||||||
|
|
||||||
|
if (load_addr == invalidate_addr) {
|
||||||
|
if (ld_inst->possibleLoadViolation) {
|
||||||
|
DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
|
||||||
|
ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum);
|
||||||
|
|
||||||
|
// Mark the load for re-execution
|
||||||
|
ld_inst->fault = new ReExec;
|
||||||
|
} else {
|
||||||
|
// If a older load checks this and it's true
|
||||||
|
// then we might have missed the snoop
|
||||||
|
// in which case we need to invalidate to be sure
|
||||||
|
ld_inst->hitExternalSnoop = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
incrLdIdx(load_idx);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
|
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
|
||||||
|
@ -466,7 +525,32 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
|
||||||
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
|
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
|
||||||
|
|
||||||
if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
|
if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
|
||||||
// A load/store incorrectly passed this load/store.
|
if (inst->isLoad()) {
|
||||||
|
// If this load is to the same block as an external snoop
|
||||||
|
// invalidate that we've observed then the load needs to be
|
||||||
|
// squashed as it could have newer data
|
||||||
|
if (ld_inst->hitExternalSnoop) {
|
||||||
|
if (!memDepViolator ||
|
||||||
|
ld_inst->seqNum < memDepViolator->seqNum) {
|
||||||
|
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
|
||||||
|
" and [sn:%lli] at address %#x\n", inst->seqNum,
|
||||||
|
ld_inst->seqNum, ld_eff_addr1);
|
||||||
|
memDepViolator = ld_inst;
|
||||||
|
|
||||||
|
++lsqMemOrderViolation;
|
||||||
|
|
||||||
|
return TheISA::genMachineCheckFault();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, mark the load as having a possible load violation
|
||||||
|
// and if we see a snoop before it's committed, we need to squash
|
||||||
|
ld_inst->possibleLoadViolation = true;
|
||||||
|
DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x"
|
||||||
|
" between instructions [sn:%lli] and [sn:%lli]\n",
|
||||||
|
inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
|
||||||
|
} else {
|
||||||
|
// A load/store incorrectly passed this store.
|
||||||
// Check if we already have a violator, or if it's newer
|
// Check if we already have a violator, or if it's newer
|
||||||
// squash and refetch.
|
// squash and refetch.
|
||||||
if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
|
if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
|
||||||
|
@ -481,6 +565,7 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
|
||||||
|
|
||||||
return TheISA::genMachineCheckFault();
|
return TheISA::genMachineCheckFault();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
incrLdIdx(load_idx);
|
incrLdIdx(load_idx);
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,12 @@ void UnimpFault::invoke(ThreadContext * tc, StaticInstPtr inst)
|
||||||
panic("Unimpfault: %s\n", panicStr.c_str());
|
panic("Unimpfault: %s\n", panicStr.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handler for the re-execution fault: reads the thread context's current
// PC state and writes that same value straight back through
// tc->pcState(...). The `inst` argument is not used in this body.
// NOTE(review): writing the unchanged PC back presumably makes the CPU
// squash and refetch from the current instruction -- confirm against the
// ThreadContext implementation.
void ReExec::invoke(ThreadContext *tc, StaticInstPtr inst)
|
||||||
|
{
|
||||||
|
tc->pcState(tc->pcState());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
void GenericPageTableFault::invoke(ThreadContext *tc, StaticInstPtr inst)
|
void GenericPageTableFault::invoke(ThreadContext *tc, StaticInstPtr inst)
|
||||||
{
|
{
|
||||||
|
|
|
@ -75,6 +75,16 @@ class UnimpFault : public FaultBase
|
||||||
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
|
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ReExec : public FaultBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual FaultName name() const { return "Re-execution fault";}
|
||||||
|
ReExec() {}
|
||||||
|
void invoke(ThreadContext *tc,
|
||||||
|
StaticInstPtr inst = StaticInst::nullStaticInstPtr);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
class GenericPageTableFault : public FaultBase
|
class GenericPageTableFault : public FaultBase
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue