O3: Tighten memory order violation checking to 16 bytes.
The comment in the code suggests that the checking granularity should be 16 bytes, however in reality the shift by 8 is 256 bytes which seems much larger than required.
This commit is contained in:
parent
ee489a541a
commit
7dde557fdc
4 changed files with 77 additions and 69 deletions
|
@ -243,6 +243,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** The effective virtual address (lds & stores only). */
|
||||
Addr effAddr;
|
||||
|
||||
/** The size of the request */
|
||||
Addr effSize;
|
||||
|
||||
/** Is the effective virtual address valid. */
|
||||
bool effAddrValid;
|
||||
|
||||
|
@ -892,6 +895,7 @@ BaseDynInst<Impl>::readBytes(Addr addr, uint8_t *data,
|
|||
if (translationCompleted) {
|
||||
if (fault == NoFault) {
|
||||
effAddr = req->getVaddr();
|
||||
effSize = size;
|
||||
effAddrValid = true;
|
||||
fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
|
||||
} else {
|
||||
|
@ -962,6 +966,7 @@ BaseDynInst<Impl>::writeBytes(uint8_t *data, unsigned size,
|
|||
|
||||
if (fault == NoFault && translationCompleted) {
|
||||
effAddr = req->getVaddr();
|
||||
effSize = size;
|
||||
effAddrValid = true;
|
||||
fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
|
||||
}
|
||||
|
|
|
@ -118,6 +118,9 @@ class DerivO3CPU(BaseCPU):
|
|||
|
||||
LQEntries = Param.Unsigned(32, "Number of load queue entries")
|
||||
SQEntries = Param.Unsigned(32, "Number of store queue entries")
|
||||
LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
|
||||
LSQCheckLoads = Param.Bool(True,
|
||||
"Should dependency violations be checked for loads & stores or just stores")
|
||||
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
|
||||
SSITSize = Param.Unsigned(1024, "Store set ID table size")
|
||||
|
||||
|
|
|
@ -111,6 +111,12 @@ class LSQUnit {
|
|||
/** Inserts a store instruction. */
|
||||
void insertStore(DynInstPtr &store_inst);
|
||||
|
||||
/** Check for ordering violations in the LSQ
|
||||
* @param load_idx index to start checking at
|
||||
* @param inst the instruction to check
|
||||
*/
|
||||
Fault checkViolations(int load_idx, DynInstPtr &inst);
|
||||
|
||||
/** Executes a load instruction. */
|
||||
Fault executeLoad(DynInstPtr &inst);
|
||||
|
||||
|
@ -366,6 +372,14 @@ class LSQUnit {
|
|||
*/
|
||||
unsigned SQEntries;
|
||||
|
||||
/** The number of places to shift addresses in the LSQ before checking
|
||||
* for dependency violations
|
||||
*/
|
||||
unsigned depCheckShift;
|
||||
|
||||
/** Should loads be checked for dependency issues */
|
||||
bool checkLoads;
|
||||
|
||||
/** The number of load instructions in the LQ. */
|
||||
int loads;
|
||||
/** The number of store instructions in the SQ. */
|
||||
|
|
|
@ -162,6 +162,9 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
|
|||
loadQueue.resize(LQEntries);
|
||||
storeQueue.resize(SQEntries);
|
||||
|
||||
depCheckShift = params->LSQDepCheckShift;
|
||||
checkLoads = params->LSQCheckLoads;
|
||||
|
||||
loadHead = loadTail = 0;
|
||||
|
||||
storeHead = storeWBIdx = storeTail = 0;
|
||||
|
@ -436,6 +439,55 @@ LSQUnit<Impl>::numLoadsReady()
|
|||
return retval;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
|
||||
{
|
||||
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
|
||||
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
|
||||
|
||||
/** @todo in theory you only need to check an instruction that has executed
|
||||
* however, there isn't a good way in the pipeline at the moment to check
|
||||
* all instructions that will execute before the store writes back. Thus,
|
||||
* like the implementation that came before it, we're overly conservative.
|
||||
*/
|
||||
while (load_idx != loadTail) {
|
||||
DynInstPtr ld_inst = loadQueue[load_idx];
|
||||
if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
|
||||
incrLdIdx(load_idx);
|
||||
continue;
|
||||
}
|
||||
|
||||
Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
|
||||
Addr ld_eff_addr2 =
|
||||
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
|
||||
|
||||
if ((inst_eff_addr2 > ld_eff_addr1 && inst_eff_addr1 < ld_eff_addr2) ||
|
||||
inst_eff_addr1 == ld_eff_addr1) {
|
||||
// A load/store incorrectly passed this load/store.
|
||||
// Check if we already have a violator, or if it's newer
|
||||
// squash and refetch.
|
||||
if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
|
||||
break;
|
||||
|
||||
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
|
||||
" at address %#x\n", inst->seqNum, ld_inst->seqNum,
|
||||
ld_eff_addr1);
|
||||
memDepViolator = ld_inst;
|
||||
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return TheISA::genMachineCheckFault();
|
||||
}
|
||||
|
||||
incrLdIdx(load_idx);
|
||||
}
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
||||
|
@ -477,39 +529,9 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
|||
assert(inst->effAddrValid);
|
||||
int load_idx = inst->lqIdx;
|
||||
incrLdIdx(load_idx);
|
||||
while (load_idx != loadTail) {
|
||||
// Really only need to check loads that have actually executed
|
||||
|
||||
// @todo: For now this is extra conservative, detecting a
|
||||
// violation if the addresses match assuming all accesses
|
||||
// are quad word accesses.
|
||||
|
||||
// @todo: Fix this, magic number being used here
|
||||
|
||||
// @todo: Uncachable load is not executed until it reaches
|
||||
// the head of the ROB. Once this if checks only the executed
|
||||
// loads(as noted above), this check can be removed
|
||||
if (loadQueue[load_idx]->effAddrValid &&
|
||||
((loadQueue[load_idx]->effAddr >> 8)
|
||||
== (inst->effAddr >> 8)) &&
|
||||
!loadQueue[load_idx]->uncacheable()) {
|
||||
// A load incorrectly passed this load. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
DynInstPtr violator = loadQueue[load_idx];
|
||||
if (!memDepViolator ||
|
||||
(violator->seqNum < memDepViolator->seqNum)) {
|
||||
memDepViolator = violator;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return genMachineCheckFault();
|
||||
}
|
||||
|
||||
incrLdIdx(load_idx);
|
||||
}
|
||||
if (checkLoads)
|
||||
return checkViolations(load_idx, inst);
|
||||
}
|
||||
|
||||
return load_fault;
|
||||
|
@ -564,44 +586,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
|
|||
++storesToWB;
|
||||
}
|
||||
|
||||
assert(store_inst->effAddrValid);
|
||||
while (load_idx != loadTail) {
|
||||
// Really only need to check loads that have actually executed
|
||||
// It's safe to check all loads because effAddr is set to
|
||||
// InvalAddr when the dyn inst is created.
|
||||
return checkViolations(load_idx, store_inst);
|
||||
|
||||
// @todo: For now this is extra conservative, detecting a
|
||||
// violation if the addresses match assuming all accesses
|
||||
// are quad word accesses.
|
||||
|
||||
// @todo: Fix this, magic number being used here
|
||||
|
||||
// @todo: Uncachable load is not executed until it reaches
|
||||
// the head of the ROB. Once this if checks only the executed
|
||||
// loads(as noted above), this check can be removed
|
||||
if (loadQueue[load_idx]->effAddrValid &&
|
||||
((loadQueue[load_idx]->effAddr >> 8)
|
||||
== (store_inst->effAddr >> 8)) &&
|
||||
!loadQueue[load_idx]->uncacheable()) {
|
||||
// A load incorrectly passed this store. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
DynInstPtr violator = loadQueue[load_idx];
|
||||
if (!memDepViolator ||
|
||||
(violator->seqNum < memDepViolator->seqNum)) {
|
||||
memDepViolator = violator;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return genMachineCheckFault();
|
||||
}
|
||||
|
||||
incrLdIdx(load_idx);
|
||||
}
|
||||
|
||||
return store_fault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
|
Loading…
Reference in a new issue