O3: Tighten memory order violation checking to 16 bytes.
The comment in the code suggests that the checking granularity should be 16 bytes, however in reality the shift by 8 is 256 bytes which seems much larger than required.
This commit is contained in:
parent
ee489a541a
commit
7dde557fdc
|
@ -243,6 +243,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
||||||
/** The effective virtual address (lds & stores only). */
|
/** The effective virtual address (lds & stores only). */
|
||||||
Addr effAddr;
|
Addr effAddr;
|
||||||
|
|
||||||
|
/** The size of the request */
|
||||||
|
Addr effSize;
|
||||||
|
|
||||||
/** Is the effective virtual address valid. */
|
/** Is the effective virtual address valid. */
|
||||||
bool effAddrValid;
|
bool effAddrValid;
|
||||||
|
|
||||||
|
@ -892,6 +895,7 @@ BaseDynInst<Impl>::readBytes(Addr addr, uint8_t *data,
|
||||||
if (translationCompleted) {
|
if (translationCompleted) {
|
||||||
if (fault == NoFault) {
|
if (fault == NoFault) {
|
||||||
effAddr = req->getVaddr();
|
effAddr = req->getVaddr();
|
||||||
|
effSize = size;
|
||||||
effAddrValid = true;
|
effAddrValid = true;
|
||||||
fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
|
fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
|
||||||
} else {
|
} else {
|
||||||
|
@ -962,6 +966,7 @@ BaseDynInst<Impl>::writeBytes(uint8_t *data, unsigned size,
|
||||||
|
|
||||||
if (fault == NoFault && translationCompleted) {
|
if (fault == NoFault && translationCompleted) {
|
||||||
effAddr = req->getVaddr();
|
effAddr = req->getVaddr();
|
||||||
|
effSize = size;
|
||||||
effAddrValid = true;
|
effAddrValid = true;
|
||||||
fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
|
fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
|
||||||
}
|
}
|
||||||
|
|
|
@ -118,6 +118,9 @@ class DerivO3CPU(BaseCPU):
|
||||||
|
|
||||||
LQEntries = Param.Unsigned(32, "Number of load queue entries")
|
LQEntries = Param.Unsigned(32, "Number of load queue entries")
|
||||||
SQEntries = Param.Unsigned(32, "Number of store queue entries")
|
SQEntries = Param.Unsigned(32, "Number of store queue entries")
|
||||||
|
LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
|
||||||
|
LSQCheckLoads = Param.Bool(True,
|
||||||
|
"Should dependency violations be checked for loads & stores or just stores")
|
||||||
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
|
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
|
||||||
SSITSize = Param.Unsigned(1024, "Store set ID table size")
|
SSITSize = Param.Unsigned(1024, "Store set ID table size")
|
||||||
|
|
||||||
|
|
|
@ -111,6 +111,12 @@ class LSQUnit {
|
||||||
/** Inserts a store instruction. */
|
/** Inserts a store instruction. */
|
||||||
void insertStore(DynInstPtr &store_inst);
|
void insertStore(DynInstPtr &store_inst);
|
||||||
|
|
||||||
|
/** Check for ordering violations in the LSQ
|
||||||
|
* @param load_idx index to start checking at
|
||||||
|
* @param inst the instruction to check
|
||||||
|
*/
|
||||||
|
Fault checkViolations(int load_idx, DynInstPtr &inst);
|
||||||
|
|
||||||
/** Executes a load instruction. */
|
/** Executes a load instruction. */
|
||||||
Fault executeLoad(DynInstPtr &inst);
|
Fault executeLoad(DynInstPtr &inst);
|
||||||
|
|
||||||
|
@ -366,6 +372,14 @@ class LSQUnit {
|
||||||
*/
|
*/
|
||||||
unsigned SQEntries;
|
unsigned SQEntries;
|
||||||
|
|
||||||
|
/** The number of places to shift addresses in the LSQ before checking
|
||||||
|
* for dependency violations
|
||||||
|
*/
|
||||||
|
unsigned depCheckShift;
|
||||||
|
|
||||||
|
/** Should loads be checked for dependency issues */
|
||||||
|
bool checkLoads;
|
||||||
|
|
||||||
/** The number of load instructions in the LQ. */
|
/** The number of load instructions in the LQ. */
|
||||||
int loads;
|
int loads;
|
||||||
/** The number of store instructions in the SQ. */
|
/** The number of store instructions in the SQ. */
|
||||||
|
|
|
@ -162,6 +162,9 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
|
||||||
loadQueue.resize(LQEntries);
|
loadQueue.resize(LQEntries);
|
||||||
storeQueue.resize(SQEntries);
|
storeQueue.resize(SQEntries);
|
||||||
|
|
||||||
|
depCheckShift = params->LSQDepCheckShift;
|
||||||
|
checkLoads = params->LSQCheckLoads;
|
||||||
|
|
||||||
loadHead = loadTail = 0;
|
loadHead = loadTail = 0;
|
||||||
|
|
||||||
storeHead = storeWBIdx = storeTail = 0;
|
storeHead = storeWBIdx = storeTail = 0;
|
||||||
|
@ -436,6 +439,55 @@ LSQUnit<Impl>::numLoadsReady()
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
Fault
|
||||||
|
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
|
||||||
|
{
|
||||||
|
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
|
||||||
|
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
|
||||||
|
|
||||||
|
/** @todo in theory you only need to check an instruction that has executed
|
||||||
|
* however, there isn't a good way in the pipeline at the moment to check
|
||||||
|
* all instructions that will execute before the store writes back. Thus,
|
||||||
|
* like the implementation that came before it, we're overly conservative.
|
||||||
|
*/
|
||||||
|
while (load_idx != loadTail) {
|
||||||
|
DynInstPtr ld_inst = loadQueue[load_idx];
|
||||||
|
if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
|
||||||
|
incrLdIdx(load_idx);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
|
||||||
|
Addr ld_eff_addr2 =
|
||||||
|
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
|
||||||
|
|
||||||
|
if ((inst_eff_addr2 > ld_eff_addr1 && inst_eff_addr1 < ld_eff_addr2) ||
|
||||||
|
inst_eff_addr1 == ld_eff_addr1) {
|
||||||
|
// A load/store incorrectly passed this load/store.
|
||||||
|
// Check if we already have a violator, or if it's newer
|
||||||
|
// squash and refetch.
|
||||||
|
if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
|
||||||
|
break;
|
||||||
|
|
||||||
|
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
|
||||||
|
" at address %#x\n", inst->seqNum, ld_inst->seqNum,
|
||||||
|
ld_eff_addr1);
|
||||||
|
memDepViolator = ld_inst;
|
||||||
|
|
||||||
|
++lsqMemOrderViolation;
|
||||||
|
|
||||||
|
return TheISA::genMachineCheckFault();
|
||||||
|
}
|
||||||
|
|
||||||
|
incrLdIdx(load_idx);
|
||||||
|
}
|
||||||
|
return NoFault;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
||||||
|
@ -477,39 +529,9 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
||||||
assert(inst->effAddrValid);
|
assert(inst->effAddrValid);
|
||||||
int load_idx = inst->lqIdx;
|
int load_idx = inst->lqIdx;
|
||||||
incrLdIdx(load_idx);
|
incrLdIdx(load_idx);
|
||||||
while (load_idx != loadTail) {
|
|
||||||
// Really only need to check loads that have actually executed
|
|
||||||
|
|
||||||
// @todo: For now this is extra conservative, detecting a
|
if (checkLoads)
|
||||||
// violation if the addresses match assuming all accesses
|
return checkViolations(load_idx, inst);
|
||||||
// are quad word accesses.
|
|
||||||
|
|
||||||
// @todo: Fix this, magic number being used here
|
|
||||||
|
|
||||||
// @todo: Uncachable load is not executed until it reaches
|
|
||||||
// the head of the ROB. Once this if checks only the executed
|
|
||||||
// loads(as noted above), this check can be removed
|
|
||||||
if (loadQueue[load_idx]->effAddrValid &&
|
|
||||||
((loadQueue[load_idx]->effAddr >> 8)
|
|
||||||
== (inst->effAddr >> 8)) &&
|
|
||||||
!loadQueue[load_idx]->uncacheable()) {
|
|
||||||
// A load incorrectly passed this load. Squash and refetch.
|
|
||||||
// For now return a fault to show that it was unsuccessful.
|
|
||||||
DynInstPtr violator = loadQueue[load_idx];
|
|
||||||
if (!memDepViolator ||
|
|
||||||
(violator->seqNum < memDepViolator->seqNum)) {
|
|
||||||
memDepViolator = violator;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
++lsqMemOrderViolation;
|
|
||||||
|
|
||||||
return genMachineCheckFault();
|
|
||||||
}
|
|
||||||
|
|
||||||
incrLdIdx(load_idx);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return load_fault;
|
return load_fault;
|
||||||
|
@ -564,44 +586,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
|
||||||
++storesToWB;
|
++storesToWB;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(store_inst->effAddrValid);
|
return checkViolations(load_idx, store_inst);
|
||||||
while (load_idx != loadTail) {
|
|
||||||
// Really only need to check loads that have actually executed
|
|
||||||
// It's safe to check all loads because effAddr is set to
|
|
||||||
// InvalAddr when the dyn inst is created.
|
|
||||||
|
|
||||||
// @todo: For now this is extra conservative, detecting a
|
|
||||||
// violation if the addresses match assuming all accesses
|
|
||||||
// are quad word accesses.
|
|
||||||
|
|
||||||
// @todo: Fix this, magic number being used here
|
|
||||||
|
|
||||||
// @todo: Uncachable load is not executed until it reaches
|
|
||||||
// the head of the ROB. Once this if checks only the executed
|
|
||||||
// loads(as noted above), this check can be removed
|
|
||||||
if (loadQueue[load_idx]->effAddrValid &&
|
|
||||||
((loadQueue[load_idx]->effAddr >> 8)
|
|
||||||
== (store_inst->effAddr >> 8)) &&
|
|
||||||
!loadQueue[load_idx]->uncacheable()) {
|
|
||||||
// A load incorrectly passed this store. Squash and refetch.
|
|
||||||
// For now return a fault to show that it was unsuccessful.
|
|
||||||
DynInstPtr violator = loadQueue[load_idx];
|
|
||||||
if (!memDepViolator ||
|
|
||||||
(violator->seqNum < memDepViolator->seqNum)) {
|
|
||||||
memDepViolator = violator;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
++lsqMemOrderViolation;
|
|
||||||
|
|
||||||
return genMachineCheckFault();
|
|
||||||
}
|
|
||||||
|
|
||||||
incrLdIdx(load_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
return store_fault;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
Loading…
Reference in a new issue