O3: Tighten memory order violation checking to 16 bytes.

The comment in the code suggests that the checking granularity should be 16
bytes, however in reality the shift by 8 is 256 bytes which seems much
larger than required.
This commit is contained in:
Ali Saidi 2011-04-04 11:42:23 -05:00
parent ee489a541a
commit 7dde557fdc
4 changed files with 77 additions and 69 deletions

View file

@ -243,6 +243,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** The effective virtual address (lds & stores only). */
Addr effAddr;
/** The size of the request */
Addr effSize;
/** Is the effective virtual address valid. */
bool effAddrValid;
@ -892,6 +895,7 @@ BaseDynInst<Impl>::readBytes(Addr addr, uint8_t *data,
if (translationCompleted) {
if (fault == NoFault) {
effAddr = req->getVaddr();
effSize = size;
effAddrValid = true;
fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
} else {
@ -962,6 +966,7 @@ BaseDynInst<Impl>::writeBytes(uint8_t *data, unsigned size,
if (fault == NoFault && translationCompleted) {
effAddr = req->getVaddr();
effSize = size;
effAddrValid = true;
fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
}

View file

@ -118,6 +118,9 @@ class DerivO3CPU(BaseCPU):
LQEntries = Param.Unsigned(32, "Number of load queue entries")
SQEntries = Param.Unsigned(32, "Number of store queue entries")
LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
LSQCheckLoads = Param.Bool(True,
"Should dependency violations be checked for loads & stores or just stores")
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
SSITSize = Param.Unsigned(1024, "Store set ID table size")

View file

@ -111,6 +111,12 @@ class LSQUnit {
/** Inserts a store instruction. */
void insertStore(DynInstPtr &store_inst);
/** Check for ordering violations in the LSQ
* @param load_idx index to start checking at
* @param inst the instruction to check
*/
Fault checkViolations(int load_idx, DynInstPtr &inst);
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
@ -366,6 +372,14 @@ class LSQUnit {
*/
unsigned SQEntries;
/** The number of places to shift addresses in the LSQ before checking
* for dependency violations
*/
unsigned depCheckShift;
/** Should loads be checked for dependency issues */
bool checkLoads;
/** The number of load instructions in the LQ. */
int loads;
/** The number of store instructions in the SQ. */

View file

@ -162,6 +162,9 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
loadQueue.resize(LQEntries);
storeQueue.resize(SQEntries);
depCheckShift = params->LSQDepCheckShift;
checkLoads = params->LSQCheckLoads;
loadHead = loadTail = 0;
storeHead = storeWBIdx = storeTail = 0;
@ -436,6 +439,55 @@ LSQUnit<Impl>::numLoadsReady()
return retval;
}
template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
{
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
/** @todo in theory you only need to check an instruction that has executed
* however, there isn't a good way in the pipeline at the moment to check
* all instructions that will execute before the store writes back. Thus,
* like the implementation that came before it, we're overly conservative.
*/
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
incrLdIdx(load_idx);
continue;
}
Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
Addr ld_eff_addr2 =
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
if ((inst_eff_addr2 > ld_eff_addr1 && inst_eff_addr1 < ld_eff_addr2) ||
inst_eff_addr1 == ld_eff_addr1) {
// A load/store incorrectly passed this load/store.
// Check if we already have a violator, or if it's newer
// squash and refetch.
if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
break;
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
" at address %#x\n", inst->seqNum, ld_inst->seqNum,
ld_eff_addr1);
memDepViolator = ld_inst;
++lsqMemOrderViolation;
return TheISA::genMachineCheckFault();
}
incrLdIdx(load_idx);
}
return NoFault;
}
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
@ -477,39 +529,9 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
assert(inst->effAddrValid);
int load_idx = inst->lqIdx;
incrLdIdx(load_idx);
while (load_idx != loadTail) {
// Really only need to check loads that have actually executed
// @todo: For now this is extra conservative, detecting a
// violation if the addresses match assuming all accesses
// are quad word accesses.
// @todo: Fix this, magic number being used here
// @todo: Uncachable load is not executed until it reaches
// the head of the ROB. Once this if checks only the executed
// loads(as noted above), this check can be removed
if (loadQueue[load_idx]->effAddrValid &&
((loadQueue[load_idx]->effAddr >> 8)
== (inst->effAddr >> 8)) &&
!loadQueue[load_idx]->uncacheable()) {
// A load incorrectly passed this load. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
DynInstPtr violator = loadQueue[load_idx];
if (!memDepViolator ||
(violator->seqNum < memDepViolator->seqNum)) {
memDepViolator = violator;
} else {
break;
}
++lsqMemOrderViolation;
return genMachineCheckFault();
}
incrLdIdx(load_idx);
}
if (checkLoads)
return checkViolations(load_idx, inst);
}
return load_fault;
@ -564,44 +586,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
++storesToWB;
}
assert(store_inst->effAddrValid);
while (load_idx != loadTail) {
// Really only need to check loads that have actually executed
// It's safe to check all loads because effAddr is set to
// InvalAddr when the dyn inst is created.
return checkViolations(load_idx, store_inst);
// @todo: For now this is extra conservative, detecting a
// violation if the addresses match assuming all accesses
// are quad word accesses.
// @todo: Fix this, magic number being used here
// @todo: Uncachable load is not executed until it reaches
// the head of the ROB. Once this if checks only the executed
// loads(as noted above), this check can be removed
if (loadQueue[load_idx]->effAddrValid &&
((loadQueue[load_idx]->effAddr >> 8)
== (store_inst->effAddr >> 8)) &&
!loadQueue[load_idx]->uncacheable()) {
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
DynInstPtr violator = loadQueue[load_idx];
if (!memDepViolator ||
(violator->seqNum < memDepViolator->seqNum)) {
memDepViolator = violator;
} else {
break;
}
++lsqMemOrderViolation;
return genMachineCheckFault();
}
incrLdIdx(load_idx);
}
return store_fault;
}
template <class Impl>