cpu: Clarify meaning of cachePorts variable in lsq_unit.hh of O3
cachePorts currently constrains the number of store packets written to the D-Cache each cycle, but loads currently also count against this variable. This leads to unexpected congestion (e.g., setting cachePorts to a realistic 1 will in fact allow a store to write back (WB) only if no loads have accessed the D-Cache this cycle). In the absence of arbitration, this patch decouples how many loads can be done per cycle from how many stores can be done per cycle. Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
This commit is contained in:
parent
3a656da1a6
commit
e5fb6752d6
3 changed files with 21 additions and 16 deletions
|
@ -52,7 +52,8 @@ class DerivO3CPU(BaseCPU):
|
||||||
|
|
||||||
activity = Param.Unsigned(0, "Initial count")
|
activity = Param.Unsigned(0, "Initial count")
|
||||||
|
|
||||||
cachePorts = Param.Unsigned(200, "Cache Ports")
|
cacheStorePorts = Param.Unsigned(200, "Cache Ports. "
|
||||||
|
"Constrains stores only. Loads are constrained by load FUs.")
|
||||||
|
|
||||||
decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay")
|
decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay")
|
||||||
renameToFetchDelay = Param.Cycles(1 ,"Rename to fetch delay")
|
renameToFetchDelay = Param.Cycles(1 ,"Rename to fetch delay")
|
||||||
|
|
|
@ -113,7 +113,7 @@ class LSQUnit {
|
||||||
* @todo: Move the number of used ports up to the LSQ level so it can
|
* @todo: Move the number of used ports up to the LSQ level so it can
|
||||||
* be shared by all LSQ units.
|
* be shared by all LSQ units.
|
||||||
*/
|
*/
|
||||||
void tick() { usedPorts = 0; }
|
void tick() { usedStorePorts = 0; }
|
||||||
|
|
||||||
/** Inserts an instruction. */
|
/** Inserts an instruction. */
|
||||||
void insert(DynInstPtr &inst);
|
void insert(DynInstPtr &inst);
|
||||||
|
@ -429,11 +429,11 @@ class LSQUnit {
|
||||||
int storeTail;
|
int storeTail;
|
||||||
|
|
||||||
/// @todo Consider moving to a more advanced model with write vs read ports
|
/// @todo Consider moving to a more advanced model with write vs read ports
|
||||||
/** The number of cache ports available each cycle. */
|
/** The number of cache ports available each cycle (stores only). */
|
||||||
int cachePorts;
|
int cacheStorePorts;
|
||||||
|
|
||||||
/** The number of used cache ports in this cycle. */
|
/** The number of used cache ports in this cycle by stores. */
|
||||||
int usedPorts;
|
int usedStorePorts;
|
||||||
|
|
||||||
//list<InstSeqNum> mshrSeqNums;
|
//list<InstSeqNum> mshrSeqNums;
|
||||||
|
|
||||||
|
@ -765,8 +765,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
load_inst->memData = new uint8_t[req->getSize()];
|
load_inst->memData = new uint8_t[req->getSize()];
|
||||||
}
|
}
|
||||||
|
|
||||||
++usedPorts;
|
|
||||||
|
|
||||||
// if we the cache is not blocked, do cache access
|
// if we the cache is not blocked, do cache access
|
||||||
bool completedFirst = false;
|
bool completedFirst = false;
|
||||||
PacketPtr data_pkt = Packet::createRead(req);
|
PacketPtr data_pkt = Packet::createRead(req);
|
||||||
|
@ -800,6 +798,11 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
state->mainPkt = data_pkt;
|
state->mainPkt = data_pkt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For now, load throughput is constrained by the number of
|
||||||
|
// load FUs only, and loads do not consume a cache port (only
|
||||||
|
// stores do).
|
||||||
|
// @todo We should account for cache port contention
|
||||||
|
// and arbitrate between loads and stores.
|
||||||
bool successful_load = true;
|
bool successful_load = true;
|
||||||
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
|
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
|
||||||
successful_load = false;
|
successful_load = false;
|
||||||
|
@ -811,7 +814,8 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
// load will be squashed, so indicate this to the state object.
|
// load will be squashed, so indicate this to the state object.
|
||||||
// The first packet will return in completeDataAccess and be
|
// The first packet will return in completeDataAccess and be
|
||||||
// handled there.
|
// handled there.
|
||||||
++usedPorts;
|
// @todo We should also account for cache port contention
|
||||||
|
// here.
|
||||||
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
|
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
|
||||||
// The main packet will be deleted in completeDataAccess.
|
// The main packet will be deleted in completeDataAccess.
|
||||||
state->complete();
|
state->complete();
|
||||||
|
|
|
@ -176,7 +176,7 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
|
||||||
|
|
||||||
depCheckShift = params->LSQDepCheckShift;
|
depCheckShift = params->LSQDepCheckShift;
|
||||||
checkLoads = params->LSQCheckLoads;
|
checkLoads = params->LSQCheckLoads;
|
||||||
cachePorts = params->cachePorts;
|
cacheStorePorts = params->cacheStorePorts;
|
||||||
needsTSO = params->needsTSO;
|
needsTSO = params->needsTSO;
|
||||||
|
|
||||||
resetState();
|
resetState();
|
||||||
|
@ -193,7 +193,7 @@ LSQUnit<Impl>::resetState()
|
||||||
|
|
||||||
storeHead = storeWBIdx = storeTail = 0;
|
storeHead = storeWBIdx = storeTail = 0;
|
||||||
|
|
||||||
usedPorts = 0;
|
usedStorePorts = 0;
|
||||||
|
|
||||||
retryPkt = NULL;
|
retryPkt = NULL;
|
||||||
memDepViolator = NULL;
|
memDepViolator = NULL;
|
||||||
|
@ -792,7 +792,7 @@ LSQUnit<Impl>::writebackStores()
|
||||||
storeQueue[storeWBIdx].inst &&
|
storeQueue[storeWBIdx].inst &&
|
||||||
storeQueue[storeWBIdx].canWB &&
|
storeQueue[storeWBIdx].canWB &&
|
||||||
((!needsTSO) || (!storeInFlight)) &&
|
((!needsTSO) || (!storeInFlight)) &&
|
||||||
usedPorts < cachePorts) {
|
usedStorePorts < cacheStorePorts) {
|
||||||
|
|
||||||
if (isStoreBlocked) {
|
if (isStoreBlocked) {
|
||||||
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
|
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
|
||||||
|
@ -810,7 +810,7 @@ LSQUnit<Impl>::writebackStores()
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
++usedPorts;
|
++usedStorePorts;
|
||||||
|
|
||||||
if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
|
if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
|
||||||
incrStIdx(storeWBIdx);
|
incrStIdx(storeWBIdx);
|
||||||
|
@ -950,8 +950,8 @@ LSQUnit<Impl>::writebackStores()
|
||||||
assert(snd_data_pkt);
|
assert(snd_data_pkt);
|
||||||
|
|
||||||
// Ensure there are enough ports to use.
|
// Ensure there are enough ports to use.
|
||||||
if (usedPorts < cachePorts) {
|
if (usedStorePorts < cacheStorePorts) {
|
||||||
++usedPorts;
|
++usedStorePorts;
|
||||||
if (sendStore(snd_data_pkt)) {
|
if (sendStore(snd_data_pkt)) {
|
||||||
storePostSend(snd_data_pkt);
|
storePostSend(snd_data_pkt);
|
||||||
} else {
|
} else {
|
||||||
|
@ -975,7 +975,7 @@ LSQUnit<Impl>::writebackStores()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not sure this should set it to 0.
|
// Not sure this should set it to 0.
|
||||||
usedPorts = 0;
|
usedStorePorts = 0;
|
||||||
|
|
||||||
assert(stores >= 0 && storesToWB >= 0);
|
assert(stores >= 0 && storesToWB >= 0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue