cpu: Clarify meaning of cachePorts variable in lsq_unit.hh of O3

cachePorts currently constrains the number of store packets written to the
D-Cache each cycle, but loads also count against this variable. This leads
to unexpected congestion (e.g., setting cachePorts to a realistic value of 1
in fact allows a store to WB only if no load has accessed the D-Cache this
cycle). In the absence of a cache-port arbitration model, this patch decouples
how many loads can be done per cycle from how many stores can be done per
cycle: the renamed cacheStorePorts parameter constrains stores only, while
load throughput is limited by the number of load FUs.
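
As an illustration of the new semantics, a user-side configuration could
constrain store write-back bandwidth as in the sketch below. This is not part
of the commit; the import and the standalone CPU instantiation are assumptions
for illustration only.

    # Minimal sketch (assumed setup, not from this commit): limit store
    # write-backs to one D-Cache access per cycle on the O3 model.
    from m5.objects import DerivO3CPU

    cpu = DerivO3CPU()

    # After this patch, the parameter gates stores only; loads are limited
    # by the number of load FUs in the CPU's FU pool instead.
    cpu.cacheStorePorts = 1

Under the old semantics, the same value of 1 would also have prevented a store
from writing back whenever any load had already accessed the D-Cache in that
cycle.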

Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Arthur Perais 2016-12-21 15:04:06 -06:00
parent 3a656da1a6
commit e5fb6752d6
3 changed files with 21 additions and 16 deletions

src/cpu/o3/O3CPU.py

@@ -52,7 +52,8 @@ class DerivO3CPU(BaseCPU):
     activity = Param.Unsigned(0, "Initial count")
 
-    cachePorts = Param.Unsigned(200, "Cache Ports")
+    cacheStorePorts = Param.Unsigned(200, "Cache Ports. "
+          "Constrains stores only. Loads are constrained by load FUs.")
 
     decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay")
     renameToFetchDelay = Param.Cycles(1 ,"Rename to fetch delay")

src/cpu/o3/lsq_unit.hh

@@ -113,7 +113,7 @@ class LSQUnit {
      * @todo: Move the number of used ports up to the LSQ level so it can
      * be shared by all LSQ units.
      */
-    void tick() { usedPorts = 0; }
+    void tick() { usedStorePorts = 0; }
 
     /** Inserts an instruction. */
     void insert(DynInstPtr &inst);
@@ -429,11 +429,11 @@ class LSQUnit {
     int storeTail;
 
     /// @todo Consider moving to a more advanced model with write vs read ports
-    /** The number of cache ports available each cycle. */
-    int cachePorts;
+    /** The number of cache ports available each cycle (stores only). */
+    int cacheStorePorts;
 
-    /** The number of used cache ports in this cycle. */
-    int usedPorts;
+    /** The number of used cache ports in this cycle by stores. */
+    int usedStorePorts;
 
     //list<InstSeqNum> mshrSeqNums;
@@ -765,8 +765,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
         load_inst->memData = new uint8_t[req->getSize()];
     }
 
-    ++usedPorts;
-
     // if we the cache is not blocked, do cache access
     bool completedFirst = false;
     PacketPtr data_pkt = Packet::createRead(req);
@@ -800,6 +798,11 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
         state->mainPkt = data_pkt;
     }
 
+    // For now, load throughput is constrained by the number of
+    // load FUs only, and loads do not consume a cache port (only
+    // stores do).
+    // @todo We should account for cache port contention
+    // and arbitrate between loads and stores.
     bool successful_load = true;
     if (!dcachePort->sendTimingReq(fst_data_pkt)) {
         successful_load = false;
@@ -811,7 +814,8 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
             // load will be squashed, so indicate this to the state object.
             // The first packet will return in completeDataAccess and be
             // handled there.
-            ++usedPorts;
+            // @todo We should also account for cache port contention
+            // here.
             if (!dcachePort->sendTimingReq(snd_data_pkt)) {
                 // The main packet will be deleted in completeDataAccess.
                 state->complete();

src/cpu/o3/lsq_unit_impl.hh

@@ -176,7 +176,7 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
     depCheckShift = params->LSQDepCheckShift;
     checkLoads = params->LSQCheckLoads;
-    cachePorts = params->cachePorts;
+    cacheStorePorts = params->cacheStorePorts;
     needsTSO = params->needsTSO;
 
     resetState();
@@ -193,7 +193,7 @@ LSQUnit<Impl>::resetState()
     storeHead = storeWBIdx = storeTail = 0;
 
-    usedPorts = 0;
+    usedStorePorts = 0;
 
     retryPkt = NULL;
     memDepViolator = NULL;
@ -792,7 +792,7 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].inst && storeQueue[storeWBIdx].inst &&
storeQueue[storeWBIdx].canWB && storeQueue[storeWBIdx].canWB &&
((!needsTSO) || (!storeInFlight)) && ((!needsTSO) || (!storeInFlight)) &&
usedPorts < cachePorts) { usedStorePorts < cacheStorePorts) {
if (isStoreBlocked) { if (isStoreBlocked) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache" DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
@@ -810,7 +810,7 @@ LSQUnit<Impl>::writebackStores()
             continue;
         }
 
-        ++usedPorts;
+        ++usedStorePorts;
 
         if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
             incrStIdx(storeWBIdx);
@@ -950,8 +950,8 @@ LSQUnit<Impl>::writebackStores()
                 assert(snd_data_pkt);
 
                 // Ensure there are enough ports to use.
-                if (usedPorts < cachePorts) {
-                    ++usedPorts;
+                if (usedStorePorts < cacheStorePorts) {
+                    ++usedStorePorts;
                     if (sendStore(snd_data_pkt)) {
                         storePostSend(snd_data_pkt);
                     } else {
@@ -975,7 +975,7 @@ LSQUnit<Impl>::writebackStores()
     }
 
     // Not sure this should set it to 0.
-    usedPorts = 0;
+    usedStorePorts = 0;
 
     assert(stores >= 0 && storesToWB >= 0);
 }