gem5/src/cpu/o3/lsq_unit_impl.hh
Andreas Hansson 3fea59e162 MEM: Separate requests and responses for timing accesses
This patch moves send/recvTiming and send/recvTimingSnoop from the
Port base class to the MasterPort and SlavePort, and also splits them
into separate member functions for requests and responses:
send/recvTimingReq, send/recvTimingResp, and send/recvTimingSnoopReq,
send/recvTimingSnoopResp. A master port sends requests and receives
responses, and also receives snoop requests and sends snoop
responses. A slave port has the reciprocal behaviour as it receives
requests and sends responses, and sends snoop requests and receives
snoop responses.
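
Schematically, the resulting interface looks as follows (a rough
sketch of the shape of the new accessors, with bodies and unrelated
members omitted; not the literal gem5 declarations):

    class MasterPort : public Port
    {
      public:
        // Requests flow out of a master port, responses flow back in.
        bool sendTimingReq(PacketPtr pkt);
        virtual bool recvTimingResp(PacketPtr pkt) = 0;
        // Snoop requests arrive at a master port, snoop responses leave it.
        virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
        bool sendTimingSnoopResp(PacketPtr pkt);
    };

    class SlavePort : public Port
    {
      public:
        // The reciprocal of the above.
        virtual bool recvTimingReq(PacketPtr pkt) = 0;
        bool sendTimingResp(PacketPtr pkt);
        void sendTimingSnoopReq(PacketPtr pkt);
        virtual bool recvTimingSnoopResp(PacketPtr pkt) = 0;
    };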

For all MemObjects that have only master ports or slave ports (but not
both), e.g. a CPU, or a PIO device, this patch merely adds more
clarity to what kind of access is taking place. For example, a CPU
port used to call sendTiming, and will now call
sendTimingReq. Similarly, a response previously came back through
recvTiming, which is now recvTimingResp. For the modules that have
both master and slave ports, e.g. the bus, the behaviour was
previously relying on branches based on pkt->isRequest(), and this is
now replaced with a direct call to the appropriate member function
depending on the type of access. Please note that send/recvRetry is
still shared by all the timing accessors and remains in the Port base
class for now (to maintain the current bus functionality and avoid
changing the statistics of all regressions).
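
As an illustration, a module with both port types roughly goes from
branching on the packet type to having the port type select the entry
point (illustrative pseudocode, not the actual bus source;
handleRequest/handleResponse are made-up helpers):

    // Before: one shared entry point, branch on the packet type.
    bool recvTiming(PacketPtr pkt)
    {
        if (pkt->isRequest())
            return handleRequest(pkt);
        else
            return handleResponse(pkt);
    }

    // After: the connected port determines which function is called.
    bool recvTimingReq(PacketPtr pkt)  { return handleRequest(pkt); }
    bool recvTimingResp(PacketPtr pkt) { return handleResponse(pkt); }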

The packet queue is split into a MasterPort and SlavePort version to
facilitate the use of the new timing accessors. All uses of the
PacketQueue are updated accordingly.

With this patch, the type of packet (request or response) is now well
defined for each type of access, and asserts on pkt->isRequest() and
pkt->isResponse() are now moved to the appropriate send member
functions. It is also worth noting that sendTimingSnoopReq no longer
returns a boolean, as the semantics do not allow snoop requests to be
rejected or stalled. All these assumptions are now explicitly part of
the port interface itself.
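
For instance, the request path can now check the packet type at the
interface boundary itself, along these lines (a sketch of the idea;
the peer-port member name is illustrative):

    bool
    MasterPort::sendTimingReq(PacketPtr pkt)
    {
        assert(pkt->isRequest());
        // A false return means the slave did not accept the request
        // and the sender must wait for a retry.
        return _slavePort->recvTimingReq(pkt);
    }
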
2012-05-01 13:40:42 -04:00


/*
* Copyright (c) 2010-2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#include "arch/generic/debugfaults.hh"
#include "arch/locked_mem.hh"
#include "base/str.hh"
#include "config/the_isa.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "debug/Activity.hh"
#include "debug/IEW.hh"
#include "debug/LSQUnit.hh"
#include "mem/packet.hh"
#include "mem/request.hh"
template<class Impl>
LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
LSQUnit *lsq_ptr)
: Event(Default_Pri, AutoDelete),
inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
{
}
template<class Impl>
void
LSQUnit<Impl>::WritebackEvent::process()
{
if (!lsqPtr->isSwitchedOut()) {
lsqPtr->writeback(inst, pkt);
}
if (pkt->senderState)
delete pkt->senderState;
delete pkt->req;
delete pkt;
}
template<class Impl>
const char *
LSQUnit<Impl>::WritebackEvent::description() const
{
return "Store writeback";
}
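// Handle a response returning from the memory system: recover the
// LSQSenderState stashed in the packet, complete the access for the
// instruction, and release the request and packet resources.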
template<class Impl>
void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
{
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
DynInstPtr inst = state->inst;
DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum);
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
assert(!pkt->wasNacked());
// If this is a split access, wait until all packets are received.
if (TheISA::HasUnalignedMemAcc && !state->complete()) {
delete pkt->req;
delete pkt;
return;
}
if (isSwitchedOut() || inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
} else {
if (!state->noWB) {
if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
!state->isLoad) {
writeback(inst, pkt);
} else {
writeback(inst, state->mainPkt);
}
}
if (inst->isStore()) {
completeStore(state->idx);
}
}
if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
delete state->mainPkt->req;
delete state->mainPkt;
}
delete state;
delete pkt->req;
delete pkt;
}
template <class Impl>
LSQUnit<Impl>::LSQUnit()
: loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false)
{
}
template<class Impl>
void
LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
unsigned id)
{
cpu = cpu_ptr;
iewStage = iew_ptr;
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
switchedOut = false;
cacheBlockMask = 0;
lsq = lsq_ptr;
lsqID = id;
// Add 1 for the sentinel entry (they are circular queues).
LQEntries = maxLQEntries + 1;
SQEntries = maxSQEntries + 1;
loadQueue.resize(LQEntries);
storeQueue.resize(SQEntries);
depCheckShift = params->LSQDepCheckShift;
checkLoads = params->LSQCheckLoads;
loadHead = loadTail = 0;
storeHead = storeWBIdx = storeTail = 0;
usedPorts = 0;
cachePorts = params->cachePorts;
retryPkt = NULL;
memDepViolator = NULL;
blockedLoadSeqNum = 0;
needsTSO = params->needsTSO;
}
template<class Impl>
std::string
LSQUnit<Impl>::name() const
{
if (Impl::MaxThreads == 1) {
return iewStage->name() + ".lsq";
} else {
return iewStage->name() + ".lsq.thread" + to_string(lsqID);
}
}
template<class Impl>
void
LSQUnit<Impl>::regStats()
{
lsqForwLoads
.name(name() + ".forwLoads")
.desc("Number of loads that had data forwarded from stores");
invAddrLoads
.name(name() + ".invAddrLoads")
.desc("Number of loads ignored due to an invalid address");
lsqSquashedLoads
.name(name() + ".squashedLoads")
.desc("Number of loads squashed");
lsqIgnoredResponses
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
lsqMemOrderViolation
.name(name() + ".memOrderViolation")
.desc("Number of memory ordering violations");
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
invAddrSwpfs
.name(name() + ".invAddrSwpfs")
.desc("Number of software prefetches ignored due to an invalid address");
lsqBlockedLoads
.name(name() + ".blockedLoads")
.desc("Number of blocked loads due to partial load-store forwarding");
lsqRescheduledLoads
.name(name() + ".rescheduledLoads")
.desc("Number of loads that were rescheduled");
lsqCacheBlocked
.name(name() + ".cacheBlocked")
.desc("Number of times an access to memory failed due to the cache being blocked");
}
template<class Impl>
void
LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port)
{
dcachePort = dcache_port;
}
template<class Impl>
void
LSQUnit<Impl>::clearLQ()
{
loadQueue.clear();
}
template<class Impl>
void
LSQUnit<Impl>::clearSQ()
{
storeQueue.clear();
}
template<class Impl>
void
LSQUnit<Impl>::switchOut()
{
switchedOut = true;
for (int i = 0; i < loadQueue.size(); ++i) {
assert(!loadQueue[i]);
loadQueue[i] = NULL;
}
assert(storesToWB == 0);
}
template<class Impl>
void
LSQUnit<Impl>::takeOverFrom()
{
switchedOut = false;
loads = stores = storesToWB = 0;
loadHead = loadTail = 0;
storeHead = storeWBIdx = storeTail = 0;
usedPorts = 0;
memDepViolator = NULL;
blockedLoadSeqNum = 0;
stalled = false;
isLoadBlocked = false;
loadBlockedHandled = false;
// Just in case the memory system changed out from under us
cacheBlockMask = 0;
}
template<class Impl>
void
LSQUnit<Impl>::resizeLQ(unsigned size)
{
unsigned size_plus_sentinel = size + 1;
assert(size_plus_sentinel >= LQEntries);
if (size_plus_sentinel > LQEntries) {
while (size_plus_sentinel > loadQueue.size()) {
DynInstPtr dummy;
loadQueue.push_back(dummy);
LQEntries++;
}
} else {
LQEntries = size_plus_sentinel;
}
}
template<class Impl>
void
LSQUnit<Impl>::resizeSQ(unsigned size)
{
unsigned size_plus_sentinel = size + 1;
if (size_plus_sentinel > SQEntries) {
while (size_plus_sentinel > storeQueue.size()) {
SQEntry dummy;
storeQueue.push_back(dummy);
SQEntries++;
}
} else {
SQEntries = size_plus_sentinel;
}
}
template <class Impl>
void
LSQUnit<Impl>::insert(DynInstPtr &inst)
{
assert(inst->isMemRef());
assert(inst->isLoad() || inst->isStore());
if (inst->isLoad()) {
insertLoad(inst);
} else {
insertStore(inst);
}
inst->setInLSQ();
}
template <class Impl>
void
LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
{
assert((loadTail + 1) % LQEntries != loadHead);
assert(loads < LQEntries);
DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
load_inst->pcState(), loadTail, load_inst->seqNum);
load_inst->lqIdx = loadTail;
if (stores == 0) {
load_inst->sqIdx = -1;
} else {
load_inst->sqIdx = storeTail;
}
loadQueue[loadTail] = load_inst;
incrLdIdx(loadTail);
++loads;
}
template <class Impl>
void
LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
{
// Make sure it is not full before inserting an instruction.
assert((storeTail + 1) % SQEntries != storeHead);
assert(stores < SQEntries);
DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
store_inst->pcState(), storeTail, store_inst->seqNum);
store_inst->sqIdx = storeTail;
store_inst->lqIdx = loadTail;
storeQueue[storeTail] = SQEntry(store_inst);
incrStIdx(storeTail);
++stores;
}
template <class Impl>
typename Impl::DynInstPtr
LSQUnit<Impl>::getMemDepViolator()
{
DynInstPtr temp = memDepViolator;
memDepViolator = NULL;
return temp;
}
template <class Impl>
unsigned
LSQUnit<Impl>::numFreeEntries()
{
unsigned free_lq_entries = LQEntries - loads;
unsigned free_sq_entries = SQEntries - stores;
// Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries.
if (free_lq_entries < free_sq_entries) {
return free_lq_entries - 1;
} else {
return free_sq_entries - 1;
}
}
template <class Impl>
int
LSQUnit<Impl>::numLoadsReady()
{
int load_idx = loadHead;
int retval = 0;
while (load_idx != loadTail) {
assert(loadQueue[load_idx]);
if (loadQueue[load_idx]->readyToIssue()) {
++retval;
}
// Advance the index; without this the loop never terminates.
incrLdIdx(load_idx);
}
return retval;
}
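// Compare an incoming snoop against the outstanding loads; a matching
// cache block either forces the load to re-execute or marks it as
// having observed an external snoop.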
template <class Impl>
void
LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
{
int load_idx = loadHead;
if (!cacheBlockMask) {
assert(dcachePort);
Addr bs = dcachePort->peerBlockSize();
// Make sure we actually got a size
assert(bs != 0);
cacheBlockMask = ~(bs - 1);
}
// If this is the only load in the LSQ we don't care
if (load_idx == loadTail)
return;
incrLdIdx(load_idx);
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
incrLdIdx(load_idx);
continue;
}
Addr load_addr = ld_inst->physEffAddr & cacheBlockMask;
DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n",
ld_inst->seqNum, load_addr, invalidate_addr);
if (load_addr == invalidate_addr) {
if (ld_inst->possibleLoadViolation) {
DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum);
// Mark the load for re-execution
ld_inst->fault = new ReExec;
} else {
// If an older load checks this and it's true
// then we might have missed the snoop
// in which case we need to invalidate to be sure
ld_inst->hitExternalSnoop = true;
}
}
incrLdIdx(load_idx);
}
return;
}
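// Check the loads between load_idx and the queue tail against inst for
// overlapping addresses (at depCheckShift granularity); an overlap is
// a possible memory ordering violation.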
template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
{
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
/** @todo In theory you only need to check an instruction that has executed;
* however, there isn't a good way in the pipeline at the moment to check
* all instructions that will execute before the store writes back. Thus,
* like the implementation that came before it, we're overly conservative.
*/
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
incrLdIdx(load_idx);
continue;
}
Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
Addr ld_eff_addr2 =
(ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
if (inst->isLoad()) {
// If this load is to the same block as an external snoop
// invalidate that we've observed then the load needs to be
// squashed as it could have newer data
if (ld_inst->hitExternalSnoop) {
if (!memDepViolator ||
ld_inst->seqNum < memDepViolator->seqNum) {
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
"and [sn:%lli] at address %#x\n",
inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
memDepViolator = ld_inst;
++lsqMemOrderViolation;
return new GenericISA::M5PanicFault(
"Detected fault with inst [sn:%lli] and "
"[sn:%lli] at address %#x\n",
inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
}
}
// Otherwise, mark the load as a possible load violation,
// and if we see a snoop before it's committed, we need to squash
ld_inst->possibleLoadViolation = true;
DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x"
" between instructions [sn:%lli] and [sn:%lli]\n",
inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
} else {
// A load/store incorrectly passed this store.
// Check if we already have a violator, or if it's newer
// squash and refetch.
if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
break;
DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
"[sn:%lli] at address %#x\n",
inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
memDepViolator = ld_inst;
++lsqMemOrderViolation;
return new GenericISA::M5PanicFault("Detected fault with "
"inst [sn:%lli] and [sn:%lli] at address %#x\n",
inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
}
}
incrLdIdx(load_idx);
}
return NoFault;
}
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
using namespace TheISA;
// Execute a specific load.
Fault load_fault = NoFault;
DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
inst->pcState(), inst->seqNum);
assert(!inst->isSquashed());
load_fault = inst->initiateAcc();
if (inst->isTranslationDelayed() &&
load_fault == NoFault)
return load_fault;
// If the instruction faulted or predicated false, then we need to send it
// along to commit without the instruction completing.
if (load_fault != NoFault || inst->readPredicate() == false) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
if (inst->readPredicate() == false)
inst->forwardOldRegs();
DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
inst->seqNum,
(load_fault != NoFault ? "fault" : "predication"));
if (!(inst->hasRequest() && inst->uncacheable()) ||
inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
} else if (!loadBlocked()) {
assert(inst->effAddrValid);
int load_idx = inst->lqIdx;
incrLdIdx(load_idx);
if (checkLoads)
return checkViolations(load_idx, inst);
}
return load_fault;
}
template <class Impl>
Fault
LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
{
using namespace TheISA;
// Make sure that a store exists.
assert(stores != 0);
int store_idx = store_inst->sqIdx;
DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
store_inst->pcState(), store_inst->seqNum);
assert(!store_inst->isSquashed());
// Check the recently completed loads to see if any match this store's
// address. If so, then we have a memory ordering violation.
int load_idx = store_inst->lqIdx;
Fault store_fault = store_inst->initiateAcc();
if (store_inst->isTranslationDelayed() &&
store_fault == NoFault)
return store_fault;
if (store_inst->readPredicate() == false)
store_inst->forwardOldRegs();
if (storeQueue[store_idx].size == 0) {
DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
store_inst->pcState(), store_inst->seqNum);
return store_fault;
} else if (store_inst->readPredicate() == false) {
DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
store_inst->seqNum);
return store_fault;
}
assert(store_fault == NoFault);
if (store_inst->isStoreConditional()) {
// Store conditionals need to set themselves as able to
// writeback if we haven't had a fault by here.
storeQueue[store_idx].canWB = true;
++storesToWB;
}
return checkViolations(load_idx, store_inst);
}
template <class Impl>
void
LSQUnit<Impl>::commitLoad()
{
assert(loadQueue[loadHead]);
DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
loadQueue[loadHead]->pcState());
loadQueue[loadHead] = NULL;
incrLdIdx(loadHead);
--loads;
}
template <class Impl>
void
LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
{
assert(loads == 0 || loadQueue[loadHead]);
while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
commitLoad();
}
}
template <class Impl>
void
LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
{
assert(stores == 0 || storeQueue[storeHead].inst);
int store_idx = storeHead;
while (store_idx != storeTail) {
assert(storeQueue[store_idx].inst);
// Mark any stores that are now committed and have not yet
// been marked as able to write back.
if (!storeQueue[store_idx].canWB) {
if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
break;
}
DPRINTF(LSQUnit, "Marking store as able to write back, PC "
"%s [sn:%lli]\n",
storeQueue[store_idx].inst->pcState(),
storeQueue[store_idx].inst->seqNum);
storeQueue[store_idx].canWB = true;
++storesToWB;
}
incrStIdx(store_idx);
}
}
template <class Impl>
void
LSQUnit<Impl>::writebackPendingStore()
{
if (hasPendingPkt) {
assert(pendingPkt != NULL);
// If the cache is blocked, this will store the packet for retry.
if (sendStore(pendingPkt)) {
storePostSend(pendingPkt);
}
pendingPkt = NULL;
hasPendingPkt = false;
}
}
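// Write back committed stores to the data cache, subject to the cache
// port limit, TSO ordering, split accesses, and the special handling
// of store conditionals and memory-mapped IPRs.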
template <class Impl>
void
LSQUnit<Impl>::writebackStores()
{
// First writeback the second packet from any split store that didn't
// complete last cycle because there weren't enough cache ports available.
if (TheISA::HasUnalignedMemAcc) {
writebackPendingStore();
}
while (storesToWB > 0 &&
storeWBIdx != storeTail &&
storeQueue[storeWBIdx].inst &&
storeQueue[storeWBIdx].canWB &&
((!needsTSO) || (!storeInFlight)) &&
usedPorts < cachePorts) {
if (isStoreBlocked || lsq->cacheBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
break;
}
// Store didn't write any data so no need to write it back to
// memory.
if (storeQueue[storeWBIdx].size == 0) {
completeStore(storeWBIdx);
incrStIdx(storeWBIdx);
continue;
}
++usedPorts;
if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
incrStIdx(storeWBIdx);
continue;
}
assert(storeQueue[storeWBIdx].req);
assert(!storeQueue[storeWBIdx].committed);
if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
assert(storeQueue[storeWBIdx].sreqLow);
assert(storeQueue[storeWBIdx].sreqHigh);
}
DynInstPtr inst = storeQueue[storeWBIdx].inst;
Request *req = storeQueue[storeWBIdx].req;
RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow;
RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh;
storeQueue[storeWBIdx].committed = true;
assert(!inst->memData);
inst->memData = new uint8_t[64];
memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
MemCmd command =
req->isSwap() ? MemCmd::SwapReq :
(req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
PacketPtr data_pkt;
PacketPtr snd_data_pkt = NULL;
LSQSenderState *state = new LSQSenderState;
state->isLoad = false;
state->idx = storeWBIdx;
state->inst = inst;
if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) {
// Build a single data packet if the store isn't split.
data_pkt = new Packet(req, command);
data_pkt->dataStatic(inst->memData);
data_pkt->senderState = state;
} else {
// Create two packets if the store is split in two.
data_pkt = new Packet(sreqLow, command);
snd_data_pkt = new Packet(sreqHigh, command);
data_pkt->dataStatic(inst->memData);
snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize());
data_pkt->senderState = state;
snd_data_pkt->senderState = state;
state->isSplit = true;
state->outstanding = 2;
// Can delete the main request now.
delete req;
req = sreqLow;
}
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
"to Addr:%#x, data:%#x [sn:%lli]\n",
storeWBIdx, inst->pcState(),
req->getPaddr(), (int)*(inst->memData),
inst->seqNum);
// @todo: Remove this SC hack once the memory system handles it.
if (inst->isStoreConditional()) {
assert(!storeQueue[storeWBIdx].isSplit);
// Disable recording the result temporarily. Writing to
// misc regs normally updates the result, but this is not
// the desired behavior when handling store conditionals.
inst->recordResult = false;
bool success = TheISA::handleLockedWrite(inst.get(), req);
inst->recordResult = true;
if (!success) {
// Instantly complete this store.
DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
"Instantly completing it.\n",
inst->seqNum);
WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
cpu->schedule(wb, curTick() + 1);
if (cpu->checker) {
// Make sure to set the LLSC data for verification
// if checker is loaded
inst->reqToVerify->setExtraData(0);
inst->completeAcc(data_pkt);
}
completeStore(storeWBIdx);
incrStIdx(storeWBIdx);
continue;
}
} else {
// Non-store conditionals do not need a writeback.
state->noWB = true;
}
bool split =
TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit;
ThreadContext *thread = cpu->tcBase(lsqID);
if (req->isMmappedIpr()) {
assert(!inst->isStoreConditional());
TheISA::handleIprWrite(thread, data_pkt);
delete data_pkt;
if (split) {
assert(snd_data_pkt->req->isMmappedIpr());
TheISA::handleIprWrite(thread, snd_data_pkt);
delete snd_data_pkt;
delete sreqLow;
delete sreqHigh;
}
delete state;
delete req;
completeStore(storeWBIdx);
incrStIdx(storeWBIdx);
} else if (!sendStore(data_pkt)) {
DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
"retry later\n",
inst->seqNum);
// Need to store the second packet, if split.
if (split) {
state->pktToSend = true;
state->pendingPacket = snd_data_pkt;
}
} else {
// If split, try to send the second packet too
if (split) {
assert(snd_data_pkt);
// Ensure there are enough ports to use.
if (usedPorts < cachePorts) {
++usedPorts;
if (sendStore(snd_data_pkt)) {
storePostSend(snd_data_pkt);
} else {
DPRINTF(IEW, "D-Cache became blocked when writing"
" [sn:%lli] second packet, will retry later\n",
inst->seqNum);
}
} else {
// Store the packet for when there are free ports.
assert(pendingPkt == NULL);
pendingPkt = snd_data_pkt;
hasPendingPkt = true;
}
} else {
// Not a split store.
storePostSend(data_pkt);
}
}
}
// Not sure this should set it to 0.
usedPorts = 0;
assert(stores >= 0 && storesToWB >= 0);
}
/*template <class Impl>
void
LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
{
list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
mshrSeqNums.end(),
seqNum);
if (mshr_it != mshrSeqNums.end()) {
mshrSeqNums.erase(mshr_it);
DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
}
}*/
template <class Impl>
void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
"(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
int load_idx = loadTail;
decrLdIdx(load_idx);
while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
"[sn:%lli]\n",
loadQueue[load_idx]->pcState(),
loadQueue[load_idx]->seqNum);
if (isStalled() && load_idx == stallingLoadIdx) {
stalled = false;
stallingStoreIsn = 0;
stallingLoadIdx = 0;
}
// Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
--loads;
// Inefficient!
loadTail = load_idx;
decrLdIdx(load_idx);
++lsqSquashedLoads;
}
if (isLoadBlocked) {
if (squashed_num < blockedLoadSeqNum) {
isLoadBlocked = false;
loadBlockedHandled = false;
blockedLoadSeqNum = 0;
}
}
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
memDepViolator = NULL;
}
int store_idx = storeTail;
decrStIdx(store_idx);
while (stores != 0 &&
storeQueue[store_idx].inst->seqNum > squashed_num) {
// Instructions marked as can WB are already committed.
if (storeQueue[store_idx].canWB) {
break;
}
DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
"idx:%i [sn:%lli]\n",
storeQueue[store_idx].inst->pcState(),
store_idx, storeQueue[store_idx].inst->seqNum);
// I don't think this can happen. It should have been cleared
// by the stalling load.
if (isStalled() &&
storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
panic("Is stalled should have been cleared by stalling load!\n");
stalled = false;
stallingStoreIsn = 0;
}
// Clear the smart pointer to make sure it is decremented.
storeQueue[store_idx].inst->setSquashed();
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
delete storeQueue[store_idx].req;
if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) {
delete storeQueue[store_idx].sreqLow;
delete storeQueue[store_idx].sreqHigh;
storeQueue[store_idx].sreqLow = NULL;
storeQueue[store_idx].sreqHigh = NULL;
}
storeQueue[store_idx].req = NULL;
--stores;
// Inefficient!
storeTail = store_idx;
decrStIdx(store_idx);
++lsqSquashedStores;
}
}
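// Bookkeeping after a store packet has been accepted by the cache:
// clear any stall caused by this store, mark non-store-conditional
// stores completed, and advance the writeback index.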
template <class Impl>
void
LSQUnit<Impl>::storePostSend(PacketPtr pkt)
{
if (isStalled() &&
storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
"load idx:%i\n",
stallingStoreIsn, stallingLoadIdx);
stalled = false;
stallingStoreIsn = 0;
iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
}
if (!storeQueue[storeWBIdx].inst->isStoreConditional()) {
// The store is basically completed at this time. This
// only works so long as the checker doesn't try to
// verify the value in memory for stores.
storeQueue[storeWBIdx].inst->setCompleted();
if (cpu->checker) {
cpu->checker->verify(storeQueue[storeWBIdx].inst);
}
}
if (needsTSO) {
storeInFlight = true;
}
incrStIdx(storeWBIdx);
}
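// Finish a memory instruction's writeback: copy the response data into
// the instruction and hand it to commit, unless it was squashed.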
template <class Impl>
void
LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
{
iewStage->wakeCPU();
// Squashed instructions do not need to complete their access.
if (inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
assert(!inst->isStore());
++lsqIgnoredResponses;
return;
}
if (!inst->isExecuted()) {
inst->setExecuted();
// Complete access to copy data to proper place.
inst->completeAcc(pkt);
}
// Need to insert instruction into queue to commit
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
// see if this load changed the PC
iewStage->checkMisprediction(inst);
}
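// Retire a store whose writeback has finished: mark its entry
// completed, pop contiguous completed entries at the queue head, and
// release any load stalled on this store.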
template <class Impl>
void
LSQUnit<Impl>::completeStore(int store_idx)
{
assert(storeQueue[store_idx].inst);
storeQueue[store_idx].completed = true;
--storesToWB;
// A bit conservative because a store completion may not free up entries,
// but hopefully avoids two store completions in one cycle from making
// the CPU tick twice.
cpu->wakeCPU();
cpu->activityThisCycle();
if (store_idx == storeHead) {
do {
incrStIdx(storeHead);
--stores;
} while (storeQueue[storeHead].completed &&
storeHead != storeTail);
iewStage->updateLSQNextCycle = true;
}
DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
"idx:%i\n",
storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
if (isStalled() &&
storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
"load idx:%i\n",
stallingStoreIsn, stallingLoadIdx);
stalled = false;
stallingStoreIsn = 0;
iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
}
storeQueue[store_idx].inst->setCompleted();
if (needsTSO) {
storeInFlight = false;
}
// Tell the checker we've completed this instruction. Some stores
// may get reported twice to the checker, but the checker can
// handle that case.
if (cpu->checker) {
cpu->checker->verify(storeQueue[store_idx].inst);
}
}
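// Try to send a store packet to the data cache; on failure remember it
// as the retry packet and mark this unit blocked.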
template <class Impl>
bool
LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
{
if (!dcachePort->sendTimingReq(data_pkt)) {
// Need to handle becoming blocked on a store.
isStoreBlocked = true;
++lsqCacheBlocked;
assert(retryPkt == NULL);
retryPkt = data_pkt;
lsq->setRetryTid(lsqID);
return false;
}
return true;
}
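// The cache can accept packets again; resend the blocked store packet
// and, for split stores, the pending second half.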
template <class Impl>
void
LSQUnit<Impl>::recvRetry()
{
if (isStoreBlocked) {
DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
assert(retryPkt != NULL);
LSQSenderState *state =
dynamic_cast<LSQSenderState *>(retryPkt->senderState);
if (dcachePort->sendTimingReq(retryPkt)) {
// Don't finish the store unless this is the last packet.
if (!TheISA::HasUnalignedMemAcc || !state->pktToSend ||
state->pendingPacket == retryPkt) {
state->pktToSend = false;
storePostSend(retryPkt);
}
retryPkt = NULL;
isStoreBlocked = false;
lsq->setRetryTid(InvalidThreadID);
// Send any outstanding packet.
if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
assert(state->pendingPacket);
if (sendStore(state->pendingPacket)) {
storePostSend(state->pendingPacket);
}
}
} else {
// Still blocked!
++lsqCacheBlocked;
lsq->setRetryTid(lsqID);
}
} else if (isLoadBlocked) {
DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
"no need to resend packet.\n");
} else {
DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n");
}
}
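// The load and store queues are circular buffers; these helpers
// advance or rewind an index with wraparound.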
template <class Impl>
inline void
LSQUnit<Impl>::incrStIdx(int &store_idx)
{
if (++store_idx >= SQEntries)
store_idx = 0;
}
template <class Impl>
inline void
LSQUnit<Impl>::decrStIdx(int &store_idx)
{
if (--store_idx < 0)
store_idx += SQEntries;
}
template <class Impl>
inline void
LSQUnit<Impl>::incrLdIdx(int &load_idx)
{
if (++load_idx >= LQEntries)
load_idx = 0;
}
template <class Impl>
inline void
LSQUnit<Impl>::decrLdIdx(int &load_idx)
{
if (--load_idx < 0)
load_idx += LQEntries;
}
template <class Impl>
void
LSQUnit<Impl>::dumpInsts()
{
cprintf("Load store queue: Dumping instructions.\n");
cprintf("Load queue size: %i\n", loads);
cprintf("Load queue: ");
int load_idx = loadHead;
while (load_idx != loadTail && loadQueue[load_idx]) {
cprintf("%s ", loadQueue[load_idx]->pcState());
incrLdIdx(load_idx);
}
cprintf("Store queue size: %i\n", stores);
cprintf("Store queue: ");
int store_idx = storeHead;
while (store_idx != storeTail && storeQueue[store_idx].inst) {
cprintf("%s ", storeQueue[store_idx].inst->pcState());
incrStIdx(store_idx);
}
cprintf("\n");
}