cpu: Fix cache blocked load behavior in o3 cpu
This patch fixes the load blocked/replay mechanism in the o3 cpu. Rather than flushing the entire pipeline, this patch replays loads once the cache becomes unblocked. Additionally, deferred memory instructions (loads which had conflicting stores) would not respect the number of functional units when replayed (they only respected the issue width). This patch also corrects that. Improvements of over 20% have been observed on a microbenchmark designed to exercise this behavior.
This commit is contained in:
parent
283935a6f0
commit
4f13f676aa
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010-2012 ARM Limited
|
* Copyright (c) 2010-2012, 2014 ARM Limited
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
* The license below extends only to copyright in the software and shall
|
* The license below extends only to copyright in the software and shall
|
||||||
|
@ -181,6 +181,12 @@ class DefaultIEW
|
||||||
/** Re-executes all rescheduled memory instructions. */
|
/** Re-executes all rescheduled memory instructions. */
|
||||||
void replayMemInst(DynInstPtr &inst);
|
void replayMemInst(DynInstPtr &inst);
|
||||||
|
|
||||||
|
/** Moves memory instruction onto the list of cache blocked instructions */
|
||||||
|
void blockMemInst(DynInstPtr &inst);
|
||||||
|
|
||||||
|
/** Notifies that the cache has become unblocked */
|
||||||
|
void cacheUnblocked();
|
||||||
|
|
||||||
/** Sends an instruction to commit through the time buffer. */
|
/** Sends an instruction to commit through the time buffer. */
|
||||||
void instToCommit(DynInstPtr &inst);
|
void instToCommit(DynInstPtr &inst);
|
||||||
|
|
||||||
|
@ -233,11 +239,6 @@ class DefaultIEW
|
||||||
*/
|
*/
|
||||||
void squashDueToMemOrder(DynInstPtr &inst, ThreadID tid);
|
void squashDueToMemOrder(DynInstPtr &inst, ThreadID tid);
|
||||||
|
|
||||||
/** Sends commit proper information for a squash due to memory becoming
|
|
||||||
* blocked (younger issued instructions must be retried).
|
|
||||||
*/
|
|
||||||
void squashDueToMemBlocked(DynInstPtr &inst, ThreadID tid);
|
|
||||||
|
|
||||||
/** Sets Dispatch to blocked, and signals back to other stages to block. */
|
/** Sets Dispatch to blocked, and signals back to other stages to block. */
|
||||||
void block(ThreadID tid);
|
void block(ThreadID tid);
|
||||||
|
|
||||||
|
|
|
@ -528,29 +528,6 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, ThreadID tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void
|
|
||||||
DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, ThreadID tid)
|
|
||||||
{
|
|
||||||
DPRINTF(IEW, "[tid:%i]: Memory blocked, squashing load and younger insts, "
|
|
||||||
"PC: %s [sn:%i].\n", tid, inst->pcState(), inst->seqNum);
|
|
||||||
if (!toCommit->squash[tid] ||
|
|
||||||
inst->seqNum < toCommit->squashedSeqNum[tid]) {
|
|
||||||
toCommit->squash[tid] = true;
|
|
||||||
|
|
||||||
toCommit->squashedSeqNum[tid] = inst->seqNum;
|
|
||||||
toCommit->pc[tid] = inst->pcState();
|
|
||||||
toCommit->mispredictInst[tid] = NULL;
|
|
||||||
|
|
||||||
// Must include the broadcasted SN in the squash.
|
|
||||||
toCommit->includeSquashInst[tid] = true;
|
|
||||||
|
|
||||||
ldstQueue.setLoadBlockedHandled(tid);
|
|
||||||
|
|
||||||
wroteToTimeBuffer = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::block(ThreadID tid)
|
DefaultIEW<Impl>::block(ThreadID tid)
|
||||||
|
@ -608,6 +585,20 @@ DefaultIEW<Impl>::replayMemInst(DynInstPtr &inst)
|
||||||
instQueue.replayMemInst(inst);
|
instQueue.replayMemInst(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void
|
||||||
|
DefaultIEW<Impl>::blockMemInst(DynInstPtr& inst)
|
||||||
|
{
|
||||||
|
instQueue.blockMemInst(inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void
|
||||||
|
DefaultIEW<Impl>::cacheUnblocked()
|
||||||
|
{
|
||||||
|
instQueue.cacheUnblocked();
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
|
DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
|
||||||
|
@ -1376,15 +1367,6 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
squashDueToMemOrder(violator, tid);
|
squashDueToMemOrder(violator, tid);
|
||||||
|
|
||||||
++memOrderViolationEvents;
|
++memOrderViolationEvents;
|
||||||
} else if (ldstQueue.loadBlocked(tid) &&
|
|
||||||
!ldstQueue.isLoadBlockedHandled(tid)) {
|
|
||||||
fetchRedirect[tid] = true;
|
|
||||||
|
|
||||||
DPRINTF(IEW, "Load operation couldn't execute because the "
|
|
||||||
"memory system is blocked. PC: %s [sn:%lli]\n",
|
|
||||||
inst->pcState(), inst->seqNum);
|
|
||||||
|
|
||||||
squashDueToMemBlocked(inst, tid);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Reset any state associated with redirects that will not
|
// Reset any state associated with redirects that will not
|
||||||
|
@ -1403,17 +1385,6 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
|
|
||||||
++memOrderViolationEvents;
|
++memOrderViolationEvents;
|
||||||
}
|
}
|
||||||
if (ldstQueue.loadBlocked(tid) &&
|
|
||||||
!ldstQueue.isLoadBlockedHandled(tid)) {
|
|
||||||
DPRINTF(IEW, "Load operation couldn't execute because the "
|
|
||||||
"memory system is blocked. PC: %s [sn:%lli]\n",
|
|
||||||
inst->pcState(), inst->seqNum);
|
|
||||||
DPRINTF(IEW, "Blocked load will not be handled because "
|
|
||||||
"already squashing\n");
|
|
||||||
|
|
||||||
ldstQueue.setLoadBlockedHandled(tid);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 ARM Limited
|
* Copyright (c) 2011-2012, 2014 ARM Limited
|
||||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
|
@ -188,11 +188,16 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
DynInstPtr getInstToExecute();
|
DynInstPtr getInstToExecute();
|
||||||
|
|
||||||
/** Returns a memory instruction that was deferred due to a delayed DTB
|
/** Gets a memory instruction that was deferred due to a delayed DTB
|
||||||
* translation if it is now ready to execute.
|
* translation if it is now ready to execute. NULL if none available.
|
||||||
*/
|
*/
|
||||||
DynInstPtr getDeferredMemInstToExecute();
|
DynInstPtr getDeferredMemInstToExecute();
|
||||||
|
|
||||||
|
/** Gets a memory instruction that was blocked on the cache. NULL if none
|
||||||
|
* available.
|
||||||
|
*/
|
||||||
|
DynInstPtr getBlockedMemInstToExecute();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Records the instruction as the producer of a register without
|
* Records the instruction as the producer of a register without
|
||||||
* adding it to the rest of the IQ.
|
* adding it to the rest of the IQ.
|
||||||
|
@ -242,6 +247,12 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
void deferMemInst(DynInstPtr &deferred_inst);
|
void deferMemInst(DynInstPtr &deferred_inst);
|
||||||
|
|
||||||
|
/** Defers a memory instruction when it is cache blocked. */
|
||||||
|
void blockMemInst(DynInstPtr &blocked_inst);
|
||||||
|
|
||||||
|
/** Notify instruction queue that a previous blockage has resolved */
|
||||||
|
void cacheUnblocked();
|
||||||
|
|
||||||
/** Indicates an ordering violation between a store and a load. */
|
/** Indicates an ordering violation between a store and a load. */
|
||||||
void violation(DynInstPtr &store, DynInstPtr &faulting_load);
|
void violation(DynInstPtr &store, DynInstPtr &faulting_load);
|
||||||
|
|
||||||
|
@ -308,6 +319,14 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
std::list<DynInstPtr> deferredMemInsts;
|
std::list<DynInstPtr> deferredMemInsts;
|
||||||
|
|
||||||
|
/** List of instructions that have been cache blocked. */
|
||||||
|
std::list<DynInstPtr> blockedMemInsts;
|
||||||
|
|
||||||
|
/** List of instructions that were cache blocked, but a retry has been seen
|
||||||
|
* since, so they can now be retried. May fail again and go back on the blocked list.
|
||||||
|
*/
|
||||||
|
std::list<DynInstPtr> retryMemInsts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Struct for comparing entries to be added to the priority queue.
|
* Struct for comparing entries to be added to the priority queue.
|
||||||
* This gives reverse ordering to the instructions in terms of
|
* This gives reverse ordering to the instructions in terms of
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2013 ARM Limited
|
* Copyright (c) 2011-2014 ARM Limited
|
||||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
|
@ -413,6 +413,8 @@ InstructionQueue<Impl>::resetState()
|
||||||
nonSpecInsts.clear();
|
nonSpecInsts.clear();
|
||||||
listOrder.clear();
|
listOrder.clear();
|
||||||
deferredMemInsts.clear();
|
deferredMemInsts.clear();
|
||||||
|
blockedMemInsts.clear();
|
||||||
|
retryMemInsts.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -734,13 +736,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
|
|
||||||
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
|
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
|
||||||
|
|
||||||
DynInstPtr deferred_mem_inst;
|
DynInstPtr mem_inst;
|
||||||
int total_deferred_mem_issued = 0;
|
while (mem_inst = getDeferredMemInstToExecute()) {
|
||||||
while (total_deferred_mem_issued < totalWidth &&
|
addReadyMemInst(mem_inst);
|
||||||
(deferred_mem_inst = getDeferredMemInstToExecute()) != 0) {
|
}
|
||||||
issueToExecuteQueue->access(0)->size++;
|
|
||||||
instsToExecute.push_back(deferred_mem_inst);
|
// See if any cache blocked instructions are able to be executed
|
||||||
total_deferred_mem_issued++;
|
while (mem_inst = getBlockedMemInstToExecute()) {
|
||||||
|
addReadyMemInst(mem_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Have iterator to head of the list
|
// Have iterator to head of the list
|
||||||
|
@ -751,12 +754,11 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
// Increment the iterator.
|
// Increment the iterator.
|
||||||
// This will avoid trying to schedule a certain op class if there are no
|
// This will avoid trying to schedule a certain op class if there are no
|
||||||
// FUs that handle it.
|
// FUs that handle it.
|
||||||
|
int total_issued = 0;
|
||||||
ListOrderIt order_it = listOrder.begin();
|
ListOrderIt order_it = listOrder.begin();
|
||||||
ListOrderIt order_end_it = listOrder.end();
|
ListOrderIt order_end_it = listOrder.end();
|
||||||
int total_issued = 0;
|
|
||||||
|
|
||||||
while (total_issued < (totalWidth - total_deferred_mem_issued) &&
|
while (total_issued < totalWidth && order_it != order_end_it) {
|
||||||
order_it != order_end_it) {
|
|
||||||
OpClass op_class = (*order_it).queueType;
|
OpClass op_class = (*order_it).queueType;
|
||||||
|
|
||||||
assert(!readyInsts[op_class].empty());
|
assert(!readyInsts[op_class].empty());
|
||||||
|
@ -874,7 +876,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
// @todo If the way deferred memory instructions are handled due to
|
// @todo If the way deferred memory instructions are handled due to
|
||||||
// translation changes then the deferredMemInsts condition should be removed
|
// translation changes then the deferredMemInsts condition should be removed
|
||||||
// from the code below.
|
// from the code below.
|
||||||
if (total_issued || total_deferred_mem_issued || deferredMemInsts.size()) {
|
if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
|
||||||
cpu->activityThisCycle();
|
cpu->activityThisCycle();
|
||||||
} else {
|
} else {
|
||||||
DPRINTF(IQ, "Not able to schedule any instructions.\n");
|
DPRINTF(IQ, "Not able to schedule any instructions.\n");
|
||||||
|
@ -1050,7 +1052,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
InstructionQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
|
InstructionQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
|
||||||
{
|
{
|
||||||
memDepUnit[replay_inst->threadNumber].replay(replay_inst);
|
memDepUnit[replay_inst->threadNumber].replay();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -1077,6 +1079,27 @@ InstructionQueue<Impl>::deferMemInst(DynInstPtr &deferred_inst)
|
||||||
deferredMemInsts.push_back(deferred_inst);
|
deferredMemInsts.push_back(deferred_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
InstructionQueue<Impl>::blockMemInst(DynInstPtr &blocked_inst)
|
||||||
|
{
|
||||||
|
blocked_inst->translationStarted(false);
|
||||||
|
blocked_inst->translationCompleted(false);
|
||||||
|
|
||||||
|
blocked_inst->clearIssued();
|
||||||
|
blocked_inst->clearCanIssue();
|
||||||
|
blockedMemInsts.push_back(blocked_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
InstructionQueue<Impl>::cacheUnblocked()
|
||||||
|
{
|
||||||
|
retryMemInsts.splice(retryMemInsts.end(), blockedMemInsts);
|
||||||
|
// Get the CPU ticking again
|
||||||
|
cpu->wakeCPU();
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
typename Impl::DynInstPtr
|
typename Impl::DynInstPtr
|
||||||
InstructionQueue<Impl>::getDeferredMemInstToExecute()
|
InstructionQueue<Impl>::getDeferredMemInstToExecute()
|
||||||
|
@ -1084,12 +1107,25 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
|
||||||
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
|
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
|
||||||
++it) {
|
++it) {
|
||||||
if ((*it)->translationCompleted() || (*it)->isSquashed()) {
|
if ((*it)->translationCompleted() || (*it)->isSquashed()) {
|
||||||
DynInstPtr ret = *it;
|
DynInstPtr mem_inst = *it;
|
||||||
deferredMemInsts.erase(it);
|
deferredMemInsts.erase(it);
|
||||||
return ret;
|
return mem_inst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
typename Impl::DynInstPtr
|
||||||
|
InstructionQueue<Impl>::getBlockedMemInstToExecute()
|
||||||
|
{
|
||||||
|
if (retryMemInsts.empty()) {
|
||||||
|
return nullptr;
|
||||||
|
} else {
|
||||||
|
DynInstPtr mem_inst = retryMemInsts.front();
|
||||||
|
retryMemInsts.pop_front();
|
||||||
|
return mem_inst;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 ARM Limited
|
* Copyright (c) 2011-2012, 2014 ARM Limited
|
||||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
|
@ -151,18 +151,6 @@ class LSQ {
|
||||||
bool violation(ThreadID tid)
|
bool violation(ThreadID tid)
|
||||||
{ return thread[tid].violation(); }
|
{ return thread[tid].violation(); }
|
||||||
|
|
||||||
/** Returns if a load is blocked due to the memory system for a specific
|
|
||||||
* thread.
|
|
||||||
*/
|
|
||||||
bool loadBlocked(ThreadID tid)
|
|
||||||
{ return thread[tid].loadBlocked(); }
|
|
||||||
|
|
||||||
bool isLoadBlockedHandled(ThreadID tid)
|
|
||||||
{ return thread[tid].isLoadBlockedHandled(); }
|
|
||||||
|
|
||||||
void setLoadBlockedHandled(ThreadID tid)
|
|
||||||
{ thread[tid].setLoadBlockedHandled(); }
|
|
||||||
|
|
||||||
/** Gets the instruction that caused the memory ordering violation. */
|
/** Gets the instruction that caused the memory ordering violation. */
|
||||||
DynInstPtr getMemDepViolator(ThreadID tid)
|
DynInstPtr getMemDepViolator(ThreadID tid)
|
||||||
{ return thread[tid].getMemDepViolator(); }
|
{ return thread[tid].getMemDepViolator(); }
|
||||||
|
@ -277,15 +265,6 @@ class LSQ {
|
||||||
bool willWB(ThreadID tid)
|
bool willWB(ThreadID tid)
|
||||||
{ return thread[tid].willWB(); }
|
{ return thread[tid].willWB(); }
|
||||||
|
|
||||||
/** Returns if the cache is currently blocked. */
|
|
||||||
bool cacheBlocked() const
|
|
||||||
{ return retryTid != InvalidThreadID; }
|
|
||||||
|
|
||||||
/** Sets the retry thread id, indicating that one of the LSQUnits
|
|
||||||
* tried to access the cache but the cache was blocked. */
|
|
||||||
void setRetryTid(ThreadID tid)
|
|
||||||
{ retryTid = tid; }
|
|
||||||
|
|
||||||
/** Debugging function to print out all instructions. */
|
/** Debugging function to print out all instructions. */
|
||||||
void dumpInsts() const;
|
void dumpInsts() const;
|
||||||
/** Debugging function to print out instructions from a specific thread. */
|
/** Debugging function to print out instructions from a specific thread. */
|
||||||
|
@ -348,10 +327,6 @@ class LSQ {
|
||||||
|
|
||||||
/** Number of Threads. */
|
/** Number of Threads. */
|
||||||
ThreadID numThreads;
|
ThreadID numThreads;
|
||||||
|
|
||||||
/** The thread id of the LSQ Unit that is currently waiting for a
|
|
||||||
* retry. */
|
|
||||||
ThreadID retryTid;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 ARM Limited
|
* Copyright (c) 2011-2012, 2014 ARM Limited
|
||||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
|
@ -62,8 +62,7 @@ LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
|
||||||
: cpu(cpu_ptr), iewStage(iew_ptr),
|
: cpu(cpu_ptr), iewStage(iew_ptr),
|
||||||
LQEntries(params->LQEntries),
|
LQEntries(params->LQEntries),
|
||||||
SQEntries(params->SQEntries),
|
SQEntries(params->SQEntries),
|
||||||
numThreads(params->numThreads),
|
numThreads(params->numThreads)
|
||||||
retryTid(-1)
|
|
||||||
{
|
{
|
||||||
assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
|
assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
|
||||||
|
|
||||||
|
@ -175,11 +174,6 @@ LSQ<Impl>::isDrained() const
|
||||||
drained = false;
|
drained = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (retryTid != InvalidThreadID) {
|
|
||||||
DPRINTF(Drain, "Not drained, the LSQ has blocked the caches.\n");
|
|
||||||
drained = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return drained;
|
return drained;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -338,16 +332,11 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
LSQ<Impl>::recvRetry()
|
LSQ<Impl>::recvRetry()
|
||||||
{
|
{
|
||||||
if (retryTid == InvalidThreadID)
|
iewStage->cacheUnblocked();
|
||||||
{
|
|
||||||
//Squashed, so drop it
|
for (ThreadID tid : *activeThreads) {
|
||||||
return;
|
thread[tid].recvRetry();
|
||||||
}
|
}
|
||||||
int curr_retry_tid = retryTid;
|
|
||||||
// Speculatively clear the retry Tid. This will get set again if
|
|
||||||
// the LSQUnit was unable to complete its access.
|
|
||||||
retryTid = -1;
|
|
||||||
thread[curr_retry_tid].recvRetry();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2013 ARM Limited
|
* Copyright (c) 2012-2014 ARM Limited
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
* The license below extends only to copyright in the software and shall
|
* The license below extends only to copyright in the software and shall
|
||||||
|
@ -183,22 +183,6 @@ class LSQUnit {
|
||||||
/** Returns the memory ordering violator. */
|
/** Returns the memory ordering violator. */
|
||||||
DynInstPtr getMemDepViolator();
|
DynInstPtr getMemDepViolator();
|
||||||
|
|
||||||
/** Returns if a load became blocked due to the memory system. */
|
|
||||||
bool loadBlocked()
|
|
||||||
{ return isLoadBlocked; }
|
|
||||||
|
|
||||||
/** Clears the signal that a load became blocked. */
|
|
||||||
void clearLoadBlocked()
|
|
||||||
{ isLoadBlocked = false; }
|
|
||||||
|
|
||||||
/** Returns if the blocked load was handled. */
|
|
||||||
bool isLoadBlockedHandled()
|
|
||||||
{ return loadBlockedHandled; }
|
|
||||||
|
|
||||||
/** Records the blocked load as being handled. */
|
|
||||||
void setLoadBlockedHandled()
|
|
||||||
{ loadBlockedHandled = true; }
|
|
||||||
|
|
||||||
/** Returns the number of free LQ entries. */
|
/** Returns the number of free LQ entries. */
|
||||||
unsigned numFreeLoadEntries();
|
unsigned numFreeLoadEntries();
|
||||||
|
|
||||||
|
@ -298,7 +282,7 @@ class LSQUnit {
|
||||||
/** Default constructor. */
|
/** Default constructor. */
|
||||||
LSQSenderState()
|
LSQSenderState()
|
||||||
: mainPkt(NULL), pendingPacket(NULL), outstanding(1),
|
: mainPkt(NULL), pendingPacket(NULL), outstanding(1),
|
||||||
noWB(false), isSplit(false), pktToSend(false)
|
noWB(false), isSplit(false), pktToSend(false), cacheBlocked(false)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
/** Instruction who initiated the access to memory. */
|
/** Instruction who initiated the access to memory. */
|
||||||
|
@ -319,6 +303,8 @@ class LSQUnit {
|
||||||
bool isSplit;
|
bool isSplit;
|
||||||
/** Whether or not there is a packet that needs sending. */
|
/** Whether or not there is a packet that needs sending. */
|
||||||
bool pktToSend;
|
bool pktToSend;
|
||||||
|
/** Whether or not the second packet of this split load was blocked */
|
||||||
|
bool cacheBlocked;
|
||||||
|
|
||||||
/** Completes a packet and returns whether the access is finished. */
|
/** Completes a packet and returns whether the access is finished. */
|
||||||
inline bool complete() { return --outstanding == 0; }
|
inline bool complete() { return --outstanding == 0; }
|
||||||
|
@ -473,18 +459,9 @@ class LSQUnit {
|
||||||
/** Whether or not a store is blocked due to the memory system. */
|
/** Whether or not a store is blocked due to the memory system. */
|
||||||
bool isStoreBlocked;
|
bool isStoreBlocked;
|
||||||
|
|
||||||
/** Whether or not a load is blocked due to the memory system. */
|
|
||||||
bool isLoadBlocked;
|
|
||||||
|
|
||||||
/** Has the blocked load been handled. */
|
|
||||||
bool loadBlockedHandled;
|
|
||||||
|
|
||||||
/** Whether or not a store is in flight. */
|
/** Whether or not a store is in flight. */
|
||||||
bool storeInFlight;
|
bool storeInFlight;
|
||||||
|
|
||||||
/** The sequence number of the blocked load. */
|
|
||||||
InstSeqNum blockedLoadSeqNum;
|
|
||||||
|
|
||||||
/** The oldest load that caused a memory ordering violation. */
|
/** The oldest load that caused a memory ordering violation. */
|
||||||
DynInstPtr memDepViolator;
|
DynInstPtr memDepViolator;
|
||||||
|
|
||||||
|
@ -706,8 +683,10 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
memcpy(data, storeQueue[store_idx].data + shift_amt,
|
memcpy(data, storeQueue[store_idx].data + shift_amt,
|
||||||
req->getSize());
|
req->getSize());
|
||||||
|
|
||||||
assert(!load_inst->memData);
|
// Allocate memory if this is the first time a load is issued.
|
||||||
load_inst->memData = new uint8_t[req->getSize()];
|
if (!load_inst->memData) {
|
||||||
|
load_inst->memData = new uint8_t[req->getSize()];
|
||||||
|
}
|
||||||
if (storeQueue[store_idx].isAllZeros)
|
if (storeQueue[store_idx].isAllZeros)
|
||||||
memset(load_inst->memData, 0, req->getSize());
|
memset(load_inst->memData, 0, req->getSize());
|
||||||
else
|
else
|
||||||
|
@ -788,116 +767,105 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
|
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
|
||||||
load_inst->seqNum, load_inst->pcState());
|
load_inst->seqNum, load_inst->pcState());
|
||||||
|
|
||||||
assert(!load_inst->memData);
|
// Allocate memory if this is the first time a load is issued.
|
||||||
load_inst->memData = new uint8_t[req->getSize()];
|
if (!load_inst->memData) {
|
||||||
|
load_inst->memData = new uint8_t[req->getSize()];
|
||||||
|
}
|
||||||
|
|
||||||
++usedPorts;
|
++usedPorts;
|
||||||
|
|
||||||
// if the cache is not blocked, do cache access
|
// if the cache is not blocked, do cache access
|
||||||
bool completedFirst = false;
|
bool completedFirst = false;
|
||||||
if (!lsq->cacheBlocked()) {
|
MemCmd command = req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
|
||||||
MemCmd command =
|
PacketPtr data_pkt = new Packet(req, command);
|
||||||
req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
|
PacketPtr fst_data_pkt = NULL;
|
||||||
PacketPtr data_pkt = new Packet(req, command);
|
PacketPtr snd_data_pkt = NULL;
|
||||||
PacketPtr fst_data_pkt = NULL;
|
|
||||||
PacketPtr snd_data_pkt = NULL;
|
|
||||||
|
|
||||||
data_pkt->dataStatic(load_inst->memData);
|
data_pkt->dataStatic(load_inst->memData);
|
||||||
|
|
||||||
LSQSenderState *state = new LSQSenderState;
|
LSQSenderState *state = new LSQSenderState;
|
||||||
state->isLoad = true;
|
state->isLoad = true;
|
||||||
state->idx = load_idx;
|
state->idx = load_idx;
|
||||||
state->inst = load_inst;
|
state->inst = load_inst;
|
||||||
data_pkt->senderState = state;
|
data_pkt->senderState = state;
|
||||||
|
|
||||||
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
|
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
|
||||||
|
// Point the first packet at the main data packet.
|
||||||
|
fst_data_pkt = data_pkt;
|
||||||
|
} else {
|
||||||
|
// Create the split packets.
|
||||||
|
fst_data_pkt = new Packet(sreqLow, command);
|
||||||
|
snd_data_pkt = new Packet(sreqHigh, command);
|
||||||
|
|
||||||
// Point the first packet at the main data packet.
|
fst_data_pkt->dataStatic(load_inst->memData);
|
||||||
fst_data_pkt = data_pkt;
|
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
|
||||||
} else {
|
|
||||||
|
|
||||||
// Create the split packets.
|
fst_data_pkt->senderState = state;
|
||||||
fst_data_pkt = new Packet(sreqLow, command);
|
snd_data_pkt->senderState = state;
|
||||||
snd_data_pkt = new Packet(sreqHigh, command);
|
|
||||||
|
|
||||||
fst_data_pkt->dataStatic(load_inst->memData);
|
state->isSplit = true;
|
||||||
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
|
state->outstanding = 2;
|
||||||
|
state->mainPkt = data_pkt;
|
||||||
|
}
|
||||||
|
|
||||||
fst_data_pkt->senderState = state;
|
bool successful_load = true;
|
||||||
snd_data_pkt->senderState = state;
|
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
|
||||||
|
successful_load = false;
|
||||||
|
} else if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||||
|
completedFirst = true;
|
||||||
|
|
||||||
state->isSplit = true;
|
// The first packet was sent without problems, so send this one
|
||||||
state->outstanding = 2;
|
// too. If there is a problem with this packet then the whole
|
||||||
state->mainPkt = data_pkt;
|
// load will be squashed, so indicate this to the state object.
|
||||||
}
|
// The first packet will return in completeDataAccess and be
|
||||||
|
// handled there.
|
||||||
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
|
++usedPorts;
|
||||||
// Delete state and data packet because a load retry
|
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
|
||||||
// initiates a pipeline restart; it does not retry.
|
// The main packet will be deleted in completeDataAccess.
|
||||||
delete state;
|
state->complete();
|
||||||
delete data_pkt->req;
|
// Signify to 1st half that the 2nd half was blocked via state
|
||||||
delete data_pkt;
|
state->cacheBlocked = true;
|
||||||
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
successful_load = false;
|
||||||
delete fst_data_pkt->req;
|
|
||||||
delete fst_data_pkt;
|
|
||||||
delete snd_data_pkt->req;
|
|
||||||
delete snd_data_pkt;
|
|
||||||
sreqLow = NULL;
|
|
||||||
sreqHigh = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
req = NULL;
|
|
||||||
|
|
||||||
// If the access didn't succeed, tell the LSQ by setting
|
|
||||||
// the retry thread id.
|
|
||||||
lsq->setRetryTid(lsqID);
|
|
||||||
} else if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
|
||||||
completedFirst = true;
|
|
||||||
|
|
||||||
// The first packet was sent without problems, so send this one
|
|
||||||
// too. If there is a problem with this packet then the whole
|
|
||||||
// load will be squashed, so indicate this to the state object.
|
|
||||||
// The first packet will return in completeDataAccess and be
|
|
||||||
// handled there.
|
|
||||||
++usedPorts;
|
|
||||||
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
|
|
||||||
|
|
||||||
// The main packet will be deleted in completeDataAccess.
|
|
||||||
delete snd_data_pkt->req;
|
|
||||||
delete snd_data_pkt;
|
|
||||||
|
|
||||||
state->complete();
|
|
||||||
|
|
||||||
req = NULL;
|
|
||||||
sreqHigh = NULL;
|
|
||||||
|
|
||||||
lsq->setRetryTid(lsqID);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the cache was blocked, or has become blocked due to the access,
|
// If the cache was blocked, or has become blocked due to the access,
|
||||||
// handle it.
|
// handle it.
|
||||||
if (lsq->cacheBlocked()) {
|
if (!successful_load) {
|
||||||
if (req)
|
if (!sreqLow) {
|
||||||
|
// Packet wasn't split, just delete main packet info
|
||||||
|
delete state;
|
||||||
delete req;
|
delete req;
|
||||||
if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
|
delete data_pkt;
|
||||||
delete sreqLow;
|
}
|
||||||
delete sreqHigh;
|
|
||||||
|
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||||
|
if (!completedFirst) {
|
||||||
|
// Split packet, but first failed. Delete all state.
|
||||||
|
delete state;
|
||||||
|
delete req;
|
||||||
|
delete data_pkt;
|
||||||
|
delete fst_data_pkt;
|
||||||
|
delete snd_data_pkt;
|
||||||
|
delete sreqLow;
|
||||||
|
delete sreqHigh;
|
||||||
|
sreqLow = NULL;
|
||||||
|
sreqHigh = NULL;
|
||||||
|
} else {
|
||||||
|
// Can't delete main packet data or state because first packet
|
||||||
|
// was sent to the memory system
|
||||||
|
delete data_pkt;
|
||||||
|
delete req;
|
||||||
|
delete sreqHigh;
|
||||||
|
delete snd_data_pkt;
|
||||||
|
sreqHigh = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
++lsqCacheBlocked;
|
++lsqCacheBlocked;
|
||||||
|
|
||||||
// There's an older load that's already going to squash.
|
iewStage->blockMemInst(load_inst);
|
||||||
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
|
|
||||||
return NoFault;
|
|
||||||
|
|
||||||
// Record that the load was blocked due to memory. This
|
|
||||||
// load will squash all instructions after it, be
|
|
||||||
// refetched, and re-executed.
|
|
||||||
isLoadBlocked = true;
|
|
||||||
loadBlockedHandled = false;
|
|
||||||
blockedLoadSeqNum = load_inst->seqNum;
|
|
||||||
// No fault occurred, even though the interface is blocked.
|
// No fault occurred, even though the interface is blocked.
|
||||||
return NoFault;
|
return NoFault;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010-2013 ARM Limited
|
* Copyright (c) 2010-2014 ARM Limited
|
||||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
|
@ -99,7 +99,16 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||||
DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
|
DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
|
||||||
DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum);
|
DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum);
|
||||||
|
|
||||||
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
|
if (state->cacheBlocked) {
|
||||||
|
// This is the first half of a previous split load,
|
||||||
|
// where the 2nd half blocked, ignore this response
|
||||||
|
DPRINTF(IEW, "[sn:%lli]: Response from first half of earlier "
|
||||||
|
"blocked split load recieved. Ignoring.\n", inst->seqNum);
|
||||||
|
delete state;
|
||||||
|
delete pkt->req;
|
||||||
|
delete pkt;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// If this is a split access, wait until all packets are received.
|
// If this is a split access, wait until all packets are received.
|
||||||
if (TheISA::HasUnalignedMemAcc && !state->complete()) {
|
if (TheISA::HasUnalignedMemAcc && !state->complete()) {
|
||||||
|
@ -140,8 +149,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
LSQUnit<Impl>::LSQUnit()
|
LSQUnit<Impl>::LSQUnit()
|
||||||
: loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
|
: loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
|
||||||
isStoreBlocked(false), isLoadBlocked(false),
|
isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false)
|
||||||
loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,11 +203,7 @@ LSQUnit<Impl>::resetState()
|
||||||
retryPkt = NULL;
|
retryPkt = NULL;
|
||||||
memDepViolator = NULL;
|
memDepViolator = NULL;
|
||||||
|
|
||||||
blockedLoadSeqNum = 0;
|
|
||||||
|
|
||||||
stalled = false;
|
stalled = false;
|
||||||
isLoadBlocked = false;
|
|
||||||
loadBlockedHandled = false;
|
|
||||||
|
|
||||||
cacheBlockMask = ~(cpu->cacheLineSize() - 1);
|
cacheBlockMask = ~(cpu->cacheLineSize() - 1);
|
||||||
}
|
}
|
||||||
|
@ -632,7 +636,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
||||||
}
|
}
|
||||||
iewStage->instToCommit(inst);
|
iewStage->instToCommit(inst);
|
||||||
iewStage->activityThisCycle();
|
iewStage->activityThisCycle();
|
||||||
} else if (!loadBlocked()) {
|
} else {
|
||||||
assert(inst->effAddrValid());
|
assert(inst->effAddrValid());
|
||||||
int load_idx = inst->lqIdx;
|
int load_idx = inst->lqIdx;
|
||||||
incrLdIdx(load_idx);
|
incrLdIdx(load_idx);
|
||||||
|
@ -787,7 +791,7 @@ LSQUnit<Impl>::writebackStores()
|
||||||
((!needsTSO) || (!storeInFlight)) &&
|
((!needsTSO) || (!storeInFlight)) &&
|
||||||
usedPorts < cachePorts) {
|
usedPorts < cachePorts) {
|
||||||
|
|
||||||
if (isStoreBlocked || lsq->cacheBlocked()) {
|
if (isStoreBlocked) {
|
||||||
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
|
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
|
||||||
" is blocked!\n");
|
" is blocked!\n");
|
||||||
break;
|
break;
|
||||||
|
@ -1024,14 +1028,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
|
||||||
++lsqSquashedLoads;
|
++lsqSquashedLoads;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isLoadBlocked) {
|
|
||||||
if (squashed_num < blockedLoadSeqNum) {
|
|
||||||
isLoadBlocked = false;
|
|
||||||
loadBlockedHandled = false;
|
|
||||||
blockedLoadSeqNum = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
|
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
|
||||||
memDepViolator = NULL;
|
memDepViolator = NULL;
|
||||||
}
|
}
|
||||||
|
@ -1218,7 +1214,6 @@ LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
|
||||||
++lsqCacheBlocked;
|
++lsqCacheBlocked;
|
||||||
assert(retryPkt == NULL);
|
assert(retryPkt == NULL);
|
||||||
retryPkt = data_pkt;
|
retryPkt = data_pkt;
|
||||||
lsq->setRetryTid(lsqID);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -1244,7 +1239,6 @@ LSQUnit<Impl>::recvRetry()
|
||||||
}
|
}
|
||||||
retryPkt = NULL;
|
retryPkt = NULL;
|
||||||
isStoreBlocked = false;
|
isStoreBlocked = false;
|
||||||
lsq->setRetryTid(InvalidThreadID);
|
|
||||||
|
|
||||||
// Send any outstanding packet.
|
// Send any outstanding packet.
|
||||||
if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
|
if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
|
||||||
|
@ -1256,13 +1250,7 @@ LSQUnit<Impl>::recvRetry()
|
||||||
} else {
|
} else {
|
||||||
// Still blocked!
|
// Still blocked!
|
||||||
++lsqCacheBlocked;
|
++lsqCacheBlocked;
|
||||||
lsq->setRetryTid(lsqID);
|
|
||||||
}
|
}
|
||||||
} else if (isLoadBlocked) {
|
|
||||||
DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
|
|
||||||
"no need to resend packet.\n");
|
|
||||||
} else {
|
|
||||||
DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012 ARM Limited
|
* Copyright (c) 2012, 2014 ARM Limited
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
* The license below extends only to copyright in the software and shall
|
* The license below extends only to copyright in the software and shall
|
||||||
|
@ -134,7 +134,7 @@ class MemDepUnit
|
||||||
/** Replays all instructions that have been rescheduled by moving them to
|
/** Replays all instructions that have been rescheduled by moving them to
|
||||||
* the ready list.
|
* the ready list.
|
||||||
*/
|
*/
|
||||||
void replay(DynInstPtr &inst);
|
void replay();
|
||||||
|
|
||||||
/** Completes a memory instruction. */
|
/** Completes a memory instruction. */
|
||||||
void completed(DynInstPtr &inst);
|
void completed(DynInstPtr &inst);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012 ARM Limited
|
* Copyright (c) 2012, 2014 ARM Limited
|
||||||
* All rights reserved
|
* All rights reserved
|
||||||
*
|
*
|
||||||
* The license below extends only to copyright in the software and shall
|
* The license below extends only to copyright in the software and shall
|
||||||
|
@ -370,7 +370,7 @@ MemDepUnit<MemDepPred, Impl>::reschedule(DynInstPtr &inst)
|
||||||
|
|
||||||
template <class MemDepPred, class Impl>
|
template <class MemDepPred, class Impl>
|
||||||
void
|
void
|
||||||
MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
|
MemDepUnit<MemDepPred, Impl>::replay()
|
||||||
{
|
{
|
||||||
DynInstPtr temp_inst;
|
DynInstPtr temp_inst;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue