gpu: fix bugs with MemFence, Flat Instrs and Resource utilization

The "Both" memory fence (OT_BOTH_MEMFENCE) is now flagged as a Global
Memory operation only, to avoid oversubscribing resources.
Flat instructions now check whether the Shared Memory resources are busy,
to avoid oversubscribing resources.
All WaitClass resources now use cycles (not ticks) to register the number
of pipe stages between Scoreboard and Execute, for consistency with the
instruction scheduling logic, which has always used clock cycles.
This commit is contained in:
John Kalamatianos 2016-02-18 10:42:03 -05:00
parent 9a0f1be21f
commit a28a234069
3 changed files with 9 additions and 11 deletions

View file

@ -84,6 +84,7 @@
||(a)==Enums::OT_PRIVATE_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_BOTH_MEMFENCE \
||(a)==Enums::OT_FLAT_ATOMIC)
#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
@ -93,8 +94,7 @@
||(a)==Enums::OT_BOTH_MEMFENCE)
#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_SHARED_MEMFENCE \
||(a)==Enums::OT_BOTH_MEMFENCE)
||(a)==Enums::OT_SHARED_MEMFENCE)
#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)

View file

@ -587,8 +587,8 @@ void
ComputeUnit::init()
{
// Initialize CU Bus models
glbMemToVrfBus.init(&shader->tick_cnt, 1);
locMemToVrfBus.init(&shader->tick_cnt, 1);
glbMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1));
locMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1));
nextGlbMemBus = 0;
nextLocMemBus = 0;
fatal_if(numGlbMemUnits > 1,
@ -596,7 +596,7 @@ ComputeUnit::init()
vrfToGlobalMemPipeBus.resize(numGlbMemUnits);
for (int j = 0; j < numGlbMemUnits; ++j) {
vrfToGlobalMemPipeBus[j] = WaitClass();
vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, 1);
vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1));
}
fatal_if(numLocMemUnits > 1,
@ -604,7 +604,7 @@ ComputeUnit::init()
vrfToLocalMemPipeBus.resize(numLocMemUnits);
for (int j = 0; j < numLocMemUnits; ++j) {
vrfToLocalMemPipeBus[j] = WaitClass();
vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, 1);
vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1));
}
vectorRegsReserved.resize(numSIMDs, 0);
aluPipe.resize(numSIMDs);
@ -612,12 +612,12 @@ ComputeUnit::init()
for (int i = 0; i < numSIMDs + numLocMemUnits + numGlbMemUnits; ++i) {
wfWait[i] = WaitClass();
wfWait[i].init(&shader->tick_cnt, 1);
wfWait[i].init(&shader->tick_cnt, shader->ticks(1));
}
for (int i = 0; i < numSIMDs; ++i) {
aluPipe[i] = WaitClass();
aluPipe[i].init(&shader->tick_cnt, 1);
aluPipe[i].init(&shader->tick_cnt, shader->ticks(1));
}
// Setup space for call args

View file

@ -162,7 +162,6 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType())) {
return true;
}
@ -349,7 +348,7 @@ Wavefront::ready(itype_e type)
}
bool locMemBusRdy = false;
bool locMemIssueRdy = false;
if (type == I_SHARED) {
if (type == I_SHARED || type == I_FLAT) {
for (int j=0; j < computeUnit->numLocMemUnits; ++j) {
if (computeUnit->vrfToLocalMemPipeBus[j].prerdy())
locMemBusRdy = true;
@ -598,7 +597,6 @@ Wavefront::ready(itype_e type)
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id,
simdId, wfSlotId, ii->disassemble());
return 1;
}