gpu-compute: Refactoring Wavefront::dynWaveId
This commit is contained in:
parent
498d0e63e5
commit
e9b14d5111
4 changed files with 16 additions and 15 deletions
|
@ -755,7 +755,7 @@ gen_special('GridSize', 'w->gridSz[src0]')
|
||||||
gen_special('GridGroups',
|
gen_special('GridGroups',
|
||||||
'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
|
'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
|
||||||
gen_special('LaneId', 'lane')
|
gen_special('LaneId', 'lane')
|
||||||
gen_special('WaveId', 'w->dynWaveId')
|
gen_special('WaveId', 'w->wfId')
|
||||||
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
|
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
|
||||||
|
|
||||||
# gen_special('CU'', ')
|
# gen_special('CU'', ')
|
||||||
|
|
|
@ -221,7 +221,7 @@ ComputeUnit::updateEvents() {
|
||||||
|
|
||||||
void
|
void
|
||||||
ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
|
ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
|
||||||
int cnt, LdsChunk *ldsChunk, NDRange *ndr)
|
int waveId, LdsChunk *ldsChunk, NDRange *ndr)
|
||||||
{
|
{
|
||||||
static int _n_wave = 0;
|
static int _n_wave = 0;
|
||||||
|
|
||||||
|
@ -232,20 +232,20 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
|
||||||
init_mask.reset();
|
init_mask.reset();
|
||||||
|
|
||||||
for (int k = 0; k < wfSize(); ++k) {
|
for (int k = 0; k < wfSize(); ++k) {
|
||||||
if (k + cnt * wfSize() < trueWgSizeTotal)
|
if (k + waveId * wfSize() < trueWgSizeTotal)
|
||||||
init_mask[k] = 1;
|
init_mask[k] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
w->kernId = ndr->dispatchId;
|
w->kernId = ndr->dispatchId;
|
||||||
w->dynWaveId = cnt;
|
w->wfId = waveId;
|
||||||
w->initMask = init_mask.to_ullong();
|
w->initMask = init_mask.to_ullong();
|
||||||
|
|
||||||
for (int k = 0; k < wfSize(); ++k) {
|
for (int k = 0; k < wfSize(); ++k) {
|
||||||
w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0];
|
w->workItemId[0][k] = (k + waveId * wfSize()) % trueWgSize[0];
|
||||||
w->workItemId[1][k] =
|
w->workItemId[1][k] =
|
||||||
((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1];
|
((k + waveId * wfSize()) / trueWgSize[0]) % trueWgSize[1];
|
||||||
w->workItemId[2][k] =
|
w->workItemId[2][k] =
|
||||||
(k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
|
(k + waveId * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
|
||||||
|
|
||||||
w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] *
|
w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] *
|
||||||
trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] +
|
trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] +
|
||||||
|
@ -294,8 +294,8 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
|
||||||
// is this the last wavefront in the workgroup?
|
// is this the last wavefront in the workgroup?
|
||||||
// if so, set the spillWidth to be the remaining work-items
|
// if so, set the spillWidth to be the remaining work-items
|
||||||
// so that the vector access is correct
|
// so that the vector access is correct
|
||||||
if ((cnt + 1) * wfSize() >= trueWgSizeTotal) {
|
if ((waveId + 1) * wfSize() >= trueWgSizeTotal) {
|
||||||
w->spillWidth = trueWgSizeTotal - (cnt * wfSize());
|
w->spillWidth = trueWgSizeTotal - (waveId * wfSize());
|
||||||
} else {
|
} else {
|
||||||
w->spillWidth = wfSize();
|
w->spillWidth = wfSize();
|
||||||
}
|
}
|
||||||
|
@ -341,7 +341,7 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
|
||||||
|
|
||||||
// calculate the number of 32-bit vector registers required by wavefront
|
// calculate the number of 32-bit vector registers required by wavefront
|
||||||
int vregDemand = ndr->q.sRegCount + (2 * ndr->q.dRegCount);
|
int vregDemand = ndr->q.sRegCount + (2 * ndr->q.dRegCount);
|
||||||
int cnt = 0;
|
int wave_id = 0;
|
||||||
|
|
||||||
// Assign WFs by spreading them across SIMDs, 1 WF per SIMD at a time
|
// Assign WFs by spreading them across SIMDs, 1 WF per SIMD at a time
|
||||||
for (int m = 0; m < shader->n_wf * numSIMDs; ++m) {
|
for (int m = 0; m < shader->n_wf * numSIMDs; ++m) {
|
||||||
|
@ -352,7 +352,7 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
|
||||||
if (w->status == Wavefront::S_STOPPED) {
|
if (w->status == Wavefront::S_STOPPED) {
|
||||||
// if we have scheduled all work items then stop
|
// if we have scheduled all work items then stop
|
||||||
// scheduling wavefronts
|
// scheduling wavefronts
|
||||||
if (cnt * wfSize() >= trueWgSizeTotal)
|
if (wave_id * wfSize() >= trueWgSizeTotal)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// reserve vector registers for the scheduled wavefront
|
// reserve vector registers for the scheduled wavefront
|
||||||
|
@ -365,8 +365,8 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
|
||||||
w->reservedVectorRegs = normSize;
|
w->reservedVectorRegs = normSize;
|
||||||
vectorRegsReserved[m % numSIMDs] += w->reservedVectorRegs;
|
vectorRegsReserved[m % numSIMDs] += w->reservedVectorRegs;
|
||||||
|
|
||||||
StartWF(w, trueWgSize, trueWgSizeTotal, cnt, ldsChunk, ndr);
|
StartWF(w, trueWgSize, trueWgSizeTotal, wave_id, ldsChunk, ndr);
|
||||||
++cnt;
|
++wave_id;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
++barrier_id;
|
++barrier_id;
|
||||||
|
|
|
@ -935,7 +935,7 @@ Wavefront::pc(uint32_t new_pc)
|
||||||
uint32_t
|
uint32_t
|
||||||
Wavefront::getStaticContextSize() const
|
Wavefront::getStaticContextSize() const
|
||||||
{
|
{
|
||||||
return barCnt.size() * sizeof(int) + sizeof(dynWaveId) + sizeof(maxBarCnt) +
|
return barCnt.size() * sizeof(int) + sizeof(wfId) + sizeof(maxBarCnt) +
|
||||||
sizeof(oldBarrierCnt) + sizeof(barrierCnt) + sizeof(wgId) +
|
sizeof(oldBarrierCnt) + sizeof(barrierCnt) + sizeof(wgId) +
|
||||||
sizeof(computeUnit->cu_id) + sizeof(barrierId) + sizeof(initMask) +
|
sizeof(computeUnit->cu_id) + sizeof(barrierId) + sizeof(initMask) +
|
||||||
sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) +
|
sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) +
|
||||||
|
|
|
@ -194,7 +194,8 @@ class Wavefront : public SimObject
|
||||||
uint32_t gridSz[3];
|
uint32_t gridSz[3];
|
||||||
uint32_t wgId;
|
uint32_t wgId;
|
||||||
uint32_t wgSz;
|
uint32_t wgSz;
|
||||||
uint32_t dynWaveId;
|
// wavefront id within a workgroup
|
||||||
|
uint32_t wfId;
|
||||||
uint32_t maxDynWaveId;
|
uint32_t maxDynWaveId;
|
||||||
uint32_t dispatchId;
|
uint32_t dispatchId;
|
||||||
// outstanding global+local memory requests
|
// outstanding global+local memory requests
|
||||||
|
|
Loading…
Reference in a new issue