gpu-compute: Wavefront refactoring
Renaming members of the Wavefront class in accordance with the style guide.
This commit is contained in:
parent
e9fe1b838b
commit
589e13a23b
12 changed files with 259 additions and 259 deletions
|
@ -233,7 +233,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
|
||||||
|
|
||||||
typedef Base::DestCType DestCType;
|
typedef Base::DestCType DestCType;
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -254,7 +254,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
|
||||||
typedef Base::DestCType DestCType;
|
typedef Base::DestCType DestCType;
|
||||||
typedef Base::SrcCType SrcCType;
|
typedef Base::SrcCType SrcCType;
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -275,7 +275,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||||
{
|
{
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -310,7 +310,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||||
typedef typename Base::Src1CType Src1T;
|
typedef typename Base::Src1CType Src1T;
|
||||||
typedef typename Base::Src2CType Src2T;
|
typedef typename Base::Src2CType Src2T;
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -344,7 +344,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||||
typedef CType Src0T;
|
typedef CType Src0T;
|
||||||
typedef typename Base::Src1CType Src1T;
|
typedef typename Base::Src1CType Src1T;
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -371,7 +371,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||||
{
|
{
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
CType dest_val;
|
CType dest_val;
|
||||||
|
@ -399,7 +399,7 @@ $class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||||
{
|
{
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -745,17 +745,17 @@ def gen_special(brig_opcode, expr, dest_type='U32'):
|
||||||
|
|
||||||
gen(brig_opcode, None, expr, base_class)
|
gen(brig_opcode, None, expr, base_class)
|
||||||
|
|
||||||
gen_special('WorkItemId', 'w->workitemid[src0][lane]')
|
gen_special('WorkItemId', 'w->workItemId[src0][lane]')
|
||||||
gen_special('WorkItemAbsId',
|
gen_special('WorkItemAbsId',
|
||||||
'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])')
|
'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
|
||||||
gen_special('WorkGroupId', 'w->workgroupid[src0]')
|
gen_special('WorkGroupId', 'w->workGroupId[src0]')
|
||||||
gen_special('WorkGroupSize', 'w->workgroupsz[src0]')
|
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
|
||||||
gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]')
|
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
|
||||||
gen_special('GridSize', 'w->gridsz[src0]')
|
gen_special('GridSize', 'w->gridSz[src0]')
|
||||||
gen_special('GridGroups',
|
gen_special('GridGroups',
|
||||||
'divCeil(w->gridsz[src0],w->workgroupsz[src0])')
|
'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
|
||||||
gen_special('LaneId', 'lane')
|
gen_special('LaneId', 'lane')
|
||||||
gen_special('WaveId', 'w->dynwaveid')
|
gen_special('WaveId', 'w->dynWaveId')
|
||||||
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
|
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
|
||||||
|
|
||||||
# gen_special('CU'', ')
|
# gen_special('CU'', ')
|
||||||
|
|
|
@ -960,7 +960,7 @@ namespace HsailISA
|
||||||
gpuDynInst->simdId = w->simdId;
|
gpuDynInst->simdId = w->simdId;
|
||||||
gpuDynInst->wfSlotId = w->wfSlotId;
|
gpuDynInst->wfSlotId = w->wfSlotId;
|
||||||
gpuDynInst->wfDynId = w->wfDynId;
|
gpuDynInst->wfDynId = w->wfDynId;
|
||||||
gpuDynInst->kern_id = w->kern_id;
|
gpuDynInst->kern_id = w->kernId;
|
||||||
gpuDynInst->cu_id = w->computeUnit->cu_id;
|
gpuDynInst->cu_id = w->computeUnit->cu_id;
|
||||||
|
|
||||||
gpuDynInst->memoryOrder =
|
gpuDynInst->memoryOrder =
|
||||||
|
@ -971,10 +971,10 @@ namespace HsailISA
|
||||||
GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
|
GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
|
||||||
gmp->getGMReqFIFO().push(gpuDynInst);
|
gmp->getGMReqFIFO().push(gpuDynInst);
|
||||||
|
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
} else if (o_type == Enums::OT_SHARED_MEMFENCE) {
|
} else if (o_type == Enums::OT_SHARED_MEMFENCE) {
|
||||||
// no-op
|
// no-op
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -131,12 +131,12 @@ namespace HsailISA
|
||||||
{
|
{
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
// mask off completed work-items
|
// mask off completed work-items
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
w->init_mask[lane] = 0;
|
w->initMask[lane] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -149,14 +149,14 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
// if all work-items have completed, then wave-front is done
|
// if all work-items have completed, then wave-front is done
|
||||||
if (w->init_mask.none()) {
|
if (w->initMask.none()) {
|
||||||
w->status = Wavefront::S_STOPPED;
|
w->status = Wavefront::S_STOPPED;
|
||||||
|
|
||||||
int32_t refCount = w->computeUnit->getLds().
|
int32_t refCount = w->computeUnit->getLds().
|
||||||
decreaseRefCounter(w->dispatchid, w->wg_id);
|
decreaseRefCounter(w->dispatchId, w->wgId);
|
||||||
|
|
||||||
DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
|
DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
|
||||||
w->computeUnit->cu_id, w->wg_id, refCount);
|
w->computeUnit->cu_id, w->wgId, refCount);
|
||||||
|
|
||||||
// free the vector registers of the completed wavefront
|
// free the vector registers of the completed wavefront
|
||||||
w->computeUnit->vectorRegsReserved[w->simdId] -=
|
w->computeUnit->vectorRegsReserved[w->simdId] -=
|
||||||
|
@ -201,8 +201,8 @@ namespace HsailISA
|
||||||
{
|
{
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
assert(w->barrier_cnt == w->old_barrier_cnt);
|
assert(w->barrierCnt == w->oldBarrierCnt);
|
||||||
w->barrier_cnt = w->old_barrier_cnt + 1;
|
w->barrierCnt = w->oldBarrierCnt + 1;
|
||||||
w->stalledAtBarrier = true;
|
w->stalledAtBarrier = true;
|
||||||
}
|
}
|
||||||
} // namespace HsailISA
|
} // namespace HsailISA
|
||||||
|
|
|
@ -59,7 +59,7 @@ namespace HsailISA
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
typedef typename DestDataType::CType CType M5_VAR_USED;
|
typedef typename DestDataType::CType CType M5_VAR_USED;
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
std::vector<Addr> addr_vec;
|
std::vector<Addr> addr_vec;
|
||||||
addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
|
addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
|
||||||
this->addr.calcVector(w, addr_vec);
|
this->addr.calcVector(w, addr_vec);
|
||||||
|
@ -159,7 +159,7 @@ namespace HsailISA
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
typedef typename MemDataType::CType MemCType;
|
typedef typename MemDataType::CType MemCType;
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
// Kernarg references are handled uniquely for now (no Memory Request
|
// Kernarg references are handled uniquely for now (no Memory Request
|
||||||
// is used), so special-case them up front. Someday we should
|
// is used), so special-case them up front. Someday we should
|
||||||
|
@ -230,7 +230,7 @@ namespace HsailISA
|
||||||
m->simdId = w->simdId;
|
m->simdId = w->simdId;
|
||||||
m->wfSlotId = w->wfSlotId;
|
m->wfSlotId = w->wfSlotId;
|
||||||
m->wfDynId = w->wfDynId;
|
m->wfDynId = w->wfDynId;
|
||||||
m->kern_id = w->kern_id;
|
m->kern_id = w->kernId;
|
||||||
m->cu_id = w->computeUnit->cu_id;
|
m->cu_id = w->computeUnit->cu_id;
|
||||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||||
|
|
||||||
|
@ -261,8 +261,8 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_SPILL:
|
case Brig::BRIG_SEGMENT_SPILL:
|
||||||
|
@ -281,14 +281,14 @@ namespace HsailISA
|
||||||
m->addr[lane] = m->addr[lane] * w->spillWidth +
|
m->addr[lane] = m->addr[lane] * w->spillWidth +
|
||||||
lane * sizeof(MemCType) + w->spillBase;
|
lane * sizeof(MemCType) + w->spillBase;
|
||||||
|
|
||||||
w->last_addr[lane] = m->addr[lane];
|
w->lastAddr[lane] = m->addr[lane];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_GROUP:
|
case Brig::BRIG_SEGMENT_GROUP:
|
||||||
|
@ -296,8 +296,8 @@ namespace HsailISA
|
||||||
m->pipeId = LDSMEM_PIPE;
|
m->pipeId = LDSMEM_PIPE;
|
||||||
m->latency.set(w->computeUnit->shader->ticks(24));
|
m->latency.set(w->computeUnit->shader->ticks(24));
|
||||||
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_rd_lm++;
|
w->outstandingReqsRdLm++;
|
||||||
w->rd_lm_reqs_in_pipe--;
|
w->rdLmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_READONLY:
|
case Brig::BRIG_SEGMENT_READONLY:
|
||||||
|
@ -313,8 +313,8 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_PRIVATE:
|
case Brig::BRIG_SEGMENT_PRIVATE:
|
||||||
|
@ -332,8 +332,8 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -341,8 +341,8 @@ namespace HsailISA
|
||||||
m->addr[0]);
|
m->addr[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename OperationType, typename SrcDataType,
|
template<typename OperationType, typename SrcDataType,
|
||||||
|
@ -355,7 +355,7 @@ namespace HsailISA
|
||||||
|
|
||||||
typedef typename OperationType::CType CType;
|
typedef typename OperationType::CType CType;
|
||||||
|
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
// arg references are handled uniquely for now (no Memory Request
|
// arg references are handled uniquely for now (no Memory Request
|
||||||
// is used), so special-case them up front. Someday we should
|
// is used), so special-case them up front. Someday we should
|
||||||
|
@ -419,7 +419,7 @@ namespace HsailISA
|
||||||
m->simdId = w->simdId;
|
m->simdId = w->simdId;
|
||||||
m->wfSlotId = w->wfSlotId;
|
m->wfSlotId = w->wfSlotId;
|
||||||
m->wfDynId = w->wfDynId;
|
m->wfDynId = w->wfDynId;
|
||||||
m->kern_id = w->kern_id;
|
m->kern_id = w->kernId;
|
||||||
m->cu_id = w->computeUnit->cu_id;
|
m->cu_id = w->computeUnit->cu_id;
|
||||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||||
|
|
||||||
|
@ -448,8 +448,8 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_gm++;
|
w->outstandingReqsWrGm++;
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_SPILL:
|
case Brig::BRIG_SEGMENT_SPILL:
|
||||||
|
@ -469,8 +469,8 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_gm++;
|
w->outstandingReqsWrGm++;
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_GROUP:
|
case Brig::BRIG_SEGMENT_GROUP:
|
||||||
|
@ -478,8 +478,8 @@ namespace HsailISA
|
||||||
m->pipeId = LDSMEM_PIPE;
|
m->pipeId = LDSMEM_PIPE;
|
||||||
m->latency.set(w->computeUnit->shader->ticks(24));
|
m->latency.set(w->computeUnit->shader->ticks(24));
|
||||||
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_lm++;
|
w->outstandingReqsWrLm++;
|
||||||
w->wr_lm_reqs_in_pipe--;
|
w->wrLmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_PRIVATE:
|
case Brig::BRIG_SEGMENT_PRIVATE:
|
||||||
|
@ -497,16 +497,16 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_gm++;
|
w->outstandingReqsWrGm++;
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
fatal("Store to unsupported segment %d\n", this->segment);
|
fatal("Store to unsupported segment %d\n", this->segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename OperationType, typename SrcDataType,
|
template<typename OperationType, typename SrcDataType,
|
||||||
|
@ -596,7 +596,7 @@ namespace HsailISA
|
||||||
m->simdId = w->simdId;
|
m->simdId = w->simdId;
|
||||||
m->wfSlotId = w->wfSlotId;
|
m->wfSlotId = w->wfSlotId;
|
||||||
m->wfDynId = w->wfDynId;
|
m->wfDynId = w->wfDynId;
|
||||||
m->kern_id = w->kern_id;
|
m->kern_id = w->kernId;
|
||||||
m->cu_id = w->computeUnit->cu_id;
|
m->cu_id = w->computeUnit->cu_id;
|
||||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||||
|
|
||||||
|
@ -607,10 +607,10 @@ namespace HsailISA
|
||||||
m->pipeId = GLBMEM_PIPE;
|
m->pipeId = GLBMEM_PIPE;
|
||||||
|
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_gm++;
|
w->outstandingReqsWrGm++;
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Brig::BRIG_SEGMENT_GROUP:
|
case Brig::BRIG_SEGMENT_GROUP:
|
||||||
|
@ -618,10 +618,10 @@ namespace HsailISA
|
||||||
m->pipeId = LDSMEM_PIPE;
|
m->pipeId = LDSMEM_PIPE;
|
||||||
m->latency.set(w->computeUnit->shader->ticks(24));
|
m->latency.set(w->computeUnit->shader->ticks(24));
|
||||||
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_lm++;
|
w->outstandingReqsWrLm++;
|
||||||
w->wr_lm_reqs_in_pipe--;
|
w->wrLmReqsInPipe--;
|
||||||
w->outstanding_reqs_rd_lm++;
|
w->outstandingReqsRdLm++;
|
||||||
w->rd_lm_reqs_in_pipe--;
|
w->rdLmReqsInPipe--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -629,8 +629,8 @@ namespace HsailISA
|
||||||
this->segment);
|
this->segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
|
const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
|
||||||
|
|
|
@ -79,7 +79,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
|
Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
int op = 0;
|
int op = 0;
|
||||||
bool got_op = false;
|
bool got_op = false;
|
||||||
|
@ -181,7 +181,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintLane(Wavefront *w)
|
Call::MagicPrintLane(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
int src_val1 = src1.get<int>(w, lane, 1);
|
int src_val1 = src1.get<int>(w, lane, 1);
|
||||||
|
@ -204,7 +204,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintLane64(Wavefront *w)
|
Call::MagicPrintLane64(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
|
int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
|
||||||
|
@ -227,7 +227,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintWF32(Wavefront *w)
|
Call::MagicPrintWF32(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
std::string res_str;
|
std::string res_str;
|
||||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||||
|
|
||||||
|
@ -265,7 +265,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintWF32ID(Wavefront *w)
|
Call::MagicPrintWF32ID(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
std::string res_str;
|
std::string res_str;
|
||||||
int src_val3 = -1;
|
int src_val3 = -1;
|
||||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||||
|
@ -307,7 +307,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintWF64(Wavefront *w)
|
Call::MagicPrintWF64(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
std::string res_str;
|
std::string res_str;
|
||||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||||
|
|
||||||
|
@ -345,7 +345,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintWFID64(Wavefront *w)
|
Call::MagicPrintWFID64(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
std::string res_str;
|
std::string res_str;
|
||||||
int src_val3 = -1;
|
int src_val3 = -1;
|
||||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||||
|
@ -387,7 +387,7 @@ namespace HsailISA
|
||||||
Call::MagicPrintWFFloat(Wavefront *w)
|
Call::MagicPrintWFFloat(Wavefront *w)
|
||||||
{
|
{
|
||||||
#if TRACING_ON
|
#if TRACING_ON
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
std::string res_str;
|
std::string res_str;
|
||||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||||
|
|
||||||
|
@ -425,7 +425,7 @@ namespace HsailISA
|
||||||
res_str = csprintf("Breakpoint encountered for wavefront %i\n",
|
res_str = csprintf("Breakpoint encountered for wavefront %i\n",
|
||||||
w->wfSlotId);
|
w->wfSlotId);
|
||||||
|
|
||||||
res_str += csprintf(" Kern ID: %i\n", w->kern_id);
|
res_str += csprintf(" Kern ID: %i\n", w->kernId);
|
||||||
res_str += csprintf(" Phase ID: %i\n", w->simdId);
|
res_str += csprintf(" Phase ID: %i\n", w->simdId);
|
||||||
res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
|
res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
|
||||||
res_str += csprintf(" Exec mask: ");
|
res_str += csprintf(" Exec mask: ");
|
||||||
|
@ -455,7 +455,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicPrefixSum(Wavefront *w)
|
Call::MagicPrefixSum(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
|
@ -474,7 +474,7 @@ namespace HsailISA
|
||||||
// The reduction instruction takes up to 64 inputs (one from
|
// The reduction instruction takes up to 64 inputs (one from
|
||||||
// each thread in a WF) and sums them. It returns the sum to
|
// each thread in a WF) and sums them. It returns the sum to
|
||||||
// each thread in the WF.
|
// each thread in the WF.
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
|
@ -494,7 +494,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicMaskLower(Wavefront *w)
|
Call::MagicMaskLower(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
|
@ -519,7 +519,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicMaskUpper(Wavefront *w)
|
Call::MagicMaskUpper(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int res = 0;
|
int res = 0;
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -544,42 +544,42 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicJoinWFBar(Wavefront *w)
|
Call::MagicJoinWFBar(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int max_cnt = 0;
|
int max_cnt = 0;
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
w->bar_cnt[lane]++;
|
w->barCnt[lane]++;
|
||||||
|
|
||||||
if (w->bar_cnt[lane] > max_cnt) {
|
if (w->barCnt[lane] > max_cnt) {
|
||||||
max_cnt = w->bar_cnt[lane];
|
max_cnt = w->barCnt[lane];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_cnt > w->max_bar_cnt) {
|
if (max_cnt > w->maxBarCnt) {
|
||||||
w->max_bar_cnt = max_cnt;
|
w->maxBarCnt = max_cnt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Call::MagicWaitWFBar(Wavefront *w)
|
Call::MagicWaitWFBar(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int max_cnt = 0;
|
int max_cnt = 0;
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
w->bar_cnt[lane]--;
|
w->barCnt[lane]--;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (w->bar_cnt[lane] > max_cnt) {
|
if (w->barCnt[lane] > max_cnt) {
|
||||||
max_cnt = w->bar_cnt[lane];
|
max_cnt = w->barCnt[lane];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_cnt < w->max_bar_cnt) {
|
if (max_cnt < w->maxBarCnt) {
|
||||||
w->max_bar_cnt = max_cnt;
|
w->maxBarCnt = max_cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
|
w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
|
||||||
|
@ -591,7 +591,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicPanic(Wavefront *w)
|
Call::MagicPanic(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
if (mask[lane]) {
|
if (mask[lane]) {
|
||||||
|
@ -648,12 +648,12 @@ namespace HsailISA
|
||||||
m->pipeId = GLBMEM_PIPE;
|
m->pipeId = GLBMEM_PIPE;
|
||||||
m->latency.set(w->computeUnit->shader->ticks(64));
|
m->latency.set(w->computeUnit->shader->ticks(64));
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_gm++;
|
w->outstandingReqsWrGm++;
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -687,12 +687,12 @@ namespace HsailISA
|
||||||
m->pipeId = GLBMEM_PIPE;
|
m->pipeId = GLBMEM_PIPE;
|
||||||
m->latency.set(w->computeUnit->shader->ticks(64));
|
m->latency.set(w->computeUnit->shader->ticks(64));
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_wr_gm++;
|
w->outstandingReqsWrGm++;
|
||||||
w->wr_gm_reqs_in_pipe--;
|
w->wrGmReqsInPipe--;
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -725,16 +725,16 @@ namespace HsailISA
|
||||||
m->pipeId = GLBMEM_PIPE;
|
m->pipeId = GLBMEM_PIPE;
|
||||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||||
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
|
||||||
w->outstanding_reqs_rd_gm++;
|
w->outstandingReqsRdGm++;
|
||||||
w->rd_gm_reqs_in_pipe--;
|
w->rdGmReqsInPipe--;
|
||||||
w->outstanding_reqs++;
|
w->outstandingReqs++;
|
||||||
w->mem_reqs_in_pipe--;
|
w->memReqsInPipe--;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Call::MagicXactCasLd(Wavefront *w)
|
Call::MagicXactCasLd(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int src_val1 = 0;
|
int src_val1 = 0;
|
||||||
|
|
||||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||||
|
@ -756,7 +756,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicMostSigThread(Wavefront *w)
|
Call::MagicMostSigThread(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
unsigned mst = true;
|
unsigned mst = true;
|
||||||
|
|
||||||
for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
|
for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
|
||||||
|
@ -770,7 +770,7 @@ namespace HsailISA
|
||||||
void
|
void
|
||||||
Call::MagicMostSigBroadcast(Wavefront *w)
|
Call::MagicMostSigBroadcast(Wavefront *w)
|
||||||
{
|
{
|
||||||
const VectorMask &mask = w->get_pred();
|
const VectorMask &mask = w->getPred();
|
||||||
int res = 0;
|
int res = 0;
|
||||||
bool got_res = false;
|
bool got_res = false;
|
||||||
|
|
||||||
|
|
|
@ -178,13 +178,13 @@ ComputeUnit::FillKernelState(Wavefront *w, NDRange *ndr)
|
||||||
{
|
{
|
||||||
w->resizeRegFiles(ndr->q.cRegCount, ndr->q.sRegCount, ndr->q.dRegCount);
|
w->resizeRegFiles(ndr->q.cRegCount, ndr->q.sRegCount, ndr->q.dRegCount);
|
||||||
|
|
||||||
w->workgroupsz[0] = ndr->q.wgSize[0];
|
w->workGroupSz[0] = ndr->q.wgSize[0];
|
||||||
w->workgroupsz[1] = ndr->q.wgSize[1];
|
w->workGroupSz[1] = ndr->q.wgSize[1];
|
||||||
w->workgroupsz[2] = ndr->q.wgSize[2];
|
w->workGroupSz[2] = ndr->q.wgSize[2];
|
||||||
w->wg_sz = w->workgroupsz[0] * w->workgroupsz[1] * w->workgroupsz[2];
|
w->wgSz = w->workGroupSz[0] * w->workGroupSz[1] * w->workGroupSz[2];
|
||||||
w->gridsz[0] = ndr->q.gdSize[0];
|
w->gridSz[0] = ndr->q.gdSize[0];
|
||||||
w->gridsz[1] = ndr->q.gdSize[1];
|
w->gridSz[1] = ndr->q.gdSize[1];
|
||||||
w->gridsz[2] = ndr->q.gdSize[2];
|
w->gridSz[2] = ndr->q.gdSize[2];
|
||||||
w->kernelArgs = ndr->q.args;
|
w->kernelArgs = ndr->q.args;
|
||||||
w->privSizePerItem = ndr->q.privMemPerItem;
|
w->privSizePerItem = ndr->q.privMemPerItem;
|
||||||
w->spillSizePerItem = ndr->q.spillMemPerItem;
|
w->spillSizePerItem = ndr->q.spillMemPerItem;
|
||||||
|
@ -236,29 +236,29 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
|
||||||
init_mask[k] = 1;
|
init_mask[k] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
w->kern_id = ndr->dispatchId;
|
w->kernId = ndr->dispatchId;
|
||||||
w->dynwaveid = cnt;
|
w->dynWaveId = cnt;
|
||||||
w->init_mask = init_mask.to_ullong();
|
w->initMask = init_mask.to_ullong();
|
||||||
|
|
||||||
for (int k = 0; k < wfSize(); ++k) {
|
for (int k = 0; k < wfSize(); ++k) {
|
||||||
w->workitemid[0][k] = (k+cnt*wfSize()) % trueWgSize[0];
|
w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0];
|
||||||
w->workitemid[1][k] =
|
w->workItemId[1][k] =
|
||||||
((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1];
|
((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1];
|
||||||
w->workitemid[2][k] =
|
w->workItemId[2][k] =
|
||||||
(k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
|
(k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
|
||||||
|
|
||||||
w->workitemFlatId[k] = w->workitemid[2][k] * trueWgSize[0] *
|
w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] *
|
||||||
trueWgSize[1] + w->workitemid[1][k] * trueWgSize[0] +
|
trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] +
|
||||||
w->workitemid[0][k];
|
w->workItemId[0][k];
|
||||||
}
|
}
|
||||||
|
|
||||||
w->barrier_slots = divCeil(trueWgSizeTotal, wfSize());
|
w->barrierSlots = divCeil(trueWgSizeTotal, wfSize());
|
||||||
|
|
||||||
w->bar_cnt.resize(wfSize(), 0);
|
w->barCnt.resize(wfSize(), 0);
|
||||||
|
|
||||||
w->max_bar_cnt = 0;
|
w->maxBarCnt = 0;
|
||||||
w->old_barrier_cnt = 0;
|
w->oldBarrierCnt = 0;
|
||||||
w->barrier_cnt = 0;
|
w->barrierCnt = 0;
|
||||||
|
|
||||||
w->privBase = ndr->q.privMemStart;
|
w->privBase = ndr->q.privMemStart;
|
||||||
ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize();
|
ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize();
|
||||||
|
@ -269,22 +269,22 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
|
||||||
w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong());
|
w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong());
|
||||||
|
|
||||||
// WG state
|
// WG state
|
||||||
w->wg_id = ndr->globalWgId;
|
w->wgId = ndr->globalWgId;
|
||||||
w->dispatchid = ndr->dispatchId;
|
w->dispatchId = ndr->dispatchId;
|
||||||
w->workgroupid[0] = w->wg_id % ndr->numWg[0];
|
w->workGroupId[0] = w->wgId % ndr->numWg[0];
|
||||||
w->workgroupid[1] = (w->wg_id / ndr->numWg[0]) % ndr->numWg[1];
|
w->workGroupId[1] = (w->wgId / ndr->numWg[0]) % ndr->numWg[1];
|
||||||
w->workgroupid[2] = w->wg_id / (ndr->numWg[0] * ndr->numWg[1]);
|
w->workGroupId[2] = w->wgId / (ndr->numWg[0] * ndr->numWg[1]);
|
||||||
|
|
||||||
w->barrier_id = barrier_id;
|
w->barrierId = barrier_id;
|
||||||
w->stalledAtBarrier = false;
|
w->stalledAtBarrier = false;
|
||||||
|
|
||||||
// set the wavefront context to have a pointer to this section of the LDS
|
// set the wavefront context to have a pointer to this section of the LDS
|
||||||
w->ldsChunk = ldsChunk;
|
w->ldsChunk = ldsChunk;
|
||||||
|
|
||||||
int32_t refCount M5_VAR_USED =
|
int32_t refCount M5_VAR_USED =
|
||||||
lds.increaseRefCounter(w->dispatchid, w->wg_id);
|
lds.increaseRefCounter(w->dispatchId, w->wgId);
|
||||||
DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n",
|
DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n",
|
||||||
cu_id, w->wg_id, refCount);
|
cu_id, w->wgId, refCount);
|
||||||
|
|
||||||
w->instructionBuffer.clear();
|
w->instructionBuffer.clear();
|
||||||
|
|
||||||
|
@ -468,15 +468,15 @@ ComputeUnit::AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
|
||||||
DPRINTF(GPUSync, "Checking WF[%d][%d]\n", i_simd, i_wf);
|
DPRINTF(GPUSync, "Checking WF[%d][%d]\n", i_simd, i_wf);
|
||||||
|
|
||||||
DPRINTF(GPUSync, "wf->barrier_id = %d, _barrier_id = %d\n",
|
DPRINTF(GPUSync, "wf->barrier_id = %d, _barrier_id = %d\n",
|
||||||
w->barrier_id, _barrier_id);
|
w->barrierId, _barrier_id);
|
||||||
|
|
||||||
DPRINTF(GPUSync, "wf->barrier_cnt %d, bcnt = %d\n",
|
DPRINTF(GPUSync, "wf->barrier_cnt %d, bcnt = %d\n",
|
||||||
w->barrier_cnt, bcnt);
|
w->barrierCnt, bcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (w->status == Wavefront::S_RUNNING &&
|
if (w->status == Wavefront::S_RUNNING &&
|
||||||
w->barrier_id == _barrier_id && w->barrier_cnt == bcnt &&
|
w->barrierId == _barrier_id && w->barrierCnt == bcnt &&
|
||||||
!w->outstanding_reqs) {
|
!w->outstandingReqs) {
|
||||||
++ccnt;
|
++ccnt;
|
||||||
|
|
||||||
DPRINTF(GPUSync, "WF[%d][%d] at barrier, increment ccnt to "
|
DPRINTF(GPUSync, "WF[%d][%d] at barrier, increment ccnt to "
|
||||||
|
@ -646,17 +646,17 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
|
||||||
if (w->status == Wavefront::S_RETURNING) {
|
if (w->status == Wavefront::S_RETURNING) {
|
||||||
DPRINTF(GPUDisp, "CU%d: WF[%d][%d][wv=%d]: WG id completed %d\n",
|
DPRINTF(GPUDisp, "CU%d: WF[%d][%d][wv=%d]: WG id completed %d\n",
|
||||||
computeUnit->cu_id, w->simdId, w->wfSlotId,
|
computeUnit->cu_id, w->simdId, w->wfSlotId,
|
||||||
w->wfDynId, w->kern_id);
|
w->wfDynId, w->kernId);
|
||||||
|
|
||||||
computeUnit->shader->dispatcher->notifyWgCompl(w);
|
computeUnit->shader->dispatcher->notifyWgCompl(w);
|
||||||
w->status = Wavefront::S_STOPPED;
|
w->status = Wavefront::S_STOPPED;
|
||||||
} else {
|
} else {
|
||||||
w->outstanding_reqs--;
|
w->outstandingReqs--;
|
||||||
}
|
}
|
||||||
|
|
||||||
DPRINTF(GPUSync, "CU%d: WF[%d][%d]: barrier_cnt = %d\n",
|
DPRINTF(GPUSync, "CU%d: WF[%d][%d]: barrier_cnt = %d\n",
|
||||||
computeUnit->cu_id, gpuDynInst->simdId,
|
computeUnit->cu_id, gpuDynInst->simdId,
|
||||||
gpuDynInst->wfSlotId, w->barrier_cnt);
|
gpuDynInst->wfSlotId, w->barrierCnt);
|
||||||
|
|
||||||
if (gpuDynInst->useContinuation) {
|
if (gpuDynInst->useContinuation) {
|
||||||
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
|
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
|
||||||
|
|
|
@ -305,7 +305,7 @@ GpuDispatcher::exec()
|
||||||
void
|
void
|
||||||
GpuDispatcher::notifyWgCompl(Wavefront *w)
|
GpuDispatcher::notifyWgCompl(Wavefront *w)
|
||||||
{
|
{
|
||||||
int kern_id = w->kern_id;
|
int kern_id = w->kernId;
|
||||||
DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id);
|
DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id);
|
||||||
assert(ndRangeMap[kern_id].dispatchId == kern_id);
|
assert(ndRangeMap[kern_id].dispatchId == kern_id);
|
||||||
ndRangeMap[kern_id].numWgCompleted++;
|
ndRangeMap[kern_id].numWgCompleted++;
|
||||||
|
|
|
@ -115,7 +115,7 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
|
||||||
{
|
{
|
||||||
// calculate the virtual address to fetch from the SQC
|
// calculate the virtual address to fetch from the SQC
|
||||||
Addr vaddr = wavefront->pc() + wavefront->instructionBuffer.size();
|
Addr vaddr = wavefront->pc() + wavefront->instructionBuffer.size();
|
||||||
vaddr = wavefront->base_ptr + vaddr * sizeof(GPUStaticInst*);
|
vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*);
|
||||||
|
|
||||||
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
|
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
|
||||||
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
|
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
|
||||||
|
|
|
@ -212,16 +212,16 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decrement outstanding register count
|
// Decrement outstanding register count
|
||||||
computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1);
|
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
|
||||||
|
|
||||||
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
|
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
|
||||||
MO_H(m->m_op)) {
|
MO_H(m->m_op)) {
|
||||||
computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_gm, m->time,
|
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
|
||||||
-1);
|
-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
|
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
|
||||||
computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_gm, m->time,
|
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
|
||||||
-1);
|
-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -170,16 +170,16 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decrement outstanding request count
|
// Decrement outstanding request count
|
||||||
computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1);
|
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
|
||||||
|
|
||||||
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op)
|
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op)
|
||||||
|| MO_H(m->m_op)) {
|
|| MO_H(m->m_op)) {
|
||||||
computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_lm,
|
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
|
||||||
m->time, -1);
|
m->time, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
|
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
|
||||||
computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_lm,
|
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
|
||||||
m->time, -1);
|
m->time, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,43 +52,43 @@ WavefrontParams::create()
|
||||||
Wavefront::Wavefront(const Params *p)
|
Wavefront::Wavefront(const Params *p)
|
||||||
: SimObject(p), callArgMem(nullptr)
|
: SimObject(p), callArgMem(nullptr)
|
||||||
{
|
{
|
||||||
last_trace = 0;
|
lastTrace = 0;
|
||||||
simdId = p->simdId;
|
simdId = p->simdId;
|
||||||
wfSlotId = p->wf_slot_id;
|
wfSlotId = p->wf_slot_id;
|
||||||
status = S_STOPPED;
|
status = S_STOPPED;
|
||||||
reservedVectorRegs = 0;
|
reservedVectorRegs = 0;
|
||||||
startVgprIndex = 0;
|
startVgprIndex = 0;
|
||||||
outstanding_reqs = 0;
|
outstandingReqs = 0;
|
||||||
mem_reqs_in_pipe = 0;
|
memReqsInPipe = 0;
|
||||||
outstanding_reqs_wr_gm = 0;
|
outstandingReqsWrGm = 0;
|
||||||
outstanding_reqs_wr_lm = 0;
|
outstandingReqsWrLm = 0;
|
||||||
outstanding_reqs_rd_gm = 0;
|
outstandingReqsRdGm = 0;
|
||||||
outstanding_reqs_rd_lm = 0;
|
outstandingReqsRdLm = 0;
|
||||||
rd_lm_reqs_in_pipe = 0;
|
rdLmReqsInPipe = 0;
|
||||||
rd_gm_reqs_in_pipe = 0;
|
rdGmReqsInPipe = 0;
|
||||||
wr_lm_reqs_in_pipe = 0;
|
wrLmReqsInPipe = 0;
|
||||||
wr_gm_reqs_in_pipe = 0;
|
wrGmReqsInPipe = 0;
|
||||||
|
|
||||||
barrier_cnt = 0;
|
barrierCnt = 0;
|
||||||
old_barrier_cnt = 0;
|
oldBarrierCnt = 0;
|
||||||
stalledAtBarrier = false;
|
stalledAtBarrier = false;
|
||||||
|
|
||||||
mem_trace_busy = 0;
|
memTraceBusy = 0;
|
||||||
old_vgpr_tcnt = 0xffffffffffffffffll;
|
oldVgprTcnt = 0xffffffffffffffffll;
|
||||||
old_dgpr_tcnt = 0xffffffffffffffffll;
|
oldDgprTcnt = 0xffffffffffffffffll;
|
||||||
old_vgpr.resize(p->wfSize);
|
oldVgpr.resize(p->wfSize);
|
||||||
|
|
||||||
pendingFetch = false;
|
pendingFetch = false;
|
||||||
dropFetch = false;
|
dropFetch = false;
|
||||||
condRegState = new ConditionRegisterState();
|
condRegState = new ConditionRegisterState();
|
||||||
maxSpVgprs = 0;
|
maxSpVgprs = 0;
|
||||||
maxDpVgprs = 0;
|
maxDpVgprs = 0;
|
||||||
last_addr.resize(p->wfSize);
|
lastAddr.resize(p->wfSize);
|
||||||
workitemFlatId.resize(p->wfSize);
|
workItemFlatId.resize(p->wfSize);
|
||||||
old_dgpr.resize(p->wfSize);
|
oldDgpr.resize(p->wfSize);
|
||||||
bar_cnt.resize(p->wfSize);
|
barCnt.resize(p->wfSize);
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
workitemid[i].resize(p->wfSize);
|
workItemId[i].resize(p->wfSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ void
|
||||||
Wavefront::start(uint64_t _wfDynId,uint64_t _base_ptr)
|
Wavefront::start(uint64_t _wfDynId,uint64_t _base_ptr)
|
||||||
{
|
{
|
||||||
wfDynId = _wfDynId;
|
wfDynId = _wfDynId;
|
||||||
base_ptr = _base_ptr;
|
basePtr = _base_ptr;
|
||||||
status = S_RUNNING;
|
status = S_RUNNING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -333,12 +333,12 @@ Wavefront::ready(itype_e type)
|
||||||
|
|
||||||
// Is the wave waiting at a barrier
|
// Is the wave waiting at a barrier
|
||||||
if (stalledAtBarrier) {
|
if (stalledAtBarrier) {
|
||||||
if (!computeUnit->AllAtBarrier(barrier_id,barrier_cnt,
|
if (!computeUnit->AllAtBarrier(barrierId,barrierCnt,
|
||||||
computeUnit->getRefCounter(dispatchid, wg_id))) {
|
computeUnit->getRefCounter(dispatchId, wgId))) {
|
||||||
// Are all threads at barrier?
|
// Are all threads at barrier?
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
old_barrier_cnt = barrier_cnt;
|
oldBarrierCnt = barrierCnt;
|
||||||
stalledAtBarrier = false;
|
stalledAtBarrier = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -395,7 +395,7 @@ Wavefront::ready(itype_e type)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Are there in pipe or outstanding memory requests?
|
// Are there in pipe or outstanding memory requests?
|
||||||
if ((outstanding_reqs + mem_reqs_in_pipe) > 0) {
|
if ((outstandingReqs + memReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -416,7 +416,7 @@ Wavefront::ready(itype_e type)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Are there in pipe or outstanding memory requests?
|
// Are there in pipe or outstanding memory requests?
|
||||||
if ((outstanding_reqs + mem_reqs_in_pipe) > 0) {
|
if ((outstandingReqs + memReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -444,7 +444,7 @@ Wavefront::ready(itype_e type)
|
||||||
// Here Global memory instruction
|
// Here Global memory instruction
|
||||||
if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
|
if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
|
||||||
// Are there in pipe or outstanding global memory write requests?
|
// Are there in pipe or outstanding global memory write requests?
|
||||||
if ((outstanding_reqs_wr_gm + wr_gm_reqs_in_pipe) > 0) {
|
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -452,7 +452,7 @@ Wavefront::ready(itype_e type)
|
||||||
if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
|
if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
|
||||||
IS_OT_HIST_GM(ii->opType())) {
|
IS_OT_HIST_GM(ii->opType())) {
|
||||||
// Are there in pipe or outstanding global memory read requests?
|
// Are there in pipe or outstanding global memory read requests?
|
||||||
if ((outstanding_reqs_rd_gm + rd_gm_reqs_in_pipe) > 0)
|
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -467,7 +467,7 @@ Wavefront::ready(itype_e type)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!computeUnit->globalMemoryPipe.
|
if (!computeUnit->globalMemoryPipe.
|
||||||
isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) {
|
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
|
||||||
// Can we insert a new request to the Global Mem Request FIFO?
|
// Can we insert a new request to the Global Mem Request FIFO?
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -484,14 +484,14 @@ Wavefront::ready(itype_e type)
|
||||||
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
|
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
|
||||||
// Here for Shared memory instruction
|
// Here for Shared memory instruction
|
||||||
if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
|
if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
|
||||||
if ((outstanding_reqs_wr_lm + wr_lm_reqs_in_pipe) > 0) {
|
if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
|
if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
|
||||||
IS_OT_HIST_LM(ii->opType())) {
|
IS_OT_HIST_LM(ii->opType())) {
|
||||||
if ((outstanding_reqs_rd_lm + rd_lm_reqs_in_pipe) > 0) {
|
if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -506,7 +506,7 @@ Wavefront::ready(itype_e type)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!computeUnit->localMemoryPipe.
|
if (!computeUnit->localMemoryPipe.
|
||||||
isLMReqFIFOWrRdy(rd_lm_reqs_in_pipe + wr_lm_reqs_in_pipe)) {
|
isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) {
|
||||||
// Can we insert a new request to the LDS Request FIFO?
|
// Can we insert a new request to the LDS Request FIFO?
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -523,14 +523,14 @@ Wavefront::ready(itype_e type)
|
||||||
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
|
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
|
||||||
// Here for Private memory instruction ------------------------ //
|
// Here for Private memory instruction ------------------------ //
|
||||||
if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
|
if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
|
||||||
if ((outstanding_reqs_wr_gm + wr_gm_reqs_in_pipe) > 0) {
|
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
|
if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
|
||||||
IS_OT_HIST_PM(ii->opType())) {
|
IS_OT_HIST_PM(ii->opType())) {
|
||||||
if ((outstanding_reqs_rd_gm + rd_gm_reqs_in_pipe) > 0) {
|
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -546,7 +546,7 @@ Wavefront::ready(itype_e type)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!computeUnit->globalMemoryPipe.
|
if (!computeUnit->globalMemoryPipe.
|
||||||
isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) {
|
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
|
||||||
// Can we insert a new request to the Global Mem Request FIFO?
|
// Can we insert a new request to the Global Mem Request FIFO?
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -579,13 +579,13 @@ Wavefront::ready(itype_e type)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (!computeUnit->globalMemoryPipe.
|
if (!computeUnit->globalMemoryPipe.
|
||||||
isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) {
|
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
|
||||||
// Can we insert a new request to the Global Mem Request FIFO?
|
// Can we insert a new request to the Global Mem Request FIFO?
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!computeUnit->localMemoryPipe.
|
if (!computeUnit->localMemoryPipe.
|
||||||
isLMReqFIFOWrRdy(rd_lm_reqs_in_pipe + wr_lm_reqs_in_pipe)) {
|
isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) {
|
||||||
// Can we insert a new request to the LDS Request FIFO?
|
// Can we insert a new request to the LDS Request FIFO?
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -636,8 +636,8 @@ Wavefront::updateResources()
|
||||||
ticks(computeUnit->issuePeriod));
|
ticks(computeUnit->issuePeriod));
|
||||||
} else if (ii->opType() == Enums::OT_FLAT_READ) {
|
} else if (ii->opType() == Enums::OT_FLAT_READ) {
|
||||||
assert(Enums::SC_NONE != ii->executedAs());
|
assert(Enums::SC_NONE != ii->executedAs());
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
rd_gm_reqs_in_pipe++;
|
rdGmReqsInPipe++;
|
||||||
if ( Enums::SC_SHARED == ii->executedAs() ) {
|
if ( Enums::SC_SHARED == ii->executedAs() ) {
|
||||||
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
||||||
preset(computeUnit->shader->ticks(4));
|
preset(computeUnit->shader->ticks(4));
|
||||||
|
@ -651,8 +651,8 @@ Wavefront::updateResources()
|
||||||
}
|
}
|
||||||
} else if (ii->opType() == Enums::OT_FLAT_WRITE) {
|
} else if (ii->opType() == Enums::OT_FLAT_WRITE) {
|
||||||
assert(Enums::SC_NONE != ii->executedAs());
|
assert(Enums::SC_NONE != ii->executedAs());
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_gm_reqs_in_pipe++;
|
wrGmReqsInPipe++;
|
||||||
if (Enums::SC_SHARED == ii->executedAs()) {
|
if (Enums::SC_SHARED == ii->executedAs()) {
|
||||||
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
|
@ -665,67 +665,67 @@ Wavefront::updateResources()
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
}
|
}
|
||||||
} else if (IS_OT_READ_GM(ii->opType())) {
|
} else if (IS_OT_READ_GM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
rd_gm_reqs_in_pipe++;
|
rdGmReqsInPipe++;
|
||||||
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
||||||
preset(computeUnit->shader->ticks(4));
|
preset(computeUnit->shader->ticks(4));
|
||||||
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_WRITE_GM(ii->opType())) {
|
} else if (IS_OT_WRITE_GM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_gm_reqs_in_pipe++;
|
wrGmReqsInPipe++;
|
||||||
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_ATOMIC_GM(ii->opType())) {
|
} else if (IS_OT_ATOMIC_GM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_gm_reqs_in_pipe++;
|
wrGmReqsInPipe++;
|
||||||
rd_gm_reqs_in_pipe++;
|
rdGmReqsInPipe++;
|
||||||
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_READ_LM(ii->opType())) {
|
} else if (IS_OT_READ_LM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
rd_lm_reqs_in_pipe++;
|
rdLmReqsInPipe++;
|
||||||
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
||||||
preset(computeUnit->shader->ticks(4));
|
preset(computeUnit->shader->ticks(4));
|
||||||
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
|
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_WRITE_LM(ii->opType())) {
|
} else if (IS_OT_WRITE_LM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_lm_reqs_in_pipe++;
|
wrLmReqsInPipe++;
|
||||||
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
|
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_ATOMIC_LM(ii->opType())) {
|
} else if (IS_OT_ATOMIC_LM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_lm_reqs_in_pipe++;
|
wrLmReqsInPipe++;
|
||||||
rd_lm_reqs_in_pipe++;
|
rdLmReqsInPipe++;
|
||||||
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
|
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_READ_PM(ii->opType())) {
|
} else if (IS_OT_READ_PM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
rd_gm_reqs_in_pipe++;
|
rdGmReqsInPipe++;
|
||||||
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
||||||
preset(computeUnit->shader->ticks(4));
|
preset(computeUnit->shader->ticks(4));
|
||||||
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_WRITE_PM(ii->opType())) {
|
} else if (IS_OT_WRITE_PM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_gm_reqs_in_pipe++;
|
wrGmReqsInPipe++;
|
||||||
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
||||||
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
|
||||||
} else if (IS_OT_ATOMIC_PM(ii->opType())) {
|
} else if (IS_OT_ATOMIC_PM(ii->opType())) {
|
||||||
mem_reqs_in_pipe++;
|
memReqsInPipe++;
|
||||||
wr_gm_reqs_in_pipe++;
|
wrGmReqsInPipe++;
|
||||||
rd_gm_reqs_in_pipe++;
|
rdGmReqsInPipe++;
|
||||||
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
|
||||||
preset(computeUnit->shader->ticks(8));
|
preset(computeUnit->shader->ticks(8));
|
||||||
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
|
||||||
|
@ -865,7 +865,7 @@ Wavefront::exec()
|
||||||
bool
|
bool
|
||||||
Wavefront::waitingAtBarrier(int lane)
|
Wavefront::waitingAtBarrier(int lane)
|
||||||
{
|
{
|
||||||
return bar_cnt[lane] < max_bar_cnt;
|
return barCnt[lane] < maxBarCnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -155,16 +155,16 @@ class Wavefront : public SimObject
|
||||||
enum status_e {S_STOPPED,S_RETURNING,S_RUNNING};
|
enum status_e {S_STOPPED,S_RETURNING,S_RUNNING};
|
||||||
|
|
||||||
// Base pointer for array of instruction pointers
|
// Base pointer for array of instruction pointers
|
||||||
uint64_t base_ptr;
|
uint64_t basePtr;
|
||||||
|
|
||||||
uint32_t old_barrier_cnt;
|
uint32_t oldBarrierCnt;
|
||||||
uint32_t barrier_cnt;
|
uint32_t barrierCnt;
|
||||||
uint32_t barrier_id;
|
uint32_t barrierId;
|
||||||
uint32_t barrier_slots;
|
uint32_t barrierSlots;
|
||||||
status_e status;
|
status_e status;
|
||||||
// HW slot id where the WF is mapped to inside a SIMD unit
|
// HW slot id where the WF is mapped to inside a SIMD unit
|
||||||
int wfSlotId;
|
int wfSlotId;
|
||||||
int kern_id;
|
int kernId;
|
||||||
// SIMD unit where the WV has been scheduled
|
// SIMD unit where the WV has been scheduled
|
||||||
int simdId;
|
int simdId;
|
||||||
// pointer to parent CU
|
// pointer to parent CU
|
||||||
|
@ -193,37 +193,37 @@ class Wavefront : public SimObject
|
||||||
bool isOldestInstALU();
|
bool isOldestInstALU();
|
||||||
bool isOldestInstBarrier();
|
bool isOldestInstBarrier();
|
||||||
// used for passing spill address to DDInstGPU
|
// used for passing spill address to DDInstGPU
|
||||||
std::vector<Addr> last_addr;
|
std::vector<Addr> lastAddr;
|
||||||
std::vector<uint32_t> workitemid[3];
|
std::vector<uint32_t> workItemId[3];
|
||||||
std::vector<uint32_t> workitemFlatId;
|
std::vector<uint32_t> workItemFlatId;
|
||||||
uint32_t workgroupid[3];
|
uint32_t workGroupId[3];
|
||||||
uint32_t workgroupsz[3];
|
uint32_t workGroupSz[3];
|
||||||
uint32_t gridsz[3];
|
uint32_t gridSz[3];
|
||||||
uint32_t wg_id;
|
uint32_t wgId;
|
||||||
uint32_t wg_sz;
|
uint32_t wgSz;
|
||||||
uint32_t dynwaveid;
|
uint32_t dynWaveId;
|
||||||
uint32_t maxdynwaveid;
|
uint32_t maxDynWaveId;
|
||||||
uint32_t dispatchid;
|
uint32_t dispatchId;
|
||||||
// outstanding global+local memory requests
|
// outstanding global+local memory requests
|
||||||
uint32_t outstanding_reqs;
|
uint32_t outstandingReqs;
|
||||||
// memory requests between scoreboard
|
// memory requests between scoreboard
|
||||||
// and execute stage not yet executed
|
// and execute stage not yet executed
|
||||||
uint32_t mem_reqs_in_pipe;
|
uint32_t memReqsInPipe;
|
||||||
// outstanding global memory write requests
|
// outstanding global memory write requests
|
||||||
uint32_t outstanding_reqs_wr_gm;
|
uint32_t outstandingReqsWrGm;
|
||||||
// outstanding local memory write requests
|
// outstanding local memory write requests
|
||||||
uint32_t outstanding_reqs_wr_lm;
|
uint32_t outstandingReqsWrLm;
|
||||||
// outstanding global memory read requests
|
// outstanding global memory read requests
|
||||||
uint32_t outstanding_reqs_rd_gm;
|
uint32_t outstandingReqsRdGm;
|
||||||
// outstanding local memory read requests
|
// outstanding local memory read requests
|
||||||
uint32_t outstanding_reqs_rd_lm;
|
uint32_t outstandingReqsRdLm;
|
||||||
uint32_t rd_lm_reqs_in_pipe;
|
uint32_t rdLmReqsInPipe;
|
||||||
uint32_t rd_gm_reqs_in_pipe;
|
uint32_t rdGmReqsInPipe;
|
||||||
uint32_t wr_lm_reqs_in_pipe;
|
uint32_t wrLmReqsInPipe;
|
||||||
uint32_t wr_gm_reqs_in_pipe;
|
uint32_t wrGmReqsInPipe;
|
||||||
|
|
||||||
int mem_trace_busy;
|
int memTraceBusy;
|
||||||
uint64_t last_trace;
|
uint64_t lastTrace;
|
||||||
// number of vector registers reserved by WF
|
// number of vector registers reserved by WF
|
||||||
int reservedVectorRegs;
|
int reservedVectorRegs;
|
||||||
// Index into the Vector Register File's namespace where the WF's registers
|
// Index into the Vector Register File's namespace where the WF's registers
|
||||||
|
@ -231,25 +231,25 @@ class Wavefront : public SimObject
|
||||||
uint32_t startVgprIndex;
|
uint32_t startVgprIndex;
|
||||||
|
|
||||||
// Old value of destination gpr (for trace)
|
// Old value of destination gpr (for trace)
|
||||||
std::vector<uint32_t> old_vgpr;
|
std::vector<uint32_t> oldVgpr;
|
||||||
// Id of destination gpr (for trace)
|
// Id of destination gpr (for trace)
|
||||||
uint32_t old_vgpr_id;
|
uint32_t oldVgprId;
|
||||||
// Tick count of last old_vgpr copy
|
// Tick count of last old_vgpr copy
|
||||||
uint64_t old_vgpr_tcnt;
|
uint64_t oldVgprTcnt;
|
||||||
|
|
||||||
// Old value of destination gpr (for trace)
|
// Old value of destination gpr (for trace)
|
||||||
std::vector<uint64_t> old_dgpr;
|
std::vector<uint64_t> oldDgpr;
|
||||||
// Id of destination gpr (for trace)
|
// Id of destination gpr (for trace)
|
||||||
uint32_t old_dgpr_id;
|
uint32_t oldDgprId;
|
||||||
// Tick count of last old_vgpr copy
|
// Tick count of last old_vgpr copy
|
||||||
uint64_t old_dgpr_tcnt;
|
uint64_t oldDgprTcnt;
|
||||||
|
|
||||||
// Execution mask at wavefront start
|
// Execution mask at wavefront start
|
||||||
VectorMask init_mask;
|
VectorMask initMask;
|
||||||
|
|
||||||
// number of barriers this WF has joined
|
// number of barriers this WF has joined
|
||||||
std::vector<int> bar_cnt;
|
std::vector<int> barCnt;
|
||||||
int max_bar_cnt;
|
int maxBarCnt;
|
||||||
// Flag to stall a wave on barrier
|
// Flag to stall a wave on barrier
|
||||||
bool stalledAtBarrier;
|
bool stalledAtBarrier;
|
||||||
|
|
||||||
|
@ -333,7 +333,7 @@ class Wavefront : public SimObject
|
||||||
int ready(itype_e type);
|
int ready(itype_e type);
|
||||||
bool instructionBufferHasBranch();
|
bool instructionBufferHasBranch();
|
||||||
void regStats();
|
void regStats();
|
||||||
VectorMask get_pred() { return execMask() & init_mask; }
|
VectorMask getPred() { return execMask() & initMask; }
|
||||||
|
|
||||||
bool waitingAtBarrier(int lane);
|
bool waitingAtBarrier(int lane);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue