From 589e13a23b3969c1137d2170a8638356d0c0fc65 Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Fri, 16 Sep 2016 12:26:52 -0400 Subject: [PATCH] gpu-compute: Wavefront refactoring Renaming members of the Wavefront class in accordance with the style guide. --- src/arch/hsail/gen.py | 30 ++--- src/arch/hsail/insts/decl.hh | 10 +- src/arch/hsail/insts/main.cc | 14 +-- src/arch/hsail/insts/mem_impl.hh | 78 ++++++------- src/arch/hsail/insts/pseudo_inst.cc | 90 +++++++-------- src/gpu-compute/compute_unit.cc | 72 ++++++------ src/gpu-compute/dispatcher.cc | 2 +- src/gpu-compute/fetch_unit.cc | 2 +- src/gpu-compute/global_memory_pipeline.cc | 6 +- src/gpu-compute/local_memory_pipeline.cc | 6 +- src/gpu-compute/wavefront.cc | 130 +++++++++++----------- src/gpu-compute/wavefront.hh | 78 ++++++------- 12 files changed, 259 insertions(+), 259 deletions(-) diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py index f77680541..22832658f 100755 --- a/src/arch/hsail/gen.py +++ b/src/arch/hsail/gen.py @@ -233,7 +233,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) typedef Base::DestCType DestCType; - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -254,7 +254,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) typedef Base::DestCType DestCType; typedef Base::SrcCType SrcCType; - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -275,7 +275,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -310,7 +310,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) typedef typename Base::Src1CType Src1T; typedef typename Base::Src2CType Src2T; - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -344,7 +344,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) typedef CType Src0T; typedef typename Base::Src1CType Src1T; - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -371,7 +371,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; @@ -399,7 +399,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -745,17 +745,17 @@ def gen_special(brig_opcode, expr, dest_type='U32'): gen(brig_opcode, None, expr, base_class) -gen_special('WorkItemId', 'w->workitemid[src0][lane]') +gen_special('WorkItemId', 'w->workItemId[src0][lane]') gen_special('WorkItemAbsId', - 'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])') -gen_special('WorkGroupId', 'w->workgroupid[src0]') -gen_special('WorkGroupSize', 'w->workgroupsz[src0]') -gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]') -gen_special('GridSize', 'w->gridsz[src0]') + 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])') +gen_special('WorkGroupId', 'w->workGroupId[src0]') +gen_special('WorkGroupSize', 'w->workGroupSz[src0]') +gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]') +gen_special('GridSize', 'w->gridSz[src0]') gen_special('GridGroups', - 'divCeil(w->gridsz[src0],w->workgroupsz[src0])') + 'divCeil(w->gridSz[src0],w->workGroupSz[src0])') gen_special('LaneId', 'lane') -gen_special('WaveId', 'w->dynwaveid') +gen_special('WaveId', 'w->dynWaveId') gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64') # gen_special('CU'', ') diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh index 90609c365..48e022ff7 100644 --- a/src/arch/hsail/insts/decl.hh +++ b/src/arch/hsail/insts/decl.hh @@ -960,7 +960,7 @@ namespace HsailISA gpuDynInst->simdId = w->simdId; gpuDynInst->wfSlotId = w->wfSlotId; gpuDynInst->wfDynId = w->wfDynId; - gpuDynInst->kern_id = w->kern_id; + gpuDynInst->kern_id = w->kernId; gpuDynInst->cu_id = w->computeUnit->cu_id; gpuDynInst->memoryOrder = @@ -971,10 +971,10 @@ namespace HsailISA GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); gmp->getGMReqFIFO().push(gpuDynInst); - w->wr_gm_reqs_in_pipe--; - w->rd_gm_reqs_in_pipe--; - w->mem_reqs_in_pipe--; - w->outstanding_reqs++; + w->wrGmReqsInPipe--; + w->rdGmReqsInPipe--; + w->memReqsInPipe--; + w->outstandingReqs++; } else if (o_type == Enums::OT_SHARED_MEMFENCE) { // no-op } else { diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc index 004054524..f1662430a 100644 --- a/src/arch/hsail/insts/main.cc +++ b/src/arch/hsail/insts/main.cc @@ -131,12 +131,12 @@ namespace HsailISA { Wavefront *w = gpuDynInst->wavefront(); - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); // mask off completed work-items for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { - w->init_mask[lane] = 0; + w->initMask[lane] = 0; } } @@ -149,14 +149,14 @@ namespace HsailISA } // if all work-items have completed, then wave-front is done - if (w->init_mask.none()) { + if (w->initMask.none()) { w->status = Wavefront::S_STOPPED; int32_t refCount = w->computeUnit->getLds(). - decreaseRefCounter(w->dispatchid, w->wg_id); + decreaseRefCounter(w->dispatchId, w->wgId); DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", - w->computeUnit->cu_id, w->wg_id, refCount); + w->computeUnit->cu_id, w->wgId, refCount); // free the vector registers of the completed wavefront w->computeUnit->vectorRegsReserved[w->simdId] -= @@ -201,8 +201,8 @@ namespace HsailISA { Wavefront *w = gpuDynInst->wavefront(); - assert(w->barrier_cnt == w->old_barrier_cnt); - w->barrier_cnt = w->old_barrier_cnt + 1; + assert(w->barrierCnt == w->oldBarrierCnt); + w->barrierCnt = w->oldBarrierCnt + 1; w->stalledAtBarrier = true; } } // namespace HsailISA diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh index 8329c6e8a..3042e2201 100644 --- a/src/arch/hsail/insts/mem_impl.hh +++ b/src/arch/hsail/insts/mem_impl.hh @@ -59,7 +59,7 @@ namespace HsailISA Wavefront *w = gpuDynInst->wavefront(); typedef typename DestDataType::CType CType M5_VAR_USED; - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); std::vector addr_vec; addr_vec.resize(w->computeUnit->wfSize(), (Addr)0); this->addr.calcVector(w, addr_vec); @@ -159,7 +159,7 @@ namespace HsailISA Wavefront *w = gpuDynInst->wavefront(); typedef typename MemDataType::CType MemCType; - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); // Kernarg references are handled uniquely for now (no Memory Request // is used), so special-case them up front. Someday we should @@ -230,7 +230,7 @@ namespace HsailISA m->simdId = w->simdId; m->wfSlotId = w->wfSlotId; m->wfDynId = w->wfDynId; - m->kern_id = w->kern_id; + m->kern_id = w->kernId; m->cu_id = w->computeUnit->cu_id; m->latency.init(&w->computeUnit->shader->tick_cnt); @@ -261,8 +261,8 @@ namespace HsailISA } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; break; case Brig::BRIG_SEGMENT_SPILL: @@ -281,14 +281,14 @@ namespace HsailISA m->addr[lane] = m->addr[lane] * w->spillWidth + lane * sizeof(MemCType) + w->spillBase; - w->last_addr[lane] = m->addr[lane]; + w->lastAddr[lane] = m->addr[lane]; } } } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; break; case Brig::BRIG_SEGMENT_GROUP: @@ -296,8 +296,8 @@ namespace HsailISA m->pipeId = LDSMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(24)); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); - w->outstanding_reqs_rd_lm++; - w->rd_lm_reqs_in_pipe--; + w->outstandingReqsRdLm++; + w->rdLmReqsInPipe--; break; case Brig::BRIG_SEGMENT_READONLY: @@ -313,8 +313,8 @@ namespace HsailISA } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; break; case Brig::BRIG_SEGMENT_PRIVATE: @@ -332,8 +332,8 @@ namespace HsailISA } } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; break; default: @@ -341,8 +341,8 @@ namespace HsailISA m->addr[0]); } - w->outstanding_reqs++; - w->mem_reqs_in_pipe--; + w->outstandingReqs++; + w->memReqsInPipe--; } templateget_pred(); + const VectorMask &mask = w->getPred(); // arg references are handled uniquely for now (no Memory Request // is used), so special-case them up front. Someday we should @@ -419,7 +419,7 @@ namespace HsailISA m->simdId = w->simdId; m->wfSlotId = w->wfSlotId; m->wfDynId = w->wfDynId; - m->kern_id = w->kern_id; + m->kern_id = w->kernId; m->cu_id = w->computeUnit->cu_id; m->latency.init(&w->computeUnit->shader->tick_cnt); @@ -448,8 +448,8 @@ namespace HsailISA } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_wr_gm++; - w->wr_gm_reqs_in_pipe--; + w->outstandingReqsWrGm++; + w->wrGmReqsInPipe--; break; case Brig::BRIG_SEGMENT_SPILL: @@ -469,8 +469,8 @@ namespace HsailISA } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_wr_gm++; - w->wr_gm_reqs_in_pipe--; + w->outstandingReqsWrGm++; + w->wrGmReqsInPipe--; break; case Brig::BRIG_SEGMENT_GROUP: @@ -478,8 +478,8 @@ namespace HsailISA m->pipeId = LDSMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(24)); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); - w->outstanding_reqs_wr_lm++; - w->wr_lm_reqs_in_pipe--; + w->outstandingReqsWrLm++; + w->wrLmReqsInPipe--; break; case Brig::BRIG_SEGMENT_PRIVATE: @@ -497,16 +497,16 @@ namespace HsailISA } w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_wr_gm++; - w->wr_gm_reqs_in_pipe--; + w->outstandingReqsWrGm++; + w->wrGmReqsInPipe--; break; default: fatal("Store to unsupported segment %d\n", this->segment); } - w->outstanding_reqs++; - w->mem_reqs_in_pipe--; + w->outstandingReqs++; + w->memReqsInPipe--; } templatesimdId = w->simdId; m->wfSlotId = w->wfSlotId; m->wfDynId = w->wfDynId; - m->kern_id = w->kern_id; + m->kern_id = w->kernId; m->cu_id = w->computeUnit->cu_id; m->latency.init(&w->computeUnit->shader->tick_cnt); @@ -607,10 +607,10 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_wr_gm++; - w->wr_gm_reqs_in_pipe--; - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; + w->outstandingReqsWrGm++; + w->wrGmReqsInPipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; break; case Brig::BRIG_SEGMENT_GROUP: @@ -618,10 +618,10 @@ namespace HsailISA m->pipeId = LDSMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(24)); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); - w->outstanding_reqs_wr_lm++; - w->wr_lm_reqs_in_pipe--; - w->outstanding_reqs_rd_lm++; - w->rd_lm_reqs_in_pipe--; + w->outstandingReqsWrLm++; + w->wrLmReqsInPipe--; + w->outstandingReqsRdLm++; + w->rdLmReqsInPipe--; break; default: @@ -629,8 +629,8 @@ namespace HsailISA this->segment); } - w->outstanding_reqs++; - w->mem_reqs_in_pipe--; + w->outstandingReqs++; + w->memReqsInPipe--; } const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp); diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc index 56ca8047c..2bfc5aaad 100644 --- a/src/arch/hsail/insts/pseudo_inst.cc +++ b/src/arch/hsail/insts/pseudo_inst.cc @@ -79,7 +79,7 @@ namespace HsailISA void Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int op = 0; bool got_op = false; @@ -181,7 +181,7 @@ namespace HsailISA Call::MagicPrintLane(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get(w, lane, 1); @@ -204,7 +204,7 @@ namespace HsailISA Call::MagicPrintLane64(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int64_t src_val1 = src1.get(w, lane, 1); @@ -227,7 +227,7 @@ namespace HsailISA Call::MagicPrintWF32(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); @@ -265,7 +265,7 @@ namespace HsailISA Call::MagicPrintWF32ID(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); std::string res_str; int src_val3 = -1; res_str = csprintf("krl_prt (%s)\n", disassemble()); @@ -307,7 +307,7 @@ namespace HsailISA Call::MagicPrintWF64(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); @@ -345,7 +345,7 @@ namespace HsailISA Call::MagicPrintWFID64(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); std::string res_str; int src_val3 = -1; res_str = csprintf("krl_prt (%s)\n", disassemble()); @@ -387,7 +387,7 @@ namespace HsailISA Call::MagicPrintWFFloat(Wavefront *w) { #if TRACING_ON - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); @@ -425,7 +425,7 @@ namespace HsailISA res_str = csprintf("Breakpoint encountered for wavefront %i\n", w->wfSlotId); - res_str += csprintf(" Kern ID: %i\n", w->kern_id); + res_str += csprintf(" Kern ID: %i\n", w->kernId); res_str += csprintf(" Phase ID: %i\n", w->simdId); res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); res_str += csprintf(" Exec mask: "); @@ -455,7 +455,7 @@ namespace HsailISA void Call::MagicPrefixSum(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int res = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { @@ -474,7 +474,7 @@ namespace HsailISA // The reduction instruction takes up to 64 inputs (one from // each thread in a WF) and sums them. It returns the sum to // each thread in the WF. - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int res = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { @@ -494,7 +494,7 @@ namespace HsailISA void Call::MagicMaskLower(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int res = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { @@ -519,7 +519,7 @@ namespace HsailISA void Call::MagicMaskUpper(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int res = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -544,42 +544,42 @@ namespace HsailISA void Call::MagicJoinWFBar(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int max_cnt = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { - w->bar_cnt[lane]++; + w->barCnt[lane]++; - if (w->bar_cnt[lane] > max_cnt) { - max_cnt = w->bar_cnt[lane]; + if (w->barCnt[lane] > max_cnt) { + max_cnt = w->barCnt[lane]; } } } - if (max_cnt > w->max_bar_cnt) { - w->max_bar_cnt = max_cnt; + if (max_cnt > w->maxBarCnt) { + w->maxBarCnt = max_cnt; } } void Call::MagicWaitWFBar(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int max_cnt = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { - w->bar_cnt[lane]--; + w->barCnt[lane]--; } - if (w->bar_cnt[lane] > max_cnt) { - max_cnt = w->bar_cnt[lane]; + if (w->barCnt[lane] > max_cnt) { + max_cnt = w->barCnt[lane]; } } - if (max_cnt < w->max_bar_cnt) { - w->max_bar_cnt = max_cnt; + if (max_cnt < w->maxBarCnt) { + w->maxBarCnt = max_cnt; } w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, @@ -591,7 +591,7 @@ namespace HsailISA void Call::MagicPanic(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { @@ -648,12 +648,12 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(64)); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_wr_gm++; - w->wr_gm_reqs_in_pipe--; - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; - w->outstanding_reqs++; - w->mem_reqs_in_pipe--; + w->outstandingReqsWrGm++; + w->wrGmReqsInPipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; + w->outstandingReqs++; + w->memReqsInPipe--; } void @@ -687,12 +687,12 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(64)); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_wr_gm++; - w->wr_gm_reqs_in_pipe--; - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; - w->outstanding_reqs++; - w->mem_reqs_in_pipe--; + w->outstandingReqsWrGm++; + w->wrGmReqsInPipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; + w->outstandingReqs++; + w->memReqsInPipe--; } void @@ -725,16 +725,16 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); - w->outstanding_reqs_rd_gm++; - w->rd_gm_reqs_in_pipe--; - w->outstanding_reqs++; - w->mem_reqs_in_pipe--; + w->outstandingReqsRdGm++; + w->rdGmReqsInPipe--; + w->outstandingReqs++; + w->memReqsInPipe--; } void Call::MagicXactCasLd(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int src_val1 = 0; for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { @@ -756,7 +756,7 @@ namespace HsailISA void Call::MagicMostSigThread(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); unsigned mst = true; for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { @@ -770,7 +770,7 @@ namespace HsailISA void Call::MagicMostSigBroadcast(Wavefront *w) { - const VectorMask &mask = w->get_pred(); + const VectorMask &mask = w->getPred(); int res = 0; bool got_res = false; diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 83e2414db..32fa3bd6a 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -178,13 +178,13 @@ ComputeUnit::FillKernelState(Wavefront *w, NDRange *ndr) { w->resizeRegFiles(ndr->q.cRegCount, ndr->q.sRegCount, ndr->q.dRegCount); - w->workgroupsz[0] = ndr->q.wgSize[0]; - w->workgroupsz[1] = ndr->q.wgSize[1]; - w->workgroupsz[2] = ndr->q.wgSize[2]; - w->wg_sz = w->workgroupsz[0] * w->workgroupsz[1] * w->workgroupsz[2]; - w->gridsz[0] = ndr->q.gdSize[0]; - w->gridsz[1] = ndr->q.gdSize[1]; - w->gridsz[2] = ndr->q.gdSize[2]; + w->workGroupSz[0] = ndr->q.wgSize[0]; + w->workGroupSz[1] = ndr->q.wgSize[1]; + w->workGroupSz[2] = ndr->q.wgSize[2]; + w->wgSz = w->workGroupSz[0] * w->workGroupSz[1] * w->workGroupSz[2]; + w->gridSz[0] = ndr->q.gdSize[0]; + w->gridSz[1] = ndr->q.gdSize[1]; + w->gridSz[2] = ndr->q.gdSize[2]; w->kernelArgs = ndr->q.args; w->privSizePerItem = ndr->q.privMemPerItem; w->spillSizePerItem = ndr->q.spillMemPerItem; @@ -236,29 +236,29 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal, init_mask[k] = 1; } - w->kern_id = ndr->dispatchId; - w->dynwaveid = cnt; - w->init_mask = init_mask.to_ullong(); + w->kernId = ndr->dispatchId; + w->dynWaveId = cnt; + w->initMask = init_mask.to_ullong(); for (int k = 0; k < wfSize(); ++k) { - w->workitemid[0][k] = (k+cnt*wfSize()) % trueWgSize[0]; - w->workitemid[1][k] = + w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0]; + w->workItemId[1][k] = ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1]; - w->workitemid[2][k] = + w->workItemId[2][k] = (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]); - w->workitemFlatId[k] = w->workitemid[2][k] * trueWgSize[0] * - trueWgSize[1] + w->workitemid[1][k] * trueWgSize[0] + - w->workitemid[0][k]; + w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] * + trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] + + w->workItemId[0][k]; } - w->barrier_slots = divCeil(trueWgSizeTotal, wfSize()); + w->barrierSlots = divCeil(trueWgSizeTotal, wfSize()); - w->bar_cnt.resize(wfSize(), 0); + w->barCnt.resize(wfSize(), 0); - w->max_bar_cnt = 0; - w->old_barrier_cnt = 0; - w->barrier_cnt = 0; + w->maxBarCnt = 0; + w->oldBarrierCnt = 0; + w->barrierCnt = 0; w->privBase = ndr->q.privMemStart; ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize(); @@ -269,22 +269,22 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal, w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong()); // WG state - w->wg_id = ndr->globalWgId; - w->dispatchid = ndr->dispatchId; - w->workgroupid[0] = w->wg_id % ndr->numWg[0]; - w->workgroupid[1] = (w->wg_id / ndr->numWg[0]) % ndr->numWg[1]; - w->workgroupid[2] = w->wg_id / (ndr->numWg[0] * ndr->numWg[1]); + w->wgId = ndr->globalWgId; + w->dispatchId = ndr->dispatchId; + w->workGroupId[0] = w->wgId % ndr->numWg[0]; + w->workGroupId[1] = (w->wgId / ndr->numWg[0]) % ndr->numWg[1]; + w->workGroupId[2] = w->wgId / (ndr->numWg[0] * ndr->numWg[1]); - w->barrier_id = barrier_id; + w->barrierId = barrier_id; w->stalledAtBarrier = false; // set the wavefront context to have a pointer to this section of the LDS w->ldsChunk = ldsChunk; int32_t refCount M5_VAR_USED = - lds.increaseRefCounter(w->dispatchid, w->wg_id); + lds.increaseRefCounter(w->dispatchId, w->wgId); DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n", - cu_id, w->wg_id, refCount); + cu_id, w->wgId, refCount); w->instructionBuffer.clear(); @@ -468,15 +468,15 @@ ComputeUnit::AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots) DPRINTF(GPUSync, "Checking WF[%d][%d]\n", i_simd, i_wf); DPRINTF(GPUSync, "wf->barrier_id = %d, _barrier_id = %d\n", - w->barrier_id, _barrier_id); + w->barrierId, _barrier_id); DPRINTF(GPUSync, "wf->barrier_cnt %d, bcnt = %d\n", - w->barrier_cnt, bcnt); + w->barrierCnt, bcnt); } if (w->status == Wavefront::S_RUNNING && - w->barrier_id == _barrier_id && w->barrier_cnt == bcnt && - !w->outstanding_reqs) { + w->barrierId == _barrier_id && w->barrierCnt == bcnt && + !w->outstandingReqs) { ++ccnt; DPRINTF(GPUSync, "WF[%d][%d] at barrier, increment ccnt to " @@ -646,17 +646,17 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt) if (w->status == Wavefront::S_RETURNING) { DPRINTF(GPUDisp, "CU%d: WF[%d][%d][wv=%d]: WG id completed %d\n", computeUnit->cu_id, w->simdId, w->wfSlotId, - w->wfDynId, w->kern_id); + w->wfDynId, w->kernId); computeUnit->shader->dispatcher->notifyWgCompl(w); w->status = Wavefront::S_STOPPED; } else { - w->outstanding_reqs--; + w->outstandingReqs--; } DPRINTF(GPUSync, "CU%d: WF[%d][%d]: barrier_cnt = %d\n", computeUnit->cu_id, gpuDynInst->simdId, - gpuDynInst->wfSlotId, w->barrier_cnt); + gpuDynInst->wfSlotId, w->barrierCnt); if (gpuDynInst->useContinuation) { assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc index d1d011c0d..adc1f51bd 100644 --- a/src/gpu-compute/dispatcher.cc +++ b/src/gpu-compute/dispatcher.cc @@ -305,7 +305,7 @@ GpuDispatcher::exec() void GpuDispatcher::notifyWgCompl(Wavefront *w) { - int kern_id = w->kern_id; + int kern_id = w->kernId; DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id); assert(ndRangeMap[kern_id].dispatchId == kern_id); ndRangeMap[kern_id].numWgCompleted++; diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc index 1f0a7d78e..9104c400e 100644 --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -115,7 +115,7 @@ FetchUnit::initiateFetch(Wavefront *wavefront) { // calculate the virtual address to fetch from the SQC Addr vaddr = wavefront->pc() + wavefront->instructionBuffer.size(); - vaddr = wavefront->base_ptr + vaddr * sizeof(GPUStaticInst*); + vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*); DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n", computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr); diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index a6a4d86db..102905ec8 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -212,16 +212,16 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m) } // Decrement outstanding register count - computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1); + computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) || MO_H(m->m_op)) { - computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_gm, m->time, + computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time, -1); } if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { - computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_gm, m->time, + computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time, -1); } diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc index a970d8f9b..e2238bf45 100644 --- a/src/gpu-compute/local_memory_pipeline.cc +++ b/src/gpu-compute/local_memory_pipeline.cc @@ -170,16 +170,16 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m) } // Decrement outstanding request count - computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1); + computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) || MO_H(m->m_op)) { - computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_lm, + computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm, m->time, -1); } if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { - computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_lm, + computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm, m->time, -1); } diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index a20330082..c73307ac4 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -52,43 +52,43 @@ WavefrontParams::create() Wavefront::Wavefront(const Params *p) : SimObject(p), callArgMem(nullptr) { - last_trace = 0; + lastTrace = 0; simdId = p->simdId; wfSlotId = p->wf_slot_id; status = S_STOPPED; reservedVectorRegs = 0; startVgprIndex = 0; - outstanding_reqs = 0; - mem_reqs_in_pipe = 0; - outstanding_reqs_wr_gm = 0; - outstanding_reqs_wr_lm = 0; - outstanding_reqs_rd_gm = 0; - outstanding_reqs_rd_lm = 0; - rd_lm_reqs_in_pipe = 0; - rd_gm_reqs_in_pipe = 0; - wr_lm_reqs_in_pipe = 0; - wr_gm_reqs_in_pipe = 0; + outstandingReqs = 0; + memReqsInPipe = 0; + outstandingReqsWrGm = 0; + outstandingReqsWrLm = 0; + outstandingReqsRdGm = 0; + outstandingReqsRdLm = 0; + rdLmReqsInPipe = 0; + rdGmReqsInPipe = 0; + wrLmReqsInPipe = 0; + wrGmReqsInPipe = 0; - barrier_cnt = 0; - old_barrier_cnt = 0; + barrierCnt = 0; + oldBarrierCnt = 0; stalledAtBarrier = false; - mem_trace_busy = 0; - old_vgpr_tcnt = 0xffffffffffffffffll; - old_dgpr_tcnt = 0xffffffffffffffffll; - old_vgpr.resize(p->wfSize); + memTraceBusy = 0; + oldVgprTcnt = 0xffffffffffffffffll; + oldDgprTcnt = 0xffffffffffffffffll; + oldVgpr.resize(p->wfSize); pendingFetch = false; dropFetch = false; condRegState = new ConditionRegisterState(); maxSpVgprs = 0; maxDpVgprs = 0; - last_addr.resize(p->wfSize); - workitemFlatId.resize(p->wfSize); - old_dgpr.resize(p->wfSize); - bar_cnt.resize(p->wfSize); + lastAddr.resize(p->wfSize); + workItemFlatId.resize(p->wfSize); + oldDgpr.resize(p->wfSize); + barCnt.resize(p->wfSize); for (int i = 0; i < 3; ++i) { - workitemid[i].resize(p->wfSize); + workItemId[i].resize(p->wfSize); } } @@ -158,7 +158,7 @@ void Wavefront::start(uint64_t _wfDynId,uint64_t _base_ptr) { wfDynId = _wfDynId; - base_ptr = _base_ptr; + basePtr = _base_ptr; status = S_RUNNING; } @@ -333,12 +333,12 @@ Wavefront::ready(itype_e type) // Is the wave waiting at a barrier if (stalledAtBarrier) { - if (!computeUnit->AllAtBarrier(barrier_id,barrier_cnt, - computeUnit->getRefCounter(dispatchid, wg_id))) { + if (!computeUnit->AllAtBarrier(barrierId,barrierCnt, + computeUnit->getRefCounter(dispatchId, wgId))) { // Are all threads at barrier? return 0; } - old_barrier_cnt = barrier_cnt; + oldBarrierCnt = barrierCnt; stalledAtBarrier = false; } @@ -395,7 +395,7 @@ Wavefront::ready(itype_e type) } // Are there in pipe or outstanding memory requests? - if ((outstanding_reqs + mem_reqs_in_pipe) > 0) { + if ((outstandingReqs + memReqsInPipe) > 0) { return 0; } @@ -416,7 +416,7 @@ Wavefront::ready(itype_e type) } // Are there in pipe or outstanding memory requests? - if ((outstanding_reqs + mem_reqs_in_pipe) > 0) { + if ((outstandingReqs + memReqsInPipe) > 0) { return 0; } @@ -444,7 +444,7 @@ Wavefront::ready(itype_e type) // Here Global memory instruction if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { // Are there in pipe or outstanding global memory write requests? - if ((outstanding_reqs_wr_gm + wr_gm_reqs_in_pipe) > 0) { + if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { return 0; } } @@ -452,7 +452,7 @@ Wavefront::ready(itype_e type) if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_HIST_GM(ii->opType())) { // Are there in pipe or outstanding global memory read requests? - if ((outstanding_reqs_rd_gm + rd_gm_reqs_in_pipe) > 0) + if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) return 0; } @@ -467,7 +467,7 @@ Wavefront::ready(itype_e type) } if (!computeUnit->globalMemoryPipe. - isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) { + isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { // Can we insert a new request to the Global Mem Request FIFO? return 0; } @@ -484,14 +484,14 @@ Wavefront::ready(itype_e type) IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) { // Here for Shared memory instruction if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) { - if ((outstanding_reqs_wr_lm + wr_lm_reqs_in_pipe) > 0) { + if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { return 0; } } if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || IS_OT_HIST_LM(ii->opType())) { - if ((outstanding_reqs_rd_lm + rd_lm_reqs_in_pipe) > 0) { + if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { return 0; } } @@ -506,7 +506,7 @@ Wavefront::ready(itype_e type) } if (!computeUnit->localMemoryPipe. - isLMReqFIFOWrRdy(rd_lm_reqs_in_pipe + wr_lm_reqs_in_pipe)) { + isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) { // Can we insert a new request to the LDS Request FIFO? return 0; } @@ -523,14 +523,14 @@ Wavefront::ready(itype_e type) IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) { // Here for Private memory instruction ------------------------ // if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) { - if ((outstanding_reqs_wr_gm + wr_gm_reqs_in_pipe) > 0) { + if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { return 0; } } if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_HIST_PM(ii->opType())) { - if ((outstanding_reqs_rd_gm + rd_gm_reqs_in_pipe) > 0) { + if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) { return 0; } } @@ -546,7 +546,7 @@ Wavefront::ready(itype_e type) } if (!computeUnit->globalMemoryPipe. - isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) { + isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { // Can we insert a new request to the Global Mem Request FIFO? return 0; } @@ -579,13 +579,13 @@ Wavefront::ready(itype_e type) return 0; } if (!computeUnit->globalMemoryPipe. - isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) { + isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { // Can we insert a new request to the Global Mem Request FIFO? return 0; } if (!computeUnit->localMemoryPipe. - isLMReqFIFOWrRdy(rd_lm_reqs_in_pipe + wr_lm_reqs_in_pipe)) { + isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) { // Can we insert a new request to the LDS Request FIFO? return 0; } @@ -636,8 +636,8 @@ Wavefront::updateResources() ticks(computeUnit->issuePeriod)); } else if (ii->opType() == Enums::OT_FLAT_READ) { assert(Enums::SC_NONE != ii->executedAs()); - mem_reqs_in_pipe++; - rd_gm_reqs_in_pipe++; + memReqsInPipe++; + rdGmReqsInPipe++; if ( Enums::SC_SHARED == ii->executedAs() ) { computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. preset(computeUnit->shader->ticks(4)); @@ -651,8 +651,8 @@ Wavefront::updateResources() } } else if (ii->opType() == Enums::OT_FLAT_WRITE) { assert(Enums::SC_NONE != ii->executedAs()); - mem_reqs_in_pipe++; - wr_gm_reqs_in_pipe++; + memReqsInPipe++; + wrGmReqsInPipe++; if (Enums::SC_SHARED == ii->executedAs()) { computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. preset(computeUnit->shader->ticks(8)); @@ -665,67 +665,67 @@ Wavefront::updateResources() preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } } else if (IS_OT_READ_GM(ii->opType())) { - mem_reqs_in_pipe++; - rd_gm_reqs_in_pipe++; + memReqsInPipe++; + rdGmReqsInPipe++; computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. preset(computeUnit->shader->ticks(4)); computeUnit->wfWait[computeUnit->GlbMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_WRITE_GM(ii->opType())) { - mem_reqs_in_pipe++; - wr_gm_reqs_in_pipe++; + memReqsInPipe++; + wrGmReqsInPipe++; computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. preset(computeUnit->shader->ticks(8)); computeUnit->wfWait[computeUnit->GlbMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_ATOMIC_GM(ii->opType())) { - mem_reqs_in_pipe++; - wr_gm_reqs_in_pipe++; - rd_gm_reqs_in_pipe++; + memReqsInPipe++; + wrGmReqsInPipe++; + rdGmReqsInPipe++; computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. preset(computeUnit->shader->ticks(8)); computeUnit->wfWait[computeUnit->GlbMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_READ_LM(ii->opType())) { - mem_reqs_in_pipe++; - rd_lm_reqs_in_pipe++; + memReqsInPipe++; + rdLmReqsInPipe++; computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. preset(computeUnit->shader->ticks(4)); computeUnit->wfWait[computeUnit->ShrMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_WRITE_LM(ii->opType())) { - mem_reqs_in_pipe++; - wr_lm_reqs_in_pipe++; + memReqsInPipe++; + wrLmReqsInPipe++; computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. preset(computeUnit->shader->ticks(8)); computeUnit->wfWait[computeUnit->ShrMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_ATOMIC_LM(ii->opType())) { - mem_reqs_in_pipe++; - wr_lm_reqs_in_pipe++; - rd_lm_reqs_in_pipe++; + memReqsInPipe++; + wrLmReqsInPipe++; + rdLmReqsInPipe++; computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. preset(computeUnit->shader->ticks(8)); computeUnit->wfWait[computeUnit->ShrMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_READ_PM(ii->opType())) { - mem_reqs_in_pipe++; - rd_gm_reqs_in_pipe++; + memReqsInPipe++; + rdGmReqsInPipe++; computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. preset(computeUnit->shader->ticks(4)); computeUnit->wfWait[computeUnit->GlbMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_WRITE_PM(ii->opType())) { - mem_reqs_in_pipe++; - wr_gm_reqs_in_pipe++; + memReqsInPipe++; + wrGmReqsInPipe++; computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. preset(computeUnit->shader->ticks(8)); computeUnit->wfWait[computeUnit->GlbMemUnitId()]. preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); } else if (IS_OT_ATOMIC_PM(ii->opType())) { - mem_reqs_in_pipe++; - wr_gm_reqs_in_pipe++; - rd_gm_reqs_in_pipe++; + memReqsInPipe++; + wrGmReqsInPipe++; + rdGmReqsInPipe++; computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. preset(computeUnit->shader->ticks(8)); computeUnit->wfWait[computeUnit->GlbMemUnitId()]. @@ -865,7 +865,7 @@ Wavefront::exec() bool Wavefront::waitingAtBarrier(int lane) { - return bar_cnt[lane] < max_bar_cnt; + return barCnt[lane] < maxBarCnt; } void diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh index 5a5386a3d..db2e434a9 100644 --- a/src/gpu-compute/wavefront.hh +++ b/src/gpu-compute/wavefront.hh @@ -155,16 +155,16 @@ class Wavefront : public SimObject enum status_e {S_STOPPED,S_RETURNING,S_RUNNING}; // Base pointer for array of instruction pointers - uint64_t base_ptr; + uint64_t basePtr; - uint32_t old_barrier_cnt; - uint32_t barrier_cnt; - uint32_t barrier_id; - uint32_t barrier_slots; + uint32_t oldBarrierCnt; + uint32_t barrierCnt; + uint32_t barrierId; + uint32_t barrierSlots; status_e status; // HW slot id where the WF is mapped to inside a SIMD unit int wfSlotId; - int kern_id; + int kernId; // SIMD unit where the WV has been scheduled int simdId; // pointer to parent CU @@ -193,37 +193,37 @@ class Wavefront : public SimObject bool isOldestInstALU(); bool isOldestInstBarrier(); // used for passing spill address to DDInstGPU - std::vector last_addr; - std::vector workitemid[3]; - std::vector workitemFlatId; - uint32_t workgroupid[3]; - uint32_t workgroupsz[3]; - uint32_t gridsz[3]; - uint32_t wg_id; - uint32_t wg_sz; - uint32_t dynwaveid; - uint32_t maxdynwaveid; - uint32_t dispatchid; + std::vector lastAddr; + std::vector workItemId[3]; + std::vector workItemFlatId; + uint32_t workGroupId[3]; + uint32_t workGroupSz[3]; + uint32_t gridSz[3]; + uint32_t wgId; + uint32_t wgSz; + uint32_t dynWaveId; + uint32_t maxDynWaveId; + uint32_t dispatchId; // outstanding global+local memory requests - uint32_t outstanding_reqs; + uint32_t outstandingReqs; // memory requests between scoreboard // and execute stage not yet executed - uint32_t mem_reqs_in_pipe; + uint32_t memReqsInPipe; // outstanding global memory write requests - uint32_t outstanding_reqs_wr_gm; + uint32_t outstandingReqsWrGm; // outstanding local memory write requests - uint32_t outstanding_reqs_wr_lm; + uint32_t outstandingReqsWrLm; // outstanding global memory read requests - uint32_t outstanding_reqs_rd_gm; + uint32_t outstandingReqsRdGm; // outstanding local memory read requests - uint32_t outstanding_reqs_rd_lm; - uint32_t rd_lm_reqs_in_pipe; - uint32_t rd_gm_reqs_in_pipe; - uint32_t wr_lm_reqs_in_pipe; - uint32_t wr_gm_reqs_in_pipe; + uint32_t outstandingReqsRdLm; + uint32_t rdLmReqsInPipe; + uint32_t rdGmReqsInPipe; + uint32_t wrLmReqsInPipe; + uint32_t wrGmReqsInPipe; - int mem_trace_busy; - uint64_t last_trace; + int memTraceBusy; + uint64_t lastTrace; // number of vector registers reserved by WF int reservedVectorRegs; // Index into the Vector Register File's namespace where the WF's registers @@ -231,25 +231,25 @@ class Wavefront : public SimObject uint32_t startVgprIndex; // Old value of destination gpr (for trace) - std::vector old_vgpr; + std::vector oldVgpr; // Id of destination gpr (for trace) - uint32_t old_vgpr_id; + uint32_t oldVgprId; // Tick count of last old_vgpr copy - uint64_t old_vgpr_tcnt; + uint64_t oldVgprTcnt; // Old value of destination gpr (for trace) - std::vector old_dgpr; + std::vector oldDgpr; // Id of destination gpr (for trace) - uint32_t old_dgpr_id; + uint32_t oldDgprId; // Tick count of last old_vgpr copy - uint64_t old_dgpr_tcnt; + uint64_t oldDgprTcnt; // Execution mask at wavefront start - VectorMask init_mask; + VectorMask initMask; // number of barriers this WF has joined - std::vector bar_cnt; - int max_bar_cnt; + std::vector barCnt; + int maxBarCnt; // Flag to stall a wave on barrier bool stalledAtBarrier; @@ -333,7 +333,7 @@ class Wavefront : public SimObject int ready(itype_e type); bool instructionBufferHasBranch(); void regStats(); - VectorMask get_pred() { return execMask() & init_mask; } + VectorMask getPred() { return execMask() & initMask; } bool waitingAtBarrier(int lane);