From d5c8c5d3db1954ff4e33dade0812fed7b60a9269 Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Fri, 16 Sep 2016 12:27:56 -0400 Subject: [PATCH] gpu-compute: Adding ioctl for HW context size Adding runtime support for determining the memory required by a SIMD engine when executing a particular wavefront. --- src/gpu-compute/cl_driver.cc | 7 +++++++ src/gpu-compute/dispatcher.cc | 6 ++++++ src/gpu-compute/dispatcher.hh | 3 +++ src/gpu-compute/hsa_kernel_info.hh | 1 + src/gpu-compute/wavefront.cc | 14 ++++++++++++-- src/gpu-compute/wavefront.hh | 6 ++++++ 6 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc index 6bb6be102..d3950ec04 100644 --- a/src/gpu-compute/cl_driver.cc +++ b/src/gpu-compute/cl_driver.cc @@ -242,6 +242,13 @@ ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req) buf.copyOut(tc->getMemProxy()); } break; + case HSA_GET_HW_STATIC_CONTEXT_SIZE: + { + BufferArg buf(buf_addr, sizeof(uint32_t)); + *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize(); + buf.copyOut(tc->getMemProxy()); + } + break; default: fatal("ClDriver: bad ioctl %d\n", req); diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc index adc1f51bd..79eb46bbf 100644 --- a/src/gpu-compute/dispatcher.cc +++ b/src/gpu-compute/dispatcher.cc @@ -398,3 +398,9 @@ GpuDispatcher::setFuncargsSize(int funcargs_size) { shader->funcargs_size = funcargs_size; } + +uint32_t +GpuDispatcher::getStaticContextSize() const +{ + return shader->cuList[0]->wfList[0][0]->getStaticContextSize(); +} diff --git a/src/gpu-compute/dispatcher.hh b/src/gpu-compute/dispatcher.hh index e984af494..f5e89e8aa 100644 --- a/src/gpu-compute/dispatcher.hh +++ b/src/gpu-compute/dispatcher.hh @@ -159,6 +159,9 @@ class GpuDispatcher : public DmaDevice int getNumCUs(); int wfSize() const; void setFuncargsSize(int funcargs_size); + + /** Returns the size of the static hardware context of a wavefront */ +
uint32_t getStaticContextSize() const; }; #endif // __GPU_DISPATCHER_HH__ diff --git a/src/gpu-compute/hsa_kernel_info.hh b/src/gpu-compute/hsa_kernel_info.hh index 396913dac..4151695eb 100644 --- a/src/gpu-compute/hsa_kernel_info.hh +++ b/src/gpu-compute/hsa_kernel_info.hh @@ -48,6 +48,7 @@ static const int HSA_GET_CODE = 0x4804; static const int HSA_GET_READONLY_DATA = 0x4805; static const int HSA_GET_CU_CNT = 0x4806; static const int HSA_GET_VSZ = 0x4807; +static const int HSA_GET_HW_STATIC_CONTEXT_SIZE = 0x4808; // Return value (via buffer ptr) for HSA_GET_SIZES struct HsaDriverSizes diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index c73307ac4..76a0bdf9e 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -155,9 +155,9 @@ Wavefront::~Wavefront() } void -Wavefront::start(uint64_t _wfDynId,uint64_t _base_ptr) +Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr) { - wfDynId = _wfDynId; + wfDynId = _wf_dyn_id; basePtr = _base_ptr; status = S_RUNNING; } @@ -931,3 +931,13 @@ Wavefront::pc(uint32_t new_pc) { reconvergenceStack.top()->pc = new_pc; } + +uint32_t +Wavefront::getStaticContextSize() const +{ + return barCnt.size() * sizeof(int) + sizeof(dynWaveId) + sizeof(maxBarCnt) + + sizeof(oldBarrierCnt) + sizeof(barrierCnt) + sizeof(wgId) + + sizeof(computeUnit->cu_id) + sizeof(barrierId) + sizeof(initMask) + + sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) + + computeUnit->wfSize() * sizeof(ReconvergenceStackEntry); +} diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh index db2e434a9..4fe66ecfe 100644 --- a/src/gpu-compute/wavefront.hh +++ b/src/gpu-compute/wavefront.hh @@ -354,6 +354,12 @@ class Wavefront : public SimObject void discardFetch(); + /** + * Returns the size of the static hardware context of a particular wavefront + * This should be updated every time the context is changed + */ + uint32_t getStaticContextSize() const; + private: /** * Stack containing Control
Flow Graph nodes (i.e., kernel instructions)