gpu-compute: use System cache line size in the GPU

2016-10-26 22:47:47 -04:00 · 2016-10-26 22:47:47 -04:00 · aa7364276f
commit aa7364276f
parent 844fb845a5
4 changed files with 9 additions and 5 deletions
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
    req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
    resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
    _masterId(p->system->getMasterId(name() + ".ComputeUnit")),
-    lds(*p->localDataStore), globalSeqNum(0),  wavefrontSize(p->wfSize),
+    lds(*p->localDataStore), _cacheLineSize(p->system->cacheLineSize()),
+    globalSeqNum(0), wavefrontSize(p->wfSize),
    kernelLaunchInst(new KernelLaunchStaticInst())
 {
    /**
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@ -390,6 +390,8 @@ class ComputeUnit : public MemObject
    int32_t
    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;

+    int cacheLineSize() const { return _cacheLineSize; } // System's line size
+
    bool
    sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));

@ -767,6 +769,7 @@ class ComputeUnit : public MemObject
    uint64_t getAndIncSeqNum() { return globalSeqNum++; }

  private:
+    const int _cacheLineSize; // p->system->cacheLineSize() at construction
    uint64_t globalSeqNum;
    int wavefrontSize;
    GPUStaticInst *kernelLaunchInst;
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@ -132,10 +132,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront)

    // Since this is an instruction prefetch, if you're split then just finish
    // out the current line.
-    unsigned block_size = RubySystem::getBlockSizeBytes();
+    int block_size = computeUnit->cacheLineSize();
    // check for split accesses
    Addr split_addr = roundDown(vaddr + block_size - 1, block_size);
-    unsigned size = block_size;
+    int size = block_size;

    if (split_addr > vaddr) {
        // misaligned access, just grab the rest of the line
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@ -224,7 +224,7 @@ void
 Shader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
                           bool suppress_func_errors, int cu_id)
 {
-    unsigned block_size = RubySystem::getBlockSizeBytes();
+    int block_size = cuList.at(cu_id)->cacheLineSize();
    unsigned size = req->getSize();

    Addr tmp_addr;
@ -342,7 +342,7 @@ Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
 {
    uint8_t *data_buf = (uint8_t*)ptr;

-    for (ChunkGenerator gen(address, size, RubySystem::getBlockSizeBytes());
+    for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
         !gen.done(); gen.next()) {
        Request *req = new Request(0, gen.addr(), gen.size(), 0,
                                   cuList[0]->masterId(), 0, 0, 0);