From b63eb1302b006682bd227a5e236f7b3b95e9b8e8 Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Wed, 26 Oct 2016 22:47:49 -0400 Subject: [PATCH] gpu-compute, hsail: pass GPUDynInstPtr to getRegisterIndex() for HSAIL, an operand's indices into the register files may be calculated trivially, because the operands are always read from a register file, or are an immediate. for the machine ISA, however, an op selector may specify special registers, or may specify special SGPRs with an alias op selector value. the locations of some of the special register values depend on the size of the RF in some cases. here we add a way for the underlying getRegisterIndex() method to know about the size of the RFs, so that it may find the relative positions of the special register values. --- src/arch/hsail/insts/branch.hh | 12 ++++-- src/arch/hsail/insts/decl.hh | 44 ++++++++++++++++----- src/arch/hsail/insts/mem.hh | 18 ++++++--- src/gpu-compute/condition_register_state.cc | 6 +-- src/gpu-compute/condition_register_state.hh | 2 +- src/gpu-compute/gpu_dyn_inst.cc | 12 ++++-- src/gpu-compute/gpu_dyn_inst.hh | 3 +- src/gpu-compute/gpu_static_inst.hh | 13 +++++- src/gpu-compute/vector_register_file.cc | 6 +-- 9 files changed, 85 insertions(+), 31 deletions(-) diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh index 6df6f766a..79603f408 100644 --- a/src/arch/hsail/insts/branch.hh +++ b/src/arch/hsail/insts/branch.hh @@ -95,7 +95,9 @@ namespace HsailISA return target.opSize(); } - int getRegisterIndex(int operandIndex) override { + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override + { assert(operandIndex >= 0 && operandIndex < getNumOperands()); return target.regIndex(); } @@ -223,7 +225,9 @@ namespace HsailISA else return 1; } - int getRegisterIndex(int operandIndex) override { + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override + { assert(operandIndex >= 0 && operandIndex < getNumOperands()); if 
(!operandIndex) return target.regIndex(); @@ -370,7 +374,9 @@ namespace HsailISA assert(operandIndex >= 0 && operandIndex < getNumOperands()); return target.opSize(); } - int getRegisterIndex(int operandIndex) override { + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override + { assert(operandIndex >= 0 && operandIndex < getNumOperands()); return target.regIndex(); } diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh index 94f23ac1f..c40411ace 100644 --- a/src/arch/hsail/insts/decl.hh +++ b/src/arch/hsail/insts/decl.hh @@ -178,7 +178,9 @@ namespace HsailISA else return dest.opSize(); } - int getRegisterIndex(int operandIndex) { + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { assert(operandIndex >= 0 && operandIndex < getNumOperands()); if (operandIndex < NumSrcOperands) @@ -313,7 +315,10 @@ namespace HsailISA else return dest.opSize(); } - int getRegisterIndex(int operandIndex) { + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.regIndex(); @@ -477,7 +482,10 @@ namespace HsailISA else return dest.opSize(); } - int getRegisterIndex(int operandIndex) { + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.regIndex(); @@ -643,7 +651,7 @@ namespace HsailISA return -1; //handle positive and negative numbers - T tmp = (src0 < 0) ? (~src0) : (src0); + T tmp = ((int64_t)src0 < 0) ? 
(~src0) : (src0); //the starting pos is MSB int pos = 8 * sizeof(T) - 1; @@ -732,7 +740,12 @@ namespace HsailISA bool isSrcOperand(int operandIndex) { return false; } bool isDstOperand(int operandIndex) { return false; } int getOperandSize(int operandIndex) { return 0; } - int getRegisterIndex(int operandIndex) { return -1; } + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { + return -1; + } int numSrcRegOperands() { return 0; } int numDstRegOperands() { return 0; } @@ -777,10 +790,14 @@ namespace HsailISA assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.opSize(); } - int getRegisterIndex(int operandIndex) { + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.regIndex(); } + int numSrcRegOperands() { return 0; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return 1; } @@ -848,10 +865,14 @@ namespace HsailISA assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.opSize(); } - int getRegisterIndex(int operandIndex) { + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.regIndex(); } + int numSrcRegOperands() { return 0; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return 1; } @@ -1171,8 +1192,13 @@ namespace HsailISA bool isScalarRegister(int operandIndex) { return false; } bool isSrcOperand(int operandIndex) { return false; } bool isDstOperand(int operandIndex) { return false; } - int getOperandSize(int operandIndex) { return 0; } - int getRegisterIndex(int operandIndex) { return -1; } + int getOperandSize(int operandIndex) { return 0; } + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) + { + return -1; + } void execute(GPUDynInstPtr gpuDynInst) diff --git a/src/arch/hsail/insts/mem.hh 
b/src/arch/hsail/insts/mem.hh index 2e7dfcd1c..36a6cbc79 100644 --- a/src/arch/hsail/insts/mem.hh +++ b/src/arch/hsail/insts/mem.hh @@ -146,7 +146,8 @@ namespace HsailISA return((operandIndex == 0) ? dest.opSize() : this->addr.opSize()); } - int getRegisterIndex(int operandIndex) override + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return((operandIndex == 0) ? dest.regIndex() : @@ -377,7 +378,8 @@ namespace HsailISA return((operandIndex == 0) ? dest.opSize() : this->addr.opSize()); } - int getRegisterIndex(int operandIndex) override + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return((operandIndex == 0) ? dest.regIndex() : @@ -670,7 +672,8 @@ namespace HsailISA AddrOperandType>::dest.opSize()); return 0; } - int getRegisterIndex(int operandIndex) override + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if ((num_dest_operands != getNumOperands()) && @@ -934,7 +937,8 @@ namespace HsailISA assert(operandIndex >= 0 && operandIndex < getNumOperands()); return !operandIndex ? src.opSize() : this->addr.opSize(); } - int getRegisterIndex(int operandIndex) override + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { assert(operandIndex >= 0 && operandIndex < getNumOperands()); return !operandIndex ? 
src.regIndex() : this->addr.regIndex(); @@ -1144,7 +1148,8 @@ namespace HsailISA AddrOperandType>::src.opSize(); return 0; } - int getRegisterIndex(int operandIndex) override + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex == num_src_operands) @@ -1433,7 +1438,8 @@ namespace HsailISA else return(dest.opSize()); } - int getRegisterIndex(int operandIndex) + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < NumSrcOperands) diff --git a/src/gpu-compute/condition_register_state.cc b/src/gpu-compute/condition_register_state.cc index f3f2d2927..08555bb7c 100644 --- a/src/gpu-compute/condition_register_state.cc +++ b/src/gpu-compute/condition_register_state.cc @@ -62,19 +62,19 @@ ConditionRegisterState::init(uint32_t _size) } void -ConditionRegisterState::exec(GPUStaticInst *ii, Wavefront *w) +ConditionRegisterState::exec(GPUDynInstPtr ii, Wavefront *w) { // iterate over all operands for (auto i = 0; i < ii->getNumOperands(); ++i) { // is this a condition register destination operand? 
if (ii->isCondRegister(i) && ii->isDstOperand(i)) { // mark the register as busy - markReg(ii->getRegisterIndex(i), 1); + markReg(ii->getRegisterIndex(i, ii), 1); uint32_t pipeLen = w->computeUnit->spBypassLength(); // schedule an event for marking the register as ready w->computeUnit-> - registerEvent(w->simdId, ii->getRegisterIndex(i), + registerEvent(w->simdId, ii->getRegisterIndex(i, ii), ii->getOperandSize(i), w->computeUnit->shader->tick_cnt + w->computeUnit->shader->ticks(pipeLen), 0); diff --git a/src/gpu-compute/condition_register_state.hh b/src/gpu-compute/condition_register_state.hh index 139874a66..2d3f5e160 100644 --- a/src/gpu-compute/condition_register_state.hh +++ b/src/gpu-compute/condition_register_state.hh @@ -87,7 +87,7 @@ class ConditionRegisterState } int numRegs() { return c_reg.size(); } - void exec(GPUStaticInst *ii, Wavefront *w); + void exec(GPUDynInstPtr ii, Wavefront *w); private: ComputeUnit* computeUnit; diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index 7092a7a40..ecd54f091 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -102,10 +102,16 @@ GPUDynInst::isScalarRegister(int operandIdx) return _staticInst->isScalarRegister(operandIdx); } -int -GPUDynInst::getRegisterIndex(int operandIdx) +bool +GPUDynInst::isCondRegister(int operandIdx) { - return _staticInst->getRegisterIndex(operandIdx); + return _staticInst->isCondRegister(operandIdx); +} + +int +GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst) +{ + return _staticInst->getRegisterIndex(operandIdx, gpuDynInst); } int diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index 527b87b4c..c30871f5e 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -194,7 +194,8 @@ class GPUDynInst : public GPUExecContext int getNumOperands(); bool isVectorRegister(int operandIdx); bool isScalarRegister(int operandIdx); - int getRegisterIndex(int 
operandIdx); + bool isCondRegister(int operandIdx); + int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst); int getOperandSize(int operandIdx); bool isDstOperand(int operandIdx); bool isSrcOperand(int operandIdx); diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index 2fa1e0ca5..e851c52e6 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -83,7 +83,10 @@ class GPUStaticInst : public GPUStaticInstFlags virtual bool isSrcOperand(int operandIndex) = 0; virtual bool isDstOperand(int operandIndex) = 0; virtual int getOperandSize(int operandIndex) = 0; - virtual int getRegisterIndex(int operandIndex) = 0; + + virtual int getRegisterIndex(int operandIndex, + GPUDynInstPtr gpuDynInst) = 0; + virtual int numDstRegOperands() = 0; virtual int numSrcRegOperands() = 0; @@ -286,7 +289,13 @@ class KernelLaunchStaticInst : public GPUStaticInst bool isSrcOperand(int operandIndex) { return false; } bool isDstOperand(int operandIndex) { return false; } int getOperandSize(int operandIndex) { return 0; } - int getRegisterIndex(int operandIndex) { return 0; } + + int + getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override + { + return 0; + } + int numDstRegOperands() { return 0; } int numSrcRegOperands() { return 0; } bool isValid() const { return true; } diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc index c50c06cc6..3c3b400bb 100644 --- a/src/gpu-compute/vector_register_file.cc +++ b/src/gpu-compute/vector_register_file.cc @@ -121,7 +121,7 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const { for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i)) { - uint32_t vgprIdx = ii->getRegisterIndex(i); + uint32_t vgprIdx = ii->getRegisterIndex(i, ii); uint32_t pVgpr = w->remap(vgprIdx, ii->getOperandSize(i), 1); if (regBusy(pVgpr, ii->getOperandSize(i)) == 1) { @@ -160,7 +160,7 @@ 
VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w) // iterate over all register destination operands for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - uint32_t physReg = w->remap(ii->getRegisterIndex(i), + uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii), ii->getOperandSize(i), 1); // mark the destination vector register as busy @@ -216,7 +216,7 @@ VectorRegisterFile::updateResources(Wavefront *w, GPUDynInstPtr ii) // iterate over all register destination operands for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - uint32_t physReg = w->remap(ii->getRegisterIndex(i), + uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii), ii->getOperandSize(i), 1); // set the in-flight status of the destination vector register preMarkReg(physReg, ii->getOperandSize(i), 1);