gpu-compute, hsail: make the PC a byte address, not an instruction index

Currently the PC is incremented at instruction granularity (as an instruction
index) rather than as an instruction's byte address. Machine ISA instructions
assume the PC is a byte address and increment it accordingly. Here we make the
GPU model and the HSAIL instructions treat the PC as a byte address as well.
This commit is contained in:
Tony Gutierrez 2016-10-26 22:47:43 -04:00
parent d327cdba07
commit 844fb845a5
11 changed files with 46 additions and 34 deletions

View file

@ -38,6 +38,7 @@
#include <cstdint> #include <cstdint>
#include "arch/hsail/gpu_types.hh"
#include "base/misc.hh" #include "base/misc.hh"
#include "gpu-compute/misc.hh" #include "gpu-compute/misc.hh"
@ -71,7 +72,7 @@ namespace HsailISA
uint32_t uint32_t
advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst) advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
{ {
return old_pc + 1; return old_pc + sizeof(RawMachInst);
} }
private: private:

View file

@ -51,7 +51,7 @@ namespace HsailISA
// our model uses to represent an actual instruction. In // our model uses to represent an actual instruction. In
// the case of HSAIL this is just an index into a list of // the case of HSAIL this is just an index into a list of
// instruction objects. // instruction objects.
typedef uint64_t RawMachInst; typedef uint32_t RawMachInst;
// The MachInst is a representation of an instruction // The MachInst is a representation of an instruction
// that has more information than just the machine code. // that has more information than just the machine code.

View file

@ -257,7 +257,7 @@ namespace HsailISA
{ {
Wavefront *w = gpuDynInst->wavefront(); Wavefront *w = gpuDynInst->wavefront();
const uint32_t curr_pc = w->pc(); const uint32_t curr_pc M5_VAR_USED = w->pc();
const uint32_t curr_rpc = w->rpc(); const uint32_t curr_rpc = w->rpc();
const VectorMask curr_mask = w->execMask(); const VectorMask curr_mask = w->execMask();
@ -281,7 +281,7 @@ namespace HsailISA
} }
// not taken branch // not taken branch
const uint32_t false_pc = curr_pc + 1; const uint32_t false_pc = nextInstAddr();
assert(true_pc != false_pc); assert(true_pc != false_pc);
if (false_pc != rpc && true_mask.count() < curr_mask.count()) { if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
VectorMask false_mask = curr_mask & ~true_mask; VectorMask false_mask = curr_mask & ~true_mask;

View file

@ -42,6 +42,7 @@
* Defines the base class representing HSAIL GPU static instructions. * Defines the base class representing HSAIL GPU static instructions.
*/ */
#include "arch/hsail/gpu_types.hh"
#include "gpu-compute/gpu_static_inst.hh" #include "gpu-compute/gpu_static_inst.hh"
class BrigObject; class BrigObject;
@ -54,7 +55,7 @@ namespace HsailISA
public: public:
HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode); HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
void generateDisassembly(); void generateDisassembly();
uint32_t instSize() { return 4; } int instSize() const override { return sizeof(RawMachInst); }
bool isValid() const override { return true; } bool isValid() const override { return true; }
protected: protected:

View file

@ -79,7 +79,7 @@ ClDriver::ClDriver(ClDriverParams *p)
kernelInfo[i].code_offs = code_offs; kernelInfo[i].code_offs = code_offs;
name_offs += k->name().size() + 1; name_offs += k->name().size() + 1;
code_offs += k->numInsts() * sizeof(GPUStaticInst*); code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
} }
} }
@ -130,7 +130,8 @@ ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req)
HsaCode *k = kernels[i]; HsaCode *k = kernels[i];
// add one for terminating '\0' // add one for terminating '\0'
sizes->string_table_size += k->name().size() + 1; sizes->string_table_size += k->name().size() + 1;
sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*); sizes->code_size +=
k->numInsts() * sizeof(TheGpuISA::RawMachInst);
} }
sizes.copyOut(tc->getMemProxy()); sizes.copyOut(tc->getMemProxy());

View file

@ -122,11 +122,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
* instructions on a 32b granularity so we must account for that here. * instructions on a 32b granularity so we must account for that here.
*/ */
for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) { for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
int current_inst_size = vaddr +=
wavefront->instructionBuffer.at(i)->staticInstruction()->instSize(); wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
vaddr += current_inst_size / sizeof(uint32_t);
} }
vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*); vaddr = wavefront->basePtr + vaddr;
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n", DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr); computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);

View file

@ -37,7 +37,7 @@
GPUStaticInst::GPUStaticInst(const std::string &opcode) GPUStaticInst::GPUStaticInst(const std::string &opcode)
: executed_as(Enums::SC_NONE), opcode(opcode), : executed_as(Enums::SC_NONE), opcode(opcode),
_instNum(0) _instNum(0), _instAddr(0)
{ {
setFlag(NoOrder); setFlag(NoOrder);
} }

View file

@ -61,6 +61,9 @@ class GPUStaticInst : public GPUStaticInstFlags
{ {
public: public:
GPUStaticInst(const std::string &opcode); GPUStaticInst(const std::string &opcode);
void instAddr(int inst_addr) { _instAddr = inst_addr; }
int instAddr() const { return _instAddr; }
int nextInstAddr() const { return _instAddr + instSize(); }
void instNum(int num) { _instNum = num; } void instNum(int num) { _instNum = num; }
@ -190,7 +193,7 @@ class GPUStaticInst : public GPUStaticInstFlags
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; } bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
bool isSystemCoherent() const { return _flags[SystemCoherent]; } bool isSystemCoherent() const { return _flags[SystemCoherent]; }
virtual uint32_t instSize() = 0; virtual int instSize() const = 0;
// only used for memory instructions // only used for memory instructions
virtual void virtual void
@ -243,6 +246,7 @@ class GPUStaticInst : public GPUStaticInstFlags
const std::string opcode; const std::string opcode;
std::string disassembly; std::string disassembly;
int _instNum; int _instNum;
int _instAddr;
/** /**
* Identifier of the immediate post-dominator instruction. * Identifier of the immediate post-dominator instruction.
*/ */
@ -286,7 +290,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
int numDstRegOperands() { return 0; } int numDstRegOperands() { return 0; }
int numSrcRegOperands() { return 0; } int numSrcRegOperands() { return 0; }
bool isValid() const { return true; } bool isValid() const { return true; }
uint32_t instSize() { return 0; } int instSize() const override { return 0; }
}; };
#endif // __GPU_STATIC_INST_HH__ #endif // __GPU_STATIC_INST_HH__

View file

@ -84,6 +84,11 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
const BrigBase *endPtr = const BrigBase *endPtr =
obj->getCodeSectionEntry(code_dir->nextModuleEntry); obj->getCodeSectionEntry(code_dir->nextModuleEntry);
// the instruction's byte address (relative to the base addr
// of the code section)
int inst_addr = 0;
// the index that points to the instruction in the instruction
// array
int inst_idx = 0; int inst_idx = 0;
std::vector<GPUStaticInst*> instructions; std::vector<GPUStaticInst*> instructions;
int funcarg_size_scope = 0; int funcarg_size_scope = 0;
@ -121,7 +126,7 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
"kind_label, label is: %s \n", "kind_label, label is: %s \n",
obj->getString(lbl->name)); obj->getString(lbl->name));
labelMap.addLabel(lbl, inst_idx, obj); labelMap.addLabel(lbl, inst_addr, obj);
} }
break; break;
@ -175,14 +180,16 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
if (iptr) { if (iptr) {
DPRINTF(HSAILObject, "Initializing code, processing inst " DPRINTF(HSAILObject, "Initializing code, processing inst "
"#%d idx %d: OPCODE=%d\n", "byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
inst_idx, _insts.size(), instPtr->opcode); inst_idx, instPtr->opcode);
TheGpuISA::RawMachInst inst_num = decoder.saveInst(iptr); TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
iptr->instNum(inst_idx); iptr->instNum(inst_idx);
_insts.push_back(inst_num); iptr->instAddr(inst_addr);
_insts.push_back(raw_inst);
instructions.push_back(iptr); instructions.push_back(iptr);
} }
inst_addr += sizeof(TheGpuISA::RawMachInst);
++inst_idx; ++inst_idx;
} else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN && } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
entryPtr->kind < BRIG_KIND_OPERAND_END) { entryPtr->kind < BRIG_KIND_OPERAND_END) {

View file

@ -63,11 +63,11 @@ ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
} }
BasicBlock* BasicBlock*
ControlFlowInfo::basicBlock(int inst_num) const { ControlFlowInfo::basicBlock(int inst_addr) const {
for (auto& block: basicBlocks) { for (auto& block: basicBlocks) {
int first_block_id = block->firstInstruction->instNum(); int first_block_addr = block->firstInstruction->instAddr();
if (inst_num >= first_block_id && if (inst_addr >= first_block_addr && inst_addr <
inst_num < first_block_id + block->size) { first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
return block.get(); return block.get();
} }
} }
@ -102,24 +102,23 @@ ControlFlowInfo::createBasicBlocks()
std::set<int> leaders; std::set<int> leaders;
// first instruction is a leader // first instruction is a leader
leaders.insert(0); leaders.insert(0);
for (int i = 1; i < instructions.size(); i++) { for (const auto &instruction : instructions) {
GPUStaticInst* instruction = instructions[i];
if (instruction->isBranch()) { if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc(); const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc); leaders.insert(target_pc);
leaders.insert(i + 1); leaders.insert(instruction->nextInstAddr());
} }
} }
size_t block_size = 0; size_t block_size = 0;
for (int i = 0; i < instructions.size(); i++) { for (const auto &instruction : instructions) {
if (leaders.find(i) != leaders.end()) { if (leaders.find(instruction->instAddr()) != leaders.end()) {
uint32_t id = basicBlocks.size(); uint32_t id = basicBlocks.size();
if (id > 0) { if (id > 0) {
basicBlocks.back()->size = block_size; basicBlocks.back()->size = block_size;
} }
block_size = 0; block_size = 0;
basicBlocks.emplace_back(new BasicBlock(id, instructions[i])); basicBlocks.emplace_back(new BasicBlock(id, instruction));
} }
block_size++; block_size++;
} }
@ -149,7 +148,7 @@ ControlFlowInfo::connectBasicBlocks()
// Unconditional jump instructions have a unique successor // Unconditional jump instructions have a unique successor
if (!last->isUnconditionalJump()) { if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->instNum() + 1); BasicBlock* next_bb = basicBlock(last->nextInstAddr());
bb->successorIds.insert(next_bb->id); bb->successorIds.insert(next_bb->id);
} }
} }
@ -236,9 +235,9 @@ ControlFlowInfo::findImmediatePostDominators()
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get(); BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
if (!ipd_block->isExit()) { if (!ipd_block->isExit()) {
GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction; GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
last_instruction->ipdInstNum(ipd_first_inst->instNum()); last_instruction->ipdInstNum(ipd_first_inst->instAddr());
} else { } else {
last_instruction->ipdInstNum(last_instruction->instNum() + 1); last_instruction->ipdInstNum(last_instruction->nextInstAddr());
} }
} }
} }
@ -271,8 +270,8 @@ void
ControlFlowInfo::printBasicBlocks() const ControlFlowInfo::printBasicBlocks() const
{ {
for (GPUStaticInst* inst : instructions) { for (GPUStaticInst* inst : instructions) {
int inst_num = inst->instNum(); int inst_addr = inst->instAddr();
std::cout << inst_num << " [" << basicBlock(inst_num)->id std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
<< "]: " << inst->disassemble(); << "]: " << inst->disassemble();
if (inst->isBranch()) { if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc(); std::cout << ", PC = " << inst->getTargetPc();

View file

@ -106,7 +106,7 @@ private:
GPUStaticInst* lastInstruction(const BasicBlock* block) const; GPUStaticInst* lastInstruction(const BasicBlock* block) const;
BasicBlock* basicBlock(int inst_num) const; BasicBlock* basicBlock(int inst_addr) const;
BasicBlock* postDominator(const BasicBlock* block) const; BasicBlock* postDominator(const BasicBlock* block) const;