gpu-compute, hsail: make the PC a byte address, not an instruction index
Currently the PC is incremented at instruction granularity rather than being treated as an instruction's byte address. Machine ISA instructions assume the PC is a byte address and increment it accordingly. Here we make the GPU model and the HSAIL instructions treat the PC as a byte address as well.
This commit is contained in:
parent
d327cdba07
commit
844fb845a5
11 changed files with 46 additions and 34 deletions
|
@ -38,6 +38,7 @@
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
|
#include "arch/hsail/gpu_types.hh"
|
||||||
#include "base/misc.hh"
|
#include "base/misc.hh"
|
||||||
#include "gpu-compute/misc.hh"
|
#include "gpu-compute/misc.hh"
|
||||||
|
|
||||||
|
@ -71,7 +72,7 @@ namespace HsailISA
|
||||||
uint32_t
|
uint32_t
|
||||||
advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
|
advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
|
||||||
{
|
{
|
||||||
return old_pc + 1;
|
return old_pc + sizeof(RawMachInst);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -51,7 +51,7 @@ namespace HsailISA
|
||||||
// our model uses to represent an actual instruction. In
|
// our model uses to represent an actual instruction. In
|
||||||
// the case of HSAIL this is just an index into a list of
|
// the case of HSAIL this is just an index into a list of
|
||||||
// instruction objects.
|
// instruction objects.
|
||||||
typedef uint64_t RawMachInst;
|
typedef uint32_t RawMachInst;
|
||||||
|
|
||||||
// The MachInst is a representation of an instruction
|
// The MachInst is a representation of an instruction
|
||||||
// that has more information than just the machine code.
|
// that has more information than just the machine code.
|
||||||
|
|
|
@ -257,7 +257,7 @@ namespace HsailISA
|
||||||
{
|
{
|
||||||
Wavefront *w = gpuDynInst->wavefront();
|
Wavefront *w = gpuDynInst->wavefront();
|
||||||
|
|
||||||
const uint32_t curr_pc = w->pc();
|
const uint32_t curr_pc M5_VAR_USED = w->pc();
|
||||||
const uint32_t curr_rpc = w->rpc();
|
const uint32_t curr_rpc = w->rpc();
|
||||||
const VectorMask curr_mask = w->execMask();
|
const VectorMask curr_mask = w->execMask();
|
||||||
|
|
||||||
|
@ -281,7 +281,7 @@ namespace HsailISA
|
||||||
}
|
}
|
||||||
|
|
||||||
// not taken branch
|
// not taken branch
|
||||||
const uint32_t false_pc = curr_pc + 1;
|
const uint32_t false_pc = nextInstAddr();
|
||||||
assert(true_pc != false_pc);
|
assert(true_pc != false_pc);
|
||||||
if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
|
if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
|
||||||
VectorMask false_mask = curr_mask & ~true_mask;
|
VectorMask false_mask = curr_mask & ~true_mask;
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
* Defines the base class representing HSAIL GPU static instructions.
|
* Defines the base class representing HSAIL GPU static instructions.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "arch/hsail/gpu_types.hh"
|
||||||
#include "gpu-compute/gpu_static_inst.hh"
|
#include "gpu-compute/gpu_static_inst.hh"
|
||||||
|
|
||||||
class BrigObject;
|
class BrigObject;
|
||||||
|
@ -54,7 +55,7 @@ namespace HsailISA
|
||||||
public:
|
public:
|
||||||
HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
|
HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
|
||||||
void generateDisassembly();
|
void generateDisassembly();
|
||||||
uint32_t instSize() { return 4; }
|
int instSize() const override { return sizeof(RawMachInst); }
|
||||||
bool isValid() const override { return true; }
|
bool isValid() const override { return true; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -79,7 +79,7 @@ ClDriver::ClDriver(ClDriverParams *p)
|
||||||
kernelInfo[i].code_offs = code_offs;
|
kernelInfo[i].code_offs = code_offs;
|
||||||
|
|
||||||
name_offs += k->name().size() + 1;
|
name_offs += k->name().size() + 1;
|
||||||
code_offs += k->numInsts() * sizeof(GPUStaticInst*);
|
code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,7 +130,8 @@ ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req)
|
||||||
HsaCode *k = kernels[i];
|
HsaCode *k = kernels[i];
|
||||||
// add one for terminating '\0'
|
// add one for terminating '\0'
|
||||||
sizes->string_table_size += k->name().size() + 1;
|
sizes->string_table_size += k->name().size() + 1;
|
||||||
sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*);
|
sizes->code_size +=
|
||||||
|
k->numInsts() * sizeof(TheGpuISA::RawMachInst);
|
||||||
}
|
}
|
||||||
|
|
||||||
sizes.copyOut(tc->getMemProxy());
|
sizes.copyOut(tc->getMemProxy());
|
||||||
|
|
|
@ -122,11 +122,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
|
||||||
* instructions on a 32b granularity so we must account for that here.
|
* instructions on a 32b granularity so we must account for that here.
|
||||||
*/
|
*/
|
||||||
for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
|
for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
|
||||||
int current_inst_size =
|
vaddr +=
|
||||||
wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
|
wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
|
||||||
vaddr += current_inst_size / sizeof(uint32_t);
|
|
||||||
}
|
}
|
||||||
vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*);
|
vaddr = wavefront->basePtr + vaddr;
|
||||||
|
|
||||||
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
|
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
|
||||||
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
|
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
|
|
||||||
GPUStaticInst::GPUStaticInst(const std::string &opcode)
|
GPUStaticInst::GPUStaticInst(const std::string &opcode)
|
||||||
: executed_as(Enums::SC_NONE), opcode(opcode),
|
: executed_as(Enums::SC_NONE), opcode(opcode),
|
||||||
_instNum(0)
|
_instNum(0), _instAddr(0)
|
||||||
{
|
{
|
||||||
setFlag(NoOrder);
|
setFlag(NoOrder);
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,6 +61,9 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GPUStaticInst(const std::string &opcode);
|
GPUStaticInst(const std::string &opcode);
|
||||||
|
void instAddr(int inst_addr) { _instAddr = inst_addr; }
|
||||||
|
int instAddr() const { return _instAddr; }
|
||||||
|
int nextInstAddr() const { return _instAddr + instSize(); }
|
||||||
|
|
||||||
void instNum(int num) { _instNum = num; }
|
void instNum(int num) { _instNum = num; }
|
||||||
|
|
||||||
|
@ -190,7 +193,7 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||||
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
|
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
|
||||||
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
|
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
|
||||||
|
|
||||||
virtual uint32_t instSize() = 0;
|
virtual int instSize() const = 0;
|
||||||
|
|
||||||
// only used for memory instructions
|
// only used for memory instructions
|
||||||
virtual void
|
virtual void
|
||||||
|
@ -243,6 +246,7 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||||
const std::string opcode;
|
const std::string opcode;
|
||||||
std::string disassembly;
|
std::string disassembly;
|
||||||
int _instNum;
|
int _instNum;
|
||||||
|
int _instAddr;
|
||||||
/**
|
/**
|
||||||
* Identifier of the immediate post-dominator instruction.
|
* Identifier of the immediate post-dominator instruction.
|
||||||
*/
|
*/
|
||||||
|
@ -286,7 +290,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
|
||||||
int numDstRegOperands() { return 0; }
|
int numDstRegOperands() { return 0; }
|
||||||
int numSrcRegOperands() { return 0; }
|
int numSrcRegOperands() { return 0; }
|
||||||
bool isValid() const { return true; }
|
bool isValid() const { return true; }
|
||||||
uint32_t instSize() { return 0; }
|
int instSize() const override { return 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __GPU_STATIC_INST_HH__
|
#endif // __GPU_STATIC_INST_HH__
|
||||||
|
|
|
@ -84,6 +84,11 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
|
||||||
const BrigBase *endPtr =
|
const BrigBase *endPtr =
|
||||||
obj->getCodeSectionEntry(code_dir->nextModuleEntry);
|
obj->getCodeSectionEntry(code_dir->nextModuleEntry);
|
||||||
|
|
||||||
|
// the instruction's byte address (relative to the base addr
|
||||||
|
// of the code section)
|
||||||
|
int inst_addr = 0;
|
||||||
|
// the index that points to the instruction in the instruction
|
||||||
|
// array
|
||||||
int inst_idx = 0;
|
int inst_idx = 0;
|
||||||
std::vector<GPUStaticInst*> instructions;
|
std::vector<GPUStaticInst*> instructions;
|
||||||
int funcarg_size_scope = 0;
|
int funcarg_size_scope = 0;
|
||||||
|
@ -121,7 +126,7 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
|
||||||
"kind_label, label is: %s \n",
|
"kind_label, label is: %s \n",
|
||||||
obj->getString(lbl->name));
|
obj->getString(lbl->name));
|
||||||
|
|
||||||
labelMap.addLabel(lbl, inst_idx, obj);
|
labelMap.addLabel(lbl, inst_addr, obj);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -175,14 +180,16 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
|
||||||
|
|
||||||
if (iptr) {
|
if (iptr) {
|
||||||
DPRINTF(HSAILObject, "Initializing code, processing inst "
|
DPRINTF(HSAILObject, "Initializing code, processing inst "
|
||||||
"#%d idx %d: OPCODE=%d\n",
|
"byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
|
||||||
inst_idx, _insts.size(), instPtr->opcode);
|
inst_idx, instPtr->opcode);
|
||||||
|
|
||||||
TheGpuISA::RawMachInst inst_num = decoder.saveInst(iptr);
|
TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
|
||||||
iptr->instNum(inst_idx);
|
iptr->instNum(inst_idx);
|
||||||
_insts.push_back(inst_num);
|
iptr->instAddr(inst_addr);
|
||||||
|
_insts.push_back(raw_inst);
|
||||||
instructions.push_back(iptr);
|
instructions.push_back(iptr);
|
||||||
}
|
}
|
||||||
|
inst_addr += sizeof(TheGpuISA::RawMachInst);
|
||||||
++inst_idx;
|
++inst_idx;
|
||||||
} else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
|
} else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
|
||||||
entryPtr->kind < BRIG_KIND_OPERAND_END) {
|
entryPtr->kind < BRIG_KIND_OPERAND_END) {
|
||||||
|
|
|
@ -63,11 +63,11 @@ ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
|
||||||
}
|
}
|
||||||
|
|
||||||
BasicBlock*
|
BasicBlock*
|
||||||
ControlFlowInfo::basicBlock(int inst_num) const {
|
ControlFlowInfo::basicBlock(int inst_addr) const {
|
||||||
for (auto& block: basicBlocks) {
|
for (auto& block: basicBlocks) {
|
||||||
int first_block_id = block->firstInstruction->instNum();
|
int first_block_addr = block->firstInstruction->instAddr();
|
||||||
if (inst_num >= first_block_id &&
|
if (inst_addr >= first_block_addr && inst_addr <
|
||||||
inst_num < first_block_id + block->size) {
|
first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
|
||||||
return block.get();
|
return block.get();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -102,24 +102,23 @@ ControlFlowInfo::createBasicBlocks()
|
||||||
std::set<int> leaders;
|
std::set<int> leaders;
|
||||||
// first instruction is a leader
|
// first instruction is a leader
|
||||||
leaders.insert(0);
|
leaders.insert(0);
|
||||||
for (int i = 1; i < instructions.size(); i++) {
|
for (const auto &instruction : instructions) {
|
||||||
GPUStaticInst* instruction = instructions[i];
|
|
||||||
if (instruction->isBranch()) {
|
if (instruction->isBranch()) {
|
||||||
const int target_pc = instruction->getTargetPc();
|
const int target_pc = instruction->getTargetPc();
|
||||||
leaders.insert(target_pc);
|
leaders.insert(target_pc);
|
||||||
leaders.insert(i + 1);
|
leaders.insert(instruction->nextInstAddr());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t block_size = 0;
|
size_t block_size = 0;
|
||||||
for (int i = 0; i < instructions.size(); i++) {
|
for (const auto &instruction : instructions) {
|
||||||
if (leaders.find(i) != leaders.end()) {
|
if (leaders.find(instruction->instAddr()) != leaders.end()) {
|
||||||
uint32_t id = basicBlocks.size();
|
uint32_t id = basicBlocks.size();
|
||||||
if (id > 0) {
|
if (id > 0) {
|
||||||
basicBlocks.back()->size = block_size;
|
basicBlocks.back()->size = block_size;
|
||||||
}
|
}
|
||||||
block_size = 0;
|
block_size = 0;
|
||||||
basicBlocks.emplace_back(new BasicBlock(id, instructions[i]));
|
basicBlocks.emplace_back(new BasicBlock(id, instruction));
|
||||||
}
|
}
|
||||||
block_size++;
|
block_size++;
|
||||||
}
|
}
|
||||||
|
@ -149,7 +148,7 @@ ControlFlowInfo::connectBasicBlocks()
|
||||||
|
|
||||||
// Unconditional jump instructions have a unique successor
|
// Unconditional jump instructions have a unique successor
|
||||||
if (!last->isUnconditionalJump()) {
|
if (!last->isUnconditionalJump()) {
|
||||||
BasicBlock* next_bb = basicBlock(last->instNum() + 1);
|
BasicBlock* next_bb = basicBlock(last->nextInstAddr());
|
||||||
bb->successorIds.insert(next_bb->id);
|
bb->successorIds.insert(next_bb->id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -236,9 +235,9 @@ ControlFlowInfo::findImmediatePostDominators()
|
||||||
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
|
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
|
||||||
if (!ipd_block->isExit()) {
|
if (!ipd_block->isExit()) {
|
||||||
GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
|
GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
|
||||||
last_instruction->ipdInstNum(ipd_first_inst->instNum());
|
last_instruction->ipdInstNum(ipd_first_inst->instAddr());
|
||||||
} else {
|
} else {
|
||||||
last_instruction->ipdInstNum(last_instruction->instNum() + 1);
|
last_instruction->ipdInstNum(last_instruction->nextInstAddr());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -271,8 +270,8 @@ void
|
||||||
ControlFlowInfo::printBasicBlocks() const
|
ControlFlowInfo::printBasicBlocks() const
|
||||||
{
|
{
|
||||||
for (GPUStaticInst* inst : instructions) {
|
for (GPUStaticInst* inst : instructions) {
|
||||||
int inst_num = inst->instNum();
|
int inst_addr = inst->instAddr();
|
||||||
std::cout << inst_num << " [" << basicBlock(inst_num)->id
|
std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
|
||||||
<< "]: " << inst->disassemble();
|
<< "]: " << inst->disassemble();
|
||||||
if (inst->isBranch()) {
|
if (inst->isBranch()) {
|
||||||
std::cout << ", PC = " << inst->getTargetPc();
|
std::cout << ", PC = " << inst->getTargetPc();
|
||||||
|
|
|
@ -106,7 +106,7 @@ private:
|
||||||
|
|
||||||
GPUStaticInst* lastInstruction(const BasicBlock* block) const;
|
GPUStaticInst* lastInstruction(const BasicBlock* block) const;
|
||||||
|
|
||||||
BasicBlock* basicBlock(int inst_num) const;
|
BasicBlock* basicBlock(int inst_addr) const;
|
||||||
|
|
||||||
BasicBlock* postDominator(const BasicBlock* block) const;
|
BasicBlock* postDominator(const BasicBlock* block) const;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue