gpu-compute: remove inst enums and use bit flag for attributes

this patch removes the GPUStaticInst enums that were defined in GPU.py.
instead, a simple set of attribute flags that can be set in the base
instruction class is used. this will help unify the attributes of HSAIL
and machine ISA instructions within the model itself.

because the static instruction now carries the attributes, a GPUDynInst
must carry a pointer to a valid GPUStaticInst, so a new static kernel
launch instruction is added, which carries the attributes needed to
perform the kernel launch.
This commit is contained in:
Tony Gutierrez 2016-10-26 22:47:11 -04:00
parent e1ad8035a3
commit 7ac38849ab
28 changed files with 1257 additions and 1116 deletions

View file

@ -43,7 +43,6 @@ if env['TARGET_GPU_ISA'] == 'hsail':
env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'], env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
'gen.py', '$SOURCE $TARGETS') 'gen.py', '$SOURCE $TARGETS')
Source('generic_types.cc')
Source('gpu_decoder.cc') Source('gpu_decoder.cc')
Source('insts/branch.cc') Source('insts/branch.cc')
Source('insts/gen_exec.cc') Source('insts/gen_exec.cc')

View file

@ -1,47 +0,0 @@
#include "arch/hsail/generic_types.hh"
#include "base/misc.hh"
using namespace Brig;
namespace HsailISA
{
Enums::GenericMemoryOrder
getGenericMemoryOrder(BrigMemoryOrder brig_memory_order)
{
switch(brig_memory_order) {
case BRIG_MEMORY_ORDER_NONE:
return Enums::MEMORY_ORDER_NONE;
case BRIG_MEMORY_ORDER_RELAXED:
return Enums::MEMORY_ORDER_RELAXED;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
return Enums::MEMORY_ORDER_SC_ACQUIRE;
case BRIG_MEMORY_ORDER_SC_RELEASE:
return Enums::MEMORY_ORDER_SC_RELEASE;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
return Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE;
default:
fatal("HsailISA::MemInst::getGenericMemoryOrder -> ",
"bad BrigMemoryOrder\n");
}
}
Enums::GenericMemoryScope
getGenericMemoryScope(BrigMemoryScope brig_memory_scope)
{
switch(brig_memory_scope) {
case BRIG_MEMORY_SCOPE_NONE:
return Enums::MEMORY_SCOPE_NONE;
case BRIG_MEMORY_SCOPE_WORKITEM:
return Enums::MEMORY_SCOPE_WORKITEM;
case BRIG_MEMORY_SCOPE_WORKGROUP:
return Enums::MEMORY_SCOPE_WORKGROUP;
case BRIG_MEMORY_SCOPE_AGENT:
return Enums::MEMORY_SCOPE_DEVICE;
case BRIG_MEMORY_SCOPE_SYSTEM:
return Enums::MEMORY_SCOPE_SYSTEM;
default:
fatal("HsailISA::MemInst::getGenericMemoryScope -> ",
"bad BrigMemoryScope\n");
}
}
} // namespace HsailISA

View file

@ -1,16 +0,0 @@
#ifndef __ARCH_HSAIL_GENERIC_TYPES_HH__
#define __ARCH_HSAIL_GENERIC_TYPES_HH__
#include "arch/hsail/Brig.h"
#include "enums/GenericMemoryOrder.hh"
#include "enums/GenericMemoryScope.hh"
namespace HsailISA
{
Enums::GenericMemoryOrder
getGenericMemoryOrder(Brig::BrigMemoryOrder brig_memory_order);
Enums::GenericMemoryScope
getGenericMemoryScope(Brig::BrigMemoryScope brig_memory_scope);
} // namespace HsailISA
#endif // __ARCH_HSAIL_GENERIC_TYPES_HH__

View file

@ -59,16 +59,15 @@ namespace HsailISA
BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "brn") : HsailGPUStaticInst(obj, "brn")
{ {
o_type = Enums::OT_BRANCH; setFlag(Branch);
setFlag(UnconditionalJump);
width = ((Brig::BrigInstBr*)ib)->width; width = ((Brig::BrigInstBr*)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj); target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
} }
uint32_t getTargetPc() override { return target.getTarget(0, 0); } uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool unconditionalJumpInstruction() override { return true; }
bool isVectorRegister(int operandIndex) override { bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands()); assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isVectorRegister(); return target.isVectorRegister();
@ -175,13 +174,12 @@ namespace HsailISA
CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "cbr") : HsailGPUStaticInst(obj, "cbr")
{ {
o_type = Enums::OT_BRANCH; setFlag(Branch);
width = ((Brig::BrigInstBr *)ib)->width; width = ((Brig::BrigInstBr *)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
cond.init(op_offs, obj); cond.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1); op_offs = obj->getOperandPtr(ib->operands, 1);
target.init(op_offs, obj); target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
} }
uint32_t getTargetPc() override { return target.getTarget(0, 0); } uint32_t getTargetPc() override { return target.getTarget(0, 0); }
@ -343,17 +341,15 @@ namespace HsailISA
BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "br") : HsailGPUStaticInst(obj, "br")
{ {
o_type = Enums::OT_BRANCH; setFlag(Branch);
setFlag(UnconditionalJump);
width.init(((Brig::BrigInstBr *)ib)->width, obj); width.init(((Brig::BrigInstBr *)ib)->width, obj);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj); target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
} }
uint32_t getTargetPc() override { return target.getTarget(0, 0); } uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool unconditionalJumpInstruction() override { return true; }
void execute(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr gpuDynInst) override;
bool isVectorRegister(int operandIndex) override { bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands()); assert(operandIndex >= 0 && operandIndex < getNumOperands());

View file

@ -38,11 +38,9 @@
#include <cmath> #include <cmath>
#include "arch/hsail/generic_types.hh"
#include "arch/hsail/insts/gpu_static_inst.hh" #include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh" #include "arch/hsail/operand.hh"
#include "debug/HSAIL.hh" #include "debug/HSAIL.hh"
#include "enums/OpType.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh" #include "gpu-compute/shader.hh"
@ -127,6 +125,8 @@ namespace HsailISA
const char *opcode) const char *opcode)
: HsailGPUStaticInst(obj, opcode) : HsailGPUStaticInst(obj, opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -240,6 +240,8 @@ namespace HsailISA
const char *opcode) const char *opcode)
: HsailGPUStaticInst(obj, opcode) : HsailGPUStaticInst(obj, opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -414,6 +416,8 @@ namespace HsailISA
const BrigObject *obj, const char *opcode) const BrigObject *obj, const char *opcode)
: HsailGPUStaticInst(obj, opcode) : HsailGPUStaticInst(obj, opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -818,6 +822,8 @@ namespace HsailISA
const BrigObject *obj, const char *_opcode) const BrigObject *obj, const char *_opcode)
: HsailGPUStaticInst(obj, _opcode) : HsailGPUStaticInst(obj, _opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -874,7 +880,7 @@ namespace HsailISA
Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "ret") : Base(ib, obj, "ret")
{ {
o_type = Enums::OT_RET; setFlag(GPUStaticInst::Return);
} }
void execute(GPUDynInstPtr gpuDynInst); void execute(GPUDynInstPtr gpuDynInst);
@ -889,7 +895,7 @@ namespace HsailISA
Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "barrier") : Base(ib, obj, "barrier")
{ {
o_type = Enums::OT_BARRIER; setFlag(GPUStaticInst::MemBarrier);
assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
width = (uint8_t)((Brig::BrigInstBr*)ib)->width; width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
} }
@ -924,14 +930,105 @@ namespace HsailISA
memFenceMemOrder = (Brig::BrigMemoryOrder) memFenceMemOrder = (Brig::BrigMemoryOrder)
((Brig::BrigInstMemFence*)ib)->memoryOrder; ((Brig::BrigInstMemFence*)ib)->memoryOrder;
// set o_type based on scopes setFlag(MemoryRef);
setFlag(GPUStaticInst::MemFence);
switch (memFenceMemOrder) {
case Brig::BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case Brig::BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("MemInst has bad BrigMemoryOrder\n");
}
// set inst flags based on scopes
if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_BOTH_MEMFENCE; setFlag(GPUStaticInst::GlobalSegment);
/**
* A memory fence that has scope for
* both segments will use the global
* segment, and be executed in the
* global memory pipeline, therefore,
* we set the segment to match the
* global scope only
*/
switch (memFenceScopeSegGlobal) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad global scope type\n");
}
} else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_GLOBAL_MEMFENCE; setFlag(GPUStaticInst::GlobalSegment);
switch (memFenceScopeSegGlobal) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad global scope type\n");
}
} else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_SHARED_MEMFENCE; setFlag(GPUStaticInst::GroupSegment);
switch (memFenceScopeSegGroup) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad group scope type\n");
}
} else { } else {
fatal("MemFence constructor: bad scope specifiers\n"); fatal("MemFence constructor: bad scope specifiers\n");
} }
@ -955,18 +1052,13 @@ namespace HsailISA
// etc.). We send a packet, tagged with the memory order and // etc.). We send a packet, tagged with the memory order and
// scope, and let the GPU coalescer handle it. // scope, and let the GPU coalescer handle it.
if (o_type == Enums::OT_GLOBAL_MEMFENCE || if (isGlobalSeg()) {
o_type == Enums::OT_BOTH_MEMFENCE) {
gpuDynInst->simdId = w->simdId; gpuDynInst->simdId = w->simdId;
gpuDynInst->wfSlotId = w->wfSlotId; gpuDynInst->wfSlotId = w->wfSlotId;
gpuDynInst->wfDynId = w->wfDynId; gpuDynInst->wfDynId = w->wfDynId;
gpuDynInst->kern_id = w->kernId; gpuDynInst->kern_id = w->kernId;
gpuDynInst->cu_id = w->computeUnit->cu_id; gpuDynInst->cu_id = w->computeUnit->cu_id;
gpuDynInst->memoryOrder =
getGenericMemoryOrder(memFenceMemOrder);
gpuDynInst->scope =
getGenericMemoryScope(memFenceScopeSegGlobal);
gpuDynInst->useContinuation = false; gpuDynInst->useContinuation = false;
GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
gmp->getGMReqFIFO().push(gpuDynInst); gmp->getGMReqFIFO().push(gpuDynInst);
@ -975,10 +1067,10 @@ namespace HsailISA
w->rdGmReqsInPipe--; w->rdGmReqsInPipe--;
w->memReqsInPipe--; w->memReqsInPipe--;
w->outstandingReqs++; w->outstandingReqs++;
} else if (o_type == Enums::OT_SHARED_MEMFENCE) { } else if (isGroupSeg()) {
// no-op // no-op
} else { } else {
fatal("MemFence execute: bad o_type\n"); fatal("MemFence execute: bad op type\n");
} }
} }
}; };
@ -1054,6 +1146,7 @@ namespace HsailISA
Call(const Brig::BrigInstBase *ib, const BrigObject *obj) Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "call") : HsailGPUStaticInst(obj, "call")
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1); op_offs = obj->getOperandPtr(ib->operands, 1);

View file

@ -179,12 +179,13 @@ namespace HsailISA
w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId); w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
if (!refCount) { if (!refCount) {
setFlag(SystemScope);
setFlag(Release);
setFlag(GlobalSegment);
// Notify Memory System of Kernel Completion // Notify Memory System of Kernel Completion
// Kernel End = isKernel + isRelease // Kernel End = isKernel + isRelease
w->status = Wavefront::S_RETURNING; w->status = Wavefront::S_RETURNING;
GPUDynInstPtr local_mempacket = gpuDynInst; GPUDynInstPtr local_mempacket = gpuDynInst;
local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE;
local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM;
local_mempacket->useContinuation = false; local_mempacket->useContinuation = false;
local_mempacket->simdId = w->simdId; local_mempacket->simdId = w->simdId;
local_mempacket->wfSlotId = w->wfSlotId; local_mempacket->wfSlotId = w->wfSlotId;

View file

@ -36,7 +36,6 @@
#include "arch/hsail/insts/mem.hh" #include "arch/hsail/insts/mem.hh"
#include "arch/hsail/Brig.h" #include "arch/hsail/Brig.h"
#include "enums/OpType.hh"
using namespace Brig; using namespace Brig;
@ -44,68 +43,6 @@ namespace HsailISA
{ {
const char* atomicOpToString(BrigAtomicOperation brigOp); const char* atomicOpToString(BrigAtomicOperation brigOp);
Enums::MemOpType
brigAtomicToMemOpType(BrigOpcode brigOpCode, BrigAtomicOperation brigOp)
{
if (brigOpCode == Brig::BRIG_OPCODE_ATOMIC) {
switch (brigOp) {
case BRIG_ATOMIC_AND:
return Enums::MO_AAND;
case BRIG_ATOMIC_OR:
return Enums::MO_AOR;
case BRIG_ATOMIC_XOR:
return Enums::MO_AXOR;
case BRIG_ATOMIC_CAS:
return Enums::MO_ACAS;
case BRIG_ATOMIC_EXCH:
return Enums::MO_AEXCH;
case BRIG_ATOMIC_ADD:
return Enums::MO_AADD;
case BRIG_ATOMIC_WRAPINC:
return Enums::MO_AINC;
case BRIG_ATOMIC_WRAPDEC:
return Enums::MO_ADEC;
case BRIG_ATOMIC_MIN:
return Enums::MO_AMIN;
case BRIG_ATOMIC_MAX:
return Enums::MO_AMAX;
case BRIG_ATOMIC_SUB:
return Enums::MO_ASUB;
default:
fatal("Bad BrigAtomicOperation code %d\n", brigOp);
}
} else if (brigOpCode == Brig::BRIG_OPCODE_ATOMICNORET) {
switch (brigOp) {
case BRIG_ATOMIC_AND:
return Enums::MO_ANRAND;
case BRIG_ATOMIC_OR:
return Enums::MO_ANROR;
case BRIG_ATOMIC_XOR:
return Enums::MO_ANRXOR;
case BRIG_ATOMIC_CAS:
return Enums::MO_ANRCAS;
case BRIG_ATOMIC_EXCH:
return Enums::MO_ANREXCH;
case BRIG_ATOMIC_ADD:
return Enums::MO_ANRADD;
case BRIG_ATOMIC_WRAPINC:
return Enums::MO_ANRINC;
case BRIG_ATOMIC_WRAPDEC:
return Enums::MO_ANRDEC;
case BRIG_ATOMIC_MIN:
return Enums::MO_ANRMIN;
case BRIG_ATOMIC_MAX:
return Enums::MO_ANRMAX;
case BRIG_ATOMIC_SUB:
return Enums::MO_ANRSUB;
default:
fatal("Bad BrigAtomicOperation code %d\n", brigOp);
}
} else {
fatal("Bad BrigAtomicOpcode %d\n", brigOpCode);
}
}
const char* const char*
atomicOpToString(BrigAtomicOperation brigOp) atomicOpToString(BrigAtomicOperation brigOp)
{ {

View file

@ -96,6 +96,8 @@ namespace HsailISA
{ {
using namespace Brig; using namespace Brig;
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1); op_offs = obj->getOperandPtr(ib->operands, 1);
@ -211,131 +213,6 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder; Brig::BrigMemoryOrder memoryOrder;
Brig::BrigMemoryScope memoryScope; Brig::BrigMemoryScope memoryScope;
unsigned int equivClass; unsigned int equivClass;
bool isArgLoad()
{
return segment == Brig::BRIG_SEGMENT_KERNARG ||
segment == Brig::BRIG_SEGMENT_ARG;
}
void
initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_READ;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_READ;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_READ;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_READ;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_READ;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_READ;
break;
case BRIG_SEGMENT_KERNARG:
o_type = Enums::OT_KERN_READ;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
width = ldst->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
}
void
initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
memoryScope = (BrigMemoryScope)at->memoryScope;
equivClass = 0;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_READ;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_READ;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_READ;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_READ;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_READ;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_READ;
break;
case BRIG_SEGMENT_KERNARG:
o_type = Enums::OT_KERN_READ;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
width = BRIG_WIDTH_1;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands,1);
addr.init(op_offs, obj);
}
LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode) const char *_opcode)
@ -343,10 +220,111 @@ namespace HsailISA
{ {
using namespace Brig; using namespace Brig;
setFlag(MemoryRef);
setFlag(Load);
if (ib->opcode == BRIG_OPCODE_LD) { if (ib->opcode == BRIG_OPCODE_LD) {
initLd(ib, obj, _opcode); const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
width = ldst->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
} else { } else {
initAtomicLd(ib, obj, _opcode); const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
memoryScope = (BrigMemoryScope)at->memoryScope;
equivClass = 0;
width = BRIG_WIDTH_1;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands,1);
addr.init(op_offs, obj);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("LdInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("LdInst has bad memory scope type\n");
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_PRIVATE:
setFlag(PrivateSegment);
break;
case BRIG_SEGMENT_READONLY:
setFlag(ReadOnlySegment);
break;
case BRIG_SEGMENT_SPILL:
setFlag(SpillSegment);
break;
case BRIG_SEGMENT_FLAT:
setFlag(Flat);
break;
case BRIG_SEGMENT_KERNARG:
setFlag(KernArgSegment);
break;
case BRIG_SEGMENT_ARG:
setFlag(ArgSegment);
break;
default:
panic("Ld: segment %d not supported\n", segment);
} }
} }
@ -473,7 +451,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) { if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
if (isLocalMem()) { if (this->isLocalMem()) {
// load from shared memory // load from shared memory
*d = gpuDynInst->wavefront()->ldsChunk-> *d = gpuDynInst->wavefront()->ldsChunk->
read<c0>(vaddr); read<c0>(vaddr);
@ -488,8 +466,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader-> if (gpuDynInst->computeUnit()->shader->
separate_acquire_release && separate_acquire_release &&
gpuDynInst->memoryOrder == gpuDynInst->isAcquire()) {
Enums::MEMORY_ORDER_SC_ACQUIRE) {
// if this load has acquire semantics, // if this load has acquire semantics,
// set the response continuation function // set the response continuation function
// to perform an Acquire request // to perform an Acquire request
@ -520,10 +497,9 @@ namespace HsailISA
{ {
// after the load has complete and if the load has acquire // after the load has complete and if the load has acquire
// semantics, issue an acquire request. // semantics, issue an acquire request.
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder == && gpuDynInst->isAcquire()) {
Enums::MEMORY_ORDER_SC_ACQUIRE) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->useContinuation = false; gpuDynInst->useContinuation = false;
// create request // create request
@ -537,12 +513,6 @@ namespace HsailISA
} }
public: public:
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
bool isVectorRegister(int operandIndex) override bool isVectorRegister(int operandIndex) override
{ {
assert((operandIndex >= 0) && (operandIndex < getNumOperands())); assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
@ -731,127 +701,112 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder; Brig::BrigMemoryOrder memoryOrder;
unsigned int equivClass; unsigned int equivClass;
void
initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_WRITE;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_WRITE;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_WRITE;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_WRITE;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_WRITE;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_WRITE;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("St: segment %d not supported\n", segment);
}
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const BrigOperand *baseOp = obj->getOperand(op_offs);
if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
(baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
src.init(op_offs, obj);
}
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
}
void
initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryScope = (BrigMemoryScope)at->memoryScope;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
equivClass = 0;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_WRITE;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_WRITE;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_WRITE;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_WRITE;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_WRITE;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_WRITE;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("St: segment %d not supported\n", segment);
}
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
addr.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
src.init(op_offs, obj);
}
StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode) const char *_opcode)
: HsailGPUStaticInst(obj, _opcode) : HsailGPUStaticInst(obj, _opcode)
{ {
using namespace Brig; using namespace Brig;
setFlag(MemoryRef);
setFlag(Store);
if (ib->opcode == BRIG_OPCODE_ST) { if (ib->opcode == BRIG_OPCODE_ST) {
initSt(ib, obj, _opcode); const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const BrigOperand *baseOp = obj->getOperand(op_offs);
if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
(baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
src.init(op_offs, obj);
}
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
} else { } else {
initAtomicSt(ib, obj, _opcode); const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryScope = (BrigMemoryScope)at->memoryScope;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
equivClass = 0;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
addr.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
src.init(op_offs, obj);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("StInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("StInst has bad memory scope type\n");
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_PRIVATE:
setFlag(PrivateSegment);
break;
case BRIG_SEGMENT_READONLY:
setFlag(ReadOnlySegment);
break;
case BRIG_SEGMENT_SPILL:
setFlag(SpillSegment);
break;
case BRIG_SEGMENT_FLAT:
setFlag(Flat);
break;
case BRIG_SEGMENT_ARG:
setFlag(ArgSegment);
break;
default:
panic("St: segment %d not supported\n", segment);
} }
} }
@ -964,10 +919,9 @@ namespace HsailISA
{ {
// before performing a store, check if this store has // before performing a store, check if this store has
// release semantics, and if so issue a release first // release semantics, and if so issue a release first
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder == && gpuDynInst->isRelease()) {
Enums::MEMORY_ORDER_SC_RELEASE) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->execContinuation = &GPUStaticInst::execSt; gpuDynInst->execContinuation = &GPUStaticInst::execSt;
@ -987,12 +941,6 @@ namespace HsailISA
execSt(gpuDynInst); execSt(gpuDynInst);
} }
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
private: private:
// execSt may be called through a continuation // execSt may be called through a continuation
// if the store had release semantics. see comment for // if the store had release semantics. see comment for
@ -1020,7 +968,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) { if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
if (isLocalMem()) { if (this->isLocalMem()) {
//store to shared memory //store to shared memory
gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr, gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
*d); *d);
@ -1166,9 +1114,6 @@ namespace HsailISA
} }
} }
Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
Brig::BrigAtomicOperation brigOp);
template<typename OperandType, typename AddrOperandType, int NumSrcOperands, template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
bool HasDst> bool HasDst>
class AtomicInstBase : public HsailGPUStaticInst class AtomicInstBase : public HsailGPUStaticInst
@ -1183,7 +1128,6 @@ namespace HsailISA
Brig::BrigAtomicOperation atomicOperation; Brig::BrigAtomicOperation atomicOperation;
Brig::BrigMemoryScope memoryScope; Brig::BrigMemoryScope memoryScope;
Brig::BrigOpcode opcode; Brig::BrigOpcode opcode;
Enums::MemOpType opType;
AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode) const char *_opcode)
@ -1198,21 +1142,106 @@ namespace HsailISA
memoryOrder = (BrigMemoryOrder)at->memoryOrder; memoryOrder = (BrigMemoryOrder)at->memoryOrder;
atomicOperation = (BrigAtomicOperation)at->atomicOperation; atomicOperation = (BrigAtomicOperation)at->atomicOperation;
opcode = (BrigOpcode)ib->opcode; opcode = (BrigOpcode)ib->opcode;
opType = brigAtomicToMemOpType(opcode, atomicOperation);
assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
opcode == Brig::BRIG_OPCODE_ATOMIC);
setFlag(MemoryRef);
if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("AtomicInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("AtomicInst has bad memory scope type\n");
}
switch (atomicOperation) {
case Brig::BRIG_ATOMIC_AND:
setFlag(AtomicAnd);
break;
case Brig::BRIG_ATOMIC_OR:
setFlag(AtomicOr);
break;
case Brig::BRIG_ATOMIC_XOR:
setFlag(AtomicXor);
break;
case Brig::BRIG_ATOMIC_CAS:
setFlag(AtomicCAS);
break;
case Brig::BRIG_ATOMIC_EXCH:
setFlag(AtomicExch);
break;
case Brig::BRIG_ATOMIC_ADD:
setFlag(AtomicAdd);
break;
case Brig::BRIG_ATOMIC_WRAPINC:
setFlag(AtomicInc);
break;
case Brig::BRIG_ATOMIC_WRAPDEC:
setFlag(AtomicDec);
break;
case Brig::BRIG_ATOMIC_MIN:
setFlag(AtomicMin);
break;
case Brig::BRIG_ATOMIC_MAX:
setFlag(AtomicMax);
break;
case Brig::BRIG_ATOMIC_SUB:
setFlag(AtomicSub);
break;
default:
fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
}
switch (segment) { switch (segment) {
case BRIG_SEGMENT_GLOBAL: case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_ATOMIC; setFlag(GlobalSegment);
break; break;
case BRIG_SEGMENT_GROUP: case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_ATOMIC; setFlag(GroupSegment);
break; break;
case BRIG_SEGMENT_FLAT: case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_ATOMIC; setFlag(Flat);
break; break;
default: default:
panic("Atomic: segment %d not supported\n", segment); panic("Atomic: segment %d not supported\n", segment);
} }
@ -1354,11 +1383,10 @@ namespace HsailISA
{ {
// before doing the RMW, check if this atomic has // before doing the RMW, check if this atomic has
// release semantics, and if so issue a release first // release semantics, and if so issue a release first
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& (gpuDynInst->memoryOrder == && (gpuDynInst->isRelease()
Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder == || gpuDynInst->isAcquireRelease())) {
Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
@ -1383,12 +1411,6 @@ namespace HsailISA
void execute(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr gpuDynInst) override;
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
private: private:
// execAtomic may be called through a continuation // execAtomic may be called through a continuation
// if the RMW had release semantics. see comment for // if the RMW had release semantics. see comment for
@ -1408,72 +1430,48 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) { if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i]; Addr vaddr = gpuDynInst->addr[i];
if (isLocalMem()) { if (this->isLocalMem()) {
Wavefront *wavefront = gpuDynInst->wavefront(); Wavefront *wavefront = gpuDynInst->wavefront();
*d = wavefront->ldsChunk->read<c0>(vaddr); *d = wavefront->ldsChunk->read<c0>(vaddr);
switch (this->opType) { if (this->isAtomicAdd()) {
case Enums::MO_AADD:
case Enums::MO_ANRADD:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + (*e)); wavefront->ldsChunk->read<c0>(vaddr) + (*e));
break; } else if (this->isAtomicSub()) {
case Enums::MO_ASUB:
case Enums::MO_ANRSUB:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - (*e)); wavefront->ldsChunk->read<c0>(vaddr) - (*e));
break; } else if (this->isAtomicMax()) {
case Enums::MO_AMAX:
case Enums::MO_ANRMAX:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
std::max(wavefront->ldsChunk->read<c0>(vaddr), std::max(wavefront->ldsChunk->read<c0>(vaddr),
(*e))); (*e)));
break; } else if (this->isAtomicMin()) {
case Enums::MO_AMIN:
case Enums::MO_ANRMIN:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
std::min(wavefront->ldsChunk->read<c0>(vaddr), std::min(wavefront->ldsChunk->read<c0>(vaddr),
(*e))); (*e)));
break; } else if (this->isAtomicAnd()) {
case Enums::MO_AAND:
case Enums::MO_ANRAND:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) & (*e)); wavefront->ldsChunk->read<c0>(vaddr) & (*e));
break; } else if (this->isAtomicOr()) {
case Enums::MO_AOR:
case Enums::MO_ANROR:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) | (*e)); wavefront->ldsChunk->read<c0>(vaddr) | (*e));
break; } else if (this->isAtomicXor()) {
case Enums::MO_AXOR:
case Enums::MO_ANRXOR:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) ^ (*e)); wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
break; } else if (this->isAtomicInc()) {
case Enums::MO_AINC:
case Enums::MO_ANRINC:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + 1); wavefront->ldsChunk->read<c0>(vaddr) + 1);
break; } else if (this->isAtomicDec()) {
case Enums::MO_ADEC:
case Enums::MO_ANRDEC:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - 1); wavefront->ldsChunk->read<c0>(vaddr) - 1);
break; } else if (this->isAtomicExch()) {
case Enums::MO_AEXCH:
case Enums::MO_ANREXCH:
wavefront->ldsChunk->write<c0>(vaddr, (*e)); wavefront->ldsChunk->write<c0>(vaddr, (*e));
break; } else if (this->isAtomicCAS()) {
case Enums::MO_ACAS:
case Enums::MO_ANRCAS:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
(wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ? (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
(*f) : wavefront->ldsChunk->read<c0>(vaddr)); (*f) : wavefront->ldsChunk->read<c0>(vaddr));
break; } else {
default:
fatal("Unrecognized or invalid HSAIL atomic op " fatal("Unrecognized or invalid HSAIL atomic op "
"type.\n"); "type.\n");
break;
} }
} else { } else {
Request *req = Request *req =
@ -1481,7 +1479,7 @@ namespace HsailISA
gpuDynInst->computeUnit()->masterId(), gpuDynInst->computeUnit()->masterId(),
0, gpuDynInst->wfDynId, 0, gpuDynInst->wfDynId,
gpuDynInst->makeAtomicOpFunctor<c0>(e, gpuDynInst->makeAtomicOpFunctor<c0>(e,
f, this->opType)); f));
gpuDynInst->setRequestFlags(req); gpuDynInst->setRequestFlags(req);
PacketPtr pkt = new Packet(req, MemCmd::SwapReq); PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
@ -1489,8 +1487,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader-> if (gpuDynInst->computeUnit()->shader->
separate_acquire_release && separate_acquire_release &&
(gpuDynInst->memoryOrder == (gpuDynInst->isAcquire())) {
Enums::MEMORY_ORDER_SC_ACQUIRE)) {
// if this atomic has acquire semantics, // if this atomic has acquire semantics,
// schedule the continuation to perform an // schedule the continuation to perform an
// acquire after the RMW completes // acquire after the RMW completes
@ -1523,10 +1520,9 @@ namespace HsailISA
{ {
// after performing the RMW, check to see if this instruction // after performing the RMW, check to see if this instruction
// has acquire semantics, and if so, issue an acquire // has acquire semantics, and if so, issue an acquire
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder == && gpuDynInst->isAcquire()) {
Enums::MEMORY_ORDER_SC_ACQUIRE) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
// the request will be finished when // the request will be finished when

View file

@ -33,7 +33,6 @@
* Author: Steve Reinhardt * Author: Steve Reinhardt
*/ */
#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh" #include "gpu-compute/hsail_code.hh"
// defined in code.cc, but not worth sucking in all of code.h for this // defined in code.cc, but not worth sucking in all of code.h for this
@ -215,16 +214,12 @@ namespace HsailISA
this->addr.calcVector(w, m->addr); this->addr.calcVector(w, m->addr);
m->m_op = Enums::MO_LD;
m->m_type = MemDataType::memType; m->m_type = MemDataType::memType;
m->v_type = DestDataType::vgprType; m->v_type = DestDataType::vgprType;
m->exec_mask = w->execMask(); m->exec_mask = w->execMask();
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = this->equivClass; m->equiv = this->equivClass;
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
if (num_dest_operands == 1) { if (num_dest_operands == 1) {
m->dst_reg = this->dest.regIndex(); m->dst_reg = this->dest.regIndex();
@ -245,7 +240,6 @@ namespace HsailISA
switch (this->segment) { switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL: case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
@ -276,7 +270,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL: case Brig::BRIG_SEGMENT_SPILL:
assert(num_dest_operands == 1); assert(num_dest_operands == 1);
m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -301,7 +294,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_GROUP: case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE; m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24)); m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@ -310,7 +302,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_READONLY: case Brig::BRIG_SEGMENT_READONLY:
m->s_type = SEG_READONLY;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
@ -327,7 +318,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_PRIVATE: case Brig::BRIG_SEGMENT_PRIVATE:
m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -408,7 +398,6 @@ namespace HsailISA
} }
} }
m->m_op = Enums::MO_ST;
m->m_type = OperationType::memType; m->m_type = OperationType::memType;
m->v_type = OperationType::vgprType; m->v_type = OperationType::vgprType;
@ -421,10 +410,6 @@ namespace HsailISA
m->n_reg = num_src_operands; m->n_reg = num_src_operands;
} }
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
m->simdId = w->simdId; m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId; m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
@ -434,7 +419,6 @@ namespace HsailISA
switch (this->segment) { switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL: case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
@ -463,7 +447,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL: case Brig::BRIG_SEGMENT_SPILL:
assert(num_src_operands == 1); assert(num_src_operands == 1);
m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -483,7 +466,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_GROUP: case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE; m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24)); m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@ -492,7 +474,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_PRIVATE: case Brig::BRIG_SEGMENT_PRIVATE:
m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -586,7 +567,6 @@ namespace HsailISA
assert(NumSrcOperands <= 2); assert(NumSrcOperands <= 2);
m->m_op = this->opType;
m->m_type = DataType::memType; m->m_type = DataType::memType;
m->v_type = DataType::vgprType; m->v_type = DataType::vgprType;
@ -594,9 +574,6 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
if (HasDst) { if (HasDst) {
m->dst_reg = this->dest.regIndex(); m->dst_reg = this->dest.regIndex();
@ -611,7 +588,6 @@ namespace HsailISA
switch (this->segment) { switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL: case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->latency.set(w->computeUnit->shader->ticks(64)); m->latency.set(w->computeUnit->shader->ticks(64));
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
@ -623,7 +599,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_GROUP: case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE; m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24)); m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);

View file

@ -627,8 +627,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3); ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
} }
m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET, setFlag(AtomicNoReturn);
Brig::BRIG_ATOMIC_ADD); setFlag(AtomicAdd);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; m->m_type = U32::memType;
m->v_type = U32::vgprType; m->v_type = U32::vgprType;
@ -636,15 +640,12 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId; m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId; m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt); m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64)); m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@ -666,8 +667,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1); ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
} }
m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET, setFlag(AtomicNoReturn);
Brig::BRIG_ATOMIC_ADD); setFlag(AtomicAdd);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; m->m_type = U32::memType;
m->v_type = U32::vgprType; m->v_type = U32::vgprType;
@ -675,15 +680,12 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId; m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId; m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt); m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64)); m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@ -702,7 +704,11 @@ namespace HsailISA
// calculate the address // calculate the address
calcAddr(w, m); calcAddr(w, m);
m->m_op = Enums::MO_LD; setFlag(Load);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; //MemDataType::memType; m->m_type = U32::memType; //MemDataType::memType;
m->v_type = U32::vgprType; //DestDataType::vgprType; m->v_type = U32::vgprType; //DestDataType::vgprType;
@ -710,8 +716,6 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; m->equiv = 0;
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
// FIXME // FIXME
//m->dst_reg = this->dest.regIndex(); //m->dst_reg = this->dest.regIndex();
@ -721,7 +725,6 @@ namespace HsailISA
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt); m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);

View file

@ -171,56 +171,6 @@ class GpuDispatcher(DmaDevice):
cl_driver = Param.ClDriver('pointer to driver') cl_driver = Param.ClDriver('pointer to driver')
class OpType(Enum): vals = [
'OT_NULL',
'OT_ALU',
'OT_SPECIAL',
'OT_GLOBAL_READ',
'OT_GLOBAL_WRITE',
'OT_GLOBAL_ATOMIC',
'OT_GLOBAL_HIST',
'OT_GLOBAL_LDAS',
'OT_SHARED_READ',
'OT_SHARED_WRITE',
'OT_SHARED_ATOMIC',
'OT_SHARED_HIST',
'OT_SHARED_LDAS',
'OT_PRIVATE_READ',
'OT_PRIVATE_WRITE',
'OT_PRIVATE_ATOMIC',
'OT_PRIVATE_HIST',
'OT_PRIVATE_LDAS',
'OT_SPILL_READ',
'OT_SPILL_WRITE',
'OT_SPILL_ATOMIC',
'OT_SPILL_HIST',
'OT_SPILL_LDAS',
'OT_READONLY_READ',
'OT_READONLY_WRITE',
'OT_READONLY_ATOMIC',
'OT_READONLY_HIST',
'OT_READONLY_LDAS',
'OT_FLAT_READ',
'OT_FLAT_WRITE',
'OT_FLAT_ATOMIC',
'OT_FLAT_HIST',
'OT_FLAT_LDAS',
'OT_KERN_READ',
'OT_BRANCH',
# note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F version
# of the compiler.
'OT_SHARED_MEMFENCE',
'OT_GLOBAL_MEMFENCE',
'OT_BOTH_MEMFENCE',
'OT_BARRIER',
'OT_PRINT',
'OT_RET',
'OT_NOP',
'OT_ARG'
]
class MemType(Enum): vals = [ class MemType(Enum): vals = [
'M_U8', 'M_U8',
'M_U16', 'M_U16',
@ -235,47 +185,6 @@ class MemType(Enum): vals = [
'M_F64', 'M_F64',
] ]
class MemOpType(Enum): vals = [
'MO_LD',
'MO_ST',
'MO_LDAS',
'MO_LDA',
'MO_AAND',
'MO_AOR',
'MO_AXOR',
'MO_ACAS',
'MO_AEXCH',
'MO_AADD',
'MO_ASUB',
'MO_AINC',
'MO_ADEC',
'MO_AMAX',
'MO_AMIN',
'MO_ANRAND',
'MO_ANROR',
'MO_ANRXOR',
'MO_ANRCAS',
'MO_ANREXCH',
'MO_ANRADD',
'MO_ANRSUB',
'MO_ANRINC',
'MO_ANRDEC',
'MO_ANRMAX',
'MO_ANRMIN',
'MO_HAND',
'MO_HOR',
'MO_HXOR',
'MO_HCAS',
'MO_HEXCH',
'MO_HADD',
'MO_HSUB',
'MO_HINC',
'MO_HDEC',
'MO_HMAX',
'MO_HMIN',
'MO_UNDEF'
]
class StorageClassType(Enum): vals = [ class StorageClassType(Enum): vals = [
'SC_SPILL', 'SC_SPILL',
'SC_GLOBAL', 'SC_GLOBAL',
@ -293,20 +202,3 @@ class RegisterType(Enum): vals = [
'RT_HARDWARE', 'RT_HARDWARE',
'RT_NONE', 'RT_NONE',
] ]
class GenericMemoryOrder(Enum): vals = [
'MEMORY_ORDER_NONE',
'MEMORY_ORDER_RELAXED',
'MEMORY_ORDER_SC_ACQUIRE',
'MEMORY_ORDER_SC_RELEASE',
'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
]
class GenericMemoryScope(Enum): vals = [
'MEMORY_SCOPE_NONE',
'MEMORY_SCOPE_WORKITEM',
'MEMORY_SCOPE_WAVEFRONT',
'MEMORY_SCOPE_WORKGROUP',
'MEMORY_SCOPE_DEVICE',
'MEMORY_SCOPE_SYSTEM',
]

View file

@ -0,0 +1,111 @@
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Anthony Gutierrez
from m5.params import *
class GPUStaticInstFlags(Enum):
wrapper_name = 'GPUStaticInstFlags'
wrapper_is_struct = True
enum_name = 'Flags'
vals = [
# Op types
'ALU', # ALU op
'Branch', # Branch instruction
'Nop', # No-op (no effect at all)
'Return', # Return instruction
'UnconditionalJump', #
'SpecialOp', # Special op
'Waitcnt', # Is a waitcnt instruction
# Memory ops
'MemBarrier', # Barrier instruction
'MemFence', # Memory fence instruction
'MemoryRef', # References memory (load, store, or atomic)
'Flat', # Flat memory op
'Load', # Reads from memory
'Store', # Writes to memory
# Atomic ops
'AtomicReturn', # Atomic instruction that returns data
'AtomicNoReturn', # Atomic instruction that doesn't return data
# Instruction attributes
'Scalar', # A scalar (not vector) operation
'ReadsSCC', # The instruction reads SCC
'WritesSCC', # The instruction writes SCC
'ReadsVCC', # The instruction reads VCC
'WritesVCC', # The instruction writes VCC
# Atomic OP types
'AtomicAnd',
'AtomicOr',
'AtomicXor',
'AtomicCAS',
'AtomicExch',
'AtomicAdd',
'AtomicSub',
'AtomicInc',
'AtomicDec',
'AtomicMax',
'AtomicMin',
# Memory order flags
'RelaxedOrder',
'Acquire', # Has acquire semantics
'Release', # Has release semantics
'AcquireRelease', # Has acquire and release semantics
'NoOrder', # Has no ordering restrictions
# Segment access flags
'ArgSegment', # Accesses the arg segment
'GlobalSegment', # Accesses global memory
'GroupSegment', # Accesses local memory (LDS), aka shared memory
'KernArgSegment', # Accesses the kernel argument segment
'PrivateSegment', # Accesses the private segment
'ReadOnlySegment', # Accesses read only memory
'SpillSegment', # Accesses the spill segment
'NoSegment', # Does not have an associated segment
# Scope flags
'WorkitemScope',
'WavefrontScope',
'WorkgroupScope',
'DeviceScope',
'SystemScope',
'NoScope', # Does not have an associated scope
# Coherence flags
'GloballyCoherent', # Coherent with other workitems on same device
'SystemCoherent' # Coherent with a different device, or the host
]

View file

@ -41,6 +41,7 @@ if not env['BUILD_GPU']:
Return() Return()
SimObject('GPU.py') SimObject('GPU.py')
SimObject('GPUStaticInstFlags.py')
SimObject('LdsState.py') SimObject('LdsState.py')
SimObject('X86GPUTLB.py') SimObject('X86GPUTLB.py')

View file

@ -1,116 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __CODE_ENUMS_HH__
#define __CODE_ENUMS_HH__
#define IS_OT_GLOBAL(a) ((a)>=Enums::OT_GLOBAL_READ \
&& (a)<=Enums::OT_GLOBAL_LDAS)
#define IS_OT_SHARED(a) ((a)>=Enums::OT_SHARED_READ \
&& (a)<=Enums::OT_SHARED_LDAS)
#define IS_OT_PRIVATE(a) ((a)>=Enums::OT_PRIVATE_READ \
&& (a)<=Enums::OT_PRIVATE_LDAS)
#define IS_OT_SPILL(a) ((a)>=Enums::OT_SPILL_READ \
&& (a)<=Enums::OT_SPILL_LDAS)
#define IS_OT_READONLY(a) ((a)>=Enums::OT_READONLY_READ \
&& (a)<=Enums::OT_READONLY_LDAS)
#define IS_OT_FLAT(a) ((a)>=Enums::OT_FLAT_READ && (a)<=Enums::OT_FLAT_LDAS)
#define IS_OT_LDAS(a) ((a)==Enums::OT_GLOBAL_LDAS||(a)==Enums::OT_SHARED_LDAS \
||(a)==Enums::OT_PRIVATE_LDAS||(a)==Enums::OT_SPILL_LDAS \
||(a)==Enums::OT_READONLY_LDAS||(a)==Enums::OT_FLAT_LDAS)
#define IS_OT_READ(a) ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SHARED_READ \
||(a)==Enums::OT_PRIVATE_READ||(a)==Enums::OT_SPILL_READ \
||(a)==Enums::OT_READONLY_READ||(a)==Enums::OT_FLAT_READ)
#define IS_OT_READ_GM(a) \
((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SPILL_READ \
||(a)==Enums::OT_READONLY_READ)
#define IS_OT_READ_LM(a) ((a)==Enums::OT_SHARED_READ)
#define IS_OT_READ_RM(a) ((a)==Enums::OT_READONLY_READ)
#define IS_OT_READ_PM(a) ((a)==Enums::OT_PRIVATE_READ)
#define IS_OT_WRITE(a) \
((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SHARED_WRITE \
||(a)==Enums::OT_PRIVATE_WRITE||(a)==Enums::OT_SPILL_WRITE \
||(a)==Enums::OT_READONLY_WRITE||(a)==Enums::OT_FLAT_WRITE)
#define IS_OT_WRITE_GM(a) \
((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SPILL_WRITE \
||(a)==Enums::OT_READONLY_WRITE)
#define IS_OT_WRITE_LM(a) ((a)==Enums::OT_SHARED_WRITE)
#define IS_OT_WRITE_PM(a) ((a)==Enums::OT_PRIVATE_WRITE)
#define IS_OT_ATOMIC(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
||(a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_PRIVATE_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_BOTH_MEMFENCE \
||(a)==Enums::OT_FLAT_ATOMIC)
#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_GLOBAL_MEMFENCE \
||(a)==Enums::OT_BOTH_MEMFENCE)
#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_SHARED_MEMFENCE)
#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
#define IS_OT_HIST(a) ((a)==Enums::OT_GLOBAL_HIST \
||(a)==Enums::OT_SHARED_HIST \
||(a)==Enums::OT_PRIVATE_HIST \
||(a)==Enums::OT_SPILL_HIST \
||(a)==Enums::OT_READONLY_HIST \
||(a)==Enums::OT_FLAT_HIST)
#define IS_OT_HIST_GM(a) ((a)==Enums::OT_GLOBAL_HIST \
||(a)==Enums::OT_SPILL_HIST \
||(a)==Enums::OT_READONLY_HIST)
#define IS_OT_HIST_LM(a) ((a)==Enums::OT_SHARED_HIST)
#define IS_OT_HIST_PM(a) ((a)==Enums::OT_PRIVATE_HIST)
#endif // __CODE_ENUMS_HH__

View file

@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()), req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()), resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")), _masterId(p->system->getMasterId(name() + ".ComputeUnit")),
lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize) lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
kernelLaunchInst(new KernelLaunchStaticInst())
{ {
/** /**
* This check is necessary because std::bitset only provides conversion * This check is necessary because std::bitset only provides conversion
@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
// Send L1 cache acquire // Send L1 cache acquire
// isKernel + isAcquire = Kernel Begin // isKernel + isAcquire = Kernel Begin
if (shader->impl_kern_boundary_sync) { if (shader->impl_kern_boundary_sync) {
GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this, GPUDynInstPtr gpuDynInst =
nullptr, std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst,
nullptr, 0); getAndIncSeqNum());
gpuDynInst->useContinuation = false; gpuDynInst->useContinuation = false;
gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE;
gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM;
injectGlobalMemFence(gpuDynInst, true); injectGlobalMemFence(gpuDynInst, true);
} }
@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
gpuDynInst->wfSlotId, w->barrierCnt); gpuDynInst->wfSlotId, w->barrierCnt);
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }
@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true; return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) { } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }
@ -942,6 +941,8 @@ void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch, ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
Request* req) Request* req)
{ {
assert(gpuDynInst->isGlobalSeg());
if (!req) { if (!req) {
req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId); req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
} }
@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
req->setFlags(Request::KERNEL); req->setFlags(Request::KERNEL);
} }
gpuDynInst->s_type = SEG_GLOBAL;
// for non-kernel MemFence operations, memorder flags are set depending // for non-kernel MemFence operations, memorder flags are set depending
// on which type of request is currently being sent, so this // on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel // should be set by the caller (e.g. if an inst has acq-rel
@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
gpuDynInst->statusVector.clear(); gpuDynInst->statusVector.clear();
if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op) if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
|| MO_ANR(gpuDynInst->m_op)) {
assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy()); assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy());
compute_unit->globalMemoryPipe.getGMLdRespFIFO() compute_unit->globalMemoryPipe.getGMLdRespFIFO()
@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
// the continuation may generate more work for // the continuation may generate more work for
// this memory request // this memory request
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }
@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
gpuDynInst->statusBitVector = VectorMask(0); gpuDynInst->statusBitVector = VectorMask(0);
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }

View file

@ -744,6 +744,7 @@ class ComputeUnit : public MemObject
private: private:
uint64_t globalSeqNum; uint64_t globalSeqNum;
int wavefrontSize; int wavefrontSize;
GPUStaticInst *kernelLaunchInst;
}; };
#endif // __COMPUTE_UNIT_HH__ #endif // __COMPUTE_UNIT_HH__

View file

@ -67,7 +67,7 @@ GlobalMemPipeline::exec()
bool accessVrf = true; bool accessVrf = true;
// check the VRF to see if the operands of a load (or load component // check the VRF to see if the operands of a load (or load component
// of an atomic) are accessible // of an atomic) are accessible
if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf = accessVrf =
@ -127,10 +127,7 @@ GlobalMemPipeline::exec()
// memory packets to DTLB // memory packets to DTLB
if (!gmIssuedRequests.empty()) { if (!gmIssuedRequests.empty()) {
GPUDynInstPtr mp = gmIssuedRequests.front(); GPUDynInstPtr mp = gmIssuedRequests.front();
if (mp->m_op == Enums::MO_LD || if (mp->isLoad() || mp->isAtomic()) {
(mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) ||
(mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) {
if (inflightLoads >= gmQueueSize) { if (inflightLoads >= gmQueueSize) {
return; return;
} else { } else {
@ -139,7 +136,7 @@ GlobalMemPipeline::exec()
} else { } else {
if (inflightStores >= gmQueueSize) { if (inflightStores >= gmQueueSize) {
return; return;
} else if (mp->m_op == Enums::MO_ST) { } else if (mp->isStore()) {
++inflightStores; ++inflightStores;
} }
} }
@ -147,9 +144,8 @@ GlobalMemPipeline::exec()
mp->initiateAcc(mp); mp->initiateAcc(mp);
gmIssuedRequests.pop(); gmIssuedRequests.pop();
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n", DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
computeUnit->cu_id, mp->simdId, mp->wfSlotId, computeUnit->cu_id, mp->simdId, mp->wfSlotId);
Enums::MemOpTypeStrings[mp->m_op]);
} }
} }
@ -160,12 +156,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers // Return data to registers
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { if (m->isLoad() || m->isAtomic()) {
gmReturnedLoads.pop(); gmReturnedLoads.pop();
assert(inflightLoads > 0); assert(inflightLoads > 0);
--inflightLoads; --inflightLoads;
if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec; std::vector<uint32_t> regVec;
// iterate over number of destination register operands since // iterate over number of destination register operands since
// this is a load or atomic operation // this is a load or atomic operation
@ -214,13 +210,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
// Decrement outstanding register count // Decrement outstanding register count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) || if (m->isStore() || m->isAtomic()) {
MO_H(m->m_op)) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time, computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
-1); -1);
} }
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time, computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
-1); -1);
} }

View file

@ -41,11 +41,10 @@
#include "gpu-compute/wavefront.hh" #include "gpu-compute/wavefront.hh"
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
GPUStaticInst *_staticInst, uint64_t instSeqNum) GPUStaticInst *static_inst, uint64_t instSeqNum)
: GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0), : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
m_op(Enums::MO_UNDEF), n_reg(0), useContinuation(false),
memoryOrder(Enums::MEMORY_ORDER_NONE), n_reg(0), useContinuation(false), statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
statusBitVector(0), staticInst(_staticInst), _seqNum(instSeqNum)
{ {
tlbHitLevel.assign(computeUnit()->wfSize(), -1); tlbHitLevel.assign(computeUnit()->wfSize(), -1);
d_data = new uint8_t[computeUnit()->wfSize() * 16]; d_data = new uint8_t[computeUnit()->wfSize() * 16];
@ -68,77 +67,69 @@ GPUDynInst::~GPUDynInst()
} }
void void
GPUDynInst::execute() GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{ {
GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(cu, wf, staticInst, _staticInst->execute(gpuDynInst);
_seqNum);
staticInst->execute(gpuDynInst);
} }
int int
GPUDynInst::numSrcRegOperands() GPUDynInst::numSrcRegOperands()
{ {
return staticInst->numSrcRegOperands(); return _staticInst->numSrcRegOperands();
} }
int int
GPUDynInst::numDstRegOperands() GPUDynInst::numDstRegOperands()
{ {
return staticInst->numDstRegOperands(); return _staticInst->numDstRegOperands();
} }
int int
GPUDynInst::getNumOperands() GPUDynInst::getNumOperands()
{ {
return staticInst->getNumOperands(); return _staticInst->getNumOperands();
} }
bool bool
GPUDynInst::isVectorRegister(int operandIdx) GPUDynInst::isVectorRegister(int operandIdx)
{ {
return staticInst->isVectorRegister(operandIdx); return _staticInst->isVectorRegister(operandIdx);
} }
bool bool
GPUDynInst::isScalarRegister(int operandIdx) GPUDynInst::isScalarRegister(int operandIdx)
{ {
return staticInst->isScalarRegister(operandIdx); return _staticInst->isScalarRegister(operandIdx);
} }
int int
GPUDynInst::getRegisterIndex(int operandIdx) GPUDynInst::getRegisterIndex(int operandIdx)
{ {
return staticInst->getRegisterIndex(operandIdx); return _staticInst->getRegisterIndex(operandIdx);
} }
int int
GPUDynInst::getOperandSize(int operandIdx) GPUDynInst::getOperandSize(int operandIdx)
{ {
return staticInst->getOperandSize(operandIdx); return _staticInst->getOperandSize(operandIdx);
} }
bool bool
GPUDynInst::isDstOperand(int operandIdx) GPUDynInst::isDstOperand(int operandIdx)
{ {
return staticInst->isDstOperand(operandIdx); return _staticInst->isDstOperand(operandIdx);
} }
bool bool
GPUDynInst::isSrcOperand(int operandIdx) GPUDynInst::isSrcOperand(int operandIdx)
{ {
return staticInst->isSrcOperand(operandIdx); return _staticInst->isSrcOperand(operandIdx);
}
bool
GPUDynInst::isArgLoad()
{
return staticInst->isArgLoad();
} }
const std::string& const std::string&
GPUDynInst::disassemble() const GPUDynInst::disassemble() const
{ {
return staticInst->disassemble(); return _staticInst->disassemble();
} }
uint64_t uint64_t
@ -147,16 +138,10 @@ GPUDynInst::seqNum() const
return _seqNum; return _seqNum;
} }
Enums::OpType
GPUDynInst::opType()
{
return staticInst->o_type;
}
Enums::StorageClassType Enums::StorageClassType
GPUDynInst::executedAs() GPUDynInst::executedAs()
{ {
return staticInst->executed_as; return _staticInst->executed_as;
} }
// Process a memory instruction and (if necessary) submit timing request // Process a memory instruction and (if necessary) submit timing request
@ -166,20 +151,347 @@ GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n", DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask); cu->cu_id, simdId, wfSlotId, exec_mask);
staticInst->initiateAcc(gpuDynInst); _staticInst->initiateAcc(gpuDynInst);
time = 0; time = 0;
} }
/**
* accessor methods for the attributes of
* the underlying GPU static instruction
*/
bool bool
GPUDynInst::scalarOp() const GPUDynInst::isALU() const
{ {
return staticInst->scalarOp(); return _staticInst->isALU();
}
bool
GPUDynInst::isBranch() const
{
return _staticInst->isBranch();
}
bool
GPUDynInst::isNop() const
{
return _staticInst->isNop();
}
bool
GPUDynInst::isReturn() const
{
return _staticInst->isReturn();
}
bool
GPUDynInst::isUnconditionalJump() const
{
return _staticInst->isUnconditionalJump();
}
bool
GPUDynInst::isSpecialOp() const
{
return _staticInst->isSpecialOp();
}
bool
GPUDynInst::isWaitcnt() const
{
return _staticInst->isWaitcnt();
}
bool
GPUDynInst::isBarrier() const
{
return _staticInst->isBarrier();
}
bool
GPUDynInst::isMemFence() const
{
return _staticInst->isMemFence();
}
bool
GPUDynInst::isMemRef() const
{
return _staticInst->isMemRef();
}
bool
GPUDynInst::isFlat() const
{
return _staticInst->isFlat();
}
bool
GPUDynInst::isLoad() const
{
return _staticInst->isLoad();
}
bool
GPUDynInst::isStore() const
{
return _staticInst->isStore();
}
bool
GPUDynInst::isAtomic() const
{
return _staticInst->isAtomic();
}
bool
GPUDynInst::isAtomicNoRet() const
{
return _staticInst->isAtomicNoRet();
}
bool
GPUDynInst::isAtomicRet() const
{
return _staticInst->isAtomicRet();
}
bool
GPUDynInst::isScalar() const
{
return _staticInst->isScalar();
}
bool
GPUDynInst::readsSCC() const
{
return _staticInst->readsSCC();
}
bool
GPUDynInst::writesSCC() const
{
return _staticInst->writesSCC();
}
bool
GPUDynInst::readsVCC() const
{
return _staticInst->readsVCC();
}
bool
GPUDynInst::writesVCC() const
{
return _staticInst->writesVCC();
}
bool
GPUDynInst::isAtomicAnd() const
{
return _staticInst->isAtomicAnd();
}
bool
GPUDynInst::isAtomicOr() const
{
return _staticInst->isAtomicOr();
}
bool
GPUDynInst::isAtomicXor() const
{
return _staticInst->isAtomicXor();
}
bool
GPUDynInst::isAtomicCAS() const
{
return _staticInst->isAtomicCAS();
}
bool GPUDynInst::isAtomicExch() const
{
return _staticInst->isAtomicExch();
}
bool
GPUDynInst::isAtomicAdd() const
{
return _staticInst->isAtomicAdd();
}
bool
GPUDynInst::isAtomicSub() const
{
return _staticInst->isAtomicSub();
}
bool
GPUDynInst::isAtomicInc() const
{
return _staticInst->isAtomicInc();
}
bool
GPUDynInst::isAtomicDec() const
{
return _staticInst->isAtomicDec();
}
bool
GPUDynInst::isAtomicMax() const
{
return _staticInst->isAtomicMax();
}
bool
GPUDynInst::isAtomicMin() const
{
return _staticInst->isAtomicMin();
}
bool
GPUDynInst::isArgLoad() const
{
return _staticInst->isArgLoad();
}
bool
GPUDynInst::isGlobalMem() const
{
return _staticInst->isGlobalMem();
}
bool
GPUDynInst::isLocalMem() const
{
return _staticInst->isLocalMem();
}
bool
GPUDynInst::isArgSeg() const
{
return _staticInst->isArgSeg();
}
bool
GPUDynInst::isGlobalSeg() const
{
return _staticInst->isGlobalSeg();
}
bool
GPUDynInst::isGroupSeg() const
{
return _staticInst->isGroupSeg();
}
bool
GPUDynInst::isKernArgSeg() const
{
return _staticInst->isKernArgSeg();
}
bool
GPUDynInst::isPrivateSeg() const
{
return _staticInst->isPrivateSeg();
}
bool
GPUDynInst::isReadOnlySeg() const
{
return _staticInst->isReadOnlySeg();
}
bool
GPUDynInst::isSpillSeg() const
{
return _staticInst->isSpillSeg();
}
bool
GPUDynInst::isWorkitemScope() const
{
return _staticInst->isWorkitemScope();
}
bool
GPUDynInst::isWavefrontScope() const
{
return _staticInst->isWavefrontScope();
}
bool
GPUDynInst::isWorkgroupScope() const
{
return _staticInst->isWorkgroupScope();
}
bool
GPUDynInst::isDeviceScope() const
{
return _staticInst->isDeviceScope();
}
bool
GPUDynInst::isSystemScope() const
{
return _staticInst->isSystemScope();
}
bool
GPUDynInst::isNoScope() const
{
return _staticInst->isNoScope();
}
bool
GPUDynInst::isRelaxedOrder() const
{
return _staticInst->isRelaxedOrder();
}
bool
GPUDynInst::isAcquire() const
{
return _staticInst->isAcquire();
}
bool
GPUDynInst::isRelease() const
{
return _staticInst->isRelease();
}
bool
GPUDynInst::isAcquireRelease() const
{
return _staticInst->isAcquireRelease();
}
bool
GPUDynInst::isNoOrder() const
{
return _staticInst->isNoOrder();
}
bool
GPUDynInst::isGloballyCoherent() const
{
return _staticInst->isGloballyCoherent();
}
bool
GPUDynInst::isSystemCoherent() const
{
return _staticInst->isSystemCoherent();
} }
void void
GPUDynInst::updateStats() GPUDynInst::updateStats()
{ {
if (staticInst->isLocalMem()) { if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory // access to LDS (shared) memory
cu->dynamicLMemInstrCnt++; cu->dynamicLMemInstrCnt++;
} else { } else {

View file

@ -39,11 +39,7 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include "enums/GenericMemoryOrder.hh"
#include "enums/GenericMemoryScope.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh" #include "enums/MemType.hh"
#include "enums/OpType.hh"
#include "enums/StorageClassType.hh" #include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh" #include "gpu-compute/gpu_exec_context.hh"
@ -180,33 +176,19 @@ class AtomicOpMin : public TypedAtomicOpFunctor<T>
} }
}; };
#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN)
#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN)
#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN)
typedef enum typedef enum
{ {
VT_32, VT_32,
VT_64, VT_64,
} vgpr_type; } vgpr_type;
typedef enum
{
SEG_PRIVATE,
SEG_SPILL,
SEG_GLOBAL,
SEG_SHARED,
SEG_READONLY,
SEG_FLAT
} seg_type;
class GPUDynInst : public GPUExecContext class GPUDynInst : public GPUExecContext
{ {
public: public:
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst, GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum); uint64_t instSeqNum);
~GPUDynInst(); ~GPUDynInst();
void execute(); void execute(GPUDynInstPtr gpuDynInst);
int numSrcRegOperands(); int numSrcRegOperands();
int numDstRegOperands(); int numDstRegOperands();
int getNumOperands(); int getNumOperands();
@ -216,13 +198,11 @@ class GPUDynInst : public GPUExecContext
int getOperandSize(int operandIdx); int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx); bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx); bool isSrcOperand(int operandIdx);
bool isArgLoad();
const std::string &disassemble() const; const std::string &disassemble() const;
uint64_t seqNum() const; uint64_t seqNum() const;
Enums::OpType opType();
Enums::StorageClassType executedAs(); Enums::StorageClassType executedAs();
// The address of the memory operation // The address of the memory operation
@ -240,14 +220,7 @@ class GPUDynInst : public GPUExecContext
// The memory type (M_U32, M_S32, ...) // The memory type (M_U32, M_S32, ...)
Enums::MemType m_type; Enums::MemType m_type;
// The memory operation (MO_LD, MO_ST, ...)
Enums::MemOpType m_op;
Enums::GenericMemoryOrder memoryOrder;
// Scope of the request
Enums::GenericMemoryScope scope;
// The memory segment (SEG_SHARED, SEG_GLOBAL, ...)
seg_type s_type;
// The equivalency class // The equivalency class
int equiv; int equiv;
// The return VGPR type (VT_32 or VT_64) // The return VGPR type (VT_32 or VT_64)
@ -288,10 +261,72 @@ class GPUDynInst : public GPUExecContext
void updateStats(); void updateStats();
GPUStaticInst* staticInstruction() { return staticInst; } GPUStaticInst* staticInstruction() { return _staticInst; }
// Is the instruction a scalar or vector op? bool isALU() const;
bool scalarOp() const; bool isBranch() const;
bool isNop() const;
bool isReturn() const;
bool isUnconditionalJump() const;
bool isSpecialOp() const;
bool isWaitcnt() const;
bool isBarrier() const;
bool isMemFence() const;
bool isMemRef() const;
bool isFlat() const;
bool isLoad() const;
bool isStore() const;
bool isAtomic() const;
bool isAtomicNoRet() const;
bool isAtomicRet() const;
bool isScalar() const;
bool readsSCC() const;
bool writesSCC() const;
bool readsVCC() const;
bool writesVCC() const;
bool isAtomicAnd() const;
bool isAtomicOr() const;
bool isAtomicXor() const;
bool isAtomicCAS() const;
bool isAtomicExch() const;
bool isAtomicAdd() const;
bool isAtomicSub() const;
bool isAtomicInc() const;
bool isAtomicDec() const;
bool isAtomicMax() const;
bool isAtomicMin() const;
bool isArgLoad() const;
bool isGlobalMem() const;
bool isLocalMem() const;
bool isArgSeg() const;
bool isGlobalSeg() const;
bool isGroupSeg() const;
bool isKernArgSeg() const;
bool isPrivateSeg() const;
bool isReadOnlySeg() const;
bool isSpillSeg() const;
bool isWorkitemScope() const;
bool isWavefrontScope() const;
bool isWorkgroupScope() const;
bool isDeviceScope() const;
bool isSystemScope() const;
bool isNoScope() const;
bool isRelaxedOrder() const;
bool isAcquire() const;
bool isRelease() const;
bool isAcquireRelease() const;
bool isNoOrder() const;
bool isGloballyCoherent() const;
bool isSystemCoherent() const;
/* /*
* Loads/stores/atomics may have acquire/release semantics associated * Loads/stores/atomics may have acquire/release semantics associated
@ -312,46 +347,32 @@ class GPUDynInst : public GPUExecContext
bool useContinuation; bool useContinuation;
template<typename c0> AtomicOpFunctor* template<typename c0> AtomicOpFunctor*
makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op) makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
{ {
using namespace Enums; if (isAtomicAnd()) {
switch(op) {
case MO_AAND:
case MO_ANRAND:
return new AtomicOpAnd<c0>(*reg0); return new AtomicOpAnd<c0>(*reg0);
case MO_AOR: } else if (isAtomicOr()) {
case MO_ANROR:
return new AtomicOpOr<c0>(*reg0); return new AtomicOpOr<c0>(*reg0);
case MO_AXOR: } else if (isAtomicXor()) {
case MO_ANRXOR:
return new AtomicOpXor<c0>(*reg0); return new AtomicOpXor<c0>(*reg0);
case MO_ACAS: } else if (isAtomicCAS()) {
case MO_ANRCAS:
return new AtomicOpCAS<c0>(*reg0, *reg1, cu); return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
case MO_AEXCH: } else if (isAtomicExch()) {
case MO_ANREXCH:
return new AtomicOpExch<c0>(*reg0); return new AtomicOpExch<c0>(*reg0);
case MO_AADD: } else if (isAtomicAdd()) {
case MO_ANRADD:
return new AtomicOpAdd<c0>(*reg0); return new AtomicOpAdd<c0>(*reg0);
case MO_ASUB: } else if (isAtomicSub()) {
case MO_ANRSUB:
return new AtomicOpSub<c0>(*reg0); return new AtomicOpSub<c0>(*reg0);
case MO_AINC: } else if (isAtomicInc()) {
case MO_ANRINC:
return new AtomicOpInc<c0>(); return new AtomicOpInc<c0>();
case MO_ADEC: } else if (isAtomicDec()) {
case MO_ANRDEC:
return new AtomicOpDec<c0>(); return new AtomicOpDec<c0>();
case MO_AMAX: } else if (isAtomicMax()) {
case MO_ANRMAX:
return new AtomicOpMax<c0>(*reg0); return new AtomicOpMax<c0>(*reg0);
case MO_AMIN: } else if (isAtomicMin()) {
case MO_ANRMIN:
return new AtomicOpMin<c0>(*reg0); return new AtomicOpMin<c0>(*reg0);
default: } else {
panic("Unrecognized atomic operation"); fatal("Unrecognized atomic operation");
} }
} }
@ -359,88 +380,58 @@ class GPUDynInst : public GPUExecContext
setRequestFlags(Request *req, bool setMemOrder=true) setRequestFlags(Request *req, bool setMemOrder=true)
{ {
// currently these are the easy scopes to deduce // currently these are the easy scopes to deduce
switch (s_type) { if (isPrivateSeg()) {
case SEG_PRIVATE:
req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT); req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
break; } else if (isSpillSeg()) {
case SEG_SPILL:
req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT); req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
break; } else if (isGlobalSeg()) {
case SEG_GLOBAL:
req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT); req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
break; } else if (isReadOnlySeg()) {
case SEG_READONLY:
req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT); req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
break; } else if (isGroupSeg()) {
case SEG_SHARED:
req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT); req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
break; } else if (isFlat()) {
case SEG_FLAT:
// TODO: translate to correct scope // TODO: translate to correct scope
assert(false); assert(false);
default: } else {
panic("Bad segment type"); fatal("%s has bad segment type\n", disassemble());
break;
} }
switch (scope) { if (isWavefrontScope()) {
case Enums::MEMORY_SCOPE_NONE:
case Enums::MEMORY_SCOPE_WORKITEM:
break;
case Enums::MEMORY_SCOPE_WAVEFRONT:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WAVEFRONT_SCOPE); Request::WAVEFRONT_SCOPE);
break; } else if (isWorkgroupScope()) {
case Enums::MEMORY_SCOPE_WORKGROUP:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WORKGROUP_SCOPE); Request::WORKGROUP_SCOPE);
break; } else if (isDeviceScope()) {
case Enums::MEMORY_SCOPE_DEVICE:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::DEVICE_SCOPE); Request::DEVICE_SCOPE);
break; } else if (isSystemScope()) {
case Enums::MEMORY_SCOPE_SYSTEM:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::SYSTEM_SCOPE); Request::SYSTEM_SCOPE);
break; } else if (!isNoScope() && !isWorkitemScope()) {
default: fatal("%s has bad scope type\n", disassemble());
panic("Bad scope type");
break;
} }
if (setMemOrder) { if (setMemOrder) {
// set acquire and release flags // set acquire and release flags
switch (memoryOrder){ if (isAcquire()) {
case Enums::MEMORY_ORDER_SC_ACQUIRE:
req->setFlags(Request::ACQUIRE); req->setFlags(Request::ACQUIRE);
break; } else if (isRelease()) {
case Enums::MEMORY_ORDER_SC_RELEASE:
req->setFlags(Request::RELEASE); req->setFlags(Request::RELEASE);
break; } else if (isAcquireRelease()) {
case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE:
req->setFlags(Request::ACQUIRE | Request::RELEASE); req->setFlags(Request::ACQUIRE | Request::RELEASE);
break; } else if (!isNoOrder()) {
default: fatal("%s has bad memory order\n", disassemble());
break;
} }
} }
// set atomic type // set atomic type
// currently, the instruction genenerator only produces atomic return // currently, the instruction genenerator only produces atomic return
// but a magic instruction can produce atomic no return // but a magic instruction can produce atomic no return
if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB || if (isAtomicRet()) {
m_op == Enums::MO_AAND || m_op == Enums::MO_AOR ||
m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX ||
m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC ||
m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH ||
m_op == Enums::MO_ACAS) {
req->setFlags(Request::ATOMIC_RETURN_OP); req->setFlags(Request::ATOMIC_RETURN_OP);
} else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB || } else if (isAtomicNoRet()) {
m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR ||
m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX ||
m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC ||
m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH ||
m_op == Enums::MO_ANRCAS) {
req->setFlags(Request::ATOMIC_NO_RETURN_OP); req->setFlags(Request::ATOMIC_NO_RETURN_OP);
} }
} }
@ -457,7 +448,7 @@ class GPUDynInst : public GPUExecContext
std::vector<int> tlbHitLevel; std::vector<int> tlbHitLevel;
private: private:
GPUStaticInst *staticInst; GPUStaticInst *_staticInst;
uint64_t _seqNum; uint64_t _seqNum;
}; };

View file

@ -36,10 +36,12 @@
#include "gpu-compute/gpu_static_inst.hh" #include "gpu-compute/gpu_static_inst.hh"
GPUStaticInst::GPUStaticInst(const std::string &opcode) GPUStaticInst::GPUStaticInst(const std::string &opcode)
: o_type(Enums::OT_ALU), executed_as(Enums::SC_NONE), opcode(opcode), : executed_as(Enums::SC_NONE), opcode(opcode),
_instNum(0), _scalarOp(false) _instNum(0)
{ {
setFlag(NoOrder);
} }
const std::string& const std::string&
GPUStaticInst::disassemble() GPUStaticInst::disassemble()
{ {

View file

@ -48,7 +48,7 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include "enums/OpType.hh" #include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh" #include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh" #include "gpu-compute/misc.hh"
@ -57,7 +57,7 @@ class BaseOperand;
class BaseRegOperand; class BaseRegOperand;
class Wavefront; class Wavefront;
class GPUStaticInst class GPUStaticInst : public GPUStaticInstFlags
{ {
public: public:
GPUStaticInst(const std::string &opcode); GPUStaticInst(const std::string &opcode);
@ -86,22 +86,110 @@ class GPUStaticInst
virtual bool isValid() const = 0; virtual bool isValid() const = 0;
/* bool isALU() const { return _flags[ALU]; }
* Most instructions (including all HSAIL instructions) bool isBranch() const { return _flags[Branch]; }
* are vector ops, so _scalarOp will be false by default. bool isNop() const { return _flags[Nop]; }
* Derived instruction objects that are scalar ops must bool isReturn() const { return _flags[Return]; }
* set _scalarOp to true in their constructors.
*/
bool scalarOp() const { return _scalarOp; }
virtual bool isLocalMem() const bool
isUnconditionalJump() const
{ {
fatal("calling isLocalMem() on non-memory instruction.\n"); return _flags[UnconditionalJump];
return false;
} }
bool isArgLoad() { return false; } bool isSpecialOp() const { return _flags[SpecialOp]; }
bool isWaitcnt() const { return _flags[Waitcnt]; }
bool isBarrier() const { return _flags[MemBarrier]; }
bool isMemFence() const { return _flags[MemFence]; }
bool isMemRef() const { return _flags[MemoryRef]; }
bool isFlat() const { return _flags[Flat]; }
bool isLoad() const { return _flags[Load]; }
bool isStore() const { return _flags[Store]; }
bool
isAtomic() const
{
return _flags[AtomicReturn] || _flags[AtomicNoReturn];
}
bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
bool isAtomicRet() const { return _flags[AtomicReturn]; }
bool isScalar() const { return _flags[Scalar]; }
bool readsSCC() const { return _flags[ReadsSCC]; }
bool writesSCC() const { return _flags[WritesSCC]; }
bool readsVCC() const { return _flags[ReadsVCC]; }
bool writesVCC() const { return _flags[WritesVCC]; }
bool isAtomicAnd() const { return _flags[AtomicAnd]; }
bool isAtomicOr() const { return _flags[AtomicOr]; }
bool isAtomicXor() const { return _flags[AtomicXor]; }
bool isAtomicCAS() const { return _flags[AtomicCAS]; }
bool isAtomicExch() const { return _flags[AtomicExch]; }
bool isAtomicAdd() const { return _flags[AtomicAdd]; }
bool isAtomicSub() const { return _flags[AtomicSub]; }
bool isAtomicInc() const { return _flags[AtomicInc]; }
bool isAtomicDec() const { return _flags[AtomicDec]; }
bool isAtomicMax() const { return _flags[AtomicMax]; }
bool isAtomicMin() const { return _flags[AtomicMin]; }
bool
isArgLoad() const
{
return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
}
bool
isGlobalMem() const
{
return _flags[MemoryRef] && (_flags[GlobalSegment] ||
_flags[PrivateSegment] || _flags[ReadOnlySegment] ||
_flags[SpillSegment]);
}
bool
isLocalMem() const
{
return _flags[MemoryRef] && _flags[GroupSegment];
}
bool isArgSeg() const { return _flags[ArgSegment]; }
bool isGlobalSeg() const { return _flags[GlobalSegment]; }
bool isGroupSeg() const { return _flags[GroupSegment]; }
bool isKernArgSeg() const { return _flags[KernArgSegment]; }
bool isPrivateSeg() const { return _flags[PrivateSegment]; }
bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
bool isSpillSeg() const { return _flags[SpillSegment]; }
bool isWorkitemScope() const { return _flags[WorkitemScope]; }
bool isWavefrontScope() const { return _flags[WavefrontScope]; }
bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
bool isDeviceScope() const { return _flags[DeviceScope]; }
bool isSystemScope() const { return _flags[SystemScope]; }
bool isNoScope() const { return _flags[NoScope]; }
bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
bool isAcquire() const { return _flags[Acquire]; }
bool isRelease() const { return _flags[Release]; }
bool isAcquireRelease() const { return _flags[AcquireRelease]; }
bool isNoOrder() const { return _flags[NoOrder]; }
/**
* Coherence domain of a memory instruction. Only valid for
* machine ISA. The coherence domain specifies where it is
* possible to perform memory synchronization, e.g., acquire
* or release, from the shader kernel.
*
* isGloballyCoherent(): returns true if kernel is sharing memory
* with other work-items on the same device (GPU)
*
* isSystemCoherent(): returns true if kernel is sharing memory
* with other work-items on a different device (GPU) or the host (CPU)
*/
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
virtual uint32_t instSize() = 0; virtual uint32_t instSize() = 0;
// only used for memory instructions // only used for memory instructions
@ -120,22 +208,13 @@ class GPUStaticInst
virtual uint32_t getTargetPc() { return 0; } virtual uint32_t getTargetPc() { return 0; }
/**
* Query whether the instruction is an unconditional jump i.e., the jump
* is always executed because there is no condition to be evaluated.
*
* If the instruction is not of branch type, the result is always false.
*
* @return True if the instruction is an unconditional jump.
*/
virtual bool unconditionalJumpInstruction() { return false; }
static uint64_t dynamic_id_count; static uint64_t dynamic_id_count;
Enums::OpType o_type;
// For flat memory accesses // For flat memory accesses
Enums::StorageClassType executed_as; Enums::StorageClassType executed_as;
void setFlag(Flags flag) { _flags[flag] = true; }
protected: protected:
virtual void virtual void
execLdAcq(GPUDynInstPtr gpuDynInst) execLdAcq(GPUDynInstPtr gpuDynInst)
@ -169,7 +248,45 @@ class GPUStaticInst
*/ */
int _ipdInstNum; int _ipdInstNum;
bool _scalarOp; std::bitset<Num_Flags> _flags;
};
class KernelLaunchStaticInst : public GPUStaticInst
{
public:
KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
{
setFlag(Nop);
setFlag(Scalar);
setFlag(Acquire);
setFlag(SystemScope);
setFlag(GlobalSegment);
}
void
execute(GPUDynInstPtr gpuDynInst)
{
fatal("kernel launch instruction should not be executed\n");
}
void
generateDisassembly()
{
disassembly = opcode;
}
int getNumOperands() { return 0; }
bool isCondRegister(int operandIndex) { return false; }
bool isScalarRegister(int operandIndex) { return false; }
bool isVectorRegister(int operandIndex) { return false; }
bool isSrcOperand(int operandIndex) { return false; }
bool isDstOperand(int operandIndex) { return false; }
int getOperandSize(int operandIndex) { return 0; }
int getRegisterIndex(int operandIndex) { return 0; }
int numDstRegOperands() { return 0; }
int numSrcRegOperands() { return 0; }
bool isValid() const { return true; }
uint32_t instSize() { return 0; }
}; };
#endif // __GPU_STATIC_INST_HH__ #endif // __GPU_STATIC_INST_HH__

View file

@ -104,7 +104,7 @@ ControlFlowInfo::createBasicBlocks()
leaders.insert(0); leaders.insert(0);
for (int i = 1; i < instructions.size(); i++) { for (int i = 1; i < instructions.size(); i++) {
GPUStaticInst* instruction = instructions[i]; GPUStaticInst* instruction = instructions[i];
if (instruction->o_type == Enums::OT_BRANCH) { if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc(); const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc); leaders.insert(target_pc);
leaders.insert(i + 1); leaders.insert(i + 1);
@ -137,18 +137,18 @@ ControlFlowInfo::connectBasicBlocks()
break; break;
} }
GPUStaticInst* last = lastInstruction(bb.get()); GPUStaticInst* last = lastInstruction(bb.get());
if (last->o_type == Enums::OT_RET) { if (last->isReturn()) {
bb->successorIds.insert(exit_bb->id); bb->successorIds.insert(exit_bb->id);
continue; continue;
} }
if (last->o_type == Enums::OT_BRANCH) { if (last->isBranch()) {
const uint32_t target_pc = last->getTargetPc(); const uint32_t target_pc = last->getTargetPc();
BasicBlock* target_bb = basicBlock(target_pc); BasicBlock* target_bb = basicBlock(target_pc);
bb->successorIds.insert(target_bb->id); bb->successorIds.insert(target_bb->id);
} }
// Unconditional jump instructions have a unique successor // Unconditional jump instructions have a unique successor
if (!last->unconditionalJumpInstruction()) { if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->instNum() + 1); BasicBlock* next_bb = basicBlock(last->instNum() + 1);
bb->successorIds.insert(next_bb->id); bb->successorIds.insert(next_bb->id);
} }
@ -274,7 +274,7 @@ ControlFlowInfo::printBasicBlocks() const
int inst_num = inst->instNum(); int inst_num = inst->instNum();
std::cout << inst_num << " [" << basicBlock(inst_num)->id std::cout << inst_num << " [" << basicBlock(inst_num)->id
<< "]: " << inst->disassemble(); << "]: " << inst->disassemble();
if (inst->o_type == Enums::OT_BRANCH) { if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc(); std::cout << ", PC = " << inst->getTargetPc();
} }
std::cout << std::endl; std::cout << std::endl;

View file

@ -141,8 +141,7 @@ LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
} }
} }
if (gpuDynInst->m_op == Enums::MO_LD || if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
gpuDynInst->m_op == Enums::MO_ST) {
// mask identical addresses // mask identical addresses
for (int j = 0; j < numBanks; ++j) { for (int j = 0; j < numBanks; ++j) {
for (int j0 = 0; j0 < j; j0++) { for (int j0 = 0; j0 < j; j0++) {
@ -208,8 +207,8 @@ LdsState::processPacket(PacketPtr packet)
GPUDynInstPtr dynInst = getDynInstr(packet); GPUDynInstPtr dynInst = getDynInstr(packet);
// account for the LDS bank conflict overhead // account for the LDS bank conflict overhead
int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() : int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
(dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() : (dynInst->isStore()) ? parent->storeBusLength() :
parent->loadBusLength(); parent->loadBusLength();
// delay for accessing the LDS // delay for accessing the LDS
Tick processingTime = Tick processingTime =

View file

@ -43,7 +43,6 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "enums/MemOpType.hh"
#include "enums/MemType.hh" #include "enums/MemType.hh"
#include "gpu-compute/misc.hh" #include "gpu-compute/misc.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"

View file

@ -62,7 +62,7 @@ LocalMemPipeline::exec()
lmReturnedRequests.front() : nullptr; lmReturnedRequests.front() : nullptr;
bool accessVrf = true; bool accessVrf = true;
if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf = accessVrf =
@ -137,7 +137,7 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers // Return data to registers
if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec; std::vector<uint32_t> regVec;
for (int k = 0; k < m->n_reg; ++k) { for (int k = 0; k < m->n_reg; ++k) {
int dst = m->dst_reg+k; int dst = m->dst_reg+k;
@ -172,13 +172,12 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
// Decrement outstanding request count // Decrement outstanding request count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) if (m->isStore() || m->isAtomic()) {
|| MO_H(m->m_op)) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm, computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
m->time, -1); m->time, -1);
} }
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm, computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
m->time, -1); m->time, -1);
} }

View file

@ -47,7 +47,6 @@
#include "cpu/simple_thread.hh" #include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh" #include "cpu/thread_context.hh"
#include "cpu/thread_state.hh" #include "cpu/thread_state.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh" #include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh" #include "gpu-compute/gpu_tlb.hh"

View file

@ -38,7 +38,6 @@
#include <string> #include <string>
#include "base/misc.hh" #include "base/misc.hh"
#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh" #include "gpu-compute/shader.hh"
@ -153,8 +152,8 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
void void
VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w) VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
{ {
bool loadInstr = IS_OT_READ(ii->opType()); bool loadInstr = ii->isLoad();
bool atomicInstr = IS_OT_ATOMIC(ii->opType()); bool atomicInstr = ii->isAtomic() || ii->isMemFence();
bool loadNoArgInstr = loadInstr && !ii->isArgLoad(); bool loadNoArgInstr = loadInstr && !ii->isArgLoad();

View file

@ -37,7 +37,6 @@
#include "debug/GPUExec.hh" #include "debug/GPUExec.hh"
#include "debug/WavefrontStack.hh" #include "debug/WavefrontStack.hh"
#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh" #include "gpu-compute/shader.hh"
@ -165,19 +164,8 @@ Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr)
bool bool
Wavefront::isGmInstruction(GPUDynInstPtr ii) Wavefront::isGmInstruction(GPUDynInstPtr ii)
{ {
if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) || if (ii->isGlobalMem() || ii->isFlat())
IS_OT_ATOMIC_PM(ii->opType())) {
return true; return true;
}
if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType())) {
return true;
}
if (IS_OT_FLAT(ii->opType())) {
return true;
}
return false; return false;
} }
@ -185,8 +173,7 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
bool bool
Wavefront::isLmInstruction(GPUDynInstPtr ii) Wavefront::isLmInstruction(GPUDynInstPtr ii)
{ {
if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) || if (ii->isLocalMem()) {
IS_OT_ATOMIC_LM(ii->opType())) {
return true; return true;
} }
@ -199,10 +186,9 @@ Wavefront::isOldestInstALU()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP || if (status != S_STOPPED && (ii->isNop() ||
ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || ii->isReturn() || ii->isBranch() ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
ii->opType() == Enums::OT_KERN_READ)) {
return true; return true;
} }
@ -215,7 +201,7 @@ Wavefront::isOldestInstBarrier()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) { if (status != S_STOPPED && ii->isBarrier()) {
return true; return true;
} }
@ -228,9 +214,7 @@ Wavefront::isOldestInstGMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) || if (status != S_STOPPED && ii->isGlobalMem()) {
IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
return true; return true;
} }
@ -243,9 +227,7 @@ Wavefront::isOldestInstLMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) || if (status != S_STOPPED && ii->isLocalMem()) {
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
return true; return true;
} }
@ -258,9 +240,7 @@ Wavefront::isOldestInstPrivMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) || if (status != S_STOPPED && ii->isPrivateSeg()) {
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
return true; return true;
} }
@ -273,8 +253,7 @@ Wavefront::isOldestInstFlatMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) { if (status != S_STOPPED && ii->isFlat()) {
return true; return true;
} }
@ -289,7 +268,7 @@ Wavefront::instructionBufferHasBranch()
for (auto it : instructionBuffer) { for (auto it : instructionBuffer) {
GPUDynInstPtr ii = it; GPUDynInstPtr ii = it;
if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) { if (ii->isReturn() || ii->isBranch()) {
return true; return true;
} }
} }
@ -371,23 +350,16 @@ Wavefront::ready(itype_e type)
// checking readiness will be fixed eventually. In the meantime, let's // checking readiness will be fixed eventually. In the meantime, let's
// make sure that we do not silently let an instruction type slip // make sure that we do not silently let an instruction type slip
// through this logic and always return not ready. // through this logic and always return not ready.
if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP || if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || ii->isMemFence() || ii->isFlat())) {
ii->opType() == Enums::OT_KERN_READ ||
ii->opType() == Enums::OT_ARG ||
IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) ||
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) {
panic("next instruction: %s is of unknown type\n", ii->disassemble()); panic("next instruction: %s is of unknown type\n", ii->disassemble());
} }
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n", DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) { if (type == I_ALU && ii->isBarrier()) {
// Here for ALU instruction (barrier) // Here for ALU instruction (barrier)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free? // Is wave slot free?
@ -400,7 +372,7 @@ Wavefront::ready(itype_e type)
} }
ready_inst = true; ready_inst = true;
} else if (type == I_ALU && ii->opType() == Enums::OT_NOP) { } else if (type == I_ALU && ii->isNop()) {
// Here for ALU instruction (nop) // Here for ALU instruction (nop)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free? // Is wave slot free?
@ -408,7 +380,7 @@ Wavefront::ready(itype_e type)
} }
ready_inst = true; ready_inst = true;
} else if (type == I_ALU && ii->opType() == Enums::OT_RET) { } else if (type == I_ALU && ii->isReturn()) {
// Here for ALU instruction (return) // Here for ALU instruction (return)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free? // Is wave slot free?
@ -421,10 +393,10 @@ Wavefront::ready(itype_e type)
} }
ready_inst = true; ready_inst = true;
} else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH || } else if (type == I_ALU && (ii->isBranch() ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || ii->isALU() ||
ii->opType() == Enums::OT_KERN_READ || (ii->isKernArgSeg() && ii->isLoad()) ||
ii->opType() == Enums::OT_ARG)) { ii->isArgSeg())) {
// Here for ALU instruction (all others) // Here for ALU instruction (all others)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is alu slot free? // Is alu slot free?
@ -439,18 +411,16 @@ Wavefront::ready(itype_e type)
return 0; return 0;
} }
ready_inst = true; ready_inst = true;
} else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) || } else if (type == I_GLOBAL && ii->isGlobalMem()) {
IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
// Here Global memory instruction // Here Global memory instruction
if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory write requests? // Are there in pipe or outstanding global memory write requests?
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0; return 0;
} }
} }
if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) || if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
IS_OT_HIST_GM(ii->opType())) {
// Are there in pipe or outstanding global memory read requests? // Are there in pipe or outstanding global memory read requests?
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
return 0; return 0;
@ -480,17 +450,15 @@ Wavefront::ready(itype_e type)
return 0; return 0;
} }
ready_inst = true; ready_inst = true;
} else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) || } else if (type == I_SHARED && ii->isLocalMem()) {
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
// Here for Shared memory instruction // Here for Shared memory instruction
if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) { if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
return 0; return 0;
} }
} }
if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
IS_OT_HIST_LM(ii->opType())) {
if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
return 0; return 0;
} }
@ -519,47 +487,7 @@ Wavefront::ready(itype_e type)
return 0; return 0;
} }
ready_inst = true; ready_inst = true;
} else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) || } else if (type == I_FLAT && ii->isFlat()) {
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
// Here for Private memory instruction ------------------------ //
if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0;
}
}
if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
IS_OT_HIST_PM(ii->opType())) {
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
return 0;
}
}
if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus?
return 0;
}
if (!glbMemIssueRdy) {
// Is wave slot free?
return 0;
}
if (!computeUnit->globalMemoryPipe.
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
// Can we insert a new request to the Global Mem Request FIFO?
return 0;
}
// can we schedule source & destination operands on the VRF?
if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
VrfAccessType::RD_WR)) {
return 0;
}
if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
return 0;
}
ready_inst = true;
} else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) {
if (!glbMemBusRdy) { if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus? // Is there an available VRF->Global memory read bus?
return 0; return 0;
@ -618,23 +546,22 @@ Wavefront::updateResources()
assert(ii); assert(ii);
computeUnit->vrf[simdId]->updateResources(this, ii); computeUnit->vrf[simdId]->updateResources(this, ii);
// Single precision ALU or Branch or Return or Special instruction // Single precision ALU or Branch or Return or Special instruction
if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || if (ii->isALU() || ii->isSpecialOp() ||
ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations // FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then // since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines // we should map them to one of the memory pipelines
ii->opType()==Enums::OT_KERN_READ || (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
ii->opType()==Enums::OT_ARG || ii->isReturn()) {
ii->opType()==Enums::OT_RET) {
computeUnit->aluPipe[simdId].preset(computeUnit->shader-> computeUnit->aluPipe[simdId].preset(computeUnit->shader->
ticks(computeUnit->spBypassLength())); ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD // this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].preset(computeUnit->shader-> computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_BARRIER) { } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].preset(computeUnit->shader-> computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_FLAT_READ) { } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++; memReqsInPipe++;
rdGmReqsInPipe++; rdGmReqsInPipe++;
@ -649,7 +576,7 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (ii->opType() == Enums::OT_FLAT_WRITE) { } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++; memReqsInPipe++;
wrGmReqsInPipe++; wrGmReqsInPipe++;
@ -664,21 +591,21 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (IS_OT_READ_GM(ii->opType())) { } else if (ii->isLoad() && ii->isGlobalMem()) {
memReqsInPipe++; memReqsInPipe++;
rdGmReqsInPipe++; rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4)); preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_GM(ii->opType())) { } else if (ii->isStore() && ii->isGlobalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrGmReqsInPipe++; wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_GM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrGmReqsInPipe++; wrGmReqsInPipe++;
rdGmReqsInPipe++; rdGmReqsInPipe++;
@ -686,21 +613,21 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_LM(ii->opType())) { } else if (ii->isLoad() && ii->isLocalMem()) {
memReqsInPipe++; memReqsInPipe++;
rdLmReqsInPipe++; rdLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(4)); preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_LM(ii->opType())) { } else if (ii->isStore() && ii->isLocalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrLmReqsInPipe++; wrLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_LM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrLmReqsInPipe++; wrLmReqsInPipe++;
rdLmReqsInPipe++; rdLmReqsInPipe++;
@ -708,28 +635,6 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_PM(ii->opType())) {
memReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_PM(ii->opType())) {
memReqsInPipe++;
wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_PM(ii->opType())) {
memReqsInPipe++;
wrGmReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} }
@ -751,7 +656,7 @@ Wavefront::exec()
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s " DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
"(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId, "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
ii->disassemble(), old_pc); ii->disassemble(), old_pc);
ii->execute(); ii->execute(ii);
// access the VRF // access the VRF
computeUnit->vrf[simdId]->exec(ii, this); computeUnit->vrf[simdId]->exec(ii, this);
srcRegOpDist.sample(ii->numSrcRegOperands()); srcRegOpDist.sample(ii->numSrcRegOperands());
@ -785,24 +690,24 @@ Wavefront::exec()
// ---- Update Vector ALU pipeline and other resources ------------------ // // ---- Update Vector ALU pipeline and other resources ------------------ //
// Single precision ALU or Branch or Return or Special instruction // Single precision ALU or Branch or Return or Special instruction
if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || if (ii->isALU() || ii->isSpecialOp() ||
ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations // FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then // since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines // we should map them to one of the memory pipelines
ii->opType() == Enums::OT_KERN_READ || (ii->isKernArgSeg() && ii->isLoad()) ||
ii->opType() == Enums::OT_ARG || ii->isArgSeg() ||
ii->opType() == Enums::OT_RET) { ii->isReturn()) {
computeUnit->aluPipe[simdId].set(computeUnit->shader-> computeUnit->aluPipe[simdId].set(computeUnit->shader->
ticks(computeUnit->spBypassLength())); ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD // this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].set(computeUnit->shader-> computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_BARRIER) { } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].set(computeUnit->shader-> computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_FLAT_READ) { } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) { if (Enums::SC_SHARED == ii->executedAs()) {
@ -816,7 +721,7 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (ii->opType() == Enums::OT_FLAT_WRITE) { } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) { if (Enums::SC_SHARED == ii->executedAs()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
@ -829,32 +734,32 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (IS_OT_READ_GM(ii->opType())) { } else if (ii->isLoad() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(4)); set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_GM(ii->opType())) { } else if (ii->isStore() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_GM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_LM(ii->opType())) { } else if (ii->isLoad() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(4)); set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_LM(ii->opType())) { } else if (ii->isStore() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_LM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].