gpu-compute: remove inst enums and use bit flag for attributes

this patch removes the GPUStaticInst enums that were defined in GPU.py.
instead, a simple set of attribute flags that can be set in the base
instruction class are used. this will help unify the attributes of HSAIL
and machine ISA instructions within the model itself.

because the static instruction now carries the attributes, a GPUDynInst
must carry a pointer to a valid GPUStaticInst, so a new static kernel launch
instruction is added, which carries the attributes needed to perform
the kernel launch.
This commit is contained in:
Tony Gutierrez 2016-10-26 22:47:11 -04:00
parent e1ad8035a3
commit 7ac38849ab
28 changed files with 1257 additions and 1116 deletions

View file

@ -43,7 +43,6 @@ if env['TARGET_GPU_ISA'] == 'hsail':
env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
'gen.py', '$SOURCE $TARGETS')
Source('generic_types.cc')
Source('gpu_decoder.cc')
Source('insts/branch.cc')
Source('insts/gen_exec.cc')

View file

@ -1,47 +0,0 @@
#include "arch/hsail/generic_types.hh"
#include "base/misc.hh"
using namespace Brig;
namespace HsailISA
{
/**
 * Translate a Brig memory-order value into the simulator's generic
 * memory-order enum. Any unrecognized value is a fatal error.
 */
Enums::GenericMemoryOrder
getGenericMemoryOrder(BrigMemoryOrder brig_memory_order)
{
    if (brig_memory_order == BRIG_MEMORY_ORDER_NONE)
        return Enums::MEMORY_ORDER_NONE;

    if (brig_memory_order == BRIG_MEMORY_ORDER_RELAXED)
        return Enums::MEMORY_ORDER_RELAXED;

    if (brig_memory_order == BRIG_MEMORY_ORDER_SC_ACQUIRE)
        return Enums::MEMORY_ORDER_SC_ACQUIRE;

    if (brig_memory_order == BRIG_MEMORY_ORDER_SC_RELEASE)
        return Enums::MEMORY_ORDER_SC_RELEASE;

    if (brig_memory_order == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE)
        return Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE;

    fatal("HsailISA::MemInst::getGenericMemoryOrder -> ",
          "bad BrigMemoryOrder\n");
}
/**
 * Translate a Brig memory-scope value into the simulator's generic
 * memory-scope enum. Note that BRIG_MEMORY_SCOPE_AGENT maps to
 * MEMORY_SCOPE_DEVICE. Any unrecognized value is a fatal error.
 */
Enums::GenericMemoryScope
getGenericMemoryScope(BrigMemoryScope brig_memory_scope)
{
    if (brig_memory_scope == BRIG_MEMORY_SCOPE_NONE)
        return Enums::MEMORY_SCOPE_NONE;

    if (brig_memory_scope == BRIG_MEMORY_SCOPE_WORKITEM)
        return Enums::MEMORY_SCOPE_WORKITEM;

    if (brig_memory_scope == BRIG_MEMORY_SCOPE_WORKGROUP)
        return Enums::MEMORY_SCOPE_WORKGROUP;

    if (brig_memory_scope == BRIG_MEMORY_SCOPE_AGENT)
        return Enums::MEMORY_SCOPE_DEVICE;

    if (brig_memory_scope == BRIG_MEMORY_SCOPE_SYSTEM)
        return Enums::MEMORY_SCOPE_SYSTEM;

    fatal("HsailISA::MemInst::getGenericMemoryScope -> ",
          "bad BrigMemoryScope\n");
}
} // namespace HsailISA

View file

@ -1,16 +0,0 @@
#ifndef __ARCH_HSAIL_GENERIC_TYPES_HH__
#define __ARCH_HSAIL_GENERIC_TYPES_HH__
#include "arch/hsail/Brig.h"
#include "enums/GenericMemoryOrder.hh"
#include "enums/GenericMemoryScope.hh"
namespace HsailISA
{
// Translate a Brig memory order into the simulator's generic
// memory-order enum; fatals on an unrecognized value.
Enums::GenericMemoryOrder
getGenericMemoryOrder(Brig::BrigMemoryOrder brig_memory_order);
// Translate a Brig memory scope into the simulator's generic
// memory-scope enum (BRIG_MEMORY_SCOPE_AGENT maps to
// MEMORY_SCOPE_DEVICE); fatals on an unrecognized value.
Enums::GenericMemoryScope
getGenericMemoryScope(Brig::BrigMemoryScope brig_memory_scope);
} // namespace HsailISA
#endif // __ARCH_HSAIL_GENERIC_TYPES_HH__

View file

@ -59,16 +59,15 @@ namespace HsailISA
BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "brn")
{
o_type = Enums::OT_BRANCH;
setFlag(Branch);
setFlag(UnconditionalJump);
width = ((Brig::BrigInstBr*)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool unconditionalJumpInstruction() override { return true; }
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isVectorRegister();
@ -175,13 +174,12 @@ namespace HsailISA
CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "cbr")
{
o_type = Enums::OT_BRANCH;
setFlag(Branch);
width = ((Brig::BrigInstBr *)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
cond.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
@ -343,17 +341,15 @@ namespace HsailISA
BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "br")
{
o_type = Enums::OT_BRANCH;
setFlag(Branch);
setFlag(UnconditionalJump);
width.init(((Brig::BrigInstBr *)ib)->width, obj);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool unconditionalJumpInstruction() override { return true; }
void execute(GPUDynInstPtr gpuDynInst) override;
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());

View file

@ -38,11 +38,9 @@
#include <cmath>
#include "arch/hsail/generic_types.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "debug/HSAIL.hh"
#include "enums/OpType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@ -127,6 +125,8 @@ namespace HsailISA
const char *opcode)
: HsailGPUStaticInst(obj, opcode)
{
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@ -240,6 +240,8 @@ namespace HsailISA
const char *opcode)
: HsailGPUStaticInst(obj, opcode)
{
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@ -414,6 +416,8 @@ namespace HsailISA
const BrigObject *obj, const char *opcode)
: HsailGPUStaticInst(obj, opcode)
{
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@ -818,6 +822,8 @@ namespace HsailISA
const BrigObject *obj, const char *_opcode)
: HsailGPUStaticInst(obj, _opcode)
{
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@ -874,7 +880,7 @@ namespace HsailISA
Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "ret")
{
o_type = Enums::OT_RET;
setFlag(GPUStaticInst::Return);
}
void execute(GPUDynInstPtr gpuDynInst);
@ -889,7 +895,7 @@ namespace HsailISA
Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "barrier")
{
o_type = Enums::OT_BARRIER;
setFlag(GPUStaticInst::MemBarrier);
assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
}
@ -924,14 +930,105 @@ namespace HsailISA
memFenceMemOrder = (Brig::BrigMemoryOrder)
((Brig::BrigInstMemFence*)ib)->memoryOrder;
// set o_type based on scopes
setFlag(MemoryRef);
setFlag(GPUStaticInst::MemFence);
switch (memFenceMemOrder) {
case Brig::BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case Brig::BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("MemInst has bad BrigMemoryOrder\n");
}
// set inst flags based on scopes
if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_BOTH_MEMFENCE;
setFlag(GPUStaticInst::GlobalSegment);
/**
* A memory fence that has scope for
* both segments will use the global
* segment, and be executed in the
* global memory pipeline, therefore,
* we set the segment to match the
* global scope only
*/
switch (memFenceScopeSegGlobal) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad global scope type\n");
}
} else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_GLOBAL_MEMFENCE;
setFlag(GPUStaticInst::GlobalSegment);
switch (memFenceScopeSegGlobal) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad global scope type\n");
}
} else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_SHARED_MEMFENCE;
setFlag(GPUStaticInst::GroupSegment);
switch (memFenceScopeSegGroup) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad group scope type\n");
}
} else {
fatal("MemFence constructor: bad scope specifiers\n");
}
@ -955,18 +1052,13 @@ namespace HsailISA
// etc.). We send a packet, tagged with the memory order and
// scope, and let the GPU coalescer handle it.
if (o_type == Enums::OT_GLOBAL_MEMFENCE ||
o_type == Enums::OT_BOTH_MEMFENCE) {
if (isGlobalSeg()) {
gpuDynInst->simdId = w->simdId;
gpuDynInst->wfSlotId = w->wfSlotId;
gpuDynInst->wfDynId = w->wfDynId;
gpuDynInst->kern_id = w->kernId;
gpuDynInst->cu_id = w->computeUnit->cu_id;
gpuDynInst->memoryOrder =
getGenericMemoryOrder(memFenceMemOrder);
gpuDynInst->scope =
getGenericMemoryScope(memFenceScopeSegGlobal);
gpuDynInst->useContinuation = false;
GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
gmp->getGMReqFIFO().push(gpuDynInst);
@ -975,10 +1067,10 @@ namespace HsailISA
w->rdGmReqsInPipe--;
w->memReqsInPipe--;
w->outstandingReqs++;
} else if (o_type == Enums::OT_SHARED_MEMFENCE) {
} else if (isGroupSeg()) {
// no-op
} else {
fatal("MemFence execute: bad o_type\n");
fatal("MemFence execute: bad op type\n");
}
}
};
@ -1054,6 +1146,7 @@ namespace HsailISA
Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "call")
{
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);

View file

@ -179,12 +179,13 @@ namespace HsailISA
w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
if (!refCount) {
setFlag(SystemScope);
setFlag(Release);
setFlag(GlobalSegment);
// Notify Memory System of Kernel Completion
// Kernel End = isKernel + isRelease
w->status = Wavefront::S_RETURNING;
GPUDynInstPtr local_mempacket = gpuDynInst;
local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE;
local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM;
local_mempacket->useContinuation = false;
local_mempacket->simdId = w->simdId;
local_mempacket->wfSlotId = w->wfSlotId;

View file

@ -36,7 +36,6 @@
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/Brig.h"
#include "enums/OpType.hh"
using namespace Brig;
@ -44,68 +43,6 @@ namespace HsailISA
{
const char* atomicOpToString(BrigAtomicOperation brigOp);
/**
 * Map a (BrigOpcode, BrigAtomicOperation) pair onto the simulator's
 * MemOpType enum. BRIG_OPCODE_ATOMIC selects the returning (MO_A*)
 * variants, BRIG_OPCODE_ATOMICNORET the non-returning (MO_ANR*)
 * variants; any other opcode or atomic operation is a fatal error.
 */
Enums::MemOpType
brigAtomicToMemOpType(BrigOpcode brigOpCode, BrigAtomicOperation brigOp)
{
    const bool with_ret = brigOpCode == Brig::BRIG_OPCODE_ATOMIC;

    if (!with_ret && brigOpCode != Brig::BRIG_OPCODE_ATOMICNORET)
        fatal("Bad BrigAtomicOpcode %d\n", brigOpCode);

    switch (brigOp) {
      case BRIG_ATOMIC_AND:
        return with_ret ? Enums::MO_AAND : Enums::MO_ANRAND;
      case BRIG_ATOMIC_OR:
        return with_ret ? Enums::MO_AOR : Enums::MO_ANROR;
      case BRIG_ATOMIC_XOR:
        return with_ret ? Enums::MO_AXOR : Enums::MO_ANRXOR;
      case BRIG_ATOMIC_CAS:
        return with_ret ? Enums::MO_ACAS : Enums::MO_ANRCAS;
      case BRIG_ATOMIC_EXCH:
        return with_ret ? Enums::MO_AEXCH : Enums::MO_ANREXCH;
      case BRIG_ATOMIC_ADD:
        return with_ret ? Enums::MO_AADD : Enums::MO_ANRADD;
      case BRIG_ATOMIC_WRAPINC:
        return with_ret ? Enums::MO_AINC : Enums::MO_ANRINC;
      case BRIG_ATOMIC_WRAPDEC:
        return with_ret ? Enums::MO_ADEC : Enums::MO_ANRDEC;
      case BRIG_ATOMIC_MIN:
        return with_ret ? Enums::MO_AMIN : Enums::MO_ANRMIN;
      case BRIG_ATOMIC_MAX:
        return with_ret ? Enums::MO_AMAX : Enums::MO_ANRMAX;
      case BRIG_ATOMIC_SUB:
        return with_ret ? Enums::MO_ASUB : Enums::MO_ANRSUB;
      default:
        fatal("Bad BrigAtomicOperation code %d\n", brigOp);
    }
}
const char*
atomicOpToString(BrigAtomicOperation brigOp)
{

View file

@ -96,6 +96,8 @@ namespace HsailISA
{
using namespace Brig;
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
@ -211,131 +213,6 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder;
Brig::BrigMemoryScope memoryScope;
unsigned int equivClass;
bool isArgLoad()
{
return segment == Brig::BRIG_SEGMENT_KERNARG ||
segment == Brig::BRIG_SEGMENT_ARG;
}
void
initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_READ;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_READ;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_READ;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_READ;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_READ;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_READ;
break;
case BRIG_SEGMENT_KERNARG:
o_type = Enums::OT_KERN_READ;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
width = ldst->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
}
void
initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
memoryScope = (BrigMemoryScope)at->memoryScope;
equivClass = 0;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_READ;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_READ;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_READ;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_READ;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_READ;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_READ;
break;
case BRIG_SEGMENT_KERNARG:
o_type = Enums::OT_KERN_READ;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
width = BRIG_WIDTH_1;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands,1);
addr.init(op_offs, obj);
}
LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
@ -343,10 +220,111 @@ namespace HsailISA
{
using namespace Brig;
setFlag(MemoryRef);
setFlag(Load);
if (ib->opcode == BRIG_OPCODE_LD) {
initLd(ib, obj, _opcode);
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
width = ldst->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
} else {
initAtomicLd(ib, obj, _opcode);
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
memoryScope = (BrigMemoryScope)at->memoryScope;
equivClass = 0;
width = BRIG_WIDTH_1;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands,1);
addr.init(op_offs, obj);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("LdInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("LdInst has bad memory scope type\n");
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_PRIVATE:
setFlag(PrivateSegment);
break;
case BRIG_SEGMENT_READONLY:
setFlag(ReadOnlySegment);
break;
case BRIG_SEGMENT_SPILL:
setFlag(SpillSegment);
break;
case BRIG_SEGMENT_FLAT:
setFlag(Flat);
break;
case BRIG_SEGMENT_KERNARG:
setFlag(KernArgSegment);
break;
case BRIG_SEGMENT_ARG:
setFlag(ArgSegment);
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
}
@ -473,7 +451,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
if (isLocalMem()) {
if (this->isLocalMem()) {
// load from shared memory
*d = gpuDynInst->wavefront()->ldsChunk->
read<c0>(vaddr);
@ -488,8 +466,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader->
separate_acquire_release &&
gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_ACQUIRE) {
gpuDynInst->isAcquire()) {
// if this load has acquire semantics,
// set the response continuation function
// to perform an Acquire request
@ -520,10 +497,9 @@ namespace HsailISA
{
// after the load has complete and if the load has acquire
// semantics, issue an acquire request.
if (!isLocalMem()) {
if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_ACQUIRE) {
&& gpuDynInst->isAcquire()) {
gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->useContinuation = false;
// create request
@ -537,12 +513,6 @@ namespace HsailISA
}
public:
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
bool isVectorRegister(int operandIndex) override
{
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
@ -731,127 +701,112 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder;
unsigned int equivClass;
void
initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_WRITE;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_WRITE;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_WRITE;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_WRITE;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_WRITE;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_WRITE;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("St: segment %d not supported\n", segment);
}
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const BrigOperand *baseOp = obj->getOperand(op_offs);
if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
(baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
src.init(op_offs, obj);
}
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
}
void
initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryScope = (BrigMemoryScope)at->memoryScope;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
equivClass = 0;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_WRITE;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_WRITE;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_WRITE;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_WRITE;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_WRITE;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_WRITE;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("St: segment %d not supported\n", segment);
}
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
addr.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
src.init(op_offs, obj);
}
StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
: HsailGPUStaticInst(obj, _opcode)
{
using namespace Brig;
setFlag(MemoryRef);
setFlag(Store);
if (ib->opcode == BRIG_OPCODE_ST) {
initSt(ib, obj, _opcode);
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const BrigOperand *baseOp = obj->getOperand(op_offs);
if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
(baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
src.init(op_offs, obj);
}
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
} else {
initAtomicSt(ib, obj, _opcode);
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryScope = (BrigMemoryScope)at->memoryScope;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
equivClass = 0;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
addr.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
src.init(op_offs, obj);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("StInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("StInst has bad memory scope type\n");
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_PRIVATE:
setFlag(PrivateSegment);
break;
case BRIG_SEGMENT_READONLY:
setFlag(ReadOnlySegment);
break;
case BRIG_SEGMENT_SPILL:
setFlag(SpillSegment);
break;
case BRIG_SEGMENT_FLAT:
setFlag(Flat);
break;
case BRIG_SEGMENT_ARG:
setFlag(ArgSegment);
break;
default:
panic("St: segment %d not supported\n", segment);
}
}
@ -964,10 +919,9 @@ namespace HsailISA
{
// before performing a store, check if this store has
// release semantics, and if so issue a release first
if (!isLocalMem()) {
if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_RELEASE) {
&& gpuDynInst->isRelease()) {
gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->execContinuation = &GPUStaticInst::execSt;
@ -987,12 +941,6 @@ namespace HsailISA
execSt(gpuDynInst);
}
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
private:
// execSt may be called through a continuation
// if the store had release semantics. see comment for
@ -1020,7 +968,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
if (isLocalMem()) {
if (this->isLocalMem()) {
//store to shared memory
gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
*d);
@ -1166,9 +1114,6 @@ namespace HsailISA
}
}
Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
Brig::BrigAtomicOperation brigOp);
template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
bool HasDst>
class AtomicInstBase : public HsailGPUStaticInst
@ -1183,7 +1128,6 @@ namespace HsailISA
Brig::BrigAtomicOperation atomicOperation;
Brig::BrigMemoryScope memoryScope;
Brig::BrigOpcode opcode;
Enums::MemOpType opType;
AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
@ -1198,21 +1142,106 @@ namespace HsailISA
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
atomicOperation = (BrigAtomicOperation)at->atomicOperation;
opcode = (BrigOpcode)ib->opcode;
opType = brigAtomicToMemOpType(opcode, atomicOperation);
assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
opcode == Brig::BRIG_OPCODE_ATOMIC);
setFlag(MemoryRef);
if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("AtomicInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("AtomicInst has bad memory scope type\n");
}
switch (atomicOperation) {
case Brig::BRIG_ATOMIC_AND:
setFlag(AtomicAnd);
break;
case Brig::BRIG_ATOMIC_OR:
setFlag(AtomicOr);
break;
case Brig::BRIG_ATOMIC_XOR:
setFlag(AtomicXor);
break;
case Brig::BRIG_ATOMIC_CAS:
setFlag(AtomicCAS);
break;
case Brig::BRIG_ATOMIC_EXCH:
setFlag(AtomicExch);
break;
case Brig::BRIG_ATOMIC_ADD:
setFlag(AtomicAdd);
break;
case Brig::BRIG_ATOMIC_WRAPINC:
setFlag(AtomicInc);
break;
case Brig::BRIG_ATOMIC_WRAPDEC:
setFlag(AtomicDec);
break;
case Brig::BRIG_ATOMIC_MIN:
setFlag(AtomicMin);
break;
case Brig::BRIG_ATOMIC_MAX:
setFlag(AtomicMax);
break;
case Brig::BRIG_ATOMIC_SUB:
setFlag(AtomicSub);
break;
default:
fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_ATOMIC;
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_ATOMIC;
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_ATOMIC;
setFlag(Flat);
break;
default:
panic("Atomic: segment %d not supported\n", segment);
}
@ -1354,11 +1383,10 @@ namespace HsailISA
{
// before doing the RMW, check if this atomic has
// release semantics, and if so issue a release first
if (!isLocalMem()) {
if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& (gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
&& (gpuDynInst->isRelease()
|| gpuDynInst->isAcquireRelease())) {
gpuDynInst->statusBitVector = VectorMask(1);
@ -1383,12 +1411,6 @@ namespace HsailISA
void execute(GPUDynInstPtr gpuDynInst) override;
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
private:
// execAtomic may be called through a continuation
// if the RMW had release semantics. see comment for
@ -1408,72 +1430,48 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i];
if (isLocalMem()) {
if (this->isLocalMem()) {
Wavefront *wavefront = gpuDynInst->wavefront();
*d = wavefront->ldsChunk->read<c0>(vaddr);
switch (this->opType) {
case Enums::MO_AADD:
case Enums::MO_ANRADD:
if (this->isAtomicAdd()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + (*e));
break;
case Enums::MO_ASUB:
case Enums::MO_ANRSUB:
} else if (this->isAtomicSub()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - (*e));
break;
case Enums::MO_AMAX:
case Enums::MO_ANRMAX:
} else if (this->isAtomicMax()) {
wavefront->ldsChunk->write<c0>(vaddr,
std::max(wavefront->ldsChunk->read<c0>(vaddr),
(*e)));
break;
case Enums::MO_AMIN:
case Enums::MO_ANRMIN:
} else if (this->isAtomicMin()) {
wavefront->ldsChunk->write<c0>(vaddr,
std::min(wavefront->ldsChunk->read<c0>(vaddr),
(*e)));
break;
case Enums::MO_AAND:
case Enums::MO_ANRAND:
} else if (this->isAtomicAnd()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) & (*e));
break;
case Enums::MO_AOR:
case Enums::MO_ANROR:
} else if (this->isAtomicOr()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) | (*e));
break;
case Enums::MO_AXOR:
case Enums::MO_ANRXOR:
} else if (this->isAtomicXor()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
break;
case Enums::MO_AINC:
case Enums::MO_ANRINC:
} else if (this->isAtomicInc()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + 1);
break;
case Enums::MO_ADEC:
case Enums::MO_ANRDEC:
} else if (this->isAtomicDec()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - 1);
break;
case Enums::MO_AEXCH:
case Enums::MO_ANREXCH:
} else if (this->isAtomicExch()) {
wavefront->ldsChunk->write<c0>(vaddr, (*e));
break;
case Enums::MO_ACAS:
case Enums::MO_ANRCAS:
} else if (this->isAtomicCAS()) {
wavefront->ldsChunk->write<c0>(vaddr,
(wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
(*f) : wavefront->ldsChunk->read<c0>(vaddr));
break;
default:
} else {
fatal("Unrecognized or invalid HSAIL atomic op "
"type.\n");
break;
}
} else {
Request *req =
@ -1481,7 +1479,7 @@ namespace HsailISA
gpuDynInst->computeUnit()->masterId(),
0, gpuDynInst->wfDynId,
gpuDynInst->makeAtomicOpFunctor<c0>(e,
f, this->opType));
f));
gpuDynInst->setRequestFlags(req);
PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
@ -1489,8 +1487,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader->
separate_acquire_release &&
(gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_ACQUIRE)) {
(gpuDynInst->isAcquire())) {
// if this atomic has acquire semantics,
// schedule the continuation to perform an
// acquire after the RMW completes
@ -1523,10 +1520,9 @@ namespace HsailISA
{
// after performing the RMW, check to see if this instruction
// has acquire semantics, and if so, issue an acquire
if (!isLocalMem()) {
if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder ==
Enums::MEMORY_ORDER_SC_ACQUIRE) {
&& gpuDynInst->isAcquire()) {
gpuDynInst->statusBitVector = VectorMask(1);
// the request will be finished when

View file

@ -33,7 +33,6 @@
* Author: Steve Reinhardt
*/
#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"
// defined in code.cc, but not worth sucking in all of code.h for this
@ -215,16 +214,12 @@ namespace HsailISA
this->addr.calcVector(w, m->addr);
m->m_op = Enums::MO_LD;
m->m_type = MemDataType::memType;
m->v_type = DestDataType::vgprType;
m->exec_mask = w->execMask();
m->statusBitVector = 0;
m->equiv = this->equivClass;
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
if (num_dest_operands == 1) {
m->dst_reg = this->dest.regIndex();
@ -245,7 +240,6 @@ namespace HsailISA
switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
@ -276,7 +270,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL:
assert(num_dest_operands == 1);
m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@ -301,7 +294,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@ -310,7 +302,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_READONLY:
m->s_type = SEG_READONLY;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
@ -327,7 +318,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_PRIVATE:
m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@ -408,7 +398,6 @@ namespace HsailISA
}
}
m->m_op = Enums::MO_ST;
m->m_type = OperationType::memType;
m->v_type = OperationType::vgprType;
@ -421,10 +410,6 @@ namespace HsailISA
m->n_reg = num_src_operands;
}
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId;
@ -434,7 +419,6 @@ namespace HsailISA
switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
@ -463,7 +447,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL:
assert(num_src_operands == 1);
m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@ -483,7 +466,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@ -492,7 +474,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_PRIVATE:
m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@ -586,7 +567,6 @@ namespace HsailISA
assert(NumSrcOperands <= 2);
m->m_op = this->opType;
m->m_type = DataType::memType;
m->v_type = DataType::vgprType;
@ -594,9 +574,6 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1;
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
if (HasDst) {
m->dst_reg = this->dest.regIndex();
@ -611,7 +588,6 @@ namespace HsailISA
switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->latency.set(w->computeUnit->shader->ticks(64));
m->pipeId = GLBMEM_PIPE;
@ -623,7 +599,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);

View file

@ -627,8 +627,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
}
m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
Brig::BRIG_ATOMIC_ADD);
setFlag(AtomicNoReturn);
setFlag(AtomicAdd);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType;
m->v_type = U32::vgprType;
@ -636,15 +640,12 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@ -666,8 +667,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
}
m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
Brig::BRIG_ATOMIC_ADD);
setFlag(AtomicNoReturn);
setFlag(AtomicAdd);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType;
m->v_type = U32::vgprType;
@ -675,15 +680,12 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@ -702,7 +704,11 @@ namespace HsailISA
// calculate the address
calcAddr(w, m);
m->m_op = Enums::MO_LD;
setFlag(Load);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; //MemDataType::memType;
m->v_type = U32::vgprType; //DestDataType::vgprType;
@ -710,8 +716,6 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0;
m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
// FIXME
//m->dst_reg = this->dest.regIndex();
@ -721,7 +725,6 @@ namespace HsailISA
m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);

View file

@ -171,56 +171,6 @@ class GpuDispatcher(DmaDevice):
cl_driver = Param.ClDriver('pointer to driver')
class OpType(Enum): vals = [
'OT_NULL',
'OT_ALU',
'OT_SPECIAL',
'OT_GLOBAL_READ',
'OT_GLOBAL_WRITE',
'OT_GLOBAL_ATOMIC',
'OT_GLOBAL_HIST',
'OT_GLOBAL_LDAS',
'OT_SHARED_READ',
'OT_SHARED_WRITE',
'OT_SHARED_ATOMIC',
'OT_SHARED_HIST',
'OT_SHARED_LDAS',
'OT_PRIVATE_READ',
'OT_PRIVATE_WRITE',
'OT_PRIVATE_ATOMIC',
'OT_PRIVATE_HIST',
'OT_PRIVATE_LDAS',
'OT_SPILL_READ',
'OT_SPILL_WRITE',
'OT_SPILL_ATOMIC',
'OT_SPILL_HIST',
'OT_SPILL_LDAS',
'OT_READONLY_READ',
'OT_READONLY_WRITE',
'OT_READONLY_ATOMIC',
'OT_READONLY_HIST',
'OT_READONLY_LDAS',
'OT_FLAT_READ',
'OT_FLAT_WRITE',
'OT_FLAT_ATOMIC',
'OT_FLAT_HIST',
'OT_FLAT_LDAS',
'OT_KERN_READ',
'OT_BRANCH',
# note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F version
# of the compiler.
'OT_SHARED_MEMFENCE',
'OT_GLOBAL_MEMFENCE',
'OT_BOTH_MEMFENCE',
'OT_BARRIER',
'OT_PRINT',
'OT_RET',
'OT_NOP',
'OT_ARG'
]
class MemType(Enum): vals = [
'M_U8',
'M_U16',
@ -235,47 +185,6 @@ class MemType(Enum): vals = [
'M_F64',
]
class MemOpType(Enum): vals = [
'MO_LD',
'MO_ST',
'MO_LDAS',
'MO_LDA',
'MO_AAND',
'MO_AOR',
'MO_AXOR',
'MO_ACAS',
'MO_AEXCH',
'MO_AADD',
'MO_ASUB',
'MO_AINC',
'MO_ADEC',
'MO_AMAX',
'MO_AMIN',
'MO_ANRAND',
'MO_ANROR',
'MO_ANRXOR',
'MO_ANRCAS',
'MO_ANREXCH',
'MO_ANRADD',
'MO_ANRSUB',
'MO_ANRINC',
'MO_ANRDEC',
'MO_ANRMAX',
'MO_ANRMIN',
'MO_HAND',
'MO_HOR',
'MO_HXOR',
'MO_HCAS',
'MO_HEXCH',
'MO_HADD',
'MO_HSUB',
'MO_HINC',
'MO_HDEC',
'MO_HMAX',
'MO_HMIN',
'MO_UNDEF'
]
class StorageClassType(Enum): vals = [
'SC_SPILL',
'SC_GLOBAL',
@ -293,20 +202,3 @@ class RegisterType(Enum): vals = [
'RT_HARDWARE',
'RT_NONE',
]
class GenericMemoryOrder(Enum): vals = [
'MEMORY_ORDER_NONE',
'MEMORY_ORDER_RELAXED',
'MEMORY_ORDER_SC_ACQUIRE',
'MEMORY_ORDER_SC_RELEASE',
'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
]
class GenericMemoryScope(Enum): vals = [
'MEMORY_SCOPE_NONE',
'MEMORY_SCOPE_WORKITEM',
'MEMORY_SCOPE_WAVEFRONT',
'MEMORY_SCOPE_WORKGROUP',
'MEMORY_SCOPE_DEVICE',
'MEMORY_SCOPE_SYSTEM',
]

View file

@ -0,0 +1,111 @@
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Anthony Gutierrez
from m5.params import *
class GPUStaticInstFlags(Enum):
wrapper_name = 'GPUStaticInstFlags'
wrapper_is_struct = True
enum_name = 'Flags'
vals = [
# Op types
'ALU', # ALU op
'Branch', # Branch instruction
'Nop', # No-op (no effect at all)
'Return', # Return instruction
'UnconditionalJump', #
'SpecialOp', # Special op
'Waitcnt', # Is a waitcnt instruction
# Memory ops
'MemBarrier', # Barrier instruction
'MemFence', # Memory fence instruction
'MemoryRef', # References memory (load, store, or atomic)
'Flat', # Flat memory op
'Load', # Reads from memory
'Store', # Writes to memory
# Atomic ops
'AtomicReturn', # Atomic instruction that returns data
'AtomicNoReturn', # Atomic instruction that doesn't return data
# Instruction attributes
'Scalar', # A scalar (not vector) operation
'ReadsSCC', # The instruction reads SCC
'WritesSCC', # The instruction writes SCC
'ReadsVCC', # The instruction reads VCC
'WritesVCC', # The instruction writes VCC
# Atomic OP types
'AtomicAnd',
'AtomicOr',
'AtomicXor',
'AtomicCAS',
'AtomicExch',
'AtomicAdd',
'AtomicSub',
'AtomicInc',
'AtomicDec',
'AtomicMax',
'AtomicMin',
# Memory order flags
'RelaxedOrder',
'Acquire', # Has acquire semantics
'Release', # Has release semantics
'AcquireRelease', # Has acquire and release semantics
'NoOrder', # Has no ordering restrictions
# Segment access flags
'ArgSegment', # Accesses the arg segment
'GlobalSegment', # Accesses global memory
'GroupSegment', # Accesses local memory (LDS), aka shared memory
'KernArgSegment', # Accesses the kernel argument segment
'PrivateSegment', # Accesses the private segment
'ReadOnlySegment', # Accesses read only memory
'SpillSegment', # Accesses the spill segment
'NoSegment', # Does not have an associated segment
# Scope flags
'WorkitemScope',
'WavefrontScope',
'WorkgroupScope',
'DeviceScope',
'SystemScope',
'NoScope', # Does not have an associated scope
# Coherence flags
'GloballyCoherent', # Coherent with other workitems on same device
'SystemCoherent' # Coherent with a different device, or the host
]

View file

@ -41,6 +41,7 @@ if not env['BUILD_GPU']:
Return()
SimObject('GPU.py')
SimObject('GPUStaticInstFlags.py')
SimObject('LdsState.py')
SimObject('X86GPUTLB.py')

View file

@ -1,116 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __CODE_ENUMS_HH__
#define __CODE_ENUMS_HH__
#define IS_OT_GLOBAL(a) ((a)>=Enums::OT_GLOBAL_READ \
&& (a)<=Enums::OT_GLOBAL_LDAS)
#define IS_OT_SHARED(a) ((a)>=Enums::OT_SHARED_READ \
&& (a)<=Enums::OT_SHARED_LDAS)
#define IS_OT_PRIVATE(a) ((a)>=Enums::OT_PRIVATE_READ \
&& (a)<=Enums::OT_PRIVATE_LDAS)
#define IS_OT_SPILL(a) ((a)>=Enums::OT_SPILL_READ \
&& (a)<=Enums::OT_SPILL_LDAS)
#define IS_OT_READONLY(a) ((a)>=Enums::OT_READONLY_READ \
&& (a)<=Enums::OT_READONLY_LDAS)
#define IS_OT_FLAT(a) ((a)>=Enums::OT_FLAT_READ && (a)<=Enums::OT_FLAT_LDAS)
#define IS_OT_LDAS(a) ((a)==Enums::OT_GLOBAL_LDAS||(a)==Enums::OT_SHARED_LDAS \
||(a)==Enums::OT_PRIVATE_LDAS||(a)==Enums::OT_SPILL_LDAS \
||(a)==Enums::OT_READONLY_LDAS||(a)==Enums::OT_FLAT_LDAS)
#define IS_OT_READ(a) ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SHARED_READ \
||(a)==Enums::OT_PRIVATE_READ||(a)==Enums::OT_SPILL_READ \
||(a)==Enums::OT_READONLY_READ||(a)==Enums::OT_FLAT_READ)
#define IS_OT_READ_GM(a) \
((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SPILL_READ \
||(a)==Enums::OT_READONLY_READ)
#define IS_OT_READ_LM(a) ((a)==Enums::OT_SHARED_READ)
#define IS_OT_READ_RM(a) ((a)==Enums::OT_READONLY_READ)
#define IS_OT_READ_PM(a) ((a)==Enums::OT_PRIVATE_READ)
#define IS_OT_WRITE(a) \
((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SHARED_WRITE \
||(a)==Enums::OT_PRIVATE_WRITE||(a)==Enums::OT_SPILL_WRITE \
||(a)==Enums::OT_READONLY_WRITE||(a)==Enums::OT_FLAT_WRITE)
#define IS_OT_WRITE_GM(a) \
((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SPILL_WRITE \
||(a)==Enums::OT_READONLY_WRITE)
#define IS_OT_WRITE_LM(a) ((a)==Enums::OT_SHARED_WRITE)
#define IS_OT_WRITE_PM(a) ((a)==Enums::OT_PRIVATE_WRITE)
#define IS_OT_ATOMIC(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
||(a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_PRIVATE_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_BOTH_MEMFENCE \
||(a)==Enums::OT_FLAT_ATOMIC)
#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_GLOBAL_MEMFENCE \
||(a)==Enums::OT_BOTH_MEMFENCE)
#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_SHARED_MEMFENCE)
#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
#define IS_OT_HIST(a) ((a)==Enums::OT_GLOBAL_HIST \
||(a)==Enums::OT_SHARED_HIST \
||(a)==Enums::OT_PRIVATE_HIST \
||(a)==Enums::OT_SPILL_HIST \
||(a)==Enums::OT_READONLY_HIST \
||(a)==Enums::OT_FLAT_HIST)
#define IS_OT_HIST_GM(a) ((a)==Enums::OT_GLOBAL_HIST \
||(a)==Enums::OT_SPILL_HIST \
||(a)==Enums::OT_READONLY_HIST)
#define IS_OT_HIST_LM(a) ((a)==Enums::OT_SHARED_HIST)
#define IS_OT_HIST_PM(a) ((a)==Enums::OT_PRIVATE_HIST)
#endif // __CODE_ENUMS_HH__

View file

@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")),
lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize)
lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
kernelLaunchInst(new KernelLaunchStaticInst())
{
/**
* This check is necessary because std::bitset only provides conversion
@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
// Send L1 cache acquire
// isKernel + isAcquire = Kernel Begin
if (shader->impl_kern_boundary_sync) {
GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this,
nullptr,
nullptr, 0);
GPUDynInstPtr gpuDynInst =
std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst,
getAndIncSeqNum());
gpuDynInst->useContinuation = false;
gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE;
gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM;
injectGlobalMemFence(gpuDynInst, true);
}
@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
gpuDynInst->wfSlotId, w->barrierCnt);
if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@ -942,6 +941,8 @@ void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
Request* req)
{
assert(gpuDynInst->isGlobalSeg());
if (!req) {
req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
}
@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
req->setFlags(Request::KERNEL);
}
gpuDynInst->s_type = SEG_GLOBAL;
// for non-kernel MemFence operations, memorder flags are set depending
// on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel
@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
gpuDynInst->statusVector.clear();
if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op)
|| MO_ANR(gpuDynInst->m_op)) {
if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy());
compute_unit->globalMemoryPipe.getGMLdRespFIFO()
@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
// the continuation may generate more work for
// this memory request
if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
gpuDynInst->statusBitVector = VectorMask(0);
if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}

View file

@ -744,6 +744,7 @@ class ComputeUnit : public MemObject
private:
uint64_t globalSeqNum;
int wavefrontSize;
GPUStaticInst *kernelLaunchInst;
};
#endif // __COMPUTE_UNIT_HH__

View file

@ -67,7 +67,7 @@ GlobalMemPipeline::exec()
bool accessVrf = true;
// check the VRF to see if the operands of a load (or load component
// of an atomic) are accessible
if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf =
@ -127,10 +127,7 @@ GlobalMemPipeline::exec()
// memory packets to DTLB
if (!gmIssuedRequests.empty()) {
GPUDynInstPtr mp = gmIssuedRequests.front();
if (mp->m_op == Enums::MO_LD ||
(mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) ||
(mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) {
if (mp->isLoad() || mp->isAtomic()) {
if (inflightLoads >= gmQueueSize) {
return;
} else {
@ -139,7 +136,7 @@ GlobalMemPipeline::exec()
} else {
if (inflightStores >= gmQueueSize) {
return;
} else if (mp->m_op == Enums::MO_ST) {
} else if (mp->isStore()) {
++inflightStores;
}
}
@ -147,9 +144,8 @@ GlobalMemPipeline::exec()
mp->initiateAcc(mp);
gmIssuedRequests.pop();
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n",
computeUnit->cu_id, mp->simdId, mp->wfSlotId,
Enums::MemOpTypeStrings[mp->m_op]);
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
computeUnit->cu_id, mp->simdId, mp->wfSlotId);
}
}
@ -160,12 +156,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
if (m->isLoad() || m->isAtomic()) {
gmReturnedLoads.pop();
assert(inflightLoads > 0);
--inflightLoads;
if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec;
// iterate over number of destination register operands since
// this is a load or atomic operation
@ -214,13 +210,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
// Decrement outstanding register count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
MO_H(m->m_op)) {
if (m->isStore() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
-1);
}
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
-1);
}

View file

@ -41,11 +41,10 @@
#include "gpu-compute/wavefront.hh"
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
GPUStaticInst *_staticInst, uint64_t instSeqNum)
GPUStaticInst *static_inst, uint64_t instSeqNum)
: GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
m_op(Enums::MO_UNDEF),
memoryOrder(Enums::MEMORY_ORDER_NONE), n_reg(0), useContinuation(false),
statusBitVector(0), staticInst(_staticInst), _seqNum(instSeqNum)
n_reg(0), useContinuation(false),
statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
d_data = new uint8_t[computeUnit()->wfSize() * 16];
@ -68,77 +67,69 @@ GPUDynInst::~GPUDynInst()
}
void
GPUDynInst::execute()
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(cu, wf, staticInst,
_seqNum);
staticInst->execute(gpuDynInst);
_staticInst->execute(gpuDynInst);
}
int
GPUDynInst::numSrcRegOperands()
{
return staticInst->numSrcRegOperands();
return _staticInst->numSrcRegOperands();
}
int
GPUDynInst::numDstRegOperands()
{
return staticInst->numDstRegOperands();
return _staticInst->numDstRegOperands();
}
int
GPUDynInst::getNumOperands()
{
return staticInst->getNumOperands();
return _staticInst->getNumOperands();
}
bool
GPUDynInst::isVectorRegister(int operandIdx)
{
return staticInst->isVectorRegister(operandIdx);
return _staticInst->isVectorRegister(operandIdx);
}
bool
GPUDynInst::isScalarRegister(int operandIdx)
{
return staticInst->isScalarRegister(operandIdx);
return _staticInst->isScalarRegister(operandIdx);
}
int
GPUDynInst::getRegisterIndex(int operandIdx)
{
return staticInst->getRegisterIndex(operandIdx);
return _staticInst->getRegisterIndex(operandIdx);
}
int
GPUDynInst::getOperandSize(int operandIdx)
{
return staticInst->getOperandSize(operandIdx);
return _staticInst->getOperandSize(operandIdx);
}
bool
GPUDynInst::isDstOperand(int operandIdx)
{
return staticInst->isDstOperand(operandIdx);
return _staticInst->isDstOperand(operandIdx);
}
bool
GPUDynInst::isSrcOperand(int operandIdx)
{
return staticInst->isSrcOperand(operandIdx);
}
bool
GPUDynInst::isArgLoad()
{
return staticInst->isArgLoad();
return _staticInst->isSrcOperand(operandIdx);
}
const std::string&
GPUDynInst::disassemble() const
{
return staticInst->disassemble();
return _staticInst->disassemble();
}
uint64_t
@ -147,16 +138,10 @@ GPUDynInst::seqNum() const
return _seqNum;
}
Enums::OpType
GPUDynInst::opType()
{
return staticInst->o_type;
}
Enums::StorageClassType
GPUDynInst::executedAs()
{
return staticInst->executed_as;
return _staticInst->executed_as;
}
// Process a memory instruction and (if necessary) submit timing request
@ -166,20 +151,347 @@ GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask);
staticInst->initiateAcc(gpuDynInst);
_staticInst->initiateAcc(gpuDynInst);
time = 0;
}
/**
* accessor methods for the attributes of
* the underlying GPU static instruction
*/
bool
GPUDynInst::scalarOp() const
GPUDynInst::isALU() const
{
return staticInst->scalarOp();
return _staticInst->isALU();
}
bool
GPUDynInst::isBranch() const
{
return _staticInst->isBranch();
}
bool
GPUDynInst::isNop() const
{
return _staticInst->isNop();
}
bool
GPUDynInst::isReturn() const
{
return _staticInst->isReturn();
}
bool
GPUDynInst::isUnconditionalJump() const
{
return _staticInst->isUnconditionalJump();
}
bool
GPUDynInst::isSpecialOp() const
{
return _staticInst->isSpecialOp();
}
bool
GPUDynInst::isWaitcnt() const
{
return _staticInst->isWaitcnt();
}
bool
GPUDynInst::isBarrier() const
{
return _staticInst->isBarrier();
}
bool
GPUDynInst::isMemFence() const
{
return _staticInst->isMemFence();
}
bool
GPUDynInst::isMemRef() const
{
return _staticInst->isMemRef();
}
bool
GPUDynInst::isFlat() const
{
return _staticInst->isFlat();
}
bool
GPUDynInst::isLoad() const
{
return _staticInst->isLoad();
}
bool
GPUDynInst::isStore() const
{
return _staticInst->isStore();
}
bool
GPUDynInst::isAtomic() const
{
return _staticInst->isAtomic();
}
bool
GPUDynInst::isAtomicNoRet() const
{
return _staticInst->isAtomicNoRet();
}
bool
GPUDynInst::isAtomicRet() const
{
return _staticInst->isAtomicRet();
}
bool
GPUDynInst::isScalar() const
{
return _staticInst->isScalar();
}
bool
GPUDynInst::readsSCC() const
{
return _staticInst->readsSCC();
}
bool
GPUDynInst::writesSCC() const
{
return _staticInst->writesSCC();
}
bool
GPUDynInst::readsVCC() const
{
return _staticInst->readsVCC();
}
bool
GPUDynInst::writesVCC() const
{
return _staticInst->writesVCC();
}
bool
GPUDynInst::isAtomicAnd() const
{
return _staticInst->isAtomicAnd();
}
bool
GPUDynInst::isAtomicOr() const
{
return _staticInst->isAtomicOr();
}
bool
GPUDynInst::isAtomicXor() const
{
return _staticInst->isAtomicXor();
}
bool
GPUDynInst::isAtomicCAS() const
{
return _staticInst->isAtomicCAS();
}
bool GPUDynInst::isAtomicExch() const
{
return _staticInst->isAtomicExch();
}
bool
GPUDynInst::isAtomicAdd() const
{
return _staticInst->isAtomicAdd();
}
bool
GPUDynInst::isAtomicSub() const
{
return _staticInst->isAtomicSub();
}
bool
GPUDynInst::isAtomicInc() const
{
return _staticInst->isAtomicInc();
}
bool
GPUDynInst::isAtomicDec() const
{
return _staticInst->isAtomicDec();
}
bool
GPUDynInst::isAtomicMax() const
{
return _staticInst->isAtomicMax();
}
bool
GPUDynInst::isAtomicMin() const
{
return _staticInst->isAtomicMin();
}
bool
GPUDynInst::isArgLoad() const
{
return _staticInst->isArgLoad();
}
bool
GPUDynInst::isGlobalMem() const
{
return _staticInst->isGlobalMem();
}
bool
GPUDynInst::isLocalMem() const
{
return _staticInst->isLocalMem();
}
bool
GPUDynInst::isArgSeg() const
{
return _staticInst->isArgSeg();
}
bool
GPUDynInst::isGlobalSeg() const
{
return _staticInst->isGlobalSeg();
}
bool
GPUDynInst::isGroupSeg() const
{
return _staticInst->isGroupSeg();
}
bool
GPUDynInst::isKernArgSeg() const
{
return _staticInst->isKernArgSeg();
}
bool
GPUDynInst::isPrivateSeg() const
{
return _staticInst->isPrivateSeg();
}
bool
GPUDynInst::isReadOnlySeg() const
{
return _staticInst->isReadOnlySeg();
}
bool
GPUDynInst::isSpillSeg() const
{
return _staticInst->isSpillSeg();
}
bool
GPUDynInst::isWorkitemScope() const
{
return _staticInst->isWorkitemScope();
}
bool
GPUDynInst::isWavefrontScope() const
{
return _staticInst->isWavefrontScope();
}
bool
GPUDynInst::isWorkgroupScope() const
{
return _staticInst->isWorkgroupScope();
}
bool
GPUDynInst::isDeviceScope() const
{
return _staticInst->isDeviceScope();
}
bool
GPUDynInst::isSystemScope() const
{
return _staticInst->isSystemScope();
}
bool
GPUDynInst::isNoScope() const
{
return _staticInst->isNoScope();
}
bool
GPUDynInst::isRelaxedOrder() const
{
return _staticInst->isRelaxedOrder();
}
bool
GPUDynInst::isAcquire() const
{
return _staticInst->isAcquire();
}
bool
GPUDynInst::isRelease() const
{
return _staticInst->isRelease();
}
bool
GPUDynInst::isAcquireRelease() const
{
return _staticInst->isAcquireRelease();
}
bool
GPUDynInst::isNoOrder() const
{
return _staticInst->isNoOrder();
}
bool
GPUDynInst::isGloballyCoherent() const
{
return _staticInst->isGloballyCoherent();
}
bool
GPUDynInst::isSystemCoherent() const
{
return _staticInst->isSystemCoherent();
}
void
GPUDynInst::updateStats()
{
if (staticInst->isLocalMem()) {
if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory
cu->dynamicLMemInstrCnt++;
} else {

View file

@ -39,11 +39,7 @@
#include <cstdint>
#include <string>
#include "enums/GenericMemoryOrder.hh"
#include "enums/GenericMemoryScope.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "enums/OpType.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"
@ -180,33 +176,19 @@ class AtomicOpMin : public TypedAtomicOpFunctor<T>
}
};
#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN)
#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN)
#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN)
typedef enum
{
VT_32,
VT_64,
} vgpr_type;
typedef enum
{
SEG_PRIVATE,
SEG_SPILL,
SEG_GLOBAL,
SEG_SHARED,
SEG_READONLY,
SEG_FLAT
} seg_type;
class GPUDynInst : public GPUExecContext
{
public:
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst,
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum);
~GPUDynInst();
void execute();
void execute(GPUDynInstPtr gpuDynInst);
int numSrcRegOperands();
int numDstRegOperands();
int getNumOperands();
@ -216,13 +198,11 @@ class GPUDynInst : public GPUExecContext
int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx);
bool isArgLoad();
const std::string &disassemble() const;
uint64_t seqNum() const;
Enums::OpType opType();
Enums::StorageClassType executedAs();
// The address of the memory operation
@ -240,14 +220,7 @@ class GPUDynInst : public GPUExecContext
// The memory type (M_U32, M_S32, ...)
Enums::MemType m_type;
// The memory operation (MO_LD, MO_ST, ...)
Enums::MemOpType m_op;
Enums::GenericMemoryOrder memoryOrder;
// Scope of the request
Enums::GenericMemoryScope scope;
// The memory segment (SEG_SHARED, SEG_GLOBAL, ...)
seg_type s_type;
// The equivalency class
int equiv;
// The return VGPR type (VT_32 or VT_64)
@ -288,10 +261,72 @@ class GPUDynInst : public GPUExecContext
void updateStats();
GPUStaticInst* staticInstruction() { return staticInst; }
GPUStaticInst* staticInstruction() { return _staticInst; }
// Is the instruction a scalar or vector op?
bool scalarOp() const;
bool isALU() const;
bool isBranch() const;
bool isNop() const;
bool isReturn() const;
bool isUnconditionalJump() const;
bool isSpecialOp() const;
bool isWaitcnt() const;
bool isBarrier() const;
bool isMemFence() const;
bool isMemRef() const;
bool isFlat() const;
bool isLoad() const;
bool isStore() const;
bool isAtomic() const;
bool isAtomicNoRet() const;
bool isAtomicRet() const;
bool isScalar() const;
bool readsSCC() const;
bool writesSCC() const;
bool readsVCC() const;
bool writesVCC() const;
bool isAtomicAnd() const;
bool isAtomicOr() const;
bool isAtomicXor() const;
bool isAtomicCAS() const;
bool isAtomicExch() const;
bool isAtomicAdd() const;
bool isAtomicSub() const;
bool isAtomicInc() const;
bool isAtomicDec() const;
bool isAtomicMax() const;
bool isAtomicMin() const;
bool isArgLoad() const;
bool isGlobalMem() const;
bool isLocalMem() const;
bool isArgSeg() const;
bool isGlobalSeg() const;
bool isGroupSeg() const;
bool isKernArgSeg() const;
bool isPrivateSeg() const;
bool isReadOnlySeg() const;
bool isSpillSeg() const;
bool isWorkitemScope() const;
bool isWavefrontScope() const;
bool isWorkgroupScope() const;
bool isDeviceScope() const;
bool isSystemScope() const;
bool isNoScope() const;
bool isRelaxedOrder() const;
bool isAcquire() const;
bool isRelease() const;
bool isAcquireRelease() const;
bool isNoOrder() const;
bool isGloballyCoherent() const;
bool isSystemCoherent() const;
/*
* Loads/stores/atomics may have acquire/release semantics associated
@ -312,46 +347,32 @@ class GPUDynInst : public GPUExecContext
bool useContinuation;
template<typename c0> AtomicOpFunctor*
makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op)
makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
{
using namespace Enums;
switch(op) {
case MO_AAND:
case MO_ANRAND:
if (isAtomicAnd()) {
return new AtomicOpAnd<c0>(*reg0);
case MO_AOR:
case MO_ANROR:
} else if (isAtomicOr()) {
return new AtomicOpOr<c0>(*reg0);
case MO_AXOR:
case MO_ANRXOR:
} else if (isAtomicXor()) {
return new AtomicOpXor<c0>(*reg0);
case MO_ACAS:
case MO_ANRCAS:
} else if (isAtomicCAS()) {
return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
case MO_AEXCH:
case MO_ANREXCH:
} else if (isAtomicExch()) {
return new AtomicOpExch<c0>(*reg0);
case MO_AADD:
case MO_ANRADD:
} else if (isAtomicAdd()) {
return new AtomicOpAdd<c0>(*reg0);
case MO_ASUB:
case MO_ANRSUB:
} else if (isAtomicSub()) {
return new AtomicOpSub<c0>(*reg0);
case MO_AINC:
case MO_ANRINC:
} else if (isAtomicInc()) {
return new AtomicOpInc<c0>();
case MO_ADEC:
case MO_ANRDEC:
} else if (isAtomicDec()) {
return new AtomicOpDec<c0>();
case MO_AMAX:
case MO_ANRMAX:
} else if (isAtomicMax()) {
return new AtomicOpMax<c0>(*reg0);
case MO_AMIN:
case MO_ANRMIN:
} else if (isAtomicMin()) {
return new AtomicOpMin<c0>(*reg0);
default:
panic("Unrecognized atomic operation");
} else {
fatal("Unrecognized atomic operation");
}
}
@ -359,88 +380,58 @@ class GPUDynInst : public GPUExecContext
setRequestFlags(Request *req, bool setMemOrder=true)
{
// currently these are the easy scopes to deduce
switch (s_type) {
case SEG_PRIVATE:
if (isPrivateSeg()) {
req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
break;
case SEG_SPILL:
} else if (isSpillSeg()) {
req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
break;
case SEG_GLOBAL:
} else if (isGlobalSeg()) {
req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
break;
case SEG_READONLY:
} else if (isReadOnlySeg()) {
req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
break;
case SEG_SHARED:
} else if (isGroupSeg()) {
req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
break;
case SEG_FLAT:
} else if (isFlat()) {
// TODO: translate to correct scope
assert(false);
default:
panic("Bad segment type");
break;
} else {
fatal("%s has bad segment type\n", disassemble());
}
switch (scope) {
case Enums::MEMORY_SCOPE_NONE:
case Enums::MEMORY_SCOPE_WORKITEM:
break;
case Enums::MEMORY_SCOPE_WAVEFRONT:
if (isWavefrontScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WAVEFRONT_SCOPE);
break;
case Enums::MEMORY_SCOPE_WORKGROUP:
} else if (isWorkgroupScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WORKGROUP_SCOPE);
break;
case Enums::MEMORY_SCOPE_DEVICE:
} else if (isDeviceScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::DEVICE_SCOPE);
break;
case Enums::MEMORY_SCOPE_SYSTEM:
} else if (isSystemScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::SYSTEM_SCOPE);
break;
default:
panic("Bad scope type");
break;
} else if (!isNoScope() && !isWorkitemScope()) {
fatal("%s has bad scope type\n", disassemble());
}
if (setMemOrder) {
// set acquire and release flags
switch (memoryOrder){
case Enums::MEMORY_ORDER_SC_ACQUIRE:
if (isAcquire()) {
req->setFlags(Request::ACQUIRE);
break;
case Enums::MEMORY_ORDER_SC_RELEASE:
} else if (isRelease()) {
req->setFlags(Request::RELEASE);
break;
case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE:
} else if (isAcquireRelease()) {
req->setFlags(Request::ACQUIRE | Request::RELEASE);
break;
default:
break;
} else if (!isNoOrder()) {
fatal("%s has bad memory order\n", disassemble());
}
}
// set atomic type
// currently, the instruction genenerator only produces atomic return
// but a magic instruction can produce atomic no return
if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB ||
m_op == Enums::MO_AAND || m_op == Enums::MO_AOR ||
m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX ||
m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC ||
m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH ||
m_op == Enums::MO_ACAS) {
if (isAtomicRet()) {
req->setFlags(Request::ATOMIC_RETURN_OP);
} else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB ||
m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR ||
m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX ||
m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC ||
m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH ||
m_op == Enums::MO_ANRCAS) {
} else if (isAtomicNoRet()) {
req->setFlags(Request::ATOMIC_NO_RETURN_OP);
}
}
@ -457,7 +448,7 @@ class GPUDynInst : public GPUExecContext
std::vector<int> tlbHitLevel;
private:
GPUStaticInst *staticInst;
GPUStaticInst *_staticInst;
uint64_t _seqNum;
};

View file

@ -36,10 +36,12 @@
#include "gpu-compute/gpu_static_inst.hh"
GPUStaticInst::GPUStaticInst(const std::string &opcode)
: o_type(Enums::OT_ALU), executed_as(Enums::SC_NONE), opcode(opcode),
_instNum(0), _scalarOp(false)
: executed_as(Enums::SC_NONE), opcode(opcode),
_instNum(0)
{
setFlag(NoOrder);
}
const std::string&
GPUStaticInst::disassemble()
{

View file

@ -48,7 +48,7 @@
#include <cstdint>
#include <string>
#include "enums/OpType.hh"
#include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"
@ -57,7 +57,7 @@ class BaseOperand;
class BaseRegOperand;
class Wavefront;
class GPUStaticInst
class GPUStaticInst : public GPUStaticInstFlags
{
public:
GPUStaticInst(const std::string &opcode);
@ -86,22 +86,110 @@ class GPUStaticInst
virtual bool isValid() const = 0;
/*
* Most instructions (including all HSAIL instructions)
* are vector ops, so _scalarOp will be false by default.
* Derived instruction objects that are scalar ops must
* set _scalarOp to true in their constructors.
*/
bool scalarOp() const { return _scalarOp; }
bool isALU() const { return _flags[ALU]; }
bool isBranch() const { return _flags[Branch]; }
bool isNop() const { return _flags[Nop]; }
bool isReturn() const { return _flags[Return]; }
virtual bool isLocalMem() const
bool
isUnconditionalJump() const
{
fatal("calling isLocalMem() on non-memory instruction.\n");
return false;
return _flags[UnconditionalJump];
}
bool isArgLoad() { return false; }
bool isSpecialOp() const { return _flags[SpecialOp]; }
bool isWaitcnt() const { return _flags[Waitcnt]; }
bool isBarrier() const { return _flags[MemBarrier]; }
bool isMemFence() const { return _flags[MemFence]; }
bool isMemRef() const { return _flags[MemoryRef]; }
bool isFlat() const { return _flags[Flat]; }
bool isLoad() const { return _flags[Load]; }
bool isStore() const { return _flags[Store]; }
bool
isAtomic() const
{
return _flags[AtomicReturn] || _flags[AtomicNoReturn];
}
bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
bool isAtomicRet() const { return _flags[AtomicReturn]; }
bool isScalar() const { return _flags[Scalar]; }
bool readsSCC() const { return _flags[ReadsSCC]; }
bool writesSCC() const { return _flags[WritesSCC]; }
bool readsVCC() const { return _flags[ReadsVCC]; }
bool writesVCC() const { return _flags[WritesVCC]; }
bool isAtomicAnd() const { return _flags[AtomicAnd]; }
bool isAtomicOr() const { return _flags[AtomicOr]; }
bool isAtomicXor() const { return _flags[AtomicXor]; }
bool isAtomicCAS() const { return _flags[AtomicCAS]; }
bool isAtomicExch() const { return _flags[AtomicExch]; }
bool isAtomicAdd() const { return _flags[AtomicAdd]; }
bool isAtomicSub() const { return _flags[AtomicSub]; }
bool isAtomicInc() const { return _flags[AtomicInc]; }
bool isAtomicDec() const { return _flags[AtomicDec]; }
bool isAtomicMax() const { return _flags[AtomicMax]; }
bool isAtomicMin() const { return _flags[AtomicMin]; }
bool
isArgLoad() const
{
return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
}
bool
isGlobalMem() const
{
return _flags[MemoryRef] && (_flags[GlobalSegment] ||
_flags[PrivateSegment] || _flags[ReadOnlySegment] ||
_flags[SpillSegment]);
}
bool
isLocalMem() const
{
return _flags[MemoryRef] && _flags[GroupSegment];
}
bool isArgSeg() const { return _flags[ArgSegment]; }
bool isGlobalSeg() const { return _flags[GlobalSegment]; }
bool isGroupSeg() const { return _flags[GroupSegment]; }
bool isKernArgSeg() const { return _flags[KernArgSegment]; }
bool isPrivateSeg() const { return _flags[PrivateSegment]; }
bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
bool isSpillSeg() const { return _flags[SpillSegment]; }
bool isWorkitemScope() const { return _flags[WorkitemScope]; }
bool isWavefrontScope() const { return _flags[WavefrontScope]; }
bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
bool isDeviceScope() const { return _flags[DeviceScope]; }
bool isSystemScope() const { return _flags[SystemScope]; }
bool isNoScope() const { return _flags[NoScope]; }
bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
bool isAcquire() const { return _flags[Acquire]; }
bool isRelease() const { return _flags[Release]; }
bool isAcquireRelease() const { return _flags[AcquireRelease]; }
bool isNoOrder() const { return _flags[NoOrder]; }
/**
* Coherence domain of a memory instruction. Only valid for
* machine ISA. The coherence domain specifies where it is
* possible to perform memory synchronization, e.g., acquire
* or release, from the shader kernel.
*
* isGloballyCoherent(): returns true if kernel is sharing memory
* with other work-items on the same device (GPU)
*
* isSystemCoherent(): returns true if kernel is sharing memory
* with other work-items on a different device (GPU) or the host (CPU)
*/
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
virtual uint32_t instSize() = 0;
// only used for memory instructions
@ -120,22 +208,13 @@ class GPUStaticInst
virtual uint32_t getTargetPc() { return 0; }
/**
* Query whether the instruction is an unconditional jump i.e., the jump
* is always executed because there is no condition to be evaluated.
*
* If the instruction is not of branch type, the result is always false.
*
* @return True if the instruction is an unconditional jump.
*/
virtual bool unconditionalJumpInstruction() { return false; }
static uint64_t dynamic_id_count;
Enums::OpType o_type;
// For flat memory accesses
Enums::StorageClassType executed_as;
void setFlag(Flags flag) { _flags[flag] = true; }
protected:
virtual void
execLdAcq(GPUDynInstPtr gpuDynInst)
@ -169,7 +248,45 @@ class GPUStaticInst
*/
int _ipdInstNum;
bool _scalarOp;
std::bitset<Num_Flags> _flags;
};
class KernelLaunchStaticInst : public GPUStaticInst
{
public:
KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
{
setFlag(Nop);
setFlag(Scalar);
setFlag(Acquire);
setFlag(SystemScope);
setFlag(GlobalSegment);
}
void
execute(GPUDynInstPtr gpuDynInst)
{
fatal("kernel launch instruction should not be executed\n");
}
void
generateDisassembly()
{
disassembly = opcode;
}
int getNumOperands() { return 0; }
bool isCondRegister(int operandIndex) { return false; }
bool isScalarRegister(int operandIndex) { return false; }
bool isVectorRegister(int operandIndex) { return false; }
bool isSrcOperand(int operandIndex) { return false; }
bool isDstOperand(int operandIndex) { return false; }
int getOperandSize(int operandIndex) { return 0; }
int getRegisterIndex(int operandIndex) { return 0; }
int numDstRegOperands() { return 0; }
int numSrcRegOperands() { return 0; }
bool isValid() const { return true; }
uint32_t instSize() { return 0; }
};
#endif // __GPU_STATIC_INST_HH__

View file

@ -104,7 +104,7 @@ ControlFlowInfo::createBasicBlocks()
leaders.insert(0);
for (int i = 1; i < instructions.size(); i++) {
GPUStaticInst* instruction = instructions[i];
if (instruction->o_type == Enums::OT_BRANCH) {
if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc);
leaders.insert(i + 1);
@ -137,18 +137,18 @@ ControlFlowInfo::connectBasicBlocks()
break;
}
GPUStaticInst* last = lastInstruction(bb.get());
if (last->o_type == Enums::OT_RET) {
if (last->isReturn()) {
bb->successorIds.insert(exit_bb->id);
continue;
}
if (last->o_type == Enums::OT_BRANCH) {
if (last->isBranch()) {
const uint32_t target_pc = last->getTargetPc();
BasicBlock* target_bb = basicBlock(target_pc);
bb->successorIds.insert(target_bb->id);
}
// Unconditional jump instructions have a unique successor
if (!last->unconditionalJumpInstruction()) {
if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->instNum() + 1);
bb->successorIds.insert(next_bb->id);
}
@ -274,7 +274,7 @@ ControlFlowInfo::printBasicBlocks() const
int inst_num = inst->instNum();
std::cout << inst_num << " [" << basicBlock(inst_num)->id
<< "]: " << inst->disassemble();
if (inst->o_type == Enums::OT_BRANCH) {
if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc();
}
std::cout << std::endl;

View file

@ -141,8 +141,7 @@ LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
}
}
if (gpuDynInst->m_op == Enums::MO_LD ||
gpuDynInst->m_op == Enums::MO_ST) {
if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
// mask identical addresses
for (int j = 0; j < numBanks; ++j) {
for (int j0 = 0; j0 < j; j0++) {
@ -208,8 +207,8 @@ LdsState::processPacket(PacketPtr packet)
GPUDynInstPtr dynInst = getDynInstr(packet);
// account for the LDS bank conflict overhead
int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() :
(dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() :
int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
(dynInst->isStore()) ? parent->storeBusLength() :
parent->loadBusLength();
// delay for accessing the LDS
Tick processingTime =

View file

@ -43,7 +43,6 @@
#include <utility>
#include <vector>
#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/misc.hh"
#include "mem/mem_object.hh"

View file

@ -62,7 +62,7 @@ LocalMemPipeline::exec()
lmReturnedRequests.front() : nullptr;
bool accessVrf = true;
if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf =
@ -137,7 +137,7 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers
if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec;
for (int k = 0; k < m->n_reg; ++k) {
int dst = m->dst_reg+k;
@ -172,13 +172,12 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
// Decrement outstanding request count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op)
|| MO_H(m->m_op)) {
if (m->isStore() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
m->time, -1);
}
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
m->time, -1);
}

View file

@ -47,7 +47,6 @@
#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh"

View file

@ -38,7 +38,6 @@
#include <string>
#include "base/misc.hh"
#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@ -153,8 +152,8 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
void
VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
{
bool loadInstr = IS_OT_READ(ii->opType());
bool atomicInstr = IS_OT_ATOMIC(ii->opType());
bool loadInstr = ii->isLoad();
bool atomicInstr = ii->isAtomic() || ii->isMemFence();
bool loadNoArgInstr = loadInstr && !ii->isArgLoad();

View file

@ -37,7 +37,6 @@
#include "debug/GPUExec.hh"
#include "debug/WavefrontStack.hh"
#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@ -165,19 +164,8 @@ Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr)
bool
Wavefront::isGmInstruction(GPUDynInstPtr ii)
{
if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
IS_OT_ATOMIC_PM(ii->opType())) {
if (ii->isGlobalMem() || ii->isFlat())
return true;
}
if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType())) {
return true;
}
if (IS_OT_FLAT(ii->opType())) {
return true;
}
return false;
}
@ -185,8 +173,7 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
bool
Wavefront::isLmInstruction(GPUDynInstPtr ii)
{
if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) ||
IS_OT_ATOMIC_LM(ii->opType())) {
if (ii->isLocalMem()) {
return true;
}
@ -199,10 +186,9 @@ Wavefront::isOldestInstALU()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP ||
ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
ii->opType() == Enums::OT_KERN_READ)) {
if (status != S_STOPPED && (ii->isNop() ||
ii->isReturn() || ii->isBranch() ||
ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
return true;
}
@ -215,7 +201,7 @@ Wavefront::isOldestInstBarrier()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) {
if (status != S_STOPPED && ii->isBarrier()) {
return true;
}
@ -228,9 +214,7 @@ Wavefront::isOldestInstGMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) ||
IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
if (status != S_STOPPED && ii->isGlobalMem()) {
return true;
}
@ -243,9 +227,7 @@ Wavefront::isOldestInstLMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) ||
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
if (status != S_STOPPED && ii->isLocalMem()) {
return true;
}
@ -258,9 +240,7 @@ Wavefront::isOldestInstPrivMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) ||
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
if (status != S_STOPPED && ii->isPrivateSeg()) {
return true;
}
@ -273,8 +253,7 @@ Wavefront::isOldestInstFlatMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) {
if (status != S_STOPPED && ii->isFlat()) {
return true;
}
@ -289,7 +268,7 @@ Wavefront::instructionBufferHasBranch()
for (auto it : instructionBuffer) {
GPUDynInstPtr ii = it;
if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) {
if (ii->isReturn() || ii->isBranch()) {
return true;
}
}
@ -371,23 +350,16 @@ Wavefront::ready(itype_e type)
// checking readiness will be fixed eventually. In the meantime, let's
// make sure that we do not silently let an instruction type slip
// through this logic and always return not ready.
if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP ||
ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
ii->opType() == Enums::OT_KERN_READ ||
ii->opType() == Enums::OT_ARG ||
IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) ||
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) {
if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
ii->isMemFence() || ii->isFlat())) {
panic("next instruction: %s is of unknown type\n", ii->disassemble());
}
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) {
if (type == I_ALU && ii->isBarrier()) {
// Here for ALU instruction (barrier)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@ -400,7 +372,7 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
} else if (type == I_ALU && ii->opType() == Enums::OT_NOP) {
} else if (type == I_ALU && ii->isNop()) {
// Here for ALU instruction (nop)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@ -408,7 +380,7 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
} else if (type == I_ALU && ii->opType() == Enums::OT_RET) {
} else if (type == I_ALU && ii->isReturn()) {
// Here for ALU instruction (return)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@ -421,10 +393,10 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
} else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
ii->opType() == Enums::OT_KERN_READ ||
ii->opType() == Enums::OT_ARG)) {
} else if (type == I_ALU && (ii->isBranch() ||
ii->isALU() ||
(ii->isKernArgSeg() && ii->isLoad()) ||
ii->isArgSeg())) {
// Here for ALU instruction (all others)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is alu slot free?
@ -439,18 +411,16 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
} else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) ||
IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
} else if (type == I_GLOBAL && ii->isGlobalMem()) {
// Here Global memory instruction
if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory write requests?
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0;
}
}
if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
IS_OT_HIST_GM(ii->opType())) {
if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory read requests?
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
return 0;
@ -480,17 +450,15 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
} else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) ||
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
} else if (type == I_SHARED && ii->isLocalMem()) {
// Here for Shared memory instruction
if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
return 0;
}
}
if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
IS_OT_HIST_LM(ii->opType())) {
if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
return 0;
}
@ -519,47 +487,7 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
} else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) ||
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
// Here for Private memory instruction ------------------------ //
if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0;
}
}
if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
IS_OT_HIST_PM(ii->opType())) {
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
return 0;
}
}
if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus?
return 0;
}
if (!glbMemIssueRdy) {
// Is wave slot free?
return 0;
}
if (!computeUnit->globalMemoryPipe.
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
// Can we insert a new request to the Global Mem Request FIFO?
return 0;
}
// can we schedule source & destination operands on the VRF?
if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
VrfAccessType::RD_WR)) {
return 0;
}
if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
return 0;
}
ready_inst = true;
} else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) {
} else if (type == I_FLAT && ii->isFlat()) {
if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus?
return 0;
@ -618,23 +546,22 @@ Wavefront::updateResources()
assert(ii);
computeUnit->vrf[simdId]->updateResources(this, ii);
// Single precision ALU or Branch or Return or Special instruction
if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
if (ii->isALU() || ii->isSpecialOp() ||
ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines
ii->opType()==Enums::OT_KERN_READ ||
ii->opType()==Enums::OT_ARG ||
ii->opType()==Enums::OT_RET) {
(ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
ii->isReturn()) {
computeUnit->aluPipe[simdId].preset(computeUnit->shader->
ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_BARRIER) {
} else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_FLAT_READ) {
} else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++;
rdGmReqsInPipe++;
@ -649,7 +576,7 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
} else if (ii->opType() == Enums::OT_FLAT_WRITE) {
} else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++;
wrGmReqsInPipe++;
@ -664,21 +591,21 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
} else if (IS_OT_READ_GM(ii->opType())) {
} else if (ii->isLoad() && ii->isGlobalMem()) {
memReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_GM(ii->opType())) {
} else if (ii->isStore() && ii->isGlobalMem()) {
memReqsInPipe++;
wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_GM(ii->opType())) {
} else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
memReqsInPipe++;
wrGmReqsInPipe++;
rdGmReqsInPipe++;
@ -686,21 +613,21 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_LM(ii->opType())) {
} else if (ii->isLoad() && ii->isLocalMem()) {
memReqsInPipe++;
rdLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_LM(ii->opType())) {
} else if (ii->isStore() && ii->isLocalMem()) {
memReqsInPipe++;
wrLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_LM(ii->opType())) {
} else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
memReqsInPipe++;
wrLmReqsInPipe++;
rdLmReqsInPipe++;
@ -708,28 +635,6 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_PM(ii->opType())) {
memReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_PM(ii->opType())) {
memReqsInPipe++;
wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_PM(ii->opType())) {
memReqsInPipe++;
wrGmReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
}
@ -751,7 +656,7 @@ Wavefront::exec()
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
"(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
ii->disassemble(), old_pc);
ii->execute();
ii->execute(ii);
// access the VRF
computeUnit->vrf[simdId]->exec(ii, this);
srcRegOpDist.sample(ii->numSrcRegOperands());
@ -785,24 +690,24 @@ Wavefront::exec()
// ---- Update Vector ALU pipeline and other resources ------------------ //
// Single precision ALU or Branch or Return or Special instruction
if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
if (ii->isALU() || ii->isSpecialOp() ||
ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines
ii->opType() == Enums::OT_KERN_READ ||
ii->opType() == Enums::OT_ARG ||
ii->opType() == Enums::OT_RET) {
(ii->isKernArgSeg() && ii->isLoad()) ||
ii->isArgSeg() ||
ii->isReturn()) {
computeUnit->aluPipe[simdId].set(computeUnit->shader->
ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_BARRIER) {
} else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_FLAT_READ) {
} else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) {
@ -816,7 +721,7 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
} else if (ii->opType() == Enums::OT_FLAT_WRITE) {
} else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
@ -829,32 +734,32 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
} else if (IS_OT_READ_GM(ii->opType())) {
} else if (ii->isLoad() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_GM(ii->opType())) {
} else if (ii->isStore() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_GM(ii->opType())) {
} else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_LM(ii->opType())) {
} else if (ii->isLoad() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_LM(ii->opType())) {
} else if (ii->isStore() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_LM(ii->opType())) {
} else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].