gpu-compute: remove inst enums and use bit flag for attributes

this patch removes the GPUStaticInst enums that were defined in GPU.py.
instead, a simple set of attribute flags that can be set in the base
instruction class is used. this will help unify the attributes of HSAIL
and machine ISA instructions within the model itself.

because the static instruction now carries the attributes, a GPUDynInst
must carry a pointer to a valid GPUStaticInst, so a new static kernel
launch instruction is added, which carries the attributes needed to
perform the kernel launch.
This commit is contained in:
Tony Gutierrez 2016-10-26 22:47:11 -04:00
parent e1ad8035a3
commit 7ac38849ab
28 changed files with 1257 additions and 1116 deletions

View file

@ -43,7 +43,6 @@ if env['TARGET_GPU_ISA'] == 'hsail':
env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'], env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
'gen.py', '$SOURCE $TARGETS') 'gen.py', '$SOURCE $TARGETS')
Source('generic_types.cc')
Source('gpu_decoder.cc') Source('gpu_decoder.cc')
Source('insts/branch.cc') Source('insts/branch.cc')
Source('insts/gen_exec.cc') Source('insts/gen_exec.cc')

View file

@ -1,47 +0,0 @@
#include "arch/hsail/generic_types.hh"
#include "base/misc.hh"
using namespace Brig;
namespace HsailISA
{
Enums::GenericMemoryOrder
getGenericMemoryOrder(BrigMemoryOrder brig_memory_order)
{
switch(brig_memory_order) {
case BRIG_MEMORY_ORDER_NONE:
return Enums::MEMORY_ORDER_NONE;
case BRIG_MEMORY_ORDER_RELAXED:
return Enums::MEMORY_ORDER_RELAXED;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
return Enums::MEMORY_ORDER_SC_ACQUIRE;
case BRIG_MEMORY_ORDER_SC_RELEASE:
return Enums::MEMORY_ORDER_SC_RELEASE;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
return Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE;
default:
fatal("HsailISA::MemInst::getGenericMemoryOrder -> ",
"bad BrigMemoryOrder\n");
}
}
Enums::GenericMemoryScope
getGenericMemoryScope(BrigMemoryScope brig_memory_scope)
{
switch(brig_memory_scope) {
case BRIG_MEMORY_SCOPE_NONE:
return Enums::MEMORY_SCOPE_NONE;
case BRIG_MEMORY_SCOPE_WORKITEM:
return Enums::MEMORY_SCOPE_WORKITEM;
case BRIG_MEMORY_SCOPE_WORKGROUP:
return Enums::MEMORY_SCOPE_WORKGROUP;
case BRIG_MEMORY_SCOPE_AGENT:
return Enums::MEMORY_SCOPE_DEVICE;
case BRIG_MEMORY_SCOPE_SYSTEM:
return Enums::MEMORY_SCOPE_SYSTEM;
default:
fatal("HsailISA::MemInst::getGenericMemoryScope -> ",
"bad BrigMemoryScope\n");
}
}
} // namespace HsailISA

View file

@ -1,16 +0,0 @@
#ifndef __ARCH_HSAIL_GENERIC_TYPES_HH__
#define __ARCH_HSAIL_GENERIC_TYPES_HH__
#include "arch/hsail/Brig.h"
#include "enums/GenericMemoryOrder.hh"
#include "enums/GenericMemoryScope.hh"
namespace HsailISA
{
Enums::GenericMemoryOrder
getGenericMemoryOrder(Brig::BrigMemoryOrder brig_memory_order);
Enums::GenericMemoryScope
getGenericMemoryScope(Brig::BrigMemoryScope brig_memory_scope);
} // namespace HsailISA
#endif // __ARCH_HSAIL_GENERIC_TYPES_HH__

View file

@ -59,16 +59,15 @@ namespace HsailISA
BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "brn") : HsailGPUStaticInst(obj, "brn")
{ {
o_type = Enums::OT_BRANCH; setFlag(Branch);
setFlag(UnconditionalJump);
width = ((Brig::BrigInstBr*)ib)->width; width = ((Brig::BrigInstBr*)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj); target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
} }
uint32_t getTargetPc() override { return target.getTarget(0, 0); } uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool unconditionalJumpInstruction() override { return true; }
bool isVectorRegister(int operandIndex) override { bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands()); assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isVectorRegister(); return target.isVectorRegister();
@ -175,13 +174,12 @@ namespace HsailISA
CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "cbr") : HsailGPUStaticInst(obj, "cbr")
{ {
o_type = Enums::OT_BRANCH; setFlag(Branch);
width = ((Brig::BrigInstBr *)ib)->width; width = ((Brig::BrigInstBr *)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
cond.init(op_offs, obj); cond.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1); op_offs = obj->getOperandPtr(ib->operands, 1);
target.init(op_offs, obj); target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
} }
uint32_t getTargetPc() override { return target.getTarget(0, 0); } uint32_t getTargetPc() override { return target.getTarget(0, 0); }
@ -343,17 +341,15 @@ namespace HsailISA
BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "br") : HsailGPUStaticInst(obj, "br")
{ {
o_type = Enums::OT_BRANCH; setFlag(Branch);
setFlag(UnconditionalJump);
width.init(((Brig::BrigInstBr *)ib)->width, obj); width.init(((Brig::BrigInstBr *)ib)->width, obj);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj); target.init(op_offs, obj);
o_type = Enums::OT_BRANCH;
} }
uint32_t getTargetPc() override { return target.getTarget(0, 0); } uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool unconditionalJumpInstruction() override { return true; }
void execute(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr gpuDynInst) override;
bool isVectorRegister(int operandIndex) override { bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands()); assert(operandIndex >= 0 && operandIndex < getNumOperands());

View file

@ -38,11 +38,9 @@
#include <cmath> #include <cmath>
#include "arch/hsail/generic_types.hh"
#include "arch/hsail/insts/gpu_static_inst.hh" #include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh" #include "arch/hsail/operand.hh"
#include "debug/HSAIL.hh" #include "debug/HSAIL.hh"
#include "enums/OpType.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh" #include "gpu-compute/shader.hh"
@ -127,6 +125,8 @@ namespace HsailISA
const char *opcode) const char *opcode)
: HsailGPUStaticInst(obj, opcode) : HsailGPUStaticInst(obj, opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -240,6 +240,8 @@ namespace HsailISA
const char *opcode) const char *opcode)
: HsailGPUStaticInst(obj, opcode) : HsailGPUStaticInst(obj, opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -414,6 +416,8 @@ namespace HsailISA
const BrigObject *obj, const char *opcode) const BrigObject *obj, const char *opcode)
: HsailGPUStaticInst(obj, opcode) : HsailGPUStaticInst(obj, opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -818,6 +822,8 @@ namespace HsailISA
const BrigObject *obj, const char *_opcode) const BrigObject *obj, const char *_opcode)
: HsailGPUStaticInst(obj, _opcode) : HsailGPUStaticInst(obj, _opcode)
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
@ -874,7 +880,7 @@ namespace HsailISA
Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "ret") : Base(ib, obj, "ret")
{ {
o_type = Enums::OT_RET; setFlag(GPUStaticInst::Return);
} }
void execute(GPUDynInstPtr gpuDynInst); void execute(GPUDynInstPtr gpuDynInst);
@ -889,7 +895,7 @@ namespace HsailISA
Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "barrier") : Base(ib, obj, "barrier")
{ {
o_type = Enums::OT_BARRIER; setFlag(GPUStaticInst::MemBarrier);
assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
width = (uint8_t)((Brig::BrigInstBr*)ib)->width; width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
} }
@ -924,14 +930,105 @@ namespace HsailISA
memFenceMemOrder = (Brig::BrigMemoryOrder) memFenceMemOrder = (Brig::BrigMemoryOrder)
((Brig::BrigInstMemFence*)ib)->memoryOrder; ((Brig::BrigInstMemFence*)ib)->memoryOrder;
// set o_type based on scopes setFlag(MemoryRef);
setFlag(GPUStaticInst::MemFence);
switch (memFenceMemOrder) {
case Brig::BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case Brig::BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("MemInst has bad BrigMemoryOrder\n");
}
// set inst flags based on scopes
if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_BOTH_MEMFENCE; setFlag(GPUStaticInst::GlobalSegment);
/**
* A memory fence that has scope for
* both segments will use the global
* segment, and be executed in the
* global memory pipeline, therefore,
* we set the segment to match the
* global scope only
*/
switch (memFenceScopeSegGlobal) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad global scope type\n");
}
} else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_GLOBAL_MEMFENCE; setFlag(GPUStaticInst::GlobalSegment);
switch (memFenceScopeSegGlobal) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad global scope type\n");
}
} else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
o_type = Enums::OT_SHARED_MEMFENCE; setFlag(GPUStaticInst::GroupSegment);
switch (memFenceScopeSegGroup) {
case Brig::BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case Brig::BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("MemFence has bad group scope type\n");
}
} else { } else {
fatal("MemFence constructor: bad scope specifiers\n"); fatal("MemFence constructor: bad scope specifiers\n");
} }
@ -955,18 +1052,13 @@ namespace HsailISA
// etc.). We send a packet, tagged with the memory order and // etc.). We send a packet, tagged with the memory order and
// scope, and let the GPU coalescer handle it. // scope, and let the GPU coalescer handle it.
if (o_type == Enums::OT_GLOBAL_MEMFENCE || if (isGlobalSeg()) {
o_type == Enums::OT_BOTH_MEMFENCE) {
gpuDynInst->simdId = w->simdId; gpuDynInst->simdId = w->simdId;
gpuDynInst->wfSlotId = w->wfSlotId; gpuDynInst->wfSlotId = w->wfSlotId;
gpuDynInst->wfDynId = w->wfDynId; gpuDynInst->wfDynId = w->wfDynId;
gpuDynInst->kern_id = w->kernId; gpuDynInst->kern_id = w->kernId;
gpuDynInst->cu_id = w->computeUnit->cu_id; gpuDynInst->cu_id = w->computeUnit->cu_id;
gpuDynInst->memoryOrder =
getGenericMemoryOrder(memFenceMemOrder);
gpuDynInst->scope =
getGenericMemoryScope(memFenceScopeSegGlobal);
gpuDynInst->useContinuation = false; gpuDynInst->useContinuation = false;
GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
gmp->getGMReqFIFO().push(gpuDynInst); gmp->getGMReqFIFO().push(gpuDynInst);
@ -975,10 +1067,10 @@ namespace HsailISA
w->rdGmReqsInPipe--; w->rdGmReqsInPipe--;
w->memReqsInPipe--; w->memReqsInPipe--;
w->outstandingReqs++; w->outstandingReqs++;
} else if (o_type == Enums::OT_SHARED_MEMFENCE) { } else if (isGroupSeg()) {
// no-op // no-op
} else { } else {
fatal("MemFence execute: bad o_type\n"); fatal("MemFence execute: bad op type\n");
} }
} }
}; };
@ -1054,6 +1146,7 @@ namespace HsailISA
Call(const Brig::BrigInstBase *ib, const BrigObject *obj) Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "call") : HsailGPUStaticInst(obj, "call")
{ {
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1); op_offs = obj->getOperandPtr(ib->operands, 1);

View file

@ -179,12 +179,13 @@ namespace HsailISA
w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId); w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
if (!refCount) { if (!refCount) {
setFlag(SystemScope);
setFlag(Release);
setFlag(GlobalSegment);
// Notify Memory System of Kernel Completion // Notify Memory System of Kernel Completion
// Kernel End = isKernel + isRelease // Kernel End = isKernel + isRelease
w->status = Wavefront::S_RETURNING; w->status = Wavefront::S_RETURNING;
GPUDynInstPtr local_mempacket = gpuDynInst; GPUDynInstPtr local_mempacket = gpuDynInst;
local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE;
local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM;
local_mempacket->useContinuation = false; local_mempacket->useContinuation = false;
local_mempacket->simdId = w->simdId; local_mempacket->simdId = w->simdId;
local_mempacket->wfSlotId = w->wfSlotId; local_mempacket->wfSlotId = w->wfSlotId;

View file

@ -36,7 +36,6 @@
#include "arch/hsail/insts/mem.hh" #include "arch/hsail/insts/mem.hh"
#include "arch/hsail/Brig.h" #include "arch/hsail/Brig.h"
#include "enums/OpType.hh"
using namespace Brig; using namespace Brig;
@ -44,68 +43,6 @@ namespace HsailISA
{ {
const char* atomicOpToString(BrigAtomicOperation brigOp); const char* atomicOpToString(BrigAtomicOperation brigOp);
Enums::MemOpType
brigAtomicToMemOpType(BrigOpcode brigOpCode, BrigAtomicOperation brigOp)
{
if (brigOpCode == Brig::BRIG_OPCODE_ATOMIC) {
switch (brigOp) {
case BRIG_ATOMIC_AND:
return Enums::MO_AAND;
case BRIG_ATOMIC_OR:
return Enums::MO_AOR;
case BRIG_ATOMIC_XOR:
return Enums::MO_AXOR;
case BRIG_ATOMIC_CAS:
return Enums::MO_ACAS;
case BRIG_ATOMIC_EXCH:
return Enums::MO_AEXCH;
case BRIG_ATOMIC_ADD:
return Enums::MO_AADD;
case BRIG_ATOMIC_WRAPINC:
return Enums::MO_AINC;
case BRIG_ATOMIC_WRAPDEC:
return Enums::MO_ADEC;
case BRIG_ATOMIC_MIN:
return Enums::MO_AMIN;
case BRIG_ATOMIC_MAX:
return Enums::MO_AMAX;
case BRIG_ATOMIC_SUB:
return Enums::MO_ASUB;
default:
fatal("Bad BrigAtomicOperation code %d\n", brigOp);
}
} else if (brigOpCode == Brig::BRIG_OPCODE_ATOMICNORET) {
switch (brigOp) {
case BRIG_ATOMIC_AND:
return Enums::MO_ANRAND;
case BRIG_ATOMIC_OR:
return Enums::MO_ANROR;
case BRIG_ATOMIC_XOR:
return Enums::MO_ANRXOR;
case BRIG_ATOMIC_CAS:
return Enums::MO_ANRCAS;
case BRIG_ATOMIC_EXCH:
return Enums::MO_ANREXCH;
case BRIG_ATOMIC_ADD:
return Enums::MO_ANRADD;
case BRIG_ATOMIC_WRAPINC:
return Enums::MO_ANRINC;
case BRIG_ATOMIC_WRAPDEC:
return Enums::MO_ANRDEC;
case BRIG_ATOMIC_MIN:
return Enums::MO_ANRMIN;
case BRIG_ATOMIC_MAX:
return Enums::MO_ANRMAX;
case BRIG_ATOMIC_SUB:
return Enums::MO_ANRSUB;
default:
fatal("Bad BrigAtomicOperation code %d\n", brigOp);
}
} else {
fatal("Bad BrigAtomicOpcode %d\n", brigOpCode);
}
}
const char* const char*
atomicOpToString(BrigAtomicOperation brigOp) atomicOpToString(BrigAtomicOperation brigOp)
{ {

View file

@ -96,6 +96,8 @@ namespace HsailISA
{ {
using namespace Brig; using namespace Brig;
setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0); unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj); dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1); op_offs = obj->getOperandPtr(ib->operands, 1);
@ -211,131 +213,6 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder; Brig::BrigMemoryOrder memoryOrder;
Brig::BrigMemoryScope memoryScope; Brig::BrigMemoryScope memoryScope;
unsigned int equivClass; unsigned int equivClass;
bool isArgLoad()
{
return segment == Brig::BRIG_SEGMENT_KERNARG ||
segment == Brig::BRIG_SEGMENT_ARG;
}
void
initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_READ;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_READ;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_READ;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_READ;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_READ;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_READ;
break;
case BRIG_SEGMENT_KERNARG:
o_type = Enums::OT_KERN_READ;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
width = ldst->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
}
void
initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
memoryScope = (BrigMemoryScope)at->memoryScope;
equivClass = 0;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_READ;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_READ;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_READ;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_READ;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_READ;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_READ;
break;
case BRIG_SEGMENT_KERNARG:
o_type = Enums::OT_KERN_READ;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("Ld: segment %d not supported\n", segment);
}
width = BRIG_WIDTH_1;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands,1);
addr.init(op_offs, obj);
}
LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode) const char *_opcode)
@ -343,10 +220,111 @@ namespace HsailISA
{ {
using namespace Brig; using namespace Brig;
setFlag(MemoryRef);
setFlag(Load);
if (ib->opcode == BRIG_OPCODE_LD) { if (ib->opcode == BRIG_OPCODE_LD) {
initLd(ib, obj, _opcode); const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
width = ldst->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
} else { } else {
initAtomicLd(ib, obj, _opcode); const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
memoryScope = (BrigMemoryScope)at->memoryScope;
equivClass = 0;
width = BRIG_WIDTH_1;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands,1);
addr.init(op_offs, obj);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("LdInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("LdInst has bad memory scope type\n");
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_PRIVATE:
setFlag(PrivateSegment);
break;
case BRIG_SEGMENT_READONLY:
setFlag(ReadOnlySegment);
break;
case BRIG_SEGMENT_SPILL:
setFlag(SpillSegment);
break;
case BRIG_SEGMENT_FLAT:
setFlag(Flat);
break;
case BRIG_SEGMENT_KERNARG:
setFlag(KernArgSegment);
break;
case BRIG_SEGMENT_ARG:
setFlag(ArgSegment);
break;
default:
panic("Ld: segment %d not supported\n", segment);
} }
} }
@ -473,7 +451,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) { if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
if (isLocalMem()) { if (this->isLocalMem()) {
// load from shared memory // load from shared memory
*d = gpuDynInst->wavefront()->ldsChunk-> *d = gpuDynInst->wavefront()->ldsChunk->
read<c0>(vaddr); read<c0>(vaddr);
@ -488,8 +466,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader-> if (gpuDynInst->computeUnit()->shader->
separate_acquire_release && separate_acquire_release &&
gpuDynInst->memoryOrder == gpuDynInst->isAcquire()) {
Enums::MEMORY_ORDER_SC_ACQUIRE) {
// if this load has acquire semantics, // if this load has acquire semantics,
// set the response continuation function // set the response continuation function
// to perform an Acquire request // to perform an Acquire request
@ -520,10 +497,9 @@ namespace HsailISA
{ {
// after the load has complete and if the load has acquire // after the load has complete and if the load has acquire
// semantics, issue an acquire request. // semantics, issue an acquire request.
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder == && gpuDynInst->isAcquire()) {
Enums::MEMORY_ORDER_SC_ACQUIRE) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->useContinuation = false; gpuDynInst->useContinuation = false;
// create request // create request
@ -537,12 +513,6 @@ namespace HsailISA
} }
public: public:
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
bool isVectorRegister(int operandIndex) override bool isVectorRegister(int operandIndex) override
{ {
assert((operandIndex >= 0) && (operandIndex < getNumOperands())); assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
@ -731,127 +701,112 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder; Brig::BrigMemoryOrder memoryOrder;
unsigned int equivClass; unsigned int equivClass;
void
initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_WRITE;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_WRITE;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_WRITE;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_WRITE;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_WRITE;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_WRITE;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("St: segment %d not supported\n", segment);
}
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const BrigOperand *baseOp = obj->getOperand(op_offs);
if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
(baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
src.init(op_offs, obj);
}
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
}
void
initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
{
using namespace Brig;
const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryScope = (BrigMemoryScope)at->memoryScope;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
equivClass = 0;
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_WRITE;
break;
case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_WRITE;
break;
case BRIG_SEGMENT_PRIVATE:
o_type = Enums::OT_PRIVATE_WRITE;
break;
case BRIG_SEGMENT_READONLY:
o_type = Enums::OT_READONLY_WRITE;
break;
case BRIG_SEGMENT_SPILL:
o_type = Enums::OT_SPILL_WRITE;
break;
case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_WRITE;
break;
case BRIG_SEGMENT_ARG:
o_type = Enums::OT_ARG;
break;
default:
panic("St: segment %d not supported\n", segment);
}
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
addr.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
src.init(op_offs, obj);
}
StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode) const char *_opcode)
: HsailGPUStaticInst(obj, _opcode) : HsailGPUStaticInst(obj, _opcode)
{ {
using namespace Brig; using namespace Brig;
setFlag(MemoryRef);
setFlag(Store);
if (ib->opcode == BRIG_OPCODE_ST) { if (ib->opcode == BRIG_OPCODE_ST) {
initSt(ib, obj, _opcode); const BrigInstMem *ldst = (const BrigInstMem*)ib;
segment = (BrigSegment)ldst->segment;
memoryOrder = BRIG_MEMORY_ORDER_NONE;
memoryScope = BRIG_MEMORY_SCOPE_NONE;
equivClass = ldst->equivClass;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const BrigOperand *baseOp = obj->getOperand(op_offs);
if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
(baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
src.init(op_offs, obj);
}
op_offs = obj->getOperandPtr(ib->operands, 1);
addr.init(op_offs, obj);
} else { } else {
initAtomicSt(ib, obj, _opcode); const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
segment = (BrigSegment)at->segment;
memoryScope = (BrigMemoryScope)at->memoryScope;
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
equivClass = 0;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
addr.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
src.init(op_offs, obj);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("StInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("StInst has bad memory scope type\n");
}
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
setFlag(GlobalSegment);
break;
case BRIG_SEGMENT_GROUP:
setFlag(GroupSegment);
break;
case BRIG_SEGMENT_PRIVATE:
setFlag(PrivateSegment);
break;
case BRIG_SEGMENT_READONLY:
setFlag(ReadOnlySegment);
break;
case BRIG_SEGMENT_SPILL:
setFlag(SpillSegment);
break;
case BRIG_SEGMENT_FLAT:
setFlag(Flat);
break;
case BRIG_SEGMENT_ARG:
setFlag(ArgSegment);
break;
default:
panic("St: segment %d not supported\n", segment);
} }
} }
@ -964,10 +919,9 @@ namespace HsailISA
{ {
// before performing a store, check if this store has // before performing a store, check if this store has
// release semantics, and if so issue a release first // release semantics, and if so issue a release first
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder == && gpuDynInst->isRelease()) {
Enums::MEMORY_ORDER_SC_RELEASE) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->execContinuation = &GPUStaticInst::execSt; gpuDynInst->execContinuation = &GPUStaticInst::execSt;
@ -987,12 +941,6 @@ namespace HsailISA
execSt(gpuDynInst); execSt(gpuDynInst);
} }
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
private: private:
// execSt may be called through a continuation // execSt may be called through a continuation
// if the store had release semantics. see comment for // if the store had release semantics. see comment for
@ -1020,7 +968,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) { if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
if (isLocalMem()) { if (this->isLocalMem()) {
//store to shared memory //store to shared memory
gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr, gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
*d); *d);
@ -1166,9 +1114,6 @@ namespace HsailISA
} }
} }
Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
Brig::BrigAtomicOperation brigOp);
template<typename OperandType, typename AddrOperandType, int NumSrcOperands, template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
bool HasDst> bool HasDst>
class AtomicInstBase : public HsailGPUStaticInst class AtomicInstBase : public HsailGPUStaticInst
@ -1183,7 +1128,6 @@ namespace HsailISA
Brig::BrigAtomicOperation atomicOperation; Brig::BrigAtomicOperation atomicOperation;
Brig::BrigMemoryScope memoryScope; Brig::BrigMemoryScope memoryScope;
Brig::BrigOpcode opcode; Brig::BrigOpcode opcode;
Enums::MemOpType opType;
AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode) const char *_opcode)
@ -1198,21 +1142,106 @@ namespace HsailISA
memoryOrder = (BrigMemoryOrder)at->memoryOrder; memoryOrder = (BrigMemoryOrder)at->memoryOrder;
atomicOperation = (BrigAtomicOperation)at->atomicOperation; atomicOperation = (BrigAtomicOperation)at->atomicOperation;
opcode = (BrigOpcode)ib->opcode; opcode = (BrigOpcode)ib->opcode;
opType = brigAtomicToMemOpType(opcode, atomicOperation);
assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
opcode == Brig::BRIG_OPCODE_ATOMIC);
setFlag(MemoryRef);
if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
switch (memoryOrder) {
case BRIG_MEMORY_ORDER_NONE:
setFlag(NoOrder);
break;
case BRIG_MEMORY_ORDER_RELAXED:
setFlag(RelaxedOrder);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE:
setFlag(Acquire);
break;
case BRIG_MEMORY_ORDER_SC_RELEASE:
setFlag(Release);
break;
case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
setFlag(AcquireRelease);
break;
default:
fatal("AtomicInst has bad memory order type\n");
}
switch (memoryScope) {
case BRIG_MEMORY_SCOPE_NONE:
setFlag(NoScope);
break;
case BRIG_MEMORY_SCOPE_WORKITEM:
setFlag(WorkitemScope);
break;
case BRIG_MEMORY_SCOPE_WORKGROUP:
setFlag(WorkgroupScope);
break;
case BRIG_MEMORY_SCOPE_AGENT:
setFlag(DeviceScope);
break;
case BRIG_MEMORY_SCOPE_SYSTEM:
setFlag(SystemScope);
break;
default:
fatal("AtomicInst has bad memory scope type\n");
}
switch (atomicOperation) {
case Brig::BRIG_ATOMIC_AND:
setFlag(AtomicAnd);
break;
case Brig::BRIG_ATOMIC_OR:
setFlag(AtomicOr);
break;
case Brig::BRIG_ATOMIC_XOR:
setFlag(AtomicXor);
break;
case Brig::BRIG_ATOMIC_CAS:
setFlag(AtomicCAS);
break;
case Brig::BRIG_ATOMIC_EXCH:
setFlag(AtomicExch);
break;
case Brig::BRIG_ATOMIC_ADD:
setFlag(AtomicAdd);
break;
case Brig::BRIG_ATOMIC_WRAPINC:
setFlag(AtomicInc);
break;
case Brig::BRIG_ATOMIC_WRAPDEC:
setFlag(AtomicDec);
break;
case Brig::BRIG_ATOMIC_MIN:
setFlag(AtomicMin);
break;
case Brig::BRIG_ATOMIC_MAX:
setFlag(AtomicMax);
break;
case Brig::BRIG_ATOMIC_SUB:
setFlag(AtomicSub);
break;
default:
fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
}
switch (segment) { switch (segment) {
case BRIG_SEGMENT_GLOBAL: case BRIG_SEGMENT_GLOBAL:
o_type = Enums::OT_GLOBAL_ATOMIC; setFlag(GlobalSegment);
break; break;
case BRIG_SEGMENT_GROUP: case BRIG_SEGMENT_GROUP:
o_type = Enums::OT_SHARED_ATOMIC; setFlag(GroupSegment);
break; break;
case BRIG_SEGMENT_FLAT: case BRIG_SEGMENT_FLAT:
o_type = Enums::OT_FLAT_ATOMIC; setFlag(Flat);
break; break;
default: default:
panic("Atomic: segment %d not supported\n", segment); panic("Atomic: segment %d not supported\n", segment);
} }
@ -1354,11 +1383,10 @@ namespace HsailISA
{ {
// before doing the RMW, check if this atomic has // before doing the RMW, check if this atomic has
// release semantics, and if so issue a release first // release semantics, and if so issue a release first
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& (gpuDynInst->memoryOrder == && (gpuDynInst->isRelease()
Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder == || gpuDynInst->isAcquireRelease())) {
Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
@ -1383,12 +1411,6 @@ namespace HsailISA
void execute(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr gpuDynInst) override;
bool
isLocalMem() const override
{
return this->segment == Brig::BRIG_SEGMENT_GROUP;
}
private: private:
// execAtomic may be called through a continuation // execAtomic may be called through a continuation
// if the RMW had release semantics. see comment for // if the RMW had release semantics. see comment for
@ -1408,72 +1430,48 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) { if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i]; Addr vaddr = gpuDynInst->addr[i];
if (isLocalMem()) { if (this->isLocalMem()) {
Wavefront *wavefront = gpuDynInst->wavefront(); Wavefront *wavefront = gpuDynInst->wavefront();
*d = wavefront->ldsChunk->read<c0>(vaddr); *d = wavefront->ldsChunk->read<c0>(vaddr);
switch (this->opType) { if (this->isAtomicAdd()) {
case Enums::MO_AADD:
case Enums::MO_ANRADD:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + (*e)); wavefront->ldsChunk->read<c0>(vaddr) + (*e));
break; } else if (this->isAtomicSub()) {
case Enums::MO_ASUB:
case Enums::MO_ANRSUB:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - (*e)); wavefront->ldsChunk->read<c0>(vaddr) - (*e));
break; } else if (this->isAtomicMax()) {
case Enums::MO_AMAX:
case Enums::MO_ANRMAX:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
std::max(wavefront->ldsChunk->read<c0>(vaddr), std::max(wavefront->ldsChunk->read<c0>(vaddr),
(*e))); (*e)));
break; } else if (this->isAtomicMin()) {
case Enums::MO_AMIN:
case Enums::MO_ANRMIN:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
std::min(wavefront->ldsChunk->read<c0>(vaddr), std::min(wavefront->ldsChunk->read<c0>(vaddr),
(*e))); (*e)));
break; } else if (this->isAtomicAnd()) {
case Enums::MO_AAND:
case Enums::MO_ANRAND:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) & (*e)); wavefront->ldsChunk->read<c0>(vaddr) & (*e));
break; } else if (this->isAtomicOr()) {
case Enums::MO_AOR:
case Enums::MO_ANROR:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) | (*e)); wavefront->ldsChunk->read<c0>(vaddr) | (*e));
break; } else if (this->isAtomicXor()) {
case Enums::MO_AXOR:
case Enums::MO_ANRXOR:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) ^ (*e)); wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
break; } else if (this->isAtomicInc()) {
case Enums::MO_AINC:
case Enums::MO_ANRINC:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + 1); wavefront->ldsChunk->read<c0>(vaddr) + 1);
break; } else if (this->isAtomicDec()) {
case Enums::MO_ADEC:
case Enums::MO_ANRDEC:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - 1); wavefront->ldsChunk->read<c0>(vaddr) - 1);
break; } else if (this->isAtomicExch()) {
case Enums::MO_AEXCH:
case Enums::MO_ANREXCH:
wavefront->ldsChunk->write<c0>(vaddr, (*e)); wavefront->ldsChunk->write<c0>(vaddr, (*e));
break; } else if (this->isAtomicCAS()) {
case Enums::MO_ACAS:
case Enums::MO_ANRCAS:
wavefront->ldsChunk->write<c0>(vaddr, wavefront->ldsChunk->write<c0>(vaddr,
(wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ? (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
(*f) : wavefront->ldsChunk->read<c0>(vaddr)); (*f) : wavefront->ldsChunk->read<c0>(vaddr));
break; } else {
default:
fatal("Unrecognized or invalid HSAIL atomic op " fatal("Unrecognized or invalid HSAIL atomic op "
"type.\n"); "type.\n");
break;
} }
} else { } else {
Request *req = Request *req =
@ -1481,7 +1479,7 @@ namespace HsailISA
gpuDynInst->computeUnit()->masterId(), gpuDynInst->computeUnit()->masterId(),
0, gpuDynInst->wfDynId, 0, gpuDynInst->wfDynId,
gpuDynInst->makeAtomicOpFunctor<c0>(e, gpuDynInst->makeAtomicOpFunctor<c0>(e,
f, this->opType)); f));
gpuDynInst->setRequestFlags(req); gpuDynInst->setRequestFlags(req);
PacketPtr pkt = new Packet(req, MemCmd::SwapReq); PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
@ -1489,8 +1487,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader-> if (gpuDynInst->computeUnit()->shader->
separate_acquire_release && separate_acquire_release &&
(gpuDynInst->memoryOrder == (gpuDynInst->isAcquire())) {
Enums::MEMORY_ORDER_SC_ACQUIRE)) {
// if this atomic has acquire semantics, // if this atomic has acquire semantics,
// schedule the continuation to perform an // schedule the continuation to perform an
// acquire after the RMW completes // acquire after the RMW completes
@ -1523,10 +1520,9 @@ namespace HsailISA
{ {
// after performing the RMW, check to see if this instruction // after performing the RMW, check to see if this instruction
// has acquire semantics, and if so, issue an acquire // has acquire semantics, and if so, issue an acquire
if (!isLocalMem()) { if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release if (gpuDynInst->computeUnit()->shader->separate_acquire_release
&& gpuDynInst->memoryOrder == && gpuDynInst->isAcquire()) {
Enums::MEMORY_ORDER_SC_ACQUIRE) {
gpuDynInst->statusBitVector = VectorMask(1); gpuDynInst->statusBitVector = VectorMask(1);
// the request will be finished when // the request will be finished when

View file

@ -33,7 +33,6 @@
* Author: Steve Reinhardt * Author: Steve Reinhardt
*/ */
#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh" #include "gpu-compute/hsail_code.hh"
// defined in code.cc, but not worth sucking in all of code.h for this // defined in code.cc, but not worth sucking in all of code.h for this
@ -215,16 +214,12 @@ namespace HsailISA
this->addr.calcVector(w, m->addr); this->addr.calcVector(w, m->addr);
m->m_op = Enums::MO_LD;
m->m_type = MemDataType::memType; m->m_type = MemDataType::memType;
m->v_type = DestDataType::vgprType; m->v_type = DestDataType::vgprType;
m->exec_mask = w->execMask(); m->exec_mask = w->execMask();
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = this->equivClass; m->equiv = this->equivClass;
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
if (num_dest_operands == 1) { if (num_dest_operands == 1) {
m->dst_reg = this->dest.regIndex(); m->dst_reg = this->dest.regIndex();
@ -245,7 +240,6 @@ namespace HsailISA
switch (this->segment) { switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL: case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
@ -276,7 +270,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL: case Brig::BRIG_SEGMENT_SPILL:
assert(num_dest_operands == 1); assert(num_dest_operands == 1);
m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -301,7 +294,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_GROUP: case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE; m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24)); m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@ -310,7 +302,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_READONLY: case Brig::BRIG_SEGMENT_READONLY:
m->s_type = SEG_READONLY;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
@ -327,7 +318,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_PRIVATE: case Brig::BRIG_SEGMENT_PRIVATE:
m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -408,7 +398,6 @@ namespace HsailISA
} }
} }
m->m_op = Enums::MO_ST;
m->m_type = OperationType::memType; m->m_type = OperationType::memType;
m->v_type = OperationType::vgprType; m->v_type = OperationType::vgprType;
@ -421,10 +410,6 @@ namespace HsailISA
m->n_reg = num_src_operands; m->n_reg = num_src_operands;
} }
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
m->simdId = w->simdId; m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId; m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
@ -434,7 +419,6 @@ namespace HsailISA
switch (this->segment) { switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL: case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
@ -463,7 +447,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL: case Brig::BRIG_SEGMENT_SPILL:
assert(num_src_operands == 1); assert(num_src_operands == 1);
m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -483,7 +466,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_GROUP: case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE; m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24)); m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@ -492,7 +474,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_PRIVATE: case Brig::BRIG_SEGMENT_PRIVATE:
m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
{ {
@ -586,7 +567,6 @@ namespace HsailISA
assert(NumSrcOperands <= 2); assert(NumSrcOperands <= 2);
m->m_op = this->opType;
m->m_type = DataType::memType; m->m_type = DataType::memType;
m->v_type = DataType::vgprType; m->v_type = DataType::vgprType;
@ -594,9 +574,6 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
m->scope = getGenericMemoryScope(this->memoryScope);
if (HasDst) { if (HasDst) {
m->dst_reg = this->dest.regIndex(); m->dst_reg = this->dest.regIndex();
@ -611,7 +588,6 @@ namespace HsailISA
switch (this->segment) { switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL: case Brig::BRIG_SEGMENT_GLOBAL:
m->s_type = SEG_GLOBAL;
m->latency.set(w->computeUnit->shader->ticks(64)); m->latency.set(w->computeUnit->shader->ticks(64));
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
@ -623,7 +599,6 @@ namespace HsailISA
break; break;
case Brig::BRIG_SEGMENT_GROUP: case Brig::BRIG_SEGMENT_GROUP:
m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE; m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24)); m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);

View file

@ -627,8 +627,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3); ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
} }
m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET, setFlag(AtomicNoReturn);
Brig::BRIG_ATOMIC_ADD); setFlag(AtomicAdd);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; m->m_type = U32::memType;
m->v_type = U32::vgprType; m->v_type = U32::vgprType;
@ -636,15 +640,12 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId; m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId; m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt); m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64)); m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@ -666,8 +667,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1); ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
} }
m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET, setFlag(AtomicNoReturn);
Brig::BRIG_ATOMIC_ADD); setFlag(AtomicAdd);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; m->m_type = U32::memType;
m->v_type = U32::vgprType; m->v_type = U32::vgprType;
@ -675,15 +680,12 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId; m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId; m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt); m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64)); m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@ -702,7 +704,11 @@ namespace HsailISA
// calculate the address // calculate the address
calcAddr(w, m); calcAddr(w, m);
m->m_op = Enums::MO_LD; setFlag(Load);
setFlag(NoScope);
setFlag(NoOrder);
setFlag(GlobalSegment);
m->m_type = U32::memType; //MemDataType::memType; m->m_type = U32::memType; //MemDataType::memType;
m->v_type = U32::vgprType; //DestDataType::vgprType; m->v_type = U32::vgprType; //DestDataType::vgprType;
@ -710,8 +716,6 @@ namespace HsailISA
m->statusBitVector = 0; m->statusBitVector = 0;
m->equiv = 0; m->equiv = 0;
m->n_reg = 1; m->n_reg = 1;
m->memoryOrder = Enums::MEMORY_ORDER_NONE;
m->scope = Enums::MEMORY_SCOPE_NONE;
// FIXME // FIXME
//m->dst_reg = this->dest.regIndex(); //m->dst_reg = this->dest.regIndex();
@ -721,7 +725,6 @@ namespace HsailISA
m->wfDynId = w->wfDynId; m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt); m->latency.init(&w->computeUnit->shader->tick_cnt);
m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE; m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1)); m->latency.set(w->computeUnit->shader->ticks(1));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);

View file

@ -171,56 +171,6 @@ class GpuDispatcher(DmaDevice):
cl_driver = Param.ClDriver('pointer to driver') cl_driver = Param.ClDriver('pointer to driver')
class OpType(Enum): vals = [
'OT_NULL',
'OT_ALU',
'OT_SPECIAL',
'OT_GLOBAL_READ',
'OT_GLOBAL_WRITE',
'OT_GLOBAL_ATOMIC',
'OT_GLOBAL_HIST',
'OT_GLOBAL_LDAS',
'OT_SHARED_READ',
'OT_SHARED_WRITE',
'OT_SHARED_ATOMIC',
'OT_SHARED_HIST',
'OT_SHARED_LDAS',
'OT_PRIVATE_READ',
'OT_PRIVATE_WRITE',
'OT_PRIVATE_ATOMIC',
'OT_PRIVATE_HIST',
'OT_PRIVATE_LDAS',
'OT_SPILL_READ',
'OT_SPILL_WRITE',
'OT_SPILL_ATOMIC',
'OT_SPILL_HIST',
'OT_SPILL_LDAS',
'OT_READONLY_READ',
'OT_READONLY_WRITE',
'OT_READONLY_ATOMIC',
'OT_READONLY_HIST',
'OT_READONLY_LDAS',
'OT_FLAT_READ',
'OT_FLAT_WRITE',
'OT_FLAT_ATOMIC',
'OT_FLAT_HIST',
'OT_FLAT_LDAS',
'OT_KERN_READ',
'OT_BRANCH',
# note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F version
# of the compiler.
'OT_SHARED_MEMFENCE',
'OT_GLOBAL_MEMFENCE',
'OT_BOTH_MEMFENCE',
'OT_BARRIER',
'OT_PRINT',
'OT_RET',
'OT_NOP',
'OT_ARG'
]
class MemType(Enum): vals = [ class MemType(Enum): vals = [
'M_U8', 'M_U8',
'M_U16', 'M_U16',
@ -235,47 +185,6 @@ class MemType(Enum): vals = [
'M_F64', 'M_F64',
] ]
class MemOpType(Enum): vals = [
'MO_LD',
'MO_ST',
'MO_LDAS',
'MO_LDA',
'MO_AAND',
'MO_AOR',
'MO_AXOR',
'MO_ACAS',
'MO_AEXCH',
'MO_AADD',
'MO_ASUB',
'MO_AINC',
'MO_ADEC',
'MO_AMAX',
'MO_AMIN',
'MO_ANRAND',
'MO_ANROR',
'MO_ANRXOR',
'MO_ANRCAS',
'MO_ANREXCH',
'MO_ANRADD',
'MO_ANRSUB',
'MO_ANRINC',
'MO_ANRDEC',
'MO_ANRMAX',
'MO_ANRMIN',
'MO_HAND',
'MO_HOR',
'MO_HXOR',
'MO_HCAS',
'MO_HEXCH',
'MO_HADD',
'MO_HSUB',
'MO_HINC',
'MO_HDEC',
'MO_HMAX',
'MO_HMIN',
'MO_UNDEF'
]
class StorageClassType(Enum): vals = [ class StorageClassType(Enum): vals = [
'SC_SPILL', 'SC_SPILL',
'SC_GLOBAL', 'SC_GLOBAL',
@ -293,20 +202,3 @@ class RegisterType(Enum): vals = [
'RT_HARDWARE', 'RT_HARDWARE',
'RT_NONE', 'RT_NONE',
] ]
class GenericMemoryOrder(Enum): vals = [
'MEMORY_ORDER_NONE',
'MEMORY_ORDER_RELAXED',
'MEMORY_ORDER_SC_ACQUIRE',
'MEMORY_ORDER_SC_RELEASE',
'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
]
class GenericMemoryScope(Enum): vals = [
'MEMORY_SCOPE_NONE',
'MEMORY_SCOPE_WORKITEM',
'MEMORY_SCOPE_WAVEFRONT',
'MEMORY_SCOPE_WORKGROUP',
'MEMORY_SCOPE_DEVICE',
'MEMORY_SCOPE_SYSTEM',
]

View file

@ -0,0 +1,111 @@
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Anthony Gutierrez
from m5.params import *
class GPUStaticInstFlags(Enum):
wrapper_name = 'GPUStaticInstFlags'
wrapper_is_struct = True
enum_name = 'Flags'
vals = [
# Op types
'ALU', # ALU op
'Branch', # Branch instruction
'Nop', # No-op (no effect at all)
'Return', # Return instruction
'UnconditionalJump', #
'SpecialOp', # Special op
'Waitcnt', # Is a waitcnt instruction
# Memory ops
'MemBarrier', # Barrier instruction
'MemFence', # Memory fence instruction
'MemoryRef', # References memory (load, store, or atomic)
'Flat', # Flat memory op
'Load', # Reads from memory
'Store', # Writes to memory
# Atomic ops
'AtomicReturn', # Atomic instruction that returns data
'AtomicNoReturn', # Atomic instruction that doesn't return data
# Instruction attributes
'Scalar', # A scalar (not vector) operation
'ReadsSCC', # The instruction reads SCC
'WritesSCC', # The instruction writes SCC
'ReadsVCC', # The instruction reads VCC
'WritesVCC', # The instruction writes VCC
# Atomic OP types
'AtomicAnd',
'AtomicOr',
'AtomicXor',
'AtomicCAS',
'AtomicExch',
'AtomicAdd',
'AtomicSub',
'AtomicInc',
'AtomicDec',
'AtomicMax',
'AtomicMin',
# Memory order flags
'RelaxedOrder',
'Acquire', # Has acquire semantics
'Release', # Has release semantics
'AcquireRelease', # Has acquire and release semantics
'NoOrder', # Has no ordering restrictions
# Segment access flags
'ArgSegment', # Accesses the arg segment
'GlobalSegment', # Accesses global memory
'GroupSegment', # Accesses local memory (LDS), aka shared memory
'KernArgSegment', # Accesses the kernel argument segment
'PrivateSegment', # Accesses the private segment
'ReadOnlySegment', # Accesses read only memory
'SpillSegment', # Accesses the spill segment
'NoSegment', # Does not have an associated segment
# Scope flags
'WorkitemScope',
'WavefrontScope',
'WorkgroupScope',
'DeviceScope',
'SystemScope',
'NoScope', # Does not have an associated scope
# Coherence flags
'GloballyCoherent', # Coherent with other workitems on same device
'SystemCoherent' # Coherent with a different device, or the host
]

View file

@ -41,6 +41,7 @@ if not env['BUILD_GPU']:
Return() Return()
SimObject('GPU.py') SimObject('GPU.py')
SimObject('GPUStaticInstFlags.py')
SimObject('LdsState.py') SimObject('LdsState.py')
SimObject('X86GPUTLB.py') SimObject('X86GPUTLB.py')

View file

@ -1,116 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __CODE_ENUMS_HH__
#define __CODE_ENUMS_HH__
#define IS_OT_GLOBAL(a) ((a)>=Enums::OT_GLOBAL_READ \
&& (a)<=Enums::OT_GLOBAL_LDAS)
#define IS_OT_SHARED(a) ((a)>=Enums::OT_SHARED_READ \
&& (a)<=Enums::OT_SHARED_LDAS)
#define IS_OT_PRIVATE(a) ((a)>=Enums::OT_PRIVATE_READ \
&& (a)<=Enums::OT_PRIVATE_LDAS)
#define IS_OT_SPILL(a) ((a)>=Enums::OT_SPILL_READ \
&& (a)<=Enums::OT_SPILL_LDAS)
#define IS_OT_READONLY(a) ((a)>=Enums::OT_READONLY_READ \
&& (a)<=Enums::OT_READONLY_LDAS)
#define IS_OT_FLAT(a) ((a)>=Enums::OT_FLAT_READ && (a)<=Enums::OT_FLAT_LDAS)
#define IS_OT_LDAS(a) ((a)==Enums::OT_GLOBAL_LDAS||(a)==Enums::OT_SHARED_LDAS \
||(a)==Enums::OT_PRIVATE_LDAS||(a)==Enums::OT_SPILL_LDAS \
||(a)==Enums::OT_READONLY_LDAS||(a)==Enums::OT_FLAT_LDAS)
#define IS_OT_READ(a) ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SHARED_READ \
||(a)==Enums::OT_PRIVATE_READ||(a)==Enums::OT_SPILL_READ \
||(a)==Enums::OT_READONLY_READ||(a)==Enums::OT_FLAT_READ)
#define IS_OT_READ_GM(a) \
((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SPILL_READ \
||(a)==Enums::OT_READONLY_READ)
#define IS_OT_READ_LM(a) ((a)==Enums::OT_SHARED_READ)
#define IS_OT_READ_RM(a) ((a)==Enums::OT_READONLY_READ)
#define IS_OT_READ_PM(a) ((a)==Enums::OT_PRIVATE_READ)
#define IS_OT_WRITE(a) \
((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SHARED_WRITE \
||(a)==Enums::OT_PRIVATE_WRITE||(a)==Enums::OT_SPILL_WRITE \
||(a)==Enums::OT_READONLY_WRITE||(a)==Enums::OT_FLAT_WRITE)
#define IS_OT_WRITE_GM(a) \
((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SPILL_WRITE \
||(a)==Enums::OT_READONLY_WRITE)
#define IS_OT_WRITE_LM(a) ((a)==Enums::OT_SHARED_WRITE)
#define IS_OT_WRITE_PM(a) ((a)==Enums::OT_PRIVATE_WRITE)
#define IS_OT_ATOMIC(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
||(a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_PRIVATE_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_BOTH_MEMFENCE \
||(a)==Enums::OT_FLAT_ATOMIC)
#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
||(a)==Enums::OT_SPILL_ATOMIC \
||(a)==Enums::OT_READONLY_ATOMIC \
||(a)==Enums::OT_GLOBAL_MEMFENCE \
||(a)==Enums::OT_BOTH_MEMFENCE)
#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
||(a)==Enums::OT_SHARED_MEMFENCE)
#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
#define IS_OT_HIST(a) ((a)==Enums::OT_GLOBAL_HIST \
||(a)==Enums::OT_SHARED_HIST \
||(a)==Enums::OT_PRIVATE_HIST \
||(a)==Enums::OT_SPILL_HIST \
||(a)==Enums::OT_READONLY_HIST \
||(a)==Enums::OT_FLAT_HIST)
#define IS_OT_HIST_GM(a) ((a)==Enums::OT_GLOBAL_HIST \
||(a)==Enums::OT_SPILL_HIST \
||(a)==Enums::OT_READONLY_HIST)
#define IS_OT_HIST_LM(a) ((a)==Enums::OT_SHARED_HIST)
#define IS_OT_HIST_PM(a) ((a)==Enums::OT_PRIVATE_HIST)
#endif // __CODE_ENUMS_HH__

View file

@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()), req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()), resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")), _masterId(p->system->getMasterId(name() + ".ComputeUnit")),
lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize) lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
kernelLaunchInst(new KernelLaunchStaticInst())
{ {
/** /**
* This check is necessary because std::bitset only provides conversion * This check is necessary because std::bitset only provides conversion
@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
// Send L1 cache acquire // Send L1 cache acquire
// isKernel + isAcquire = Kernel Begin // isKernel + isAcquire = Kernel Begin
if (shader->impl_kern_boundary_sync) { if (shader->impl_kern_boundary_sync) {
GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this, GPUDynInstPtr gpuDynInst =
nullptr, std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst,
nullptr, 0); getAndIncSeqNum());
gpuDynInst->useContinuation = false; gpuDynInst->useContinuation = false;
gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE;
gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM;
injectGlobalMemFence(gpuDynInst, true); injectGlobalMemFence(gpuDynInst, true);
} }
@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
gpuDynInst->wfSlotId, w->barrierCnt); gpuDynInst->wfSlotId, w->barrierCnt);
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }
@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true; return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) { } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }
@ -942,6 +941,8 @@ void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch, ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
Request* req) Request* req)
{ {
assert(gpuDynInst->isGlobalSeg());
if (!req) { if (!req) {
req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId); req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
} }
@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
req->setFlags(Request::KERNEL); req->setFlags(Request::KERNEL);
} }
gpuDynInst->s_type = SEG_GLOBAL;
// for non-kernel MemFence operations, memorder flags are set depending // for non-kernel MemFence operations, memorder flags are set depending
// on which type of request is currently being sent, so this // on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel // should be set by the caller (e.g. if an inst has acq-rel
@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
gpuDynInst->statusVector.clear(); gpuDynInst->statusVector.clear();
if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op) if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
|| MO_ANR(gpuDynInst->m_op)) {
assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy()); assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy());
compute_unit->globalMemoryPipe.getGMLdRespFIFO() compute_unit->globalMemoryPipe.getGMLdRespFIFO()
@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
// the continuation may generate more work for // the continuation may generate more work for
// this memory request // this memory request
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }
@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
gpuDynInst->statusBitVector = VectorMask(0); gpuDynInst->statusBitVector = VectorMask(0);
if (gpuDynInst->useContinuation) { if (gpuDynInst->useContinuation) {
assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst); gpuDynInst);
} }

View file

@ -744,6 +744,7 @@ class ComputeUnit : public MemObject
private: private:
uint64_t globalSeqNum; uint64_t globalSeqNum;
int wavefrontSize; int wavefrontSize;
GPUStaticInst *kernelLaunchInst;
}; };
#endif // __COMPUTE_UNIT_HH__ #endif // __COMPUTE_UNIT_HH__

View file

@ -67,7 +67,7 @@ GlobalMemPipeline::exec()
bool accessVrf = true; bool accessVrf = true;
// check the VRF to see if the operands of a load (or load component // check the VRF to see if the operands of a load (or load component
// of an atomic) are accessible // of an atomic) are accessible
if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf = accessVrf =
@ -127,10 +127,7 @@ GlobalMemPipeline::exec()
// memory packets to DTLB // memory packets to DTLB
if (!gmIssuedRequests.empty()) { if (!gmIssuedRequests.empty()) {
GPUDynInstPtr mp = gmIssuedRequests.front(); GPUDynInstPtr mp = gmIssuedRequests.front();
if (mp->m_op == Enums::MO_LD || if (mp->isLoad() || mp->isAtomic()) {
(mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) ||
(mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) {
if (inflightLoads >= gmQueueSize) { if (inflightLoads >= gmQueueSize) {
return; return;
} else { } else {
@ -139,7 +136,7 @@ GlobalMemPipeline::exec()
} else { } else {
if (inflightStores >= gmQueueSize) { if (inflightStores >= gmQueueSize) {
return; return;
} else if (mp->m_op == Enums::MO_ST) { } else if (mp->isStore()) {
++inflightStores; ++inflightStores;
} }
} }
@ -147,9 +144,8 @@ GlobalMemPipeline::exec()
mp->initiateAcc(mp); mp->initiateAcc(mp);
gmIssuedRequests.pop(); gmIssuedRequests.pop();
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n", DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
computeUnit->cu_id, mp->simdId, mp->wfSlotId, computeUnit->cu_id, mp->simdId, mp->wfSlotId);
Enums::MemOpTypeStrings[mp->m_op]);
} }
} }
@ -160,12 +156,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers // Return data to registers
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { if (m->isLoad() || m->isAtomic()) {
gmReturnedLoads.pop(); gmReturnedLoads.pop();
assert(inflightLoads > 0); assert(inflightLoads > 0);
--inflightLoads; --inflightLoads;
if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec; std::vector<uint32_t> regVec;
// iterate over number of destination register operands since // iterate over number of destination register operands since
// this is a load or atomic operation // this is a load or atomic operation
@ -214,13 +210,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
// Decrement outstanding register count // Decrement outstanding register count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) || if (m->isStore() || m->isAtomic()) {
MO_H(m->m_op)) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time, computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
-1); -1);
} }
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time, computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
-1); -1);
} }

View file

@ -41,11 +41,10 @@
#include "gpu-compute/wavefront.hh" #include "gpu-compute/wavefront.hh"
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
GPUStaticInst *_staticInst, uint64_t instSeqNum) GPUStaticInst *static_inst, uint64_t instSeqNum)
: GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0), : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
m_op(Enums::MO_UNDEF), n_reg(0), useContinuation(false),
memoryOrder(Enums::MEMORY_ORDER_NONE), n_reg(0), useContinuation(false), statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
statusBitVector(0), staticInst(_staticInst), _seqNum(instSeqNum)
{ {
tlbHitLevel.assign(computeUnit()->wfSize(), -1); tlbHitLevel.assign(computeUnit()->wfSize(), -1);
d_data = new uint8_t[computeUnit()->wfSize() * 16]; d_data = new uint8_t[computeUnit()->wfSize() * 16];
@ -68,77 +67,69 @@ GPUDynInst::~GPUDynInst()
} }
void void
GPUDynInst::execute() GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{ {
GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(cu, wf, staticInst, _staticInst->execute(gpuDynInst);
_seqNum);
staticInst->execute(gpuDynInst);
} }
int int
GPUDynInst::numSrcRegOperands() GPUDynInst::numSrcRegOperands()
{ {
return staticInst->numSrcRegOperands(); return _staticInst->numSrcRegOperands();
} }
int int
GPUDynInst::numDstRegOperands() GPUDynInst::numDstRegOperands()
{ {
return staticInst->numDstRegOperands(); return _staticInst->numDstRegOperands();
} }
int int
GPUDynInst::getNumOperands() GPUDynInst::getNumOperands()
{ {
return staticInst->getNumOperands(); return _staticInst->getNumOperands();
} }
bool bool
GPUDynInst::isVectorRegister(int operandIdx) GPUDynInst::isVectorRegister(int operandIdx)
{ {
return staticInst->isVectorRegister(operandIdx); return _staticInst->isVectorRegister(operandIdx);
} }
bool bool
GPUDynInst::isScalarRegister(int operandIdx) GPUDynInst::isScalarRegister(int operandIdx)
{ {
return staticInst->isScalarRegister(operandIdx); return _staticInst->isScalarRegister(operandIdx);
} }
int int
GPUDynInst::getRegisterIndex(int operandIdx) GPUDynInst::getRegisterIndex(int operandIdx)
{ {
return staticInst->getRegisterIndex(operandIdx); return _staticInst->getRegisterIndex(operandIdx);
} }
int int
GPUDynInst::getOperandSize(int operandIdx) GPUDynInst::getOperandSize(int operandIdx)
{ {
return staticInst->getOperandSize(operandIdx); return _staticInst->getOperandSize(operandIdx);
} }
bool bool
GPUDynInst::isDstOperand(int operandIdx) GPUDynInst::isDstOperand(int operandIdx)
{ {
return staticInst->isDstOperand(operandIdx); return _staticInst->isDstOperand(operandIdx);
} }
bool bool
GPUDynInst::isSrcOperand(int operandIdx) GPUDynInst::isSrcOperand(int operandIdx)
{ {
return staticInst->isSrcOperand(operandIdx); return _staticInst->isSrcOperand(operandIdx);
}
bool
GPUDynInst::isArgLoad()
{
return staticInst->isArgLoad();
} }
const std::string& const std::string&
GPUDynInst::disassemble() const GPUDynInst::disassemble() const
{ {
return staticInst->disassemble(); return _staticInst->disassemble();
} }
uint64_t uint64_t
@ -147,16 +138,10 @@ GPUDynInst::seqNum() const
return _seqNum; return _seqNum;
} }
Enums::OpType
GPUDynInst::opType()
{
return staticInst->o_type;
}
Enums::StorageClassType Enums::StorageClassType
GPUDynInst::executedAs() GPUDynInst::executedAs()
{ {
return staticInst->executed_as; return _staticInst->executed_as;
} }
// Process a memory instruction and (if necessary) submit timing request // Process a memory instruction and (if necessary) submit timing request
@ -166,20 +151,347 @@ GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n", DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask); cu->cu_id, simdId, wfSlotId, exec_mask);
staticInst->initiateAcc(gpuDynInst); _staticInst->initiateAcc(gpuDynInst);
time = 0; time = 0;
} }
/**
* accessor methods for the attributes of
* the underlying GPU static instruction
*/
bool bool
GPUDynInst::scalarOp() const GPUDynInst::isALU() const
{ {
return staticInst->scalarOp(); return _staticInst->isALU();
}
bool
GPUDynInst::isBranch() const
{
return _staticInst->isBranch();
}
bool
GPUDynInst::isNop() const
{
return _staticInst->isNop();
}
bool
GPUDynInst::isReturn() const
{
return _staticInst->isReturn();
}
bool
GPUDynInst::isUnconditionalJump() const
{
return _staticInst->isUnconditionalJump();
}
bool
GPUDynInst::isSpecialOp() const
{
return _staticInst->isSpecialOp();
}
bool
GPUDynInst::isWaitcnt() const
{
return _staticInst->isWaitcnt();
}
bool
GPUDynInst::isBarrier() const
{
return _staticInst->isBarrier();
}
bool
GPUDynInst::isMemFence() const
{
return _staticInst->isMemFence();
}
bool
GPUDynInst::isMemRef() const
{
return _staticInst->isMemRef();
}
bool
GPUDynInst::isFlat() const
{
return _staticInst->isFlat();
}
bool
GPUDynInst::isLoad() const
{
return _staticInst->isLoad();
}
bool
GPUDynInst::isStore() const
{
return _staticInst->isStore();
}
bool
GPUDynInst::isAtomic() const
{
return _staticInst->isAtomic();
}
bool
GPUDynInst::isAtomicNoRet() const
{
return _staticInst->isAtomicNoRet();
}
bool
GPUDynInst::isAtomicRet() const
{
return _staticInst->isAtomicRet();
}
bool
GPUDynInst::isScalar() const
{
return _staticInst->isScalar();
}
bool
GPUDynInst::readsSCC() const
{
return _staticInst->readsSCC();
}
bool
GPUDynInst::writesSCC() const
{
return _staticInst->writesSCC();
}
bool
GPUDynInst::readsVCC() const
{
return _staticInst->readsVCC();
}
bool
GPUDynInst::writesVCC() const
{
return _staticInst->writesVCC();
}
bool
GPUDynInst::isAtomicAnd() const
{
return _staticInst->isAtomicAnd();
}
bool
GPUDynInst::isAtomicOr() const
{
return _staticInst->isAtomicOr();
}
bool
GPUDynInst::isAtomicXor() const
{
return _staticInst->isAtomicXor();
}
bool
GPUDynInst::isAtomicCAS() const
{
return _staticInst->isAtomicCAS();
}
bool GPUDynInst::isAtomicExch() const
{
return _staticInst->isAtomicExch();
}
bool
GPUDynInst::isAtomicAdd() const
{
return _staticInst->isAtomicAdd();
}
bool
GPUDynInst::isAtomicSub() const
{
return _staticInst->isAtomicSub();
}
bool
GPUDynInst::isAtomicInc() const
{
return _staticInst->isAtomicInc();
}
bool
GPUDynInst::isAtomicDec() const
{
return _staticInst->isAtomicDec();
}
bool
GPUDynInst::isAtomicMax() const
{
return _staticInst->isAtomicMax();
}
bool
GPUDynInst::isAtomicMin() const
{
return _staticInst->isAtomicMin();
}
bool
GPUDynInst::isArgLoad() const
{
return _staticInst->isArgLoad();
}
bool
GPUDynInst::isGlobalMem() const
{
return _staticInst->isGlobalMem();
}
bool
GPUDynInst::isLocalMem() const
{
return _staticInst->isLocalMem();
}
bool
GPUDynInst::isArgSeg() const
{
return _staticInst->isArgSeg();
}
bool
GPUDynInst::isGlobalSeg() const
{
return _staticInst->isGlobalSeg();
}
bool
GPUDynInst::isGroupSeg() const
{
return _staticInst->isGroupSeg();
}
bool
GPUDynInst::isKernArgSeg() const
{
return _staticInst->isKernArgSeg();
}
bool
GPUDynInst::isPrivateSeg() const
{
return _staticInst->isPrivateSeg();
}
bool
GPUDynInst::isReadOnlySeg() const
{
return _staticInst->isReadOnlySeg();
}
bool
GPUDynInst::isSpillSeg() const
{
return _staticInst->isSpillSeg();
}
bool
GPUDynInst::isWorkitemScope() const
{
return _staticInst->isWorkitemScope();
}
bool
GPUDynInst::isWavefrontScope() const
{
return _staticInst->isWavefrontScope();
}
bool
GPUDynInst::isWorkgroupScope() const
{
return _staticInst->isWorkgroupScope();
}
bool
GPUDynInst::isDeviceScope() const
{
return _staticInst->isDeviceScope();
}
bool
GPUDynInst::isSystemScope() const
{
return _staticInst->isSystemScope();
}
bool
GPUDynInst::isNoScope() const
{
return _staticInst->isNoScope();
}
bool
GPUDynInst::isRelaxedOrder() const
{
return _staticInst->isRelaxedOrder();
}
bool
GPUDynInst::isAcquire() const
{
return _staticInst->isAcquire();
}
bool
GPUDynInst::isRelease() const
{
return _staticInst->isRelease();
}
bool
GPUDynInst::isAcquireRelease() const
{
return _staticInst->isAcquireRelease();
}
bool
GPUDynInst::isNoOrder() const
{
return _staticInst->isNoOrder();
}
bool
GPUDynInst::isGloballyCoherent() const
{
return _staticInst->isGloballyCoherent();
}
bool
GPUDynInst::isSystemCoherent() const
{
return _staticInst->isSystemCoherent();
} }
void void
GPUDynInst::updateStats() GPUDynInst::updateStats()
{ {
if (staticInst->isLocalMem()) { if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory // access to LDS (shared) memory
cu->dynamicLMemInstrCnt++; cu->dynamicLMemInstrCnt++;
} else { } else {

View file

@ -39,11 +39,7 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include "enums/GenericMemoryOrder.hh"
#include "enums/GenericMemoryScope.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh" #include "enums/MemType.hh"
#include "enums/OpType.hh"
#include "enums/StorageClassType.hh" #include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh" #include "gpu-compute/gpu_exec_context.hh"
@ -180,33 +176,19 @@ class AtomicOpMin : public TypedAtomicOpFunctor<T>
} }
}; };
#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN)
#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN)
#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN)
typedef enum typedef enum
{ {
VT_32, VT_32,
VT_64, VT_64,
} vgpr_type; } vgpr_type;
typedef enum
{
SEG_PRIVATE,
SEG_SPILL,
SEG_GLOBAL,
SEG_SHARED,
SEG_READONLY,
SEG_FLAT
} seg_type;
class GPUDynInst : public GPUExecContext class GPUDynInst : public GPUExecContext
{ {
public: public:
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst, GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum); uint64_t instSeqNum);
~GPUDynInst(); ~GPUDynInst();
void execute(); void execute(GPUDynInstPtr gpuDynInst);
int numSrcRegOperands(); int numSrcRegOperands();
int numDstRegOperands(); int numDstRegOperands();
int getNumOperands(); int getNumOperands();
@ -216,13 +198,11 @@ class GPUDynInst : public GPUExecContext
int getOperandSize(int operandIdx); int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx); bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx); bool isSrcOperand(int operandIdx);
bool isArgLoad();
const std::string &disassemble() const; const std::string &disassemble() const;
uint64_t seqNum() const; uint64_t seqNum() const;
Enums::OpType opType();
Enums::StorageClassType executedAs(); Enums::StorageClassType executedAs();
// The address of the memory operation // The address of the memory operation
@ -240,14 +220,7 @@ class GPUDynInst : public GPUExecContext
// The memory type (M_U32, M_S32, ...) // The memory type (M_U32, M_S32, ...)
Enums::MemType m_type; Enums::MemType m_type;
// The memory operation (MO_LD, MO_ST, ...)
Enums::MemOpType m_op;
Enums::GenericMemoryOrder memoryOrder;
// Scope of the request
Enums::GenericMemoryScope scope;
// The memory segment (SEG_SHARED, SEG_GLOBAL, ...)
seg_type s_type;
// The equivalency class // The equivalency class
int equiv; int equiv;
// The return VGPR type (VT_32 or VT_64) // The return VGPR type (VT_32 or VT_64)
@ -288,10 +261,72 @@ class GPUDynInst : public GPUExecContext
void updateStats(); void updateStats();
GPUStaticInst* staticInstruction() { return staticInst; } GPUStaticInst* staticInstruction() { return _staticInst; }
// Is the instruction a scalar or vector op? bool isALU() const;
bool scalarOp() const; bool isBranch() const;
bool isNop() const;
bool isReturn() const;
bool isUnconditionalJump() const;
bool isSpecialOp() const;
bool isWaitcnt() const;
bool isBarrier() const;
bool isMemFence() const;
bool isMemRef() const;
bool isFlat() const;
bool isLoad() const;
bool isStore() const;
bool isAtomic() const;
bool isAtomicNoRet() const;
bool isAtomicRet() const;
bool isScalar() const;
bool readsSCC() const;
bool writesSCC() const;
bool readsVCC() const;
bool writesVCC() const;
bool isAtomicAnd() const;
bool isAtomicOr() const;
bool isAtomicXor() const;
bool isAtomicCAS() const;
bool isAtomicExch() const;
bool isAtomicAdd() const;
bool isAtomicSub() const;
bool isAtomicInc() const;
bool isAtomicDec() const;
bool isAtomicMax() const;
bool isAtomicMin() const;
bool isArgLoad() const;
bool isGlobalMem() const;
bool isLocalMem() const;
bool isArgSeg() const;
bool isGlobalSeg() const;
bool isGroupSeg() const;
bool isKernArgSeg() const;
bool isPrivateSeg() const;
bool isReadOnlySeg() const;
bool isSpillSeg() const;
bool isWorkitemScope() const;
bool isWavefrontScope() const;
bool isWorkgroupScope() const;
bool isDeviceScope() const;
bool isSystemScope() const;
bool isNoScope() const;
bool isRelaxedOrder() const;
bool isAcquire() const;
bool isRelease() const;
bool isAcquireRelease() const;
bool isNoOrder() const;
bool isGloballyCoherent() const;
bool isSystemCoherent() const;
/* /*
* Loads/stores/atomics may have acquire/release semantics associated * Loads/stores/atomics may have acquire/release semantics associated
@ -312,46 +347,32 @@ class GPUDynInst : public GPUExecContext
bool useContinuation; bool useContinuation;
template<typename c0> AtomicOpFunctor* template<typename c0> AtomicOpFunctor*
makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op) makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
{ {
using namespace Enums; if (isAtomicAnd()) {
switch(op) {
case MO_AAND:
case MO_ANRAND:
return new AtomicOpAnd<c0>(*reg0); return new AtomicOpAnd<c0>(*reg0);
case MO_AOR: } else if (isAtomicOr()) {
case MO_ANROR:
return new AtomicOpOr<c0>(*reg0); return new AtomicOpOr<c0>(*reg0);
case MO_AXOR: } else if (isAtomicXor()) {
case MO_ANRXOR:
return new AtomicOpXor<c0>(*reg0); return new AtomicOpXor<c0>(*reg0);
case MO_ACAS: } else if (isAtomicCAS()) {
case MO_ANRCAS:
return new AtomicOpCAS<c0>(*reg0, *reg1, cu); return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
case MO_AEXCH: } else if (isAtomicExch()) {
case MO_ANREXCH:
return new AtomicOpExch<c0>(*reg0); return new AtomicOpExch<c0>(*reg0);
case MO_AADD: } else if (isAtomicAdd()) {
case MO_ANRADD:
return new AtomicOpAdd<c0>(*reg0); return new AtomicOpAdd<c0>(*reg0);
case MO_ASUB: } else if (isAtomicSub()) {
case MO_ANRSUB:
return new AtomicOpSub<c0>(*reg0); return new AtomicOpSub<c0>(*reg0);
case MO_AINC: } else if (isAtomicInc()) {
case MO_ANRINC:
return new AtomicOpInc<c0>(); return new AtomicOpInc<c0>();
case MO_ADEC: } else if (isAtomicDec()) {
case MO_ANRDEC:
return new AtomicOpDec<c0>(); return new AtomicOpDec<c0>();
case MO_AMAX: } else if (isAtomicMax()) {
case MO_ANRMAX:
return new AtomicOpMax<c0>(*reg0); return new AtomicOpMax<c0>(*reg0);
case MO_AMIN: } else if (isAtomicMin()) {
case MO_ANRMIN:
return new AtomicOpMin<c0>(*reg0); return new AtomicOpMin<c0>(*reg0);
default: } else {
panic("Unrecognized atomic operation"); fatal("Unrecognized atomic operation");
} }
} }
@ -359,88 +380,58 @@ class GPUDynInst : public GPUExecContext
setRequestFlags(Request *req, bool setMemOrder=true) setRequestFlags(Request *req, bool setMemOrder=true)
{ {
// currently these are the easy scopes to deduce // currently these are the easy scopes to deduce
switch (s_type) { if (isPrivateSeg()) {
case SEG_PRIVATE:
req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT); req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
break; } else if (isSpillSeg()) {
case SEG_SPILL:
req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT); req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
break; } else if (isGlobalSeg()) {
case SEG_GLOBAL:
req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT); req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
break; } else if (isReadOnlySeg()) {
case SEG_READONLY:
req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT); req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
break; } else if (isGroupSeg()) {
case SEG_SHARED:
req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT); req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
break; } else if (isFlat()) {
case SEG_FLAT:
// TODO: translate to correct scope // TODO: translate to correct scope
assert(false); assert(false);
default: } else {
panic("Bad segment type"); fatal("%s has bad segment type\n", disassemble());
break;
} }
switch (scope) { if (isWavefrontScope()) {
case Enums::MEMORY_SCOPE_NONE:
case Enums::MEMORY_SCOPE_WORKITEM:
break;
case Enums::MEMORY_SCOPE_WAVEFRONT:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WAVEFRONT_SCOPE); Request::WAVEFRONT_SCOPE);
break; } else if (isWorkgroupScope()) {
case Enums::MEMORY_SCOPE_WORKGROUP:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WORKGROUP_SCOPE); Request::WORKGROUP_SCOPE);
break; } else if (isDeviceScope()) {
case Enums::MEMORY_SCOPE_DEVICE:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::DEVICE_SCOPE); Request::DEVICE_SCOPE);
break; } else if (isSystemScope()) {
case Enums::MEMORY_SCOPE_SYSTEM:
req->setMemSpaceConfigFlags(Request::SCOPE_VALID | req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::SYSTEM_SCOPE); Request::SYSTEM_SCOPE);
break; } else if (!isNoScope() && !isWorkitemScope()) {
default: fatal("%s has bad scope type\n", disassemble());
panic("Bad scope type");
break;
} }
if (setMemOrder) { if (setMemOrder) {
// set acquire and release flags // set acquire and release flags
switch (memoryOrder){ if (isAcquire()) {
case Enums::MEMORY_ORDER_SC_ACQUIRE:
req->setFlags(Request::ACQUIRE); req->setFlags(Request::ACQUIRE);
break; } else if (isRelease()) {
case Enums::MEMORY_ORDER_SC_RELEASE:
req->setFlags(Request::RELEASE); req->setFlags(Request::RELEASE);
break; } else if (isAcquireRelease()) {
case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE:
req->setFlags(Request::ACQUIRE | Request::RELEASE); req->setFlags(Request::ACQUIRE | Request::RELEASE);
break; } else if (!isNoOrder()) {
default: fatal("%s has bad memory order\n", disassemble());
break;
} }
} }
// set atomic type // set atomic type
// currently, the instruction genenerator only produces atomic return // currently, the instruction genenerator only produces atomic return
// but a magic instruction can produce atomic no return // but a magic instruction can produce atomic no return
if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB || if (isAtomicRet()) {
m_op == Enums::MO_AAND || m_op == Enums::MO_AOR ||
m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX ||
m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC ||
m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH ||
m_op == Enums::MO_ACAS) {
req->setFlags(Request::ATOMIC_RETURN_OP); req->setFlags(Request::ATOMIC_RETURN_OP);
} else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB || } else if (isAtomicNoRet()) {
m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR ||
m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX ||
m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC ||
m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH ||
m_op == Enums::MO_ANRCAS) {
req->setFlags(Request::ATOMIC_NO_RETURN_OP); req->setFlags(Request::ATOMIC_NO_RETURN_OP);
} }
} }
@ -457,7 +448,7 @@ class GPUDynInst : public GPUExecContext
std::vector<int> tlbHitLevel; std::vector<int> tlbHitLevel;
private: private:
GPUStaticInst *staticInst; GPUStaticInst *_staticInst;
uint64_t _seqNum; uint64_t _seqNum;
}; };

View file

@ -36,10 +36,12 @@
#include "gpu-compute/gpu_static_inst.hh" #include "gpu-compute/gpu_static_inst.hh"
GPUStaticInst::GPUStaticInst(const std::string &opcode) GPUStaticInst::GPUStaticInst(const std::string &opcode)
: o_type(Enums::OT_ALU), executed_as(Enums::SC_NONE), opcode(opcode), : executed_as(Enums::SC_NONE), opcode(opcode),
_instNum(0), _scalarOp(false) _instNum(0)
{ {
setFlag(NoOrder);
} }
const std::string& const std::string&
GPUStaticInst::disassemble() GPUStaticInst::disassemble()
{ {

View file

@ -48,7 +48,7 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include "enums/OpType.hh" #include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh" #include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh" #include "gpu-compute/misc.hh"
@ -57,7 +57,7 @@ class BaseOperand;
class BaseRegOperand; class BaseRegOperand;
class Wavefront; class Wavefront;
class GPUStaticInst class GPUStaticInst : public GPUStaticInstFlags
{ {
public: public:
GPUStaticInst(const std::string &opcode); GPUStaticInst(const std::string &opcode);
@ -86,22 +86,110 @@ class GPUStaticInst
virtual bool isValid() const = 0; virtual bool isValid() const = 0;
/* bool isALU() const { return _flags[ALU]; }
* Most instructions (including all HSAIL instructions) bool isBranch() const { return _flags[Branch]; }
* are vector ops, so _scalarOp will be false by default. bool isNop() const { return _flags[Nop]; }
* Derived instruction objects that are scalar ops must bool isReturn() const { return _flags[Return]; }
* set _scalarOp to true in their constructors.
*/
bool scalarOp() const { return _scalarOp; }
virtual bool isLocalMem() const bool
isUnconditionalJump() const
{ {
fatal("calling isLocalMem() on non-memory instruction.\n"); return _flags[UnconditionalJump];
return false;
} }
bool isArgLoad() { return false; } bool isSpecialOp() const { return _flags[SpecialOp]; }
bool isWaitcnt() const { return _flags[Waitcnt]; }
bool isBarrier() const { return _flags[MemBarrier]; }
bool isMemFence() const { return _flags[MemFence]; }
bool isMemRef() const { return _flags[MemoryRef]; }
bool isFlat() const { return _flags[Flat]; }
bool isLoad() const { return _flags[Load]; }
bool isStore() const { return _flags[Store]; }
bool
isAtomic() const
{
return _flags[AtomicReturn] || _flags[AtomicNoReturn];
}
bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
bool isAtomicRet() const { return _flags[AtomicReturn]; }
bool isScalar() const { return _flags[Scalar]; }
bool readsSCC() const { return _flags[ReadsSCC]; }
bool writesSCC() const { return _flags[WritesSCC]; }
bool readsVCC() const { return _flags[ReadsVCC]; }
bool writesVCC() const { return _flags[WritesVCC]; }
bool isAtomicAnd() const { return _flags[AtomicAnd]; }
bool isAtomicOr() const { return _flags[AtomicOr]; }
bool isAtomicXor() const { return _flags[AtomicXor]; }
bool isAtomicCAS() const { return _flags[AtomicCAS]; }
bool isAtomicExch() const { return _flags[AtomicExch]; }
bool isAtomicAdd() const { return _flags[AtomicAdd]; }
bool isAtomicSub() const { return _flags[AtomicSub]; }
bool isAtomicInc() const { return _flags[AtomicInc]; }
bool isAtomicDec() const { return _flags[AtomicDec]; }
bool isAtomicMax() const { return _flags[AtomicMax]; }
bool isAtomicMin() const { return _flags[AtomicMin]; }
bool
isArgLoad() const
{
return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
}
bool
isGlobalMem() const
{
return _flags[MemoryRef] && (_flags[GlobalSegment] ||
_flags[PrivateSegment] || _flags[ReadOnlySegment] ||
_flags[SpillSegment]);
}
bool
isLocalMem() const
{
return _flags[MemoryRef] && _flags[GroupSegment];
}
bool isArgSeg() const { return _flags[ArgSegment]; }
bool isGlobalSeg() const { return _flags[GlobalSegment]; }
bool isGroupSeg() const { return _flags[GroupSegment]; }
bool isKernArgSeg() const { return _flags[KernArgSegment]; }
bool isPrivateSeg() const { return _flags[PrivateSegment]; }
bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
bool isSpillSeg() const { return _flags[SpillSegment]; }
bool isWorkitemScope() const { return _flags[WorkitemScope]; }
bool isWavefrontScope() const { return _flags[WavefrontScope]; }
bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
bool isDeviceScope() const { return _flags[DeviceScope]; }
bool isSystemScope() const { return _flags[SystemScope]; }
bool isNoScope() const { return _flags[NoScope]; }
bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
bool isAcquire() const { return _flags[Acquire]; }
bool isRelease() const { return _flags[Release]; }
bool isAcquireRelease() const { return _flags[AcquireRelease]; }
bool isNoOrder() const { return _flags[NoOrder]; }
/**
* Coherence domain of a memory instruction. Only valid for
* machine ISA. The coherence domain specifies where it is
* possible to perform memory synchronization, e.g., acquire
* or release, from the shader kernel.
*
* isGloballyCoherent(): returns true if kernel is sharing memory
* with other work-items on the same device (GPU)
*
* isSystemCoherent(): returns true if kernel is sharing memory
* with other work-items on a different device (GPU) or the host (CPU)
*/
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
virtual uint32_t instSize() = 0; virtual uint32_t instSize() = 0;
// only used for memory instructions // only used for memory instructions
@ -120,22 +208,13 @@ class GPUStaticInst
virtual uint32_t getTargetPc() { return 0; } virtual uint32_t getTargetPc() { return 0; }
/**
* Query whether the instruction is an unconditional jump i.e., the jump
* is always executed because there is no condition to be evaluated.
*
* If the instruction is not of branch type, the result is always false.
*
* @return True if the instruction is an unconditional jump.
*/
virtual bool unconditionalJumpInstruction() { return false; }
static uint64_t dynamic_id_count; static uint64_t dynamic_id_count;
Enums::OpType o_type;
// For flat memory accesses // For flat memory accesses
Enums::StorageClassType executed_as; Enums::StorageClassType executed_as;
void setFlag(Flags flag) { _flags[flag] = true; }
protected: protected:
virtual void virtual void
execLdAcq(GPUDynInstPtr gpuDynInst) execLdAcq(GPUDynInstPtr gpuDynInst)
@ -169,7 +248,45 @@ class GPUStaticInst
*/ */
int _ipdInstNum; int _ipdInstNum;
bool _scalarOp; std::bitset<Num_Flags> _flags;
};
class KernelLaunchStaticInst : public GPUStaticInst
{
public:
KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
{
setFlag(Nop);
setFlag(Scalar);
setFlag(Acquire);
setFlag(SystemScope);
setFlag(GlobalSegment);
}
void
execute(GPUDynInstPtr gpuDynInst)
{
fatal("kernel launch instruction should not be executed\n");
}
void
generateDisassembly()
{
disassembly = opcode;
}
int getNumOperands() { return 0; }
bool isCondRegister(int operandIndex) { return false; }
bool isScalarRegister(int operandIndex) { return false; }
bool isVectorRegister(int operandIndex) { return false; }
bool isSrcOperand(int operandIndex) { return false; }
bool isDstOperand(int operandIndex) { return false; }
int getOperandSize(int operandIndex) { return 0; }
int getRegisterIndex(int operandIndex) { return 0; }
int numDstRegOperands() { return 0; }
int numSrcRegOperands() { return 0; }
bool isValid() const { return true; }
uint32_t instSize() { return 0; }
}; };
#endif // __GPU_STATIC_INST_HH__ #endif // __GPU_STATIC_INST_HH__

View file

@ -104,7 +104,7 @@ ControlFlowInfo::createBasicBlocks()
leaders.insert(0); leaders.insert(0);
for (int i = 1; i < instructions.size(); i++) { for (int i = 1; i < instructions.size(); i++) {
GPUStaticInst* instruction = instructions[i]; GPUStaticInst* instruction = instructions[i];
if (instruction->o_type == Enums::OT_BRANCH) { if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc(); const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc); leaders.insert(target_pc);
leaders.insert(i + 1); leaders.insert(i + 1);
@ -137,18 +137,18 @@ ControlFlowInfo::connectBasicBlocks()
break; break;
} }
GPUStaticInst* last = lastInstruction(bb.get()); GPUStaticInst* last = lastInstruction(bb.get());
if (last->o_type == Enums::OT_RET) { if (last->isReturn()) {
bb->successorIds.insert(exit_bb->id); bb->successorIds.insert(exit_bb->id);
continue; continue;
} }
if (last->o_type == Enums::OT_BRANCH) { if (last->isBranch()) {
const uint32_t target_pc = last->getTargetPc(); const uint32_t target_pc = last->getTargetPc();
BasicBlock* target_bb = basicBlock(target_pc); BasicBlock* target_bb = basicBlock(target_pc);
bb->successorIds.insert(target_bb->id); bb->successorIds.insert(target_bb->id);
} }
// Unconditional jump instructions have a unique successor // Unconditional jump instructions have a unique successor
if (!last->unconditionalJumpInstruction()) { if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->instNum() + 1); BasicBlock* next_bb = basicBlock(last->instNum() + 1);
bb->successorIds.insert(next_bb->id); bb->successorIds.insert(next_bb->id);
} }
@ -274,7 +274,7 @@ ControlFlowInfo::printBasicBlocks() const
int inst_num = inst->instNum(); int inst_num = inst->instNum();
std::cout << inst_num << " [" << basicBlock(inst_num)->id std::cout << inst_num << " [" << basicBlock(inst_num)->id
<< "]: " << inst->disassemble(); << "]: " << inst->disassemble();
if (inst->o_type == Enums::OT_BRANCH) { if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc(); std::cout << ", PC = " << inst->getTargetPc();
} }
std::cout << std::endl; std::cout << std::endl;

View file

@ -141,8 +141,7 @@ LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
} }
} }
if (gpuDynInst->m_op == Enums::MO_LD || if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
gpuDynInst->m_op == Enums::MO_ST) {
// mask identical addresses // mask identical addresses
for (int j = 0; j < numBanks; ++j) { for (int j = 0; j < numBanks; ++j) {
for (int j0 = 0; j0 < j; j0++) { for (int j0 = 0; j0 < j; j0++) {
@ -208,8 +207,8 @@ LdsState::processPacket(PacketPtr packet)
GPUDynInstPtr dynInst = getDynInstr(packet); GPUDynInstPtr dynInst = getDynInstr(packet);
// account for the LDS bank conflict overhead // account for the LDS bank conflict overhead
int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() : int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
(dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() : (dynInst->isStore()) ? parent->storeBusLength() :
parent->loadBusLength(); parent->loadBusLength();
// delay for accessing the LDS // delay for accessing the LDS
Tick processingTime = Tick processingTime =

View file

@ -43,7 +43,6 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "enums/MemOpType.hh"
#include "enums/MemType.hh" #include "enums/MemType.hh"
#include "gpu-compute/misc.hh" #include "gpu-compute/misc.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"

View file

@ -62,7 +62,7 @@ LocalMemPipeline::exec()
lmReturnedRequests.front() : nullptr; lmReturnedRequests.front() : nullptr;
bool accessVrf = true; bool accessVrf = true;
if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf = accessVrf =
@ -137,7 +137,7 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers // Return data to registers
if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec; std::vector<uint32_t> regVec;
for (int k = 0; k < m->n_reg; ++k) { for (int k = 0; k < m->n_reg; ++k) {
int dst = m->dst_reg+k; int dst = m->dst_reg+k;
@ -172,13 +172,12 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
// Decrement outstanding request count // Decrement outstanding request count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) if (m->isStore() || m->isAtomic()) {
|| MO_H(m->m_op)) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm, computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
m->time, -1); m->time, -1);
} }
if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm, computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
m->time, -1); m->time, -1);
} }

View file

@ -47,7 +47,6 @@
#include "cpu/simple_thread.hh" #include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh" #include "cpu/thread_context.hh"
#include "cpu/thread_state.hh" #include "cpu/thread_state.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh" #include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh" #include "gpu-compute/gpu_tlb.hh"

View file

@ -38,7 +38,6 @@
#include <string> #include <string>
#include "base/misc.hh" #include "base/misc.hh"
#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh" #include "gpu-compute/shader.hh"
@ -153,8 +152,8 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
void void
VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w) VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
{ {
bool loadInstr = IS_OT_READ(ii->opType()); bool loadInstr = ii->isLoad();
bool atomicInstr = IS_OT_ATOMIC(ii->opType()); bool atomicInstr = ii->isAtomic() || ii->isMemFence();
bool loadNoArgInstr = loadInstr && !ii->isArgLoad(); bool loadNoArgInstr = loadInstr && !ii->isArgLoad();

View file

@ -37,7 +37,6 @@
#include "debug/GPUExec.hh" #include "debug/GPUExec.hh"
#include "debug/WavefrontStack.hh" #include "debug/WavefrontStack.hh"
#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh" #include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh" #include "gpu-compute/shader.hh"
@ -165,19 +164,8 @@ Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr)
bool bool
Wavefront::isGmInstruction(GPUDynInstPtr ii) Wavefront::isGmInstruction(GPUDynInstPtr ii)
{ {
if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) || if (ii->isGlobalMem() || ii->isFlat())
IS_OT_ATOMIC_PM(ii->opType())) {
return true; return true;
}
if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType())) {
return true;
}
if (IS_OT_FLAT(ii->opType())) {
return true;
}
return false; return false;
} }
@ -185,8 +173,7 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
bool bool
Wavefront::isLmInstruction(GPUDynInstPtr ii) Wavefront::isLmInstruction(GPUDynInstPtr ii)
{ {
if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) || if (ii->isLocalMem()) {
IS_OT_ATOMIC_LM(ii->opType())) {
return true; return true;
} }
@ -199,10 +186,9 @@ Wavefront::isOldestInstALU()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP || if (status != S_STOPPED && (ii->isNop() ||
ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || ii->isReturn() || ii->isBranch() ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
ii->opType() == Enums::OT_KERN_READ)) {
return true; return true;
} }
@ -215,7 +201,7 @@ Wavefront::isOldestInstBarrier()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) { if (status != S_STOPPED && ii->isBarrier()) {
return true; return true;
} }
@ -228,9 +214,7 @@ Wavefront::isOldestInstGMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) || if (status != S_STOPPED && ii->isGlobalMem()) {
IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
return true; return true;
} }
@ -243,9 +227,7 @@ Wavefront::isOldestInstLMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) || if (status != S_STOPPED && ii->isLocalMem()) {
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
return true; return true;
} }
@ -258,9 +240,7 @@ Wavefront::isOldestInstPrivMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) || if (status != S_STOPPED && ii->isPrivateSeg()) {
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
return true; return true;
} }
@ -273,8 +253,7 @@ Wavefront::isOldestInstFlatMem()
assert(!instructionBuffer.empty()); assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front(); GPUDynInstPtr ii = instructionBuffer.front();
if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) { if (status != S_STOPPED && ii->isFlat()) {
return true; return true;
} }
@ -289,7 +268,7 @@ Wavefront::instructionBufferHasBranch()
for (auto it : instructionBuffer) { for (auto it : instructionBuffer) {
GPUDynInstPtr ii = it; GPUDynInstPtr ii = it;
if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) { if (ii->isReturn() || ii->isBranch()) {
return true; return true;
} }
} }
@ -371,23 +350,16 @@ Wavefront::ready(itype_e type)
// checking readiness will be fixed eventually. In the meantime, let's // checking readiness will be fixed eventually. In the meantime, let's
// make sure that we do not silently let an instruction type slip // make sure that we do not silently let an instruction type slip
// through this logic and always return not ready. // through this logic and always return not ready.
if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP || if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || ii->isMemFence() || ii->isFlat())) {
ii->opType() == Enums::OT_KERN_READ ||
ii->opType() == Enums::OT_ARG ||
IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) ||
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) {
panic("next instruction: %s is of unknown type\n", ii->disassemble()); panic("next instruction: %s is of unknown type\n", ii->disassemble());
} }
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n", DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) { if (type == I_ALU && ii->isBarrier()) {
// Here for ALU instruction (barrier) // Here for ALU instruction (barrier)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free? // Is wave slot free?
@ -400,7 +372,7 @@ Wavefront::ready(itype_e type)
} }
ready_inst = true; ready_inst = true;
} else if (type == I_ALU && ii->opType() == Enums::OT_NOP) { } else if (type == I_ALU && ii->isNop()) {
// Here for ALU instruction (nop) // Here for ALU instruction (nop)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free? // Is wave slot free?
@ -408,7 +380,7 @@ Wavefront::ready(itype_e type)
} }
ready_inst = true; ready_inst = true;
} else if (type == I_ALU && ii->opType() == Enums::OT_RET) { } else if (type == I_ALU && ii->isReturn()) {
// Here for ALU instruction (return) // Here for ALU instruction (return)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free? // Is wave slot free?
@ -421,10 +393,10 @@ Wavefront::ready(itype_e type)
} }
ready_inst = true; ready_inst = true;
} else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH || } else if (type == I_ALU && (ii->isBranch() ||
ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || ii->isALU() ||
ii->opType() == Enums::OT_KERN_READ || (ii->isKernArgSeg() && ii->isLoad()) ||
ii->opType() == Enums::OT_ARG)) { ii->isArgSeg())) {
// Here for ALU instruction (all others) // Here for ALU instruction (all others)
if (!computeUnit->wfWait[simdId].prerdy()) { if (!computeUnit->wfWait[simdId].prerdy()) {
// Is alu slot free? // Is alu slot free?
@ -439,18 +411,16 @@ Wavefront::ready(itype_e type)
return 0; return 0;
} }
ready_inst = true; ready_inst = true;
} else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) || } else if (type == I_GLOBAL && ii->isGlobalMem()) {
IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
// Here Global memory instruction // Here Global memory instruction
if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory write requests? // Are there in pipe or outstanding global memory write requests?
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0; return 0;
} }
} }
if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) || if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
IS_OT_HIST_GM(ii->opType())) {
// Are there in pipe or outstanding global memory read requests? // Are there in pipe or outstanding global memory read requests?
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
return 0; return 0;
@ -480,17 +450,15 @@ Wavefront::ready(itype_e type)
return 0; return 0;
} }
ready_inst = true; ready_inst = true;
} else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) || } else if (type == I_SHARED && ii->isLocalMem()) {
IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
// Here for Shared memory instruction // Here for Shared memory instruction
if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) { if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
return 0; return 0;
} }
} }
if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
IS_OT_HIST_LM(ii->opType())) {
if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
return 0; return 0;
} }
@ -519,47 +487,7 @@ Wavefront::ready(itype_e type)
return 0; return 0;
} }
ready_inst = true; ready_inst = true;
} else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) || } else if (type == I_FLAT && ii->isFlat()) {
IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
// Here for Private memory instruction ------------------------ //
if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0;
}
}
if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
IS_OT_HIST_PM(ii->opType())) {
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
return 0;
}
}
if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus?
return 0;
}
if (!glbMemIssueRdy) {
// Is wave slot free?
return 0;
}
if (!computeUnit->globalMemoryPipe.
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
// Can we insert a new request to the Global Mem Request FIFO?
return 0;
}
// can we schedule source & destination operands on the VRF?
if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
VrfAccessType::RD_WR)) {
return 0;
}
if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
return 0;
}
ready_inst = true;
} else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) {
if (!glbMemBusRdy) { if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus? // Is there an available VRF->Global memory read bus?
return 0; return 0;
@ -618,23 +546,22 @@ Wavefront::updateResources()
assert(ii); assert(ii);
computeUnit->vrf[simdId]->updateResources(this, ii); computeUnit->vrf[simdId]->updateResources(this, ii);
// Single precision ALU or Branch or Return or Special instruction // Single precision ALU or Branch or Return or Special instruction
if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || if (ii->isALU() || ii->isSpecialOp() ||
ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations // FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then // since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines // we should map them to one of the memory pipelines
ii->opType()==Enums::OT_KERN_READ || (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
ii->opType()==Enums::OT_ARG || ii->isReturn()) {
ii->opType()==Enums::OT_RET) {
computeUnit->aluPipe[simdId].preset(computeUnit->shader-> computeUnit->aluPipe[simdId].preset(computeUnit->shader->
ticks(computeUnit->spBypassLength())); ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD // this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].preset(computeUnit->shader-> computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_BARRIER) { } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].preset(computeUnit->shader-> computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_FLAT_READ) { } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++; memReqsInPipe++;
rdGmReqsInPipe++; rdGmReqsInPipe++;
@ -649,7 +576,7 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (ii->opType() == Enums::OT_FLAT_WRITE) { } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++; memReqsInPipe++;
wrGmReqsInPipe++; wrGmReqsInPipe++;
@ -664,21 +591,21 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (IS_OT_READ_GM(ii->opType())) { } else if (ii->isLoad() && ii->isGlobalMem()) {
memReqsInPipe++; memReqsInPipe++;
rdGmReqsInPipe++; rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4)); preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_GM(ii->opType())) { } else if (ii->isStore() && ii->isGlobalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrGmReqsInPipe++; wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_GM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrGmReqsInPipe++; wrGmReqsInPipe++;
rdGmReqsInPipe++; rdGmReqsInPipe++;
@ -686,21 +613,21 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_LM(ii->opType())) { } else if (ii->isLoad() && ii->isLocalMem()) {
memReqsInPipe++; memReqsInPipe++;
rdLmReqsInPipe++; rdLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(4)); preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_LM(ii->opType())) { } else if (ii->isStore() && ii->isLocalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrLmReqsInPipe++; wrLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_LM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
memReqsInPipe++; memReqsInPipe++;
wrLmReqsInPipe++; wrLmReqsInPipe++;
rdLmReqsInPipe++; rdLmReqsInPipe++;
@ -708,28 +635,6 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8)); preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_PM(ii->opType())) {
memReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_PM(ii->opType())) {
memReqsInPipe++;
wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_PM(ii->opType())) {
memReqsInPipe++;
wrGmReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} }
@ -751,7 +656,7 @@ Wavefront::exec()
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s " DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
"(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId, "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
ii->disassemble(), old_pc); ii->disassemble(), old_pc);
ii->execute(); ii->execute(ii);
// access the VRF // access the VRF
computeUnit->vrf[simdId]->exec(ii, this); computeUnit->vrf[simdId]->exec(ii, this);
srcRegOpDist.sample(ii->numSrcRegOperands()); srcRegOpDist.sample(ii->numSrcRegOperands());
@ -785,24 +690,24 @@ Wavefront::exec()
// ---- Update Vector ALU pipeline and other resources ------------------ // // ---- Update Vector ALU pipeline and other resources ------------------ //
// Single precision ALU or Branch or Return or Special instruction // Single precision ALU or Branch or Return or Special instruction
if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || if (ii->isALU() || ii->isSpecialOp() ||
ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations // FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then // since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines // we should map them to one of the memory pipelines
ii->opType() == Enums::OT_KERN_READ || (ii->isKernArgSeg() && ii->isLoad()) ||
ii->opType() == Enums::OT_ARG || ii->isArgSeg() ||
ii->opType() == Enums::OT_RET) { ii->isReturn()) {
computeUnit->aluPipe[simdId].set(computeUnit->shader-> computeUnit->aluPipe[simdId].set(computeUnit->shader->
ticks(computeUnit->spBypassLength())); ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD // this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].set(computeUnit->shader-> computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_BARRIER) { } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].set(computeUnit->shader-> computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod)); ticks(computeUnit->issuePeriod));
} else if (ii->opType() == Enums::OT_FLAT_READ) { } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) { if (Enums::SC_SHARED == ii->executedAs()) {
@ -816,7 +721,7 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (ii->opType() == Enums::OT_FLAT_WRITE) { } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs()); assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) { if (Enums::SC_SHARED == ii->executedAs()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
@ -829,32 +734,32 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} }
} else if (IS_OT_READ_GM(ii->opType())) { } else if (ii->isLoad() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(4)); set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_GM(ii->opType())) { } else if (ii->isStore() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_GM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()]. computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_READ_LM(ii->opType())) { } else if (ii->isLoad() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(4)); set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_WRITE_LM(ii->opType())) { } else if (ii->isStore() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod)); set(computeUnit->shader->ticks(computeUnit->issuePeriod));
} else if (IS_OT_ATOMIC_LM(ii->opType())) { } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8)); set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()]. computeUnit->wfWait[computeUnit->ShrMemUnitId()].