/*
 * Copyright (c) 2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Anthony Gutierrez
 */

#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__

#include <cstdint>
#include <string>

#include "enums/MemType.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"

class GPUStaticInst;

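// The AtomicOp* functors below each wrap one read-modify-write operation:
// a functor captures its operand(s) at construction time and applies the
// operation in place when execute() is called on the target value.
// makeAtomicOpFunctor() (in GPUDynInst, below) selects the matching functor
// for a dynamic atomic instruction.
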
template<typename T>
class AtomicOpAnd : public TypedAtomicOpFunctor<T>
{
  public:
    T a;

    AtomicOpAnd(T _a) : a(_a) { }
    void execute(T *b) { *b &= a; }
};

template<typename T>
class AtomicOpOr : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpOr(T _a) : a(_a) { }
    void execute(T *b) { *b |= a; }
};

template<typename T>
class AtomicOpXor : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpXor(T _a) : a(_a) { }
    void execute(T *b) { *b ^= a; }
};

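// Compare-and-swap: writes s to the target location only if its current
// value equals c; failed swaps are counted separately on the owning
// compute unit.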
template<typename T>
class AtomicOpCAS : public TypedAtomicOpFunctor<T>
{
  public:
    T c;
    T s;

    ComputeUnit *computeUnit;

    AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
      : c(_c), s(_s), computeUnit(compute_unit) { }

    void
    execute(T *b)
    {
        computeUnit->numCASOps++;

        if (*b == c) {
            *b = s;
        } else {
            computeUnit->numFailedCASOps++;
        }

        if (computeUnit->xact_cas_mode) {
            computeUnit->xactCasLoadMap.clear();
        }
    }
};

template<typename T>
class AtomicOpExch : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpExch(T _a) : a(_a) { }
    void execute(T *b) { *b = a; }
};

template<typename T>
class AtomicOpAdd : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpAdd(T _a) : a(_a) { }
    void execute(T *b) { *b += a; }
};

template<typename T>
class AtomicOpSub : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpSub(T _a) : a(_a) { }
    void execute(T *b) { *b -= a; }
};

template<typename T>
class AtomicOpInc : public TypedAtomicOpFunctor<T>
{
  public:
    AtomicOpInc() { }
    void execute(T *b) { *b += 1; }
};

template<typename T>
class AtomicOpDec : public TypedAtomicOpFunctor<T>
{
  public:
    AtomicOpDec() { }
    void execute(T *b) { *b -= 1; }
};

template<typename T>
class AtomicOpMax : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpMax(T _a) : a(_a) { }

    void
    execute(T *b)
    {
        if (a > *b)
            *b = a;
    }
};

template<typename T>
class AtomicOpMin : public TypedAtomicOpFunctor<T>
{
  public:
    T a;
    AtomicOpMin(T _a) : a(_a) { }

    void
    execute(T *b)
    {
        if (a < *b)
            *b = a;
    }
};

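// Width of the return VGPR value (32- or 64-bit) produced by a memory
// instruction.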
typedef enum
{
    VT_32,
    VT_64,
} vgpr_type;

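// A GPUDynInst holds the dynamic (per-execution) state of a single GPU
// instruction: the wavefront and compute unit it was issued from, operand
// bookkeeping, memory-access state, and a pointer to the static instruction
// it wraps.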
class GPUDynInst : public GPUExecContext
{
  public:
    GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
               uint64_t instSeqNum);
    ~GPUDynInst();
    void execute(GPUDynInstPtr gpuDynInst);

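    // Operand bookkeeping: counts, register classes, sizes, and whether each
    // operand is a source or a destination.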
    int numSrcRegOperands();
    int numDstRegOperands();
    int getNumOperands();
    bool isVectorRegister(int operandIdx);
    bool isScalarRegister(int operandIdx);
    bool isCondRegister(int operandIdx);
    int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
    int getOperandSize(int operandIdx);
    bool isDstOperand(int operandIdx);
    bool isSrcOperand(int operandIdx);

    const std::string &disassemble() const;

    uint64_t seqNum() const;

    Enums::StorageClassType executedAs();

    // The address of the memory operation
    std::vector<Addr> addr;
    Addr pAddr;

    // The data to be written
    uint8_t *d_data;
    // Additional data (for atomics)
    uint8_t *a_data;
    // Additional data (for atomics)
    uint8_t *x_data;
    // The execution mask
    VectorMask exec_mask;

    // The memory type (M_U32, M_S32, ...)
    Enums::MemType m_type;

    // The equivalency class
    int equiv;
    // The return VGPR type (VT_32 or VT_64)
    vgpr_type v_type;
    // Number of VGPRs accessed (1, 2, or 4)
    int n_reg;
    // The return VGPR index
    int dst_reg;
    // There can be at most 4 dest regs
    int dst_reg_vec[4];
    // SIMD unit to which the WF of the memory instruction has been mapped
    int simdId;
    // Unique id of the WF to which the memory instruction belongs
    int wfDynId;
    // The kernel id of the requesting WF
    int kern_id;
    // The CU id of the requesting WF
    int cu_id;
    // HW slot id to which the WF is mapped inside a SIMD unit
    int wfSlotId;
    // Execution pipeline id where the memory instruction has been scheduled
    int pipeId;
    // The execution time of this operation
    Tick time;
    // The latency of this operation
    WaitClass latency;
    // A list of bank conflicts for the 4 cycles.
    uint32_t bc[4];

    // A pointer to ROM
    uint8_t *rom;
    // The size of the READONLY segment
    int sz_rom;

    // Initiate the specified memory operation by creating a memory request
    // and sending it off to the memory system.
    void initiateAcc(GPUDynInstPtr gpuDynInst);
    // Complete the specified memory operation: for a load or an atomic with
    // return, write the value back to the register file; for a store, there
    // is nothing left to do.
    void completeAcc(GPUDynInstPtr gpuDynInst);

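    // Update dynamic-instruction statistics (e.g., memory instruction
    // counts) associated with this instruction's execution.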
    void updateStats();

    GPUStaticInst* staticInstruction() { return _staticInst; }

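    // Queries about the instruction's type, register usage, memory segment,
    // synchronization scope, and ordering semantics.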
    bool isALU() const;
    bool isBranch() const;
    bool isNop() const;
    bool isReturn() const;
    bool isUnconditionalJump() const;
    bool isSpecialOp() const;
    bool isWaitcnt() const;

    bool isBarrier() const;
    bool isMemFence() const;
    bool isMemRef() const;
    bool isFlat() const;
    bool isLoad() const;
    bool isStore() const;

    bool isAtomic() const;
    bool isAtomicNoRet() const;
    bool isAtomicRet() const;

    bool isScalar() const;
    bool readsSCC() const;
    bool writesSCC() const;
    bool readsVCC() const;
    bool writesVCC() const;

    bool isAtomicAnd() const;
    bool isAtomicOr() const;
    bool isAtomicXor() const;
    bool isAtomicCAS() const;
    bool isAtomicExch() const;
    bool isAtomicAdd() const;
    bool isAtomicSub() const;
    bool isAtomicInc() const;
    bool isAtomicDec() const;
    bool isAtomicMax() const;
    bool isAtomicMin() const;

    bool isArgLoad() const;
    bool isGlobalMem() const;
    bool isLocalMem() const;

    bool isArgSeg() const;
    bool isGlobalSeg() const;
    bool isGroupSeg() const;
    bool isKernArgSeg() const;
    bool isPrivateSeg() const;
    bool isReadOnlySeg() const;
    bool isSpillSeg() const;

    bool isWorkitemScope() const;
    bool isWavefrontScope() const;
    bool isWorkgroupScope() const;
    bool isDeviceScope() const;
    bool isSystemScope() const;
    bool isNoScope() const;

    bool isRelaxedOrder() const;
    bool isAcquire() const;
    bool isRelease() const;
    bool isAcquireRelease() const;
    bool isNoOrder() const;

    bool isGloballyCoherent() const;
    bool isSystemCoherent() const;

    /*
     * Loads/stores/atomics may have acquire/release semantics associated
     * with them. Some protocols want to see the acquire/release as separate
     * requests from the load/store/atomic. We implement that separation
     * using continuations (i.e., a function pointer with an object associated
     * with it). When, for example, the front-end generates a store with
     * release semantics, we will first issue a normal store and set the
     * continuation in the GPUDynInst to a function that generates a
     * release request. That continuation will be called when the normal
     * store completes (in ComputeUnit::DataPort::recvTimingResponse). The
     * continuation will be called in the context of the same GPUDynInst
     * that generated the initial store.
     */
    std::function<void(GPUStaticInst*, GPUDynInstPtr)> execContinuation;

    // when true, call execContinuation when the response arrives
    bool useContinuation;

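    // Construct the atomic-op functor that matches this instruction's atomic
    // type; reg0 supplies the operand value and reg1 is only used for CAS.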
    template<typename c0> AtomicOpFunctor*
    makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
    {
        if (isAtomicAnd()) {
            return new AtomicOpAnd<c0>(*reg0);
        } else if (isAtomicOr()) {
            return new AtomicOpOr<c0>(*reg0);
        } else if (isAtomicXor()) {
            return new AtomicOpXor<c0>(*reg0);
        } else if (isAtomicCAS()) {
            return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
        } else if (isAtomicExch()) {
            return new AtomicOpExch<c0>(*reg0);
        } else if (isAtomicAdd()) {
            return new AtomicOpAdd<c0>(*reg0);
        } else if (isAtomicSub()) {
            return new AtomicOpSub<c0>(*reg0);
        } else if (isAtomicInc()) {
            return new AtomicOpInc<c0>();
        } else if (isAtomicDec()) {
            return new AtomicOpDec<c0>();
        } else if (isAtomicMax()) {
            return new AtomicOpMax<c0>(*reg0);
        } else if (isAtomicMin()) {
            return new AtomicOpMin<c0>(*reg0);
        } else {
            fatal("Unrecognized atomic operation");
        }
    }

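    // Illustrative sketch only -- the identifiers below are hypothetical and
    // not taken from an actual call site. An ISA implementation might build
    // the functor from two operand values and then let the memory system
    // apply it to the target location (assuming TypedAtomicOpFunctor's call
    // operator forwards to execute()):
    //
    //     uint32_t src0 = ...;  // operand read from the register file
    //     uint32_t src1 = ...;  // second operand, only meaningful for CAS
    //     AtomicOpFunctor *amo =
    //         gpuDynInst->makeAtomicOpFunctor<uint32_t>(&src0, &src1);
    //     (*amo)(reinterpret_cast<uint8_t *>(dataPtr));
    //     delete amo;
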
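    // Tag a memory request with this instruction's segment, scope, ordering,
    // and atomic-type flags so the memory system can handle it correctly.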
    void
    setRequestFlags(Request *req, bool setMemOrder=true)
    {
        // currently these are the easy scopes to deduce
        if (isPrivateSeg()) {
            req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
        } else if (isSpillSeg()) {
            req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
        } else if (isGlobalSeg()) {
            req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
        } else if (isReadOnlySeg()) {
            req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
        } else if (isGroupSeg()) {
            req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
        } else if (isFlat()) {
            // TODO: translate to correct scope
            assert(false);
        } else {
            fatal("%s has bad segment type\n", disassemble());
        }

        if (isWavefrontScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::WAVEFRONT_SCOPE);
        } else if (isWorkgroupScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::WORKGROUP_SCOPE);
        } else if (isDeviceScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::DEVICE_SCOPE);
        } else if (isSystemScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::SYSTEM_SCOPE);
        } else if (!isNoScope() && !isWorkitemScope()) {
            fatal("%s has bad scope type\n", disassemble());
        }

        if (setMemOrder) {
            // set acquire and release flags
            if (isAcquire()) {
                req->setFlags(Request::ACQUIRE);
            } else if (isRelease()) {
                req->setFlags(Request::RELEASE);
            } else if (isAcquireRelease()) {
                req->setFlags(Request::ACQUIRE | Request::RELEASE);
            } else if (!isNoOrder()) {
                fatal("%s has bad memory order\n", disassemble());
            }
        }

        // set atomic type
        // currently, the instruction generator only produces atomic return
        // but a magic instruction can produce atomic no return
        if (isAtomicRet()) {
            req->setFlags(Request::ATOMIC_RETURN_OP);
        } else if (isAtomicNoRet()) {
            req->setFlags(Request::ATOMIC_NO_RETURN_OP);
        }
    }

    // Map the addresses satisfied by returned packets to the lanes that
    // requested them
    typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
    StatusVector memStatusVector;

    // Track the status of memory requests per lane, a bit per lane
    VectorMask statusBitVector;
    // for ld_v# or st_v#
    std::vector<int> statusVector;
    std::vector<int> tlbHitLevel;

  private:
    GPUStaticInst *_staticInst;
    uint64_t _seqNum;
};

#endif // __GPU_DYN_INST_HH__