mem: support for gpu-style RMWs in ruby

This patch adds support for GPU-style read-modify-write (RMW) operations in
Ruby. Such atomic operations are traditionally executed at the memory
controller (instead of through an L1 cache using cache-line locking).

Currently, this patch works by propagating operation functors through the
memory system.
Tony Gutierrez 2016-01-19 13:57:50 -05:00
parent 34fb6b5e35
commit d658b6e1cc
6 changed files with 122 additions and 39 deletions
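
For illustration, a concrete RMW operation can be expressed as a subclass of
the TypedAtomicOpFunctor interface this patch introduces; the AtomicOpAdd name
below is a hypothetical example, not part of the patch:

template <class T>
struct AtomicOpAdd : public TypedAtomicOpFunctor<T>
{
    T a;  // addend baked in by the issuing instruction
    AtomicOpAdd(T _a) : a(_a) {}
    // Invoked with a pointer directly into host memory; by the time this
    // runs, the old value has already been copied into the response packet.
    void execute(T *p) { *p += a; }
};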


@@ -200,6 +200,19 @@ typedef std::shared_ptr<FaultBase> Fault;
 constexpr decltype(nullptr) NoFault = nullptr;
 #endif
 
+struct AtomicOpFunctor
+{
+    virtual void operator()(uint8_t *p) = 0;
+    virtual ~AtomicOpFunctor() {}
+};
+
+template <class T>
+struct TypedAtomicOpFunctor : public AtomicOpFunctor
+{
+    void operator()(uint8_t *p) { execute((T *)p); }
+    virtual void execute(T * p) = 0;
+};
+
 enum ByteOrder {
     BigEndianByteOrder,
     LittleEndianByteOrder
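
Since TypedAtomicOpFunctor already handles the uint8_t*-to-T* cast, arbitrary
operations can be adapted with a thin wrapper; a sketch assuming C++11 and
<functional>, not part of the patch:

#include <functional>

template <class T>
struct AtomicOpLambda : public TypedAtomicOpFunctor<T>
{
    std::function<void(T *)> op;
    AtomicOpLambda(std::function<void(T *)> _op) : op(_op) {}
    void execute(T *p) { op(p); }
};

// Usage, e.g. an atomic bitwise OR:
//   new AtomicOpLambda<uint32_t>([](uint32_t *p) { *p |= 0x1; });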


@@ -341,6 +341,12 @@ AbstractMemory::access(PacketPtr pkt)
     uint8_t *hostAddr = pmemAddr + pkt->getAddr() - range.start();
 
     if (pkt->cmd == MemCmd::SwapReq) {
+        if (pkt->isAtomicOp()) {
+            if (pmemAddr) {
+                memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+                (*(pkt->getAtomicOp()))(hostAddr);
+            }
+        } else {
         std::vector<uint8_t> overwrite_val(pkt->getSize());
         uint64_t condition_val64;
         uint32_t condition_val32;
@@ -374,6 +380,7 @@ AbstractMemory::access(PacketPtr pkt)
         assert(!pkt->req->isInstFetch());
         TRACE_PACKET("Read/Write");
         numOther[pkt->req->masterId()]++;
+        }
     } else if (pkt->isRead()) {
         assert(!pkt->isWrite());
         if (pkt->isLLSC()) {
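
The ordering of the two added statements is what gives "return" atomics their
semantics: the packet is filled with the old memory contents first, and only
then does the functor modify memory in place. A standalone sketch of the same
sequence, reusing the hypothetical AtomicOpAdd from above with a local buffer
standing in for host memory:

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    uint32_t host_mem = 41;              // stands in for *hostAddr
    uint8_t pkt_data[sizeof(uint32_t)];  // stands in for the packet buffer

    AtomicOpAdd<uint32_t> add_one(1);
    std::memcpy(pkt_data, &host_mem, sizeof(host_mem)); // old value -> packet
    add_one(reinterpret_cast<uint8_t *>(&host_mem));    // apply RMW in place

    uint32_t returned;
    std::memcpy(&returned, pkt_data, sizeof(returned));
    assert(returned == 41 && host_mem == 42);  // fetch-and-add returns old value
    return 0;
}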


@@ -660,6 +660,12 @@ class Packet : public Printable
         return _isSecure;
     }
 
+    /**
+     * Accessor function to atomic op.
+     */
+    AtomicOpFunctor *getAtomicOp() const { return req->getAtomicOpFunctor(); }
+    bool isAtomicOp() const { return req->isAtomic(); }
+
     /**
      * It has been determined that the SC packet should successfully update
      * memory. Therefore, convert this SC packet to a normal write.


@@ -56,6 +56,7 @@ bool testAndWrite(Addr addr, DataBlock datablk, Packet *pkt);
 enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent") {
   // Valid data
   Read_Only,  desc="block is Read Only (modulo functional writes)";
+  Write_Only, desc="block is Write Only";
   Read_Write, desc="block is Read/Write";
 
   // Possibly Invalid data
@@ -144,7 +145,9 @@ enumeration(TransitionResult, desc="...") {
 enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
   LD,               desc="Load";
   ST,               desc="Store";
-  ATOMIC,           desc="Atomic Load/Store";
+  ATOMIC,           desc="Atomic Load/Store -- deprecated; use ATOMIC_RETURN or ATOMIC_NO_RETURN";
+  ATOMIC_RETURN,    desc="Atomic Load/Store, return data";
+  ATOMIC_NO_RETURN, desc="Atomic Load/Store, do not return data";
   IFETCH,           desc="Instruction fetch";
   IO,               desc="I/O";
   REPLACEMENT,      desc="Replacement";
@@ -166,6 +169,8 @@ enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
   Default,     desc="Replace this with access_types passed to the DMA Ruby object";
   LD,          desc="Load";
   ST,          desc="Store";
+  ATOMIC,      desc="Atomic Load/Store";
+  REPLACEMENT, desc="Replacement";
   FLUSH,       desc="Flush request type";
   NULL,        desc="Invalid request type";
 }
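
A sequencer or GPU coalescer can select between the two new Ruby request
types from the request flags added below in request.hh; a hypothetical helper
(not code from this patch), using the C++ enum names SLICC generates:

RubyRequestType
selectAtomicType(const Request *req)
{
    assert(req->isAtomic());
    return req->isAtomicReturn() ? RubyRequestType_ATOMIC_RETURN
                                 : RubyRequestType_ATOMIC_NO_RETURN;
}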


@@ -126,6 +126,7 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
   int Size,             desc="size in bytes of access";
   PrefetchBit Prefetch, desc="Is this a prefetch request";
   int contextId,        desc="this goes away but must be replace with Nilay";
+  int wfid,             desc="Writethrough wavefront";
   HSAScope scope,       desc="HSA scope";
   HSASegment segment,   desc="HSA segment";
 }


@@ -160,6 +160,11 @@ class Request
         /** The request should be marked with RELEASE. */
         RELEASE                    = 0x00040000,
 
+        /** The request is an atomic that returns data. */
+        ATOMIC_RETURN_OP           = 0x40000000,
+        /** The request is an atomic that does not return data. */
+        ATOMIC_NO_RETURN_OP        = 0x80000000,
+
         /** The request should be marked with KERNEL.
          * Used to indicate the synchronization associated with a GPU kernel
          * launch or completion.
@@ -345,6 +350,9 @@ class Request
     /** Sequence number of the instruction that creates the request */
     InstSeqNum _reqInstSeqNum;
 
+    /** A pointer to an atomic operation */
+    AtomicOpFunctor *atomicOpFunctor;
+
   public:
 
     /**
@@ -356,7 +364,8 @@ class Request
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {}
 
     Request(Addr paddr, unsigned size, Flags flags, MasterID mid,
@@ -364,7 +373,8 @@ class Request
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(seq_num), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(seq_num), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, curTick());
         setThreadContext(cid, tid);
@@ -380,7 +390,8 @@ class Request
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, curTick());
     }
@@ -389,7 +400,8 @@ class Request
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, time);
     }
@@ -398,12 +410,12 @@ class Request
             Addr pc)
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
-          _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _extraData(0), _contextId(0), _threadId(0), _pc(pc),
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, time);
         privateFlags.set(VALID_PC);
-        _pc = pc;
     }
 
     Request(int asid, Addr vaddr, unsigned size, Flags flags, MasterID mid,
@@ -411,13 +423,27 @@ class Request
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setVirt(asid, vaddr, size, flags, mid, pc);
         setThreadContext(cid, tid);
     }
 
-    ~Request() {}
+    Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc,
+            int cid, ThreadID tid, AtomicOpFunctor *atomic_op)
+        : atomicOpFunctor(atomic_op)
+    {
+        setVirt(asid, vaddr, size, flags, mid, pc);
+        setThreadContext(cid, tid);
+    }
+
+    ~Request()
+    {
+        if (hasAtomicOpFunctor()) {
+            delete atomicOpFunctor;
+        }
+    }
 
     /**
      * Set up CPU and thread numbers.
@@ -541,6 +567,22 @@ class Request
         return _time;
     }
 
+    /**
+     * Accessor for atomic-op functor.
+     */
+    bool
+    hasAtomicOpFunctor()
+    {
+        return atomicOpFunctor != NULL;
+    }
+
+    AtomicOpFunctor *
+    getAtomicOpFunctor()
+    {
+        assert(atomicOpFunctor != NULL);
+        return atomicOpFunctor;
+    }
+
     /** Accessor for flags. */
     Flags
     getFlags()
@@ -749,6 +791,15 @@ class Request
     bool isAcquire() const { return _flags.isSet(ACQUIRE); }
     bool isRelease() const { return _flags.isSet(RELEASE); }
     bool isKernel() const { return _flags.isSet(KERNEL); }
+    bool isAtomicReturn() const { return _flags.isSet(ATOMIC_RETURN_OP); }
+    bool isAtomicNoReturn() const { return _flags.isSet(ATOMIC_NO_RETURN_OP); }
+
+    bool
+    isAtomic() const
+    {
+        return _flags.isSet(ATOMIC_RETURN_OP) ||
+               _flags.isSet(ATOMIC_NO_RETURN_OP);
+    }
 
     /**
      * Accessor functions for the memory space configuration flags and used by
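
Putting the pieces together, an issuing model allocates the functor, hands
ownership to the Request through the new constructor, and marks the request
with one of the two new flags. A hedged end-to-end sketch, in which asid,
vaddr, masterId, pc, cid, and tid are placeholders and AtomicOpAdd is the
hypothetical functor from above:

Request::Flags flags;
flags.set(Request::ATOMIC_RETURN_OP);  // the old value should come back

// The Request takes ownership; its destructor deletes the functor.
Request *req = new Request(asid, vaddr, sizeof(uint32_t), flags, masterId,
                           pc, cid, tid, new AtomicOpAdd<uint32_t>(1));
assert(req->isAtomic() && req->hasAtomicOpFunctor());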