From 34fb6b5e35db751f310aee824046107e57a0ba03 Mon Sep 17 00:00:00 2001
From: Blake Hechtman
Date: Mon, 20 Jul 2015 09:15:18 -0500
Subject: [PATCH] mem: misc flags for AMD gpu model

This patch adds support for marking memory requests/packets with attributes
defined in HSA, such as memory order and scope.
---
 src/mem/protocol/RubySlicc_Exports.sm       |  25 +++-
 src/mem/protocol/RubySlicc_Types.sm         |   2 +
 src/mem/request.hh                          | 131 +++++++++++++++++++-
 src/mem/ruby/common/DataBlock.hh            |   1 -
 src/mem/ruby/slicc_interface/RubyRequest.hh |  70 ++++++++++-
 src/mem/ruby/system/RubyPort.cc             |  49 +++++---
 6 files changed, 251 insertions(+), 27 deletions(-)

diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 617989d15..6fedfeb2d 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -41,7 +41,7 @@ external_type(Tick, primitive="yes", default="0");
 
 structure(DataBlock, external = "yes", desc="..."){
   void clear();
-  void copyPartial(DataBlock, int, int);
+  void atomicPartial(DataBlock, WriteMask);
 }
 
 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
@@ -78,6 +78,26 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
   NotPresent, desc="block is NotPresent";
   Busy,       desc="block is in a transient state, currently invalid";
 }
+
+// HSA scopes
+enumeration(HSAScope, desc="...", default="HSAScope_UNSPECIFIED") {
+  UNSPECIFIED, desc="Unspecified scope";
+  NOSCOPE,     desc="Explicitly unscoped";
+  WAVEFRONT,   desc="Wavefront scope";
+  WORKGROUP,   desc="Workgroup scope";
+  DEVICE,      desc="Device scope";
+  SYSTEM,      desc="System scope";
+}
+
+// HSA segment types
+enumeration(HSASegment, desc="...", default="HSASegment_GLOBAL") {
+  GLOBAL,   desc="Global segment";
+  GROUP,    desc="Group segment";
+  PRIVATE,  desc="Private segment";
+  KERNARG,  desc="Kernarg segment";
+  READONLY, desc="Readonly segment";
+  SPILL,    desc="Spill segment";
+  ARG,      desc="Arg segment";
+}
 
 // TesterStatus
 enumeration(TesterStatus, desc="...") {
@@ -143,9 +163,10 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
 }
 
 enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
-  Default, desc="Replace this with access_types passed to the DMA Ruby object";
+  Default, desc="Replace this with access_types passed to the DMA Ruby object";
   LD,      desc="Load";
   ST,      desc="Store";
+  FLUSH,   desc="Flush request type";
   NULL,    desc="Invalid request type";
 }
 
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index 8e846098c..c7479089b 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -126,6 +126,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
   int Size,             desc="size in bytes of access";
   PrefetchBit Prefetch, desc="Is this a prefetch request";
   int contextId,        desc="this goes away but must be replace with Nilay";
+  HSAScope scope,       desc="HSA scope";
+  HSASegment segment,   desc="HSA segment";
 }
 
 structure(AbstractEntry, primitive="yes", external = "yes") {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index de781f5d6..bb5e5d59c 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -160,6 +160,12 @@ class Request
     /** The request should be marked with RELEASE. */
     RELEASE                    = 0x00040000,
 
+    /** The request should be marked with KERNEL.
+     * Used to indicate the synchronization associated with a GPU kernel
+     * launch or completion.
+     */
+    KERNEL                     = 0x00001000,
+
     /**
      * The request should be handled by the generic IPR code (only
      * valid together with MMAPPED_IPR)
@@ -198,6 +204,37 @@ class Request
     };
     /** @} */
 
+    typedef uint32_t MemSpaceConfigFlagsType;
+    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
+
+    enum : MemSpaceConfigFlagsType {
+        /** Has a synchronization scope been set? */
+        SCOPE_VALID            = 0x00000001,
+        /** Access has Wavefront scope visibility */
+        WAVEFRONT_SCOPE        = 0x00000002,
+        /** Access has Workgroup scope visibility */
+        WORKGROUP_SCOPE        = 0x00000004,
+        /** Access has Device (e.g., GPU) scope visibility */
+        DEVICE_SCOPE           = 0x00000008,
+        /** Access has System (e.g., CPU + GPU) scope visibility */
+        SYSTEM_SCOPE           = 0x00000010,
+
+        /** Global Segment */
+        GLOBAL_SEGMENT         = 0x00000020,
+        /** Group Segment */
+        GROUP_SEGMENT          = 0x00000040,
+        /** Private Segment */
+        PRIVATE_SEGMENT        = 0x00000080,
+        /** Kernarg Segment */
+        KERNARG_SEGMENT        = 0x00000100,
+        /** Readonly Segment */
+        READONLY_SEGMENT       = 0x00000200,
+        /** Spill Segment */
+        SPILL_SEGMENT          = 0x00000400,
+        /** Arg Segment */
+        ARG_SEGMENT            = 0x00000800,
+    };
+
   private:
     typedef uint8_t PrivateFlagsType;
     typedef ::Flags<PrivateFlagsType> PrivateFlags;
@@ -268,6 +305,9 @@ class Request
     /** Flag structure for the request. */
     Flags _flags;
 
+    /** Memory space configuration flag structure for the request. */
+    MemSpaceConfigFlags _memSpaceConfigFlags;
+
     /** Private flags for field validity checking. */
     PrivateFlags privateFlags;
 
@@ -520,6 +560,13 @@ class Request
         _flags.set(flags);
     }
 
+    void
+    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    {
+        assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
+        _memSpaceConfigFlags.set(extraFlags);
+    }
+
     /** Accessor function for vaddr.*/
     bool
     hasVaddr() const
@@ -685,7 +732,7 @@ class Request
         _reqInstSeqNum = seq_num;
     }
 
-    /** Accessor functions for flags. Note that these are for testing
+    /** Accessor functions for flags. Note that these are for testing
         only; setting flags should be done via setFlags(). */
     bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
     bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
@@ -701,6 +748,88 @@ class Request
     bool isPTWalk() const { return _flags.isSet(PT_WALK); }
     bool isAcquire() const { return _flags.isSet(ACQUIRE); }
     bool isRelease() const { return _flags.isSet(RELEASE); }
+    bool isKernel() const { return _flags.isSet(KERNEL); }
+
+    /**
+     * Accessor functions for the memory space configuration flags, used by
+     * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note
+     * that these are for testing only; setting extraFlags should be done
+     * via setMemSpaceConfigFlags().
+     */
+    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
+
+    bool
+    isWavefrontScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
+    }
+
+    bool
+    isWorkgroupScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
+    }
+
+    bool
+    isDeviceScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
+    }
+
+    bool
+    isSystemScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
+    }
+
+    bool
+    isGlobalSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
+               (!isGroupSegment() && !isPrivateSegment() &&
+                !isKernargSegment() && !isReadonlySegment() &&
+                !isSpillSegment() && !isArgSegment());
+    }
+
+    bool
+    isGroupSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
+    }
+
+    bool
+    isPrivateSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
+    }
+
+    bool
+    isKernargSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
+    }
+
+    bool
+    isReadonlySegment() const
+    {
+        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
+    }
+
+    bool
+    isSpillSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
+    }
+
+    bool
+    isArgSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
+    }
 };
 
 #endif // __MEM_REQUEST_HH__
diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh
index ac08fac82..49ce3624a 100644
--- a/src/mem/ruby/common/DataBlock.hh
+++ b/src/mem/ruby/common/DataBlock.hh
@@ -60,7 +60,6 @@ class DataBlock
     const uint8_t *getData(int offset, int len) const;
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
-    void copyPartial(const DataBlock & dblk, int offset, int len);
     bool equal(const DataBlock& obj) const;
     void print(std::ostream& out) const;
 
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index b17269a78..73f214a20 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -30,12 +30,16 @@
 #define __MEM_RUBY_SLICC_INTERFACE_RUBY_REQUEST_HH__
 
 #include <ostream>
+#include <vector>
 
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
 #include "mem/protocol/Message.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/protocol/RubyRequestType.hh"
 #include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/DataBlock.hh"
 
 class RubyRequest : public Message
 {
@@ -50,11 +54,17 @@ class RubyRequest : public Message
     uint8_t* data;
     PacketPtr pkt;
     ContextID m_contextId;
+    int m_wfid;
+    HSAScope m_scope;
+    HSASegment m_segment;
+
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
         PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100)
+        ContextID _proc_id = 100, ContextID _core_id = 99,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -64,11 +74,65 @@ class RubyRequest : public Message
           m_Prefetch(_pb),
           data(_data),
           pkt(_pkt),
-          m_contextId(_proc_id)
+          m_contextId(_core_id),
+          m_scope(_scope),
+          m_segment(_segment)
     {
-      m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
 
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        std::vector< std::pair<Addr, AtomicOpFunctor*> > _atomicOps,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
     RubyRequest(Tick curTime) : Message(curTime) {}
     MsgPtr clone() const
     { return std::shared_ptr<Message>(new RubyRequest(*this)); }
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 52acaf8c3..5a5f528bb 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -237,25 +237,27 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
     // Check for pio requests and directly send them to the dedicated
     // pio port.
-    if (!isPhysMemAddress(pkt->getAddr())) {
-        assert(ruby_port->memMasterPort.isConnected());
-        DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n",
-                pkt->getAddr());
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        if (!isPhysMemAddress(pkt->getAddr())) {
+            assert(ruby_port->memMasterPort.isConnected());
+            DPRINTF(RubyPort, "Request address %#x assumed to be a "
+                    "pio address\n", pkt->getAddr());
 
-        // Save the port in the sender state object to be used later to
-        // route the response
-        pkt->pushSenderState(new SenderState(this));
+            // Save the port in the sender state object to be used later to
+            // route the response
+            pkt->pushSenderState(new SenderState(this));
 
-        // send next cycle
-        RubySystem *rs = ruby_port->m_ruby_system;
-        ruby_port->memMasterPort.schedTimingReq(pkt,
-            curTick() + rs->clockPeriod());
-        return true;
+            // send next cycle
+            RubySystem *rs = ruby_port->m_ruby_system;
+            ruby_port->memMasterPort.schedTimingReq(pkt,
+                curTick() + rs->clockPeriod());
+            return true;
+        }
+
+        assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
+               RubySystem::getBlockSizeBytes());
     }
 
-    assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
-           RubySystem::getBlockSizeBytes());
-
     // Submit the ruby request
     RequestStatus requestStatus = ruby_port->makeRequest(pkt);
 
@@ -272,9 +274,11 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
         return true;
     }
-
-    DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
-            pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        DPRINTF(RubyPort,
+                "Request for address %#x was not issued because %s\n",
+                pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    }
 
     addToRetryList();
 
@@ -466,11 +470,16 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
         }
     }
 
-    // Flush requests don't access physical memory
-    if (pkt->isFlush()) {
+    // Flush, acquire, and release requests don't access physical memory
+    if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
         accessPhysMem = false;
     }
 
+    if (pkt->req->isKernel()) {
+        accessPhysMem = false;
+        needsResponse = true;
+    }
+
     DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);
 
     RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
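
Usage note (not part of the patch): the sketch below shows how a GPU core
model might tag a memory request with the new HSA attributes before issuing
it to Ruby. The helper name and the way the Request is obtained are
hypothetical; the flag values, setMemSpaceConfigFlags(), and the accessor
semantics come from the request.hh changes above.

    #include <cassert>
    #include "mem/request.hh"

    // Hypothetical helper: mark a GPU load as device-scoped and targeting
    // the global segment. The request must already carry a valid address,
    // because setMemSpaceConfigFlags() asserts VALID_PADDR | VALID_VADDR.
    void
    tagGpuLoad(Request *req)
    {
        req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                    Request::DEVICE_SCOPE |
                                    Request::GLOBAL_SEGMENT);

        assert(req->isScoped() && req->isDeviceScope());
        // GLOBAL_SEGMENT need not be set explicitly: isGlobalSegment()
        // also returns true when no other segment flag is set.
        assert(req->isGlobalSegment());
    }

The scope accessors assert(isScoped()), so SCOPE_VALID must accompany any of
the *_SCOPE flags; a request that never calls setMemSpaceConfigFlags() still
reports isGlobalSegment() == true, matching the HSASegment_GLOBAL default on
the Ruby side.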