mem: misc flags for AMD gpu model

This patch adds support for marking memory requests/packets with attributes
defined in HSA, such as memory order and scope.
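
As a usage sketch (illustrative only, not part of the diff): with these flags
in place, an issuing model can tag a request with an HSA memory-order flag
plus a scope and segment, and downstream components can query them through the
new accessors. The MasterID `mid` and the address/size values below are
placeholders.

    // Mark a 4-byte access as an HSA acquire with device scope in the
    // global segment.
    RequestPtr req = new Request(0x1000 /* paddr */, 4 /* size */,
                                 Request::ACQUIRE, mid);
    req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                Request::DEVICE_SCOPE |
                                Request::GLOBAL_SEGMENT);

    assert(req->isAcquire());
    assert(req->isScoped() && req->isDeviceScope());
    assert(req->isGlobalSegment());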
Blake Hechtman 2015-07-20 09:15:18 -05:00
parent b7ea2bc705
commit 34fb6b5e35
6 changed files with 251 additions and 27 deletions

src/mem/protocol/RubySlicc_Exports.sm

@@ -41,7 +41,7 @@ external_type(Tick, primitive="yes", default="0");
 structure(DataBlock, external = "yes", desc="..."){
   void clear();
-  void copyPartial(DataBlock, int, int);
+  void atomicPartial(DataBlock, WriteMask);
 }

 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
@@ -78,6 +78,26 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
   NotPresent, desc="block is NotPresent";
   Busy, desc="block is in a transient state, currently invalid";
 }

+// HSA scopes
+enumeration(HSAScope, desc="...", default="HSAScope_UNSPECIFIED") {
+  UNSPECIFIED, desc="Unspecified scope";
+  NOSCOPE, desc="Explicitly unscoped";
+  WAVEFRONT, desc="Wavefront scope";
+  WORKGROUP, desc="Workgroup scope";
+  DEVICE, desc="Device scope";
+  SYSTEM, desc="System scope";
+}
+
+// HSA segment types
+enumeration(HSASegment, desc="...", default="HSASegment_GLOBAL") {
+  GLOBAL, desc="Global segment";
+  GROUP, desc="Group segment";
+  PRIVATE, desc="Private segment";
+  KERNARG, desc="Kernarg segment";
+  READONLY, desc="Readonly segment";
+  SPILL, desc="Spill segment";
+  ARG, desc="Arg segment";
+}
+
 // TesterStatus
 enumeration(TesterStatus, desc="...") {
@@ -143,9 +163,10 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
 }

 enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
   Default, desc="Replace this with access_types passed to the DMA Ruby object";
   LD, desc="Load";
   ST, desc="Store";
+  FLUSH, desc="Flush request type";
   NULL, desc="Invalid request type";
 }

src/mem/protocol/RubySlicc_Types.sm

@@ -126,6 +126,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
   int Size, desc="size in bytes of access";
   PrefetchBit Prefetch, desc="Is this a prefetch request";
   int contextId, desc="this goes away but must be replace with Nilay";
+  HSAScope scope, desc="HSA scope";
+  HSASegment segment, desc="HSA segment";
 }

 structure(AbstractEntry, primitive="yes", external = "yes") {

src/mem/request.hh

@@ -160,6 +160,12 @@ class Request
         /** The request should be marked with RELEASE. */
         RELEASE = 0x00040000,

+        /**
+         * The request should be marked with KERNEL.
+         * Used to indicate the synchronization associated with a GPU kernel
+         * launch or completion.
+         */
+        KERNEL = 0x00001000,
+
         /**
          * The request should be handled by the generic IPR code (only
          * valid together with MMAPPED_IPR)
@@ -198,6 +204,37 @@ class Request
     };
     /** @} */

+    typedef uint32_t MemSpaceConfigFlagsType;
+    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
+
+    enum : MemSpaceConfigFlagsType {
+        /** Has a synchronization scope been set? */
+        SCOPE_VALID      = 0x00000001,
+        /** Access has Wavefront scope visibility */
+        WAVEFRONT_SCOPE  = 0x00000002,
+        /** Access has Workgroup scope visibility */
+        WORKGROUP_SCOPE  = 0x00000004,
+        /** Access has Device (e.g., GPU) scope visibility */
+        DEVICE_SCOPE     = 0x00000008,
+        /** Access has System (e.g., CPU + GPU) scope visibility */
+        SYSTEM_SCOPE     = 0x00000010,
+
+        /** Global Segment */
+        GLOBAL_SEGMENT   = 0x00000020,
+        /** Group Segment */
+        GROUP_SEGMENT    = 0x00000040,
+        /** Private Segment */
+        PRIVATE_SEGMENT  = 0x00000080,
+        /** Kernarg Segment */
+        KERNARG_SEGMENT  = 0x00000100,
+        /** Readonly Segment */
+        READONLY_SEGMENT = 0x00000200,
+        /** Spill Segment */
+        SPILL_SEGMENT    = 0x00000400,
+        /** Arg Segment */
+        ARG_SEGMENT      = 0x00000800,
+    };
+
   private:
     typedef uint8_t PrivateFlagsType;
     typedef ::Flags<PrivateFlagsType> PrivateFlags;
@@ -268,6 +305,9 @@ class Request
     /** Flag structure for the request. */
     Flags _flags;

+    /** Memory space configuration flag structure for the request. */
+    MemSpaceConfigFlags _memSpaceConfigFlags;
+
     /** Private flags for field validity checking. */
     PrivateFlags privateFlags;
@@ -520,6 +560,13 @@ class Request
         _flags.set(flags);
     }

+    void
+    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    {
+        assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
+        _memSpaceConfigFlags.set(extraFlags);
+    }
+
     /** Accessor function for vaddr.*/
     bool
     hasVaddr() const
@@ -685,7 +732,7 @@ class Request
         _reqInstSeqNum = seq_num;
     }

     /** Accessor functions for flags. Note that these are for testing
         only; setting flags should be done via setFlags(). */
     bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
     bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
@@ -701,6 +748,88 @@ class Request
     bool isPTWalk() const { return _flags.isSet(PT_WALK); }
     bool isAcquire() const { return _flags.isSet(ACQUIRE); }
     bool isRelease() const { return _flags.isSet(RELEASE); }
+    bool isKernel() const { return _flags.isSet(KERNEL); }
+
+    /**
+     * Accessor functions for the memory space configuration flags, used by
+     * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note
+     * that these are for testing only; setting extraFlags should be done
+     * via setMemSpaceConfigFlags().
+     */
+    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
+
+    bool
+    isWavefrontScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
+    }
+
+    bool
+    isWorkgroupScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
+    }
+
+    bool
+    isDeviceScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
+    }
+
+    bool
+    isSystemScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
+    }
+
+    bool
+    isGlobalSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
+               (!isGroupSegment() && !isPrivateSegment() &&
+                !isKernargSegment() && !isReadonlySegment() &&
+                !isSpillSegment() && !isArgSegment());
+    }
+
+    bool
+    isGroupSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
+    }
+
+    bool
+    isPrivateSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
+    }
+
+    bool
+    isKernargSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
+    }
+
+    bool
+    isReadonlySegment() const
+    {
+        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
+    }
+
+    bool
+    isSpillSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
+    }
+
+    bool
+    isArgSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
+    }
 };

 #endif // __MEM_REQUEST_HH__
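
Note on the segment accessors above: isGlobalSegment() also returns true when
no segment flag has been set at all, so requests from models that never call
setMemSpaceConfigFlags() (e.g., plain CPU accesses) fall into the global
segment by default. A small sketch of that behavior, with placeholder address,
size, and MasterID `mid`:

    RequestPtr req = new Request(0x2000, 8, 0, mid);
    assert(req->isGlobalSegment());    // no segment bit set: global by default

    req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
    assert(req->isSpillSegment());
    assert(!req->isGlobalSegment());   // explicit segment overrides the default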

src/mem/ruby/common/DataBlock.hh

@@ -60,7 +60,6 @@ class DataBlock
     const uint8_t *getData(int offset, int len) const;
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
-    void copyPartial(const DataBlock & dblk, int offset, int len);
     bool equal(const DataBlock& obj) const;
     void print(std::ostream& out) const;

src/mem/ruby/slicc_interface/RubyRequest.hh

@@ -30,12 +30,16 @@
 #define __MEM_RUBY_SLICC_INTERFACE_RUBY_REQUEST_HH__

 #include <ostream>
+#include <vector>

+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
 #include "mem/protocol/Message.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/protocol/RubyRequestType.hh"
 #include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/DataBlock.hh"

 class RubyRequest : public Message
 {
@@ -50,11 +54,17 @@ class RubyRequest : public Message
     uint8_t* data;
     PacketPtr pkt;
     ContextID m_contextId;
+    int m_wfid;
+    HSAScope m_scope;
+    HSASegment m_segment;

     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
         PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100)
+        ContextID _proc_id = 100, ContextID _core_id = 99,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -64,11 +74,65 @@ class RubyRequest : public Message
           m_Prefetch(_pb),
           data(_data),
           pkt(_pkt),
-          m_contextId(_proc_id)
+          m_contextId(_core_id),
+          m_scope(_scope),
+          m_segment(_segment)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }

+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
     RubyRequest(Tick curTime) : Message(curTime) {}
     MsgPtr clone() const
     { return std::shared_ptr<Message>(new RubyRequest(*this)); }
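
For illustration, a hypothetical caller (e.g., a GPU coalescer, which this
patch does not include) could use the new write-mask constructor roughly as
follows; `paddr`, `data`, `pc`, `pkt`, `wfid`, and `coreId` are placeholders:

    std::vector<bool> writeMask(64, true);   // hypothetical 64-lane wavefront
    DataBlock dataBlock;

    auto msg = std::make_shared<RubyRequest>(
        curTick(), paddr, data, 64 /* len */, pc, RubyRequestType_ST,
        RubyAccessMode_Supervisor, pkt, PrefetchBit_No,
        wfid /* _proc_id */, coreId /* _core_id */,
        64 /* _wm_size */, writeMask, dataBlock,
        HSAScope_WAVEFRONT, HSASegment_GLOBAL);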

View file

@@ -237,25 +237,27 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
     // Check for pio requests and directly send them to the dedicated
     // pio port.
-    if (!isPhysMemAddress(pkt->getAddr())) {
-        assert(ruby_port->memMasterPort.isConnected());
-        DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n",
-                pkt->getAddr());
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        if (!isPhysMemAddress(pkt->getAddr())) {
+            assert(ruby_port->memMasterPort.isConnected());
+            DPRINTF(RubyPort, "Request address %#x assumed to be a "
+                    "pio address\n", pkt->getAddr());

-        // Save the port in the sender state object to be used later to
-        // route the response
-        pkt->pushSenderState(new SenderState(this));
+            // Save the port in the sender state object to be used later to
+            // route the response
+            pkt->pushSenderState(new SenderState(this));

-        // send next cycle
-        RubySystem *rs = ruby_port->m_ruby_system;
-        ruby_port->memMasterPort.schedTimingReq(pkt,
-            curTick() + rs->clockPeriod());
-        return true;
-    }
+            // send next cycle
+            RubySystem *rs = ruby_port->m_ruby_system;
+            ruby_port->memMasterPort.schedTimingReq(pkt,
+                curTick() + rs->clockPeriod());
+            return true;
+        }

-    assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
-           RubySystem::getBlockSizeBytes());
+        assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
+               RubySystem::getBlockSizeBytes());
+    }

     // Submit the ruby request
     RequestStatus requestStatus = ruby_port->makeRequest(pkt);
@@ -272,9 +274,11 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
         return true;
     }

-    DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
-            pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        DPRINTF(RubyPort,
+                "Request for address %#x did not issue because %s\n",
+                pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    }

     addToRetryList();
@@ -466,11 +470,16 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
         }
     }

-    // Flush requests don't access physical memory
-    if (pkt->isFlush()) {
+    // Flush, acquire, and release requests don't access physical memory
+    if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
         accessPhysMem = false;
     }

+    if (pkt->req->isKernel()) {
+        accessPhysMem = false;
+        needsResponse = true;
+    }
+
     DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);

     RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
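
On the sender side (not shown in this diff), the fence handling above implies
a packet built roughly as in the sketch below; `mid` is a placeholder MasterID
and MemCmd::MemFenceReq comes from the rest of this patch series:

    // Zero-sized fence: no address is touched, so recvTimingReq() skips the
    // pio-address and cache-line-size checks for it.
    RequestPtr req = new Request(0 /* paddr */, 0 /* size */, 0 /* flags */,
                                 mid);
    req->setFlags(Request::KERNEL | Request::RELEASE);

    PacketPtr pkt = new Packet(req, MemCmd::MemFenceReq);
    // In hitCallback(), isKernel() clears accessPhysMem and forces a
    // response.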