mem: misc flags for AMD gpu model
This patch adds support for marking memory requests/packets with attributes defined in HSA, such as memory order and scope.
This commit is contained in:
parent
b7ea2bc705
commit
34fb6b5e35
6 changed files with 251 additions and 27 deletions
|
@ -41,7 +41,7 @@ external_type(Tick, primitive="yes", default="0");
|
|||
|
||||
structure(DataBlock, external = "yes", desc="..."){
|
||||
void clear();
|
||||
void copyPartial(DataBlock, int, int);
|
||||
void atomicPartial(DataBlock, WriteMask);
|
||||
}
|
||||
|
||||
bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
|
||||
|
@ -78,6 +78,26 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
|
|||
NotPresent, desc="block is NotPresent";
|
||||
Busy, desc="block is in a transient state, currently invalid";
|
||||
}
|
||||
//HSA scopes
|
||||
enumeration(HSAScope, desc="...", default="HSAScope_UNSPECIFIED") {
|
||||
UNSPECIFIED, desc="Unspecified scope";
|
||||
NOSCOPE, desc="Explictly unscoped";
|
||||
WAVEFRONT, desc="Wavefront scope";
|
||||
WORKGROUP, desc="Workgroup scope";
|
||||
DEVICE, desc="Device scope";
|
||||
SYSTEM, desc="System scope";
|
||||
}
|
||||
|
||||
// HSA segment types
|
||||
enumeration(HSASegment, desc="...", default="HSASegment_GLOBAL") {
|
||||
GLOBAL, desc="Global segment";
|
||||
GROUP, desc="Group segment";
|
||||
PRIVATE, desc="Private segment";
|
||||
KERNARG, desc="Kernarg segment";
|
||||
READONLY, desc="Readonly segment";
|
||||
SPILL, desc="Spill segment";
|
||||
ARG, desc="Arg segment";
|
||||
}
|
||||
|
||||
// TesterStatus
|
||||
enumeration(TesterStatus, desc="...") {
|
||||
|
@ -143,9 +163,10 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
|
|||
}
|
||||
|
||||
enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
|
||||
Default, desc="Replace this with access_types passed to the DMA Ruby object";
|
||||
Default, desc="Replace this with access_types passed to the DMA Ruby object";
|
||||
LD, desc="Load";
|
||||
ST, desc="Store";
|
||||
FLUSH, desc="Flush request type";
|
||||
NULL, desc="Invalid request type";
|
||||
}
|
||||
|
||||
|
|
|
@ -126,6 +126,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
|
|||
int Size, desc="size in bytes of access";
|
||||
PrefetchBit Prefetch, desc="Is this a prefetch request";
|
||||
int contextId, desc="this goes away but must be replace with Nilay";
|
||||
HSAScope scope, desc="HSA scope";
|
||||
HSASegment segment, desc="HSA segment";
|
||||
}
|
||||
|
||||
structure(AbstractEntry, primitive="yes", external = "yes") {
|
||||
|
|
|
@ -160,6 +160,12 @@ class Request
|
|||
/** The request should be marked with RELEASE. */
|
||||
RELEASE = 0x00040000,
|
||||
|
||||
/** The request should be marked with KERNEL.
|
||||
* Used to indicate the synchronization associated with a GPU kernel
|
||||
* launch or completion.
|
||||
*/
|
||||
KERNEL = 0x00001000,
|
||||
|
||||
/**
|
||||
* The request should be handled by the generic IPR code (only
|
||||
* valid together with MMAPPED_IPR)
|
||||
|
@ -198,6 +204,37 @@ class Request
|
|||
};
|
||||
/** @} */
|
||||
|
||||
typedef uint32_t MemSpaceConfigFlagsType;
|
||||
typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
|
||||
|
||||
enum : MemSpaceConfigFlagsType {
|
||||
/** Has a synchronization scope been set? */
|
||||
SCOPE_VALID = 0x00000001,
|
||||
/** Access has Wavefront scope visibility */
|
||||
WAVEFRONT_SCOPE = 0x00000002,
|
||||
/** Access has Workgroup scope visibility */
|
||||
WORKGROUP_SCOPE = 0x00000004,
|
||||
/** Access has Device (e.g., GPU) scope visibility */
|
||||
DEVICE_SCOPE = 0x00000008,
|
||||
/** Access has System (e.g., CPU + GPU) scope visibility */
|
||||
SYSTEM_SCOPE = 0x00000010,
|
||||
|
||||
/** Global Segment */
|
||||
GLOBAL_SEGMENT = 0x00000020,
|
||||
/** Group Segment */
|
||||
GROUP_SEGMENT = 0x00000040,
|
||||
/** Private Segment */
|
||||
PRIVATE_SEGMENT = 0x00000080,
|
||||
/** Kergarg Segment */
|
||||
KERNARG_SEGMENT = 0x00000100,
|
||||
/** Readonly Segment */
|
||||
READONLY_SEGMENT = 0x00000200,
|
||||
/** Spill Segment */
|
||||
SPILL_SEGMENT = 0x00000400,
|
||||
/** Arg Segment */
|
||||
ARG_SEGMENT = 0x00000800,
|
||||
};
|
||||
|
||||
private:
|
||||
typedef uint8_t PrivateFlagsType;
|
||||
typedef ::Flags<PrivateFlagsType> PrivateFlags;
|
||||
|
@ -268,6 +305,9 @@ class Request
|
|||
/** Flag structure for the request. */
|
||||
Flags _flags;
|
||||
|
||||
/** Memory space configuraiton flag structure for the request. */
|
||||
MemSpaceConfigFlags _memSpaceConfigFlags;
|
||||
|
||||
/** Private flags for field validity checking. */
|
||||
PrivateFlags privateFlags;
|
||||
|
||||
|
@ -520,6 +560,13 @@ class Request
|
|||
_flags.set(flags);
|
||||
}
|
||||
|
||||
void
|
||||
setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
|
||||
{
|
||||
assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
|
||||
_memSpaceConfigFlags.set(extraFlags);
|
||||
}
|
||||
|
||||
/** Accessor function for vaddr.*/
|
||||
bool
|
||||
hasVaddr() const
|
||||
|
@ -685,7 +732,7 @@ class Request
|
|||
_reqInstSeqNum = seq_num;
|
||||
}
|
||||
|
||||
/** Accessor functions for flags. Note that these are for testing
|
||||
/** Accessor functions for flags. Note that these are for testing
|
||||
only; setting flags should be done via setFlags(). */
|
||||
bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
|
||||
bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
|
||||
|
@ -701,6 +748,88 @@ class Request
|
|||
bool isPTWalk() const { return _flags.isSet(PT_WALK); }
|
||||
bool isAcquire() const { return _flags.isSet(ACQUIRE); }
|
||||
bool isRelease() const { return _flags.isSet(RELEASE); }
|
||||
bool isKernel() const { return _flags.isSet(KERNEL); }
|
||||
|
||||
/**
|
||||
* Accessor functions for the memory space configuration flags and used by
|
||||
* GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
|
||||
* these are for testing only; setting extraFlags should be done via
|
||||
* setMemSpaceConfigFlags().
|
||||
*/
|
||||
bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
|
||||
|
||||
bool
|
||||
isWavefrontScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isWorkgroupScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isDeviceScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isSystemScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isGlobalSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
|
||||
(!isGroupSegment() && !isPrivateSegment() &&
|
||||
!isKernargSegment() && !isReadonlySegment() &&
|
||||
!isSpillSegment() && !isArgSegment());
|
||||
}
|
||||
|
||||
bool
|
||||
isGroupSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isPrivateSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isKernargSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isReadonlySegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isSpillSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isArgSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __MEM_REQUEST_HH__
|
||||
|
|
|
@ -60,7 +60,6 @@ class DataBlock
|
|||
const uint8_t *getData(int offset, int len) const;
|
||||
void setByte(int whichByte, uint8_t data);
|
||||
void setData(const uint8_t *data, int offset, int len);
|
||||
void copyPartial(const DataBlock & dblk, int offset, int len);
|
||||
bool equal(const DataBlock& obj) const;
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
|
|
|
@ -30,12 +30,16 @@
|
|||
#define __MEM_RUBY_SLICC_INTERFACE_RUBY_REQUEST_HH__
|
||||
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include "mem/protocol/HSAScope.hh"
|
||||
#include "mem/protocol/HSASegment.hh"
|
||||
#include "mem/protocol/Message.hh"
|
||||
#include "mem/protocol/PrefetchBit.hh"
|
||||
#include "mem/protocol/RubyAccessMode.hh"
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
|
||||
class RubyRequest : public Message
|
||||
{
|
||||
|
@ -50,11 +54,17 @@ class RubyRequest : public Message
|
|||
uint8_t* data;
|
||||
PacketPtr pkt;
|
||||
ContextID m_contextId;
|
||||
int m_wfid;
|
||||
HSAScope m_scope;
|
||||
HSASegment m_segment;
|
||||
|
||||
|
||||
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
|
||||
uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
|
||||
PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
|
||||
ContextID _proc_id = 100)
|
||||
ContextID _proc_id = 100, ContextID _core_id = 99,
|
||||
HSAScope _scope = HSAScope_UNSPECIFIED,
|
||||
HSASegment _segment = HSASegment_GLOBAL)
|
||||
: Message(curTime),
|
||||
m_PhysicalAddress(_paddr),
|
||||
m_Type(_type),
|
||||
|
@ -64,11 +74,65 @@ class RubyRequest : public Message
|
|||
m_Prefetch(_pb),
|
||||
data(_data),
|
||||
pkt(_pkt),
|
||||
m_contextId(_proc_id)
|
||||
m_contextId(_core_id),
|
||||
m_scope(_scope),
|
||||
m_segment(_segment)
|
||||
{
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
}
|
||||
|
||||
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
|
||||
uint64_t _pc, RubyRequestType _type,
|
||||
RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
|
||||
unsigned _proc_id, unsigned _core_id,
|
||||
int _wm_size, std::vector<bool> & _wm_mask,
|
||||
DataBlock & _Data,
|
||||
HSAScope _scope = HSAScope_UNSPECIFIED,
|
||||
HSASegment _segment = HSASegment_GLOBAL)
|
||||
: Message(curTime),
|
||||
m_PhysicalAddress(_paddr),
|
||||
m_Type(_type),
|
||||
m_ProgramCounter(_pc),
|
||||
m_AccessMode(_access_mode),
|
||||
m_Size(_len),
|
||||
m_Prefetch(_pb),
|
||||
data(_data),
|
||||
pkt(_pkt),
|
||||
m_contextId(_core_id),
|
||||
m_wfid(_proc_id),
|
||||
m_scope(_scope),
|
||||
m_segment(_segment)
|
||||
{
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
}
|
||||
|
||||
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
|
||||
uint64_t _pc, RubyRequestType _type,
|
||||
RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
|
||||
unsigned _proc_id, unsigned _core_id,
|
||||
int _wm_size, std::vector<bool> & _wm_mask,
|
||||
DataBlock & _Data,
|
||||
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
|
||||
HSAScope _scope = HSAScope_UNSPECIFIED,
|
||||
HSASegment _segment = HSASegment_GLOBAL)
|
||||
: Message(curTime),
|
||||
m_PhysicalAddress(_paddr),
|
||||
m_Type(_type),
|
||||
m_ProgramCounter(_pc),
|
||||
m_AccessMode(_access_mode),
|
||||
m_Size(_len),
|
||||
m_Prefetch(_pb),
|
||||
data(_data),
|
||||
pkt(_pkt),
|
||||
m_contextId(_core_id),
|
||||
m_wfid(_proc_id),
|
||||
m_scope(_scope),
|
||||
m_segment(_segment)
|
||||
{
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
}
|
||||
|
||||
|
||||
RubyRequest(Tick curTime) : Message(curTime) {}
|
||||
MsgPtr clone() const
|
||||
{ return std::shared_ptr<Message>(new RubyRequest(*this)); }
|
||||
|
|
|
@ -237,25 +237,27 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
|
|||
|
||||
// Check for pio requests and directly send them to the dedicated
|
||||
// pio port.
|
||||
if (!isPhysMemAddress(pkt->getAddr())) {
|
||||
assert(ruby_port->memMasterPort.isConnected());
|
||||
DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n",
|
||||
pkt->getAddr());
|
||||
if (pkt->cmd != MemCmd::MemFenceReq) {
|
||||
if (!isPhysMemAddress(pkt->getAddr())) {
|
||||
assert(ruby_port->memMasterPort.isConnected());
|
||||
DPRINTF(RubyPort, "Request address %#x assumed to be a "
|
||||
"pio address\n", pkt->getAddr());
|
||||
|
||||
// Save the port in the sender state object to be used later to
|
||||
// route the response
|
||||
pkt->pushSenderState(new SenderState(this));
|
||||
// Save the port in the sender state object to be used later to
|
||||
// route the response
|
||||
pkt->pushSenderState(new SenderState(this));
|
||||
|
||||
// send next cycle
|
||||
RubySystem *rs = ruby_port->m_ruby_system;
|
||||
ruby_port->memMasterPort.schedTimingReq(pkt,
|
||||
curTick() + rs->clockPeriod());
|
||||
return true;
|
||||
// send next cycle
|
||||
RubySystem *rs = ruby_port->m_ruby_system;
|
||||
ruby_port->memMasterPort.schedTimingReq(pkt,
|
||||
curTick() + rs->clockPeriod());
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
|
||||
RubySystem::getBlockSizeBytes());
|
||||
}
|
||||
|
||||
assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
|
||||
RubySystem::getBlockSizeBytes());
|
||||
|
||||
// Submit the ruby request
|
||||
RequestStatus requestStatus = ruby_port->makeRequest(pkt);
|
||||
|
||||
|
@ -272,9 +274,11 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
|
||||
pkt->getAddr(), RequestStatus_to_string(requestStatus));
|
||||
if (pkt->cmd != MemCmd::MemFenceReq) {
|
||||
DPRINTF(RubyPort,
|
||||
"Request for address %#x did not issued because %s\n",
|
||||
pkt->getAddr(), RequestStatus_to_string(requestStatus));
|
||||
}
|
||||
|
||||
addToRetryList();
|
||||
|
||||
|
@ -466,11 +470,16 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
|
|||
}
|
||||
}
|
||||
|
||||
// Flush requests don't access physical memory
|
||||
if (pkt->isFlush()) {
|
||||
// Flush, acquire, release requests don't access physical memory
|
||||
if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
|
||||
accessPhysMem = false;
|
||||
}
|
||||
|
||||
if (pkt->req->isKernel()) {
|
||||
accessPhysMem = false;
|
||||
needsResponse = true;
|
||||
}
|
||||
|
||||
DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);
|
||||
|
||||
RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
|
||||
|
|
Loading…
Reference in a new issue