From 34fb6b5e35db751f310aee824046107e57a0ba03 Mon Sep 17 00:00:00 2001
From: Blake Hechtman
Date: Mon, 20 Jul 2015 09:15:18 -0500
Subject: [PATCH] mem: misc flags for AMD gpu model

This patch adds support for marking memory requests/packets with attributes
defined in HSA, such as memory order and scope.
---
 src/mem/protocol/RubySlicc_Exports.sm       |  25 +++-
 src/mem/protocol/RubySlicc_Types.sm         |   2 +
 src/mem/request.hh                          | 131 +++++++++++++++++++-
 src/mem/ruby/common/DataBlock.hh            |   1 -
 src/mem/ruby/slicc_interface/RubyRequest.hh |  70 ++++++++++-
 src/mem/ruby/system/RubyPort.cc             |  49 +++++---
 6 files changed, 251 insertions(+), 27 deletions(-)

diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 617989d15..6fedfeb2d 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -41,7 +41,7 @@ external_type(Tick, primitive="yes", default="0");
 
 structure(DataBlock, external = "yes", desc="..."){
   void clear();
-  void copyPartial(DataBlock, int, int);
+  void atomicPartial(DataBlock, WriteMask);
 }
 
 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
@@ -78,6 +78,26 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
   NotPresent, desc="block is NotPresent";
   Busy,       desc="block is in a transient state, currently invalid";
 }
+
+// HSA scopes
+enumeration(HSAScope, desc="...", default="HSAScope_UNSPECIFIED") {
+  UNSPECIFIED, desc="Unspecified scope";
+  NOSCOPE,     desc="Explicitly unscoped";
+  WAVEFRONT,   desc="Wavefront scope";
+  WORKGROUP,   desc="Workgroup scope";
+  DEVICE,      desc="Device scope";
+  SYSTEM,      desc="System scope";
+}
+
+// HSA segment types
+enumeration(HSASegment, desc="...", default="HSASegment_GLOBAL") {
+  GLOBAL,   desc="Global segment";
+  GROUP,    desc="Group segment";
+  PRIVATE,  desc="Private segment";
+  KERNARG,  desc="Kernarg segment";
+  READONLY, desc="Readonly segment";
+  SPILL,    desc="Spill segment";
+  ARG,      desc="Arg segment";
+}
 
 // TesterStatus
 enumeration(TesterStatus, desc="...") {
@@ -143,9 +163,10 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
 }
 
 enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
-  Default, desc="Replace this with access_types passed to the DMA Ruby object";
+  Default, desc="Replace this with access_types passed to the DMA Ruby object";
   LD,      desc="Load";
   ST,      desc="Store";
+  FLUSH,   desc="Flush request type";
   NULL,    desc="Invalid request type";
 }
 
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index 8e846098c..c7479089b 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -126,6 +126,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
   int Size,             desc="size in bytes of access";
   PrefetchBit Prefetch, desc="Is this a prefetch request";
   int contextId,        desc="this goes away but must be replace with Nilay";
+  HSAScope scope,       desc="HSA scope";
+  HSASegment segment,   desc="HSA segment";
 }
 
 structure(AbstractEntry, primitive="yes", external = "yes") {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index de781f5d6..bb5e5d59c 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -160,6 +160,12 @@ class Request
     /** The request should be marked with RELEASE. */
     RELEASE                    = 0x00040000,
 
+    /** The request should be marked with KERNEL.
+     * Used to indicate the synchronization associated with a GPU kernel
+     * launch or completion.
+     */
+    KERNEL                     = 0x00001000,
+
     /**
      * The request should be handled by the generic IPR code (only
      * valid together with MMAPPED_IPR)
@@ -198,6 +204,37 @@ class Request
     };
     /** @} */
 
+    typedef uint32_t MemSpaceConfigFlagsType;
+    typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
+
+    enum : MemSpaceConfigFlagsType {
+        /** Has a synchronization scope been set? */
+        SCOPE_VALID            = 0x00000001,
+        /** Access has Wavefront scope visibility */
+        WAVEFRONT_SCOPE        = 0x00000002,
+        /** Access has Workgroup scope visibility */
+        WORKGROUP_SCOPE        = 0x00000004,
+        /** Access has Device (e.g., GPU) scope visibility */
+        DEVICE_SCOPE           = 0x00000008,
+        /** Access has System (e.g., CPU + GPU) scope visibility */
+        SYSTEM_SCOPE           = 0x00000010,
+
+        /** Global Segment */
+        GLOBAL_SEGMENT         = 0x00000020,
+        /** Group Segment */
+        GROUP_SEGMENT          = 0x00000040,
+        /** Private Segment */
+        PRIVATE_SEGMENT        = 0x00000080,
+        /** Kernarg Segment */
+        KERNARG_SEGMENT        = 0x00000100,
+        /** Readonly Segment */
+        READONLY_SEGMENT       = 0x00000200,
+        /** Spill Segment */
+        SPILL_SEGMENT          = 0x00000400,
+        /** Arg Segment */
+        ARG_SEGMENT            = 0x00000800,
+    };
+
   private:
     typedef uint8_t PrivateFlagsType;
     typedef ::Flags<PrivateFlagsType> PrivateFlags;
@@ -268,6 +305,9 @@ class Request
     /** Flag structure for the request. */
     Flags _flags;
 
+    /** Memory space configuration flag structure for the request. */
+    MemSpaceConfigFlags _memSpaceConfigFlags;
+
     /** Private flags for field validity checking. */
     PrivateFlags privateFlags;
 
@@ -520,6 +560,13 @@ class Request
         _flags.set(flags);
     }
 
+    void
+    setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+    {
+        assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
+        _memSpaceConfigFlags.set(extraFlags);
+    }
+
     /** Accessor function for vaddr.*/
     bool
     hasVaddr() const
@@ -685,7 +732,7 @@ class Request
         _reqInstSeqNum = seq_num;
     }
 
-    /** Accessor functions for flags. Note that these are for testing
+    /** Accessor functions for flags. Note that these are for testing
         only; setting flags should be done via setFlags(). */
     bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
     bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
@@ -701,6 +748,88 @@ class Request
     bool isPTWalk() const { return _flags.isSet(PT_WALK); }
     bool isAcquire() const { return _flags.isSet(ACQUIRE); }
     bool isRelease() const { return _flags.isSet(RELEASE); }
+    bool isKernel() const { return _flags.isSet(KERNEL); }
+
+    /**
+     * Accessor functions for the memory space configuration flags, used by
+     * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note
+     * that these are for testing only; setting extraFlags should be done
+     * via setMemSpaceConfigFlags().
+     */
+    bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
+
+    bool
+    isWavefrontScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
+    }
+
+    bool
+    isWorkgroupScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
+    }
+
+    bool
+    isDeviceScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
+    }
+
+    bool
+    isSystemScope() const
+    {
+        assert(isScoped());
+        return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
+    }
+
+    bool
+    isGlobalSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
+               (!isGroupSegment() && !isPrivateSegment() &&
+                !isKernargSegment() && !isReadonlySegment() &&
+                !isSpillSegment() && !isArgSegment());
+    }
+
+    bool
+    isGroupSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
+    }
+
+    bool
+    isPrivateSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
+    }
+
+    bool
+    isKernargSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
+    }
+
+    bool
+    isReadonlySegment() const
+    {
+        return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
+    }
+
+    bool
+    isSpillSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
+    }
+
+    bool
+    isArgSegment() const
+    {
+        return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
+    }
 };
 
 #endif // __MEM_REQUEST_HH__
diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh
index ac08fac82..49ce3624a 100644
--- a/src/mem/ruby/common/DataBlock.hh
+++ b/src/mem/ruby/common/DataBlock.hh
@@ -60,7 +60,6 @@ class DataBlock
     const uint8_t *getData(int offset, int len) const;
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
-    void copyPartial(const DataBlock & dblk, int offset, int len);
     bool equal(const DataBlock& obj) const;
     void print(std::ostream& out) const;
 
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index b17269a78..73f214a20 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -30,12 +30,16 @@
 #define __MEM_RUBY_SLICC_INTERFACE_RUBY_REQUEST_HH__
 
 #include <ostream>
+#include <vector>
 
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
 #include "mem/protocol/Message.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/protocol/RubyRequestType.hh"
 #include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/DataBlock.hh"
 
 class RubyRequest : public Message
 {
@@ -50,11 +54,17 @@ class RubyRequest : public Message
     uint8_t* data;
     PacketPtr pkt;
     ContextID m_contextId;
+    int m_wfid;
+    HSAScope m_scope;
+    HSASegment m_segment;
+
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
         PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
-        ContextID _proc_id = 100)
+        ContextID _proc_id = 100, ContextID _core_id = 99,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
         : Message(curTime),
           m_PhysicalAddress(_paddr),
           m_Type(_type),
@@ -64,11 +74,65 @@ class RubyRequest : public Message
           m_Prefetch(_pb),
           data(_data),
           pkt(_pkt),
-          m_contextId(_proc_id)
+          m_contextId(_core_id),
+          m_scope(_scope),
+          m_segment(_segment)
     {
-      m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
     }
 
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
+    RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+        uint64_t _pc, RubyRequestType _type,
+        RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+        unsigned _proc_id, unsigned _core_id,
+        int _wm_size, std::vector<bool> & _wm_mask,
+        DataBlock & _Data,
+        std::vector< std::pair<Addr, AtomicOpFunctor*> > _atomicOps,
+        HSAScope _scope = HSAScope_UNSPECIFIED,
+        HSASegment _segment = HSASegment_GLOBAL)
+        : Message(curTime),
+          m_PhysicalAddress(_paddr),
+          m_Type(_type),
+          m_ProgramCounter(_pc),
+          m_AccessMode(_access_mode),
+          m_Size(_len),
+          m_Prefetch(_pb),
+          data(_data),
+          pkt(_pkt),
+          m_contextId(_core_id),
+          m_wfid(_proc_id),
+          m_scope(_scope),
+          m_segment(_segment)
+    {
+        m_LineAddress = makeLineAddress(m_PhysicalAddress);
+    }
+
     RubyRequest(Tick curTime) : Message(curTime) {}
     MsgPtr clone() const
     { return std::shared_ptr<Message>(new RubyRequest(*this)); }
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 52acaf8c3..5a5f528bb 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -237,25 +237,27 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
     // Check for pio requests and directly send them to the dedicated
     // pio port.
-    if (!isPhysMemAddress(pkt->getAddr())) {
-        assert(ruby_port->memMasterPort.isConnected());
-        DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n",
-                pkt->getAddr());
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        if (!isPhysMemAddress(pkt->getAddr())) {
+            assert(ruby_port->memMasterPort.isConnected());
+            DPRINTF(RubyPort, "Request address %#x assumed to be a "
+                    "pio address\n", pkt->getAddr());
 
-        // Save the port in the sender state object to be used later to
-        // route the response
-        pkt->pushSenderState(new SenderState(this));
+            // Save the port in the sender state object to be used later to
+            // route the response
+            pkt->pushSenderState(new SenderState(this));
 
-        // send next cycle
-        RubySystem *rs = ruby_port->m_ruby_system;
-        ruby_port->memMasterPort.schedTimingReq(pkt,
-            curTick() + rs->clockPeriod());
-        return true;
+            // send next cycle
+            RubySystem *rs = ruby_port->m_ruby_system;
+            ruby_port->memMasterPort.schedTimingReq(pkt,
+                curTick() + rs->clockPeriod());
+            return true;
+        }
+
+        assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
+               RubySystem::getBlockSizeBytes());
     }
 
-    assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
-           RubySystem::getBlockSizeBytes());
-
     // Submit the ruby request
     RequestStatus requestStatus = ruby_port->makeRequest(pkt);
 
@@ -272,9 +274,11 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
         return true;
     }
-
-    DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
-            pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    if (pkt->cmd != MemCmd::MemFenceReq) {
+        DPRINTF(RubyPort,
+                "Request for address %#x was not issued because %s\n",
+                pkt->getAddr(), RequestStatus_to_string(requestStatus));
+    }
 
     addToRetryList();
 
@@ -466,11 +470,16 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
         }
     }
 
-    // Flush requests don't access physical memory
-    if (pkt->isFlush()) {
+    // Flush, acquire, and release requests don't access physical memory
+    if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
         accessPhysMem = false;
     }
 
+    if (pkt->req->isKernel()) {
+        accessPhysMem = false;
+        needsResponse = true;
+    }
+
     DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);
 
     RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
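
Usage note (not part of the patch): the sketch below shows how a GPU core
model might tag a memory request with the new HSA attributes before issuing
it to Ruby. The helper name and the way the Request is obtained are
hypothetical; the flag values, setMemSpaceConfigFlags(), and the accessor
semantics come from the request.hh changes above.

    #include <cassert>
    #include "mem/request.hh"

    // Hypothetical helper: mark a GPU load as device-scoped and targeting
    // the global segment. The request must already carry a valid address,
    // because setMemSpaceConfigFlags() asserts VALID_PADDR | VALID_VADDR.
    void
    tagGpuLoad(Request *req)
    {
        req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                    Request::DEVICE_SCOPE |
                                    Request::GLOBAL_SEGMENT);

        assert(req->isScoped() && req->isDeviceScope());
        // GLOBAL_SEGMENT need not be set explicitly: isGlobalSegment()
        // also returns true when no other segment flag is set.
        assert(req->isGlobalSegment());
    }

The scope accessors assert(isScoped()), so SCOPE_VALID must accompany any of
the *_SCOPE flags; a request that never calls setMemSpaceConfigFlags() still
reports isGlobalSegment() == true, matching the HSASegment_GLOBAL default on
the Ruby side.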