From 9567c839fecfdb29a59f9da50cf706fcb22a2bb1 Mon Sep 17 00:00:00 2001 From: Joel Hestness Date: Fri, 14 Aug 2015 00:19:37 -0500 Subject: [PATCH] ruby: Remove the RubyCache/CacheMemory latency The RubyCache (CacheMemory) latency parameter is only used for top-level caches instantiated for Ruby coherence protocols. However, the top-level cache hit latency is assessed by the Sequencer as accesses flow through to the cache hierarchy. Further, protocol state machines should be enforcing these cache hit latencies, but RubyCaches do not expose their latency to any existing state machines through the SLICC/C++ interface. Thus, the RubyCache latency parameter is superfluous for all caches. This is confusing for users. As a step toward pushing L0/L1 cache hit latency into the top-level cache controllers, move their latencies out of the RubyCache declarations and over to their Sequencers. Eventually, these Sequencer parameters should be exposed as parameters to the top-level cache controllers, which should assess the latency. NOTE: Assessing these latencies in the cache controllers will require modifying each to eliminate instantaneous Ruby hit callbacks in transitions that finish accesses, which is likely a large undertaking. 
--- configs/ruby/MESI_Three_Level.py | 16 ++++------------ configs/ruby/MESI_Two_Level.py | 12 +++--------- configs/ruby/MI_example.py | 5 ++--- configs/ruby/MOESI_CMP_directory.py | 12 +++--------- configs/ruby/MOESI_CMP_token.py | 12 +++--------- configs/ruby/MOESI_hammer.py | 18 +++++------------- configs/ruby/Network_test.py | 5 ++--- src/mem/ruby/structures/Cache.py | 1 - src/mem/ruby/structures/CacheMemory.cc | 1 - src/mem/ruby/structures/CacheMemory.hh | 3 --- src/mem/ruby/system/Sequencer.cc | 17 +++++++++++++---- src/mem/ruby/system/Sequencer.hh | 7 +++++++ src/mem/ruby/system/Sequencer.py | 6 ++++++ 13 files changed, 48 insertions(+), 67 deletions(-) diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index a4074e842..74eb15887 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -37,19 +37,11 @@ from Ruby import create_topology from Ruby import send_evicts # -# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class L0Cache(RubyCache): - latency = 1 - -class L1Cache(RubyCache): - latency = 5 - -# -# Note: the L2 Cache latency is not currently used -# -class L2Cache(RubyCache): - latency = 15 +class L0Cache(RubyCache): pass +class L1Cache(RubyCache): pass +class L2Cache(RubyCache): pass def define_options(parser): parser.add_option("--num-clusters", type="int", default=1, diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index d911d76ef..9f286fa53 100644 --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -35,16 +35,10 @@ from Ruby import create_topology from Ruby import send_evicts # -# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class L1Cache(RubyCache): - latency = 3 - -# -# Note: the L2 Cache latency is not currently used -# -class L2Cache(RubyCache): - latency = 15 +class L1Cache(RubyCache): pass 
+class L2Cache(RubyCache): pass def define_options(parser): return diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index 3bb332c1d..5afee674a 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -35,10 +35,9 @@ from Ruby import create_topology from Ruby import send_evicts # -# Note: the cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class Cache(RubyCache): - latency = 3 +class Cache(RubyCache): pass def define_options(parser): return diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index d32700166..f02c45987 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -35,16 +35,10 @@ from Ruby import create_topology from Ruby import send_evicts # -# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class L1Cache(RubyCache): - latency = 3 - -# -# Note: the L2 Cache latency is not currently used -# -class L2Cache(RubyCache): - latency = 15 +class L1Cache(RubyCache): pass +class L2Cache(RubyCache): pass def define_options(parser): return diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index 26cbc10d9..4b09fc937 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -35,16 +35,10 @@ from Ruby import create_topology from Ruby import send_evicts # -# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class L1Cache(RubyCache): - latency = 2 - -# -# Note: the L2 Cache latency is not currently used -# -class L2Cache(RubyCache): - latency = 10 +class L1Cache(RubyCache): pass +class L2Cache(RubyCache): pass def define_options(parser): parser.add_option("--l1-retries", type="int", default=1, diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index b42138743..afbb25dc3 100644 --- 
a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -35,22 +35,14 @@ from Ruby import create_topology from Ruby import send_evicts # -# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class L1Cache(RubyCache): - latency = 2 - +class L1Cache(RubyCache): pass +class L2Cache(RubyCache): pass # -# Note: the L2 Cache latency is not currently used +# Probe filter is a cache # -class L2Cache(RubyCache): - latency = 10 - -# -# Probe filter is a cache, latency is not used -# -class ProbeFilter(RubyCache): - latency = 1 +class ProbeFilter(RubyCache): pass def define_options(parser): parser.add_option("--allow-atomic-migration", action="store_true", diff --git a/configs/ruby/Network_test.py b/configs/ruby/Network_test.py index a4641ae64..5eeaba98b 100644 --- a/configs/ruby/Network_test.py +++ b/configs/ruby/Network_test.py @@ -34,10 +34,9 @@ from m5.util import addToPath from Ruby import create_topology # -# Note: the cache latency is only used by the sequencer on fast path hits +# Declare caches used by the protocol # -class Cache(RubyCache): - latency = 3 +class Cache(RubyCache): pass def define_options(parser): return diff --git a/src/mem/ruby/structures/Cache.py b/src/mem/ruby/structures/Cache.py index 7f26e659f..4eb87ac74 100644 --- a/src/mem/ruby/structures/Cache.py +++ b/src/mem/ruby/structures/Cache.py @@ -37,7 +37,6 @@ class RubyCache(SimObject): cxx_class = 'CacheMemory' cxx_header = "mem/ruby/structures/CacheMemory.hh" size = Param.MemorySize("capacity in bytes"); - latency = Param.Cycles(""); assoc = Param.Int(""); replacement_policy = Param.ReplacementPolicy(PseudoLRUReplacementPolicy(), "") diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index e444ae09c..64a8e9e8a 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -60,7 +60,6 @@ CacheMemory::CacheMemory(const Params *p) 
p->start_index_bit, p->ruby_system) { m_cache_size = p->size; - m_latency = p->latency; m_cache_assoc = p->assoc; m_replacementPolicy_ptr = p->replacement_policy; m_replacementPolicy_ptr->setCache(this); diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index 57f2885b6..792d8fd93 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -96,7 +96,6 @@ class CacheMemory : public SimObject AbstractCacheEntry* lookup(const Address& address); const AbstractCacheEntry* lookup(const Address& address) const; - Cycles getLatency() const { return m_latency; } Cycles getTagLatency() const { return tagArray.getLatency(); } Cycles getDataLatency() const { return dataArray.getLatency(); } @@ -159,8 +158,6 @@ class CacheMemory : public SimObject CacheMemory& operator=(const CacheMemory& obj); private: - Cycles m_latency; - // Data Members (m_prefix) bool m_is_instruction_only_cache; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 01b868017..36bd9cd62 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -58,6 +58,8 @@ Sequencer::Sequencer(const Params *p) m_instCache_ptr = p->icache; m_dataCache_ptr = p->dcache; + m_data_cache_hit_latency = p->dcache_hit_latency; + m_inst_cache_hit_latency = p->icache_hit_latency; m_max_outstanding_requests = p->max_outstanding_requests; m_deadlock_threshold = p->deadlock_threshold; @@ -65,6 +67,8 @@ Sequencer::Sequencer(const Params *p) assert(m_deadlock_threshold > 0); assert(m_instCache_ptr != NULL); assert(m_dataCache_ptr != NULL); + assert(m_data_cache_hit_latency > 0); + assert(m_inst_cache_hit_latency > 0); m_usingNetworkTester = p->using_network_tester; } @@ -691,12 +695,17 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) msg->getPhysicalAddress(), RubyRequestType_to_string(secondary_type)); - Cycles latency(0); // initialzed to an null value - + // The 
Sequencer currently assesses instruction and data cache hit latency + // for the top-level caches at the beginning of a memory access. + // TODO: Eventually, this latency should be moved to represent the actual + // cache access latency portion of the memory access. This will require + // changing cache controller protocol files to assess the latency on the + // access response path. + Cycles latency(0); // Initialize to zero to catch misconfigured latency if (secondary_type == RubyRequestType_IFETCH) - latency = m_instCache_ptr->getLatency(); + latency = m_inst_cache_hit_latency; else - latency = m_dataCache_ptr->getLatency(); + latency = m_data_cache_hit_latency; // Send the message to the cache controller assert(latency > 0); diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index d5cd17f5f..505b3f3bc 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -180,6 +180,13 @@ class Sequencer : public RubyPort CacheMemory* m_dataCache_ptr; CacheMemory* m_instCache_ptr; + // The cache access latency for top-level caches (L0/L1). These are + // currently assessed at the beginning of each memory access through the + // sequencer. + // TODO: Migrate these latencies into top-level cache controllers. 
+ Cycles m_data_cache_hit_latency; + Cycles m_inst_cache_hit_latency; + typedef m5::hash_map RequestTable; RequestTable m_writeRequestTable; RequestTable m_readRequestTable; diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index e545000cf..7494986e9 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -61,6 +61,12 @@ class RubySequencer(RubyPort): icache = Param.RubyCache("") dcache = Param.RubyCache("") + # Cache latencies currently assessed at the beginning of each access + # NOTE: Setting these values to a value greater than one will result in + # O3 CPU pipeline bubbles and negatively impact performance + # TODO: Latencies should be migrated into each top-level cache controller + icache_hit_latency = Param.Cycles(1, "Inst cache hit latency") + dcache_hit_latency = Param.Cycles(1, "Data cache hit latency") max_outstanding_requests = Param.Int(16, "max requests (incl. prefetches) outstanding") deadlock_threshold = Param.Cycles(500000,