From 63563c9df2eca46231768a448e981e8bb7856655 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Mon, 23 Jan 2012 11:07:14 -0600 Subject: [PATCH] O3, Ruby: Forward invalidations from Ruby to O3 CPU This patch implements the functionality for forwarding invalidations and replacements from the L1 cache of the Ruby memory system to the O3 CPU. The implementation adds a list of ports to RubyPort. Whenever a replacement or an invalidation is performed, the L1 cache forwards this to all the ports, which is the LSQ in case of the O3 CPU. --- configs/ruby/MESI_CMP_directory.py | 2 ++ configs/ruby/MI_example.py | 2 ++ configs/ruby/MOESI_CMP_directory.py | 2 ++ configs/ruby/MOESI_CMP_token.py | 2 ++ configs/ruby/MOESI_hammer.py | 2 ++ .../protocol/MESI_CMP_directory-L1cache.sm | 27 +++++++++----- src/mem/protocol/MI_example-cache.sm | 16 +++++---- .../protocol/MOESI_CMP_directory-L1cache.sm | 24 +++++++++---- src/mem/protocol/MOESI_CMP_token-L1cache.sm | 35 ++++++++++++++----- src/mem/protocol/MOESI_hammer-cache.sm | 26 +++++++++++++- src/mem/protocol/RubySlicc_Types.sm | 1 + src/mem/ruby/system/RubyPort.cc | 12 +++++++ src/mem/ruby/system/RubyPort.hh | 4 ++- src/mem/ruby/system/Sequencer.cc | 6 ++++ src/mem/ruby/system/Sequencer.hh | 2 +- 15 files changed, 131 insertions(+), 32 deletions(-) diff --git a/configs/ruby/MESI_CMP_directory.py b/configs/ruby/MESI_CMP_directory.py index 6e70944b7..6671c307b 100644 --- a/configs/ruby/MESI_CMP_directory.py +++ b/configs/ruby/MESI_CMP_directory.py @@ -89,6 +89,8 @@ def create_system(options, system, piobus, dma_devices, ruby_system): L1IcacheMemory = l1i_cache, L1DcacheMemory = l1d_cache, l2_select_num_bits = l2_bits, + send_evictions = ( + options.cpu_type == "detailed"), ruby_system = ruby_system) cpu_seq = RubySequencer(version = i, diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index eeb81e8a3..13f4c9c80 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -81,6 +81,8 @@ def 
create_system(options, system, piobus, dma_devices, ruby_system): l1_cntrl = L1Cache_Controller(version = i, cntrl_id = cntrl_count, cacheMemory = cache, + send_evictions = ( + options.cpu_type == "detailed"), ruby_system = ruby_system) cpu_seq = RubySequencer(version = i, diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index e3bc9ae85..f6baa4026 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -89,6 +89,8 @@ def create_system(options, system, piobus, dma_devices, ruby_system): L1IcacheMemory = l1i_cache, L1DcacheMemory = l1d_cache, l2_select_num_bits = l2_bits, + send_evictions = ( + options.cpu_type == "detailed"), ruby_system = ruby_system) cpu_seq = RubySequencer(version = i, diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index d11bb320c..79e0f15f9 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -111,6 +111,8 @@ def create_system(options, system, piobus, dma_devices, ruby_system): not options.disable_dyn_timeouts, no_mig_atomic = not \ options.allow_atomic_migration, + send_evictions = ( + options.cpu_type == "detailed"), ruby_system = ruby_system) cpu_seq = RubySequencer(version = i, diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 4cc377ec8..f50315599 100644 --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -104,6 +104,8 @@ def create_system(options, system, piobus, dma_devices, ruby_system): L2cacheMemory = l2_cache, no_mig_atomic = not \ options.allow_atomic_migration, + send_evictions = ( + options.cpu_type == "detailed"), ruby_system = ruby_system) cpu_seq = RubySequencer(version = i, diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index f0be1fd34..934405786 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -27,14 +27,15 @@ * 
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -machine(L1Cache, "MSI Directory L1 Cache CMP") +machine(L1Cache, "MESI Directory L1 Cache CMP") : Sequencer * sequencer, CacheMemory * L1IcacheMemory, CacheMemory * L1DcacheMemory, int l2_select_num_bits, int l1_request_latency = 2, int l1_response_latency = 2, - int to_l2_latency = 1 + int to_l2_latency = 1, + bool send_evictions { // NODE L1 CACHE // From this node's L1 cache TO the network @@ -67,7 +68,6 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit"; M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK"; - E_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK"; SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2"; } @@ -544,6 +544,12 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } } + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } action(g_issuePUTX, "g", desc="send data to the L2 cache") { enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) { @@ -696,7 +702,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") //***************************************************** // Transitions for Load/Store/Replacement/WriteBack from transient states - transition({IS, IM, IS_I, M_I, E_I, SM}, {Load, Ifetch, Store, L1_Replacement}) { + transition({IS, IM, IS_I, M_I, SM}, {Load, Ifetch, Store, L1_Replacement}) { z_recycleMandatoryQueue; } @@ -748,10 +754,12 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } transition(S, L1_Replacement, I) { + forward_eviction_to_cpu; ff_deallocateL1CacheBlock; } transition(S, Inv, I) { + forward_eviction_to_cpu; fi_sendInvAck; l_popRequestQueue; } @@ -770,6 +778,7 @@ machine(L1Cache, 
"MSI Directory L1 Cache CMP") transition(E, L1_Replacement, M_I) { // silent E replacement?? + forward_eviction_to_cpu; i_allocateTBE; g_issuePUTX; // send data, but hold in case forwarded request ff_deallocateL1CacheBlock; @@ -777,11 +786,13 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") transition(E, Inv, I) { // don't send data + forward_eviction_to_cpu; fi_sendInvAck; l_popRequestQueue; } transition(E, Fwd_GETX, I) { + forward_eviction_to_cpu; d_sendDataToRequestor; l_popRequestQueue; } @@ -804,6 +815,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } transition(M, L1_Replacement, M_I) { + forward_eviction_to_cpu; i_allocateTBE; g_issuePUTX; // send data, but hold in case forwarded request ff_deallocateL1CacheBlock; @@ -815,6 +827,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } transition(M, Inv, I) { + forward_eviction_to_cpu; f_sendDataToL2; l_popRequestQueue; } @@ -825,6 +838,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } transition(M, Fwd_GETX, I) { + forward_eviction_to_cpu; d_sendDataToRequestor; l_popRequestQueue; } @@ -866,7 +880,6 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") o_popIncomingResponseQueue; } - transition(IS, DataS_fromL1, S) { u_writeDataToL1Cache; j_sendUnblock; @@ -935,7 +948,6 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") transition(SINK_WB_ACK, {Load, Store, Ifetch, L1_Replacement}){ z_recycleMandatoryQueue; - } transition(SINK_WB_ACK, Inv){ @@ -948,6 +960,3 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") o_popIncomingResponseQueue; } } - - - diff --git a/src/mem/protocol/MI_example-cache.sm b/src/mem/protocol/MI_example-cache.sm index b11fddd95..2f2e4e3d7 100644 --- a/src/mem/protocol/MI_example-cache.sm +++ b/src/mem/protocol/MI_example-cache.sm @@ -3,7 +3,8 @@ machine(L1Cache, "MI Example L1 Cache") : Sequencer * sequencer, CacheMemory * cacheMemory, int cache_response_latency = 12, - int issue_latency = 2 + int issue_latency = 2, + bool send_evictions { // NETWORK BUFFERS @@ -54,7 +55,6 @@ 
machine(L1Cache, "MI Example L1 Cache") DataBlock DataBlk, desc="Data in the block"; } - // TBE fields structure(TBE, desc="...") { State TBEState, desc="Transient state"; @@ -70,7 +70,6 @@ machine(L1Cache, "MI Example L1 Cache") // STRUCTURES - TBETable TBEs, template_hack=""; // PROTOTYPES @@ -249,7 +248,6 @@ machine(L1Cache, "MI Example L1 Cache") } } - action(e_sendData, "e", desc="Send data from cache to requestor") { peek(forwardRequestNetwork_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { @@ -353,13 +351,18 @@ machine(L1Cache, "MI Example L1 Cache") } } + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } action(v_allocateTBE, "v", desc="Allocate TBE") { TBEs.allocate(address); set_tbe(TBEs[address]); } - action(w_deallocateTBE, "w", desc="Deallocate TBE") { TBEs.deallocate(address); unset_tbe(); @@ -435,6 +438,7 @@ machine(L1Cache, "MI Example L1 Cache") transition(M, Fwd_GETX, I) { e_sendData; + forward_eviction_to_cpu; o_popForwardedRequestQueue; } @@ -446,6 +450,7 @@ machine(L1Cache, "MI Example L1 Cache") v_allocateTBE; b_issuePUT; x_copyDataFromCacheToTBE; + forward_eviction_to_cpu; h_deallocateL1CacheBlock; } @@ -474,4 +479,3 @@ machine(L1Cache, "MI Example L1 Cache") o_popForwardedRequestQueue; } } - diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 2845d1ad1..7a5cc6511 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -37,7 +37,8 @@ machine(L1Cache, "Directory protocol") CacheMemory * L1IcacheMemory, CacheMemory * L1DcacheMemory, int l2_select_num_bits, - int request_latency = 2 + int request_latency = 2, + bool send_evictions { // NODE L1 CACHE @@ -530,7 +531,6 @@ machine(L1Cache, 
"Directory protocol") } } - action(ee_sendDataExclusive, "\e", desc="Send data from cache to requestor, don't keep a shared copy") { peek(requestNetwork_in, RequestMsg) { assert(is_valid(cache_entry)); @@ -689,7 +689,6 @@ machine(L1Cache, "Directory protocol") useTimerTable.set(address, 50); } - action(ub_dmaUnblockL2Cache, "ub", desc="Send dma ack to l2 cache") { peek(requestNetwork_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) { @@ -775,7 +774,6 @@ machine(L1Cache, "Directory protocol") } } - // L2 will usually request data for a writeback action(qq_sendWBDataFromTBEToL2, "\q", desc="Send data from TBE to L2") { enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) { @@ -811,7 +809,6 @@ machine(L1Cache, "Directory protocol") //assert(in_msg.Dirty == false); } } - } action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") { @@ -844,7 +841,12 @@ machine(L1Cache, "Directory protocol") } } - + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } action(uu_profileMiss, "\u", desc="Profile the demand miss") { peek(mandatoryQueue_in, RubyRequest) { @@ -931,11 +933,13 @@ machine(L1Cache, "Directory protocol") transition(S, L1_Replacement, SI) { i_allocateTBE; dd_issuePUTS; + forward_eviction_to_cpu; kk_deallocateL1CacheBlock; } transition(S, Inv, I) { f_sendAck; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -966,11 +970,13 @@ machine(L1Cache, "Directory protocol") transition(O, L1_Replacement, OI) { i_allocateTBE; dd_issuePUTO; + forward_eviction_to_cpu; kk_deallocateL1CacheBlock; } transition(O, Fwd_GETX, I) { ee_sendDataExclusive; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -999,16 +1005,19 @@ machine(L1Cache, "Directory protocol") transition(MM, L1_Replacement, MI) { i_allocateTBE; 
d_issuePUTX; + forward_eviction_to_cpu; kk_deallocateL1CacheBlock; } transition(MM, Fwd_GETX, I) { ee_sendDataExclusive; + forward_eviction_to_cpu; l_popForwardQueue; } transition(MM, Fwd_GETS, I) { ee_sendDataExclusive; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -1037,12 +1046,14 @@ machine(L1Cache, "Directory protocol") transition(M, L1_Replacement, MI) { i_allocateTBE; d_issuePUTX; + forward_eviction_to_cpu; kk_deallocateL1CacheBlock; } transition(M, Fwd_GETX, I) { // e_sendData; ee_sendDataExclusive; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -1080,6 +1091,7 @@ machine(L1Cache, "Directory protocol") // Transitions from SM transition(SM, Inv, IM) { f_sendAck; + forward_eviction_to_cpu; l_popForwardQueue; } diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index 66789b594..7cc41cc20 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -43,7 +43,8 @@ machine(L1Cache, "Token protocol") int retry_threshold = 1, int fixed_timeout_latency = 100, bool dynamic_timeout_enabled = true, - bool no_mig_atomic = true + bool no_mig_atomic = true, + bool send_evictions { // From this node's L1 cache TO the network @@ -1398,7 +1399,6 @@ machine(L1Cache, "Token protocol") } } - action(q_updateTokensFromResponse, "q", desc="Update the token count based on the incoming response message") { peek(responseNetwork_in, ResponseMsg) { assert(is_valid(cache_entry)); @@ -1522,6 +1522,13 @@ machine(L1Cache, "Token protocol") } } + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + action(uu_profileMiss, "\u", desc="Profile the demand miss") { peek(mandatoryQueue_in, RubyRequest) { if (L1DcacheMemory.isTagPresent(address)) { @@ -1572,7 +1579,6 @@ machine(L1Cache, "Token protocol") 
zz_stallAndWaitMandatoryQueue; } - // Lockdowns transition({NP, I, S, O, M, MM, M_W, MM_W, IM, SM, OM, IS}, Own_Lock_or_Unlock) { l_popPersistentQueue; @@ -1702,6 +1708,7 @@ machine(L1Cache, "Token protocol") transition(S, L1_Replacement, I) { ta_traceStalledAddress; cc_sharedReplacement; // Only needed in some cases + forward_eviction_to_cpu; gg_deallocateL1CacheBlock; ka_wakeUpAllDependents; } @@ -1709,6 +1716,7 @@ machine(L1Cache, "Token protocol") transition(S, {Transient_GETX, Transient_Local_GETX}, I) { t_sendAckWithCollectedTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu m_popRequestQueue; } @@ -1729,6 +1737,7 @@ machine(L1Cache, "Token protocol") transition({S, S_L}, Persistent_GETX, I_L) { e_sendAckWithCollectedTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -1780,6 +1789,7 @@ machine(L1Cache, "Token protocol") transition(O, L1_Replacement, I) { ta_traceStalledAddress; c_ownedReplacement; + forward_eviction_to_cpu gg_deallocateL1CacheBlock; ka_wakeUpAllDependents; } @@ -1787,12 +1797,14 @@ machine(L1Cache, "Token protocol") transition(O, {Transient_GETX, Transient_Local_GETX}, I) { dd_sendDataWithAllTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu m_popRequestQueue; } transition(O, Persistent_GETX, I_L) { ee_sendDataWithAllTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -1803,6 +1815,7 @@ machine(L1Cache, "Token protocol") transition(O, Persistent_GETS_Last_Token, I_L) { fo_sendDataWithOwnerToken; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -1867,6 +1880,7 @@ machine(L1Cache, "Token protocol") transition(MM, L1_Replacement, I) { ta_traceStalledAddress; c_ownedReplacement; + forward_eviction_to_cpu gg_deallocateL1CacheBlock; ka_wakeUpAllDependents; } @@ -1874,6 +1888,7 @@ machine(L1Cache, "Token protocol") transition(MM, {Transient_GETX, Transient_Local_GETX, Transient_GETS, Transient_Local_GETS}, I) { dd_sendDataWithAllTokens; 
p_informL2AboutTokenLoss; + forward_eviction_to_cpu m_popRequestQueue; } @@ -1885,6 +1900,7 @@ machine(L1Cache, "Token protocol") transition(MM, {Persistent_GETX, Persistent_GETS}, I_L) { ee_sendDataWithAllTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -1934,6 +1950,7 @@ machine(L1Cache, "Token protocol") transition(M, L1_Replacement, I) { ta_traceStalledAddress; c_ownedReplacement; + forward_eviction_to_cpu gg_deallocateL1CacheBlock; ka_wakeUpAllDependents; } @@ -1941,6 +1958,7 @@ machine(L1Cache, "Token protocol") transition(M, {Transient_GETX, Transient_Local_GETX}, I) { dd_sendDataWithAllTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu m_popRequestQueue; } @@ -1961,6 +1979,7 @@ machine(L1Cache, "Token protocol") transition(M, Persistent_GETX, I_L) { ee_sendDataWithAllTokens; p_informL2AboutTokenLoss; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -1990,22 +2009,21 @@ machine(L1Cache, "Token protocol") transition(M_W, Use_TimeoutStarverX, I_L) { s_deallocateTBE; ee_sendDataWithAllTokens; + forward_eviction_to_cpu; p_informL2AboutTokenLoss; jj_unsetUseTimer; } - - // migratory transition(MM_W, {Use_TimeoutStarverX, Use_TimeoutStarverS}, I_L) { s_deallocateTBE; ee_sendDataWithAllTokens; + forward_eviction_to_cpu; p_informL2AboutTokenLoss; jj_unsetUseTimer; } - // Transient_GETX and Transient_GETS in transient states transition(OM, {Transient_GETX, Transient_Local_GETX, Transient_GETS, Transient_GETS_Last_Token, Transient_Local_GETS_Last_Token, Transient_Local_GETS}) { m_popRequestQueue; // Even if we have the data, we can pretend we don't have it yet. 
@@ -2040,6 +2058,7 @@ machine(L1Cache, "Token protocol") transition({SM, SM_L}, Persistent_GETX, IM_L) { e_sendAckWithCollectedTokens; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -2054,6 +2073,7 @@ machine(L1Cache, "Token protocol") transition(OM, Persistent_GETX, IM_L) { ee_sendDataWithAllTokens; + forward_eviction_to_cpu l_popPersistentQueue; } @@ -2120,6 +2140,7 @@ machine(L1Cache, "Token protocol") transition({IM, SM}, {Transient_GETX, Transient_Local_GETX}, IM) { // We don't have the data yet, but we might have collected some tokens. We give them up here to avoid livelock t_sendAckWithCollectedTokens; + forward_eviction_to_cpu; m_popRequestQueue; } @@ -2336,7 +2357,6 @@ machine(L1Cache, "Token protocol") kd_wakeUpDependents; } - // Own_Lock_or_Unlock transition(I_L, Own_Lock_or_Unlock, I) { @@ -2364,4 +2384,3 @@ machine(L1Cache, "Token protocol") kd_wakeUpDependents; } } - diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index ce16a8777..219096d26 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -41,7 +41,8 @@ machine(L1Cache, "AMD Hammer-like protocol") int cache_response_latency = 10, int issue_latency = 2, int l2_cache_hit_latency = 10, - bool no_mig_atomic = true + bool no_mig_atomic = true, + bool send_evictions { // NETWORK BUFFERS @@ -1207,6 +1208,13 @@ machine(L1Cache, "AMD Hammer-like protocol") unset_cache_entry(); } + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + action(uu_profileMiss, "\u", desc="Profile the demand miss") { peek(mandatoryQueue_in, RubyRequest) { if (L1IcacheMemory.isTagPresent(address)) { @@ -1486,17 +1494,20 @@ machine(L1Cache, "AMD Hammer-like protocol") i_allocateTBE; bf_issueGETF; uu_profileMiss; + forward_eviction_to_cpu; 
gg_deallocateL1CacheBlock; k_popMandatoryQueue; } transition(S, L2_Replacement, I) { + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(S, {Other_GETX, Invalidate}, I) { f_sendAck; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -1528,6 +1539,7 @@ machine(L1Cache, "AMD Hammer-like protocol") bf_issueGETF; p_decrementNumberOfMessagesByOne; uu_profileMiss; + forward_eviction_to_cpu; gg_deallocateL1CacheBlock; k_popMandatoryQueue; } @@ -1535,12 +1547,14 @@ machine(L1Cache, "AMD Hammer-like protocol") transition(O, L2_Replacement, OI) { i_allocateTBE; d_issuePUT; + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(O, {Other_GETX, Invalidate}, I) { e_sendData; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -1569,6 +1583,7 @@ machine(L1Cache, "AMD Hammer-like protocol") i_allocateTBE; bf_issueGETF; p_decrementNumberOfMessagesByOne; + forward_eviction_to_cpu; gg_deallocateL1CacheBlock; k_popMandatoryQueue; } @@ -1582,17 +1597,20 @@ machine(L1Cache, "AMD Hammer-like protocol") transition(MM, L2_Replacement, MI) { i_allocateTBE; d_issuePUT; + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(MM, {Other_GETX, Invalidate}, I) { c_sendExclusiveData; + forward_eviction_to_cpu; l_popForwardQueue; } transition(MM, Other_GETS, I) { c_sendExclusiveData; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -1625,12 +1643,14 @@ machine(L1Cache, "AMD Hammer-like protocol") transition(M, L2_Replacement, MI) { i_allocateTBE; d_issuePUT; + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(M, {Other_GETX, Invalidate}, I) { c_sendExclusiveData; + forward_eviction_to_cpu; l_popForwardQueue; } @@ -1700,11 +1720,13 @@ machine(L1Cache, "AMD Hammer-like protocol") transition(SM, {Other_GETX, Invalidate}, IM) { f_sendAck; + forward_eviction_to_cpu; l_popForwardQueue; } transition(SM_F, {Other_GETX, Invalidate}, IM_F) { f_sendAck; + 
forward_eviction_to_cpu; l_popForwardQueue; } @@ -1754,12 +1776,14 @@ machine(L1Cache, "AMD Hammer-like protocol") transition(OM, {Other_GETX, Invalidate}, IM) { e_sendData; pp_incrementNumberOfMessagesByOne; + forward_eviction_to_cpu; l_popForwardQueue; } transition(OM_F, {Other_GETX, Invalidate}, IM_F) { q_sendDataFromTBEToCache; pp_incrementNumberOfMessagesByOne; + forward_eviction_to_cpu; l_popForwardQueue; } diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm index c76e0fe3e..3b90dab20 100644 --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -107,6 +107,7 @@ structure (Sequencer, external = "yes") { void writeCallback(Address, GenericMachineType, DataBlock, Time, Time, Time); void checkCoherence(Address); void profileNack(Address, int, int, uint64); + void evictionCallback(Address); } structure(RubyRequest, desc="...", interface="Message", external="yes") { diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index ce9973402..b60ca2a07 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -1,5 +1,6 @@ /* * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2011 Mark D. Hill and David A. Wood * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -682,3 +683,14 @@ RubyPort::M5Port::deviceBlockSize() const { return (unsigned) RubySystem::getBlockSizeBytes(); } + +void +RubyPort::ruby_eviction_callback(const Address& address) +{ /* Sends one InvalidationReq packet for the given address to every entry of cpu_ports. */ + DPRINTF(RubyPort, "Sending invalidations.\n"); + Request req(address.getAddress(), 0, 0); /* A single stack-local Request; every packet built in the loop below stores a pointer to it. */ + for (CpuPortIter it = cpu_ports.begin(); it != cpu_ports.end(); it++) { + Packet *pkt = new Packet(&req, MemCmd::InvalidationReq, -1); /* NOTE(review): pkt is heap-allocated and is not deleted in this function, and it refers to the stack-local req. Presumably the receiver frees pkt and does not touch its request after this call returns - confirm. */ + (*it)->sendTiming(pkt); /* NOTE(review): the result of sendTiming is discarded (a bool-returning sendTiming is declared in RubyPort.hh) - confirm that a rejected packet needs no retry or cleanup. */ + } +} diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index d8dbe0cda..2ffdef3d9 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2011 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,6 +59,7 @@ class RubyPort : public MemObject RubySystem*_system, bool _access_phys_mem); bool sendTiming(PacketPtr pkt); void hitCallback(PacketPtr pkt); + void evictionCallback(const Address& address); unsigned deviceBlockSize() const; bool onRetryList() @@ -129,8 +131,8 @@ class RubyPort : public MemObject protected: const std::string m_name; void ruby_hit_callback(PacketPtr pkt); - void hit(PacketPtr pkt); void testDrainComplete(); + void ruby_eviction_callback(const Address& address); int m_version; AbstractController* m_controller; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 3f9ceb34d..1cd54d45c 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -733,3 +733,9 @@ Sequencer::checkCoherence(const Address& addr) g_system_ptr->checkGlobalCoherenceInvariant(addr); #endif } + +void +Sequencer::evictionCallback(const Address& address) +{ + ruby_eviction_callback(address); +} diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index
4a6d46c01..e262e32e8 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -117,6 +117,7 @@ class Sequencer : public RubyPort, public Consumer void markRemoved(); void removeRequest(SequencerRequest* request); + void evictionCallback(const Address& address); private: void issueRequest(PacketPtr pkt, RubyRequestType type); @@ -181,4 +182,3 @@ operator<<(std::ostream& out, const Sequencer& obj) } #endif // __MEM_RUBY_SYSTEM_SEQUENCER_HH__ -