diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index 3853ebbb4..ef110d682 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -51,7 +51,9 @@ def define_options(parser): help="Token_CMP: cycles until issuing again"); parser.add_option("--disable-dyn-timeouts", action="store_true", help="Token_CMP: disable dyanimc timeouts, use fixed latency instead") - + parser.add_option("--allow-atomic-migration", action="store_true", + help="allow migratory sharing for atomic only accessed blocks") + def create_system(options, system, piobus, dma_devices): if buildEnv['PROTOCOL'] != 'MOESI_CMP_token': @@ -111,7 +113,9 @@ def create_system(options, system, piobus, dma_devices): fixed_timeout_latency = \ options.timeout_latency, dynamic_timeout_enabled = \ - not options.disable_dyn_timeouts) + not options.disable_dyn_timeouts, + no_mig_atomic = not \ + options.allow_atomic_migration) exec("system.l1_cntrl%d = l1_cntrl" % i) # diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 818600a5a..02d958b09 100644 --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -44,7 +44,8 @@ class L2Cache(RubyCache): latency = 10 def define_options(parser): - return + parser.add_option("--allow-atomic-migration", action="store_true", + help="allow migratory sharing for atomic only accessed blocks") def create_system(options, system, piobus, dma_devices): @@ -91,7 +92,9 @@ def create_system(options, system, piobus, dma_devices): sequencer = cpu_seq, L1IcacheMemory = l1i_cache, L1DcacheMemory = l1d_cache, - L2cacheMemory = l2_cache) + L2cacheMemory = l2_cache, + no_mig_atomic = not \ + options.allow_atomic_migration) exec("system.l1_cntrl%d = l1_cntrl" % i) # diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index d3e993efa..7a234e56f 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm 
@@ -42,7 +42,8 @@ machine(L1Cache, "Token protocol") int l1_response_latency = 2, int retry_threshold = 1, int fixed_timeout_latency = 100, - bool dynamic_timeout_enabled = true + bool dynamic_timeout_enabled = true, + bool no_mig_atomic = true { // From this node's L1 cache TO the network @@ -92,6 +93,7 @@ machine(L1Cache, "Token protocol") Load, desc="Load request from the processor"; Ifetch, desc="I-fetch request from the processor"; Store, desc="Store request from the processor"; + Atomic, desc="Atomic request from the processor"; L1_Replacement, desc="L1 Replacement"; // Responses @@ -120,7 +122,7 @@ machine(L1Cache, "Token protocol") Use_TimeoutStarverX, desc="Timeout"; Use_TimeoutStarverS, desc="Timeout"; Use_TimeoutNoStarvers, desc="Timeout"; - + Use_TimeoutNoStarvers_NoMig, desc="Timeout Don't Migrate"; } // TYPES @@ -143,6 +145,7 @@ machine(L1Cache, "Token protocol") bool WentPersistent, default="false", desc="Request went persistent"; bool ExternalResponse, default="false", desc="Response came from an external controller"; + bool IsAtomic, default="false", desc="Request was an atomic request"; AccessType AccessType, desc="Type of request (used for profiling)"; Time IssueTime, desc="Time the request was issued"; @@ -361,8 +364,14 @@ machine(L1Cache, "Token protocol") return Event:Load; } else if (type == CacheRequestType:IFETCH) { return Event:Ifetch; - } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) { + } else if (type == CacheRequestType:ST) { return Event:Store; + } else if (type == CacheRequestType:ATOMIC) { + if (no_mig_atomic) { + return Event:Atomic; + } else { + return Event:Store; + } } else { error("Invalid CacheRequestType"); } @@ -422,13 +431,16 @@ machine(L1Cache, "Token protocol") if (persistentTable.isLocked(useTimerTable.readyAddress()) && (persistentTable.findSmallest(useTimerTable.readyAddress()) != machineID)) { if (persistentTable.typeOfSmallest(useTimerTable.readyAddress()) == AccessType:Write) { 
trigger(Event:Use_TimeoutStarverX, useTimerTable.readyAddress()); - } - else { + } else { trigger(Event:Use_TimeoutStarverS, useTimerTable.readyAddress()); } - } - else { - trigger(Event:Use_TimeoutNoStarvers, useTimerTable.readyAddress()); + } else { + assert(L1_TBEs.isPresent(useTimerTable.readyAddress())); + if (no_mig_atomic && L1_TBEs[useTimerTable.readyAddress()].IsAtomic) { + trigger(Event:Use_TimeoutNoStarvers_NoMig, useTimerTable.readyAddress()); + } else { + trigger(Event:Use_TimeoutNoStarvers, useTimerTable.readyAddress()); + } } } } @@ -1245,6 +1257,9 @@ machine(L1Cache, "Token protocol") peek(mandatoryQueue_in, CacheMsg) { L1_TBEs[address].PC := in_msg.ProgramCounter; L1_TBEs[address].AccessType := cache_request_type_to_access_type(in_msg.Type); + if (in_msg.Type == CacheRequestType:ATOMIC) { + L1_TBEs[address].IsAtomic := true; + } L1_TBEs[address].Prefetch := in_msg.Prefetch; L1_TBEs[address].AccessMode := in_msg.AccessMode; } @@ -1444,7 +1459,7 @@ machine(L1Cache, "Token protocol") zz_recycleMandatoryQueue; } - transition({IM, SM, OM, IS, IM_L, IS_L, SM_L}, Store) { + transition({IM, SM, OM, IS, IM_L, IS_L, SM_L}, {Store, Atomic}) { zz_recycleMandatoryQueue; } @@ -1475,7 +1490,7 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } - transition(NP, Store, IM) { + transition(NP, {Store, Atomic}, IM) { ii_allocateL1DCacheBlock; i_allocateTBE; b_issueWriteRequest; @@ -1511,7 +1526,7 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } - transition(I, Store, IM) { + transition(I, {Store, Atomic}, IM) { i_allocateTBE; b_issueWriteRequest; uu_profileMiss; @@ -1570,7 +1585,7 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } - transition(S, Store, SM) { + transition(S, {Store, Atomic}, SM) { i_allocateTBE; b_issueWriteRequest; uu_profileMiss; @@ -1646,7 +1661,7 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } - transition(O, Store, OM) { + transition(O, {Store, Atomic}, OM) { i_allocateTBE; b_issueWriteRequest; 
uu_profileMiss; @@ -1723,7 +1738,17 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } - transition({MM, MM_W}, Store) { + transition({MM_W}, {Store, Atomic}) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(MM, Store) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(MM, Atomic, M) { hh_store_hit; k_popMandatoryQueue; } @@ -1755,12 +1780,16 @@ machine(L1Cache, "Token protocol") l_popPersistentQueue; } - transition(MM_W, Use_TimeoutNoStarvers, MM) { s_deallocateTBE; jj_unsetUseTimer; } + transition(MM_W, Use_TimeoutNoStarvers_NoMig, M) { + s_deallocateTBE; + jj_unsetUseTimer; + } + // Transitions from Dirty Exclusive transition({M, M_W}, {Load, Ifetch}) { h_load_hit; @@ -1772,11 +1801,21 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } + transition(M, Atomic) { + hh_store_hit; + k_popMandatoryQueue; + } + transition(M_W, Store, MM_W) { hh_store_hit; k_popMandatoryQueue; } + transition(M_W, Atomic) { + hh_store_hit; + k_popMandatoryQueue; + } + transition(M, L1_Replacement, I) { c_ownedReplacement; gg_deallocateL1CacheBlock; @@ -1825,7 +1864,7 @@ machine(L1Cache, "Token protocol") } // someone unlocked during timeout - transition(M_W, Use_TimeoutNoStarvers, M) { + transition(M_W, {Use_TimeoutNoStarvers, Use_TimeoutNoStarvers_NoMig}, M) { s_deallocateTBE; jj_unsetUseTimer; } @@ -2065,7 +2104,7 @@ machine(L1Cache, "Token protocol") k_popMandatoryQueue; } - transition(I_L, Store, IM_L) { + transition(I_L, {Store, Atomic}, IM_L) { ii_allocateL1DCacheBlock; i_allocateTBE; b_issueWriteRequest; @@ -2076,7 +2115,7 @@ machine(L1Cache, "Token protocol") // Transitions from S_L - transition(S_L, Store, SM_L) { + transition(S_L, {Store, Atomic}, SM_L) { i_allocateTBE; b_issueWriteRequest; uu_profileMiss; diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 44ae479c7..7b49c075c 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -40,7 +40,8 @@ 
machine(L1Cache, "AMD Hammer-like protocol") CacheMemory * L2cacheMemory, int cache_response_latency = 10, int issue_latency = 2, - int l2_cache_hit_latency = 10 + int l2_cache_hit_latency = 10, + bool no_mig_atomic = true { // NETWORK BUFFERS @@ -94,6 +95,7 @@ machine(L1Cache, "AMD Hammer-like protocol") // Requests Other_GETX, desc="A GetX from another processor"; Other_GETS, desc="A GetS from another processor"; + Other_GETS_No_Mig, desc="A GetS from another processor for an atomically accessed block (no migration)"; // Responses Ack, desc="Received an ack message"; @@ -122,6 +124,7 @@ machine(L1Cache, "AMD Hammer-like protocol") bool Dirty, desc="Is the data dirty (different than memory)?"; DataBlock DataBlk, desc="data for the block"; bool FromL2, default="false", desc="block just moved from L2"; + bool AtomicAccessed, default="false", desc="block has been accessed by an atomic request"; } // TBE fields @@ -280,7 +283,15 @@ machine(L1Cache, "AMD Hammer-like protocol") if (in_msg.Type == CoherenceRequestType:GETX) { trigger(Event:Other_GETX, in_msg.Address); } else if (in_msg.Type == CoherenceRequestType:GETS) { - trigger(Event:Other_GETS, in_msg.Address); + if (isCacheTagPresent(in_msg.Address)) { + if (getCacheEntry(in_msg.Address).AtomicAccessed && no_mig_atomic) { + trigger(Event:Other_GETS_No_Mig, in_msg.Address); + } else { + trigger(Event:Other_GETS, in_msg.Address); + } + } else { + trigger(Event:Other_GETS, in_msg.Address); + } } else if (in_msg.Type == CoherenceRequestType:WB_ACK) { trigger(Event:Writeback_Ack, in_msg.Address); } else if (in_msg.Type == CoherenceRequestType:WB_NACK) { @@ -538,12 +549,16 @@ machine(L1Cache, "AMD Hammer-like protocol") action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { DEBUG_EXPR(getCacheEntry(address).DataBlk); + peek(mandatoryQueue_in, CacheMsg) { + sequencer.writeCallback(address, + testAndClearLocalHit(address), + getCacheEntry(address).DataBlk); - sequencer.writeCallback(address, - testAndClearLocalHit(address), - getCacheEntry(address).DataBlk); - - 
getCacheEntry(address).Dirty := true; + getCacheEntry(address).Dirty := true; + if (in_msg.Type == CacheRequestType:ATOMIC) { + getCacheEntry(address).AtomicAccessed := true; + } + } } action(sx_external_store_hit, "sx", desc="store required external msgs.") { @@ -798,7 +813,7 @@ machine(L1Cache, "AMD Hammer-like protocol") zz_recycleMandatoryQueue; } - transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS}) { + transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) { // stall } @@ -948,7 +963,7 @@ machine(L1Cache, "AMD Hammer-like protocol") rr_deallocateL2CacheBlock; } - transition(I, {Other_GETX, Other_GETS}) { + transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) { f_sendAck; l_popForwardQueue; } @@ -975,7 +990,7 @@ machine(L1Cache, "AMD Hammer-like protocol") l_popForwardQueue; } - transition(S, Other_GETS) { + transition(S, {Other_GETS, Other_GETS_No_Mig}) { ff_sendAckShared; l_popForwardQueue; } @@ -1005,7 +1020,7 @@ machine(L1Cache, "AMD Hammer-like protocol") l_popForwardQueue; } - transition(O, Other_GETS) { + transition(O, {Other_GETS, Other_GETS_No_Mig}) { ee_sendDataShared; l_popForwardQueue; } @@ -1037,6 +1052,11 @@ machine(L1Cache, "AMD Hammer-like protocol") l_popForwardQueue; } + transition(MM, Other_GETS_No_Mig, O) { + ee_sendDataShared; + l_popForwardQueue; + } + // Transitions from Dirty Exclusive transition(M, {Load, Ifetch}) { h_load_hit; @@ -1059,14 +1079,14 @@ machine(L1Cache, "AMD Hammer-like protocol") l_popForwardQueue; } - transition(M, Other_GETS, O) { + transition(M, {Other_GETS, Other_GETS_No_Mig}, O) { ee_sendDataShared; l_popForwardQueue; } // Transitions from IM - transition(IM, {Other_GETX, Other_GETS}) { + transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) { f_sendAck; l_popForwardQueue; } @@ -1093,7 +1113,7 @@ machine(L1Cache, "AMD Hammer-like protocol") } // Transitions from SM - transition(SM, Other_GETS) { + transition(SM, {Other_GETS, Other_GETS_No_Mig}) { ff_sendAckShared; 
l_popForwardQueue; } @@ -1138,7 +1158,7 @@ machine(L1Cache, "AMD Hammer-like protocol") l_popForwardQueue; } - transition(OM, Other_GETS) { + transition(OM, {Other_GETS, Other_GETS_No_Mig}) { ee_sendDataShared; l_popForwardQueue; } @@ -1158,7 +1178,7 @@ machine(L1Cache, "AMD Hammer-like protocol") // Transitions from IS - transition(IS, {Other_GETX, Other_GETS}) { + transition(IS, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) { f_sendAck; l_popForwardQueue; } @@ -1274,7 +1294,7 @@ machine(L1Cache, "AMD Hammer-like protocol") l_popForwardQueue; } - transition({OI, MI}, Other_GETS, OI) { + transition({OI, MI}, {Other_GETS, Other_GETS_No_Mig}, OI) { q_sendDataFromTBEToCache; l_popForwardQueue; } @@ -1292,7 +1312,7 @@ machine(L1Cache, "AMD Hammer-like protocol") } // Transitions from II - transition(II, {Other_GETS, Other_GETX}, II) { + transition(II, {Other_GETS, Other_GETS_No_Mig, Other_GETX}, II) { f_sendAck; l_popForwardQueue; }