From 69d8600bf80ce065feccbac6d55e45db62f1654f Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Fri, 10 Feb 2012 11:05:24 -0600 Subject: [PATCH] MESI: Add queues for stalled requests This patch adds support for stalling the requests queued up at different controllers for the MESI CMP directory protocol. Earlier the controllers would recycle the requests using some fixed latency. This results in younger requests getting serviced first at times, and can result in starvation. Instead all the requests that need a particular block to be in a stable state are moved to a separate queue, where they wait till that block returns to a stable state and then they are processed. --- .../protocol/MESI_CMP_directory-L1cache.sm | 38 +++++++++-------- .../protocol/MESI_CMP_directory-L2cache.sm | 42 ++++++++++++------- src/mem/protocol/MESI_CMP_directory-dir.sm | 24 +++++++---- 3 files changed, 66 insertions(+), 38 deletions(-) diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index 934405786..91be3933f 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -136,6 +136,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") void unset_cache_entry(); void set_tbe(TBE a); void unset_tbe(); + void wakeUpBuffers(Address a); // inclusive cache returns L1 entries only Entry getCacheEntry(Address addr), return_by_pointer="yes" { @@ -230,7 +231,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache); // Response IntraChip L1 Network - response msg to this L1 cache - in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) { + in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) { if (responseIntraChipL1Network_in.isReady()) { peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") { assert(in_msg.Destination.isElement(machineID)); @@ -268,7 +269,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } // Request InterChip network - request from this L1 cache to the shared L2 - in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) { + in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank = 1) { if(requestIntraChipL1Network_in.isReady()) { peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") { assert(in_msg.Destination.isElement(machineID)); @@ -293,7 +294,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } // Mandatory Queue betweens Node's CPU and it's L1 caches - in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) { if (mandatoryQueue_in.isReady()) { peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { @@ -653,9 +654,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } - action(z_stall, "z", desc="Stall") { - } - action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { if (L1DcacheMemory.isTagPresent(address)) { L1DcacheMemory.deallocate(address); @@ -677,12 +675,12 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } - action(zz_recycleRequestQueue, "zz", desc="recycle L1 request queue") { - requestIntraChipL1Network_in.recycle(); + action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle L1 request queue") { + stall_and_wait(mandatoryQueue_in, address); } - action(z_recycleMandatoryQueue, "\z", desc="recycle L1 request queue") { - mandatoryQueue_in.recycle(); + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + wakeUpBuffers(address); } action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") { @@ -702,8 +700,8 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") //***************************************************** // Transitions for Load/Store/Replacement/WriteBack from transient states - transition({IS, IM, IS_I, M_I, SM}, {Load, Ifetch, Store, L1_Replacement}) { - z_recycleMandatoryQueue; + transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) { + z_stallAndWaitMandatoryQueue; } // Transitions from Idle @@ -824,6 +822,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") transition(M_I, WB_Ack, I) { s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(M, Inv, I) { @@ -871,6 +870,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") h_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(IS_I, Data_all_Acks, I) { @@ -878,6 +878,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") h_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(IS, DataS_fromL1, S) { @@ -886,6 +887,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") h_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(IS_I, DataS_fromL1, I) { @@ -894,6 +896,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") h_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } // directory is blocked when sending exclusive data @@ -903,6 +906,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") jj_sendExclusiveUnblock; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(IS, Data_Exclusive, E) { @@ -911,6 +915,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") jj_sendExclusiveUnblock; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } // Transitions from IM @@ -931,6 +936,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") jj_sendExclusiveUnblock; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } // transitions from SM @@ -944,10 +950,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") hh_store_hit; s_deallocateTBE; o_popIncomingResponseQueue; - } - - transition(SINK_WB_ACK, {Load, Store, Ifetch, L1_Replacement}){ - z_recycleMandatoryQueue; + kd_wakeUpDependents; } transition(SINK_WB_ACK, Inv){ @@ -955,8 +958,9 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") l_popRequestQueue; } - transition(SINK_WB_ACK, WB_Ack){ + transition(SINK_WB_ACK, WB_Ack, I){ s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } } diff --git a/src/mem/protocol/MESI_CMP_directory-L2cache.sm b/src/mem/protocol/MESI_CMP_directory-L2cache.sm index 16c5bc5a1..9cc20f8c3 100644 --- a/src/mem/protocol/MESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L2cache.sm @@ -158,6 +158,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") void unset_cache_entry(); void set_tbe(TBE a); void unset_tbe(); + void wakeUpBuffers(Address a); // inclusive cache, returns L2 entries only Entry getCacheEntry(Address addr), return_by_pointer="yes" { @@ -283,7 +284,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") out_port(responseIntraChipL2Network_out, ResponseMsg, responseFromL2Cache); - in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) { + in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache, rank = 2) { if(L1unblockNetwork_in.isReady()) { peek(L1unblockNetwork_in, ResponseMsg) { Entry cache_entry := getCacheEntry(in_msg.Address); @@ -305,7 +306,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } // Response IntraChip L2 Network - response msg to this particular L2 bank - in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache) { + in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache, rank = 1) { if (responseIntraChipL2Network_in.isReady()) { peek(responseIntraChipL2Network_in, ResponseMsg) { // test wether it's from a local L1 or an off chip source @@ -349,7 +350,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } // L1 Request - in_port(L1RequestIntraChipL2Network_in, RequestMsg, L1RequestToL2Cache) { + in_port(L1RequestIntraChipL2Network_in, RequestMsg, L1RequestToL2Cache, rank = 0) { if(L1RequestIntraChipL2Network_in.isReady()) { peek(L1RequestIntraChipL2Network_in, RequestMsg) { Entry cache_entry := getCacheEntry(in_msg.Address); @@ -791,14 +792,17 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } } - action(zz_recycleL1RequestQueue, "zz", desc="recycle L1 request queue") { - L1RequestIntraChipL2Network_in.recycle(); + action(zz_stallAndWaitL1RequestQueue, "zz", desc="recycle L1 request queue") { + stall_and_wait(L1RequestIntraChipL2Network_in, address); } action(zn_recycleResponseNetwork, "zn", desc="recycle memory request") { responseIntraChipL2Network_in.recycle(); } + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + wakeUpBuffers(address); + } //***************************************************** // TRANSITIONS @@ -820,7 +824,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) { - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) { @@ -833,7 +837,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") transition({SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) { - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } @@ -885,6 +889,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") e_sendDataToGetSRequestors; s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(IM, Mem_Data, MT_MB) { @@ -902,11 +907,11 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } transition({IS, ISS}, L1_GETX) { - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } transition(IM, {L1_GETX, L1_GETS, L1_GET_INSTR}) { - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } // transitions from SS @@ -1018,30 +1023,35 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") // transitions from blocking states transition(SS_MB, Unblock_Cancel, SS) { k_popUnblockQueue; + kd_wakeUpDependents; } transition(MT_MB, Unblock_Cancel, MT) { k_popUnblockQueue; + kd_wakeUpDependents; } transition(MT_IB, Unblock_Cancel, MT) { k_popUnblockQueue; + kd_wakeUpDependents; } transition(SS_MB, Exclusive_Unblock, MT) { // update actual directory mmu_markExclusiveFromUnblock; k_popUnblockQueue; + kd_wakeUpDependents; } transition({M_MB, MT_MB}, Exclusive_Unblock, MT) { // update actual directory mmu_markExclusiveFromUnblock; k_popUnblockQueue; + kd_wakeUpDependents; } transition(MT_IIB, {L1_PUTX, L1_PUTX_old}){ - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } transition(MT_IIB, Unblock, MT_IB) { @@ -1057,16 +1067,18 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") transition(MT_IB, {WB_Data, WB_Data_clean}, SS) { m_writeDataToCache; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(MT_SB, Unblock, SS) { nnu_addSharerFromUnblock; k_popUnblockQueue; + kd_wakeUpDependents; } // writeback states transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_GET_INSTR}) { - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } transition(I_I, Ack) { @@ -1091,7 +1103,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } transition(MCT_I, {L1_PUTX, L1_PUTX_old}){ - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } // L1 never changed Dirty data @@ -1101,17 +1113,18 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") } transition(MT_I, {L1_PUTX, L1_PUTX_old}){ - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } // possible race between unblock and immediate replacement transition({MT_MB,SS_MB}, {L1_PUTX, L1_PUTX_old}) { - zz_recycleL1RequestQueue; + zz_stallAndWaitL1RequestQueue; } transition(MT_I, WB_Data_clean, NP) { s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } transition(S_I, Ack) { @@ -1127,5 +1140,6 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") transition(M_I, Mem_Ack, NP) { s_deallocateTBE; o_popIncomingResponseQueue; + kd_wakeUpDependents; } } diff --git a/src/mem/protocol/MESI_CMP_directory-dir.sm b/src/mem/protocol/MESI_CMP_directory-dir.sm index d026e7b90..aa1294b2c 100644 --- a/src/mem/protocol/MESI_CMP_directory-dir.sm +++ b/src/mem/protocol/MESI_CMP_directory-dir.sm @@ -109,6 +109,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") void set_tbe(TBE tbe); void unset_tbe(); + void wakeUpBuffers(Address a); Entry getDirectoryEntry(Address addr), return_by_pointer="yes" { Entry dir_entry := static_cast(Entry, "pointer", directory[addr]); @@ -191,7 +192,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") // ** IN_PORTS ** - in_port(requestNetwork_in, RequestMsg, requestToDir) { + in_port(requestNetwork_in, RequestMsg, requestToDir, rank = 0) { if (requestNetwork_in.isReady()) { peek(requestNetwork_in, RequestMsg) { assert(in_msg.Destination.isElement(machineID)); @@ -211,7 +212,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } } - in_port(responseNetwork_in, ResponseMsg, responseToDir) { + in_port(responseNetwork_in, ResponseMsg, responseToDir, rank = 1) { if (responseNetwork_in.isReady()) { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Destination.isElement(machineID)); @@ -228,7 +229,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } // off-chip memory request/response is done - in_port(memQueue_in, MemoryMsg, memBuffer) { + in_port(memQueue_in, MemoryMsg, memBuffer, rank = 2) { if (memQueue_in.isReady()) { peek(memQueue_in, MemoryMsg) { if (in_msg.Type == MemoryRequestType:MEMORY_READ) { @@ -244,7 +245,6 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } - // Actions action(a_sendAck, "a", desc="Send ack to L2") { peek(responseNetwork_in, ResponseMsg) { @@ -297,6 +297,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol") memQueue_in.dequeue(); } + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + wakeUpBuffers(address); + } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { peek(requestNetwork_in, RequestMsg) { enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { @@ -400,8 +404,8 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } } - action(z_recycleRequestQueue, "z", desc="recycle request queue") { - requestNetwork_in.recycle(); + action(z_stallAndWaitRequest, "z", desc="recycle request queue") { + stall_and_wait(requestNetwork_in, address); } action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") { @@ -502,6 +506,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") transition(IM, Memory_Data, M) { d_sendData; l_popMemQueue; + kd_wakeUpDependents; } //added by SS transition(M, CleanReplacement, I) { @@ -520,6 +525,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") c_clearOwner; aa_sendAck; l_popMemQueue; + kd_wakeUpDependents; } @@ -532,6 +538,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") transition(ID, Memory_Data, I) { dr_sendDMAData; l_popMemQueue; + kd_wakeUpDependents; } transition(I, DMA_WRITE, ID_W) { @@ -543,10 +550,11 @@ machine(Directory, "MESI_CMP_filter_directory protocol") transition(ID_W, Memory_Ack, I) { da_sendDMAAck; l_popMemQueue; + kd_wakeUpDependents; } transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) { - z_recycleRequestQueue; + z_stallAndWaitRequest; } transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) { @@ -570,6 +578,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") aa_sendAck; c_clearOwner; l_popMemQueue; + kd_wakeUpDependents; } transition(M, DMA_WRITE, M_DWR) { @@ -591,6 +600,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") da_sendDMAAck; w_deallocateTBE; l_popMemQueue; + kd_wakeUpDependents; } }