MESI: Add queues for stalled requests

This patch adds support for stalling the requests queued up at different
controllers for the MESI CMP directory protocol. Previously, the controllers
would recycle such requests after a fixed latency. This could result in
younger requests being serviced before older ones, and could lead to
starvation. Instead, all requests that need a particular block to be
in a stable state are now moved to a separate queue, where they wait until
that block returns to a stable state, and are then processed.
This commit is contained in:
Nilay Vaish 2012-02-10 11:05:24 -06:00
parent 72f3f526fc
commit 69d8600bf8
3 changed files with 66 additions and 38 deletions

View file

@ -136,6 +136,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
void unset_cache_entry();
void set_tbe(TBE a);
void unset_tbe();
void wakeUpBuffers(Address a);
// inclusive cache returns L1 entries only
Entry getCacheEntry(Address addr), return_by_pointer="yes" {
@ -230,7 +231,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache);
// Response IntraChip L1 Network - response msg to this L1 cache
in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) {
in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) {
if (responseIntraChipL1Network_in.isReady()) {
peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
@ -268,7 +269,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
}
// Request InterChip network - request from this L1 cache to the shared L2
in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) {
in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank = 1) {
if(requestIntraChipL1Network_in.isReady()) {
peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
@ -293,7 +294,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
}
// Mandatory Queue between Node's CPU and its L1 caches
in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
@ -653,9 +654,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
}
}
action(z_stall, "z", desc="Stall") {
}
action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
if (L1DcacheMemory.isTagPresent(address)) {
L1DcacheMemory.deallocate(address);
@ -677,12 +675,12 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
}
}
action(zz_recycleRequestQueue, "zz", desc="recycle L1 request queue") {
requestIntraChipL1Network_in.recycle();
// Stalls the request at the head of the mandatory queue by handing it to
// stall_and_wait, keyed on `address`, instead of recycling it.
// The desc string previously read "recycle L1 request queue", which was
// carried over from the recycle-based action and no longer matched the body.
action(z_stallAndWaitMandatoryQueue, "\z", desc="stall and wait on L1 mandatory queue") {
  stall_and_wait(mandatoryQueue_in, address);
}
action(z_recycleMandatoryQueue, "\z", desc="recycle L1 request queue") {
mandatoryQueue_in.recycle();
// Calls wakeUpBuffers on `address`. Intended to be run by transitions that
// bring the block back to a stable state, so that requests previously parked
// with stall_and_wait on this address can be processed.
action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
wakeUpBuffers(address);
}
action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") {
@ -702,8 +700,8 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
//*****************************************************
// Transitions for Load/Store/Replacement/WriteBack from transient states
transition({IS, IM, IS_I, M_I, SM}, {Load, Ifetch, Store, L1_Replacement}) {
z_recycleMandatoryQueue;
transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
// Transitions from Idle
@ -824,6 +822,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
transition(M_I, WB_Ack, I) {
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(M, Inv, I) {
@ -871,6 +870,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IS_I, Data_all_Acks, I) {
@ -878,6 +878,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IS, DataS_fromL1, S) {
@ -886,6 +887,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IS_I, DataS_fromL1, I) {
@ -894,6 +896,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
// directory is blocked when sending exclusive data
@ -903,6 +906,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
jj_sendExclusiveUnblock;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IS, Data_Exclusive, E) {
@ -911,6 +915,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
jj_sendExclusiveUnblock;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
// Transitions from IM
@ -931,6 +936,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
jj_sendExclusiveUnblock;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
// transitions from SM
@ -944,10 +950,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
hh_store_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
}
transition(SINK_WB_ACK, {Load, Store, Ifetch, L1_Replacement}){
z_recycleMandatoryQueue;
kd_wakeUpDependents;
}
transition(SINK_WB_ACK, Inv){
@ -955,8 +958,9 @@ machine(L1Cache, "MESI Directory L1 Cache CMP")
l_popRequestQueue;
}
transition(SINK_WB_ACK, WB_Ack){
transition(SINK_WB_ACK, WB_Ack, I){
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
}

View file

@ -158,6 +158,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
void unset_cache_entry();
void set_tbe(TBE a);
void unset_tbe();
void wakeUpBuffers(Address a);
// inclusive cache, returns L2 entries only
Entry getCacheEntry(Address addr), return_by_pointer="yes" {
@ -283,7 +284,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
out_port(responseIntraChipL2Network_out, ResponseMsg, responseFromL2Cache);
in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) {
in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache, rank = 2) {
if(L1unblockNetwork_in.isReady()) {
peek(L1unblockNetwork_in, ResponseMsg) {
Entry cache_entry := getCacheEntry(in_msg.Address);
@ -305,7 +306,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
// Response IntraChip L2 Network - response msg to this particular L2 bank
in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache) {
in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache, rank = 1) {
if (responseIntraChipL2Network_in.isReady()) {
peek(responseIntraChipL2Network_in, ResponseMsg) {
// test whether it's from a local L1 or an off chip source
@ -349,7 +350,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
// L1 Request
in_port(L1RequestIntraChipL2Network_in, RequestMsg, L1RequestToL2Cache) {
in_port(L1RequestIntraChipL2Network_in, RequestMsg, L1RequestToL2Cache, rank = 0) {
if(L1RequestIntraChipL2Network_in.isReady()) {
peek(L1RequestIntraChipL2Network_in, RequestMsg) {
Entry cache_entry := getCacheEntry(in_msg.Address);
@ -791,14 +792,17 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
}
action(zz_recycleL1RequestQueue, "zz", desc="recycle L1 request queue") {
L1RequestIntraChipL2Network_in.recycle();
// Stalls the request at the head of the L1 request in-port by handing it to
// stall_and_wait, keyed on `address`, instead of recycling it.
// The desc string previously still said "recycle", which no longer matched
// what the action does.
action(zz_stallAndWaitL1RequestQueue, "zz", desc="stall and wait on L1 request queue") {
  stall_and_wait(L1RequestIntraChipL2Network_in, address);
}
// Calls recycle() on the L2 response network in-port.
// NOTE(review): the desc string says "memory request", but the port recycled
// here is responseIntraChipL2Network_in -- confirm the wording is intended.
action(zn_recycleResponseNetwork, "zn", desc="recycle memory request") {
responseIntraChipL2Network_in.recycle();
}
// Calls wakeUpBuffers on `address`. Intended to be run by transitions that
// bring the block back to a stable state, so that requests previously parked
// with stall_and_wait on this address can be processed.
action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
wakeUpBuffers(address);
}
//*****************************************************
// TRANSITIONS
@ -820,7 +824,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) {
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) {
@ -833,7 +837,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
transition({SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) {
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
@ -885,6 +889,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
e_sendDataToGetSRequestors;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IM, Mem_Data, MT_MB) {
@ -902,11 +907,11 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
transition({IS, ISS}, L1_GETX) {
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
transition(IM, {L1_GETX, L1_GETS, L1_GET_INSTR}) {
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
// transitions from SS
@ -1018,30 +1023,35 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
// transitions from blocking states
transition(SS_MB, Unblock_Cancel, SS) {
k_popUnblockQueue;
kd_wakeUpDependents;
}
transition(MT_MB, Unblock_Cancel, MT) {
k_popUnblockQueue;
kd_wakeUpDependents;
}
transition(MT_IB, Unblock_Cancel, MT) {
k_popUnblockQueue;
kd_wakeUpDependents;
}
transition(SS_MB, Exclusive_Unblock, MT) {
// update actual directory
mmu_markExclusiveFromUnblock;
k_popUnblockQueue;
kd_wakeUpDependents;
}
transition({M_MB, MT_MB}, Exclusive_Unblock, MT) {
// update actual directory
mmu_markExclusiveFromUnblock;
k_popUnblockQueue;
kd_wakeUpDependents;
}
transition(MT_IIB, {L1_PUTX, L1_PUTX_old}){
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
transition(MT_IIB, Unblock, MT_IB) {
@ -1057,16 +1067,18 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
transition(MT_IB, {WB_Data, WB_Data_clean}, SS) {
m_writeDataToCache;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(MT_SB, Unblock, SS) {
nnu_addSharerFromUnblock;
k_popUnblockQueue;
kd_wakeUpDependents;
}
// writeback states
transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_GET_INSTR}) {
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
transition(I_I, Ack) {
@ -1091,7 +1103,7 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
transition(MCT_I, {L1_PUTX, L1_PUTX_old}){
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
// L1 never changed Dirty data
@ -1101,17 +1113,18 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
}
transition(MT_I, {L1_PUTX, L1_PUTX_old}){
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
// possible race between unblock and immediate replacement
transition({MT_MB,SS_MB}, {L1_PUTX, L1_PUTX_old}) {
zz_recycleL1RequestQueue;
zz_stallAndWaitL1RequestQueue;
}
transition(MT_I, WB_Data_clean, NP) {
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(S_I, Ack) {
@ -1127,5 +1140,6 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
transition(M_I, Mem_Ack, NP) {
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
}

View file

@ -109,6 +109,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
void set_tbe(TBE tbe);
void unset_tbe();
void wakeUpBuffers(Address a);
Entry getDirectoryEntry(Address addr), return_by_pointer="yes" {
Entry dir_entry := static_cast(Entry, "pointer", directory[addr]);
@ -191,7 +192,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
// ** IN_PORTS **
in_port(requestNetwork_in, RequestMsg, requestToDir) {
in_port(requestNetwork_in, RequestMsg, requestToDir, rank = 0) {
if (requestNetwork_in.isReady()) {
peek(requestNetwork_in, RequestMsg) {
assert(in_msg.Destination.isElement(machineID));
@ -211,7 +212,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
}
in_port(responseNetwork_in, ResponseMsg, responseToDir) {
in_port(responseNetwork_in, ResponseMsg, responseToDir, rank = 1) {
if (responseNetwork_in.isReady()) {
peek(responseNetwork_in, ResponseMsg) {
assert(in_msg.Destination.isElement(machineID));
@ -228,7 +229,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
// off-chip memory request/response is done
in_port(memQueue_in, MemoryMsg, memBuffer) {
in_port(memQueue_in, MemoryMsg, memBuffer, rank = 2) {
if (memQueue_in.isReady()) {
peek(memQueue_in, MemoryMsg) {
if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
@ -244,7 +245,6 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
// Actions
action(a_sendAck, "a", desc="Send ack to L2") {
peek(responseNetwork_in, ResponseMsg) {
@ -297,6 +297,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
memQueue_in.dequeue();
}
// Calls wakeUpBuffers on `address`. Intended to be run by transitions that
// bring the block back to a stable state, so that requests previously parked
// with stall_and_wait on this address can be processed.
action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
wakeUpBuffers(address);
}
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
peek(requestNetwork_in, RequestMsg) {
enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
@ -400,8 +404,8 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
}
action(z_recycleRequestQueue, "z", desc="recycle request queue") {
requestNetwork_in.recycle();
// Stalls the request at the head of the directory request in-port by handing
// it to stall_and_wait, keyed on `address`, instead of recycling it.
// The desc string previously read "recycle request queue", which no longer
// matched the body.
action(z_stallAndWaitRequest, "z", desc="stall and wait on request queue") {
  stall_and_wait(requestNetwork_in, address);
}
action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") {
@ -502,6 +506,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
transition(IM, Memory_Data, M) {
d_sendData;
l_popMemQueue;
kd_wakeUpDependents;
}
//added by SS
transition(M, CleanReplacement, I) {
@ -520,6 +525,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
c_clearOwner;
aa_sendAck;
l_popMemQueue;
kd_wakeUpDependents;
}
@ -532,6 +538,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
transition(ID, Memory_Data, I) {
dr_sendDMAData;
l_popMemQueue;
kd_wakeUpDependents;
}
transition(I, DMA_WRITE, ID_W) {
@ -543,10 +550,11 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
transition(ID_W, Memory_Ack, I) {
da_sendDMAAck;
l_popMemQueue;
kd_wakeUpDependents;
}
transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) {
z_recycleRequestQueue;
z_stallAndWaitRequest;
}
transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) {
@ -570,6 +578,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
aa_sendAck;
c_clearOwner;
l_popMemQueue;
kd_wakeUpDependents;
}
transition(M, DMA_WRITE, M_DWR) {
@ -591,6 +600,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
da_sendDMAAck;
w_deallocateTBE;
l_popMemQueue;
kd_wakeUpDependents;
}
}