ruby: Add missing block deallocations in MOESI_hammer

Some blocks in MOESI hammer were not getting deallocated when they were set to
an idle state (e.g. by invalidate or other_getx/s messages).  While
functionally correct, this caused some bad effects on performance, such as
blocks in I in the L1s getting sent to the L2 upon eviction, in turn evicting
valid blocks.  Also, if a valid block was in LRU, that block could be evicted
rather than a block in I.  This patch adds in the missing deallocations.

Committed by: Nilay Vaish <nilay@cs.wisc.edu>
This commit is contained in:
Lena Olson 2015-09-16 20:18:40 -05:00
parent 950e431d87
commit 3225379cc0

View file

@ -89,7 +89,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack";
MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack";
II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack";
IT, AccessPermission:Busy, "IT", desc="Invalid block transferring to L1";
ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
@ -1243,6 +1242,20 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
unset_cache_entry();
}
// Deallocate the entry for `address` from whichever cache currently holds its
// tag. The L1 data cache is checked first, then the L1 instruction cache; if
// neither holds the tag, the L2 cache is asserted to hold it. Finishes by
// calling unset_cache_entry().
action(gr_deallocateCacheBlock, "\gr", desc="Deallocate an L1 or L2 cache block.") {
  if (L1Dcache.isTagPresent(address)) {
    L1Dcache.deallocate(address);
  } else if (L1Icache.isTagPresent(address)) {
    L1Icache.deallocate(address);
  } else {
    // Not present in either L1, so the tag is expected to be in the L2.
    assert(L2cache.isTagPresent(address));
    L2cache.deallocate(address);
  }
  unset_cache_entry();
}
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
if (send_evictions) {
DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
@ -1296,7 +1309,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
//*****************************************************
// Transitions for Load/Store/L2_Replacement from transient states
transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
zz_stallAndWaitMandatoryQueue;
}
@ -1308,11 +1321,11 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
zz_stallAndWaitMandatoryQueue;
}
transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
transition({IM, IS, OI, MI, II, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
zz_stallAndWaitMandatoryQueue;
}
transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
zz_stallAndWaitMandatoryQueue;
}
@ -1324,7 +1337,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
zz_stallAndWaitMandatoryQueue;
}
transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
transition({ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
z_stall;
}
@ -1333,7 +1346,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
}
// Transitions moving data between the L1 and L2 caches
transition({I, S, O, M, MM}, L1_to_L2) {
transition({S, O, M, MM}, L1_to_L2) {
i_allocateTBE;
gg_deallocateL1CacheBlock;
vv_allocateL2CacheBlock;
@ -1341,16 +1354,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
s_deallocateTBE;
}
transition(I, Trigger_L2_to_L1D, IT) {
i_allocateTBE;
rr_deallocateL2CacheBlock;
ii_allocateL1DCacheBlock;
nb_copyFromTBEToL1; // Not really needed for state I
s_deallocateTBE;
zz_stallAndWaitMandatoryQueue;
ll_L2toL1Transfer;
}
transition(S, Trigger_L2_to_L1D, ST) {
i_allocateTBE;
rr_deallocateL2CacheBlock;
@ -1391,16 +1394,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
ll_L2toL1Transfer;
}
transition(I, Trigger_L2_to_L1I, IT) {
i_allocateTBE;
rr_deallocateL2CacheBlock;
jj_allocateL1ICacheBlock;
nb_copyFromTBEToL1;
s_deallocateTBE;
zz_stallAndWaitMandatoryQueue;
ll_L2toL1Transfer;
}
transition(S, Trigger_L2_to_L1I, ST) {
i_allocateTBE;
rr_deallocateL2CacheBlock;
@ -1441,11 +1434,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
ll_L2toL1Transfer;
}
transition(IT, Complete_L2_to_L1, IR) {
j_popTriggerQueue;
kd_wakeUpDependents;
}
transition(ST, Complete_L2_to_L1, SR) {
j_popTriggerQueue;
kd_wakeUpDependents;
@ -1500,11 +1488,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
k_popMandatoryQueue;
}
transition(I, L2_Replacement) {
rr_deallocateL2CacheBlock;
ka_wakeUpAllDependents;
}
transition(I, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
f_sendAck;
l_popForwardQueue;
@ -1564,6 +1547,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
// S -> I on Other_GETX or Invalidate.
// The block's cache entry is deallocated as part of the transition so that a
// block left in I does not keep occupying a cache entry (see
// gr_deallocateCacheBlock: "Deallocate an L1 or L2 cache block.").
transition(S, {Other_GETX, Invalidate}, I) {
f_sendAck;
// Sends eviction information to the processor (only if send_evictions is set).
forward_eviction_to_cpu;
// Runs after the ack above and before the forward queue is popped.
gr_deallocateCacheBlock;
l_popForwardQueue;
}
@ -1630,6 +1614,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
// O -> I on Other_GETX or Invalidate.
// The cache entry is deallocated only after e_sendData has run.
// NOTE(review): presumably e_sendData reads the block's data from the cache
// entry, which would make this ordering mandatory - confirm against the action.
transition(O, {Other_GETX, Invalidate}, I) {
e_sendData;
// Sends eviction information to the processor (only if send_evictions is set).
forward_eviction_to_cpu;
// Free the entry now that the block is going to I.
gr_deallocateCacheBlock;
l_popForwardQueue;
}
@ -1712,12 +1697,14 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
// MM -> I on Other_GETX or Invalidate.
// The cache entry is deallocated only after c_sendExclusiveData has run.
// NOTE(review): presumably c_sendExclusiveData reads the block's data from the
// cache entry, which would make this ordering mandatory - confirm.
transition(MM, {Other_GETX, Invalidate}, I) {
c_sendExclusiveData;
// Sends eviction information to the processor (only if send_evictions is set).
forward_eviction_to_cpu;
// Free the entry now that the block is going to I.
gr_deallocateCacheBlock;
l_popForwardQueue;
}
// MM -> I on Other_GETS.
// Unlike the Other_GETX/Invalidate case this is triggered by a GETS, but the
// block still ends in I, so its cache entry is deallocated here as well.
// NOTE(review): presumably c_sendExclusiveData reads the block's data from the
// cache entry, so the deallocation must stay after it - confirm.
transition(MM, Other_GETS, I) {
c_sendExclusiveData;
// Sends eviction information to the processor (only if send_evictions is set).
forward_eviction_to_cpu;
// Free the entry now that the block is going to I.
gr_deallocateCacheBlock;
l_popForwardQueue;
}
@ -1778,6 +1765,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
// M -> I on Other_GETX or Invalidate.
// The cache entry is deallocated only after c_sendExclusiveData has run.
// NOTE(review): presumably c_sendExclusiveData reads the block's data from the
// cache entry, which would make this ordering mandatory - confirm.
transition(M, {Other_GETX, Invalidate}, I) {
c_sendExclusiveData;
// Sends eviction information to the processor (only if send_evictions is set).
forward_eviction_to_cpu;
// Free the entry now that the block is going to I.
gr_deallocateCacheBlock;
l_popForwardQueue;
}