ruby: Add missing block deallocations in MOESI_hammer
Some blocks in MOESI_hammer were not being deallocated when they were set to the idle state (e.g., by Invalidate or Other_GETX/Other_GETS messages). While functionally correct, this had some bad effects on performance, such as blocks in state I in the L1s being sent to the L2 upon eviction, in turn evicting valid blocks. Also, if a valid block was the LRU entry, that block could be evicted rather than a block in state I. This patch adds the missing deallocations. Committed by: Nilay Vaish <nilay@cs.wisc.edu>
This commit is contained in:
parent
950e431d87
commit
3225379cc0
1 changed files with 24 additions and 36 deletions
|
@ -89,7 +89,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack";
|
||||
MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack";
|
||||
II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack";
|
||||
IT, AccessPermission:Busy, "IT", desc="Invalid block transferring to L1";
|
||||
ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
|
||||
OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
|
||||
MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
|
||||
|
@ -1243,6 +1242,20 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
unset_cache_entry();
|
||||
}
|
||||
|
||||
// Deallocate the block at `address` from whichever cache currently holds its
// tag. The L1 data cache is checked first, then the L1 instruction cache; if
// neither holds the tag, the block is asserted to be present in the L2.
// unset_cache_entry() is called afterwards in every case.
action(gr_deallocateCacheBlock, "\gr", desc="Deallocate an L1 or L2 cache block.") {
|
||||
if (L1Dcache.isTagPresent(address)) {
|
||||
L1Dcache.deallocate(address);
|
||||
}
|
||||
else if (L1Icache.isTagPresent(address)){
|
||||
L1Icache.deallocate(address);
|
||||
}
|
||||
else {
|
||||
// Not found in either L1, so the tag must be in the L2; anything else is a protocol error.
assert(L2cache.isTagPresent(address));
|
||||
L2cache.deallocate(address);
|
||||
}
|
||||
unset_cache_entry();
|
||||
}
|
||||
|
||||
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
|
||||
if (send_evictions) {
|
||||
DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
|
||||
|
@ -1296,7 +1309,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
//*****************************************************
|
||||
|
||||
// Transitions for Load/Store/L2_Replacement from transient states
|
||||
transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
|
||||
transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
|
||||
zz_stallAndWaitMandatoryQueue;
|
||||
}
|
||||
|
||||
|
@ -1308,11 +1321,11 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
zz_stallAndWaitMandatoryQueue;
|
||||
}
|
||||
|
||||
transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
|
||||
transition({IM, IS, OI, MI, II, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
|
||||
zz_stallAndWaitMandatoryQueue;
|
||||
}
|
||||
|
||||
transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
|
||||
transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
|
||||
zz_stallAndWaitMandatoryQueue;
|
||||
}
|
||||
|
||||
|
@ -1324,7 +1337,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
zz_stallAndWaitMandatoryQueue;
|
||||
}
|
||||
|
||||
transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
|
||||
transition({ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
|
||||
z_stall;
|
||||
}
|
||||
|
||||
|
@ -1333,7 +1346,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
// Transitions moving data between the L1 and L2 caches
|
||||
transition({I, S, O, M, MM}, L1_to_L2) {
|
||||
transition({S, O, M, MM}, L1_to_L2) {
|
||||
i_allocateTBE;
|
||||
gg_deallocateL1CacheBlock;
|
||||
vv_allocateL2CacheBlock;
|
||||
|
@ -1341,16 +1354,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
s_deallocateTBE;
|
||||
}
|
||||
|
||||
transition(I, Trigger_L2_to_L1D, IT) {
|
||||
i_allocateTBE;
|
||||
rr_deallocateL2CacheBlock;
|
||||
ii_allocateL1DCacheBlock;
|
||||
nb_copyFromTBEToL1; // Not really needed for state I
|
||||
s_deallocateTBE;
|
||||
zz_stallAndWaitMandatoryQueue;
|
||||
ll_L2toL1Transfer;
|
||||
}
|
||||
|
||||
transition(S, Trigger_L2_to_L1D, ST) {
|
||||
i_allocateTBE;
|
||||
rr_deallocateL2CacheBlock;
|
||||
|
@ -1391,16 +1394,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
ll_L2toL1Transfer;
|
||||
}
|
||||
|
||||
transition(I, Trigger_L2_to_L1I, IT) {
|
||||
i_allocateTBE;
|
||||
rr_deallocateL2CacheBlock;
|
||||
jj_allocateL1ICacheBlock;
|
||||
nb_copyFromTBEToL1;
|
||||
s_deallocateTBE;
|
||||
zz_stallAndWaitMandatoryQueue;
|
||||
ll_L2toL1Transfer;
|
||||
}
|
||||
|
||||
transition(S, Trigger_L2_to_L1I, ST) {
|
||||
i_allocateTBE;
|
||||
rr_deallocateL2CacheBlock;
|
||||
|
@ -1441,11 +1434,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
ll_L2toL1Transfer;
|
||||
}
|
||||
|
||||
transition(IT, Complete_L2_to_L1, IR) {
|
||||
j_popTriggerQueue;
|
||||
kd_wakeUpDependents;
|
||||
}
|
||||
|
||||
transition(ST, Complete_L2_to_L1, SR) {
|
||||
j_popTriggerQueue;
|
||||
kd_wakeUpDependents;
|
||||
|
@ -1500,11 +1488,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
k_popMandatoryQueue;
|
||||
}
|
||||
|
||||
transition(I, L2_Replacement) {
|
||||
rr_deallocateL2CacheBlock;
|
||||
ka_wakeUpAllDependents;
|
||||
}
|
||||
|
||||
transition(I, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
|
@ -1564,6 +1547,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
// S -> I on Other_GETX or Invalidate: ack the request, forward the eviction
// to the CPU, and deallocate the block (gr_deallocateCacheBlock) so the
// now-idle block is not left allocated in the cache.
transition(S, {Other_GETX, Invalidate}, I) {
|
||||
f_sendAck;
|
||||
forward_eviction_to_cpu;
|
||||
gr_deallocateCacheBlock;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
||||
|
@ -1630,6 +1614,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
// O -> I on Other_GETX or Invalidate: send the data, forward the eviction to
// the CPU, and deallocate the block (gr_deallocateCacheBlock) so the now-idle
// block is not left allocated in the cache.
transition(O, {Other_GETX, Invalidate}, I) {
|
||||
e_sendData;
|
||||
forward_eviction_to_cpu;
|
||||
gr_deallocateCacheBlock;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
||||
|
@ -1712,12 +1697,14 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
// MM -> I on Other_GETX or Invalidate: send exclusive data, forward the
// eviction to the CPU, and deallocate the block (gr_deallocateCacheBlock) so
// the now-idle block is not left allocated in the cache.
transition(MM, {Other_GETX, Invalidate}, I) {
|
||||
c_sendExclusiveData;
|
||||
forward_eviction_to_cpu;
|
||||
gr_deallocateCacheBlock;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
||||
// MM -> I on Other_GETS: send exclusive data, forward the eviction to the
// CPU, and deallocate the block (gr_deallocateCacheBlock) so the now-idle
// block is not left allocated in the cache.
transition(MM, Other_GETS, I) {
|
||||
c_sendExclusiveData;
|
||||
forward_eviction_to_cpu;
|
||||
gr_deallocateCacheBlock;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
||||
|
@ -1778,6 +1765,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
|
|||
// M -> I on Other_GETX or Invalidate: send exclusive data, forward the
// eviction to the CPU, and deallocate the block (gr_deallocateCacheBlock) so
// the now-idle block is not left allocated in the cache.
transition(M, {Other_GETX, Invalidate}, I) {
|
||||
c_sendExclusiveData;
|
||||
forward_eviction_to_cpu;
|
||||
gr_deallocateCacheBlock;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue