ruby: added probe filter support to hammer
This commit is contained in:
parent
593ae7457e
commit
8b28848321
8 changed files with 613 additions and 98 deletions
|
@ -81,6 +81,7 @@ def create_system(options, system, piobus, dma_devices):
|
|||
# Must create the individual controllers before the network to ensure the
|
||||
# controller constructors are called before the network constructor
|
||||
#
|
||||
l2_bits = int(math.log(options.num_l2caches, 2))
|
||||
|
||||
for i in xrange(options.num_cpus):
|
||||
#
|
||||
|
@ -104,9 +105,7 @@ def create_system(options, system, piobus, dma_devices):
|
|||
sequencer = cpu_seq,
|
||||
L1IcacheMemory = l1i_cache,
|
||||
L1DcacheMemory = l1d_cache,
|
||||
l2_select_num_bits = \
|
||||
math.log(options.num_l2caches,
|
||||
2),
|
||||
l2_select_num_bits = l2_bits,
|
||||
N_tokens = n_tokens,
|
||||
retry_threshold = \
|
||||
options.l1_retries,
|
||||
|
@ -129,7 +128,8 @@ def create_system(options, system, piobus, dma_devices):
|
|||
# First create the Ruby objects associated with this cpu
|
||||
#
|
||||
l2_cache = L2Cache(size = options.l2_size,
|
||||
assoc = options.l2_assoc)
|
||||
assoc = options.l2_assoc,
|
||||
start_index_bit = l2_bits)
|
||||
|
||||
l2_cntrl = L2Cache_Controller(version = i,
|
||||
L2cacheMemory = l2_cache,
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#
|
||||
# Authors: Brad Beckmann
|
||||
|
||||
import math
|
||||
import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
|
@ -43,9 +44,17 @@ class L1Cache(RubyCache):
|
|||
class L2Cache(RubyCache):
|
||||
latency = 10
|
||||
|
||||
#
|
||||
# Probe filter is a cache, latency is not used
|
||||
#
|
||||
class ProbeFilter(RubyCache):
|
||||
latency = 1
|
||||
|
||||
def define_options(parser):
|
||||
parser.add_option("--allow-atomic-migration", action="store_true",
|
||||
help="allow migratory sharing for atomic only accessed blocks")
|
||||
parser.add_option("--pf-on", action="store_true",
|
||||
help="Hammer: enable Probe Filter")
|
||||
|
||||
def create_system(options, system, piobus, dma_devices):
|
||||
|
||||
|
@ -107,6 +116,29 @@ def create_system(options, system, piobus, dma_devices):
|
|||
long(system.physmem.range.first) + 1
|
||||
mem_module_size = phys_mem_size / options.num_dirs
|
||||
|
||||
#
|
||||
# determine size and index bits for probe filter
|
||||
# By default, the probe filter size is configured to be twice the
|
||||
# size of the L2 cache.
|
||||
#
|
||||
pf_size = MemorySize(options.l2_size)
|
||||
pf_size.value = pf_size.value * 2
|
||||
dir_bits = int(math.log(options.num_dirs, 2))
|
||||
pf_bits = int(math.log(pf_size.value, 2))
|
||||
if options.numa_high_bit:
|
||||
if options.numa_high_bit > 0:
|
||||
# if numa high bit explicitly set, make sure it does not overlap
|
||||
# with the probe filter index
|
||||
assert(options.numa_high_bit - dir_bits > pf_bits)
|
||||
|
||||
# set the probe filter start bit to just above the block offset
|
||||
pf_start_bit = 6
|
||||
else:
|
||||
if dir_bits > 0:
|
||||
pf_start_bit = dir_bits + 5
|
||||
else:
|
||||
pf_start_bit = 6
|
||||
|
||||
for i in xrange(options.num_dirs):
|
||||
#
|
||||
# Create the Ruby objects associated with the directory controller
|
||||
|
@ -117,6 +149,8 @@ def create_system(options, system, piobus, dma_devices):
|
|||
dir_size = MemorySize('0B')
|
||||
dir_size.value = mem_module_size
|
||||
|
||||
pf = ProbeFilter(size = pf_size, assoc = 4)
|
||||
|
||||
dir_cntrl = Directory_Controller(version = i,
|
||||
directory = \
|
||||
RubyDirectoryMemory( \
|
||||
|
@ -125,7 +159,10 @@ def create_system(options, system, piobus, dma_devices):
|
|||
use_map = options.use_map,
|
||||
map_levels = \
|
||||
options.map_levels),
|
||||
memBuffer = mem_cntrl)
|
||||
probeFilter = pf,
|
||||
memBuffer = mem_cntrl,
|
||||
probe_filter_enabled = \
|
||||
options.pf_on)
|
||||
|
||||
exec("system.dir_cntrl%d = dir_cntrl" % i)
|
||||
dir_cntrl_nodes.append(dir_cntrl)
|
||||
|
|
|
@ -96,6 +96,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
Other_GETX, desc="A GetX from another processor";
|
||||
Other_GETS, desc="A GetS from another processor";
|
||||
Other_GETS_No_Mig, desc="A GetS from another processor";
|
||||
Invalidate, desc="Invalidate block";
|
||||
|
||||
// Responses
|
||||
Ack, desc="Received an ack message";
|
||||
|
@ -292,6 +293,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
} else {
|
||||
trigger(Event:Other_GETS, in_msg.Address);
|
||||
}
|
||||
} else if (in_msg.Type == CoherenceRequestType:INV) {
|
||||
trigger(Event:Invalidate, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
|
||||
trigger(Event:Writeback_Ack, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
|
||||
|
@ -445,7 +448,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
out_msg.Destination.add(in_msg.Requestor);
|
||||
out_msg.DataBlk := getCacheEntry(address).DataBlk;
|
||||
out_msg.Dirty := getCacheEntry(address).Dirty;
|
||||
if (in_msg.DirectedProbe) {
|
||||
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||
} else {
|
||||
out_msg.Acks := 2;
|
||||
}
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
|
@ -470,7 +477,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
out_msg.Destination.add(in_msg.Requestor);
|
||||
out_msg.DataBlk := getCacheEntry(address).DataBlk;
|
||||
out_msg.Dirty := getCacheEntry(address).Dirty;
|
||||
if (in_msg.DirectedProbe) {
|
||||
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||
} else {
|
||||
out_msg.Acks := 2;
|
||||
}
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
|
@ -484,8 +495,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(in_msg.Requestor);
|
||||
out_msg.DataBlk := getCacheEntry(address).DataBlk;
|
||||
DEBUG_EXPR(out_msg.DataBlk);
|
||||
out_msg.Dirty := getCacheEntry(address).Dirty;
|
||||
if (in_msg.DirectedProbe) {
|
||||
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||
} else {
|
||||
out_msg.Acks := 2;
|
||||
}
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
|
@ -499,6 +515,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(in_msg.Requestor);
|
||||
out_msg.Acks := 1;
|
||||
assert(in_msg.DirectedProbe == false);
|
||||
out_msg.MessageSize := MessageSizeType:Response_Control;
|
||||
}
|
||||
}
|
||||
|
@ -512,6 +529,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(in_msg.Requestor);
|
||||
out_msg.Acks := 1;
|
||||
assert(in_msg.DirectedProbe == false);
|
||||
out_msg.MessageSize := MessageSizeType:Response_Control;
|
||||
}
|
||||
}
|
||||
|
@ -527,6 +545,26 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
action(gm_sendUnblockM, "gm", desc="Send unblock to memory and indicate M/O/E state") {
|
||||
enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := CoherenceResponseType:UNBLOCKM;
|
||||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(map_Address_to_Directory(address));
|
||||
out_msg.MessageSize := MessageSizeType:Unblock_Control;
|
||||
}
|
||||
}
|
||||
|
||||
action(gs_sendUnblockS, "gs", desc="Send unblock to memory and indicate S state") {
|
||||
enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := CoherenceResponseType:UNBLOCKS;
|
||||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(map_Address_to_Directory(address));
|
||||
out_msg.MessageSize := MessageSizeType:Unblock_Control;
|
||||
}
|
||||
}
|
||||
|
||||
action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
|
||||
DEBUG_EXPR(getCacheEntry(address).DataBlk);
|
||||
|
||||
|
@ -653,9 +691,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
out_msg.Type := CoherenceResponseType:DATA;
|
||||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(in_msg.Requestor);
|
||||
DEBUG_EXPR(out_msg.Destination);
|
||||
out_msg.DataBlk := TBEs[address].DataBlk;
|
||||
out_msg.Dirty := TBEs[address].Dirty;
|
||||
if (in_msg.DirectedProbe) {
|
||||
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||
} else {
|
||||
out_msg.Acks := 2;
|
||||
}
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
|
@ -719,9 +762,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
|
||||
action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
|
||||
peek(responseToCache_in, ResponseMsg) {
|
||||
DEBUG_EXPR(getCacheEntry(address).DataBlk);
|
||||
DEBUG_EXPR(in_msg.DataBlk);
|
||||
assert(getCacheEntry(address).DataBlk == in_msg.DataBlk);
|
||||
getCacheEntry(address).DataBlk := in_msg.DataBlk;
|
||||
getCacheEntry(address).Dirty := in_msg.Dirty;
|
||||
getCacheEntry(address).Dirty := in_msg.Dirty || getCacheEntry(address).Dirty;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -813,7 +858,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
zz_recycleMandatoryQueue;
|
||||
}
|
||||
|
||||
transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
|
||||
transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||
// stall
|
||||
}
|
||||
|
||||
|
@ -963,7 +1008,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
rr_deallocateL2CacheBlock;
|
||||
}
|
||||
|
||||
transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
|
||||
transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -985,7 +1030,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
rr_deallocateL2CacheBlock;
|
||||
}
|
||||
|
||||
transition(S, Other_GETX, I) {
|
||||
transition(S, {Other_GETX, Invalidate}, I) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1015,7 +1060,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
rr_deallocateL2CacheBlock;
|
||||
}
|
||||
|
||||
transition(O, Other_GETX, I) {
|
||||
transition(O, {Other_GETX, Invalidate}, I) {
|
||||
e_sendData;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1042,7 +1087,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
rr_deallocateL2CacheBlock;
|
||||
}
|
||||
|
||||
transition(MM, Other_GETX, I) {
|
||||
transition(MM, {Other_GETX, Invalidate}, I) {
|
||||
c_sendExclusiveData;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1074,7 +1119,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
rr_deallocateL2CacheBlock;
|
||||
}
|
||||
|
||||
transition(M, Other_GETX, I) {
|
||||
transition(M, {Other_GETX, Invalidate}, I) {
|
||||
c_sendExclusiveData;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1086,7 +1131,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
|
||||
// Transitions from IM
|
||||
|
||||
transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
|
||||
transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1118,7 +1163,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
l_popForwardQueue;
|
||||
}
|
||||
|
||||
transition(SM, Other_GETX, IM) {
|
||||
transition(SM, {Other_GETX, Invalidate}, IM) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1145,14 +1190,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
|
||||
transition(ISM, All_acks_no_sharers, MM) {
|
||||
sxt_trig_ext_store_hit;
|
||||
g_sendUnblock;
|
||||
gm_sendUnblockM;
|
||||
s_deallocateTBE;
|
||||
j_popTriggerQueue;
|
||||
}
|
||||
|
||||
// Transitions from OM
|
||||
|
||||
transition(OM, Other_GETX, IM) {
|
||||
transition(OM, {Other_GETX, Invalidate}, IM) {
|
||||
e_sendData;
|
||||
pp_incrementNumberOfMessagesByOne;
|
||||
l_popForwardQueue;
|
||||
|
@ -1171,14 +1216,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
|
||||
transition(OM, {All_acks, All_acks_no_sharers}, MM) {
|
||||
sxt_trig_ext_store_hit;
|
||||
g_sendUnblock;
|
||||
gm_sendUnblockM;
|
||||
s_deallocateTBE;
|
||||
j_popTriggerQueue;
|
||||
}
|
||||
|
||||
// Transitions from IS
|
||||
|
||||
transition(IS, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
|
||||
transition(IS, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1237,14 +1282,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
transition(SS, All_acks, S) {
|
||||
g_sendUnblock;
|
||||
gs_sendUnblockS;
|
||||
s_deallocateTBE;
|
||||
j_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(SS, All_acks_no_sharers, S) {
|
||||
// Note: The directory might still be the owner, so that is why we go to S
|
||||
g_sendUnblock;
|
||||
gs_sendUnblockS;
|
||||
s_deallocateTBE;
|
||||
j_popTriggerQueue;
|
||||
}
|
||||
|
@ -1263,7 +1308,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
transition(MM_W, All_acks_no_sharers, MM) {
|
||||
g_sendUnblock;
|
||||
gm_sendUnblockM;
|
||||
s_deallocateTBE;
|
||||
j_popTriggerQueue;
|
||||
}
|
||||
|
@ -1282,14 +1327,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
transition(M_W, All_acks_no_sharers, M) {
|
||||
g_sendUnblock;
|
||||
gm_sendUnblockM;
|
||||
s_deallocateTBE;
|
||||
j_popTriggerQueue;
|
||||
}
|
||||
|
||||
// Transitions from OI/MI
|
||||
|
||||
transition({OI, MI}, Other_GETX, II) {
|
||||
transition({OI, MI}, {Other_GETX, Invalidate}, II) {
|
||||
q_sendDataFromTBEToCache;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
@ -1312,7 +1357,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
// Transitions from II
|
||||
transition(II, {Other_GETS, Other_GETS_No_Mig, Other_GETX}, II) {
|
||||
transition(II, {Other_GETS, Other_GETS_No_Mig, Other_GETX, Invalidate}, II) {
|
||||
f_sendAck;
|
||||
l_popForwardQueue;
|
||||
}
|
||||
|
|
|
@ -35,8 +35,10 @@
|
|||
|
||||
machine(Directory, "AMD Hammer-like protocol")
|
||||
: DirectoryMemory * directory,
|
||||
CacheMemory * probeFilter,
|
||||
MemoryControl * memBuffer,
|
||||
int memory_controller_latency = 2
|
||||
int memory_controller_latency = 2,
|
||||
bool probe_filter_enabled = false
|
||||
{
|
||||
|
||||
MessageBuffer forwardFromDir, network="To", virtual_network="3", ordered="false";
|
||||
|
@ -56,9 +58,16 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
// STATES
|
||||
enumeration(State, desc="Directory states", default="Directory_State_E") {
|
||||
// Base states
|
||||
NO, desc="Not Owner";
|
||||
O, desc="Owner";
|
||||
E, desc="Exclusive Owner (we can provide the data in exclusive)";
|
||||
NX, desc="Not Owner, probe filter entry exists, block in O at Owner";
|
||||
NO, desc="Not Owner, probe filter entry exists, block in E/M at Owner";
|
||||
S, desc="Data clean, probe filter entry exists pointing to the current owner";
|
||||
O, desc="Data clean, probe filter entry exists";
|
||||
E, desc="Exclusive Owner, no probe filter entry";
|
||||
|
||||
O_R, desc="Was data Owner, replacing probe filter entry";
|
||||
S_R, desc="Was Not Owner or Sharer, replacing probe filter entry";
|
||||
NO_R, desc="Was Not Owner or Sharer, replacing probe filter entry";
|
||||
|
||||
NO_B, "NO^B", desc="Not Owner, Blocked";
|
||||
O_B, "O^B", desc="Owner, Blocked";
|
||||
NO_B_W, desc="Not Owner, Blocked, waiting for Dram";
|
||||
|
@ -83,11 +92,16 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
GETS, desc="A GETS arrives";
|
||||
PUT, desc="A PUT arrives";
|
||||
Unblock, desc="An unblock message arrives";
|
||||
UnblockS, desc="An unblock message arrives, requestor indicates S state";
|
||||
UnblockM, desc="An unblock message arrives, requestor indicates M/O/E state";
|
||||
Writeback_Clean, desc="The final part of a PutX (no data)";
|
||||
Writeback_Dirty, desc="The final part of a PutX (data)";
|
||||
Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)";
|
||||
Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)";
|
||||
|
||||
// Probe filter
|
||||
Pf_Replacement, desc="probe filter replacement";
|
||||
|
||||
// DMA requests
|
||||
DMA_READ, desc="A DMA Read memory request";
|
||||
DMA_WRITE, desc="A DMA Write memory request";
|
||||
|
@ -100,10 +114,12 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
Ack, desc="Received an ack message";
|
||||
Shared_Ack, desc="Received an ack message, responder has a shared copy";
|
||||
Shared_Data, desc="Received a data message, responder has a shared copy";
|
||||
Data, desc="Received a data message, responder had an owner or exclusive copy, they gave it to us";
|
||||
Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us";
|
||||
|
||||
// Triggers
|
||||
All_acks_and_data, desc="Received all required data and message acks";
|
||||
All_acks_and_shared_data, desc="Received shared data and message acks";
|
||||
All_acks_and_owner_data, desc="Received data and message acks, sharers exist but no owner was flagged";
|
||||
All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
|
||||
}
|
||||
|
||||
|
@ -115,18 +131,27 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
DataBlock DataBlk, desc="data for the block";
|
||||
}
|
||||
|
||||
// ProbeFilterEntry
|
||||
structure(PfEntry, desc="Probe filter entry", interface="AbstractCacheEntry") {
|
||||
State PfState, desc="Directory state";
|
||||
MachineID Owner, desc="Owner node";
|
||||
DataBlock DataBlk, desc="data for the block";
|
||||
}
|
||||
|
||||
// TBE entries for DMA requests
|
||||
structure(TBE, desc="TBE entries for outstanding DMA requests") {
|
||||
Address PhysicalAddress, desc="physical address";
|
||||
State TBEState, desc="Transient State";
|
||||
CoherenceResponseType ResponseType, desc="The type for the subsequent response message";
|
||||
int Acks, default="0", desc="The number of acks that the waiting response represents";
|
||||
DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory";
|
||||
DataBlock DataBlk, desc="The current view of system memory";
|
||||
int Len, desc="...";
|
||||
MachineID DmaRequestor, desc="DMA requestor";
|
||||
int NumPendingMsgs, desc="Number of pending acks/messages";
|
||||
bool CacheDirty, desc="Indicates whether a cache has responded with dirty data";
|
||||
bool Sharers, desc="Indicates whether a cache has indicated it is currently a sharer";
|
||||
bool CacheDirty, default="false", desc="Indicates whether a cache has responded with dirty data";
|
||||
bool Sharers, default="false", desc="Indicates whether a cache has indicated it is currently a sharer";
|
||||
bool Owned, default="false", desc="Indicates whether a cache has indicated it is currently the owner";
|
||||
}
|
||||
|
||||
external_type(TBETable) {
|
||||
|
@ -144,10 +169,21 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
return static_cast(Entry, directory[addr]);
|
||||
}
|
||||
|
||||
PfEntry getPfEntry(Address addr), return_by_ref="yes" {
|
||||
return static_cast(PfEntry, probeFilter[addr]);
|
||||
}
|
||||
|
||||
State getState(Address addr) {
|
||||
if (TBEs.isPresent(addr)) {
|
||||
return TBEs[addr].TBEState;
|
||||
} else {
|
||||
if (probe_filter_enabled) {
|
||||
if (probeFilter.isTagPresent(addr)) {
|
||||
assert(getPfEntry(addr).PfState == getDirectoryEntry(addr).DirectoryState);
|
||||
} else {
|
||||
assert(getDirectoryEntry(addr).DirectoryState == State:E);
|
||||
}
|
||||
}
|
||||
return getDirectoryEntry(addr).DirectoryState;
|
||||
}
|
||||
}
|
||||
|
@ -156,9 +192,31 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
if (TBEs.isPresent(addr)) {
|
||||
TBEs[addr].TBEState := state;
|
||||
}
|
||||
if (probe_filter_enabled) {
|
||||
if (probeFilter.isTagPresent(addr)) {
|
||||
getPfEntry(addr).PfState := state;
|
||||
}
|
||||
if (state == State:NX || state == State:NO || state == State:S || state == State:O) {
|
||||
assert(probeFilter.isTagPresent(addr));
|
||||
}
|
||||
}
|
||||
if (state == State:E || state == State:NX || state == State:NO || state == State:S ||
|
||||
state == State:O) {
|
||||
assert(TBEs.isPresent(addr) == false);
|
||||
}
|
||||
getDirectoryEntry(addr).DirectoryState := state;
|
||||
}
|
||||
|
||||
Event cache_request_to_event(CoherenceRequestType type) {
|
||||
if (type == CoherenceRequestType:GETS) {
|
||||
return Event:GETS;
|
||||
} else if (type == CoherenceRequestType:GETX) {
|
||||
return Event:GETX;
|
||||
} else {
|
||||
error("Invalid CoherenceRequestType");
|
||||
}
|
||||
}
|
||||
|
||||
MessageBuffer triggerQueue, ordered="true";
|
||||
|
||||
// ** OUT_PORTS **
|
||||
|
@ -180,7 +238,9 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
if (triggerQueue_in.isReady()) {
|
||||
peek(triggerQueue_in, TriggerMsg) {
|
||||
if (in_msg.Type == TriggerType:ALL_ACKS) {
|
||||
trigger(Event:All_acks_and_data, in_msg.Address);
|
||||
trigger(Event:All_acks_and_owner_data, in_msg.Address);
|
||||
} else if (in_msg.Type == TriggerType:ALL_ACKS_OWNER_EXISTS) {
|
||||
trigger(Event:All_acks_and_shared_data, in_msg.Address);
|
||||
} else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) {
|
||||
trigger(Event:All_acks_and_data_no_sharers, in_msg.Address);
|
||||
} else {
|
||||
|
@ -195,6 +255,10 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
if (in_msg.Type == CoherenceResponseType:UNBLOCK) {
|
||||
trigger(Event:Unblock, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:UNBLOCKS) {
|
||||
trigger(Event:UnblockS, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:UNBLOCKM) {
|
||||
trigger(Event:UnblockM, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:WB_CLEAN) {
|
||||
trigger(Event:Writeback_Clean, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:WB_DIRTY) {
|
||||
|
@ -220,8 +284,9 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
trigger(Event:Shared_Ack, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) {
|
||||
trigger(Event:Shared_Data, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE ||
|
||||
in_msg.Type == CoherenceResponseType:DATA) {
|
||||
} else if (in_msg.Type == CoherenceResponseType:DATA) {
|
||||
trigger(Event:Data, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
|
||||
trigger(Event:Exclusive_Data, in_msg.Address);
|
||||
} else {
|
||||
error("Unexpected message");
|
||||
|
@ -247,14 +312,22 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
in_port(requestQueue_in, RequestMsg, requestToDir) {
|
||||
if (requestQueue_in.isReady()) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
if (in_msg.Type == CoherenceRequestType:GETS) {
|
||||
trigger(Event:GETS, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceRequestType:GETX) {
|
||||
trigger(Event:GETX, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceRequestType:PUT) {
|
||||
if (in_msg.Type == CoherenceRequestType:PUT) {
|
||||
trigger(Event:PUT, in_msg.Address);
|
||||
} else {
|
||||
error("Invalid message");
|
||||
if (probe_filter_enabled) {
|
||||
if (probeFilter.isTagPresent(in_msg.Address)) {
|
||||
trigger(cache_request_to_event(in_msg.Type), in_msg.Address);
|
||||
} else {
|
||||
if (probeFilter.cacheAvail(in_msg.Address)) {
|
||||
trigger(cache_request_to_event(in_msg.Type), in_msg.Address);
|
||||
} else {
|
||||
trigger(Event:Pf_Replacement, probeFilter.cacheProbe(in_msg.Address));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
trigger(cache_request_to_event(in_msg.Type), in_msg.Address);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -278,6 +351,31 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
|
||||
// Actions
|
||||
|
||||
action(r_setMRU, "\rr", desc="manually set the MRU bit for pf entry" ) {
|
||||
if (probe_filter_enabled) {
|
||||
assert(probeFilter.isTagPresent(address));
|
||||
probeFilter.setMRU(address);
|
||||
}
|
||||
}
|
||||
|
||||
action(auno_assertUnblockerNotOwner, "auno", desc="assert unblocker not owner") {
|
||||
if (probe_filter_enabled) {
|
||||
assert(probeFilter.isTagPresent(address));
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
assert(getPfEntry(address).Owner != in_msg.Sender);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(uo_updateOwnerIfPf, "uo", desc="update owner") {
|
||||
if (probe_filter_enabled) {
|
||||
assert(probeFilter.isTagPresent(address));
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
getPfEntry(address).Owner := in_msg.Sender;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
|
@ -302,6 +400,27 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
action(pfa_probeFilterAllocate, "pfa", desc="Allocate ProbeFilterEntry") {
|
||||
if (probe_filter_enabled) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
probeFilter.allocate(address, new PfEntry);
|
||||
getPfEntry(in_msg.Address).Owner := in_msg.Requestor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(pfd_probeFilterDeallocate, "pfd", desc="Deallocate ProbeFilterEntry") {
|
||||
if (probe_filter_enabled) {
|
||||
probeFilter.deallocate(address);
|
||||
}
|
||||
}
|
||||
|
||||
action(ppfd_possibleProbeFilterDeallocate, "ppfd", desc="Deallocate ProbeFilterEntry if one is present") {
|
||||
if (probe_filter_enabled && probeFilter.isTagPresent(address)) {
|
||||
probeFilter.deallocate(address);
|
||||
}
|
||||
}
|
||||
|
||||
action(v_allocateTBE, "v", desc="Allocate TBE") {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
TBEs.allocate(address);
|
||||
|
@ -330,10 +449,30 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
action(pa_setPendingMsgsToAll, "pa", desc="set pending msgs to all") {
|
||||
TBEs[address].NumPendingMsgs := machineCount(MachineType:L1Cache);
|
||||
}
|
||||
|
||||
action(po_setPendingMsgsToOne, "po", desc="set pending msgs to one") {
|
||||
TBEs[address].NumPendingMsgs := 1;
|
||||
}
|
||||
|
||||
action(w_deallocateTBE, "w", desc="Deallocate TBE") {
|
||||
TBEs.deallocate(address);
|
||||
}
|
||||
|
||||
action(sa_setAcksToOne, "sa", desc="Forwarded request, set the ack amount to one") {
|
||||
TBEs[address].Acks := 1;
|
||||
}
|
||||
|
||||
action(saa_setAcksToAllIfPF, "saa", desc="Non-forwarded request, set the ack amount to all if the probe filter is enabled, otherwise to one") {
|
||||
if (probe_filter_enabled) {
|
||||
TBEs[address].Acks := machineCount(MachineType:L1Cache);
|
||||
} else {
|
||||
TBEs[address].Acks := 1;
|
||||
}
|
||||
}
|
||||
|
||||
action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") {
|
||||
peek(responseToDir_in, ResponseMsg) {
|
||||
assert(in_msg.Acks > 0);
|
||||
|
@ -357,7 +496,11 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
enqueue(triggerQueue_out, TriggerMsg) {
|
||||
out_msg.Address := address;
|
||||
if (TBEs[address].Sharers) {
|
||||
if (TBEs[address].Owned) {
|
||||
out_msg.Type := TriggerType:ALL_ACKS_OWNER_EXISTS;
|
||||
} else {
|
||||
out_msg.Type := TriggerType:ALL_ACKS;
|
||||
}
|
||||
} else {
|
||||
out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS;
|
||||
}
|
||||
|
@ -365,6 +508,22 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
action(spa_setPendingAcksToZeroIfPF, "spa", desc="if probe filter, no need to wait for acks") {
|
||||
if (probe_filter_enabled) {
|
||||
TBEs[address].NumPendingMsgs := 0;
|
||||
}
|
||||
}
|
||||
|
||||
action(sc_signalCompletionIfPF, "sc", desc="indicate that we should skip waiting for cpu acks") {
|
||||
if (TBEs[address].NumPendingMsgs == 0) {
|
||||
assert(probe_filter_enabled);
|
||||
enqueue(triggerQueue_out, TriggerMsg) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(d_sendData, "d", desc="Send data to requestor") {
|
||||
peek(memQueue_in, MemoryMsg) {
|
||||
enqueue(responseNetwork_out, ResponseMsg, latency="1") {
|
||||
|
@ -373,8 +532,11 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
out_msg.Sender := machineID;
|
||||
out_msg.Destination.add(in_msg.OriginalRequestorMachId);
|
||||
out_msg.DataBlk := in_msg.DataBlk;
|
||||
DEBUG_EXPR(out_msg.DataBlk);
|
||||
out_msg.Dirty := false; // By definition, the block is now clean
|
||||
out_msg.Acks := 1;
|
||||
out_msg.Acks := TBEs[address].Acks;
|
||||
DEBUG_EXPR(out_msg.Acks);
|
||||
assert(out_msg.Acks > 0);
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
|
@ -440,6 +602,11 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
TBEs[address].Sharers := true;
|
||||
}
|
||||
|
||||
action(so_setOwnerBit, "so", desc="Set both the sharer and owned bits") {
|
||||
TBEs[address].Sharers := true;
|
||||
TBEs[address].Owned := true;
|
||||
}
|
||||
|
||||
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(memQueue_out, MemoryMsg, latency="1") {
|
||||
|
@ -468,7 +635,47 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
action(f_forwardRequest, "f", desc="Forward requests") {
|
||||
action(fn_forwardRequestIfNecessary, "fn", desc="Forward requests if necessary") {
|
||||
if ((machineCount(MachineType:L1Cache) > 1) && (TBEs[address].Acks <= 1)) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := in_msg.Type;
|
||||
out_msg.Requestor := in_msg.Requestor;
|
||||
out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
|
||||
out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
|
||||
out_msg.MessageSize := MessageSizeType:Broadcast_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(ia_invalidateAllRequest, "ia", desc="invalidate all copies") {
|
||||
if (machineCount(MachineType:L1Cache) > 1) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := CoherenceRequestType:INV;
|
||||
out_msg.Requestor := machineID;
|
||||
out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
|
||||
out_msg.MessageSize := MessageSizeType:Broadcast_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(io_invalidateOwnerRequest, "io", desc="invalidate the owner's copy with a directed probe") {
|
||||
if (machineCount(MachineType:L1Cache) > 1) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := CoherenceRequestType:INV;
|
||||
out_msg.Requestor := machineID;
|
||||
out_msg.Destination.add(getPfEntry(address).Owner);
|
||||
out_msg.MessageSize := MessageSizeType:Request_Control;
|
||||
out_msg.DirectedProbe := true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(fb_forwardRequestBcast, "fb", desc="Forward requests to all nodes") {
|
||||
if (machineCount(MachineType:L1Cache) > 1) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
|
@ -483,7 +690,35 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Forward the request at the head of requestQueue_in, choosing the
// destination set from the probe filter configuration:
//   - probe_filter_enabled: send one directed probe (Request_Control,
//     DirectedProbe = true) to the owner recorded in the probe filter entry.
//   - otherwise: broadcast to all L1 caches except the original requestor
//     (Broadcast_Control).
// Unlike the other forwarding actions, a single-L1 system is not silently
// skipped here: the assert below fires if only one L1 cache exists.
action(fc_forwardRequestConditionalOwner, "fc", desc="Forward request to one or more nodes") {
|
||||
assert(machineCount(MachineType:L1Cache) > 1);
|
||||
if (probe_filter_enabled) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := in_msg.Type;
|
||||
out_msg.Requestor := in_msg.Requestor;
|
||||
out_msg.Destination.add(getPfEntry(address).Owner);
|
||||
out_msg.MessageSize := MessageSizeType:Request_Control;
|
||||
out_msg.DirectedProbe := true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := in_msg.Type;
|
||||
out_msg.Requestor := in_msg.Requestor;
|
||||
out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
|
||||
out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
|
||||
out_msg.MessageSize := MessageSizeType:Broadcast_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(f_forwardWriteFromDma, "fw", desc="Forward requests") {
|
||||
if (TBEs[address].NumPendingMsgs > 0) {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
|
@ -498,8 +733,10 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(f_forwardReadFromDma, "fr", desc="Forward requests") {
|
||||
if (TBEs[address].NumPendingMsgs > 0) {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
|
@ -514,6 +751,7 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
|
||||
requestQueue_in.dequeue();
|
||||
|
@ -554,6 +792,14 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Copy the data block carried by the message at the head of responseToDir_in
// into the directory entry for this address, then log the message's address
// and data through DEBUG_EXPR.
action(wr_writeResponseDataToMemory, "wr", desc="Write response data to memory") {
|
||||
peek(responseToDir_in, ResponseMsg) {
|
||||
getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
|
||||
DEBUG_EXPR(in_msg.Address);
|
||||
DEBUG_EXPR(in_msg.DataBlk);
|
||||
}
|
||||
}
|
||||
|
||||
action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") {
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
assert(in_msg.Dirty);
|
||||
|
@ -565,14 +811,31 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
// Commit a DMA write to the directory's copy of the block in two steps:
//   1. overwrite the whole directory data block with the TBE's DataBlk;
//   2. overlay the DMA payload (TBE DmaDataBlk) with copyPartial, starting at
//      addressOffset(PhysicalAddress) and covering Len.
// The directory data block is logged before, between and after the steps.
// NOTE(review): Len is presumably a byte count within the block -- confirm
// against copyPartial's definition.
action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") {
|
||||
DEBUG_EXPR(getDirectoryEntry(address).DataBlk);
|
||||
getDirectoryEntry(address).DataBlk := TBEs[address].DataBlk;
|
||||
DEBUG_EXPR(getDirectoryEntry(address).DataBlk);
|
||||
getDirectoryEntry(address).DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
|
||||
DEBUG_EXPR(getDirectoryEntry(address).DataBlk);
|
||||
}
|
||||
|
||||
// Overwrite the directory's data block for this address with the TBE's
// DataBlk, logging the directory data before and after the copy.
// No DMA payload (DmaDataBlk) is merged here; that is what
// dwt_writeDmaDataFromTBE does. The desc string previously duplicated that
// action's "DMA Write ..." wording, so it now describes the plain copy.
action(wdt_writeDataFromTBE, "wdt", desc="Write data to memory from TBE") {
|
||||
DEBUG_EXPR(getDirectoryEntry(address).DataBlk);
|
||||
getDirectoryEntry(address).DataBlk := TBEs[address].DataBlk;
|
||||
DEBUG_EXPR(getDirectoryEntry(address).DataBlk);
|
||||
}
|
||||
|
||||
// Sanity check: the TBE for this address must have its CacheDirty flag set.
// NOTE(review): the flag is presumably set when a cache response supplies
// the data -- confirm where CacheDirty is written.
action(a_assertCacheData, "ac", desc="Assert that a cache provided the data") {
|
||||
assert(TBEs[address].CacheDirty);
|
||||
}
|
||||
|
||||
// Sanity check used only when the probe filter is enabled: the requestor of
// the message at the head of requestQueue_in must differ from the owner
// recorded in the probe filter entry for this address.
// With the probe filter disabled this action does nothing.
action(ano_assertNotOwner, "ano", desc="Assert that request is not current owner") {
|
||||
if (probe_filter_enabled) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
assert(getPfEntry(address).Owner != in_msg.Requestor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") {
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
enqueue(memQueue_out, MemoryMsg, latency="1") {
|
||||
|
@ -616,75 +879,152 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
|
||||
// Transitions out of E state
|
||||
transition(E, GETX, NO_B_W) {
|
||||
pfa_probeFilterAllocate;
|
||||
v_allocateTBE;
|
||||
rx_recordExclusiveInTBE;
|
||||
saa_setAcksToAllIfPF;
|
||||
qf_queueMemoryFetchRequest;
|
||||
f_forwardRequest;
|
||||
fn_forwardRequestIfNecessary;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
transition(E, GETS, NO_B_W) {
|
||||
pfa_probeFilterAllocate;
|
||||
v_allocateTBE;
|
||||
rx_recordExclusiveInTBE;
|
||||
saa_setAcksToAllIfPF;
|
||||
qf_queueMemoryFetchRequest;
|
||||
f_forwardRequest;
|
||||
fn_forwardRequestIfNecessary;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// E + DMA_READ -> NO_DR_B_W: record the DMA request in a TBE, queue the
// memory read, then forward the read to the caches.
// f_forwardReadFromDma only forwards while TBEs[address].NumPendingMsgs > 0,
// so spa_setPendingAcksToZeroIfPF has to run before it.
// NOTE(review): spa_ presumably zeroes that count when the probe filter is
// enabled, suppressing the forward -- confirm against its definition.
transition(E, DMA_READ, NO_DR_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
qd_queueMemoryRequestFromDmaRead;
|
||||
spa_setPendingAcksToZeroIfPF;
|
||||
f_forwardReadFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// E + DMA_WRITE -> NO_DW_B_W: record the DMA request in a TBE, then forward
// the write to the caches.
// f_forwardWriteFromDma only forwards while TBEs[address].NumPendingMsgs > 0,
// so spa_setPendingAcksToZeroIfPF has to run before it.
// NOTE(review): sc_signalCompletionIfPF presumably triggers completion
// directly when the probe filter makes probing unnecessary -- confirm.
transition(E, DMA_WRITE, NO_DW_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
spa_setPendingAcksToZeroIfPF;
|
||||
sc_signalCompletionIfPF;
|
||||
f_forwardWriteFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// Transitions out of O state
|
||||
transition(O, GETX, NO_B_W) {
|
||||
r_setMRU;
|
||||
v_allocateTBE;
|
||||
r_recordDataInTBE;
|
||||
sa_setAcksToOne;
|
||||
qf_queueMemoryFetchRequest;
|
||||
f_forwardRequest;
|
||||
fb_forwardRequestBcast;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// This transition is dumb, if a shared copy exists on-chip, then that should
|
||||
// provide data, not slow off-chip dram. The problem is that the current
|
||||
// caches don't provide data in S state
|
||||
transition(O, GETS, O_B_W) {
|
||||
r_setMRU;
|
||||
v_allocateTBE;
|
||||
r_recordDataInTBE;
|
||||
saa_setAcksToAllIfPF;
|
||||
qf_queueMemoryFetchRequest;
|
||||
f_forwardRequest;
|
||||
fn_forwardRequestIfNecessary;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// O + DMA_READ -> O_DR_B_W: record the DMA request in a TBE, queue the
// memory read and forward the read to the caches.
// spa_setPendingAcksToZeroIfPF runs before f_forwardReadFromDma because the
// forward is conditional on TBEs[address].NumPendingMsgs > 0.
transition(O, DMA_READ, O_DR_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
spa_setPendingAcksToZeroIfPF;
|
||||
qd_queueMemoryRequestFromDmaRead;
|
||||
f_forwardReadFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
transition({E, O, NO}, DMA_WRITE, NO_DW_B_W) {
|
||||
// O + Pf_Replacement -> O_R: the probe filter entry is being evicted.
// Allocate a TBE, expect a reply from every node, broadcast an INV to all
// L1 caches (ia_invalidateAllRequest) and release the probe filter entry.
transition(O, Pf_Replacement, O_R) {
|
||||
v_allocateTBE;
|
||||
pa_setPendingMsgsToAll;
|
||||
ia_invalidateAllRequest;
|
||||
pfd_probeFilterDeallocate;
|
||||
}
|
||||
|
||||
// S + Pf_Replacement -> S_R: same sequence as the O case -- allocate a TBE,
// expect a reply from every node, broadcast an INV to all L1 caches and
// release the probe filter entry.
transition(S, Pf_Replacement, S_R) {
|
||||
v_allocateTBE;
|
||||
pa_setPendingMsgsToAll;
|
||||
ia_invalidateAllRequest;
|
||||
pfd_probeFilterDeallocate;
|
||||
}
|
||||
|
||||
// NO + Pf_Replacement -> NO_R: only the recorded owner is probed, so a
// single reply is expected (po_setPendingMsgsToOne) and the INV is sent as a
// directed probe (io_invalidateOwnerRequest).
// io_ reads getPfEntry(address).Owner, so it must run before
// pfd_probeFilterDeallocate releases the entry.
transition(NO, Pf_Replacement, NO_R) {
|
||||
v_allocateTBE;
|
||||
po_setPendingMsgsToOne;
|
||||
io_invalidateOwnerRequest;
|
||||
pfd_probeFilterDeallocate;
|
||||
}
|
||||
|
||||
// NX + Pf_Replacement -> NO_R: shares the NO_R transient state with the NO
// case, but probes every node -- replies are expected from all
// (pa_setPendingMsgsToAll) and the INV is broadcast
// (ia_invalidateAllRequest) before the probe filter entry is released.
transition(NX, Pf_Replacement, NO_R) {
|
||||
v_allocateTBE;
|
||||
pa_setPendingMsgsToAll;
|
||||
ia_invalidateAllRequest;
|
||||
pfd_probeFilterDeallocate;
|
||||
}
|
||||
|
||||
// {O, S, NO, NX} + DMA_WRITE -> NO_DW_B_W: record the DMA request in a TBE
// and forward the write to the caches. Unlike the E-state DMA_WRITE
// transition, no probe-filter specific action adjusts the pending count.
transition({O, S, NO, NX}, DMA_WRITE, NO_DW_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
f_forwardWriteFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// Transitions out of NX state
|
||||
// NX + GETX -> NO_B: touch the probe filter entry's replacement state
// (r_setMRU) and forward the request with fb_forwardRequestBcast, i.e. to
// all nodes rather than to the owner alone.
transition(NX, GETX, NO_B) {
|
||||
r_setMRU;
|
||||
fb_forwardRequestBcast;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// Transitions out of NO state
|
||||
transition(NO, GETX, NO_B) {
|
||||
f_forwardRequest;
|
||||
r_setMRU;
|
||||
ano_assertNotOwner;
|
||||
fc_forwardRequestConditionalOwner;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
transition(NO, GETS, NO_B) {
|
||||
f_forwardRequest;
|
||||
// S + GETX -> NO_B: touch the probe filter entry's replacement state and
// broadcast the request to all nodes.
transition(S, GETX, NO_B) {
|
||||
r_setMRU;
|
||||
fb_forwardRequestBcast;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
transition(NO, PUT, WB) {
|
||||
// S + GETS -> NO_B: touch the probe filter entry's replacement state, check
// (when the probe filter is enabled) that the requestor is not the recorded
// owner, then broadcast the request to all nodes.
transition(S, GETS, NO_B) {
|
||||
r_setMRU;
|
||||
ano_assertNotOwner;
|
||||
fb_forwardRequestBcast;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// {NX, NO} + GETS -> NO_B: touch the probe filter entry's replacement state,
// check that the requestor is not the recorded owner, then forward with
// fc_forwardRequestConditionalOwner -- a directed probe to the owner when
// the probe filter is enabled, a broadcast otherwise.
transition({NX, NO}, GETS, NO_B) {
|
||||
r_setMRU;
|
||||
ano_assertNotOwner;
|
||||
fc_forwardRequestConditionalOwner;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// {NO, NX, S} + PUT -> WB: acknowledge the writeback request and wait in WB
// for the writeback (or an Unblock) to arrive. No ownership assertion is
// made here, for the reason given in the note below.
transition({NO, NX, S}, PUT, WB) {
|
||||
//
|
||||
// note that the PUT requestor may not be the current owner if an invalidate
|
||||
// raced with PUT
|
||||
//
|
||||
a_sendWriteBackAck;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
transition(NO, DMA_READ, NO_DR_B_D) {
|
||||
transition({NO, NX, S}, DMA_READ, NO_DR_B_D) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
f_forwardReadFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
|
@ -699,23 +1039,28 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
// Blocked transient states
|
||||
transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
||||
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W,
|
||||
NO_W, O_W, WB, WB_E_W, WB_O_W},
|
||||
{GETS, GETX, PUT}) {
|
||||
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
|
||||
{GETS, GETX, PUT, Pf_Replacement}) {
|
||||
zz_recycleRequest;
|
||||
}
|
||||
|
||||
transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
||||
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W,
|
||||
NO_W, O_W, WB, WB_E_W, WB_O_W},
|
||||
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
|
||||
{DMA_READ, DMA_WRITE}) {
|
||||
y_recycleDmaRequestQueue;
|
||||
}
|
||||
|
||||
transition(NO_B, Unblock, NO) {
|
||||
// NO_B + UnblockS -> NX: the requestor unblocked after ending up in S (see
// the UNBLOCKS response type), so the directory settles in NX. The probe
// filter owner is left unchanged.
transition(NO_B, UnblockS, NX) {
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
||||
transition(O_B, Unblock, O) {
|
||||
// NO_B + UnblockM -> NO: the requestor unblocked holding the block in
// M/O/E (see the UNBLOCKM response type). uo_updateOwnerIfPf runs before the
// unblock message is popped.
// NOTE(review): uo_ presumably records the unblocker as the new probe
// filter owner and therefore needs the message still queued -- confirm.
transition(NO_B, UnblockM, NO) {
|
||||
uo_updateOwnerIfPf;
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
||||
// O_B + UnblockS -> O: the requestor unblocked in S; the directory returns
// to O with no other bookkeeping.
transition(O_B, UnblockS, O) {
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
||||
|
@ -744,7 +1089,32 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
transition(NO_DR_B_W, Ack) {
|
||||
// {O_R, S_R, NO_R} + Ack: one reply to the probe-filter-replacement
// invalidation arrived. Count it, check whether all replies are in, and pop
// the response. No next state is given, so the state is unchanged.
transition({O_R, S_R, NO_R}, Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// S_R + Data: an invalidated cache returned data. Store it in the directory
// entry first (wr_writeResponseDataToMemory peeks the response, so it must
// run before the pop), then count the reply and check for completion.
transition(S_R, Data) {
|
||||
wr_writeResponseDataToMemory;
|
||||
m_decrementNumberOfMessages;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// NO_R + {Data, Exclusive_Data}: the invalidated owner returned data. Store
// it in the directory entry first (wr_writeResponseDataToMemory peeks the
// response, so it must run before the pop), then count the reply and check
// for completion.
transition(NO_R, {Data, Exclusive_Data}) {
|
||||
wr_writeResponseDataToMemory;
|
||||
m_decrementNumberOfMessages;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// {O_R, S_R, NO_R} + All_acks_and_data_no_sharers -> E: every reply to the
// replacement invalidation has arrived; free the TBE and return to E.
transition({O_R, S_R, NO_R}, All_acks_and_data_no_sharers, E) {
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// {NO_DR_B_W, O_DR_B_W} + Ack: count the ack and pop it. Completion is not
// checked here, unlike the replacement-state Ack transition.
// NOTE(review): presumably because these states are still waiting for the
// memory response -- confirm against the Memory_Data transitions.
transition({NO_DR_B_W, O_DR_B_W}, Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
@ -755,6 +1125,19 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// O_DR_B + Shared_Ack: count the reply, note via so_setOwnerBit that a cache
// still holds the block, then check whether all replies are in.
transition(O_DR_B, Shared_Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
so_setOwnerBit;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// O_DR_B_W + Shared_Ack: count the reply and run so_setOwnerBit. Completion
// is not checked in this state.
transition(O_DR_B_W, Shared_Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
so_setOwnerBit;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
transition({NO_DR_B, NO_DR_B_D}, Shared_Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
|
@ -765,7 +1148,7 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
transition(NO_DR_B_W, Shared_Data) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
so_setOwnerBit;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
@ -773,51 +1156,85 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
transition({NO_DR_B, NO_DR_B_D}, Shared_Data) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
so_setOwnerBit;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
transition(NO_DR_B_W, Exclusive_Data) {
|
||||
// NO_DR_B_W + {Exclusive_Data, Data}: record the cache-supplied data in the
// TBE and count the reply. Completion is not checked in this state.
transition(NO_DR_B_W, {Exclusive_Data, Data}) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, Exclusive_Data) {
|
||||
// {NO_DR_B, NO_DR_B_D, NO_DW_B_W} + {Exclusive_Data, Data}: record the
// cache-supplied data in the TBE, count the reply and check whether all
// replies are in.
transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, {Exclusive_Data, Data}) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
transition(NO_DR_B, All_acks_and_data, O) {
|
||||
// NO_DR_B + All_acks_and_owner_data -> O: the DMA read is complete and an
// owner still exists. Send the TBE data to the DMA device, write the same
// TBE data back to the directory's copy, and free the TBE (both data actions
// read the TBE, so they precede w_deallocateTBE).
transition(NO_DR_B, All_acks_and_owner_data, O) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(NO_DR_B_D, All_acks_and_data, O) {
|
||||
// NO_DR_B + All_acks_and_shared_data -> S: the DMA read is complete and
// only sharers remain. Send the TBE data to the DMA device, write the same
// TBE data back to the directory's copy, and free the TBE (both data actions
// read the TBE, so they precede w_deallocateTBE).
transition(NO_DR_B, All_acks_and_shared_data, S) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(O_DR_B, All_acks_and_data_no_sharers, O) {
|
||||
// NO_DR_B_D + All_acks_and_owner_data -> O: same completion sequence as the
// NO_DR_B case -- respond to the DMA device from the TBE, write the TBE data
// to the directory's copy, then free the TBE.
transition(NO_DR_B_D, All_acks_and_owner_data, O) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// NO_DR_B_D + All_acks_and_shared_data -> S: same completion sequence as
// the NO_DR_B case -- respond to the DMA device from the TBE, write the TBE
// data to the directory's copy, then free the TBE.
transition(NO_DR_B_D, All_acks_and_shared_data, S) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// O_DR_B + All_acks_and_owner_data -> O: write the TBE data to the
// directory's copy and free the TBE. No DMA response is sent here, unlike
// the NO_DR_B completions.
// NOTE(review): presumably the DMA device was already answered when the
// memory data arrived -- confirm against the O_DR_B_W transitions.
transition(O_DR_B, All_acks_and_owner_data, O) {
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// O_DR_B + All_acks_and_data_no_sharers -> E: no cache holds the block any
// more. Write the TBE data to the directory's copy, free the TBE and release
// the probe filter entry, since E has no entry.
transition(O_DR_B, All_acks_and_data_no_sharers, E) {
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
pfd_probeFilterDeallocate;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(NO_DR_B, All_acks_and_data_no_sharers, E) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
|
@ -825,7 +1242,9 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
ppfd_possibleProbeFilterDeallocate;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
|
@ -837,7 +1256,9 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
wdt_writeDataFromTBE;
|
||||
w_deallocateTBE;
|
||||
ppfd_possibleProbeFilterDeallocate;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
|
@ -850,6 +1271,7 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
// NO_DW_W + Memory_Ack -> E: memory confirmed the DMA write. Acknowledge
// the DMA device, free the TBE, and release the probe filter entry if one
// exists.
// NOTE(review): the "possible" variant is presumably used because this
// state can be reached from E, where no entry was allocated -- confirm.
transition(NO_DW_W, Memory_Ack, E) {
|
||||
da_sendDmaAck;
|
||||
w_deallocateTBE;
|
||||
ppfd_possibleProbeFilterDeallocate;
|
||||
l_popMemQueue;
|
||||
}
|
||||
|
||||
|
@ -859,11 +1281,11 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
l_popMemQueue;
|
||||
}
|
||||
|
||||
transition(NO_B_W, Unblock, NO_W) {
|
||||
// NO_B_W + {UnblockM, UnblockS} -> NO_W: the requestor unblocked before the
// memory response arrived; either unblock flavour is accepted and only the
// message is popped.
transition(NO_B_W, {UnblockM, UnblockS}, NO_W) {
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
||||
transition(O_B_W, Unblock, O_W) {
|
||||
// O_B_W + UnblockS -> O_W: the requestor unblocked in S before the memory
// response arrived; only the message is popped.
transition(O_B_W, UnblockS, O_W) {
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
||||
|
@ -891,6 +1313,7 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
|
||||
// WB_E_W + Memory_Ack -> E: the exclusive writeback reached memory. Release
// the probe filter entry before returning to E, then pop the memory queue.
transition(WB_E_W, Memory_Ack, E) {
|
||||
pfd_probeFilterDeallocate;
|
||||
l_popMemQueue;
|
||||
}
|
||||
|
||||
|
@ -905,10 +1328,12 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
|
||||
// WB + Writeback_Exclusive_Clean -> E: a clean exclusive writeback needs no
// memory update. Validate the incoming writeback, release the probe filter
// entry and pop the message.
transition(WB, Writeback_Exclusive_Clean, E) {
|
||||
ll_checkIncomingWriteback;
|
||||
pfd_probeFilterDeallocate;
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
||||
// WB + Unblock -> NO: a plain Unblock arrived instead of writeback data, so
// the directory returns to NO. auno_assertUnblockerNotOwner runs before the
// pop.
// NOTE(review): this is presumably the case where the PUT lost a race with
// another request and the unblocker no longer owns the block -- confirm.
transition(WB, Unblock, NO) {
|
||||
auno_assertUnblockerNotOwner;
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ enumeration(CoherenceRequestType, desc="...") {
|
|||
PUT, desc="Put Ownership";
|
||||
WB_ACK, desc="Writeback ack";
|
||||
WB_NACK, desc="Writeback neg. ack";
|
||||
INV, desc="Invalidate";
|
||||
}
|
||||
|
||||
// CoherenceResponseType
|
||||
|
@ -49,7 +50,9 @@ enumeration(CoherenceResponseType, desc="...") {
|
|||
WB_DIRTY, desc="Dirty writeback";
|
||||
WB_EXCLUSIVE_CLEAN, desc="Clean writeback of exclusive data";
|
||||
WB_EXCLUSIVE_DIRTY, desc="Dirty writeback of exclusive data";
|
||||
UNBLOCK, desc="Unblock";
|
||||
UNBLOCK, desc="Unblock for writeback";
|
||||
UNBLOCKS, desc="Unblock now in S";
|
||||
UNBLOCKM, desc="Unblock now in M/O/E";
|
||||
NULL, desc="Null value";
|
||||
}
|
||||
|
||||
|
@ -57,6 +60,7 @@ enumeration(CoherenceResponseType, desc="...") {
|
|||
enumeration(TriggerType, desc="...") {
|
||||
L2_to_L1, desc="L2 to L1 transfer";
|
||||
ALL_ACKS, desc="See corresponding event";
|
||||
ALL_ACKS_OWNER_EXISTS,desc="See corresponding event";
|
||||
ALL_ACKS_NO_SHARERS, desc="See corresponding event";
|
||||
}
|
||||
|
||||
|
@ -73,6 +77,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") {
|
|||
MachineID Requestor, desc="Node who initiated the request";
|
||||
NetDest Destination, desc="Multicast destination mask";
|
||||
MessageSizeType MessageSize, desc="size category of the message";
|
||||
bool DirectedProbe, default="false", desc="probe filter directed probe";
|
||||
}
|
||||
|
||||
// ResponseMsg (and also unblock requests)
|
||||
|
|
|
@ -38,3 +38,4 @@ class RubyCache(SimObject):
|
|||
latency = Param.Int("");
|
||||
assoc = Param.Int("");
|
||||
replacement_policy = Param.String("PSEUDO_LRU", "");
|
||||
start_index_bit = Param.Int(6, "index start, default 6 for 64-byte line");
|
||||
|
|
|
@ -53,6 +53,7 @@ CacheMemory::CacheMemory(const Params *p)
|
|||
m_cache_assoc = p->assoc;
|
||||
m_policy = p->replacement_policy;
|
||||
m_profiler_ptr = new CacheProfiler(name());
|
||||
m_start_index_bit = p->start_index_bit;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -127,8 +128,8 @@ Index
|
|||
CacheMemory::addressToCacheSet(const Address& address) const
|
||||
{
|
||||
assert(address == line_address(address));
|
||||
return address.bitSelect(RubySystem::getBlockSizeBits(),
|
||||
RubySystem::getBlockSizeBits() + m_cache_num_set_bits - 1);
|
||||
return address.bitSelect(m_start_index_bit,
|
||||
m_start_index_bit + m_cache_num_set_bits - 1);
|
||||
}
|
||||
|
||||
// Given a cache index: returns the index of the tag in a set.
|
||||
|
|
|
@ -169,6 +169,7 @@ class CacheMemory : public SimObject
|
|||
int m_cache_num_sets;
|
||||
int m_cache_num_set_bits;
|
||||
int m_cache_assoc;
|
||||
int m_start_index_bit;
|
||||
};
|
||||
|
||||
#endif // __MEM_RUBY_SYSTEM_CACHEMEMORY_HH__
|
||||
|
|
Loading…
Reference in a new issue