ruby: add a three level MESI protocol.

The first two levels (L0, L1) are private to the core; the third level (L2) may
be shared. The protocol supports clustered designs. For example, one can have
two clusters of two cores each, where every core has its own L0 and L1 caches
and there are two L2 controllers, each accessed by only one of the clusters.
Nilay Vaish 2014-01-04 00:03:34 -06:00
parent bb6d7d402b
commit 4070b00875
10 changed files with 2056 additions and 2 deletions
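For concreteness, the sketch below (illustrative only, not part of the patch) reproduces the controller numbering the new Ruby config script uses for the example above: 4 CPUs split into 2 clusters with 2 L2 caches gives two cores and one L2 controller per cluster, and every core keeps a private L0/L1 pair with the same version number as the core.

# Illustrative numbering for 4 CPUs, 2 clusters, 2 L2 caches (assumed values).
num_cpus, num_clusters, num_l2caches = 4, 2, 2
assert num_cpus % num_clusters == 0 and num_l2caches % num_clusters == 0

cpus_per_cluster = num_cpus // num_clusters     # 2 cores per cluster
l2s_per_cluster = num_l2caches // num_clusters  # 1 L2 controller per cluster

for i in range(num_clusters):
    cores = [i * cpus_per_cluster + j for j in range(cpus_per_cluster)]
    l2s = [i * l2s_per_cluster + j for j in range(l2s_per_cluster)]
    # Each core owns private L0/L1 controllers with the core's version number;
    # all cores of cluster i share only the L2 controllers listed for i.
    print("cluster %d: cores/L0/L1 %s -> L2 %s" % (i, cores, l2s))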

@@ -0,0 +1,210 @@
# Copyright (c) 2006-2007 The Regents of The University of Michigan
# Copyright (c) 2009 Advanced Micro Devices, Inc.
# Copyright (c) 2013 Mark D. Hill and David A. Wood
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Brad Beckmann
# Nilay Vaish
import math
import m5
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
#
class L0Cache(RubyCache):
    latency = 1

class L1Cache(RubyCache):
    latency = 5

#
# Note: the L2 Cache latency is not currently used
#
class L2Cache(RubyCache):
    latency = 15

def define_options(parser):
    parser.add_option("--num-clusters", type="int", default=1,
            help="number of clusters in a design in which there are shared\
            caches private to clusters")
    return

def create_system(options, system, piobus, dma_ports, ruby_system):

    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus / options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches / options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in xrange(options.num_clusters):
        for j in xrange(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = '4096B', assoc = 1, is_icache = True,
                start_index_bit = block_size_bits, replacement_policy="LRU")
            l0d_cache = L0Cache(size = '4096B', assoc = 1, is_icache = False,
                start_index_bit = block_size_bits, replacement_policy="LRU")

            l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
                          Icache = l0i_cache, Dcache = l0d_cache,
                          send_evictions = (options.cpu_type == "detailed"),
                          ruby_system = ruby_system)

            # One sequencer per core, numbered with the core's global index.
            cpu_seq = RubySequencer(version = i*num_cpus_per_cluster + j,
                                    icache = l0i_cache, dcache = l0d_cache,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster + j,
                          cache = l1_cache, l2_select_num_bits = l2_bits,
                          cluster_id = i, ruby_system = ruby_system)

            if piobus != None:
                cpu_seq.pio_port = piobus.slave

            exec("ruby_system.l0_cntrl%d = l0_cntrl" % (
                        i*num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl" % (
                        i*num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            l0_cntrl.peer = l1_cntrl

        for j in xrange(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                        version = i * num_l2caches_per_cluster + j,
                        L2cache = l2_cache, cluster_id = i,
                        transitions_per_cycle = options.ports,
                        ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl" % (
                        i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

    phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges))
    assert(phys_mem_size % options.num_dirs == 0)
    mem_module_size = phys_mem_size / options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
                                          clk_domain = ruby_system.clk_domain,
                                          clk_divider = 3)

    for i in xrange(options.num_dirs):
        #
        # Create the Ruby objects associated with the directory controller
        #
        mem_cntrl = RubyMemoryControl(
                              clk_domain = ruby_system.memctrl_clk_domain,
                              version = i,
                              ruby_system = ruby_system)

        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(version = i,
                          directory = RubyDirectoryMemory(
                              version = i, size = dir_size,
                              use_map = options.use_map),
                          memBuffer = mem_cntrl,
                          transitions_per_cycle = options.ports,
                          ruby_system = ruby_system)

        exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i,
                                   dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
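
A quick worked example of the index-bit arithmetic used above (assumed parameter values, not output of the script): with 64-byte cache lines and two L2 banks per cluster, the private caches index from bit 6, while the shared L2 skips one extra bank-select bit and indexes from bit 7.

import math

cacheline_size = 64           # assumed options.cacheline_size
num_l2caches_per_cluster = 2  # assumed, e.g. --num-l2caches=4 --num-clusters=2

block_size_bits = int(math.log(cacheline_size, 2))    # 6: bits [5:0] are the offset
l2_bits = int(math.log(num_l2caches_per_cluster, 2))  # 1: bit 6 picks the L2 bank
l2_index_start = block_size_bits + l2_bits            # 7: L2 set index starts here

# L0/L1 use start_index_bit = 6; the L2 banks use start_index_bit = 7.
print(block_size_bits, l2_bits, l2_index_start)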

@@ -0,0 +1,700 @@
/*
* Copyright (c) 2013 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
machine(L0Cache, "MESI Directory L0 Cache")
: Sequencer * sequencer,
CacheMemory * Icache,
CacheMemory * Dcache,
Cycles request_latency = 2,
Cycles response_latency = 2,
bool send_evictions,
{
// NODE L0 CACHE
// From this node's L0 cache to the network
MessageBuffer bufferFromCache, network="To", physical_network="0", ordered="true";
// To this node's L0 cache FROM the network
MessageBuffer bufferToCache, network="From", physical_network="0", ordered="true";
// Message queue between this controller and the processor
MessageBuffer mandatoryQueue, ordered="false";
// STATES
state_declaration(State, desc="Cache states", default="L0Cache_State_I") {
// Base states
// The cache entry has not been allocated.
NP, AccessPermission:Invalid, desc="Not present in either cache";
// The cache entry has been allocated, but is not in use.
I, AccessPermission:Invalid;
// The cache entry is in shared mode. The processor can read this entry
// but it cannot write to it.
S, AccessPermission:Read_Only;
// The cache entry is in exclusive mode. The processor can read this
// entry. It can write to this entry without informing the directory.
// On writing, the entry moves to M state.
E, AccessPermission:Read_Only;
// The processor has read and write permissions on this entry.
M, AccessPermission:Read_Write;
// Transient States
// The cache controller has requested that this entry be fetched in
// shared state so that the processor can read it.
IS, AccessPermission:Busy;
// The cache controller has requested that this entry be fetched in
// modify state so that the processor can read/write it.
IM, AccessPermission:Busy;
// The cache controller had read permission over the entry. But now the
// processor needs to write to it. So, the controller has requested for
// write permission.
SM, AccessPermission:Read_Only;
}
// EVENTS
enumeration(Event, desc="Cache events") {
// L0 events
Load, desc="Load request from the home processor";
Ifetch, desc="I-fetch request from the home processor";
Store, desc="Store request from the home processor";
Inv, desc="Invalidate request from L2 bank";
// internal generated request
L0_Replacement, desc="L0 Replacement", format="!r";
// other requests
Fwd_GETX, desc="GETX from other processor";
Fwd_GETS, desc="GETS from other processor";
Fwd_GET_INSTR, desc="GET_INSTR from other processor";
Data, desc="Data for processor";
Data_Exclusive, desc="Data for processor";
Data_Stale, desc="Data for processor, but not for storage";
Ack, desc="Ack for processor";
Ack_all, desc="Last ack for processor";
WB_Ack, desc="Ack for replacement";
}
// TYPES
// CacheEntry
structure(Entry, desc="...", interface="AbstractCacheEntry" ) {
State CacheState, desc="cache state";
DataBlock DataBlk, desc="data for the block";
bool Dirty, default="false", desc="data is dirty";
}
// TBE fields
structure(TBE, desc="...") {
Address Addr, desc="Physical address for this TBE";
State TBEState, desc="Transient state";
DataBlock DataBlk, desc="Buffer for the data block";
bool Dirty, default="false", desc="data is dirty";
int pendingAcks, default="0", desc="number of pending acks";
}
structure(TBETable, external="yes") {
TBE lookup(Address);
void allocate(Address);
void deallocate(Address);
bool isPresent(Address);
}
TBETable TBEs, template="<L0Cache_TBE>", constructor="m_number_of_TBEs";
void set_cache_entry(AbstractCacheEntry a);
void unset_cache_entry();
void set_tbe(TBE a);
void unset_tbe();
void wakeUpBuffers(Address a);
void wakeUpAllBuffers(Address a);
void profileMsgDelay(int virtualNetworkType, Cycles c);
// inclusive cache returns L0 entries only
Entry getCacheEntry(Address addr), return_by_pointer="yes" {
Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]);
if(is_valid(Dcache_entry)) {
return Dcache_entry;
}
Entry Icache_entry := static_cast(Entry, "pointer", Icache[addr]);
return Icache_entry;
}
Entry getDCacheEntry(Address addr), return_by_pointer="yes" {
Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]);
return Dcache_entry;
}
Entry getICacheEntry(Address addr), return_by_pointer="yes" {
Entry Icache_entry := static_cast(Entry, "pointer", Icache[addr]);
return Icache_entry;
}
State getState(TBE tbe, Entry cache_entry, Address addr) {
assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false);
if(is_valid(tbe)) {
return tbe.TBEState;
} else if (is_valid(cache_entry)) {
return cache_entry.CacheState;
}
return State:NP;
}
void setState(TBE tbe, Entry cache_entry, Address addr, State state) {
assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false);
// MUST CHANGE
if(is_valid(tbe)) {
tbe.TBEState := state;
}
if (is_valid(cache_entry)) {
cache_entry.CacheState := state;
}
}
AccessPermission getAccessPermission(Address addr) {
TBE tbe := TBEs[addr];
if(is_valid(tbe)) {
DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(tbe.TBEState));
return L0Cache_State_to_permission(tbe.TBEState);
}
Entry cache_entry := getCacheEntry(addr);
if(is_valid(cache_entry)) {
DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(cache_entry.CacheState));
return L0Cache_State_to_permission(cache_entry.CacheState);
}
DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent);
return AccessPermission:NotPresent;
}
DataBlock getDataBlock(Address addr), return_by_ref="yes" {
TBE tbe := TBEs[addr];
if(is_valid(tbe)) {
return tbe.DataBlk;
}
return getCacheEntry(addr).DataBlk;
}
void setAccessPermission(Entry cache_entry, Address addr, State state) {
if (is_valid(cache_entry)) {
cache_entry.changePermission(L0Cache_State_to_permission(state));
}
}
Event mandatory_request_type_to_event(RubyRequestType type) {
if (type == RubyRequestType:LD) {
return Event:Load;
} else if (type == RubyRequestType:IFETCH) {
return Event:Ifetch;
} else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
return Event:Store;
} else {
error("Invalid RubyRequestType");
}
}
int getPendingAcks(TBE tbe) {
return tbe.pendingAcks;
}
out_port(requestNetwork_out, CoherenceMsg, bufferFromCache);
// Messages for this L0 cache from the L1 cache
in_port(messgeBuffer_in, CoherenceMsg, bufferToCache, rank = 1) {
if (messgeBuffer_in.isReady()) {
peek(messgeBuffer_in, CoherenceMsg, block_on="Addr") {
assert(in_msg.Destination == machineID);
Entry cache_entry := getCacheEntry(in_msg.Addr);
TBE tbe := TBEs[in_msg.Addr];
if(in_msg.Class == CoherenceClass:DATA_EXCLUSIVE) {
trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe);
} else if(in_msg.Class == CoherenceClass:DATA) {
trigger(Event:Data, in_msg.Addr, cache_entry, tbe);
} else if(in_msg.Class == CoherenceClass:STALE_DATA) {
trigger(Event:Data_Stale, in_msg.Addr, cache_entry, tbe);
} else if (in_msg.Class == CoherenceClass:ACK) {
trigger(Event:Ack, in_msg.Addr, cache_entry, tbe);
} else if (in_msg.Class == CoherenceClass:WB_ACK) {
trigger(Event:WB_Ack, in_msg.Addr, cache_entry, tbe);
} else if (in_msg.Class == CoherenceClass:INV) {
trigger(Event:Inv, in_msg.Addr, cache_entry, tbe);
} else if (in_msg.Class == CoherenceClass:GETX ||
in_msg.Class == CoherenceClass:UPGRADE) {
// upgrade transforms to GETX due to race
trigger(Event:Fwd_GETX, in_msg.Addr, cache_entry, tbe);
} else if (in_msg.Class == CoherenceClass:GETS) {
trigger(Event:Fwd_GETS, in_msg.Addr, cache_entry, tbe);
} else if (in_msg.Class == CoherenceClass:GET_INSTR) {
trigger(Event:Fwd_GET_INSTR, in_msg.Addr, cache_entry, tbe);
} else {
error("Invalid forwarded request type");
}
}
}
}
// Mandatory queue between the node's CPU and its L0 caches
in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
// Check for data accesses to blocks in the I-cache and ifetches to blocks in the D-cache
if (in_msg.Type == RubyRequestType:IFETCH) {
// ** INSTRUCTION ACCESS ***
Entry Icache_entry := getICacheEntry(in_msg.LineAddress);
if (is_valid(Icache_entry)) {
// The tag matches for the L0, so the L0 asks the L1 for it.
trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
Icache_entry, TBEs[in_msg.LineAddress]);
} else {
// Check to see if it is in the OTHER L0
Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
if (is_valid(Dcache_entry)) {
// The block is in the wrong L0, put the request on the queue to the private L1
trigger(Event:L0_Replacement, in_msg.LineAddress,
Dcache_entry, TBEs[in_msg.LineAddress]);
}
if (Icache.cacheAvail(in_msg.LineAddress)) {
// L0 doesn't have the line, but we have space for it
// in the L0, so let's see if the L1 has it
trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
Icache_entry, TBEs[in_msg.LineAddress]);
} else {
// No room in the L0, so we need to make room in the L0
trigger(Event:L0_Replacement, Icache.cacheProbe(in_msg.LineAddress),
getICacheEntry(Icache.cacheProbe(in_msg.LineAddress)),
TBEs[Icache.cacheProbe(in_msg.LineAddress)]);
}
}
} else {
// *** DATA ACCESS ***
Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
if (is_valid(Dcache_entry)) {
// The tag matches for the L0, so the L0 asks the L1 for it
trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
Dcache_entry, TBEs[in_msg.LineAddress]);
} else {
// Check to see if it is in the OTHER L0
Entry Icache_entry := getICacheEntry(in_msg.LineAddress);
if (is_valid(Icache_entry)) {
// The block is in the wrong L0, put the request on the queue to the private L1
trigger(Event:L0_Replacement, in_msg.LineAddress,
Icache_entry, TBEs[in_msg.LineAddress]);
}
if (Dcache.cacheAvail(in_msg.LineAddress)) {
// L0 doesn't have the line, but we have space for it
// in the L0, so let's see if the L1 has it
trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
Dcache_entry, TBEs[in_msg.LineAddress]);
} else {
// No room in the L0, so we need to make room in the L0
trigger(Event:L0_Replacement, Dcache.cacheProbe(in_msg.LineAddress),
getDCacheEntry(Dcache.cacheProbe(in_msg.LineAddress)),
TBEs[Dcache.cacheProbe(in_msg.LineAddress)]);
}
}
}
}
}
}
// ACTIONS
action(a_issueGETS, "a", desc="Issue GETS") {
peek(mandatoryQueue_in, RubyRequest) {
enqueue(requestNetwork_out, CoherenceMsg, latency=request_latency) {
out_msg.Addr := address;
out_msg.Class := CoherenceClass:GETS;
out_msg.Sender := machineID;
out_msg.Destination := createMachineID(MachineType:L1Cache, version);
DPRINTF(RubySlicc, "address: %s, destination: %s\n",
address, out_msg.Destination);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
}
}
}
action(b_issueGETX, "b", desc="Issue GETX") {
peek(mandatoryQueue_in, RubyRequest) {
enqueue(requestNetwork_out, CoherenceMsg, latency=request_latency) {
out_msg.Addr := address;
out_msg.Class := CoherenceClass:GETX;
out_msg.Sender := machineID;
DPRINTF(RubySlicc, "%s\n", machineID);
out_msg.Destination := createMachineID(MachineType:L1Cache, version);
DPRINTF(RubySlicc, "address: %s, destination: %s\n",
address, out_msg.Destination);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
}
}
}
action(c_issueUPGRADE, "c", desc="Issue GETX") {
peek(mandatoryQueue_in, RubyRequest) {
enqueue(requestNetwork_out, CoherenceMsg, latency= request_latency) {
out_msg.Addr := address;
out_msg.Class := CoherenceClass:UPGRADE;
out_msg.Sender := machineID;
out_msg.Destination := createMachineID(MachineType:L1Cache, version);
DPRINTF(RubySlicc, "address: %s, destination: %s\n",
address, out_msg.Destination);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
}
}
}
action(f_sendDataToL1, "f", desc="send data to the L1 cache") {
enqueue(requestNetwork_out, CoherenceMsg, latency=response_latency) {
assert(is_valid(cache_entry));
out_msg.Addr := address;
out_msg.Class := CoherenceClass:INV_DATA;
out_msg.DataBlk := cache_entry.DataBlk;
out_msg.Dirty := cache_entry.Dirty;
out_msg.Sender := machineID;
out_msg.Destination := createMachineID(MachineType:L1Cache, version);
out_msg.MessageSize := MessageSizeType:Writeback_Data;
}
}
action(fi_sendInvAck, "fi", desc="send an invalidate ack to the L1 cache") {
peek(messgeBuffer_in, CoherenceMsg) {
enqueue(requestNetwork_out, CoherenceMsg, latency=response_latency) {
out_msg.Addr := address;
out_msg.Class := CoherenceClass:INV_ACK;
out_msg.Sender := machineID;
out_msg.Destination := createMachineID(MachineType:L1Cache, version);
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
}
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
if (send_evictions) {
DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
sequencer.evictionCallback(address);
}
}
action(g_issuePUTX, "g", desc="send data to the L1 cache") {
enqueue(requestNetwork_out, CoherenceMsg, latency=response_latency) {
assert(is_valid(cache_entry));
out_msg.Addr := address;
out_msg.Class := CoherenceClass:PUTX;
out_msg.DataBlk := cache_entry.DataBlk;
out_msg.Dirty := cache_entry.Dirty;
out_msg.Sender:= machineID;
out_msg.Destination := createMachineID(MachineType:L1Cache, version);
if (cache_entry.Dirty) {
out_msg.MessageSize := MessageSizeType:Writeback_Data;
} else {
out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
}
}
action(h_load_hit, "h", desc="If not prefetch, notify sequencer the load completed.") {
assert(is_valid(cache_entry));
DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
sequencer.readCallback(address, cache_entry.DataBlk);
}
action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") {
assert(is_valid(cache_entry));
DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
sequencer.writeCallback(address, cache_entry.DataBlk);
cache_entry.Dirty := true;
}
action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") {
check_allocate(TBEs);
assert(is_valid(cache_entry));
TBEs.allocate(address);
set_tbe(TBEs[address]);
tbe.Dirty := cache_entry.Dirty;
tbe.DataBlk := cache_entry.DataBlk;
}
action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
mandatoryQueue_in.dequeue();
}
action(l_popRequestQueue, "l",
desc="Pop incoming request queue and profile the delay within this virtual network") {
profileMsgDelay(2, messgeBuffer_in.dequeue_getDelayCycles());
}
action(o_popIncomingResponseQueue, "o",
desc="Pop Incoming Response queue and profile the delay within this virtual network") {
profileMsgDelay(1, messgeBuffer_in.dequeue_getDelayCycles());
}
action(s_deallocateTBE, "s", desc="Deallocate TBE") {
TBEs.deallocate(address);
unset_tbe();
}
action(u_writeDataToCache, "u", desc="Write data to cache") {
peek(messgeBuffer_in, CoherenceMsg) {
assert(is_valid(cache_entry));
cache_entry.DataBlk := in_msg.DataBlk;
cache_entry.Dirty := in_msg.Dirty;
}
}
action(ff_deallocateCacheBlock, "\f",
desc="Deallocate L1 cache block.") {
if (Dcache.isTagPresent(address)) {
Dcache.deallocate(address);
} else {
Icache.deallocate(address);
}
unset_cache_entry();
}
action(oo_allocateDCacheBlock, "\o", desc="Set L0 D-cache tag equal to tag of block B.") {
if (is_invalid(cache_entry)) {
set_cache_entry(Dcache.allocate(address, new Entry));
}
}
action(pp_allocateICacheBlock, "\p", desc="Set L0 I-cache tag equal to tag of block B.") {
if (is_invalid(cache_entry)) {
set_cache_entry(Icache.allocate(address, new Entry));
}
}
action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle cpu request queue") {
stall_and_wait(mandatoryQueue_in, address);
}
action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
wakeUpAllBuffers(address);
}
action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") {
++Icache.demand_misses;
}
action(uu_profileInstHit, "\uih", desc="Profile the demand hit") {
++Icache.demand_hits;
}
action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") {
++Dcache.demand_misses;
}
action(uu_profileDataHit, "\udh", desc="Profile the demand hit") {
++Dcache.demand_hits;
}
//*****************************************************
// TRANSITIONS
//*****************************************************
// Transitions for Load/Store/Replacement/WriteBack from transient states
transition({IS, IM, SM}, {Load, Ifetch, Store, L0_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
// Transitions from Idle
transition({NP,I}, L0_Replacement) {
ff_deallocateCacheBlock;
}
transition({NP,I}, Load, IS) {
oo_allocateDCacheBlock;
i_allocateTBE;
a_issueGETS;
uu_profileDataMiss;
k_popMandatoryQueue;
}
transition({NP,I}, Ifetch, IS) {
pp_allocateICacheBlock;
i_allocateTBE;
a_issueGETS;
uu_profileInstMiss;
k_popMandatoryQueue;
}
transition({NP,I}, Store, IM) {
oo_allocateDCacheBlock;
i_allocateTBE;
b_issueGETX;
uu_profileDataMiss;
k_popMandatoryQueue;
}
transition({NP, I, IS, IM}, Inv) {
fi_sendInvAck;
l_popRequestQueue;
}
transition(SM, Inv, IM) {
fi_sendInvAck;
l_popRequestQueue;
}
// Transitions from Shared
transition({S,E,M}, Load) {
h_load_hit;
uu_profileDataHit;
k_popMandatoryQueue;
}
transition({S,E,M}, Ifetch) {
h_load_hit;
uu_profileInstHit;
k_popMandatoryQueue;
}
transition(S, Store, SM) {
i_allocateTBE;
c_issueUPGRADE;
uu_profileDataMiss;
k_popMandatoryQueue;
}
transition(S, L0_Replacement, I) {
forward_eviction_to_cpu;
ff_deallocateCacheBlock;
}
transition(S, Inv, I) {
forward_eviction_to_cpu;
fi_sendInvAck;
ff_deallocateCacheBlock;
l_popRequestQueue;
}
// Transitions from Exclusive
transition({E,M}, Store, M) {
hh_store_hit;
uu_profileDataHit;
k_popMandatoryQueue;
}
transition(E, L0_Replacement, I) {
forward_eviction_to_cpu;
g_issuePUTX;
ff_deallocateCacheBlock;
}
transition(E, {Inv, Fwd_GETX}, I) {
// don't send data
forward_eviction_to_cpu;
fi_sendInvAck;
ff_deallocateCacheBlock;
l_popRequestQueue;
}
transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) {
f_sendDataToL1;
l_popRequestQueue;
}
// Transitions from Modified
transition(M, L0_Replacement, I) {
forward_eviction_to_cpu;
g_issuePUTX;
ff_deallocateCacheBlock;
}
transition(M, {Inv, Fwd_GETX}, I) {
forward_eviction_to_cpu;
f_sendDataToL1;
ff_deallocateCacheBlock;
l_popRequestQueue;
}
transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) {
f_sendDataToL1;
l_popRequestQueue;
}
transition(IS, Data, S) {
u_writeDataToCache;
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IS, Data_Exclusive, E) {
u_writeDataToCache;
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(IS, Data_Stale, I) {
u_writeDataToCache;
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition({IM,SM}, Data_Exclusive, M) {
u_writeDataToCache;
hh_store_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
}
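
Read as a plain state/event walk, the common demand paths defined by the transition table above are: a load miss allocates a block and moves NP/I to IS, landing in S or E depending on the reply; a store miss moves NP/I to IM and then M; a store to a Shared block upgrades through SM; and an Inv in S, E, or M invalidates the block. The toy model below is a simplified sketch of just those paths (it ignores replacements, forwarded requests, pending acks, and stale data) and is not generated from the SLICC.

# Toy model of the L0 demand-path transitions described above (sketch only).
NEXT = {
    # (current state, event) -> next state
    ("I", "Load"): "IS", ("I", "Store"): "IM",
    ("IS", "Data"): "S", ("IS", "Data_Exclusive"): "E",
    ("IM", "Data_Exclusive"): "M",
    ("S", "Store"): "SM", ("SM", "Data_Exclusive"): "M",
    ("E", "Store"): "M", ("M", "Store"): "M",
    ("S", "Load"): "S", ("E", "Load"): "E", ("M", "Load"): "M",
    ("S", "Inv"): "I", ("E", "Inv"): "I", ("M", "Inv"): "I",
}

def run(events, state="I"):
    for ev in events:
        state = NEXT[(state, ev)]
        print("%-14s -> %s" % (ev, state))
    return state

# Load miss then a store to the same line: I -> IS -> S -> SM -> M
run(["Load", "Data", "Store", "Data_Exclusive"])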

File diff suppressed because it is too large

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2013 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Various classes of messages that can be exchanged between the L0 and the L1
// controllers.
enumeration(CoherenceClass, desc="...") {
GETX, desc="Get eXclusive";
UPGRADE, desc="UPGRADE to exclusive";
GETS, desc="Get Shared";
GET_INSTR, desc="Get Instruction";
INV, desc="INValidate";
PUTX, desc="Replacement message";
WB_ACK, desc="Writeback ack";
// Request types for sending data and acks from L0 to L1 cache
// when an invalidation message is received.
INV_DATA;
INV_ACK;
DATA, desc="Data block for L1 cache in S state";
DATA_EXCLUSIVE, desc="Data block for L1 cache in M/E state";
ACK, desc="Generic invalidate ack";
// This is a special case in which the L1 cache lost permissions to the
// shared block before it got the data. So the L0 cache can use the data
// but not store it.
STALE_DATA;
}
// Class for messages sent between the L0 and the L1 controllers.
structure(CoherenceMsg, desc="...", interface="Message") {
Address Addr, desc="Physical address of the cache block";
CoherenceClass Class, desc="Type of message (GetS, GetX, PutX, etc)";
RubyAccessMode AccessMode, desc="user/supervisor access type";
MachineID Sender, desc="What component sent this message";
MachineID Destination, desc="What machine receives this message";
MessageSizeType MessageSize, desc="size category of the message";
DataBlock DataBlk, desc="Data for the cache line (if PUTX)";
bool Dirty, default="false", desc="Dirty bit";
bool functionalRead(Packet *pkt) {
// Only PUTX messages contain the data block
if (Class == CoherenceClass:PUTX) {
return testAndRead(Addr, DataBlk, pkt);
}
return false;
}
bool functionalWrite(Packet *pkt) {
// No check on message type required since the protocol should
// read data from those messages that contain the block
return testAndWrite(Addr, DataBlk, pkt);
}
}
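
A hedged Python rendering of the two functional-access hooks defined above (the class and field names are simplified for illustration): a functional read only returns data when the message is a PUTX, while a functional write updates the data block of any message for the matching address.

# Simplified model of CoherenceMsg.functionalRead/functionalWrite (sketch).
class CoherenceMsgModel(object):
    def __init__(self, addr, msg_class, data=None):
        self.addr, self.msg_class, self.data = addr, msg_class, data

    def functional_read(self, addr):
        # Only PUTX messages are treated as carrying a complete, valid block.
        if self.msg_class == "PUTX" and self.addr == addr:
            return self.data
        return None

    def functional_write(self, addr, data):
        # Writes are applied regardless of the message class.
        if self.addr == addr:
            self.data = data
            return True
        return False

msg = CoherenceMsgModel(0x40, "DATA", data="old")
assert msg.functional_read(0x40) is None   # DATA is not read functionally
assert msg.functional_write(0x40, "new")   # but its block is still updated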

@@ -0,0 +1,9 @@
protocol "MESI_Three_Level";
include "RubySlicc_interfaces.slicc";
include "MESI_Two_Level-msg.sm";
include "MESI_Three_Level-msg.sm";
include "MESI_Three_Level-L0.cache";
include "MESI_Three_Level-L1.cache";
include "MESI_Two_Level-L2cache.sm";
include "MESI_Two_Level-dir.sm";
include "MESI_Two_Level-dma.sm";

@@ -39,3 +39,4 @@ NodeID map_Address_to_DirectoryNode(Address addr);
NodeID machineIDToNodeID(MachineID machID);
NodeID machineIDToVersion(MachineID machID);
MachineType machineIDToMachineType(MachineID machID);
MachineID createMachineID(MachineType t, NodeID i);

@@ -28,7 +28,6 @@
*/
// Hack, no node object since base class has them
NodeID id;
NodeID version;
MachineID machineID;
NodeID clusterID;

@@ -34,6 +34,7 @@ Import('*')
all_protocols.extend([
'MESI_Two_Level',
'MESI_Three_Level',
'MI_example',
'MOESI_CMP_directory',
'MOESI_CMP_token',

@@ -96,4 +96,11 @@ machineCount(MachineType machType)
return MachineType_base_count(machType);
}
inline MachineID
createMachineID(MachineType type, NodeID id)
{
MachineID mach = {type, id};
return mach;
}
#endif // __MEM_RUBY_SLICC_INTERFACE_COMPONENTMAPPINGS_HH__

@@ -543,7 +543,7 @@ void
$c_ident::init()
{
MachineType machine_type = string_to_MachineType("${{var.machine.ident}}");
-int base = MachineType_base_number(machine_type);
+int base M5_VAR_USED = MachineType_base_number(machine_type);
m_machineID.type = MachineType_${ident};
m_machineID.num = m_version;