config: Support full-system with SST's memory system

This patch adds an example configuration in ext/sst/tests/ that allows
an SST/gem5 instance to simulate a 4-core AArch64 system with SST's
memHierarchy components providing all the caches and memories.
This commit is contained in:
Curtis Dunham 2015-04-08 15:56:06 -05:00
parent f05cb84ed1
commit c3268f8820
7 changed files with 307 additions and 23 deletions

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012-2013 ARM Limited
# Copyright (c) 2012-2013, 2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@ -46,6 +46,13 @@ from m5.objects import *
from Caches import *
def config_cache(options, system):
if options.external_memory_system and (options.caches or options.l2cache):
print "External caches and internal caches are exclusive options.\n"
sys.exit(1)
if options.external_memory_system:
ExternalCache = ExternalCacheFactory(options.external_memory_system)
if options.cpu_type == "arm_detailed":
try:
from O3_ARM_v7a import *
@ -114,10 +121,50 @@ def config_cache(options, system):
system.cpu[i].dcache = dcache_real
system.cpu[i].dcache_mon = dcache_mon
elif options.external_memory_system:
# These port names are presented to whatever 'external' system
# gem5 is connecting to. Its configuration will likely depend
# on these names. For simplicity, we would advise configuring
# it to use this naming scheme; if this isn't possible, change
# the names below.
if buildEnv['TARGET_ISA'] in ['x86', 'arm']:
system.cpu[i].addPrivateSplitL1Caches(
ExternalCache("cpu%d.icache" % i),
ExternalCache("cpu%d.dcache" % i),
ExternalCache("cpu%d.itb_walker_cache" % i),
ExternalCache("cpu%d.dtb_walker_cache" % i))
else:
system.cpu[i].addPrivateSplitL1Caches(
ExternalCache("cpu%d.icache" % i),
ExternalCache("cpu%d.dcache" % i))
system.cpu[i].createInterruptController()
if options.l2cache:
system.cpu[i].connectAllPorts(system.tol2bus, system.membus)
elif options.external_memory_system:
system.cpu[i].connectUncachedPorts(system.membus)
else:
system.cpu[i].connectAllPorts(system.membus)
return system
# ExternalSlave provides a "port", but when that port connects to a cache,
# the connecting CPU SimObject wants to refer to its "cpu_side".
# The 'ExternalCache' class provides this adaptation by rewriting the name,
# eliminating distracting changes elsewhere in the config code.
class ExternalCache(ExternalSlave):
    """ExternalSlave that also answers to the port name "cpu_side".

    ExternalSlave exposes a single "port", but the config code that wires
    a CPU to a cache reads and assigns the cache's "cpu_side". Both
    attribute hooks below rewrite "cpu_side" to "port" and then defer to
    the inherited implementation; every other attribute name is passed
    through untouched.
    """

    def __getattr__(self, attr):
        # Reads of "cpu_side" are served by the underlying "port".
        if attr == "cpu_side":
            attr = "port"
        # Name this class (not its parent) in super() so that attribute
        # lookup continues with the next class after ExternalCache in the
        # MRO instead of skipping ExternalSlave.
        return super(ExternalCache, self).__getattr__(attr)

    def __setattr__(self, attr, value):
        # Assignments to "cpu_side" are redirected to "port".
        if attr == "cpu_side":
            attr = "port"
        return super(ExternalCache, self).__setattr__(attr, value)
def ExternalCacheFactory(port_type):
    """Return a one-argument constructor for ExternalCache objects.

    The returned callable takes the port name (used as port_data) and
    builds an ExternalCache of the given port_type whose address ranges
    are the single range AllMemory.
    """
    def make(name):
        cache = ExternalCache(
            addr_ranges=[AllMemory],
            port_type=port_type,
            port_data=name)
        return cache

    return make

View file

@ -1,4 +1,4 @@
# Copyright (c) 2010-2012 ARM Limited
# Copyright (c) 2010-2012, 2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@ -203,7 +203,8 @@ def makeSparcSystem(mem_mode, mdesc=None):
return self
def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
dtb_filename=None, bare_metal=False, cmdline=None):
dtb_filename=None, bare_metal=False, cmdline=None,
external_memory=""):
assert machine_type
if bare_metal:
@ -293,7 +294,15 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
'lpj=19988480 norandmaps rw loglevel=8 ' + \
'mem=%(mem)s root=%(rootdev)s'
self.realview.setupBootLoader(self.membus, self, binary)
# When using external memory, gem5 writes the boot loader to nvmem
# and then SST will read from it, but SST can only get to nvmem from
# iobus, as gem5's membus is only used for initialization and
# SST doesn't use it. Attaching nvmem to iobus solves this issue.
# During initialization, system_port -> membus -> iobus -> nvmem.
if external_memory:
self.realview.setupBootLoader(self.iobus, self, binary)
else:
self.realview.setupBootLoader(self.membus, self, binary)
self.gic_cpu_addr = self.realview.gic.cpu_addr
self.flags_addr = self.realview.realview_io.pio_addr + 0x30
@ -322,7 +331,24 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
self.boot_osflags = fillInCmdline(mdesc, cmdline)
self.realview.attachOnChipIO(self.membus, self.bridge)
if external_memory:
# I/O traffic enters iobus
self.external_io = ExternalMaster(port_data="external_io",
port_type=external_memory)
self.external_io.port = self.iobus.slave
# Ensure iocache only receives traffic destined for (actual) memory.
self.iocache = ExternalSlave(port_data="iocache",
port_type=external_memory,
addr_ranges=self.mem_ranges)
self.iocache.port = self.iobus.master
# Let system_port get to nvmem and nothing else.
self.bridge.ranges = [self.realview.nvmem.range]
self.realview.attachOnChipIO(self.iobus)
else:
self.realview.attachOnChipIO(self.membus, self.bridge)
self.realview.attachIO(self.iobus)
self.intrctrl = IntrControl()
self.terminal = Terminal()

View file

@ -189,6 +189,14 @@ def config_mem(options, system):
them.
"""
if options.external_memory_system:
system.external_memory = m5.objects.ExternalSlave(
port_type=options.external_memory_system,
port_data="init_mem0", port=system.membus.master,
addr_ranges=system.mem_ranges)
system.kernel_addr_check = False
return
nbr_mem_ctrls = options.mem_channels
import math
from m5.util import fatal

View file

@ -104,6 +104,8 @@ def addCommonOptions(parser):
parser.add_option("--memchecker", action="store_true")
# Cache Options
parser.add_option("--external-memory-system", type="string",
help="use external ports of this port_type for caches")
parser.add_option("--caches", action="store_true")
parser.add_option("--l2cache", action="store_true")
parser.add_option("--fastmem", action="store_true")

View file

@ -98,7 +98,8 @@ def build_test_system(np):
test_sys = makeArmSystem(test_mem_mode, options.machine_type,
options.num_cpus, bm[0], options.dtb_filename,
bare_metal=options.bare_metal,
cmdline=cmdline)
cmdline=cmdline,
external_memory=options.external_memory_system)
if options.enable_context_switch_stats_dump:
test_sys.enable_context_switch_stats_dump = True
else:
@ -185,7 +186,7 @@ def build_test_system(np):
test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
test_sys.iocache.cpu_side = test_sys.iobus.master
test_sys.iocache.mem_side = test_sys.membus.slave
else:
elif not options.external_memory_system:
test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
test_sys.iobridge.slave = test_sys.iobus.master
test_sys.iobridge.master = test_sys.membus.slave

View file

@ -0,0 +1,199 @@
# Copyright (c) 2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Curtis Dunham
import sst
import sys
import os
# Latency given to both ends of each gem5-port <-> cache link (see buildL1).
lat="1 ns"
# Latency given to both ends of each cache <-> bus link.
buslat="2 ns"
# Clock used for the caches ("cache_frequency") and the gem5 component
# ("frequency").
clockRate = "1GHz"
def getenv(name):
    """Return the value of environment variable *name*, or "" if unset.

    Used to feed optional debug settings (e.g. DEBUG, GEM5_DEBUG,
    M5_DEBUG) into component parameters without failing when the
    variable is not defined.
    """
    # Mapping.get with a default replaces the manual try/except KeyError.
    return os.environ.get(name, "")
# Parameters applied to every memHierarchy.Cache built in this script
# (both by buildL1 and for the L2). "debug" is read from the DEBUG
# environment variable and is "" when that variable is unset.
baseCacheParams = ({
"debug" :getenv("DEBUG"),
"debug_level" : 6,
"coherence_protocol" : "MSI",
"replacement_policy" : "LRU",
"cache_line_size" : 64,
"cache_frequency" : clockRate,
"statistics" : 1
})
# Additional parameters applied, after baseCacheParams, to each cache
# created by buildL1.
l1CacheParams = ({
"debug" : getenv("DEBUG"),
"debug_level" : 6,
"L1" : 1,
"cache_size" : "64 KB",
"associativity" : 4,
"access_latency_cycles" : 2,
"low_network_links" : 1
})
# Additional parameters applied, after baseCacheParams, to the shared
# "l2cache" component.
l2CacheParams = ({
"debug" : getenv("DEBUG"),
"debug_level" : 6,
"L1" : 0,
"cache_size" : "256 KB",
"associativity" : 8,
"access_latency_cycles" : 8,
"high_network_links" : 1,
"mshr_num_entries" : 4096,
"low_network_links" : 1
})
# The gem5 instance, wrapped as a single SST component named "system".
# "cmd" is the gem5 command line: fs.py with 4 CPUs, 256MB of memory and
# --external-memory-system=sst. The debug settings come from the
# GEM5_DEBUG and M5_DEBUG environment variables ("" when unset).
# NOTE(review): the disk image, kernel and dtb file names in "cmd" are
# hard-coded; presumably they must be resolvable by gem5 at run time --
# confirm against the gem5 setup being used.
GEM5 = sst.Component("system", "gem5.gem5")
GEM5.addParams({
"comp_debug" : getenv("GEM5_DEBUG"),
"gem5DebugFlags" : getenv("M5_DEBUG"),
"frequency" : clockRate,
"cmd" : "configs/example/fs.py --num-cpus 4 --disk-image=vexpress64-openembedded_minimal-armv8_20130623-376.img --root-device=/dev/sda2 --kernel=vmlinux.aarch64.20140821 --dtb-filename=vexpress.aarch64.20140821.dtb --mem-size=256MB --machine-type=VExpress_EMM64 --cpu-type=timing --external-memory-system=sst --initialize-only"
})
# The memHierarchy bus: every cache built by buildL1 attaches to one of
# its high_network_N ports, and the L2 attaches to low_network_0.
bus = sst.Component("membus", "memHierarchy.Bus")
bus.addParams({
"bus_frequency": "2GHz",
"debug" : getenv("DEBUG"),
"debug_level" : 8
})
def buildL1(name, m5, connector):
    """Create a memHierarchy cache and link it to a port of *m5*.

    name      -- SST component name of the new cache
    m5        -- component that owns the port to attach to
    connector -- name of that port on m5

    The cache receives baseCacheParams followed by l1CacheParams, and its
    "high_network_0" port is linked to m5's connector port. The cache is
    returned; this function does not connect its low-network side.
    """
    l1 = sst.Component(name, "memHierarchy.Cache")
    for params in (baseCacheParams, l1CacheParams):
        l1.addParams(params)

    upstream_link = sst.Link("cpu_%s_link"%name)
    gem5_end = (m5, connector, lat)
    cache_end = (l1, "high_network_0", lat)
    upstream_link.connect(gem5_end, cache_end)
    return l1
# Cache sitting between gem5's "system.external_memory.port" and the bus.
SysBusConn = buildL1("gem5SystemBus", GEM5, "system.external_memory.port")
# Index of the bus high_network port being handed out. The first two
# users below take ports 0 and 1; buildCPU increments this counter
# *before* each use, so it continues from port 2.
bus_port = 0
link = sst.Link("sysbus_bus_link")
link.connect((SysBusConn, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
bus_port = bus_port + 1
# Cache attached to gem5's "system.iocache.port"; the parameters set here
# are applied after the ones buildL1 has already applied.
ioCache = buildL1("ioCache", GEM5, "system.iocache.port")
ioCache.addParams({
"debug" : 0,
"debug_level" : 6,
"cache_size" : "16 KB",
"associativity" : 4
})
link = sst.Link("ioCache_bus_link")
link.connect((ioCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
def buildCPU(m5, num):
    """Build the four caches belonging to CPU *num* and attach them to the bus.

    m5  -- component that owns the per-CPU external ports
    num -- CPU index used in the component, port and link names

    For the instruction cache, data cache and the two table-walker caches
    this creates a cache via buildL1, then links each cache's
    "low_network_0" to the next free bus high_network port. The
    module-level counter bus_port is incremented before each link is
    connected.
    """
    global bus_port

    # (cache component name, gem5 port name, bus link name), all templated
    # on the CPU index and listed in creation order.
    templates = (
        ("cpu%u.l1iCache", "system.cpu%u.icache.port",
         "cpu%u.l1iCache_bus_link"),
        ("cpu%u.l1dCache", "system.cpu%u.dcache.port",
         "cpu%u.l1dCache_bus_link"),
        ("cpu%u.itlbCache", "system.cpu%u.itb_walker_cache.port",
         "cpu%u.itlbCache_bus_link"),
        ("cpu%u.dtlbCache", "system.cpu%u.dtb_walker_cache.port",
         "cpu%u.dtlbCache_bus_link"),
    )

    # All four caches are created before any of them is linked to the bus.
    caches = [buildL1(cache_name % num, m5, port_name % num)
              for cache_name, port_name, _ in templates]

    # Only the data cache gets these extra settings.
    data_cache = caches[1]
    data_cache.addParams({
        "debug" : 0,
        "debug_level" : 10,
        "snoop_l1_invalidations" : 1
    })

    for cache, (_, _, link_name) in zip(caches, templates):
        bus_link = sst.Link(link_name % num)
        bus_port = bus_port + 1
        bus_link.connect((cache, "low_network_0", buslat),
                         (bus, "high_network_%u" % bus_port, buslat))
# One set of caches per CPU; four calls, matching "--num-cpus 4" in the
# gem5 command line above.
buildCPU(GEM5, 0)
buildCPU(GEM5, 1)
buildCPU(GEM5, 2)
buildCPU(GEM5, 3)
# Shared L2 cache: base parameters, then the L2-specific ones, then the
# network settings below. Its high_network_0 port hangs off the bus's
# low_network_0 port.
l2cache = sst.Component("l2cache", "memHierarchy.Cache")
l2cache.addParams(baseCacheParams)
l2cache.addParams(l2CacheParams)
l2cache.addParams({
"network_address" : "2",
"directory_at_next_level" : "1"
})
link = sst.Link("l2cache_bus_link")
link.connect((l2cache, "high_network_0", buslat), (bus, "low_network_0", buslat))
# Memory controller; it is connected to the directory controller by
# "link_dir_mem_link" further down.
# NOTE(review): "backend.mem_size" : 256 presumably means 256 MB, matching
# --mem-size=256MB in the gem5 command line -- confirm the unit.
memory = sst.Component("memory", "memHierarchy.MemController")
memory.addParams({
"request_width" : 64,
"coherence_protocol" : "MSI",
"access_time" : "25 ns",
"backend.mem_size" : 256,
"clock" : "2GHz",
"debug" : getenv("DEBUG"),
"range_start" : 0, # 2 * (1024 ** 3), # it's behind a directory controller.
})
# Single three-port router; the links at the end of the file attach the
# gem5 component (port0), the directory controller (port1) and the L2
# cache (port2) to it.
comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router")
comp_chiprtr.addParams({
"xbar_bw" : "16GB/s",
"link_bw" : "16GB/s",
"input_buf_size" : "1KB",
"num_ports" : "3",
"flit_size" : "72B",
"output_buf_size" : "1KB",
"id" : "0",
"topology" : "merlin.singlerouter"
})
# Directory controller. Its address range starts at 2 GiB and ends at
# 2 GiB + 256 MiB.
comp_dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController")
comp_dirctrl.addParams({
"coherence_protocol" : "MSI",
"network_address" : "1",
"entry_cache_size" : "16384",
"network_bw" : "1GB/s",
"addr_range_start" : 2 * (1024 ** 3),
"addr_range_end" : 2 * (1024 ** 3) + 256 * (1024 ** 2)
})
# Final wiring around the router and memory:
# L2 "directory" port <-> router port2
sst.Link("link_cache_net_0").connect((l2cache, "directory", "10ns"), (comp_chiprtr, "port2", "2ns"))
# router port1 <-> directory controller "network" port
sst.Link("link_dir_net_0").connect((comp_chiprtr, "port1", "2ns"), (comp_dirctrl, "network", "2ns"))
# router port0 <-> gem5 component "network" port (despite the link's
# "l2cache_io_link" name, the L2 is not an endpoint of this link)
sst.Link("l2cache_io_link").connect((comp_chiprtr, "port0", "2ns"), (GEM5, "network", buslat))
# directory controller "memory" port <-> memory controller "direct_link"
sst.Link("link_dir_mem_link").connect((comp_dirctrl, "memory", "10ns"), (memory, "direct_link", "10ns"))

View file

@ -1,4 +1,4 @@
# Copyright (c) 2009-2014 ARM Limited
# Copyright (c) 2009-2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@ -509,21 +509,22 @@ class VExpress_EMM(RealView):
# Attach I/O devices that are on chip and also set the appropriate
# ranges for the bridge
def attachOnChipIO(self, bus, bridge):
self.gic.pio = bus.master
self.local_cpu_timer.pio = bus.master
if hasattr(self, "gicv2m"):
self.gicv2m.pio = bus.master
self.hdlcd.dma = bus.slave
# Bridge ranges based on excluding what is part of on-chip I/O
# (gic, a9scu)
bridge.ranges = [AddrRange(0x2F000000, size='16MB'),
AddrRange(0x2B000000, size='4MB'),
AddrRange(0x30000000, size='256MB'),
AddrRange(0x40000000, size='512MB'),
AddrRange(0x18000000, size='64MB'),
AddrRange(0x1C000000, size='64MB')]
self.vgic.pio = bus.master
def attachOnChipIO(self, bus, bridge=None):
self.gic.pio = bus.master
self.vgic.pio = bus.master
self.local_cpu_timer.pio = bus.master
if hasattr(self, "gicv2m"):
self.gicv2m.pio = bus.master
self.hdlcd.dma = bus.slave
if bridge:
# Bridge ranges based on excluding what is part of on-chip I/O
# (gic, a9scu)
bridge.ranges = [AddrRange(0x2F000000, size='16MB'),
AddrRange(0x2B000000, size='4MB'),
AddrRange(0x30000000, size='256MB'),
AddrRange(0x40000000, size='512MB'),
AddrRange(0x18000000, size='64MB'),
AddrRange(0x1C000000, size='64MB')]
# Set the clock domain for IO objects that are considered