config: Enable elastic trace capture and replay in se/fs

This patch adds changes to the configuration scripts to support elastic
tracing and replay.

The patch adds a command line option to enable elastic tracing in SE mode
and FS mode. When enabled the Elastic Trace cpu probe is attached to O3CPU
and a few O3 CPU parameters are tuned. The Elastic Trace probe writes out
both instruction fetch and data dependency traces. The patch also enables
configuring the TraceCPU to replay traces using the SE and FS script.

The replay run is designed to resume from checkpoint using atomic cpu to
restore state keeping it consistent with FS run flow. It then switches to
TraceCPU to replay the input traces.
This commit is contained in:
Radhika Jagtap 2015-12-07 16:42:16 -06:00
parent 8f1ca0a212
commit 9bd5051b60
9 changed files with 200 additions and 2 deletions

View file

@ -69,6 +69,13 @@ def config_cache(options, system):
# Set the cache line size of the system
system.cache_line_size = options.cacheline_size
# If elastic trace generation is enabled, make sure the memory system is
# minimal so that compute delays do not include memory access latencies.
# Configure the compulsory L1 caches for the O3CPU, do not configure
# any more caches.
if options.l2cache and options.elastic_trace_en:
fatal("When elastic trace is enabled, do not configure L2 caches.")
if options.l2cache:
# Provide a clock for the L2 and the L1-to-L2 bus here as they
# are not connected using addTwoLevelCacheHierarchy. Use the

View file

@ -53,6 +53,7 @@ _cpu_aliases_all = [
("minor", "MinorCPU"),
("detailed", "DerivO3CPU"),
("kvm", ("ArmKvmCPU", "ArmV8KvmCPU", "X86KvmCPU")),
("trace", "TraceCPU"),
]
# Filtered list of aliases. Only aliases for existing CPUs exist in
@ -108,6 +109,30 @@ def cpu_names():
"""Return a list of valid CPU names."""
return _cpu_classes.keys() + _cpu_aliases.keys()
def config_etrace(cpu_cls, cpu_list, options):
if issubclass(cpu_cls, m5.objects.DerivO3CPU):
# Assign the same file name to all cpus for now. This must be
# revisited when creating elastic traces for multi processor systems.
for cpu in cpu_list:
# Attach the elastic trace probe listener. Set the protobuf trace
# file names. Set the dependency window size equal to the cpu it
# is attached to.
cpu.traceListener = m5.objects.ElasticTrace(
instFetchTraceFile = options.inst_trace_file,
dataDepTraceFile = options.data_trace_file,
depWindowSize = 3 * cpu.numROBEntries)
# Make the number of entries in the ROB, LQ and SQ very
# large so that there are no stalls due to resource
# limitation as such stalls will get captured in the trace
# as compute delay. For replay, ROB, LQ and SQ sizes are
# modelled in the Trace CPU.
cpu.numROBEntries = 512;
cpu.LQEntries = 128;
cpu.SQEntries = 128;
else:
fatal("%s does not support data dependency tracing. Use a CPU model of"
" type or inherited from DerivO3CPU.", cpu_cls)
# The ARM detailed CPU is special in the sense that it doesn't exist
# in the normal object hierarchy, so we have to add it manually.
try:

View file

@ -187,6 +187,11 @@ def config_mem(options, system):
cls = get(options.mem_type)
mem_ctrls = []
if options.elastic_trace_en and not issubclass(cls, \
m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
# The default behaviour is to interleave memory channels on 128
# byte granularity, or cache line granularity if larger than 128
# byte. This value is based on the locality seen across a large
@ -206,6 +211,11 @@ def config_mem(options, system):
options.mem_ranks:
mem_ctrl.ranks_per_channel = options.mem_ranks
if options.elastic_trace_en:
mem_ctrl.latency = '1ns'
print "For elastic trace, over-riding Simple Memory " \
"latency to 1ns."
mem_ctrls.append(mem_ctrl)
subsystem.mem_ctrls = mem_ctrls

View file

@ -87,6 +87,19 @@ def addCommonOptions(parser):
Only used if multiple programs are specified. If true,
then the number of threads per cpu is same as the
number of programs.""")
parser.add_option("--elastic-trace-en", action="store_true",
help="""Enable capture of data dependency and instruction
fetch traces using elastic trace probe.""")
# Trace file paths input to trace probe in a capture simulation and input
# to Trace CPU in a replay simulation
parser.add_option("--inst-trace-file", action="store", type="string",
help="""Instruction fetch trace file input to
Elastic Trace probe in a capture simulation and
Trace CPU in a replay simulation""", default="")
parser.add_option("--data-trace-file", action="store", type="string",
help="""Data dependency trace file input to
Elastic Trace probe in a capture simulation and
Trace CPU in a replay simulation""", default="")
# Memory Options
parser.add_option("--list-mem-types",

View file

@ -462,7 +462,8 @@ def run(options, root, testsys, cpu_class):
switch_cpus[i].system = testsys
switch_cpus[i].workload = testsys.cpu[i].workload
switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
switch_cpus[i].progress_interval = testsys.cpu[i].progress_interval
switch_cpus[i].progress_interval = \
testsys.cpu[i].progress_interval
# simulation period
if options.maxinsts:
switch_cpus[i].max_insts_any_thread = options.maxinsts
@ -470,6 +471,11 @@ def run(options, root, testsys, cpu_class):
if options.checker:
switch_cpus[i].addCheckerCpu()
# If elastic tracing is enabled attach the elastic trace probe
# to the switch CPUs
if options.elastic_trace_en:
CpuConfig.config_etrace(cpu_class, switch_cpus, options)
testsys.switch_cpus = switch_cpus
switch_cpu_list = [(testsys.cpu[i], switch_cpus[i]) for i in xrange(np)]

View file

@ -101,6 +101,7 @@ system.mmap_using_noreserve = True
options.mem_channels = 1
options.external_memory_system = 0
options.tlm_memory = 0
options.elastic_trace_en = 0
MemConfig.config_mem(options, system)
# the following assumes that we are using the native DRAM

View file

@ -0,0 +1,119 @@
# Copyright (c) 2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Radhika Jagtap
# Basic elastic traces replay script that configures a Trace CPU
import optparse
from m5.util import addToPath, fatal
addToPath('../common')
import Options
import Simulation
import CacheConfig
import MemConfig
from Caches import *
parser = optparse.OptionParser()
Options.addCommonOptions(parser)
if '--ruby' in sys.argv:
print "This script does not support Ruby configuration, mainly"\
" because Trace CPU has been tested only with classic memory system"
sys.exit(1)
(options, args) = parser.parse_args()
if args:
print "Error: script doesn't take any positional arguments"
sys.exit(1)
numThreads = 1
if options.cpu_type != "trace":
fatal("This is a script for elastic trace replay simulation, use "\
"--cpu-type=trace\n");
if options.num_cpus > 1:
fatal("This script does not support multi-processor trace replay.\n")
# In this case FutureClass will be None as there is not fast forwarding or
# switching
(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.numThreads = numThreads
system = System(cpu = CPUClass(cpu_id=0),
mem_mode = test_mem_mode,
mem_ranges = [AddrRange(options.mem_size)],
cache_line_size = options.cacheline_size)
# Create a top-level voltage domain
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
# Create a source clock for the system. This is used as the clock period for
# xbar and memory
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
voltage_domain = system.voltage_domain)
# Create a CPU voltage domain
system.cpu_voltage_domain = VoltageDomain()
# Create a separate clock domain for the CPUs. In case of Trace CPUs this clock
# is actually used only by the caches connected to the CPU.
system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
voltage_domain =
system.cpu_voltage_domain)
# All cpus belong to a common cpu_clk_domain, therefore running at a common
# frequency.
for cpu in system.cpu:
cpu.clk_domain = system.cpu_clk_domain
# Assign input trace files to the Trace CPU
system.cpu.instTraceFile=options.inst_trace_file
system.cpu.dataTraceFile=options.data_trace_file
# Configure the classic memory system options
MemClass = Simulation.setMemClass(options)
system.membus = SystemXBar()
system.system_port = system.membus.slave
CacheConfig.config_cache(options, system)
MemConfig.config_mem(options, system)
root = Root(full_system = False, system = system)
Simulation.run(options, root, system, FutureClass)

View file

@ -214,6 +214,17 @@ def build_test_system(np):
test_sys.cpu[i].addCheckerCpu()
test_sys.cpu[i].createThreads()
# If elastic tracing is enabled when not restoring from checkpoint and
# when not fast forwarding using the atomic cpu, then check that the
# TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check
# passes then attach the elastic trace probe.
# If restoring from checkpoint or fast forwarding, the code that does this for
# FutureCPUClass is in the Simulation module. If the check passes then the
# elastic trace probe is attached to the switch CPUs.
if options.elastic_trace_en and options.checkpoint_restore == None and \
not options.fast_forward:
CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options)
CacheConfig.config_cache(options, test_sys)
MemConfig.config_mem(options, test_sys)

View file

@ -58,6 +58,7 @@ import Options
import Ruby
import Simulation
import CacheConfig
import CpuConfig
import MemConfig
from Caches import *
from cpu2000 import *
@ -196,6 +197,11 @@ system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
voltage_domain =
system.cpu_voltage_domain)
# If elastic tracing is enabled, then configure the cpu and attach the elastic
# trace probe
if options.elastic_trace_en:
CpuConfig.config_etrace(CPUClass, system.cpu, options)
# All cpus belong to a common cpu_clk_domain, therefore running at a common
# frequency.
for cpu in system.cpu: