306 lines
10 KiB
Python
306 lines
10 KiB
Python
|
# Copyright (c) 2015 ARM Limited
|
||
|
# All rights reserved.
|
||
|
#
|
||
|
# The license below extends only to copyright in the software and shall
|
||
|
# not be construed as granting a license to any other intellectual
|
||
|
# property including but not limited to intellectual property relating
|
||
|
# to a hardware implementation of the functionality of the software
|
||
|
# licensed hereunder. You may use the software subject to the license
|
||
|
# terms below provided that you ensure that this notice is replicated
|
||
|
# unmodified and in its entirety in all distributions of the software,
|
||
|
# modified or unmodified, in source code or in binary form.
|
||
|
#
|
||
|
# Redistribution and use in source and binary forms, with or without
|
||
|
# modification, are permitted provided that the following conditions are
|
||
|
# met: redistributions of source code must retain the above copyright
|
||
|
# notice, this list of conditions and the following disclaimer;
|
||
|
# redistributions in binary form must reproduce the above copyright
|
||
|
# notice, this list of conditions and the following disclaimer in the
|
||
|
# documentation and/or other materials provided with the distribution;
|
||
|
# neither the name of the copyright holders nor the names of its
|
||
|
# contributors may be used to endorse or promote products derived from
|
||
|
# this software without specific prior written permission.
|
||
|
#
|
||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
#
|
||
|
# Authors: Andreas Hansson
|
||
|
|
||
|
import gzip
import optparse
import os
import sys
|
||
|
|
||
|
import m5
|
||
|
from m5.objects import *
|
||
|
from m5.util import addToPath
|
||
|
from m5.internal.stats import periodicStatDump
|
||
|
|
||
|
addToPath('../common')
|
||
|
import MemConfig
|
||
|
|
||
|
addToPath('../../util')
|
||
|
import protolib
|
||
|
|
||
|
# this script is helpful to observe the memory latency for various
|
||
|
# levels in a cache hierarchy, and various cache and memory
|
||
|
# configurations, in essence replicating the lmbench lat_mem_rd thrash
|
||
|
# behaviour
|
||
|
|
||
|
# Import the protobuf bindings for gem5 packet traces. If that fails,
# try to generate them with protoc and import them again. The protoc
# paths are relative, so this is expected to run from the gem5 root.
try:
    import packet_pb2
except Exception:
    # Any ordinary failure to load the bindings triggers regeneration,
    # but unlike a bare "except:" this does not swallow Ctrl-C or
    # SystemExit.
    print("Did not find packet proto definitions, attempting to generate")
    from subprocess import call
    try:
        error = call(['protoc', '--python_out=configs/dram',
                      '--proto_path=src/proto', 'src/proto/packet.proto'])
    except OSError:
        # protoc could not be launched at all (e.g. it is not
        # installed); handle it like any other generation failure
        # instead of dying with a traceback
        error = 1

    if error:
        print("Failed to import packet proto definitions")
        exit(-1)

    print("Generated packet proto definitions")

    # the generated module is useless without the protobuf runtime, so
    # check for it explicitly to give a helpful message
    try:
        import google.protobuf
    except Exception:
        print("Please install the Python protobuf module")
        exit(-1)

    import packet_pb2
|
||
|
|
||
|
# Command-line interface: the memory technology, the memory size and
# whether previously generated traces should be reused.
parser = optparse.OptionParser()

parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
                  choices=MemConfig.mem_names(),
                  help="type of memory to use")
parser.add_option("--mem-size", action="store", type="string",
                  default="16MB",
                  help="Specify the memory size")
parser.add_option("--reuse-trace", action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

# anything left over after option parsing is a usage error; sys is
# imported explicitly at the top of the file so this exit cannot fail
# with a NameError
if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)
|
||
|
|
||
|
# The system is built around a multi-layer crossbar running at
# 2.0 GHz. It delivers 64 bytes every 3 cycles (one of which is a
# header cycle), i.e. 42.7 GByte/s per layer and thus per port.
xbar = SystemXBar(width=32)
system = System(membus=xbar)

vdomain = VoltageDomain(voltage='1V')
system.clk_domain = SrcClockDomain(clock='2.0GHz', voltage_domain=vdomain)

# a single address range sized by the --mem-size option
mem_range = AddrRange(options.mem_size)
system.mem_ranges = [mem_range]

# no need to reserve space for the backing store
system.mmap_using_noreserve = True
|
||
|
|
||
|
# These settings are not exposed as command-line options, so they are
# attached to the options object by hand before it is passed on
# (presumably MemConfig.config_mem() expects them to be present).
for opt_name, opt_value in (("mem_channels", 1),
                            ("mem_ranks", 1),
                            ("external_memory_system", 0),
                            ("tlm_memory", 0),
                            ("elastic_trace_en", 0)):
    setattr(options, opt_name, opt_value)

MemConfig.config_mem(options, system)
|
||
|
|
||
|
for ctrl in system.mem_ctrls:
    # storing actual data would only slow the simulation down
    ctrl.null = True

    # the refresh tweak below assumes the native DRAM controller, so
    # leave every other controller type alone
    if not isinstance(ctrl, m5.objects.DRAMCtrl):
        continue

    # push the refresh interval out far enough that refreshes never
    # show up as latency spikes in the measurement
    ctrl.tREFI = '100s'
|
||
|
|
||
|
# burst length in bytes, also used as the cache line size of the system
burst_size = 64
system.cache_line_size = burst_size

# As in the utilisation sweep, the traffic generator configuration is
# written to a file in the output directory so that it can be read
# back in later.
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')
|
||
|
|
||
|
def is_pow2(num):
    """Return True if the integer num is a power of two.

    Zero is not treated as a power of two.
    """
    if num == 0:
        return False

    # a power of two has exactly one bit set, so clearing the lowest
    # set bit must leave nothing behind
    return (num & (num - 1)) == 0
|
||
|
|
||
|
# every range is assumed to start at address 0, so the end of the
# memory range bounds the sweep
max_range = int(mem_range.end)

# The sweep starts at 4 kByte and grows until the maximum is reached.
# The increment doubles each time a power of two is hit.
min_range = 4096
step = 1024
ranges = [min_range]

while ranges[-1] < max_range:
    new_range = ranges[-1] + step
    ranges.append(new_range)
    if is_pow2(new_range):
        step <<= 1

# number of measurement passes per data point
iterations = 2

# inter-transaction time: 150 ns expressed in ticks, chosen to be
# large enough that transactions do not pile up in the system; adjust
# if needed
itt = 150 * 1000
|
||
|
|
||
|
# for every data point, we create a trace containing a random address
|
||
|
# sequence, so that we can play back the same sequence for warming and
|
||
|
# the actual measurement
|
||
|
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzipped protobuf trace of reads in random address order.

    Every multiple of burst_size in [0, max_addr) is read exactly once,
    in shuffled order. The first request is issued at tick 0 and each
    following one itt ticks after its predecessor.

    filename   -- path of the gzip-compressed trace file to create
    max_addr   -- exclusive upper bound of the addresses to touch
    burst_size -- size of every request, and the stride between the
                  addresses, in bytes
    itt        -- inter-transaction time in ticks

    If the file cannot be opened for writing, an error is printed and
    the script terminates through exit(-1).
    """
    import random

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open %s for writing" % filename)
        exit(-1)

    # release the file handle even if encoding fails part way through
    try:
        # write the magic number in 4-byte Little Endian, similar to
        # what is done in src/proto/protoio.cc
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # every single address to touch, as an explicit list so that
        # it can be shuffled in place
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        # a single packet object is re-used for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        tick = 0
        for addr in addrs:
            packet.tick = long(tick)
            packet.addr = long(addr)
            protolib.encodeMessage(proto_out, packet)
            tick += itt
    finally:
        proto_out.close()
|
||
|
|
||
|
# trace generation is slow, so let the user know what is going on
print("Generating traces, please wait...")

# running indices for the trace files and the generator states
nxt_range = 0
nxt_state = 0

# duration of every state in ticks: one inter-transaction time per
# burst in the largest range (integer division under Python 2)
period = long((max_range / burst_size) * itt)
|
||
|
|
||
|
# Emit the generator states for every range: one warming state
# followed by the measuring states, all replaying the same trace.
state_fmt = "STATE %d %d TRACE %s 0\n"

for r in ranges:
    trace_name = 'lat_mem_rd%d.trc.gz' % nxt_range
    filename = os.path.join(m5.options.outdir, trace_name)

    # the random trace for this range is generated unless the user
    # asked for existing traces to be reused
    if not options.reuse_trace:
        create_trace(filename, r, burst_size, itt)

    # the warming state
    cfg_file.write(state_fmt % (nxt_state, period, filename))
    nxt_state += 1

    # the measuring states
    for i in range(iterations):
        cfg_file.write(state_fmt % (nxt_state, period, filename))
        nxt_state += 1

    nxt_range += 1
|
||
|
|
||
|
# the state machine starts in state 0
cfg_file.write("INIT 0\n")

# chain the states so that each one is followed by its successor
transition_fmt = "TRANSITION %d %d 1\n"
for state in range(1, nxt_state):
    cfg_file.write(transition_fmt % (state - 1, state))

# the final state only transitions to itself
last_state = nxt_state - 1
cfg_file.write(transition_fmt % (last_state, last_state))

cfg_file.close()
|
||
|
|
||
|
# the traffic generator is driven by the configuration file whose name
# is held in cfg_file_name
system.tgen = TrafficGen(config_file=cfg_file_name)

# a communication monitor sits directly behind the generator
system.monitor = CommMonitor()

# generator port -> monitor slave port
system.tgen.port = system.monitor.slave
|
||
|
|
||
|
# create the actual cache hierarchy, for now just go with something
|
||
|
# basic to explore some of the options
|
||
|
from Caches import *
|
||
|
|
||
|
# Parameter set intended as a starting point for an L3 cache.
class L3Cache(Cache):
    assoc = 16

    # hit and response latency use the same value
    hit_latency = response_latency = 40

    # MSHRs, targets per MSHR and write buffers
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16
|
||
|
|
||
|
# All caches share the clock domain of the system (2.0 GHz, as set
# when the system was created). The order of the port bindings below
# is kept deliberately.

# L1: fed by the monitor
system.l1cache = L1_DCache(size='64kB')
l1 = system.l1cache
system.monitor.master = l1.cpu_side

# L2: sits behind its own crossbar and also writes back clean lines
system.l2cache = L2Cache(size='512kB', writeback_clean=True)
l2 = system.l2cache
l2.xbar = L2XBar()
l1.mem_side = l2.xbar.slave
l2.cpu_side = l2.xbar.master

# L3: mostly exclusive, which is why the L2 above is configured to
# write back clean lines as well
system.l3cache = L3Cache(size='4MB', clusivity='mostly_excl')
l3 = system.l3cache
l3.xbar = L2XBar()
l2.mem_side = l3.xbar.slave
l3.cpu_side = l3.xbar.master
l3.mem_side = system.membus.slave

# the system port is connected even though this example does not use it
system.system_port = system.membus.slave
|
||
|
|
||
|
# dump and reset all stats once per period, i.e. once per state
periodicStatDump(period)

# instantiate the configuration in timing mode
root = Root(full_system=False, system=system)
root.system.mem_mode = 'timing'

m5.instantiate()

# run for as long as it takes to go through every state once
total_ticks = nxt_state * period
m5.simulate(total_ticks)

# report everything needed to make sense of the stats output
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for r in ranges:
    print(r)
|