diff --git a/configs/dram/lat_mem_rd.py b/configs/dram/lat_mem_rd.py new file mode 100644 index 000000000..87917418d --- /dev/null +++ b/configs/dram/lat_mem_rd.py @@ -0,0 +1,305 @@ +# Copyright (c) 2015 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andreas Hansson + +import gzip +import optparse +import os + +import m5 +from m5.objects import * +from m5.util import addToPath +from m5.internal.stats import periodicStatDump + +addToPath('../common') +import MemConfig + +addToPath('../../util') +import protolib + +# this script is helpful to observe the memory latency for various +# levels in a cache hierarchy, and various cache and memory +# configurations, in essence replicating the lmbench lat_mem_rd thrash +# behaviour + +# import the packet proto definitions, and if they are not found, +# attempt to generate them automatically +try: + import packet_pb2 +except: + print "Did not find packet proto definitions, attempting to generate" + from subprocess import call + error = call(['protoc', '--python_out=configs/dram', + '--proto_path=src/proto', 'src/proto/packet.proto']) + if not error: + print "Generated packet proto definitions" + + try: + import google.protobuf + except: + print "Please install the Python protobuf module" + exit(-1) + + import packet_pb2 + else: + print "Failed to import packet proto definitions" + exit(-1) + +parser = optparse.OptionParser() + +parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64", + choices=MemConfig.mem_names(), + help = "type of memory to use") +parser.add_option("--mem-size", action="store", type="string", + default="16MB", + help="Specify the memory size") +parser.add_option("--reuse-trace", action="store_true", + help="Prevent generation of traces and reuse existing") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +# start by creating the system itself, using a multi-layer 2.0 GHz +# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which +# amounts to 42.7 GByte/s per layer and thus per port +system = System(membus = SystemXBar(width = 32)) +system.clk_domain = SrcClockDomain(clock = '2.0GHz', + voltage_domain = + VoltageDomain(voltage = '1V')) + +mem_range = AddrRange(options.mem_size) +system.mem_ranges = [mem_range] + +# do not worry about reserving space for the backing store +system.mmap_using_noreserve = True + +# currently not exposed as command-line options, set here for now +options.mem_channels = 1 +options.mem_ranks = 1 +options.external_memory_system = 0 +options.tlm_memory = 0 +options.elastic_trace_en = 0 + +MemConfig.config_mem(options, system) + +# there is no point slowing things down by saving any data +for ctrl in system.mem_ctrls: + ctrl.null = True + + # the following assumes that we are using the native DRAM + # controller, check to be sure + if isinstance(ctrl, m5.objects.DRAMCtrl): + # make the DRAM refresh interval sufficiently infinite to avoid + # latency spikes + ctrl.tREFI = '100s' + +# use the same concept as the utilisation sweep, and print the config +# so that we can later read it in +cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg") +cfg_file = open(cfg_file_name, 'w') + +# set an appropriate burst length in bytes +burst_size = 64 +system.cache_line_size = burst_size + +# lazy version to check if an integer is a power of two +def is_pow2(num): + return num != 0 and ((num & (num - 1)) == 0) + +# assume we start every range at 0 +max_range = int(mem_range.end) + +# start at a size of 4 kByte, and go up till we hit the max, increase +# the step every time we hit a power of two +min_range = 4096 +ranges = [min_range] +step = 1024 + +while ranges[-1] < max_range: + new_range = ranges[-1] + step + if is_pow2(new_range): + step *= 2 + ranges.append(new_range) + +# how many times to repeat the measurement for each data point +iterations = 2 + +# 150 ns in ticks, this is choosen to be high enough that transactions +# do not pile up in the system, adjust if needed +itt = 150 * 1000 + +# for every data point, we create a trace containing a random address +# sequence, so that we can play back the same sequence for warming and +# the actual measurement +def create_trace(filename, max_addr, burst_size, itt): + try: + proto_out = gzip.open(filename, 'wb') + except IOError: + print "Failed to open ", filename, " for writing" + exit(-1) + + # write the magic number in 4-byte Little Endian, similar to what + # is done in src/proto/protoio.cc + proto_out.write("gem5") + + # add the packet header + header = packet_pb2.PacketHeader() + header.obj_id = "lat_mem_rd for range 0:" + str(max_addr) + # assume the default tick rate (1 ps) + header.tick_freq = 1000000000000 + protolib.encodeMessage(proto_out, header) + + # create a list of every single address to touch + addrs = range(0, max_addr, burst_size) + + import random + random.shuffle(addrs) + + tick = 0 + + # create a packet we can re-use for all the addresses + packet = packet_pb2.Packet() + # ReadReq is 1 in src/mem/packet.hh Command enum + packet.cmd = 1 + packet.size = int(burst_size) + + for addr in addrs: + packet.tick = long(tick) + packet.addr = long(addr) + protolib.encodeMessage(proto_out, packet) + tick = tick + itt + + proto_out.close() + +# this will take a while, so keep the user informed +print "Generating traces, please wait..." + +nxt_range = 0 +nxt_state = 0 +period = long(itt * (max_range / burst_size)) + +# now we create the states for each range +for r in ranges: + filename = os.path.join(m5.options.outdir, + 'lat_mem_rd%d.trc.gz' % nxt_range) + + if not options.reuse_trace: + # create the actual random trace for this range + create_trace(filename, r, burst_size, itt) + + # the warming state + cfg_file.write("STATE %d %d TRACE %s 0\n" % + (nxt_state, period, filename)) + nxt_state = nxt_state + 1 + + # the measuring states + for i in range(iterations): + cfg_file.write("STATE %d %d TRACE %s 0\n" % + (nxt_state, period, filename)) + nxt_state = nxt_state + 1 + + nxt_range = nxt_range + 1 + +cfg_file.write("INIT 0\n") + +# go through the states one by one +for state in range(1, nxt_state): + cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state)) + +cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1)) + +cfg_file.close() + +# create a traffic generator, and point it to the file we just created +system.tgen = TrafficGen(config_file = cfg_file_name) + +# add a communication monitor +system.monitor = CommMonitor() + +# connect the traffic generator to the system +system.tgen.port = system.monitor.slave + +# create the actual cache hierarchy, for now just go with something +# basic to explore some of the options +from Caches import * + +# a starting point for an L3 cache +class L3Cache(Cache): + assoc = 16 + hit_latency = 40 + response_latency = 40 + mshrs = 32 + tgts_per_mshr = 12 + write_buffers = 16 + +# note that everything is in the same clock domain, 2.0 GHz as +# specified above +system.l1cache = L1_DCache(size = '64kB') +system.monitor.master = system.l1cache.cpu_side + +system.l2cache = L2Cache(size = '512kB', writeback_clean = True) +system.l2cache.xbar = L2XBar() +system.l1cache.mem_side = system.l2cache.xbar.slave +system.l2cache.cpu_side = system.l2cache.xbar.master + +# make the L3 mostly exclusive, and correspondingly ensure that the L2 +# writes back also clean lines to the L3 +system.l3cache = L3Cache(size = '4MB', clusivity = 'mostly_excl') +system.l3cache.xbar = L2XBar() +system.l2cache.mem_side = system.l3cache.xbar.slave +system.l3cache.cpu_side = system.l3cache.xbar.master +system.l3cache.mem_side = system.membus.slave + +# connect the system port even if it is not used in this example +system.system_port = system.membus.slave + +# every period, dump and reset all stats +periodicStatDump(period) + +# run Forrest, run! +root = Root(full_system = False, system = system) +root.system.mem_mode = 'timing' + +m5.instantiate() +m5.simulate(nxt_state * period) + +# print all we need to make sense of the stats output +print "lat_mem_rd with %d iterations, ranges:" % iterations +for r in ranges: + print r diff --git a/util/dram_lat_mem_rd_plot.py b/util/dram_lat_mem_rd_plot.py new file mode 100755 index 000000000..72e1528c3 --- /dev/null +++ b/util/dram_lat_mem_rd_plot.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python + +# Copyright (c) 2015 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andreas Hansson + +try: + import matplotlib.pyplot as plt + import matplotlib as mpl + import numpy as np +except ImportError: + print "Failed to import matplotlib and numpy" + exit(-1) + +import sys +import re + +# This script is intended to post process and plot the output from +# running configs/dram/lat_mem_rd.py, as such it parses the simout and +# stats.txt to get the relevant data points. +def main(): + + if len(sys.argv) != 2: + print "Usage: ", sys.argv[0], "" + exit(-1) + + try: + stats = open(sys.argv[1] + '/stats.txt', 'r') + except IOError: + print "Failed to open ", sys.argv[1] + '/stats.txt', " for reading" + exit(-1) + + try: + simout = open(sys.argv[1] + '/simout', 'r') + except IOError: + print "Failed to open ", sys.argv[1] + '/simout', " for reading" + exit(-1) + + # Get the address ranges + got_ranges = False + ranges = [] + + iterations = 1 + + for line in simout: + if got_ranges: + ranges.append(int(line) / 1024) + + match = re.match("lat_mem_rd with (\d+) iterations, ranges:.*", line) + if match: + got_ranges = True + iterations = int(match.groups(0)[0]) + + simout.close() + + if not got_ranges: + print "Failed to get address ranges, ensure simout is up-to-date" + exit(-1) + + # Now parse the stats + raw_rd_lat = [] + + for line in stats: + match = re.match(".*readLatencyHist::mean\s+(.+)\s+#.*", line) + if match: + raw_rd_lat.append(float(match.groups(0)[0]) / 1000) + stats.close() + + # The stats also contain the warming, so filter the latency stats + i = 0 + filtered_rd_lat = [] + for l in raw_rd_lat: + if i % (iterations + 1) == 0: + pass + else: + filtered_rd_lat.append(l) + i = i + 1 + + # Next we need to take care of the iterations + rd_lat = [] + for i in range(iterations): + rd_lat.append(filtered_rd_lat[i::iterations]) + + final_rd_lat = map(lambda p: min(p), zip(*rd_lat)) + + # Sanity check + if not (len(ranges) == len(final_rd_lat)): + print "Address ranges (%d) and read latency (%d) do not match" % \ + (len(ranges), len(final_rd_lat)) + exit(-1) + + for (r, l) in zip(ranges, final_rd_lat): + print r, round(l, 2) + + # lazy version to check if an integer is a power of two + def is_pow2(num): + return num != 0 and ((num & (num - 1)) == 0) + + plt.semilogx(ranges, final_rd_lat) + + # create human readable labels + xticks_locations = [r for r in ranges if is_pow2(r)] + xticks_labels = [] + for x in xticks_locations: + if x < 1024: + xticks_labels.append('%d kB' % x) + else: + xticks_labels.append('%d MB' % (x / 1024)) + plt.xticks(xticks_locations, xticks_labels, rotation=-45) + + plt.minorticks_off() + plt.xlim((xticks_locations[0], xticks_locations[-1])) + plt.ylabel("Latency (ns)") + plt.grid(True) + plt.show() + +if __name__ == "__main__": + main()