From 4d77ea7a5783d1de87a8eb804b17a6ef352998ce Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 23 Mar 2010 08:50:57 -0700 Subject: [PATCH 1/5] cpu: fix exec tracing memory corruption bug Accessing traceData (to call setAddress() and/or setData()) after initiating a timing translation was causing crashes, since a failed translation could delete the traceData object before returning. It turns out that there was never a need to access traceData after initiating the translation, as the traced data was always available earlier; this ordering was merely historical. Furthermore, traceData->setAddress() and traceData->setData() were being called both from the CPU model and the ISA definition, often redundantly. This patch standardizes all setAddress and setData calls for memory instructions to be in the CPU models and not in the ISA definition. It also moves those calls above the translation calls to eliminate the crashes. --- src/arch/alpha/isa/mem.isa | 6 ------ src/arch/arm/isa/formats/mem.isa | 2 -- src/arch/mips/isa/formats/mem.isa | 8 -------- src/arch/mips/isa/formats/util.isa | 3 --- src/arch/power/isa/formats/mem.isa | 2 -- src/arch/power/isa/formats/util.isa | 3 --- src/cpu/inorder/resources/cache_unit.cc | 9 +++++++++ src/cpu/simple/atomic.cc | 7 +------ src/cpu/simple/base.cc | 21 +++++++++++++++++++++ src/cpu/simple/base.hh | 12 ++---------- src/cpu/simple/timing.cc | 22 ++++++++++------------ 11 files changed, 43 insertions(+), 52 deletions(-) diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa index b1703221f..efff0eac7 100644 --- a/src/arch/alpha/isa/mem.isa +++ b/src/arch/alpha/isa/mem.isa @@ -275,7 +275,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -310,7 +309,6 @@ def template StoreCondExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, 
EA, memAccessFlags, &write_result); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -344,7 +342,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } return fault; @@ -478,9 +475,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Some CPU models execute the memory operation as an atomic unit, # while others want to separate them into an effective address # computation and a memory access operation. As a result, we need diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index 0b0a4c9fa..2f66ca54e 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -172,7 +172,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -204,7 +203,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } // Need to write back any potential address register update diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index 161a52b06..411cc5fda 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -305,7 +305,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -342,7 +341,6 @@ def template StoreFPExecute {{ if (fault == NoFault) { fault = 
xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -377,7 +375,6 @@ def template StoreCondExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, &write_result); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -411,7 +408,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } return fault; @@ -435,8 +431,6 @@ def template StoreCompleteAcc {{ if (fault == NoFault) { %(op_wb)s; - - if (traceData) { traceData->setData(getMemData(xc, pkt)); } } return fault; @@ -459,8 +453,6 @@ def template StoreCompleteAcc {{ if (fault == NoFault) { %(op_wb)s; - - if (traceData) { traceData->setData(getMemData(xc, pkt)); } } return fault; diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index a6edffeda..708338074 100644 --- a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -38,9 +38,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Some CPU models execute the memory operation as an atomic unit, # while others want to separate them into an effective address # computation and a memory access operation. 
As a result, we need diff --git a/src/arch/power/isa/formats/mem.isa b/src/arch/power/isa/formats/mem.isa index 1be49c2f7..3bcf0633a 100644 --- a/src/arch/power/isa/formats/mem.isa +++ b/src/arch/power/isa/formats/mem.isa @@ -166,7 +166,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -196,7 +195,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } // Need to write back any potential address register update diff --git a/src/arch/power/isa/formats/util.isa b/src/arch/power/isa/formats/util.isa index ab1e530b2..8fd7f7daa 100644 --- a/src/arch/power/isa/formats/util.isa +++ b/src/arch/power/isa/formats/util.isa @@ -97,9 +97,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Generate InstObjParams for the memory access. iop = InstObjParams(name, Name, base_class, {'ea_code': ea_code, diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 376ea8d26..d12f11a2c 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -443,6 +443,10 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->traceData) { + inst->traceData->setAddr(addr); + } + if (inst->split2ndAccess) { dataSize = inst->split2ndSize; cache_req->splitAccess = true; @@ -541,6 +545,11 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, //The size of the data we're trying to read. 
int dataSize = sizeof(T); + if (inst->traceData) { + inst->traceData->setAddr(addr); + inst->traceData->setData(data); + } + if (inst->split2ndAccess) { dataSize = inst->split2ndSize; cache_req->splitAccess = true; diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 05b4ca3e2..7740434d8 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -451,6 +451,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (traceData) { traceData->setAddr(addr); + traceData->setData(data); } //The block size of our peer. @@ -530,12 +531,6 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) //stop now. if (fault != NoFault || secondAddr <= addr) { - // If the write needs to have a fault on the access, consider - // calling changeStatus() and changing it to "bad addr write" - // or something. - if (traceData) { - traceData->setData(gtoh(data)); - } if (req->isLocked() && fault == NoFault) { assert(locked); locked = false; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 0104e1b1f..17ba6a10b 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -205,6 +205,27 @@ change_thread_state(ThreadID tid, int activate, int priority) { } +void +BaseSimpleCPU::prefetch(Addr addr, unsigned flags) +{ + if (traceData) { + traceData->setAddr(addr); + } + + // need to do this... +} + +void +BaseSimpleCPU::writeHint(Addr addr, int size, unsigned flags) +{ + if (traceData) { + traceData->setAddr(addr); + } + + // need to do this... +} + + Fault BaseSimpleCPU::copySrcTranslate(Addr src) { diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39961fb88..87e211521 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -232,16 +232,8 @@ class BaseSimpleCPU : public BaseCPU Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n"); M5_DUMMY_RETURN} - void prefetch(Addr addr, unsigned flags) - { - // need to do this... 
- } - - void writeHint(Addr addr, int size, unsigned flags) - { - // need to do this... - } - + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 221cb0d0d..7583c09e6 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -426,6 +426,10 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Read; + if (traceData) { + traceData->setAddr(addr); + } + RequestPtr req = new Request(asid, addr, data_size, flags, pc, _cpuId, tid); @@ -460,11 +464,6 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) thread->dtb->translateTiming(req, tc, translation, mode); } - if (traceData) { - traceData->setData(data); - traceData->setAddr(addr); - } - return NoFault; } @@ -548,6 +547,11 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Write; + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + RequestPtr req = new Request(asid, addr, data_size, flags, pc, _cpuId, tid); @@ -584,13 +588,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) thread->dtb->translateTiming(req, tc, translation, mode); } - if (traceData) { - traceData->setAddr(req->getVaddr()); - traceData->setData(data); - } - - // If the write needs to have a fault on the access, consider calling - // changeStatus() and changing it to "bad addr write" or something. 
+ // Translation faults will be returned via finishTranslation() return NoFault; } From f066bfc2f5944353ea11fd4c6853179c4ea1af78 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 23 Mar 2010 08:50:59 -0700 Subject: [PATCH 2/5] cpu: get rid of uncached access "events" These recordEvent() calls could cause crashes since they access the req pointer after it's potentially been deleted during a failed translation call. (Similar problem to the traceData bug fixed in the previous cset.) Moving them above the translation call (as was done recently in cset 8b2b8e5e7d35) avoids the crash but doesn't work, since at that point we don't know if the access is uncached or not. It's not clear why these calls are there, and no one seems to use them, so we'll just delete them. If they are needed, they should be moved to somewhere that's guaranteed to be after the translation completes but before the request is possibly deleted, e.g., in finishTranslation(). --- src/cpu/simple/atomic.cc | 8 -------- src/cpu/simple/timing.cc | 8 -------- 2 files changed, 16 deletions(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 7740434d8..d96adffd5 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -351,10 +351,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) } } - // This will need a new way to tell if it has a dcache attached. - if (req->isUncacheable()) - recordEvent("Uncached Read"); - //If there's a fault, return it if (fault != NoFault) { if (req->isPrefetch()) { @@ -523,10 +519,6 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } } - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - //If there's a fault or we don't need to access a second cache line, //stop now. 
if (fault != NoFault || secondAddr <= addr) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 7583c09e6..b8fc5ab84 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -436,10 +436,6 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) Addr split_addr = roundDown(addr + data_size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - _status = DTBWaitResponse; if (split_addr > addr) { RequestPtr req1, req2; @@ -558,10 +554,6 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) Addr split_addr = roundDown(addr + data_size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - T *dataP = new T; *dataP = TheISA::htog(data); _status = DTBWaitResponse; From d2eb58967562d04044a5977787a312e9b259b9b7 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Tue, 23 Mar 2010 16:31:47 -0700 Subject: [PATCH 3/5] regress: add some new options add -n/--no-exec which doesn't execute scons, but just prints the command line add -j0 which tries to calculate how many cpus you have add -D/--build-dir to specify a build directory other than ./build --- util/regress | 78 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/util/regress b/util/regress index 1d0b9049a..a74bd09c3 100755 --- a/util/regress +++ b/util/regress @@ -36,28 +36,29 @@ from subprocess import call progname = os.path.basename(sys.argv[0]) optparser = optparse.OptionParser() -optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', - default=False, - help='echo commands before executing') -optparser.add_option('--builds', dest='builds', - 
default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \ - 'ALPHA_SE_MESI_CMP_directory,' \ - 'ALPHA_SE_MOESI_CMP_directory,' \ - 'ALPHA_SE_MOESI_CMP_token,' \ - 'ALPHA_FS,MIPS_SE,' \ - 'POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE', - help='comma-separated list of build targets to test ' - " (default: '%default')" ) -optparser.add_option('--variants', dest='variants', - default='fast', - help='comma-separated list of build variants to test ' - " (default: '%default')" ) -optparser.add_option('--scons-opts', dest='scons_opts', default='', - help='scons options', metavar='OPTS') -optparser.add_option('-j', '--jobs', type='int', default=1, - help='number of parallel jobs to use') -optparser.add_option('-k', '--keep-going', action='store_true', - help='keep going after errors') +add_option = optparser.add_option +add_option('-v', '--verbose', dest='verbose', action='store_true', + default=False, + help='echo commands before executing') +add_option('--builds', dest='builds', + default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \ + 'ALPHA_SE_MESI_CMP_directory,' \ + 'ALPHA_SE_MOESI_CMP_directory,' \ + 'ALPHA_SE_MOESI_CMP_token,' \ + 'ALPHA_FS,MIPS_SE,POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE', + help="comma-separated build targets to test (default: '%default')") +add_option('--variants', dest='variants', default='fast', + help="comma-separated build variants to test (default: '%default')") +add_option('--scons-opts', dest='scons_opts', default='', metavar='OPTS', + help='scons options') +add_option('-j', '--jobs', type='int', default=1, + help='number of parallel jobs to use') +add_option('-k', '--keep-going', action='store_true', + help='keep going after errors') +add_option('-D', '--build-dir', default='', + help='build directory location') +add_option('-n', "--no-exec", default=False, action='store_true', + help="don't actually invoke scons, just echo SCons command line") (options, tests) = optparser.parse_args() @@ -66,6 +67,8 @@ optparser.add_option('-k', '--keep-going', action='store_true', 
builds = options.builds.split(',') variants = options.variants.split(',') +options.build_dir = os.path.join(options.build_dir, 'build') + # Call os.system() and raise exception if return status is non-zero def system(cmd): try: @@ -91,11 +94,11 @@ def shellquote(s): if not tests: print "No tests specified, just building binaries." - targets = ['build/%s/m5.%s' % (build, variant) + targets = ['%s/%s/m5.%s' % (options.build_dir, build, variant) for build in builds for variant in variants] elif 'all' in tests: - targets = ['build/%s/tests/%s' % (build, variant) + targets = ['%s/%s/tests/%s' % (options.build_dir, build, variant) for build in builds for variant in variants] else: @@ -103,17 +106,36 @@ else: # If we ever get a quick SPARC_FS test, this code should be removed if 'quick' in tests and 'SPARC_FS' in builds: builds.remove('SPARC_FS') - targets = ['build/%s/tests/%s/%s' % (build, variant, test) + targets = ['%s/%s/tests/%s/%s' % (options.build_dir, build, variant, test) for build in builds for variant in variants for test in tests] +def cpu_count(): + if 'bsd' in sys.platform or sys.platform == 'darwin': + try: + return int(os.popen('sysctl -n hw.ncpu').read()) + except ValueError: + pass + else: + try: + return os.sysconf('SC_NPROCESSORS_ONLN') + except (ValueError, OSError, AttributeError): + pass + + raise NotImplementedError('cannot determine number of cpus') + scons_opts = options.scons_opts if options.jobs != 1: + if options.jobs == 0: + options.jobs = cpu_count() scons_opts += ' -j %d' % options.jobs if options.keep_going: scons_opts += ' -k' -system('scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets))) - -sys.exit(0) +cmd = 'scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets)) +if options.no_exec: + print cmd +else: + system(cmd) + sys.exit(0) From a2652a048aaba65302d5d191754d873fc94fb226 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Tue, 23 Mar 2010 22:49:43 -0700 Subject: [PATCH 4/5] ruby: continue style pass --- 
.../ruby/profiler/AccessTraceForAddress.cc | 155 ++- .../ruby/profiler/AccessTraceForAddress.hh | 106 +- src/mem/ruby/profiler/AddressProfiler.cc | 474 ++++---- src/mem/ruby/profiler/AddressProfiler.hh | 119 +- src/mem/ruby/profiler/CacheProfiler.cc | 163 +-- src/mem/ruby/profiler/CacheProfiler.hh | 90 +- src/mem/ruby/profiler/MemCntrlProfiler.cc | 197 ++-- src/mem/ruby/profiler/MemCntrlProfiler.hh | 133 +-- src/mem/ruby/profiler/Profiler.cc | 1011 +++++++++-------- src/mem/ruby/profiler/Profiler.hh | 259 +++-- src/mem/ruby/profiler/StoreTrace.cc | 197 ++-- src/mem/ruby/profiler/StoreTrace.hh | 112 +- 12 files changed, 1507 insertions(+), 1509 deletions(-) diff --git a/src/mem/ruby/profiler/AccessTraceForAddress.cc b/src/mem/ruby/profiler/AccessTraceForAddress.cc index 48b28b735..978b72982 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.cc +++ b/src/mem/ruby/profiler/AccessTraceForAddress.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,100 +26,96 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" AccessTraceForAddress::AccessTraceForAddress() { - m_histogram_ptr = NULL; + m_histogram_ptr = NULL; } AccessTraceForAddress::AccessTraceForAddress(const Address& addr) { - m_addr = addr; - m_total = 0; - m_loads = 0; - m_stores = 0; - m_atomics = 0; - m_user = 0; - m_sharing = 0; - m_histogram_ptr = NULL; + m_addr = addr; + m_total = 0; + m_loads = 0; + m_stores = 0; + m_atomics = 0; + m_user = 0; + m_sharing = 0; + m_histogram_ptr = NULL; } AccessTraceForAddress::~AccessTraceForAddress() { - if (m_histogram_ptr != NULL) { - delete m_histogram_ptr; - m_histogram_ptr = NULL; - } + if (m_histogram_ptr != NULL) { + delete m_histogram_ptr; + m_histogram_ptr = NULL; + } } -void AccessTraceForAddress::print(ostream& out) const +void +AccessTraceForAddress::print(ostream& out) const { - out << m_addr; + out << m_addr; - if (m_histogram_ptr == NULL) { - out << " " << m_total; - out << " | " << m_loads; - out << " " << m_stores; - out << " " << m_atomics; - out << " | " << m_user; - out << " " << m_total-m_user; - out << " | " << m_sharing; - out << " | " << m_touched_by.count(); - } else { + if (m_histogram_ptr == NULL) { + out << " " << m_total; + out << " | " << m_loads; + out << " " << m_stores; + out << " " << m_atomics; + out << " | " << m_user; + out << " " << m_total-m_user; + out << " | " << m_sharing; + out << " | " << m_touched_by.count(); + } else { + assert(m_total == 0); + out << " " << (*m_histogram_ptr); + } +} + +void +AccessTraceForAddress::update(CacheRequestType type, + AccessModeType access_mode, NodeID cpu, + bool sharing_miss) +{ + m_touched_by.add(cpu); + m_total++; + if(type == CacheRequestType_ATOMIC) { + m_atomics++; + } else if(type == CacheRequestType_LD){ + m_loads++; + } else if (type == CacheRequestType_ST){ + m_stores++; + } else { + // ERROR_MSG("Trying to add 
invalid access to trace"); + } + + if (access_mode == AccessModeType_UserMode) { + m_user++; + } + + if (sharing_miss) { + m_sharing++; + } +} + +int +AccessTraceForAddress::getTotal() const +{ + if (m_histogram_ptr == NULL) { + return m_total; + } else { + return m_histogram_ptr->getTotal(); + } +} + +void +AccessTraceForAddress::addSample(int value) +{ assert(m_total == 0); - out << " " << (*m_histogram_ptr); - } -} - -void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss) -{ - m_touched_by.add(cpu); - m_total++; - if(type == CacheRequestType_ATOMIC) { - m_atomics++; - } else if(type == CacheRequestType_LD){ - m_loads++; - } else if (type == CacheRequestType_ST){ - m_stores++; - } else { - // ERROR_MSG("Trying to add invalid access to trace"); - } - - if (access_mode == AccessModeType_UserMode) { - m_user++; - } - - if (sharing_miss) { - m_sharing++; - } -} - -int AccessTraceForAddress::getTotal() const -{ - if (m_histogram_ptr == NULL) { - return m_total; - } else { - return m_histogram_ptr->getTotal(); - } -} - -void AccessTraceForAddress::addSample(int value) -{ - assert(m_total == 0); - if (m_histogram_ptr == NULL) { - m_histogram_ptr = new Histogram; - } - m_histogram_ptr->add(value); -} - -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2) -{ - return (n1->getTotal() > n2->getTotal()); + if (m_histogram_ptr == NULL) { + m_histogram_ptr = new Histogram; + } + m_histogram_ptr->add(value); } diff --git a/src/mem/ruby/profiler/AccessTraceForAddress.hh b/src/mem/ruby/profiler/AccessTraceForAddress.hh index 2761d6de8..53b96856e 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.hh +++ b/src/mem/ruby/profiler/AccessTraceForAddress.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,60 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * $Id$ - * - * Description: - * - */ +#ifndef __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ +#define __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ -#ifndef ACCESSTRACEFORADDRESS_H -#define ACCESSTRACEFORADDRESS_H - -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheRequestType.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/ruby/system/NodeID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Set.hh" +#include "mem/ruby/system/NodeID.hh" + class Histogram; -class AccessTraceForAddress { -public: - // Constructors - AccessTraceForAddress(); - explicit AccessTraceForAddress(const Address& addr); +class AccessTraceForAddress +{ + public: + AccessTraceForAddress(); + explicit AccessTraceForAddress(const Address& addr); + ~AccessTraceForAddress(); - // Destructor - ~AccessTraceForAddress(); + void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, + bool sharing_miss); + int getTotal() const; + int getSharing() const { return m_sharing; } + int getTouchedBy() const { return m_touched_by.count(); } + const Address& getAddress() const { return m_addr; } + void addSample(int value); - // Public Methods + void print(ostream& out) const; - void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss); - int getTotal() const; - int getSharing() const { return m_sharing; } - int getTouchedBy() const { return m_touched_by.count(); } - const Address& getAddress() const { return m_addr; } - void addSample(int value); - - void print(ostream& out) const; -private: - // Private Methods - - // Private copy constructor and assignment operator - // AccessTraceForAddress(const AccessTraceForAddress& obj); - // AccessTraceForAddress& operator=(const AccessTraceForAddress& obj); - - // Data Members (m_ prefix) - - Address m_addr; - uint64 m_loads; - uint64 
m_stores; - uint64 m_atomics; - uint64 m_total; - uint64 m_user; - uint64 m_sharing; - Set m_touched_by; - Histogram* m_histogram_ptr; + private: + Address m_addr; + uint64 m_loads; + uint64 m_stores; + uint64 m_atomics; + uint64 m_total; + uint64 m_user; + uint64 m_sharing; + Set m_touched_by; + Histogram* m_histogram_ptr; }; -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2); - -// Output operator declaration -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj) +inline bool +node_less_then_eq(const AccessTraceForAddress* n1, + const AccessTraceForAddress* n2) { - obj.print(out); - out << flush; - return out; + return n1->getTotal() > n2->getTotal(); } -#endif //ACCESSTRACEFORADDRESS_H +inline ostream& +operator<<(ostream& out, const AccessTraceForAddress& obj) +{ + obj.print(out); + out << flush; + return out; +} + +#endif // __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index c613431ca..2d7d655c0 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,272 +26,293 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * AddressProfiler.cc - * - * Description: See AddressProfiler.hh - * - * $Id$ - * - */ - -#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/gems_common/Map.hh" +#include "mem/gems_common/PrioHeap.hh" #include "mem/protocol/CacheMsg.hh" #include "mem/ruby/profiler/AccessTraceForAddress.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/gems_common/Map.hh" -#include "mem/ruby/system/System.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" #include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" + +typedef AddressProfiler::AddressMap AddressMap; // Helper functions -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, - Map* record_map); +AccessTraceForAddress& +lookupTraceForAddress(const Address& addr, AddressMap* record_map) +{ + if (!record_map->exist(addr)) { + record_map->add(addr, AccessTraceForAddress(addr)); + } + return record_map->lookup(addr); +} -static void printSorted(ostream& out, - int num_of_sequencers, - const Map* record_map, - string description); +void +printSorted(ostream& out, int num_of_sequencers, const AddressMap* record_map, + string description) +{ + const int records_printed = 100; + + uint64 misses = 0; + PrioHeap heap; + Vector
keys = record_map->keys(); + for (int i = 0; i < keys.size(); i++) { + AccessTraceForAddress* record = &(record_map->lookup(keys[i])); + misses += record->getTotal(); + heap.insert(record); + } + + out << "Total_entries_" << description << ": " << keys.size() << endl; + if (g_system_ptr->getProfiler()->getAllInstructions()) + out << "Total_Instructions_" << description << ": " << misses << endl; + else + out << "Total_data_misses_" << description << ": " << misses << endl; + + out << "total | load store atomic | user supervisor | sharing | touched-by" + << endl; + + Histogram remaining_records(1, 100); + Histogram all_records(1, 100); + Histogram remaining_records_log(-1); + Histogram all_records_log(-1); + + // Allows us to track how many lines where touched by n processors + Vector m_touched_vec; + Vector m_touched_weighted_vec; + m_touched_vec.setSize(num_of_sequencers+1); + m_touched_weighted_vec.setSize(num_of_sequencers+1); + for (int i = 0; i < m_touched_vec.size(); i++) { + m_touched_vec[i] = 0; + m_touched_weighted_vec[i] = 0; + } + + int counter = 0; + while (heap.size() > 0 && counter < records_printed) { + AccessTraceForAddress* record = heap.extractMin(); + double percent = 100.0 * (record->getTotal() / double(misses)); + out << description << " | " << percent << " % " << *record << endl; + all_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + counter++; + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + + while (heap.size() > 0) { + AccessTraceForAddress* record = heap.extractMin(); + all_records.add(record->getTotal()); + remaining_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + remaining_records_log.add(record->getTotal()); + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + out << endl; + out << "all_records_" << description << ": " + << all_records << 
endl + << "all_records_log_" << description << ": " + << all_records_log << endl + << "remaining_records_" << description << ": " + << remaining_records << endl + << "remaining_records_log_" << description << ": " + << remaining_records_log << endl + << "touched_by_" << description << ": " + << m_touched_vec << endl + << "touched_by_weighted_" << description << ": " + << m_touched_weighted_vec << endl + << endl; +} AddressProfiler::AddressProfiler(int num_of_sequencers) { - m_dataAccessTrace = new Map; - m_macroBlockAccessTrace = new Map; - m_programCounterAccessTrace = new Map; - m_retryProfileMap = new Map; - m_num_of_sequencers = num_of_sequencers; - clearStats(); + m_dataAccessTrace = new AddressMap; + m_macroBlockAccessTrace = new AddressMap; + m_programCounterAccessTrace = new AddressMap; + m_retryProfileMap = new AddressMap; + m_num_of_sequencers = num_of_sequencers; + clearStats(); } AddressProfiler::~AddressProfiler() { - delete m_dataAccessTrace; - delete m_macroBlockAccessTrace; - delete m_programCounterAccessTrace; - delete m_retryProfileMap; + delete m_dataAccessTrace; + delete m_macroBlockAccessTrace; + delete m_programCounterAccessTrace; + delete m_retryProfileMap; } -void AddressProfiler::setHotLines(bool hot_lines){ - m_hot_lines = hot_lines; -} -void AddressProfiler::setAllInstructions(bool all_instructions){ - m_all_instructions = all_instructions; -} - -void AddressProfiler::printStats(ostream& out) const +void +AddressProfiler::setHotLines(bool hot_lines) { - if (m_hot_lines) { - out << endl; - out << "AddressProfiler Stats" << endl; - out << "---------------------" << endl; - - out << endl; - out << "sharing_misses: " << m_sharing_miss_counter << endl; - out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; - out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - - out << endl; - out << "Hot Data Blocks" << endl; - out << "---------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, 
m_dataAccessTrace, "block_address"); - - out << endl; - out << "Hot MacroData Blocks" << endl; - out << "--------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, "macroblock_address"); - - out << "Hot Instructions" << endl; - out << "----------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - } - - if (m_all_instructions){ - out << endl; - out << "All Instructions Profile:" << endl; - out << "-------------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - out << endl; - } - - if (m_retryProfileHisto.size() > 0) { - out << "Retry Profile" << endl; - out << "-------------" << endl; - out << endl; - out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; - out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; - out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; - - out << "retry_histogram_percent: "; - m_retryProfileHisto.printPercent(out); - out << endl; - - printSorted(out, m_num_of_sequencers, m_retryProfileMap, "block_address"); - out << endl; - } - + m_hot_lines = hot_lines; } -void AddressProfiler::clearStats() +void +AddressProfiler::setAllInstructions(bool all_instructions) { - // Clear the maps - m_sharing_miss_counter = 0; - m_dataAccessTrace->clear(); - m_macroBlockAccessTrace->clear(); - m_programCounterAccessTrace->clear(); - m_retryProfileMap->clear(); - m_retryProfileHisto.clear(); - m_retryProfileHistoRead.clear(); - m_retryProfileHistoWrite.clear(); - m_getx_sharing_histogram.clear(); - m_gets_sharing_histogram.clear(); + m_all_instructions = all_instructions; } -void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) +void +AddressProfiler::printStats(ostream& out) const { - Set indirection_set; - indirection_set.addSet(sharers); - 
indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = indirection_set.count(); + if (m_hot_lines) { + out << endl; + out << "AddressProfiler Stats" << endl; + out << "---------------------" << endl; - m_getx_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); + out << endl; + out << "sharing_misses: " << m_sharing_miss_counter << endl; + out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; + out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), requestor, indirection_miss); -} + out << endl; + out << "Hot Data Blocks" << endl; + out << "---------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_dataAccessTrace, + "block_address"); -void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) -{ - Set indirection_set; - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = indirection_set.count(); + out << endl; + out << "Hot MacroData Blocks" << endl; + out << "--------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, + "macroblock_address"); - m_gets_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); - - addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), requestor, indirection_miss); -} - -void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss) -{ - if (m_all_instructions) { - if (sharing_miss) { - m_sharing_miss_counter++; + out << "Hot Instructions" << endl; + out << "----------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); } - // record data address trace info - 
data_addr.makeLineAddress(); - lookupTraceForAddress(data_addr, m_dataAccessTrace).update(type, access_mode, id, sharing_miss); + if (m_all_instructions) { + out << endl; + out << "All Instructions Profile:" << endl; + out << "-------------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); + out << endl; + } - // record macro data address trace info - Address macro_addr(data_addr.maskLowOrderBits(10)); // 6 for datablock, 4 to make it 16x more coarse - lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).update(type, access_mode, id, sharing_miss); + if (m_retryProfileHisto.size() > 0) { + out << "Retry Profile" << endl; + out << "-------------" << endl; + out << endl; + out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; + out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; + out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } + out << "retry_histogram_percent: "; + m_retryProfileHisto.printPercent(out); + out << endl; - if (m_all_instructions) { - // This code is used if the address profiler is an all-instructions profiler - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } + printSorted(out, m_num_of_sequencers, m_retryProfileMap, + "block_address"); + out << endl; + } } -void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, int count) +void +AddressProfiler::clearStats() { - m_retryProfileHisto.add(count); - if (type == AccessType_Read) { - m_retryProfileHistoRead.add(count); - } else { - m_retryProfileHistoWrite.add(count); - } - if (count > 1) { - lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); - } + // Clear the maps + 
m_sharing_miss_counter = 0; + m_dataAccessTrace->clear(); + m_macroBlockAccessTrace->clear(); + m_programCounterAccessTrace->clear(); + m_retryProfileMap->clear(); + m_retryProfileHisto.clear(); + m_retryProfileHistoRead.clear(); + m_retryProfileHistoWrite.clear(); + m_getx_sharing_histogram.clear(); + m_gets_sharing_histogram.clear(); } -// ***** Normal Functions ****** - -static void printSorted(ostream& out, - int num_of_sequencers, - const Map* record_map, - string description) +void +AddressProfiler::profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) { - const int records_printed = 100; + Set indirection_set; + indirection_set.addSet(sharers); + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); - uint64 misses = 0; - PrioHeap heap; - Vector
keys = record_map->keys(); - for(int i=0; ilookup(keys[i])); - misses += record->getTotal(); - heap.insert(record); - } + m_getx_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); - out << "Total_entries_" << description << ": " << keys.size() << endl; - if (g_system_ptr->getProfiler()->getAllInstructions()) - out << "Total_Instructions_" << description << ": " << misses << endl; - else - out << "Total_data_misses_" << description << ": " << misses << endl; - - out << "total | load store atomic | user supervisor | sharing | touched-by" << endl; - - Histogram remaining_records(1, 100); - Histogram all_records(1, 100); - Histogram remaining_records_log(-1); - Histogram all_records_log(-1); - - // Allows us to track how many lines where touched by n processors - Vector m_touched_vec; - Vector m_touched_weighted_vec; - m_touched_vec.setSize(num_of_sequencers+1); - m_touched_weighted_vec.setSize(num_of_sequencers+1); - for (int i=0; i 0) && (counter < records_printed)) { - AccessTraceForAddress* record = heap.extractMin(); - double percent = 100.0*(record->getTotal()/double(misses)); - out << description << " | " << percent << " % " << *record << endl; - all_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - counter++; - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - - while(heap.size() > 0) { - AccessTraceForAddress* record = heap.extractMin(); - all_records.add(record->getTotal()); - remaining_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - remaining_records_log.add(record->getTotal()); - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - out << endl; - out << "all_records_" << description << ": " << all_records << endl; - out << "all_records_log_" << description << ": " << all_records_log << endl; - out << "remaining_records_" << 
description << ": " << remaining_records << endl; - out << "remaining_records_log_" << description << ": " << remaining_records_log << endl; - out << "touched_by_" << description << ": " << m_touched_vec << endl; - out << "touched_by_weighted_" << description << ": " << m_touched_weighted_vec << endl; - out << endl; + addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), + requestor, indirection_miss); } -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, Map* record_map) +void +AddressProfiler::profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) { - if(record_map->exist(addr) == false){ - record_map->add(addr, AccessTraceForAddress(addr)); - } - return record_map->lookup(addr); + Set indirection_set; + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); + + m_gets_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); + + addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), + requestor, indirection_miss); +} + +void +AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, + AccessModeType access_mode, NodeID id, + bool sharing_miss) +{ + if (m_all_instructions) { + if (sharing_miss) { + m_sharing_miss_counter++; + } + + // record data address trace info + data_addr.makeLineAddress(); + lookupTraceForAddress(data_addr, m_dataAccessTrace). + update(type, access_mode, id, sharing_miss); + + // record macro data address trace info + + // 6 for datablock, 4 to make it 16x more coarse + Address macro_addr(data_addr.maskLowOrderBits(10)); + lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace). + update(type, access_mode, id, sharing_miss); + + // record program counter address trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). 
+ update(type, access_mode, id, sharing_miss); + } + + if (m_all_instructions) { + // This code is used if the address profiler is an + // all-instructions profiler record program counter address + // trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); + } +} + +void +AddressProfiler::profileRetry(const Address& data_addr, AccessType type, + int count) +{ + m_retryProfileHisto.add(count); + if (type == AccessType_Read) { + m_retryProfileHistoRead.add(count); + } else { + m_retryProfileHistoWrite.add(count); + } + if (count > 1) { + lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); + } } diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 177aa56d6..76dac323f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,89 +26,77 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * AddressProfiler.hh - * - * Description: - * - * $Id$ - * - */ +#ifndef __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ +#define __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ -#ifndef ADDRESSPROFILER_H -#define ADDRESSPROFILER_H - -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheMsg.hh" #include "mem/protocol/AccessType.hh" +#include "mem/protocol/CacheMsg.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" class AccessTraceForAddress; class Set; template class Map; -class AddressProfiler { -public: - // Constructors - AddressProfiler(int num_of_sequencers); +class AddressProfiler +{ + public: + typedef Map AddressMap; - // Destructor - ~AddressProfiler(); + public: + AddressProfiler(int num_of_sequencers); + ~AddressProfiler(); - // Public Methods - void printStats(ostream& out) const; - void clearStats(); + void printStats(ostream& out) const; + void clearStats(); - void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss); - void profileRetry(const Address& data_addr, AccessType type, int count); - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); - void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); + void addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, AccessModeType access_mode, + NodeID id, bool sharing_miss); + void profileRetry(const Address& data_addr, AccessType type, int count); + void profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); + void profileGetS(const Address& datablock, const Address& PC, + const Set& 
owner, const Set& sharers, NodeID requestor); - void print(ostream& out) const; + void print(ostream& out) const; - //added by SS - void setHotLines(bool hot_lines); - void setAllInstructions(bool all_instructions); -private: - // Private Methods + //added by SS + void setHotLines(bool hot_lines); + void setAllInstructions(bool all_instructions); - // Private copy constructor and assignment operator - AddressProfiler(const AddressProfiler& obj); - AddressProfiler& operator=(const AddressProfiler& obj); + private: + // Private copy constructor and assignment operator + AddressProfiler(const AddressProfiler& obj); + AddressProfiler& operator=(const AddressProfiler& obj); - // Data Members (m_ prefix) - int64 m_sharing_miss_counter; + int64 m_sharing_miss_counter; - Map* m_dataAccessTrace; - Map* m_macroBlockAccessTrace; - Map* m_programCounterAccessTrace; - Map* m_retryProfileMap; - Histogram m_retryProfileHisto; - Histogram m_retryProfileHistoWrite; - Histogram m_retryProfileHistoRead; - Histogram m_getx_sharing_histogram; - Histogram m_gets_sharing_histogram; -//added by SS - bool m_hot_lines; - bool m_all_instructions; + AddressMap* m_dataAccessTrace; + AddressMap* m_macroBlockAccessTrace; + AddressMap* m_programCounterAccessTrace; + AddressMap* m_retryProfileMap; + Histogram m_retryProfileHisto; + Histogram m_retryProfileHistoWrite; + Histogram m_retryProfileHistoRead; + Histogram m_getx_sharing_histogram; + Histogram m_gets_sharing_histogram; - int m_num_of_sequencers; + //added by SS + bool m_hot_lines; + bool m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const AddressProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AddressProfiler& obj) +inline ostream& +operator<<(ostream& out, const AddressProfiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + 
out << flush; + return out; } -#endif //ADDRESSPROFILER_H +#endif // __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ diff --git a/src/mem/ruby/profiler/CacheProfiler.cc b/src/mem/ruby/profiler/CacheProfiler.cc index 50581fcf9..9d12a46ab 100644 --- a/src/mem/ruby/profiler/CacheProfiler.cc +++ b/src/mem/ruby/profiler/CacheProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,111 +26,113 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * CacheProfiler.C - * - * Description: See CacheProfiler.hh - * - * $Id$ - * - */ - -#include "mem/ruby/profiler/CacheProfiler.hh" -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/gems_common/PrioHeap.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/profiler/Profiler.hh" #include "mem/gems_common/Vector.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" +#include "mem/ruby/profiler/CacheProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" CacheProfiler::CacheProfiler(const string& description) { - m_description = description; - m_requestTypeVec_ptr = new Vector; - m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); + m_description = description; + m_requestTypeVec_ptr = new Vector; + m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); - clearStats(); + clearStats(); } CacheProfiler::~CacheProfiler() { - delete m_requestTypeVec_ptr; + delete m_requestTypeVec_ptr; } -void CacheProfiler::printStats(ostream& out) const +void +CacheProfiler::printStats(ostream& out) const { - out << "Cache Stats: " << m_description << endl; - string description = " " + m_description; - - out << description << "_total_misses: " << m_misses << endl; - out << description << "_total_demand_misses: " << m_demand_misses << endl; - out << description << "_total_prefetches: " << m_prefetches << endl; - out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; - out 
<< description << "_total_hw_prefetches: " << m_hw_prefetches << endl; - out << endl; - - int requests = 0; - - for(int i=0; iref(i); - } - - assert(m_misses == requests); - - if (requests > 0) { - for(int i=0; iref(i) > 0) { - out << description << "_request_type_" << CacheRequestType_to_string(CacheRequestType(i)) << ": " - << (100.0 * double((m_requestTypeVec_ptr->ref(i)))) / double(requests) - << "%" << endl; - } - } + out << "Cache Stats: " << m_description << endl; + string description = " " + m_description; + out << description << "_total_misses: " << m_misses << endl; + out << description << "_total_demand_misses: " << m_demand_misses << endl; + out << description << "_total_prefetches: " << m_prefetches << endl; + out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; + out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; out << endl; - for(int i=0; i 0) { - out << description << "_access_mode_type_" << (AccessModeType) i << ": " << m_accessModeTypeHistogram[i] - << " " << (100.0 * m_accessModeTypeHistogram[i]) / requests << "%" << endl; - } + int requests = 0; + + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + requests += m_requestTypeVec_ptr->ref(i); } - } - out << description << "_request_size: " << m_requestSize << endl; - out << endl; + assert(m_misses == requests); + if (requests > 0) { + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + if (m_requestTypeVec_ptr->ref(i) > 0) { + out << description << "_request_type_" + << CacheRequestType_to_string(CacheRequestType(i)) + << ": " + << 100.0 * (double)m_requestTypeVec_ptr->ref(i) / + (double)requests + << "%" << endl; + } + } + + out << endl; + + for (int i = 0; i < AccessModeType_NUM; i++){ + if (m_accessModeTypeHistogram[i] > 0) { + out << description << "_access_mode_type_" + << (AccessModeType) i << ": " + << m_accessModeTypeHistogram[i] << " " + << 100.0 * m_accessModeTypeHistogram[i] / requests + << "%" << endl; + } + } + } + + out << 
description << "_request_size: " << m_requestSize << endl; + out << endl; } -void CacheProfiler::clearStats() +void +CacheProfiler::clearStats() { - for(int i=0; iref(i) = 0; - } - m_requestSize.clear(); - m_misses = 0; - m_demand_misses = 0; - m_prefetches = 0; - m_sw_prefetches = 0; - m_hw_prefetches = 0; - for(int i=0; iref(i) = 0; + } + m_requestSize.clear(); + m_misses = 0; + m_demand_misses = 0; + m_prefetches = 0; + m_sw_prefetches = 0; + m_hw_prefetches = 0; + for (int i = 0; i < AccessModeType_NUM; i++) { + m_accessModeTypeHistogram[i] = 0; + } } -void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit) +void +CacheProfiler::addStatSample(CacheRequestType requestType, + AccessModeType type, int msgSize, + PrefetchBit pfBit) { - m_misses++; + m_misses++; - m_requestTypeVec_ptr->ref(requestType)++; + m_requestTypeVec_ptr->ref(requestType)++; - m_accessModeTypeHistogram[type]++; - m_requestSize.add(msgSize); - if (pfBit == PrefetchBit_No) { - m_demand_misses++; - } else if (pfBit == PrefetchBit_Yes) { - m_prefetches++; - m_sw_prefetches++; - } else { // must be L1_HW || L2_HW prefetch - m_prefetches++; - m_hw_prefetches++; - } + m_accessModeTypeHistogram[type]++; + m_requestSize.add(msgSize); + if (pfBit == PrefetchBit_No) { + m_demand_misses++; + } else if (pfBit == PrefetchBit_Yes) { + m_prefetches++; + m_sw_prefetches++; + } else { + // must be L1_HW || L2_HW prefetch + m_prefetches++; + m_hw_prefetches++; + } } diff --git a/src/mem/ruby/profiler/CacheProfiler.hh b/src/mem/ruby/profiler/CacheProfiler.hh index 11f189148..7dcdf57f0 100644 --- a/src/mem/ruby/profiler/CacheProfiler.hh +++ b/src/mem/ruby/profiler/CacheProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,58 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * CacheProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef CACHEPROFILER_H -#define CACHEPROFILER_H +#ifndef __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ +#define __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ #include #include -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/protocol/PrefetchBit.hh" #include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/PrefetchBit.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" template class Vector; -class CacheProfiler { -public: - // Constructors - CacheProfiler(const std::string& description); +class CacheProfiler +{ + public: + CacheProfiler(const std::string& description); + ~CacheProfiler(); - // Destructor - ~CacheProfiler(); + void printStats(std::ostream& out) const; + void clearStats(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void addStatSample(CacheRequestType requestType, AccessModeType type, + int msgSize, PrefetchBit pfBit); - void addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit); + void print(std::ostream& out) const; - void print(std::ostream& out) const; -private: - // Private Methods + private: + // Private copy constructor and assignment operator + CacheProfiler(const CacheProfiler& obj); + CacheProfiler& operator=(const CacheProfiler& obj); - // Private copy constructor and assignment operator - CacheProfiler(const CacheProfiler& obj); - CacheProfiler& operator=(const CacheProfiler& obj); + std::string m_description; + Histogram m_requestSize; + int64 m_misses; + int64 m_demand_misses; + int64 m_prefetches; + int64 m_sw_prefetches; + int64 m_hw_prefetches; + int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - // Data Members (m_ prefix) - std::string m_description; - Histogram m_requestSize; - 
int64 m_misses; - int64 m_demand_misses; - int64 m_prefetches; - int64 m_sw_prefetches; - int64 m_hw_prefetches; - int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - - Vector < int >* m_requestTypeVec_ptr; + Vector * m_requestTypeVec_ptr; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const CacheProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //CACHEPROFILER_H +#endif // __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.cc b/src/mem/ruby/profiler/MemCntrlProfiler.cc index b41d7de78..e25719666 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.cc +++ b/src/mem/ruby/profiler/MemCntrlProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. 
@@ -32,19 +31,14 @@ using namespace std; MemCntrlProfiler::MemCntrlProfiler(const string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel) + int banks_per_rank, int ranks_per_dimm, int dimms_per_channel) { m_description = description; m_banks_per_rank = banks_per_rank; m_ranks_per_dimm = ranks_per_dimm; m_dimms_per_channel = dimms_per_channel; - int totalBanks = banks_per_rank * - ranks_per_dimm * - dimms_per_channel; - + int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel; m_memBankCount.setSize(totalBanks); clearStats(); @@ -54,50 +48,65 @@ MemCntrlProfiler::~MemCntrlProfiler() { } -void MemCntrlProfiler::printStats(ostream& out) const +void +MemCntrlProfiler::printStats(ostream& out) const { - if (m_memReq || m_memRefresh) { // if there's a memory controller at all - uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; - double stallsPerReq = total_stalls * 1.0 / m_memReq; - out << "Memory controller: " << m_description << ":" << endl; - out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes - out << " memory_reads: " << m_memRead << endl; - out << " memory_writes: " << m_memWrite << endl; - out << " memory_refreshes: " << m_memRefresh << endl; - out << " memory_total_request_delays: " << total_stalls << endl; - out << " memory_delays_per_request: " << stallsPerReq << endl; - out << " memory_delays_in_input_queue: " << m_memInputQ << endl; - out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl; - out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl; - // Note: The following "memory stalls" entries are a breakdown of the - // cycles which already showed up in m_memWaitCycles. The order is - // significant; it is the priority of attributing the cycles. - // For example, bank_busy is before arbitration because if the bank was - // busy, we didn't even check arbitration. 
- // Note: "not old enough" means that since we grouped waiting heads-of-queues - // into batches to avoid starvation, a request in a newer batch - // didn't try to arbitrate yet because there are older requests waiting. - out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; - out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; - out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; - out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; - out << " memory_stalls_for_bus: " << m_memBusBusy << endl; - out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; - out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl; - out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl; - out << " accesses_per_bank: "; - for (int bank=0; bank < m_memBankCount.size(); bank++) { - out << m_memBankCount[bank] << " "; - } - } else { + if (!m_memReq && !m_memRefresh) { out << "Memory Controller: " << m_description - << " no stats recorded." << endl; - } + << " no stats recorded." 
<< endl + << endl + << endl; + return; + } + + // if there's a memory controller at all + uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; + double stallsPerReq = total_stalls * 1.0 / m_memReq; + out << "Memory controller: " << m_description << ":" << endl; + + // does not include refreshes + out << " memory_total_requests: " << m_memReq << endl; + out << " memory_reads: " << m_memRead << endl; + out << " memory_writes: " << m_memWrite << endl; + out << " memory_refreshes: " << m_memRefresh << endl; + out << " memory_total_request_delays: " << total_stalls << endl; + out << " memory_delays_per_request: " << stallsPerReq << endl; + out << " memory_delays_in_input_queue: " << m_memInputQ << endl; + out << " memory_delays_behind_head_of_bank_queue: " + << m_memBankQ << endl; + out << " memory_delays_stalled_at_head_of_bank_queue: " + << m_memWaitCycles << endl; + + // Note: The following "memory stalls" entries are a breakdown of + // the cycles which already showed up in m_memWaitCycles. The + // order is significant; it is the priority of attributing the + // cycles. For example, bank_busy is before arbitration because + // if the bank was busy, we didn't even check arbitration. + // Note: "not old enough" means that since we grouped waiting + // heads-of-queues into batches to avoid starvation, a request in + // a newer batch didn't try to arbitrate yet because there are + // older requests waiting. 
+ out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; + out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; + out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; + out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; + out << " memory_stalls_for_bus: " << m_memBusBusy << endl; + out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; + out << " memory_stalls_for_read_write_turnaround: " + << m_memReadWriteBusy << endl; + out << " memory_stalls_for_read_read_turnaround: " + << m_memDataBusBusy << endl; + out << " accesses_per_bank: "; + + for (int bank = 0; bank < m_memBankCount.size(); bank++) { + out << m_memBankCount[bank] << " "; + } out << endl; out << endl; } -void MemCntrlProfiler::clearStats() +void +MemCntrlProfiler::clearStats() { m_memReq = 0; m_memBankBusy = 0; @@ -115,72 +124,100 @@ void MemCntrlProfiler::clearStats() m_memRandBusy = 0; m_memNotOld = 0; - for (int bank=0; - bank < m_memBankCount.size(); - bank++) { + for (int bank = 0; bank < m_memBankCount.size(); bank++) { m_memBankCount[bank] = 0; } } -void MemCntrlProfiler::profileMemReq(int bank) { - m_memReq++; - m_memBankCount[bank]++; +void +MemCntrlProfiler::profileMemReq(int bank) +{ + m_memReq++; + m_memBankCount[bank]++; } -void MemCntrlProfiler::profileMemBankBusy() { - m_memBankBusy++; +void +MemCntrlProfiler::profileMemBankBusy() +{ + m_memBankBusy++; } -void MemCntrlProfiler::profileMemBusBusy() { - m_memBusBusy++; +void +MemCntrlProfiler::profileMemBusBusy() +{ + m_memBusBusy++; } -void MemCntrlProfiler::profileMemReadWriteBusy() { - m_memReadWriteBusy++; +void +MemCntrlProfiler::profileMemReadWriteBusy() +{ + m_memReadWriteBusy++; } -void MemCntrlProfiler::profileMemDataBusBusy() { - m_memDataBusBusy++; +void +MemCntrlProfiler::profileMemDataBusBusy() +{ + m_memDataBusBusy++; } -void MemCntrlProfiler::profileMemTfawBusy() { - m_memTfawBusy++; +void +MemCntrlProfiler::profileMemTfawBusy() +{ + m_memTfawBusy++; } -void 
MemCntrlProfiler::profileMemRefresh() { - m_memRefresh++; +void +MemCntrlProfiler::profileMemRefresh() +{ + m_memRefresh++; } -void MemCntrlProfiler::profileMemRead() { - m_memRead++; +void +MemCntrlProfiler::profileMemRead() +{ + m_memRead++; } -void MemCntrlProfiler::profileMemWrite() { - m_memWrite++; +void +MemCntrlProfiler::profileMemWrite() +{ + m_memWrite++; } -void MemCntrlProfiler::profileMemWaitCycles(int cycles) { - m_memWaitCycles += cycles; +void +MemCntrlProfiler::profileMemWaitCycles(int cycles) +{ + m_memWaitCycles += cycles; } -void MemCntrlProfiler::profileMemInputQ(int cycles) { - m_memInputQ += cycles; +void +MemCntrlProfiler::profileMemInputQ(int cycles) +{ + m_memInputQ += cycles; } -void MemCntrlProfiler::profileMemBankQ(int cycles) { - m_memBankQ += cycles; +void +MemCntrlProfiler::profileMemBankQ(int cycles) +{ + m_memBankQ += cycles; } -void MemCntrlProfiler::profileMemArbWait(int cycles) { - m_memArbWait += cycles; +void +MemCntrlProfiler::profileMemArbWait(int cycles) +{ + m_memArbWait += cycles; } -void MemCntrlProfiler::profileMemRandBusy() { - m_memRandBusy++; +void +MemCntrlProfiler::profileMemRandBusy() +{ + m_memRandBusy++; } -void MemCntrlProfiler::profileMemNotOld() { - m_memNotOld++; +void +MemCntrlProfiler::profileMemNotOld() +{ + m_memNotOld++; } diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.hh b/src/mem/ruby/profiler/MemCntrlProfiler.hh index ebedd5185..85c39e0ad 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.hh +++ b/src/mem/ruby/profiler/MemCntrlProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,17 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * MemCntrlProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef MEM_CNTRL_PROFILER_H -#define MEM_CNTRL_PROFILER_H +#ifndef __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ +#define __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ #include #include @@ -47,80 +37,67 @@ template class Vector; -class MemCntrlProfiler { -public: - // Constructors - MemCntrlProfiler(const std::string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel); +class MemCntrlProfiler +{ + public: + MemCntrlProfiler(const std::string& description, int banks_per_rank, + int ranks_per_dimm, int dimms_per_channel); + ~MemCntrlProfiler(); - // Destructor - ~MemCntrlProfiler(); + void printStats(std::ostream& out) const; + void clearStats(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void profileMemReq(int bank); + void profileMemBankBusy(); + void profileMemBusBusy(); + void profileMemTfawBusy(); + void profileMemReadWriteBusy(); + void profileMemDataBusBusy(); + void profileMemRefresh(); + void profileMemRead(); + void profileMemWrite(); + void profileMemWaitCycles(int cycles); + void profileMemInputQ(int cycles); + void profileMemBankQ(int cycles); + void profileMemArbWait(int cycles); + void profileMemRandBusy(); + void profileMemNotOld(); - void profileMemReq(int bank); - void profileMemBankBusy(); - void profileMemBusBusy(); - void profileMemTfawBusy(); - void profileMemReadWriteBusy(); - void profileMemDataBusBusy(); - void profileMemRefresh(); - void profileMemRead(); - void profileMemWrite(); - void profileMemWaitCycles(int cycles); - void profileMemInputQ(int cycles); - void profileMemBankQ(int cycles); - void profileMemArbWait(int cycles); - void profileMemRandBusy(); - void profileMemNotOld(); + void print(std::ostream& out) const; - void print(std::ostream& out) const; private: - // Private Methods + // Private copy constructor and assignment operator + MemCntrlProfiler(const MemCntrlProfiler& obj); + 
MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - // Private copy constructor and assignment operator - MemCntrlProfiler(const MemCntrlProfiler& obj); - MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - - // Data Members (m_ prefix) - std::string m_description; - uint64 m_memReq; - uint64 m_memBankBusy; - uint64 m_memBusBusy; - uint64 m_memTfawBusy; - uint64 m_memReadWriteBusy; - uint64 m_memDataBusBusy; - uint64 m_memRefresh; - uint64 m_memRead; - uint64 m_memWrite; - uint64 m_memWaitCycles; - uint64 m_memInputQ; - uint64 m_memBankQ; - uint64 m_memArbWait; - uint64 m_memRandBusy; - uint64 m_memNotOld; - Vector m_memBankCount; - int m_banks_per_rank; - int m_ranks_per_dimm; - int m_dimms_per_channel; + std::string m_description; + uint64 m_memReq; + uint64 m_memBankBusy; + uint64 m_memBusBusy; + uint64 m_memTfawBusy; + uint64 m_memReadWriteBusy; + uint64 m_memDataBusBusy; + uint64 m_memRefresh; + uint64 m_memRead; + uint64 m_memWrite; + uint64 m_memWaitCycles; + uint64 m_memInputQ; + uint64 m_memBankQ; + uint64 m_memArbWait; + uint64 m_memRandBusy; + uint64 m_memNotOld; + Vector m_memBankCount; + int m_banks_per_rank; + int m_ranks_per_dimm; + int m_dimms_per_channel; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const MemCntrlProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //MEM_CNTRL_PROFILER_H +#endif // __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 365f6cf42..2cc3eddfc 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -42,34 +42,24 @@ 
---------------------------------------------------------------------- */ -/* - * Profiler.cc - * - * Description: See Profiler.hh - * - * $Id$ - * - */ - // Allows use of times() library call, which determines virtual runtime #include #include -#include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/profiler/AddressProfiler.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/network/Network.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/protocol/CacheMsg.hh" -#include "mem/protocol/Protocol.hh" -#include "mem/gems_common/util.hh" #include "mem/gems_common/Map.hh" -#include "mem/ruby/common/Debug.hh" +#include "mem/gems_common/PrioHeap.hh" +#include "mem/gems_common/util.hh" +#include "mem/protocol/CacheMsg.hh" #include "mem/protocol/MachineType.hh" - +#include "mem/protocol/Protocol.hh" +#include "mem/ruby/common/Debug.hh" +#include "mem/ruby/network/Network.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" #include "mem/ruby/system/System.hh" -extern std::ostream * debug_cout_ptr; +extern std::ostream* debug_cout_ptr; static double process_memory_total(); static double process_memory_resident(); @@ -77,570 +67,623 @@ static double process_memory_resident(); Profiler::Profiler(const Params *p) : SimObject(p) { - m_requestProfileMap_ptr = new Map; + m_requestProfileMap_ptr = new Map; - m_inst_profiler_ptr = NULL; - m_address_profiler_ptr = NULL; + m_inst_profiler_ptr = NULL; + m_address_profiler_ptr = NULL; - m_real_time_start_time = time(NULL); // Not reset in clearStats() - m_stats_period = 1000000; // Default - m_periodic_output_file_ptr = &cerr; + m_real_time_start_time = time(NULL); // Not reset in clearStats() + m_stats_period = 1000000; // Default + m_periodic_output_file_ptr = &cerr; - m_hot_lines = p->hot_lines; - m_all_instructions = p->all_instructions; + m_hot_lines = p->hot_lines; + m_all_instructions = p->all_instructions; - 
m_num_of_sequencers = p->num_of_sequencers; + m_num_of_sequencers = p->num_of_sequencers; - m_hot_lines = false; - m_all_instructions = false; + m_hot_lines = false; + m_all_instructions = false; - m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_address_profiler_ptr -> setHotLines(m_hot_lines); - m_address_profiler_ptr -> setAllInstructions(m_all_instructions); + m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_address_profiler_ptr->setHotLines(m_hot_lines); + m_address_profiler_ptr->setAllInstructions(m_all_instructions); - if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_inst_profiler_ptr -> setHotLines(m_hot_lines); - m_inst_profiler_ptr -> setAllInstructions(m_all_instructions); - } + if (m_all_instructions) { + m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_inst_profiler_ptr->setHotLines(m_hot_lines); + m_inst_profiler_ptr->setAllInstructions(m_all_instructions); + } } Profiler::~Profiler() { - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } - delete m_requestProfileMap_ptr; + delete m_requestProfileMap_ptr; } -void Profiler::wakeup() +void +Profiler::wakeup() { - // FIXME - avoid the repeated code + // FIXME - avoid the repeated code - Vector perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); + Vector perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } + for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } - (*m_periodic_output_file_ptr) << "ruby_cycles: " - << 
g_eventQueue_ptr->getTime()-m_ruby_start - << endl; + ostream &out = *m_periodic_output_file_ptr; - (*m_periodic_output_file_ptr) << "mbytes_resident: " - << process_memory_resident() - << endl; + out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl + << "mbytes_resident: " << process_memory_resident() << endl + << "mbytes_total: " << process_memory_total() << endl; - (*m_periodic_output_file_ptr) << "mbytes_total: " - << process_memory_total() - << endl; - - if (process_memory_total() > 0) { - (*m_periodic_output_file_ptr) << "resident_ratio: " - << process_memory_resident()/process_memory_total() - << endl; - } - - (*m_periodic_output_file_ptr) << "miss_latency: " - << m_allMissLatencyHistogram - << endl; - - *m_periodic_output_file_ptr << endl; - - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr); - } - - //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr); - g_eventQueue_ptr->scheduleEvent(this, m_stats_period); -} - -void Profiler::setPeriodicStatsFile(const string& filename) -{ - cout << "Recording periodic statistics to file '" << filename << "' every " - << m_stats_period << " Ruby cycles" << endl; - - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } - - m_periodic_output_file_ptr = new ofstream(filename.c_str()); - g_eventQueue_ptr->scheduleEvent(this, 1); -} - -void Profiler::setPeriodicStatsInterval(integer_t period) -{ - cout << "Recording periodic statistics every " << m_stats_period - << " Ruby cycles" << endl; - - m_stats_period = period; - g_eventQueue_ptr->scheduleEvent(this, 1); -} - -void Profiler::printConfig(ostream& out) const -{ - out << endl; - out << "Profiler Configuration" << endl; - out << "----------------------" << endl; - out << "periodic_stats_period: " << m_stats_period << endl; -} - -void Profiler::print(ostream& out) const -{ - out << "[Profiler]"; -} - -void Profiler::printStats(ostream& out, bool short_stats) -{ - 
out << endl; - if (short_stats) { - out << "SHORT "; - } - out << "Profiler Stats" << endl; - out << "--------------" << endl; - - time_t real_time_current = time(NULL); - double seconds = difftime(real_time_current, m_real_time_start_time); - double minutes = seconds/60.0; - double hours = minutes/60.0; - double days = hours/24.0; - Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; - - if (!short_stats) { - out << "Elapsed_time_in_seconds: " << seconds << endl; - out << "Elapsed_time_in_minutes: " << minutes << endl; - out << "Elapsed_time_in_hours: " << hours << endl; - out << "Elapsed_time_in_days: " << days << endl; - out << endl; - } - - // print the virtual runtimes as well - struct tms vtime; - times(&vtime); - seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; - minutes = seconds / 60.0; - hours = minutes / 60.0; - days = hours / 24.0; - out << "Virtual_time_in_seconds: " << seconds << endl; - out << "Virtual_time_in_minutes: " << minutes << endl; - out << "Virtual_time_in_hours: " << hours << endl; - out << "Virtual_time_in_days: " << days << endl; - out << endl; - - out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; - out << "Ruby_start_time: " << m_ruby_start << endl; - out << "Ruby_cycles: " << ruby_cycles << endl; - out << endl; - - if (!short_stats) { - out << "mbytes_resident: " << process_memory_resident() << endl; - out << "mbytes_total: " << process_memory_total() << endl; if (process_memory_total() > 0) { - out << "resident_ratio: " - << process_memory_resident()/process_memory_total() << endl; + out << "resident_ratio: " + << process_memory_resident() / process_memory_total() << endl; } - out << endl; - } - - Vector perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); - - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } - - out << "ruby_cycles_executed: " << 
perProcCycleCount << endl; - - out << endl; - - if (!short_stats) { - out << "Busy Controller Counts:" << endl; - for(int i=0; i < MachineType_NUM; i++) { - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - MachineID machID; - machID.type = (MachineType)i; - machID.num = j; - out << machID << ":" << m_busyControllerCount[i][j] << " "; - if ((j+1)%8 == 0) { - out << endl; - } - } - out << endl; - } - out << endl; - - out << "Busy Bank Count:" << m_busyBankCount << endl; - out << endl; - - out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; - out << endl; - } - - if (!short_stats) { - out << "All Non-Zero Cycle Demand Cache Accesses" << endl; - out << "----------------------------------------" << endl; out << "miss_latency: " << m_allMissLatencyHistogram << endl; - for(int i=0; i 0) { - out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl; - } - } - for(int i=0; i 0) { - out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; - } - } out << endl; - out << "All Non-Zero Cycle SW Prefetch Requests" << endl; - out << "------------------------------------" << endl; - out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; - for(int i=0; i 0) { - out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl; - } - } - for(int i=0; i 0) { - out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl; - } - } - out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; - - if (m_all_sharing_histogram.size() > 0) { - out << "all_sharing: " << m_all_sharing_histogram << endl; - out << "read_sharing: " << m_read_sharing_histogram << endl; - out << "write_sharing: " << m_write_sharing_histogram << endl; - - out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl; - out << "read_sharing_percent: "; 
m_read_sharing_histogram.printPercent(out); out << endl; - out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl; - - int64 total_miss = m_cache_to_cache + m_memory_to_cache; - out << "all_misses: " << total_miss << endl; - out << "cache_to_cache_misses: " << m_cache_to_cache << endl; - out << "memory_to_cache_misses: " << m_memory_to_cache << endl; - out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl; - out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl; - out << endl; - } - - if (m_outstanding_requests.size() > 0) { - out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; - out << endl; - } - } - - if (!short_stats) { - out << "Request vs. RubySystem State Profile" << endl; - out << "--------------------------------" << endl; - out << endl; - - Vector requestProfileKeys = m_requestProfileMap_ptr->keys(); - requestProfileKeys.sortVector(); - - for(int i=0; ilookup(requestProfileKeys[i]); - double percent = (100.0*double(temp_int))/double(m_requests); - while (requestProfileKeys[i] != "") { - out << setw(10) << string_split(requestProfileKeys[i], ':'); - } - out << setw(11) << temp_int; - out << setw(14) << percent << endl; - } - out << endl; - - out << "filter_action: " << m_filter_action_histogram << endl; - - if (!m_all_instructions) { - m_address_profiler_ptr->printStats(out); - } - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(out); + m_inst_profiler_ptr->printStats(out); } + //g_system_ptr->getNetwork()->printStats(out); + g_eventQueue_ptr->scheduleEvent(this, m_stats_period); +} + +void +Profiler::setPeriodicStatsFile(const string& filename) +{ + cout << "Recording periodic statistics to file '" << filename << "' every " + << m_stats_period << " Ruby cycles" << endl; + + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } + + 
m_periodic_output_file_ptr = new ofstream(filename.c_str()); + g_eventQueue_ptr->scheduleEvent(this, 1); +} + +void +Profiler::setPeriodicStatsInterval(integer_t period) +{ + cout << "Recording periodic statistics every " << m_stats_period + << " Ruby cycles" << endl; + + m_stats_period = period; + g_eventQueue_ptr->scheduleEvent(this, 1); +} + +void +Profiler::printConfig(ostream& out) const +{ + out << endl; + out << "Profiler Configuration" << endl; + out << "----------------------" << endl; + out << "periodic_stats_period: " << m_stats_period << endl; +} + +void +Profiler::print(ostream& out) const +{ + out << "[Profiler]"; +} + +void +Profiler::printStats(ostream& out, bool short_stats) +{ + out << endl; + if (short_stats) { + out << "SHORT "; + } + out << "Profiler Stats" << endl; + out << "--------------" << endl; + + time_t real_time_current = time(NULL); + double seconds = difftime(real_time_current, m_real_time_start_time); + double minutes = seconds / 60.0; + double hours = minutes / 60.0; + double days = hours / 24.0; + Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; + + if (!short_stats) { + out << "Elapsed_time_in_seconds: " << seconds << endl; + out << "Elapsed_time_in_minutes: " << minutes << endl; + out << "Elapsed_time_in_hours: " << hours << endl; + out << "Elapsed_time_in_days: " << days << endl; + out << endl; + } + + // print the virtual runtimes as well + struct tms vtime; + times(&vtime); + seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; + minutes = seconds / 60.0; + hours = minutes / 60.0; + days = hours / 24.0; + out << "Virtual_time_in_seconds: " << seconds << endl; + out << "Virtual_time_in_minutes: " << minutes << endl; + out << "Virtual_time_in_hours: " << hours << endl; + out << "Virtual_time_in_days: " << days << endl; + out << endl; + + out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; + out << "Ruby_start_time: " << m_ruby_start << endl; + out << "Ruby_cycles: " << ruby_cycles << endl; + out 
<< endl; + + if (!short_stats) { + out << "mbytes_resident: " << process_memory_resident() << endl; + out << "mbytes_total: " << process_memory_total() << endl; + if (process_memory_total() > 0) { + out << "resident_ratio: " + << process_memory_resident()/process_memory_total() << endl; + } + out << endl; + } + + Vector perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); + + for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } + + out << "ruby_cycles_executed: " << perProcCycleCount << endl; + out << endl; - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; - out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; - out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl; - for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { - out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl; + + if (!short_stats) { + out << "Busy Controller Counts:" << endl; + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + for (int j = 0; j < size; j++) { + MachineID machID; + machID.type = (MachineType)i; + machID.num = j; + out << machID << ":" << m_busyControllerCount[i][j] << " "; + if ((j + 1) % 8 == 0) { + out << endl; + } + } + out << endl; + } + out << endl; + + out << "Busy Bank Count:" << m_busyBankCount << endl; + out << endl; + + out << "sequencer_requests_outstanding: " + << m_sequencer_requests << endl; + out << endl; } - printResourceUsage(out); - } + if (!short_stats) { + out << "All Non-Zero Cycle Demand Cache Accesses" << endl; + out << "----------------------------------------" << endl; + out << "miss_latency: " << m_allMissLatencyHistogram << endl; + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + if (m_missLatencyHistograms[i].size() > 
0) { + out << "miss_latency_" << RubyRequestType(i) << ": " + << m_missLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + if (m_machLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << GenericMachineType(i) << ": " + << m_machLatencyHistograms[i] << endl; + } + } + out << endl; + + out << "All Non-Zero Cycle SW Prefetch Requests" << endl; + out << "------------------------------------" << endl; + out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + if (m_SWPrefetchLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << CacheRequestType(i) << ": " + << m_SWPrefetchLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << GenericMachineType(i) << ": " + << m_SWPrefetchMachLatencyHistograms[i] << endl; + } + } + out << "prefetch_latency_L2Miss:" + << m_SWPrefetchL2MissLatencyHistogram << endl; + + if (m_all_sharing_histogram.size() > 0) { + out << "all_sharing: " << m_all_sharing_histogram << endl; + out << "read_sharing: " << m_read_sharing_histogram << endl; + out << "write_sharing: " << m_write_sharing_histogram << endl; + + out << "all_sharing_percent: "; + m_all_sharing_histogram.printPercent(out); + out << endl; + + out << "read_sharing_percent: "; + m_read_sharing_histogram.printPercent(out); + out << endl; + + out << "write_sharing_percent: "; + m_write_sharing_histogram.printPercent(out); + out << endl; + + int64 total_miss = m_cache_to_cache + m_memory_to_cache; + out << "all_misses: " << total_miss << endl; + out << "cache_to_cache_misses: " << m_cache_to_cache << endl; + out << "memory_to_cache_misses: " << m_memory_to_cache << endl; + out << "cache_to_cache_percent: " + << 100.0 * (double(m_cache_to_cache) / double(total_miss)) + << endl; + out << 
"memory_to_cache_percent: " + << 100.0 * (double(m_memory_to_cache) / double(total_miss)) + << endl; + out << endl; + } + + if (m_outstanding_requests.size() > 0) { + out << "outstanding_requests: "; + m_outstanding_requests.printPercent(out); + out << endl; + out << endl; + } + } + + if (!short_stats) { + out << "Request vs. RubySystem State Profile" << endl; + out << "--------------------------------" << endl; + out << endl; + + Vector requestProfileKeys = m_requestProfileMap_ptr->keys(); + requestProfileKeys.sortVector(); + + for (int i = 0; i < requestProfileKeys.size(); i++) { + int temp_int = + m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); + double percent = (100.0 * double(temp_int)) / double(m_requests); + while (requestProfileKeys[i] != "") { + out << setw(10) << string_split(requestProfileKeys[i], ':'); + } + out << setw(11) << temp_int; + out << setw(14) << percent << endl; + } + out << endl; + + out << "filter_action: " << m_filter_action_histogram << endl; + + if (!m_all_instructions) { + m_address_profiler_ptr->printStats(out); + } + + if (m_all_instructions) { + m_inst_profiler_ptr->printStats(out); + } + + out << endl; + out << "Message Delayed Cycles" << endl; + out << "----------------------" << endl; + out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; + out << "Total_nonPF_delay_cycles: " + << m_delayedCyclesNonPFHistogram << endl; + for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { + out << " virtual_network_" << i << "_delay_cycles: " + << m_delayedCyclesVCHistograms[i] << endl; + } + + printResourceUsage(out); + } } -void Profiler::printResourceUsage(ostream& out) const +void +Profiler::printResourceUsage(ostream& out) const { - out << endl; - out << "Resource Usage" << endl; - out << "--------------" << endl; + out << endl; + out << "Resource Usage" << endl; + out << "--------------" << endl; - integer_t pagesize = getpagesize(); // page size in bytes - out << "page_size: " << pagesize << endl; + 
integer_t pagesize = getpagesize(); // page size in bytes + out << "page_size: " << pagesize << endl; - rusage usage; - getrusage (RUSAGE_SELF, &usage); + rusage usage; + getrusage (RUSAGE_SELF, &usage); - out << "user_time: " << usage.ru_utime.tv_sec << endl; - out << "system_time: " << usage.ru_stime.tv_sec << endl; - out << "page_reclaims: " << usage.ru_minflt << endl; - out << "page_faults: " << usage.ru_majflt << endl; - out << "swaps: " << usage.ru_nswap << endl; - out << "block_inputs: " << usage.ru_inblock << endl; - out << "block_outputs: " << usage.ru_oublock << endl; + out << "user_time: " << usage.ru_utime.tv_sec << endl; + out << "system_time: " << usage.ru_stime.tv_sec << endl; + out << "page_reclaims: " << usage.ru_minflt << endl; + out << "page_faults: " << usage.ru_majflt << endl; + out << "swaps: " << usage.ru_nswap << endl; + out << "block_inputs: " << usage.ru_inblock << endl; + out << "block_outputs: " << usage.ru_oublock << endl; } -void Profiler::clearStats() +void +Profiler::clearStats() { - m_ruby_start = g_eventQueue_ptr->getTime(); + m_ruby_start = g_eventQueue_ptr->getTime(); - m_cycles_executed_at_start.setSize(m_num_of_sequencers); - for (int i=0; i < m_num_of_sequencers; i++) { - if (g_system_ptr == NULL) { - m_cycles_executed_at_start[i] = 0; + m_cycles_executed_at_start.setSize(m_num_of_sequencers); + for (int i = 0; i < m_num_of_sequencers; i++) { + if (g_system_ptr == NULL) { + m_cycles_executed_at_start[i] = 0; + } else { + m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); + } + } + + m_busyControllerCount.setSize(MachineType_NUM); // all machines + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + m_busyControllerCount[i].setSize(size); + for (int j = 0; j < size; j++) { + m_busyControllerCount[i][j] = 0; + } + } + m_busyBankCount = 0; + + m_delayedCyclesHistogram.clear(); + m_delayedCyclesNonPFHistogram.clear(); + int size = 
RubySystem::getNetwork()->getNumberOfVirtualNetworks(); + m_delayedCyclesVCHistograms.setSize(size); + for (int i = 0; i < size; i++) { + m_delayedCyclesVCHistograms[i].clear(); + } + + m_missLatencyHistograms.setSize(RubyRequestType_NUM); + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + m_missLatencyHistograms[i].clear(200); + } + m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + m_machLatencyHistograms[i].clear(200); + } + m_allMissLatencyHistogram.clear(200); + + m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + m_SWPrefetchLatencyHistograms[i].clear(200); + } + m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + m_SWPrefetchMachLatencyHistograms[i].clear(200); + } + m_allSWPrefetchLatencyHistogram.clear(200); + + m_sequencer_requests.clear(); + m_read_sharing_histogram.clear(); + m_write_sharing_histogram.clear(); + m_all_sharing_histogram.clear(); + m_cache_to_cache = 0; + m_memory_to_cache = 0; + + // clear HashMaps + m_requestProfileMap_ptr->clear(); + + // count requests profiled + m_requests = 0; + + m_outstanding_requests.clear(); + m_outstanding_persistent_requests.clear(); + + // Flush the prefetches through the system - used so that there + // are no outstanding requests after stats are cleared + //g_eventQueue_ptr->triggerAllEvents(); + + // update the start time + m_ruby_start = g_eventQueue_ptr->getTime(); +} + +void +Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) +{ + if (msg.getType() != CacheRequestType_IFETCH) { + // Note: The following line should be commented out if you + // want to use the special profiling that is part of the GS320 + // protocol + + // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be + // profiled by the AddressProfiler + m_address_profiler_ptr-> 
+ addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), + msg.getType(), msg.getAccessMode(), id, false); + } +} + +void +Profiler::profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner) +{ + Set set_contacted(owner); + if (type == AccessType_Write) { + set_contacted.addSet(sharers); + } + set_contacted.remove(requestor); + int number_contacted = set_contacted.count(); + + if (type == AccessType_Write) { + m_write_sharing_histogram.add(number_contacted); } else { - m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); + m_read_sharing_histogram.add(number_contacted); } - } + m_all_sharing_histogram.add(number_contacted); - m_busyControllerCount.setSize(MachineType_NUM); // all machines - for(int i=0; i < MachineType_NUM; i++) { - m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i)); - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - m_busyControllerCount[i][j] = 0; + if (number_contacted == 0) { + m_memory_to_cache++; + } else { + m_cache_to_cache++; } - } - m_busyBankCount = 0; - - m_delayedCyclesHistogram.clear(); - m_delayedCyclesNonPFHistogram.clear(); - m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks()); - for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) { - m_delayedCyclesVCHistograms[i].clear(); - } - - m_missLatencyHistograms.setSize(RubyRequestType_NUM); - for(int i=0; iclear(); - - // count requests profiled - m_requests = 0; - - m_outstanding_requests.clear(); - m_outstanding_persistent_requests.clear(); - - // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared - //g_eventQueue_ptr->triggerAllEvents(); - - // update the start time - m_ruby_start = g_eventQueue_ptr->getTime(); } -void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) +void +Profiler::profileMsgDelay(int virtualNetwork, int 
delayCycles) { - if (msg.getType() != CacheRequestType_IFETCH) { - - // Note: The following line should be commented out if you want to - // use the special profiling that is part of the GS320 protocol - - // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler - m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false); - } -} - -void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner) -{ - Set set_contacted(owner); - if (type == AccessType_Write) { - set_contacted.addSet(sharers); - } - set_contacted.remove(requestor); - int number_contacted = set_contacted.count(); - - if (type == AccessType_Write) { - m_write_sharing_histogram.add(number_contacted); - } else { - m_read_sharing_histogram.add(number_contacted); - } - m_all_sharing_histogram.add(number_contacted); - - if (number_contacted == 0) { - m_memory_to_cache++; - } else { - m_cache_to_cache++; - } - -} - -void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); - m_delayedCyclesHistogram.add(delayCycles); - m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); - if (virtualNetwork != 0) { - m_delayedCyclesNonPFHistogram.add(delayCycles); - } + assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); + m_delayedCyclesHistogram.add(delayCycles); + m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); + if (virtualNetwork != 0) { + m_delayedCyclesNonPFHistogram.add(delayCycles); + } } // profiles original cache requests including PUTs -void Profiler::profileRequest(const string& requestStr) +void +Profiler::profileRequest(const string& requestStr) { - m_requests++; + m_requests++; - if (m_requestProfileMap_ptr->exist(requestStr)) { - (m_requestProfileMap_ptr->lookup(requestStr))++; - } else { - m_requestProfileMap_ptr->add(requestStr, 1); 
- } + if (m_requestProfileMap_ptr->exist(requestStr)) { + (m_requestProfileMap_ptr->lookup(requestStr))++; + } else { + m_requestProfileMap_ptr->add(requestStr, 1); + } } -void Profiler::controllerBusy(MachineID machID) +void +Profiler::controllerBusy(MachineID machID) { - m_busyControllerCount[(int)machID.type][(int)machID.num]++; + m_busyControllerCount[(int)machID.type][(int)machID.num]++; } -void Profiler::profilePFWait(Time waitTime) +void +Profiler::profilePFWait(Time waitTime) { - m_prefetchWaitHistogram.add(waitTime); + m_prefetchWaitHistogram.add(waitTime); } -void Profiler::bankBusy() +void +Profiler::bankBusy() { - m_busyBankCount++; + m_busyBankCount++; } // non-zero cycle demand request -void Profiler::missLatency(Time t, RubyRequestType type) +void +Profiler::missLatency(Time t, RubyRequestType type) { - m_allMissLatencyHistogram.add(t); - m_missLatencyHistograms[type].add(t); + m_allMissLatencyHistogram.add(t); + m_missLatencyHistograms[type].add(t); } // non-zero cycle prefetch request -void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) +void +Profiler::swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach) { - m_allSWPrefetchLatencyHistogram.add(t); - m_SWPrefetchLatencyHistograms[type].add(t); - m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); - if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_SWPrefetchL2MissLatencyHistogram.add(t); - } + m_allSWPrefetchLatencyHistogram.add(t); + m_SWPrefetchLatencyHistograms[type].add(t); + m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); + if (respondingMach == GenericMachineType_Directory || + respondingMach == GenericMachineType_NUM) { + m_SWPrefetchL2MissLatencyHistogram.add(t); + } } -void Profiler::profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note) 
+void +Profiler::profileTransition(const string& component, NodeID version, + Address addr, const string& state, const string& event, + const string& next_state, const string& note) { - const int EVENT_SPACES = 20; - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - const int COMP_SPACES = 10; - const int STATE_SPACES = 6; + const int EVENT_SPACES = 20; + const int ID_SPACES = 3; + const int TIME_SPACES = 7; + const int COMP_SPACES = 10; + const int STATE_SPACES = 6; - if ((g_debug_ptr->getDebugTime() > 0) && - (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) { - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << version << " "; - (* debug_cout_ptr) << setw(COMP_SPACES) << component; - (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " "; + if (g_debug_ptr->getDebugTime() <= 0 || + g_eventQueue_ptr->getTime() < g_debug_ptr->getDebugTime()) + return; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(STATE_SPACES) << state; - (* debug_cout_ptr) << ">"; - (* debug_cout_ptr).flags(ios::left); - (* debug_cout_ptr) << setw(STATE_SPACES) << next_state; + ostream &out = *debug_cout_ptr; + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << version << " "; + out << setw(COMP_SPACES) << component; + out << setw(EVENT_SPACES) << event << " "; - (* debug_cout_ptr) << " " << addr << " " << note; + out.flags(ios::right); + out << setw(STATE_SPACES) << state; + out << ">"; + out.flags(ios::left); + out << setw(STATE_SPACES) << next_state; - (* debug_cout_ptr) << endl; - } + out << " " << addr << " " << note; + + out << endl; } // Helper function -static double process_memory_total() +static double +process_memory_total() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - 
proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(total_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(total_size_in_pages) * MULTIPLIER; // size in megabytes } -static double process_memory_resident() +static double +process_memory_resident() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(res_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(res_size_in_pages) * MULTIPLIER; // size in megabytes } -void Profiler::rubyWatch(int id){ +void +Profiler::rubyWatch(int id) +{ uint64 tr = 0; Address watch_address = Address(tr); const int ID_SPACES = 3; const int TIME_SPACES = 7; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << "RUBY WATCH " - << watch_address - << endl; + ostream &out = *debug_cout_ptr; - if(!m_watch_address_list_ptr->exist(watch_address)){ - m_watch_address_list_ptr->add(watch_address, 1); + out.flags(ios::right); + out << setw(TIME_SPACES) << 
g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << id << " " + << "RUBY WATCH " << watch_address << endl; + + if (!m_watch_address_list_ptr->exist(watch_address)) { + m_watch_address_list_ptr->add(watch_address, 1); } } -bool Profiler::watchAddress(Address addr){ +bool +Profiler::watchAddress(Address addr) +{ if (m_watch_address_list_ptr->exist(addr)) - return true; + return true; else - return false; + return false; } Profiler * diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 3ae1f5e31..bf4bf8a50 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -42,35 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.hh - * - * Description: - * - * $Id$ - * - */ +#ifndef __MEM_RUBY_PROFILER_PROFILER_HH__ +#define __MEM_RUBY_PROFILER_PROFILER_HH__ -#ifndef PROFILER_H -#define PROFILER_H - -#include "mem/ruby/libruby.hh" - -#include "mem/ruby/common/Global.hh" -#include "mem/protocol/GenericMachineType.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Consumer.hh" #include "mem/protocol/AccessModeType.hh" #include "mem/protocol/AccessType.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/system/MachineID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/GenericMachineType.hh" +#include "mem/protocol/GenericRequestType.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" #include "mem/ruby/common/Set.hh" -#include "mem/protocol/CacheRequestType.hh" -#include "mem/protocol/GenericRequestType.hh" +#include "mem/ruby/libruby.hh" +#include "mem/ruby/system/MachineID.hh" #include "mem/ruby/system/MemoryControl.hh" - +#include "mem/ruby/system/NodeID.hh" #include "params/RubyProfiler.hh" #include "sim/sim_object.hh" @@ -79,155 +68,165 @@ class 
AddressProfiler; template class Map; -class Profiler : public SimObject, public Consumer { -public: - // Constructors +class Profiler : public SimObject, public Consumer +{ + public: typedef RubyProfilerParams Params; - Profiler(const Params *); + Profiler(const Params *); + ~Profiler(); - // Destructor - ~Profiler(); + void wakeup(); - // Public Methods - void wakeup(); + void setPeriodicStatsFile(const string& filename); + void setPeriodicStatsInterval(integer_t period); - void setPeriodicStatsFile(const string& filename); - void setPeriodicStatsInterval(integer_t period); + void printStats(ostream& out, bool short_stats=false); + void printShortStats(ostream& out) { printStats(out, true); } + void printTraceStats(ostream& out) const; + void clearStats(); + void printConfig(ostream& out) const; + void printResourceUsage(ostream& out) const; - void printStats(ostream& out, bool short_stats=false); - void printShortStats(ostream& out) { printStats(out, true); } - void printTraceStats(ostream& out) const; - void clearStats(); - void printConfig(ostream& out) const; - void printResourceUsage(ostream& out) const; + AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } + AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } - AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } - AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } + void addAddressTraceSample(const CacheMsg& msg, NodeID id); - void addAddressTraceSample(const CacheMsg& msg, NodeID id); + void profileRequest(const string& requestStr); + void profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner); - void profileRequest(const string& requestStr); - void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); + void profileMulticastRetry(const Address& addr, int count); - void profileMulticastRetry(const Address& 
addr, int count); + void profileFilterAction(int action); - void profileFilterAction(int action); + void profileConflictingRequests(const Address& addr); - void profileConflictingRequests(const Address& addr); - void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } - void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } - void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } + void + profileOutstandingRequest(int outstanding) + { + m_outstanding_requests.add(outstanding); + } - void recordPrediction(bool wasGood, bool wasPredicted); + void + profileOutstandingPersistentRequest(int outstanding) + { + m_outstanding_persistent_requests.add(outstanding); + } - void startTransaction(int cpu); - void endTransaction(int cpu); - void profilePFWait(Time waitTime); + void + profileAverageLatencyEstimate(int latency) + { + m_average_latency_estimate.add(latency); + } - void controllerBusy(MachineID machID); - void bankBusy(); - void missLatency(Time t, RubyRequestType type); - void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void sequencerRequests(int num) { m_sequencer_requests.add(num); } + void recordPrediction(bool wasGood, bool wasPredicted); - void profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note); - void profileMsgDelay(int virtualNetwork, int delayCycles); + void startTransaction(int cpu); + void endTransaction(int cpu); + void profilePFWait(Time waitTime); - void print(ostream& out) const; + void controllerBusy(MachineID machID); + void bankBusy(); + void missLatency(Time t, RubyRequestType type); + void swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach); + void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void 
rubyWatch(int proc); - bool watchAddress(Address addr); + void profileTransition(const string& component, NodeID version, + Address addr, const string& state, + const string& event, const string& next_state, + const string& note); + void profileMsgDelay(int virtualNetwork, int delayCycles); - // return Ruby's start time - Time getRubyStartTime(){ - return m_ruby_start; - } + void print(ostream& out) const; - //added by SS - bool getHotLines() { return m_hot_lines; } - bool getAllInstructions() { return m_all_instructions; } + void rubyWatch(int proc); + bool watchAddress(Address addr); -private: + // return Ruby's start time + Time + getRubyStartTime() + { + return m_ruby_start; + } - // Private copy constructor and assignment operator - Profiler(const Profiler& obj); - Profiler& operator=(const Profiler& obj); + // added by SS + bool getHotLines() { return m_hot_lines; } + bool getAllInstructions() { return m_all_instructions; } - // Data Members (m_ prefix) - AddressProfiler* m_address_profiler_ptr; - AddressProfiler* m_inst_profiler_ptr; + private: + // Private copy constructor and assignment operator + Profiler(const Profiler& obj); + Profiler& operator=(const Profiler& obj); - Vector m_instructions_executed_at_start; - Vector m_cycles_executed_at_start; + AddressProfiler* m_address_profiler_ptr; + AddressProfiler* m_inst_profiler_ptr; - ostream* m_periodic_output_file_ptr; - integer_t m_stats_period; + Vector m_instructions_executed_at_start; + Vector m_cycles_executed_at_start; - Time m_ruby_start; - time_t m_real_time_start_time; + ostream* m_periodic_output_file_ptr; + integer_t m_stats_period; - Vector < Vector < integer_t > > m_busyControllerCount; - integer_t m_busyBankCount; - Histogram m_multicast_retry_histogram; + Time m_ruby_start; + time_t m_real_time_start_time; - Histogram m_filter_action_histogram; - Histogram m_tbeProfile; + Vector > m_busyControllerCount; + integer_t m_busyBankCount; + Histogram m_multicast_retry_histogram; - Histogram 
m_sequencer_requests; - Histogram m_read_sharing_histogram; - Histogram m_write_sharing_histogram; - Histogram m_all_sharing_histogram; - int64 m_cache_to_cache; - int64 m_memory_to_cache; + Histogram m_filter_action_histogram; + Histogram m_tbeProfile; - Histogram m_prefetchWaitHistogram; + Histogram m_sequencer_requests; + Histogram m_read_sharing_histogram; + Histogram m_write_sharing_histogram; + Histogram m_all_sharing_histogram; + int64 m_cache_to_cache; + int64 m_memory_to_cache; - Vector m_missLatencyHistograms; - Vector m_machLatencyHistograms; - Histogram m_allMissLatencyHistogram; + Histogram m_prefetchWaitHistogram; - Histogram m_allSWPrefetchLatencyHistogram; - Histogram m_SWPrefetchL2MissLatencyHistogram; - Vector m_SWPrefetchLatencyHistograms; - Vector m_SWPrefetchMachLatencyHistograms; + Vector m_missLatencyHistograms; + Vector m_machLatencyHistograms; + Histogram m_allMissLatencyHistogram; - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - Vector m_delayedCyclesVCHistograms; + Histogram m_allSWPrefetchLatencyHistogram; + Histogram m_SWPrefetchL2MissLatencyHistogram; + Vector m_SWPrefetchLatencyHistograms; + Vector m_SWPrefetchMachLatencyHistograms; - Histogram m_outstanding_requests; - Histogram m_outstanding_persistent_requests; + Histogram m_delayedCyclesHistogram; + Histogram m_delayedCyclesNonPFHistogram; + Vector m_delayedCyclesVCHistograms; - Histogram m_average_latency_estimate; + Histogram m_outstanding_requests; + Histogram m_outstanding_persistent_requests; - Map* m_watch_address_list_ptr; - // counts all initiated cache request including PUTs - int m_requests; - Map * m_requestProfileMap_ptr; + Histogram m_average_latency_estimate; - //added by SS - bool m_hot_lines; - bool m_all_instructions; + Map* m_watch_address_list_ptr; + // counts all initiated cache request including PUTs + int m_requests; + Map * m_requestProfileMap_ptr; - int m_num_of_sequencers; + //added by SS + bool m_hot_lines; + bool 
m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const Profiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const Profiler& obj) +inline ostream& +operator<<(ostream& out, const Profiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //PROFILER_H +#endif // __MEM_RUBY_PROFILER_PROFILER_HH__ diff --git a/src/mem/ruby/profiler/StoreTrace.cc b/src/mem/ruby/profiler/StoreTrace.cc index 4d4e4798d..ce42560b6 100644 --- a/src/mem/ruby/profiler/StoreTrace.cc +++ b/src/mem/ruby/profiler/StoreTrace.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,132 +26,130 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/StoreTrace.hh" #include "mem/ruby/eventqueue/RubyEventQueue.hh" +#include "mem/ruby/profiler/StoreTrace.hh" -bool StoreTrace::s_init = false; // Total number of store lifetimes of all lines -int64 StoreTrace::s_total_samples = 0; // Total number of store lifetimes of all lines +bool StoreTrace::s_init = false; // Total number of store lifetimes of + // all lines +int64 StoreTrace::s_total_samples = 0; // Total number of store + // lifetimes of all lines Histogram* StoreTrace::s_store_count_ptr = NULL; Histogram* StoreTrace::s_store_first_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_last_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_first_to_last_ptr = NULL; -StoreTrace::StoreTrace(const Address& addr) : - m_store_count(-1), m_store_first_to_stolen(-1), m_store_last_to_stolen(-1), m_store_first_to_last(-1) +StoreTrace::StoreTrace(const Address& addr) + : m_store_count(-1), m_store_first_to_stolen(-1), + m_store_last_to_stolen(-1), m_store_first_to_last(-1) { - 
StoreTrace::initSummary(); - m_addr = addr; - m_total_samples = 0; - m_last_writer = -1; // Really -1 isn't valid, so this will trigger the initilization code - m_stores_this_interval = 0; + StoreTrace::initSummary(); + m_addr = addr; + m_total_samples = 0; + + // Really -1 isn't valid, so this will trigger the initialization code + m_last_writer = -1; + m_stores_this_interval = 0; } StoreTrace::~StoreTrace() { } -void StoreTrace::print(ostream& out) const +void +StoreTrace::print(ostream& out) const { - out << m_addr; - out << " total_samples: " << m_total_samples << endl; - out << "store_count: " << m_store_count << endl; - out << "store_first_to_stolen: " << m_store_first_to_stolen << endl; - out << "store_last_to_stolen: " << m_store_last_to_stolen << endl; - out << "store_first_to_last: " << m_store_first_to_last << endl; + out << m_addr + << " total_samples: " << m_total_samples << endl + << "store_count: " << m_store_count << endl + << "store_first_to_stolen: " << m_store_first_to_stolen << endl + << "store_last_to_stolen: " << m_store_last_to_stolen << endl + << "store_first_to_last: " << m_store_first_to_last << endl; } -// Class method -void StoreTrace::initSummary() +void +StoreTrace::initSummary() { - if (!s_init) { + if (!s_init) { + s_total_samples = 0; + s_store_count_ptr = new Histogram(-1); + s_store_first_to_stolen_ptr = new Histogram(-1); + s_store_last_to_stolen_ptr = new Histogram(-1); + s_store_first_to_last_ptr = new Histogram(-1); + } + s_init = true; +} + +void +StoreTrace::printSummary(ostream& out) +{ + out << "total_samples: " << s_total_samples << endl; + out << "store_count: " << (*s_store_count_ptr) << endl; + out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; + out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; + out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; +} + +void +StoreTrace::clearSummary() +{ + StoreTrace::initSummary(); s_total_samples = 0; - 
s_store_count_ptr = new Histogram(-1); - s_store_first_to_stolen_ptr = new Histogram(-1); - s_store_last_to_stolen_ptr = new Histogram(-1); - s_store_first_to_last_ptr = new Histogram(-1); - } - s_init = true; + s_store_count_ptr->clear(); + s_store_first_to_stolen_ptr->clear(); + s_store_last_to_stolen_ptr->clear(); + s_store_first_to_last_ptr->clear(); } -// Class method -void StoreTrace::printSummary(ostream& out) +void +StoreTrace::store(NodeID node) { - out << "total_samples: " << s_total_samples << endl; - out << "store_count: " << (*s_store_count_ptr) << endl; - out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; - out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; - out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; -} - -// Class method -void StoreTrace::clearSummary() -{ - StoreTrace::initSummary(); - s_total_samples = 0; - s_store_count_ptr->clear(); - s_store_first_to_stolen_ptr->clear(); - s_store_last_to_stolen_ptr->clear(); - s_store_first_to_last_ptr->clear(); -} - -void StoreTrace::store(NodeID node) -{ - Time current = g_eventQueue_ptr->getTime(); - - assert((m_last_writer == -1) || (m_last_writer == node)); - - m_last_writer = node; - if (m_last_writer == -1) { - assert(m_stores_this_interval == 0); - } - - if (m_stores_this_interval == 0) { - // A new proessor just wrote the line, so reset the stats - m_first_store = current; - } - - m_last_store = current; - m_stores_this_interval++; -} - -void StoreTrace::downgrade(NodeID node) -{ - if (node == m_last_writer) { Time current = g_eventQueue_ptr->getTime(); - assert(m_stores_this_interval != 0); - assert(m_last_store != 0); - assert(m_first_store != 0); - assert(m_last_writer != -1); - // Per line stats - m_store_first_to_stolen.add(current - m_first_store); - m_store_count.add(m_stores_this_interval); - m_store_last_to_stolen.add(current - m_last_store); - m_store_first_to_last.add(m_last_store - m_first_store); - 
m_total_samples++; + assert((m_last_writer == -1) || (m_last_writer == node)); - // Global stats - assert(s_store_first_to_stolen_ptr != NULL); - s_store_first_to_stolen_ptr->add(current - m_first_store); - s_store_count_ptr->add(m_stores_this_interval); - s_store_last_to_stolen_ptr->add(current - m_last_store); - s_store_first_to_last_ptr->add(m_last_store - m_first_store); - s_total_samples++; + m_last_writer = node; + if (m_last_writer == -1) { + assert(m_stores_this_interval == 0); + } - // Initilize for next go round - m_stores_this_interval = 0; - m_last_store = 0; - m_first_store = 0; - m_last_writer = -1; - } + if (m_stores_this_interval == 0) { + // A new processor just wrote the line, so reset the stats + m_first_store = current; + } + + m_last_store = current; + m_stores_this_interval++; } -bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2) +void +StoreTrace::downgrade(NodeID node) { - return (n1->getTotal() > n2->getTotal()); + if (node == m_last_writer) { + Time current = g_eventQueue_ptr->getTime(); + assert(m_stores_this_interval != 0); + assert(m_last_store != 0); + assert(m_first_store != 0); + assert(m_last_writer != -1); + + // Per line stats + m_store_first_to_stolen.add(current - m_first_store); + m_store_count.add(m_stores_this_interval); + m_store_last_to_stolen.add(current - m_last_store); + m_store_first_to_last.add(m_last_store - m_first_store); + m_total_samples++; + + // Global stats + assert(s_store_first_to_stolen_ptr != NULL); + s_store_first_to_stolen_ptr->add(current - m_first_store); + s_store_count_ptr->add(m_stores_this_interval); + s_store_last_to_stolen_ptr->add(current - m_last_store); + s_store_first_to_last_ptr->add(m_last_store - m_first_store); + s_total_samples++; + + // Initialize for next go round + m_stores_this_interval = 0; + m_last_store = 0; + m_first_store = 0; + m_last_writer = -1; + } } diff --git a/src/mem/ruby/profiler/StoreTrace.hh b/src/mem/ruby/profiler/StoreTrace.hh index 5cdf7ce41..8bddfe6c7 
100644 --- a/src/mem/ruby/profiler/StoreTrace.hh +++ b/src/mem/ruby/profiler/StoreTrace.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,82 +26,63 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - * Description: - * - */ +#ifndef __MEM_RUBY_PROFILER_STORETRACE_HH__ +#define __MEM_RUBY_PROFILER_STORETRACE_HH__ -#ifndef StoreTrace_H -#define StoreTrace_H - -#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Histogram.hh" -class StoreTrace { -public: - // Constructors - StoreTrace() { } - explicit StoreTrace(const Address& addr); +class StoreTrace +{ + public: + StoreTrace() { } + explicit StoreTrace(const Address& addr); + ~StoreTrace(); - // Destructor - ~StoreTrace(); + void store(NodeID node); + void downgrade(NodeID node); + int getTotal() const { return m_total_samples; } + static void initSummary(); + static void printSummary(ostream& out); + static void clearSummary(); - // Public Methods - void store(NodeID node); - void downgrade(NodeID node); - int getTotal() const { return m_total_samples; } - static void initSummary(); - static void printSummary(ostream& out); - static void clearSummary(); + void print(ostream& out) const; - void print(ostream& out) const; -private: - // Private Methods + private: + static bool s_init; + static int64 s_total_samples; // Total number of store lifetimes + // of all lines + static Histogram* s_store_count_ptr; + static Histogram* s_store_first_to_stolen_ptr; + static Histogram* s_store_last_to_stolen_ptr; + static Histogram* s_store_first_to_last_ptr; - // Private copy constructor and assignment operator - // StoreTrace(const StoreTrace& obj); - // StoreTrace& operator=(const StoreTrace& obj); + Address m_addr; + NodeID m_last_writer; + Time m_first_store; + Time m_last_store; + int m_stores_this_interval; - // Class Members (s_ 
prefix) - static bool s_init; - static int64 s_total_samples; // Total number of store lifetimes of all lines - static Histogram* s_store_count_ptr; - static Histogram* s_store_first_to_stolen_ptr; - static Histogram* s_store_last_to_stolen_ptr; - static Histogram* s_store_first_to_last_ptr; - - // Data Members (m_ prefix) - - Address m_addr; - NodeID m_last_writer; - Time m_first_store; - Time m_last_store; - int m_stores_this_interval; - - int64 m_total_samples; // Total number of store lifetimes of this line - Histogram m_store_count; - Histogram m_store_first_to_stolen; - Histogram m_store_last_to_stolen; - Histogram m_store_first_to_last; + int64 m_total_samples; // Total number of store lifetimes of this line + Histogram m_store_count; + Histogram m_store_first_to_stolen; + Histogram m_store_last_to_stolen; + Histogram m_store_first_to_last; }; -bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2); - -// Output operator declaration -ostream& operator<<(ostream& out, const StoreTrace& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const StoreTrace& obj) +inline bool +node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2) { - obj.print(out); - out << flush; - return out; + return n1->getTotal() > n2->getTotal(); } -#endif //StoreTrace_H +inline ostream& +operator<<(ostream& out, const StoreTrace& obj) +{ + obj.print(out); + out << flush; + return out; +} + +#endif // __MEM_RUBY_PROFILER_STORETRACE_HH__ From 6b293c73fd19b73758547e1bfbe38a23d1800747 Mon Sep 17 00:00:00 2001 From: "Timothy M. Jones" Date: Thu, 25 Mar 2010 12:43:52 +0000 Subject: [PATCH 5/5] CPU: Added comments to address translation classes. 
--- src/cpu/translation.hh | 64 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh index 33e810710..983a748cf 100644 --- a/src/cpu/translation.hh +++ b/src/cpu/translation.hh @@ -35,6 +35,16 @@ #include "sim/tlb.hh" +/** + * This class captures the state of an address translation. A translation + * can be split in two if the ISA supports it and the memory access crosses + * a page boundary. In this case, this class is shared by two data + * translations (below). Otherwise it is used by a single data translation + * class. When each part of the translation is finished, the finish + * function is called which will indicate whether the whole translation is + * completed or not. There are also functions for accessing parts of the + * translation state which deal with the possible split correctly. + */ class WholeTranslationState { protected: @@ -50,7 +60,10 @@ class WholeTranslationState uint64_t *res; BaseTLB::Mode mode; - /** Single translation state. */ + /** + * Single translation state. We set the number of outstanding + * translations to one and indicate that it is not split. + */ WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL), @@ -60,7 +73,11 @@ class WholeTranslationState assert(mode == BaseTLB::Read || mode == BaseTLB::Write); } - /** Split translation state. */ + /** + * Split translation state. We copy all state into this class, set the + * number of outstanding translations to two and then mark this as a + * split translation. + */ WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow, RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) @@ -71,6 +88,13 @@ class WholeTranslationState assert(mode == BaseTLB::Read || mode == BaseTLB::Write); } + /** + * Finish part of a translation. 
If there is only one request then this + * translation is completed. If the request has been split in two then + * the outstanding count determines whether the translation is complete. + * In this case, flags from the split request are copied to the main + * request to make it easier to access them later on. + */ bool finish(Fault fault, int index) { @@ -89,6 +113,10 @@ class WholeTranslationState return outstanding == 0; } + /** + * Determine whether this translation produced a fault. Both parts of the + * translation must be checked if this is a split translation. + */ Fault getFault() const { @@ -102,36 +130,54 @@ class WholeTranslationState return NoFault; } + /** Remove all faults from the translation. */ void setNoFault() { faults[0] = faults[1] = NoFault; } + /** + * Check if this request is uncacheable. We only need to check the main + * request because the flags will have been copied here on a split + * translation. + */ bool isUncacheable() const { return mainReq->isUncacheable(); } + /** + * Check if this request is a prefetch. We only need to check the main + * request because the flags will have been copied here on a split + * translation. + */ bool isPrefetch() const { return mainReq->isPrefetch(); } + /** Get the physical address of this request. */ Addr getPaddr() const { return mainReq->getPaddr(); } + /** + * Get the flags associated with this request. We only need to access + * the main request because the flags will have been copied here on a + * split translation. + */ unsigned getFlags() { return mainReq->getFlags(); } + /** Delete all requests that make up this translation. */ void deleteReqs() { @@ -143,6 +189,16 @@ class WholeTranslationState } }; + +/** + * This class represents part of a data address translation. All state for + * the translation is held in WholeTranslationState (above). Therefore this + * class does not need to know whether the translation is split or not. 
The + * index variable determines this but is simply passed on to the state class. + * When this part of the translation is completed, finish is called. If the + * translation state class indicate that the whole translation is complete + * then the execution context is informed. + */ template class DataTranslation : public BaseTLB::Translation { @@ -163,6 +219,10 @@ class DataTranslation : public BaseTLB::Translation { } + /** + * Finish this part of the translation and indicate that the whole + * translation is complete if the state says so. + */ void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode)