1961a942f3
the GPUCoalescer code is used in the ruby profiler regardless of whether or not the coalescer code has been compiled, which can lead to link/run time errors. here we add #ifdefs to guard the usage of GPUCoalescer code. eventually we should refactor this code to use probe points.
506 lines
20 KiB
C++
506 lines
20 KiB
C++
/*
|
|
* Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met: redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer;
|
|
* redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution;
|
|
* neither the name of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
This file has been modified by Kevin Moore and Dan Nussbaum of the
|
|
Scalable Systems Research Group at Sun Microsystems Laboratories
|
|
(http://research.sun.com/scalable/) to support the Adaptive
|
|
Transactional Memory Test Platform (ATMTP).
|
|
|
|
Please send email to atmtp-interest@sun.com with feedback, questions, or
|
|
to request future announcements about ATMTP.
|
|
|
|
----------------------------------------------------------------------
|
|
|
|
File modification date: 2008-02-23
|
|
|
|
----------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "mem/ruby/profiler/Profiler.hh"
|
|
|
|
#include <sys/types.h>
|
|
#include <unistd.h>
|
|
|
|
#include <algorithm>
|
|
#include <fstream>
|
|
|
|
#include "base/stl_helpers.hh"
|
|
#include "base/str.hh"
|
|
#include "mem/protocol/MachineType.hh"
|
|
#include "mem/protocol/RubyRequest.hh"
|
|
#include "mem/ruby/network/Network.hh"
|
|
#include "mem/ruby/profiler/AddressProfiler.hh"
|
|
|
|
/**
 * The profiler uses GPUCoalescer code even though the GPUCoalescer is
 * not built for all ISAs, which can lead to link-time or run-time
 * errors. We guard the coalescer code with #ifdefs because there is no
 * easy way to refactor this code without removing the GPUCoalescer
 * stats from the profiler.
 *
 * Eventually we should use probe points here, but until then these
 * #ifdefs will serve.
 */
|
|
#ifdef BUILD_GPU
|
|
#include "mem/ruby/system/GPUCoalescer.hh"
|
|
#endif
|
|
|
|
#include "mem/ruby/system/Sequencer.hh"
|
|
|
|
using namespace std;
|
|
using m5::stl_helpers::operator<<;
|
|
|
|
/**
 * Construct the profiler for a Ruby system.
 *
 * An address profiler is always created. A second, instruction profiler
 * is created only when the all_instructions parameter is set; otherwise
 * the instruction profiler pointer is left NULL.
 *
 * @param p  Ruby system parameters; hot_lines, all_instructions,
 *           number_of_virtual_networks and num_of_sequencers are read.
 * @param rs The Ruby system whose controllers are walked when stats are
 *           collated.
 */
Profiler::Profiler(const RubySystemParams *p, RubySystem *rs)
    : m_ruby_system(rs), m_hot_lines(p->hot_lines),
      m_all_instructions(p->all_instructions),
      m_num_vnets(p->number_of_virtual_networks)
{
    m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
    m_address_profiler_ptr->setHotLines(m_hot_lines);
    m_address_profiler_ptr->setAllInstructions(m_all_instructions);

    // Give the pointer a defined value up front so that it never holds
    // garbage when no instruction profiler is requested.
    m_inst_profiler_ptr = NULL;

    if (m_all_instructions) {
        m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
        m_inst_profiler_ptr->setHotLines(m_hot_lines);
        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
    }
}
|
|
|
|
/**
 * Release the address profilers allocated by the constructor.
 *
 * The histograms created with new in regStats() are intentionally left
 * alone here.
 * NOTE(review): this assumes the Profiler is the sole owner of both
 * AddressProfiler objects and is never copied -- confirm against the
 * class declaration.
 */
Profiler::~Profiler()
{
    delete m_address_profiler_ptr;

    // The instruction profiler only exists when all_instructions was set;
    // checking the flag (not the pointer) keeps this safe even if the
    // pointer was never assigned.
    if (m_all_instructions) {
        delete m_inst_profiler_ptr;
    }
}
|
|
|
|
void
|
|
Profiler::regStats(const std::string &pName)
|
|
{
|
|
if (!m_all_instructions) {
|
|
m_address_profiler_ptr->regStats(pName);
|
|
}
|
|
|
|
if (m_all_instructions) {
|
|
m_inst_profiler_ptr->regStats(pName);
|
|
}
|
|
|
|
delayHistogram
|
|
.init(10)
|
|
.name(pName + ".delayHist")
|
|
.desc("delay histogram for all message")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
for (int i = 0; i < m_num_vnets; i++) {
|
|
delayVCHistogram.push_back(new Stats::Histogram());
|
|
delayVCHistogram[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".delayVCHist.vnet_%i", i))
|
|
.desc(csprintf("delay histogram for vnet_%i", i))
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
}
|
|
|
|
m_outstandReqHistSeqr
|
|
.init(10)
|
|
.name(pName + ".outstanding_req_hist_seqr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_outstandReqHistCoalsr
|
|
.init(10)
|
|
.name(pName + ".outstanding_req_hist_coalsr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_latencyHistSeqr
|
|
.init(10)
|
|
.name(pName + ".latency_hist_seqr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_latencyHistCoalsr
|
|
.init(10)
|
|
.name(pName + ".latency_hist_coalsr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_hitLatencyHistSeqr
|
|
.init(10)
|
|
.name(pName + ".hit_latency_hist_seqr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missLatencyHistSeqr
|
|
.init(10)
|
|
.name(pName + ".miss_latency_hist_seqr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missLatencyHistCoalsr
|
|
.init(10)
|
|
.name(pName + ".miss_latency_hist_coalsr")
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
for (int i = 0; i < RubyRequestType_NUM; i++) {
|
|
m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
|
|
m_typeLatencyHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.latency_hist_seqr",
|
|
RubyRequestType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
|
|
m_typeLatencyHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.latency_hist_coalsr",
|
|
RubyRequestType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
|
|
m_hitTypeLatencyHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.hit_latency_hist_seqr",
|
|
RubyRequestType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
|
|
m_missTypeLatencyHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.miss_latency_hist_seqr",
|
|
RubyRequestType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
|
|
m_missTypeLatencyHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.miss_latency_hist_coalsr",
|
|
RubyRequestType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
}
|
|
|
|
for (int i = 0; i < MachineType_NUM; i++) {
|
|
m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
|
|
m_hitMachLatencyHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
|
|
m_missMachLatencyHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
|
|
m_missMachLatencyHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
|
|
m_IssueToInitialDelayHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(
|
|
".%s.miss_latency_hist_seqr.issue_to_initial_request",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
|
|
m_IssueToInitialDelayHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(
|
|
".%s.miss_latency_hist_coalsr.issue_to_initial_request",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
|
|
m_InitialToForwardDelayHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
|
|
m_InitialToForwardDelayHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
|
|
m_ForwardToFirstResponseDelayHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(
|
|
".%s.miss_latency_hist_seqr.forward_to_first_response",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
|
|
m_ForwardToFirstResponseDelayHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(
|
|
".%s.miss_latency_hist_coalsr.forward_to_first_response",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
|
|
m_FirstResponseToCompletionDelayHistSeqr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(
|
|
".%s.miss_latency_hist_seqr.first_response_to_completion",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
|
|
m_FirstResponseToCompletionDelayHistCoalsr[i]
|
|
->init(10)
|
|
.name(pName + csprintf(
|
|
".%s.miss_latency_hist_coalsr.first_response_to_completion",
|
|
MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_IncompleteTimesSeqr[i]
|
|
.name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i)))
|
|
.desc("")
|
|
.flags(Stats::nozero);
|
|
}
|
|
|
|
for (int i = 0; i < RubyRequestType_NUM; i++) {
|
|
m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
|
|
m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
|
|
m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());
|
|
|
|
for (int j = 0; j < MachineType_NUM; j++) {
|
|
m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
|
|
m_hitTypeMachLatencyHistSeqr[i][j]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
|
|
RubyRequestType(i), MachineType(j)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
|
|
m_missTypeMachLatencyHistSeqr[i][j]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
|
|
RubyRequestType(i), MachineType(j)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
|
|
m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram());
|
|
m_missTypeMachLatencyHistCoalsr[i][j]
|
|
->init(10)
|
|
.name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
|
|
RubyRequestType(i), MachineType(j)))
|
|
.desc("")
|
|
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
Profiler::collateStats()
|
|
{
|
|
if (!m_all_instructions) {
|
|
m_address_profiler_ptr->collateStats();
|
|
}
|
|
|
|
if (m_all_instructions) {
|
|
m_inst_profiler_ptr->collateStats();
|
|
}
|
|
|
|
for (uint32_t i = 0; i < MachineType_NUM; i++) {
|
|
for (map<uint32_t, AbstractController*>::iterator it =
|
|
m_ruby_system->m_abstract_controls[i].begin();
|
|
it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
|
|
|
|
AbstractController *ctr = (*it).second;
|
|
delayHistogram.add(ctr->getDelayHist());
|
|
|
|
for (uint32_t i = 0; i < m_num_vnets; i++) {
|
|
delayVCHistogram[i]->add(ctr->getDelayVCHist(i));
|
|
}
|
|
}
|
|
}
|
|
|
|
for (uint32_t i = 0; i < MachineType_NUM; i++) {
|
|
for (map<uint32_t, AbstractController*>::iterator it =
|
|
m_ruby_system->m_abstract_controls[i].begin();
|
|
it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
|
|
|
|
AbstractController *ctr = (*it).second;
|
|
Sequencer *seq = ctr->getCPUSequencer();
|
|
if (seq != NULL) {
|
|
m_outstandReqHistSeqr.add(seq->getOutstandReqHist());
|
|
}
|
|
#ifdef BUILD_GPU
|
|
GPUCoalescer *coal = ctr->getGPUCoalescer();
|
|
if (coal != NULL) {
|
|
m_outstandReqHistCoalsr.add(coal->getOutstandReqHist());
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
for (uint32_t i = 0; i < MachineType_NUM; i++) {
|
|
for (map<uint32_t, AbstractController*>::iterator it =
|
|
m_ruby_system->m_abstract_controls[i].begin();
|
|
it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
|
|
|
|
AbstractController *ctr = (*it).second;
|
|
Sequencer *seq = ctr->getCPUSequencer();
|
|
if (seq != NULL) {
|
|
// add all the latencies
|
|
m_latencyHistSeqr.add(seq->getLatencyHist());
|
|
m_hitLatencyHistSeqr.add(seq->getHitLatencyHist());
|
|
m_missLatencyHistSeqr.add(seq->getMissLatencyHist());
|
|
|
|
// add the per request type latencies
|
|
for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
|
|
m_typeLatencyHistSeqr[j]
|
|
->add(seq->getTypeLatencyHist(j));
|
|
m_hitTypeLatencyHistSeqr[j]
|
|
->add(seq->getHitTypeLatencyHist(j));
|
|
m_missTypeLatencyHistSeqr[j]
|
|
->add(seq->getMissTypeLatencyHist(j));
|
|
}
|
|
|
|
// add the per machine type miss latencies
|
|
for (uint32_t j = 0; j < MachineType_NUM; ++j) {
|
|
m_hitMachLatencyHistSeqr[j]
|
|
->add(seq->getHitMachLatencyHist(j));
|
|
m_missMachLatencyHistSeqr[j]
|
|
->add(seq->getMissMachLatencyHist(j));
|
|
|
|
m_IssueToInitialDelayHistSeqr[j]->add(
|
|
seq->getIssueToInitialDelayHist(MachineType(j)));
|
|
|
|
m_InitialToForwardDelayHistSeqr[j]->add(
|
|
seq->getInitialToForwardDelayHist(MachineType(j)));
|
|
m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq->
|
|
getForwardRequestToFirstResponseHist(MachineType(j)));
|
|
|
|
m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq->
|
|
getFirstResponseToCompletionDelayHist(
|
|
MachineType(j)));
|
|
m_IncompleteTimesSeqr[j] +=
|
|
seq->getIncompleteTimes(MachineType(j));
|
|
}
|
|
|
|
// add the per (request, machine) type miss latencies
|
|
for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
|
|
for (uint32_t k = 0; k < MachineType_NUM; k++) {
|
|
m_hitTypeMachLatencyHistSeqr[j][k]->add(
|
|
seq->getHitTypeMachLatencyHist(j,k));
|
|
m_missTypeMachLatencyHistSeqr[j][k]->add(
|
|
seq->getMissTypeMachLatencyHist(j,k));
|
|
}
|
|
}
|
|
}
|
|
#ifdef BUILD_GPU
|
|
GPUCoalescer *coal = ctr->getGPUCoalescer();
|
|
if (coal != NULL) {
|
|
// add all the latencies
|
|
m_latencyHistCoalsr.add(coal->getLatencyHist());
|
|
m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());
|
|
|
|
// add the per request type latencies
|
|
for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
|
|
m_typeLatencyHistCoalsr[j]
|
|
->add(coal->getTypeLatencyHist(j));
|
|
m_missTypeLatencyHistCoalsr[j]
|
|
->add(coal->getMissTypeLatencyHist(j));
|
|
}
|
|
|
|
// add the per machine type miss latencies
|
|
for (uint32_t j = 0; j < MachineType_NUM; ++j) {
|
|
m_missMachLatencyHistCoalsr[j]
|
|
->add(coal->getMissMachLatencyHist(j));
|
|
|
|
m_IssueToInitialDelayHistCoalsr[j]->add(
|
|
coal->getIssueToInitialDelayHist(MachineType(j)));
|
|
|
|
m_InitialToForwardDelayHistCoalsr[j]->add(
|
|
coal->getInitialToForwardDelayHist(MachineType(j)));
|
|
m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
|
|
getForwardRequestToFirstResponseHist(MachineType(j)));
|
|
|
|
m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
|
|
getFirstResponseToCompletionDelayHist(
|
|
MachineType(j)));
|
|
}
|
|
|
|
// add the per (request, machine) type miss latencies
|
|
for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
|
|
for (uint32_t k = 0; k < MachineType_NUM; k++) {
|
|
m_missTypeMachLatencyHistCoalsr[j][k]->add(
|
|
coal->getMissTypeMachLatencyHist(j,k));
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
|
|
{
|
|
if (msg.getType() != RubyRequestType_IFETCH) {
|
|
// Note: The following line should be commented out if you
|
|
// want to use the special profiling that is part of the GS320
|
|
// protocol
|
|
|
|
// NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
|
|
// profiled by the AddressProfiler
|
|
m_address_profiler_ptr->
|
|
addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
|
|
msg.getType(), msg.getAccessMode(), id, false);
|
|
}
|
|
}
|