ruby: split CPU and GPU latency stats

This commit is contained in:
David Hashe 2015-07-20 09:15:18 -05:00
parent 1a7d3f9fcb
commit 698866d461
5 changed files with 254 additions and 78 deletions

View file

@ -42,6 +42,8 @@
----------------------------------------------------------------------
*/
#include "mem/ruby/profiler/Profiler.hh"
#include <sys/types.h>
#include <unistd.h>
@ -54,7 +56,7 @@
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/Sequencer.hh"
using namespace std;
@ -106,131 +108,217 @@ Profiler::regStats(const std::string &pName)
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
}
m_outstandReqHist
m_outstandReqHistSeqr
.init(10)
.name(pName + ".outstanding_req_hist")
.name(pName + ".outstanding_req_hist_seqr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_latencyHist
m_outstandReqHistCoalsr
.init(10)
.name(pName + ".latency_hist")
.name(pName + ".outstanding_req_hist_coalsr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_hitLatencyHist
m_latencyHistSeqr
.init(10)
.name(pName + ".hit_latency_hist")
.name(pName + ".latency_hist_seqr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missLatencyHist
m_latencyHistCoalsr
.init(10)
.name(pName + ".miss_latency_hist")
.name(pName + ".latency_hist_coalsr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_hitLatencyHistSeqr
.init(10)
.name(pName + ".hit_latency_hist_seqr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missLatencyHistSeqr
.init(10)
.name(pName + ".miss_latency_hist_seqr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missLatencyHistCoalsr
.init(10)
.name(pName + ".miss_latency_hist_coalsr")
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
for (int i = 0; i < RubyRequestType_NUM; i++) {
m_typeLatencyHist.push_back(new Stats::Histogram());
m_typeLatencyHist[i]
m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
m_typeLatencyHistSeqr[i]
->init(10)
.name(pName + csprintf(".%s.latency_hist",
.name(pName + csprintf(".%s.latency_hist_seqr",
RubyRequestType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_hitTypeLatencyHist.push_back(new Stats::Histogram());
m_hitTypeLatencyHist[i]
m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
m_typeLatencyHistCoalsr[i]
->init(10)
.name(pName + csprintf(".%s.hit_latency_hist",
.name(pName + csprintf(".%s.latency_hist_coalsr",
RubyRequestType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missTypeLatencyHist.push_back(new Stats::Histogram());
m_missTypeLatencyHist[i]
m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
m_hitTypeLatencyHistSeqr[i]
->init(10)
.name(pName + csprintf(".%s.miss_latency_hist",
.name(pName + csprintf(".%s.hit_latency_hist_seqr",
RubyRequestType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
m_missTypeLatencyHistSeqr[i]
->init(10)
.name(pName + csprintf(".%s.miss_latency_hist_seqr",
RubyRequestType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
m_missTypeLatencyHistCoalsr[i]
->init(10)
.name(pName + csprintf(".%s.miss_latency_hist_coalsr",
RubyRequestType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
}
for (int i = 0; i < MachineType_NUM; i++) {
m_hitMachLatencyHist.push_back(new Stats::Histogram());
m_hitMachLatencyHist[i]
m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
m_hitMachLatencyHistSeqr[i]
->init(10)
.name(pName + csprintf(".%s.hit_mach_latency_hist",
.name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missMachLatencyHist.push_back(new Stats::Histogram());
m_missMachLatencyHist[i]
m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
m_missMachLatencyHistSeqr[i]
->init(10)
.name(pName + csprintf(".%s.miss_mach_latency_hist",
.name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
m_IssueToInitialDelayHist[i]
m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
m_missMachLatencyHistCoalsr[i]
->init(10)
.name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
m_IssueToInitialDelayHistSeqr[i]
->init(10)
.name(pName + csprintf(
".%s.miss_latency_hist.issue_to_initial_request",
".%s.miss_latency_hist_seqr.issue_to_initial_request",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
m_InitialToForwardDelayHist[i]
m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
m_IssueToInitialDelayHistCoalsr[i]
->init(10)
.name(pName + csprintf(".%s.miss_latency_hist.initial_to_forward",
.name(pName + csprintf(
".%s.miss_latency_hist_coalsr.issue_to_initial_request",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
m_InitialToForwardDelayHistSeqr[i]
->init(10)
.name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
m_ForwardToFirstResponseDelayHist[i]
m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
m_InitialToForwardDelayHistCoalsr[i]
->init(10)
.name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
m_ForwardToFirstResponseDelayHistSeqr[i]
->init(10)
.name(pName + csprintf(
".%s.miss_latency_hist.forward_to_first_response",
".%s.miss_latency_hist_seqr.forward_to_first_response",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
m_FirstResponseToCompletionDelayHist[i]
m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
m_ForwardToFirstResponseDelayHistCoalsr[i]
->init(10)
.name(pName + csprintf(
".%s.miss_latency_hist.first_response_to_completion",
".%s.miss_latency_hist_coalsr.forward_to_first_response",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_IncompleteTimes[i]
.name(pName + csprintf(".%s.incomplete_times", MachineType(i)))
m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
m_FirstResponseToCompletionDelayHistSeqr[i]
->init(10)
.name(pName + csprintf(
".%s.miss_latency_hist_seqr.first_response_to_completion",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
m_FirstResponseToCompletionDelayHistCoalsr[i]
->init(10)
.name(pName + csprintf(
".%s.miss_latency_hist_coalsr.first_response_to_completion",
MachineType(i)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_IncompleteTimesSeqr[i]
.name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i)))
.desc("")
.flags(Stats::nozero);
}
for (int i = 0; i < RubyRequestType_NUM; i++) {
m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());
for (int j = 0; j < MachineType_NUM; j++) {
m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram());
m_hitTypeMachLatencyHist[i][j]
m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
m_hitTypeMachLatencyHistSeqr[i][j]
->init(10)
.name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist",
.name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
RubyRequestType(i), MachineType(j)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
m_missTypeMachLatencyHist[i][j]
m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
m_missTypeMachLatencyHistSeqr[i][j]
->init(10)
.name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist",
.name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
RubyRequestType(i), MachineType(j)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram());
m_missTypeMachLatencyHistCoalsr[i][j]
->init(10)
.name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
RubyRequestType(i), MachineType(j)))
.desc("")
.flags(Stats::nozero | Stats::pdf | Stats::oneline);
@ -271,7 +359,11 @@ Profiler::collateStats()
AbstractController *ctr = (*it).second;
Sequencer *seq = ctr->getCPUSequencer();
if (seq != NULL) {
m_outstandReqHist.add(seq->getOutstandReqHist());
m_outstandReqHistSeqr.add(seq->getOutstandReqHist());
}
GPUCoalescer *coal = ctr->getGPUCoalescer();
if (coal != NULL) {
m_outstandReqHistCoalsr.add(coal->getOutstandReqHist());
}
}
}
@ -285,52 +377,93 @@ Profiler::collateStats()
Sequencer *seq = ctr->getCPUSequencer();
if (seq != NULL) {
// add all the latencies
m_latencyHist.add(seq->getLatencyHist());
m_hitLatencyHist.add(seq->getHitLatencyHist());
m_missLatencyHist.add(seq->getMissLatencyHist());
m_latencyHistSeqr.add(seq->getLatencyHist());
m_hitLatencyHistSeqr.add(seq->getHitLatencyHist());
m_missLatencyHistSeqr.add(seq->getMissLatencyHist());
// add the per request type latencies
for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
m_typeLatencyHist[j]
m_typeLatencyHistSeqr[j]
->add(seq->getTypeLatencyHist(j));
m_hitTypeLatencyHist[j]
m_hitTypeLatencyHistSeqr[j]
->add(seq->getHitTypeLatencyHist(j));
m_missTypeLatencyHist[j]
m_missTypeLatencyHistSeqr[j]
->add(seq->getMissTypeLatencyHist(j));
}
// add the per machine type hit and miss latencies
for (uint32_t j = 0; j < MachineType_NUM; ++j) {
m_hitMachLatencyHist[j]
m_hitMachLatencyHistSeqr[j]
->add(seq->getHitMachLatencyHist(j));
m_missMachLatencyHist[j]
m_missMachLatencyHistSeqr[j]
->add(seq->getMissMachLatencyHist(j));
m_IssueToInitialDelayHist[j]->add(
m_IssueToInitialDelayHistSeqr[j]->add(
seq->getIssueToInitialDelayHist(MachineType(j)));
m_InitialToForwardDelayHist[j]->add(
m_InitialToForwardDelayHistSeqr[j]->add(
seq->getInitialToForwardDelayHist(MachineType(j)));
m_ForwardToFirstResponseDelayHist[j]->add(seq->
m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq->
getForwardRequestToFirstResponseHist(MachineType(j)));
m_FirstResponseToCompletionDelayHist[j]->add(seq->
m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq->
getFirstResponseToCompletionDelayHist(
MachineType(j)));
m_IncompleteTimes[j] +=
m_IncompleteTimesSeqr[j] +=
seq->getIncompleteTimes(MachineType(j));
}
// add the per (request, machine) type hit and miss latencies
for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
for (uint32_t k = 0; k < MachineType_NUM; k++) {
m_hitTypeMachLatencyHist[j][k]->add(
m_hitTypeMachLatencyHistSeqr[j][k]->add(
seq->getHitTypeMachLatencyHist(j,k));
m_missTypeMachLatencyHist[j][k]->add(
m_missTypeMachLatencyHistSeqr[j][k]->add(
seq->getMissTypeMachLatencyHist(j,k));
}
}
}
GPUCoalescer *coal = ctr->getGPUCoalescer();
if (coal != NULL) {
// add all the latencies
m_latencyHistCoalsr.add(coal->getLatencyHist());
m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());
// add the per request type latencies
for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
m_typeLatencyHistCoalsr[j]
->add(coal->getTypeLatencyHist(j));
m_missTypeLatencyHistCoalsr[j]
->add(coal->getMissTypeLatencyHist(j));
}
// add the per machine type miss latencies
for (uint32_t j = 0; j < MachineType_NUM; ++j) {
m_missMachLatencyHistCoalsr[j]
->add(coal->getMissMachLatencyHist(j));
m_IssueToInitialDelayHistCoalsr[j]->add(
coal->getIssueToInitialDelayHist(MachineType(j)));
m_InitialToForwardDelayHistCoalsr[j]->add(
coal->getInitialToForwardDelayHist(MachineType(j)));
m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
getForwardRequestToFirstResponseHist(MachineType(j)));
m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
getFirstResponseToCompletionDelayHist(
MachineType(j)));
}
// add the per (request, machine) type miss latencies
for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
for (uint32_t k = 0; k < MachineType_NUM; k++) {
m_missTypeMachLatencyHistCoalsr[j][k]->add(
coal->getMissTypeMachLatencyHist(j,k));
}
}
}
}
}
}

View file

@ -94,38 +94,49 @@ class Profiler
std::vector<Stats::Histogram *> delayVCHistogram;
//! Histogram for number of outstanding requests per cycle.
Stats::Histogram m_outstandReqHist;
Stats::Histogram m_outstandReqHistSeqr;
Stats::Histogram m_outstandReqHistCoalsr;
//! Histogram for holding latency profile of all requests.
Stats::Histogram m_latencyHist;
std::vector<Stats::Histogram *> m_typeLatencyHist;
Stats::Histogram m_latencyHistSeqr;
Stats::Histogram m_latencyHistCoalsr;
std::vector<Stats::Histogram *> m_typeLatencyHistSeqr;
std::vector<Stats::Histogram *> m_typeLatencyHistCoalsr;
//! Histogram for holding latency profile of all requests that
//! hit in the controller connected to this sequencer.
Stats::Histogram m_hitLatencyHist;
std::vector<Stats::Histogram *> m_hitTypeLatencyHist;
Stats::Histogram m_hitLatencyHistSeqr;
std::vector<Stats::Histogram *> m_hitTypeLatencyHistSeqr;
//! Histograms for profiling the latencies for requests that
//! did not require external messages.
std::vector<Stats::Histogram *> m_hitMachLatencyHist;
std::vector< std::vector<Stats::Histogram *> > m_hitTypeMachLatencyHist;
std::vector<Stats::Histogram *> m_hitMachLatencyHistSeqr;
std::vector< std::vector<Stats::Histogram *> > m_hitTypeMachLatencyHistSeqr;
//! Histogram for holding latency profile of all requests that
//! miss in the controller connected to this sequencer.
Stats::Histogram m_missLatencyHist;
std::vector<Stats::Histogram *> m_missTypeLatencyHist;
Stats::Histogram m_missLatencyHistSeqr;
Stats::Histogram m_missLatencyHistCoalsr;
std::vector<Stats::Histogram *> m_missTypeLatencyHistSeqr;
std::vector<Stats::Histogram *> m_missTypeLatencyHistCoalsr;
//! Histograms for profiling the latencies for requests that
//! required external messages.
std::vector<Stats::Histogram *> m_missMachLatencyHist;
std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
std::vector<Stats::Histogram *> m_missMachLatencyHistSeqr;
std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHistSeqr;
std::vector<Stats::Histogram *> m_missMachLatencyHistCoalsr;
std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHistCoalsr;
//! Histograms for recording the breakdown of miss latency
std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
Stats::Scalar m_IncompleteTimes[MachineType_NUM];
std::vector<Stats::Histogram *> m_IssueToInitialDelayHistSeqr;
std::vector<Stats::Histogram *> m_InitialToForwardDelayHistSeqr;
std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHistSeqr;
std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHistSeqr;
Stats::Scalar m_IncompleteTimesSeqr[MachineType_NUM];
std::vector<Stats::Histogram *> m_IssueToInitialDelayHistCoalsr;
std::vector<Stats::Histogram *> m_InitialToForwardDelayHistCoalsr;
std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHistCoalsr;
std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHistCoalsr;
//added by SS
const bool m_hot_lines;

View file

@ -32,6 +32,7 @@
#include "mem/protocol/MemoryMsg.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "sim/system.hh"
AbstractController::AbstractController(const Params *p)

View file

@ -49,6 +49,7 @@
#include "mem/mem_object.hh"
class Network;
class GPUCoalescer;
// used to communicate that an in_port peeked the wrong message type
class RejectException: public std::exception
@ -86,6 +87,7 @@ class AbstractController : public MemObject, public Consumer
virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
virtual Sequencer* getCPUSequencer() const = 0;
virtual GPUCoalescer* getGPUCoalescer() const = 0;
//! These functions are used by ruby system to read/write the data blocks
//! that exist within the controller.

View file

@ -310,6 +310,7 @@ class $c_ident : public AbstractController
void recordCacheTrace(int cntrl, CacheRecorder* tr);
Sequencer* getCPUSequencer() const;
GPUCoalescer* getGPUCoalescer() const;
int functionalWriteBuffers(PacketPtr&);
@ -680,6 +681,12 @@ $c_ident::init()
assert(param.pointer)
seq_ident = "m_%s_ptr" % param.ident
coal_ident = "NULL"
for param in self.config_parameters:
if param.ident == "coalescer":
assert(param.pointer)
coal_ident = "m_%s_ptr" % param.ident
if seq_ident != "NULL":
code('''
Sequencer*
@ -700,6 +707,28 @@ $c_ident::getCPUSequencer() const
{
return NULL;
}
''')
if coal_ident != "NULL":
code('''
GPUCoalescer*
$c_ident::getGPUCoalescer() const
{
if (NULL != $coal_ident && !$coal_ident->isCPUSequencer()) {
return $coal_ident;
} else {
return NULL;
}
}
''')
else:
code('''
GPUCoalescer*
$c_ident::getGPUCoalescer() const
{
return NULL;
}
''')
code('''