diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index f8ca59734..c74e887a8 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -100,8 +100,7 @@ def create_topology(controllers, options): def create_system(options, system, piobus = None, dma_ports = []): - system.ruby = RubySystem(stats_filename = options.ruby_stats, - no_mem_vec = options.use_map) + system.ruby = RubySystem(no_mem_vec = options.use_map) ruby = system.ruby protocol = buildEnv['PROTOCOL'] @@ -186,10 +185,8 @@ def create_system(options, system, piobus = None, dma_ports = []): phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) assert(total_mem_size.value == phys_mem_size) - ruby_profiler = RubyProfiler(ruby_system = ruby, - num_of_sequencers = len(cpu_sequencers)) ruby.network = network - ruby.profiler = ruby_profiler ruby.mem_size = total_mem_size ruby._cpu_ruby_ports = cpu_sequencers + ruby.num_of_sequencers = len(cpu_sequencers) ruby.random_seed = options.random_seed diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc index 56d807a16..e04dd3825 100644 --- a/src/mem/ruby/buffers/MessageBuffer.cc +++ b/src/mem/ruby/buffers/MessageBuffer.cc @@ -414,13 +414,6 @@ MessageBuffer::print(ostream& out) const ccprintf(out, "%s] %s", copy, m_name); } -void -MessageBuffer::printStats(ostream& out) -{ - out << "MessageBuffer: " << m_name << " stats - msgs:" << m_msg_counter - << " full:" << m_not_avail_count << endl; -} - bool MessageBuffer::isReady() const { diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh index 1133144a2..d3bd90a64 100644 --- a/src/mem/ruby/buffers/MessageBuffer.hh +++ b/src/mem/ruby/buffers/MessageBuffer.hh @@ -148,7 +148,6 @@ class MessageBuffer void clear(); void print(std::ostream& out) const; - void printStats(std::ostream& out); void clearStats() { m_not_avail_count = 0; m_msg_counter = 0; } void setIncomingLink(int link_id) { m_input_link_id = link_id; } diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 642b5a41a..9bf1d517d 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -67,6 +67,8 @@ class AddressProfiler //added by SS void setHotLines(bool hot_lines); void setAllInstructions(bool all_instructions); + void regStats(const std::string &name) {} + void collateStats() {} private: // Private copy constructor and assignment operator diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index d0ea7921c..6f7da1eda 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,32 +61,21 @@ using namespace std; using m5::stl_helpers::operator<<; -Profiler::Profiler(const Params *p) - : SimObject(p) +Profiler::Profiler(const RubySystemParams *p) + : m_IncompleteTimes(MachineType_NUM) { - m_inst_profiler_ptr = NULL; - m_address_profiler_ptr = NULL; - m_real_time_start_time = time(NULL); // Not reset in clearStats() - m_hot_lines = p->hot_lines; m_all_instructions = p->all_instructions; - m_num_of_sequencers = p->num_of_sequencers; - - m_hot_lines = false; - m_all_instructions = false; - - m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers); m_address_profiler_ptr->setHotLines(m_hot_lines); m_address_profiler_ptr->setAllInstructions(m_all_instructions); if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers); m_inst_profiler_ptr->setHotLines(m_hot_lines); m_inst_profiler_ptr->setAllInstructions(m_all_instructions); } - - p->ruby_system->registerProfiler(this); } Profiler::~Profiler() @@ -94,74 +83,176 @@ Profiler::~Profiler() } void -Profiler::print(ostream& out) const +Profiler::regStats(const std::string &pName) { - out << "[Profiler]"; -} - -void -Profiler::printRequestProfile(ostream &out) const -{ - out << "Request vs. RubySystem State Profile" << endl; - out << "--------------------------------" << endl; - out << endl; - - map m_requestProfileMap; - uint64_t m_requests = 0; - - for (uint32_t i = 0; i < MachineType_NUM; i++) { - for (map::iterator it = - g_abs_controls[i].begin(); - it != g_abs_controls[i].end(); ++it) { - - AbstractController *ctr = (*it).second; - map mp = ctr->getRequestProfileMap(); - - for (map::iterator jt = mp.begin(); - jt != mp.end(); ++jt) { - - map::iterator kt = - m_requestProfileMap.find((*jt).first); - if (kt != m_requestProfileMap.end()) { - (*kt).second += (*jt).second; - } else { - m_requestProfileMap[(*jt).first] = (*jt).second; - } - } - - m_requests += ctr->getRequestCount(); - } + if (!m_all_instructions) { + m_address_profiler_ptr->regStats(pName); } - map::const_iterator i = m_requestProfileMap.begin(); - map::const_iterator end = m_requestProfileMap.end(); - for (; i != end; ++i) { - const string &key = i->first; - uint64_t count = i->second; - - double percent = (100.0 * double(count)) / double(m_requests); - vector items; - tokenize(items, key, ':'); - vector::iterator j = items.begin(); - vector::iterator end = items.end(); - for (; j != end; ++i) - out << setw(10) << *j; - out << setw(11) << count; - out << setw(14) << percent << endl; + if (m_all_instructions) { + m_inst_profiler_ptr->regStats(pName); } - out << endl; -} -void -Profiler::printDelayProfile(ostream &out) const -{ - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; + delayHistogram + .init(10) + .name(pName + ".delayHist") + .desc("delay histogram for all message") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); uint32_t numVNets = Network::getNumberOfVirtualNetworks(); - Histogram delayHistogram; - std::vector delayVCHistogram(numVNets); + for (int i = 0; i < numVNets; i++) { + delayVCHistogram.push_back(new Stats::Histogram()); + delayVCHistogram[i] + ->init(10) + .name(pName + csprintf(".delayVCHist.vnet_%i", i)) + .desc(csprintf("delay histogram for vnet_%i", i)) + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + } + m_outstandReqHist + .init(10) + .name(pName + ".outstanding_req_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_latencyHist + .init(10) + .name(pName + ".latency_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_hitLatencyHist + .init(10) + .name(pName + ".hit_latency_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missLatencyHist + .init(10) + .name(pName + ".miss_latency_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist.push_back(new Stats::Histogram()); + m_typeLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.latency_hist", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_hitTypeLatencyHist.push_back(new Stats::Histogram()); + m_hitTypeLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.hit_latency_hist", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missTypeLatencyHist.push_back(new Stats::Histogram()); + m_missTypeLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.miss_latency_hist", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + } + + for (int i = 0; i < MachineType_NUM; i++) { + m_hitMachLatencyHist.push_back(new Stats::Histogram()); + m_hitMachLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.hit_mach_latency_hist", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missMachLatencyHist.push_back(new Stats::Histogram()); + m_missMachLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.miss_mach_latency_hist", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHist[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist.issue_to_initial_request", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHist[i] + ->init(10) + .name(pName + csprintf(".%s.miss_latency_hist.initial_to_forward", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHist[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist.forward_to_first_response", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHist[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist.first_response_to_completion", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_IncompleteTimes[i] + .name(pName + csprintf(".%s.incomplete_times", MachineType(i))) + .desc("") + .flags(Stats::nozero); + } + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_hitTypeMachLatencyHist.push_back(std::vector()); + m_missTypeMachLatencyHist.push_back(std::vector()); + + for (int j = 0; j < MachineType_NUM; j++) { + m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_hitTypeMachLatencyHist[i][j] + ->init(10) + .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist", + RubyRequestType(i), MachineType(j))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHist[i][j] + ->init(10) + .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist", + RubyRequestType(i), MachineType(j))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + } + } +} + +void +Profiler::collateStats() +{ + if (!m_all_instructions) { + m_address_profiler_ptr->collateStats(); + } + + if (m_all_instructions) { + m_inst_profiler_ptr->collateStats(); + } + + uint32_t numVNets = Network::getNumberOfVirtualNetworks(); for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map::iterator it = g_abs_controls[i].begin(); @@ -171,314 +262,81 @@ Profiler::printDelayProfile(ostream &out) const delayHistogram.add(ctr->getDelayHist()); for (uint32_t i = 0; i < numVNets; i++) { - delayVCHistogram[i].add(ctr->getDelayVCHist(i)); + delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); } } } - out << "Total_delay_cycles: " << delayHistogram << endl; - - for (int i = 0; i < numVNets; i++) { - out << " virtual_network_" << i << "_delay_cycles: " - << delayVCHistogram[i] << endl; - } -} - -void -Profiler::printOutstandingReqProfile(ostream &out) const -{ - Histogram sequencerRequests; - for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map::iterator it = - g_abs_controls[i].begin(); - it != g_abs_controls[i].end(); ++it) { + g_abs_controls[i].begin(); + it != g_abs_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getSequencer(); if (seq != NULL) { - sequencerRequests.add(seq->getOutstandReqHist()); + m_outstandReqHist.add(seq->getOutstandReqHist()); } } } - out << "sequencer_requests_outstanding: " - << sequencerRequests << endl; -} - -void -Profiler::printMissLatencyProfile(ostream &out) const -{ - // Collate the miss latencies histograms from all the sequencers - Histogram latency_hist; - std::vector type_latency_hist(RubyRequestType_NUM); - - Histogram hit_latency_hist; - std::vector hit_type_latency_hist(RubyRequestType_NUM); - - std::vector hit_mach_latency_hist(MachineType_NUM); - std::vector > - hit_type_mach_latency_hist(RubyRequestType_NUM, - std::vector(MachineType_NUM)); - - Histogram miss_latency_hist; - std::vector miss_type_latency_hist(RubyRequestType_NUM); - - std::vector miss_mach_latency_hist(MachineType_NUM); - std::vector > - miss_type_mach_latency_hist(RubyRequestType_NUM, - std::vector(MachineType_NUM)); - - std::vector issue_to_initial_delay_hist(MachineType_NUM); - std::vector initial_to_forward_delay_hist(MachineType_NUM); - std::vector - forward_to_first_response_delay_hist(MachineType_NUM); - std::vector - first_response_to_completion_delay_hist(MachineType_NUM); - std::vector incomplete_times(MachineType_NUM); - for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map::iterator it = - g_abs_controls[i].begin(); - it != g_abs_controls[i].end(); ++it) { + g_abs_controls[i].begin(); + it != g_abs_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getSequencer(); if (seq != NULL) { // add all the latencies - latency_hist.add(seq->getLatencyHist()); - hit_latency_hist.add(seq->getHitLatencyHist()); - miss_latency_hist.add(seq->getMissLatencyHist()); + m_latencyHist.add(seq->getLatencyHist()); + m_hitLatencyHist.add(seq->getHitLatencyHist()); + m_missLatencyHist.add(seq->getMissLatencyHist()); // add the per request type latencies for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { - type_latency_hist[j] - .add(seq->getTypeLatencyHist(j)); - hit_type_latency_hist[j] - .add(seq->getHitTypeLatencyHist(j)); - miss_type_latency_hist[j] - .add(seq->getMissTypeLatencyHist(j)); + m_typeLatencyHist[j] + ->add(seq->getTypeLatencyHist(j)); + m_hitTypeLatencyHist[j] + ->add(seq->getHitTypeLatencyHist(j)); + m_missTypeLatencyHist[j] + ->add(seq->getMissTypeLatencyHist(j)); } // add the per machine type miss latencies for (uint32_t j = 0; j < MachineType_NUM; ++j) { - hit_mach_latency_hist[j] - .add(seq->getHitMachLatencyHist(j)); - miss_mach_latency_hist[j] - .add(seq->getMissMachLatencyHist(j)); + m_hitMachLatencyHist[j] + ->add(seq->getHitMachLatencyHist(j)); + m_missMachLatencyHist[j] + ->add(seq->getMissMachLatencyHist(j)); - issue_to_initial_delay_hist[j].add( + m_IssueToInitialDelayHist[j]->add( seq->getIssueToInitialDelayHist(MachineType(j))); - initial_to_forward_delay_hist[j].add( + m_InitialToForwardDelayHist[j]->add( seq->getInitialToForwardDelayHist(MachineType(j))); - forward_to_first_response_delay_hist[j].add(seq-> + m_ForwardToFirstResponseDelayHist[j]->add(seq-> getForwardRequestToFirstResponseHist(MachineType(j))); - first_response_to_completion_delay_hist[j].add(seq-> - getFirstResponseToCompletionDelayHist(MachineType(j))); - incomplete_times[j] += + m_FirstResponseToCompletionDelayHist[j]->add(seq-> + getFirstResponseToCompletionDelayHist( + MachineType(j))); + m_IncompleteTimes[j] += seq->getIncompleteTimes(MachineType(j)); } // add the per (request, machine) type miss latencies for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { for (uint32_t k = 0; k < MachineType_NUM; k++) { - hit_type_mach_latency_hist[j][k].add( - seq->getHitTypeMachLatencyHist(j,k)); - miss_type_mach_latency_hist[j][k].add( - seq->getMissTypeMachLatencyHist(j,k)); + m_hitTypeMachLatencyHist[j][k]->add( + seq->getHitTypeMachLatencyHist(j,k)); + m_missTypeMachLatencyHist[j][k]->add( + seq->getMissTypeMachLatencyHist(j,k)); } } } } } - - out << "latency: " << latency_hist << endl; - for (int i = 0; i < RubyRequestType_NUM; i++) { - if (type_latency_hist[i].size() > 0) { - out << "latency: " << RubyRequestType(i) << ": " - << type_latency_hist[i] << endl; - } - } - - out << "hit latency: " << hit_latency_hist << endl; - for (int i = 0; i < RubyRequestType_NUM; i++) { - if (hit_type_latency_hist[i].size() > 0) { - out << "hit latency: " << RubyRequestType(i) << ": " - << hit_type_latency_hist[i] << endl; - } - } - - for (int i = 0; i < MachineType_NUM; i++) { - if (hit_mach_latency_hist[i].size() > 0) { - out << "hit latency: " << MachineType(i) << ": " - << hit_mach_latency_hist[i] << endl; - } - } - - for (int i = 0; i < RubyRequestType_NUM; i++) { - for (int j = 0; j < MachineType_NUM; j++) { - if (hit_type_mach_latency_hist[i][j].size() > 0) { - out << "hit latency: " << RubyRequestType(i) - << ": " << MachineType(j) << ": " - << hit_type_mach_latency_hist[i][j] << endl; - } - } - } - - out << "miss latency: " << miss_latency_hist << endl; - for (int i = 0; i < RubyRequestType_NUM; i++) { - if (miss_type_latency_hist[i].size() > 0) { - out << "miss latency: " << RubyRequestType(i) << ": " - << miss_type_latency_hist[i] << endl; - } - } - - for (int i = 0; i < MachineType_NUM; i++) { - if (miss_mach_latency_hist[i].size() > 0) { - out << "miss latency: " << MachineType(i) << ": " - << miss_mach_latency_hist[i] << endl; - - out << "miss latency: " << MachineType(i) - << "::issue_to_initial_request: " - << issue_to_initial_delay_hist[i] << endl; - out << "miss latency: " << MachineType(i) - << "::initial_to_forward_request: " - << initial_to_forward_delay_hist[i] << endl; - out << "miss latency: " << MachineType(i) - << "::forward_to_first_response: " - << forward_to_first_response_delay_hist[i] << endl; - out << "miss latency: " << MachineType(i) - << "::first_response_to_completion: " - << first_response_to_completion_delay_hist[i] << endl; - out << "incomplete times: " << incomplete_times[i] << endl; - } - } - - for (int i = 0; i < RubyRequestType_NUM; i++) { - for (int j = 0; j < MachineType_NUM; j++) { - if (miss_type_mach_latency_hist[i][j].size() > 0) { - out << "miss latency: " << RubyRequestType(i) - << ": " << MachineType(j) << ": " - << miss_type_mach_latency_hist[i][j] << endl; - } - } - } - - out << endl; -} - -void -Profiler::printStats(ostream& out, bool short_stats) -{ - out << endl; - if (short_stats) { - out << "SHORT "; - } - out << "Profiler Stats" << endl; - out << "--------------" << endl; - - Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start; - - out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl; - out << "Ruby_start_time: " << m_ruby_start << endl; - out << "Ruby_cycles: " << ruby_cycles << endl; - out << endl; - - if (!short_stats) { - out << "Busy Controller Counts:" << endl; - for (uint32_t i = 0; i < MachineType_NUM; i++) { - uint32_t size = MachineType_base_count((MachineType)i); - - for (uint32_t j = 0; j < size; j++) { - MachineID machID; - machID.type = (MachineType)i; - machID.num = j; - - AbstractController *ctr = - (*(g_abs_controls[i].find(j))).second; - out << machID << ":" << ctr->getFullyBusyCycles() << " "; - if ((j + 1) % 8 == 0) { - out << endl; - } - } - out << endl; - } - out << endl; - - out << "Busy Bank Count:" << m_busyBankCount << endl; - out << endl; - - printOutstandingReqProfile(out); - out << endl; - } - - if (!short_stats) { - out << "All Non-Zero Cycle Demand Cache Accesses" << endl; - out << "----------------------------------------" << endl; - printMissLatencyProfile(out); - - if (m_all_sharing_histogram.size() > 0) { - out << "all_sharing: " << m_all_sharing_histogram << endl; - out << "read_sharing: " << m_read_sharing_histogram << endl; - out << "write_sharing: " << m_write_sharing_histogram << endl; - - out << "all_sharing_percent: "; - m_all_sharing_histogram.printPercent(out); - out << endl; - - out << "read_sharing_percent: "; - m_read_sharing_histogram.printPercent(out); - out << endl; - - out << "write_sharing_percent: "; - m_write_sharing_histogram.printPercent(out); - out << endl; - - int64 total_miss = m_cache_to_cache + m_memory_to_cache; - out << "all_misses: " << total_miss << endl; - out << "cache_to_cache_misses: " << m_cache_to_cache << endl; - out << "memory_to_cache_misses: " << m_memory_to_cache << endl; - out << "cache_to_cache_percent: " - << 100.0 * (double(m_cache_to_cache) / double(total_miss)) - << endl; - out << "memory_to_cache_percent: " - << 100.0 * (double(m_memory_to_cache) / double(total_miss)) - << endl; - out << endl; - } - - printRequestProfile(out); - - if (!m_all_instructions) { - m_address_profiler_ptr->printStats(out); - } - - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(out); - } - - out << endl; - printDelayProfile(out); - } -} - -void -Profiler::clearStats() -{ - m_ruby_start = g_system_ptr->curCycle(); - m_real_time_start_time = time(NULL); - - m_busyBankCount = 0; - m_read_sharing_histogram.clear(); - m_write_sharing_histogram.clear(); - m_all_sharing_histogram.clear(); - m_cache_to_cache = 0; - m_memory_to_cache = 0; - - // update the start time - m_ruby_start = g_system_ptr->curCycle(); } void @@ -496,60 +354,3 @@ Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) msg.getType(), msg.getAccessMode(), id, false); } } - -void -Profiler::profileSharing(const Address& addr, AccessType type, - NodeID requestor, const Set& sharers, - const Set& owner) -{ - Set set_contacted(owner); - if (type == AccessType_Write) { - set_contacted.addSet(sharers); - } - set_contacted.remove(requestor); - int number_contacted = set_contacted.count(); - - if (type == AccessType_Write) { - m_write_sharing_histogram.add(number_contacted); - } else { - m_read_sharing_histogram.add(number_contacted); - } - m_all_sharing_histogram.add(number_contacted); - - if (number_contacted == 0) { - m_memory_to_cache++; - } else { - m_cache_to_cache++; - } -} - -void -Profiler::bankBusy() -{ - m_busyBankCount++; -} - -void -Profiler::rubyWatch(int id) -{ - uint64 tr = 0; - Address watch_address = Address(tr); - - DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id, - watch_address); - - // don't care about success or failure - m_watch_address_set.insert(watch_address); -} - -bool -Profiler::watchAddress(Address addr) -{ - return m_watch_address_set.count(addr) > 0; -} - -Profiler * -RubyProfilerParams::create() -{ - return new Profiler(this); -} diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index e7b3c5f8d..247c705b0 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -45,89 +45,43 @@ #ifndef __MEM_RUBY_PROFILER_PROFILER_HH__ #define __MEM_RUBY_PROFILER_PROFILER_HH__ -#include #include #include #include +#include "base/callback.hh" #include "base/hashmap.hh" +#include "base/statistics.hh" #include "mem/protocol/AccessType.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/protocol/RubyAccessMode.hh" #include "mem/protocol/RubyRequestType.hh" -#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/Global.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Set.hh" #include "mem/ruby/system/MachineID.hh" -#include "mem/ruby/system/MemoryControl.hh" -#include "params/RubyProfiler.hh" -#include "sim/sim_object.hh" +#include "params/RubySystem.hh" class RubyRequest; class AddressProfiler; -class Profiler : public SimObject +class Profiler { public: - typedef RubyProfilerParams Params; - Profiler(const Params *); + Profiler(const RubySystemParams *); ~Profiler(); void wakeup(); - - void setPeriodicStatsFile(const std::string& filename); - void setPeriodicStatsInterval(int64_t period); - - void printStats(std::ostream& out, bool short_stats=false); - void printShortStats(std::ostream& out) { printStats(out, true); } - void printTraceStats(std::ostream& out) const; - void clearStats(); - void printResourceUsage(std::ostream& out) const; + void regStats(const std::string &name); + void collateStats(); AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } void addAddressTraceSample(const RubyRequest& msg, NodeID id); - void profileRequest(const std::string& requestStr); - void profileSharing(const Address& addr, AccessType type, - NodeID requestor, const Set& sharers, - const Set& owner); - - void profileMulticastRetry(const Address& addr, int count); - - void profileFilterAction(int action); - - void profileConflictingRequests(const Address& addr); - - void - profileAverageLatencyEstimate(int latency) - { - m_average_latency_estimate.add(latency); - } - - void controllerBusy(MachineID machID); - void bankBusy(); - - void print(std::ostream& out) const; - - void rubyWatch(int proc); - bool watchAddress(Address addr); - - // return Ruby's start time - Cycles getRubyStartTime() { return m_ruby_start; } - // added by SS bool getHotLines() { return m_hot_lines; } bool getAllInstructions() { return m_all_instructions; } - private: - void printRequestProfile(std::ostream &out) const; - void printDelayProfile(std::ostream &out) const; - void printOutstandingReqProfile(std::ostream &out) const; - void printMissLatencyProfile(std::ostream &out) const; - private: // Private copy constructor and assignment operator Profiler(const Profiler& obj); @@ -136,33 +90,46 @@ class Profiler : public SimObject AddressProfiler* m_address_profiler_ptr; AddressProfiler* m_inst_profiler_ptr; - Cycles m_ruby_start; - time_t m_real_time_start_time; + Stats::Histogram delayHistogram; + std::vector delayVCHistogram; - int64_t m_busyBankCount; + //! Histogram for number of outstanding requests per cycle. + Stats::Histogram m_outstandReqHist; - Histogram m_read_sharing_histogram; - Histogram m_write_sharing_histogram; - Histogram m_all_sharing_histogram; - int64 m_cache_to_cache; - int64 m_memory_to_cache; + //! Histogram for holding latency profile of all requests. + Stats::Histogram m_latencyHist; + std::vector m_typeLatencyHist; - Histogram m_average_latency_estimate; - m5::hash_set
m_watch_address_set; + //! Histogram for holding latency profile of all requests that + //! hit in the controller connected to this sequencer. + Stats::Histogram m_hitLatencyHist; + std::vector m_hitTypeLatencyHist; + + //! Histograms for profiling the latencies for requests that + //! did not required external messages. + std::vector m_hitMachLatencyHist; + std::vector< std::vector > m_hitTypeMachLatencyHist; + + //! Histogram for holding latency profile of all requests that + //! miss in the controller connected to this sequencer. + Stats::Histogram m_missLatencyHist; + std::vector m_missTypeLatencyHist; + + //! Histograms for profiling the latencies for requests that + //! required external messages. + std::vector m_missMachLatencyHist; + std::vector< std::vector > m_missTypeMachLatencyHist; + + //! Histograms for recording the breakdown of miss latency + std::vector m_IssueToInitialDelayHist; + std::vector m_InitialToForwardDelayHist; + std::vector m_ForwardToFirstResponseDelayHist; + std::vector m_FirstResponseToCompletionDelayHist; + std::vector m_IncompleteTimes; //added by SS bool m_hot_lines; bool m_all_instructions; - - int m_num_of_sequencers; }; -inline std::ostream& -operator<<(std::ostream& out, const Profiler& obj) -{ - obj.print(out); - out << std::flush; - return out; -} - #endif // __MEM_RUBY_PROFILER_PROFILER_HH__ diff --git a/src/mem/ruby/profiler/Profiler.py b/src/mem/ruby/profiler/Profiler.py deleted file mode 100644 index 0bb1bbc3d..000000000 --- a/src/mem/ruby/profiler/Profiler.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2009 Advanced Micro Devices, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Authors: Steve Reinhardt -# Brad Beckmann - -from m5.params import * -from m5.SimObject import SimObject - -class RubyProfiler(SimObject): - type = 'RubyProfiler' - cxx_class = 'Profiler' - cxx_header = "mem/ruby/profiler/Profiler.hh" - hot_lines = Param.Bool(False, "") - all_instructions = Param.Bool(False, "") - num_of_sequencers = Param.Int("") - ruby_system = Param.RubySystem("") diff --git a/src/mem/ruby/profiler/SConscript b/src/mem/ruby/profiler/SConscript index 613c70aa0..d1e9972e4 100644 --- a/src/mem/ruby/profiler/SConscript +++ b/src/mem/ruby/profiler/SConscript @@ -33,8 +33,6 @@ Import('*') if env['PROTOCOL'] == 'None': Return() -SimObject('Profiler.py') - Source('AccessTraceForAddress.cc') Source('AddressProfiler.cc') Source('MemCntrlProfiler.cc') diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index e46158ca0..0f5a70a6e 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -31,8 +31,7 @@ #include "mem/ruby/system/System.hh" AbstractController::AbstractController(const Params *p) - : ClockedObject(p), Consumer(this), m_fully_busy_cycles(0), - m_request_count(0) + : ClockedObject(p), Consumer(this) { m_version = p->version; m_clusterID = p->cluster_id; @@ -54,44 +53,39 @@ void AbstractController::init() { params()->ruby_system->registerAbstractController(this); -} - -void -AbstractController::clearStats() -{ - m_requestProfileMap.clear(); - m_request_count = 0; - - m_delayHistogram.clear(); - + m_delayHistogram.init(10); uint32_t size = Network::getNumberOfVirtualNetworks(); - m_delayVCHistogram.resize(size); for (uint32_t i = 0; i < size; i++) { - m_delayVCHistogram[i].clear(); - } - - Sequencer *seq = getSequencer(); - if (seq != NULL) { - seq->clearStats(); + m_delayVCHistogram.push_back(new Stats::Histogram()); + m_delayVCHistogram[i]->init(10); } } void -AbstractController::profileRequest(const std::string &request) +AbstractController::resetStats() { - m_request_count++; + m_delayHistogram.reset(); + uint32_t size = Network::getNumberOfVirtualNetworks(); + for (uint32_t i = 0; i < size; i++) { + m_delayVCHistogram[i]->reset(); + } +} - // if it doesn't exist, conveniently, it will be created with the - // default value which is 0 - m_requestProfileMap[request]++; +void +AbstractController::regStats() +{ + m_fully_busy_cycles + .name(name() + ".fully_busy_cycles") + .desc("cycles for which number of transistions == max transitions") + .flags(Stats::nozero); } void AbstractController::profileMsgDelay(uint32_t virtualNetwork, Cycles delay) { assert(virtualNetwork < m_delayVCHistogram.size()); - m_delayHistogram.add(delay); - m_delayVCHistogram[virtualNetwork].add(delay); + m_delayHistogram.sample(delay); + m_delayVCHistogram[virtualNetwork]->sample(delay); } void diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 345eefa0a..ac577ed09 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -72,8 +72,8 @@ class AbstractController : public ClockedObject, public Consumer virtual void print(std::ostream & out) const = 0; virtual void wakeup() = 0; - virtual void clearStats() = 0; - virtual void regStats() = 0; + virtual void resetStats() = 0; + virtual void regStats(); virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0; virtual Sequencer* getSequencer() const = 0; @@ -99,14 +99,10 @@ class AbstractController : public ClockedObject, public Consumer public: MachineID getMachineID() const { return m_machineID; } - uint64_t getFullyBusyCycles() const { return m_fully_busy_cycles; } - uint64_t getRequestCount() const { return m_request_count; } - const std::map& getRequestProfileMap() const - { return m_requestProfileMap; } - Histogram& getDelayHist() { return m_delayHistogram; } - Histogram& getDelayVCHist(uint32_t index) - { return m_delayVCHistogram[index]; } + Stats::Histogram& getDelayHist() { return m_delayHistogram; } + Stats::Histogram& getDelayVCHist(uint32_t index) + { return *(m_delayVCHistogram[index]); } MessageBuffer *getPeerQueue(uint32_t pid) { @@ -156,17 +152,12 @@ class AbstractController : public ClockedObject, public Consumer //! Counter for the number of cycles when the transitions carried out //! were equal to the maximum allowed - uint64_t m_fully_busy_cycles; - - //! Map for couting requests of different types. The controller should - //! call requisite function for updating the count. - std::map m_requestProfileMap; - uint64_t m_request_count; + Stats::Scalar m_fully_busy_cycles; //! Histogram for profiling delay for the messages this controller //! cares for - Histogram m_delayHistogram; - std::vector m_delayVCHistogram; + Stats::Histogram m_delayHistogram; + std::vector m_delayVCHistogram; //! Callback class used for collating statistics from all the //! controller of this type. @@ -177,12 +168,7 @@ class AbstractController : public ClockedObject, public Consumer public: virtual ~StatsCallback() {} - - StatsCallback(AbstractController *_ctr) - : ctr(_ctr) - { - } - + StatsCallback(AbstractController *_ctr) : ctr(_ctr) {} void process() {ctr->collateStats();} }; }; diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 1e9336d76..f8c21c91a 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -136,7 +136,7 @@ class RubyPort : public MemObject // A pointer to the controller is needed for atomic support. // void setController(AbstractController* _cntrl) { m_controller = _cntrl; } - int getId() { return m_version; } + uint32_t getId() { return m_version; } unsigned int drain(DrainManager *dm); protected: @@ -145,7 +145,7 @@ class RubyPort : public MemObject void testDrainComplete(); void ruby_eviction_callback(const Address& address); - int m_version; + uint32_t m_version; AbstractController* m_controller; MessageBuffer* m_mandatory_q_ptr; PioPort pio_port; diff --git a/src/mem/ruby/system/RubySystem.py b/src/mem/ruby/system/RubySystem.py index 29e395404..0943fb3f6 100644 --- a/src/mem/ruby/system/RubySystem.py +++ b/src/mem/ruby/system/RubySystem.py @@ -39,6 +39,9 @@ class RubySystem(ClockedObject): block_size_bytes = Param.UInt32(64, "default cache block size; must be a power of two"); mem_size = Param.MemorySize("total memory size of the system"); - stats_filename = Param.String("ruby.stats", - "file to which ruby dumps its stats") no_mem_vec = Param.Bool(False, "do not allocate Ruby's mem vector"); + + # Profiler related configuration variables + hot_lines = Param.Bool(False, "") + all_instructions = Param.Bool(False, "") + num_of_sequencers = Param.Int("") diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 8e61766b8..be554d5cf 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -55,13 +55,8 @@ RubySequencerParams::create() } Sequencer::Sequencer(const Params *p) - : RubyPort(p), deadlockCheckEvent(this) + : RubyPort(p), m_IncompleteTimes(MachineType_NUM), deadlockCheckEvent(this) { - m_store_waiting_on_load_cycles = 0; - m_store_waiting_on_store_cycles = 0; - m_load_waiting_on_store_cycles = 0; - m_load_waiting_on_load_cycles = 0; - m_outstanding_count = 0; m_instCache_ptr = p->icache; @@ -133,80 +128,34 @@ Sequencer::wakeup() } } -void Sequencer::clearStats() +void Sequencer::resetStats() { - m_outstandReqHist.clear(); - - // Initialize the histograms that track latency of all requests - m_latencyHist.clear(20); - m_typeLatencyHist.resize(RubyRequestType_NUM); + m_latencyHist.reset(); + m_hitLatencyHist.reset(); + m_missLatencyHist.reset(); for (int i = 0; i < RubyRequestType_NUM; i++) { - m_typeLatencyHist[i].clear(20); - } - - // Initialize the histograms that track latency of requests that - // hit in the cache attached to the sequencer. - m_hitLatencyHist.clear(20); - m_hitTypeLatencyHist.resize(RubyRequestType_NUM); - m_hitTypeMachLatencyHist.resize(RubyRequestType_NUM); - - for (int i = 0; i < RubyRequestType_NUM; i++) { - m_hitTypeLatencyHist[i].clear(20); - m_hitTypeMachLatencyHist[i].resize(MachineType_NUM); + m_typeLatencyHist[i]->reset(); + m_hitTypeLatencyHist[i]->reset(); + m_missTypeLatencyHist[i]->reset(); for (int j = 0; j < MachineType_NUM; j++) { - m_hitTypeMachLatencyHist[i][j].clear(20); + m_hitTypeMachLatencyHist[i][j]->reset(); + m_missTypeMachLatencyHist[i][j]->reset(); } } - // Initialize the histograms that track the latency of requests that - // missed in the cache attached to the sequencer. - m_missLatencyHist.clear(20); - m_missTypeLatencyHist.resize(RubyRequestType_NUM); - m_missTypeMachLatencyHist.resize(RubyRequestType_NUM); - - for (int i = 0; i < RubyRequestType_NUM; i++) { - m_missTypeLatencyHist[i].clear(20); - m_missTypeMachLatencyHist[i].resize(MachineType_NUM); - for (int j = 0; j < MachineType_NUM; j++) { - m_missTypeMachLatencyHist[i][j].clear(20); - } - } - - m_hitMachLatencyHist.resize(MachineType_NUM); - m_missMachLatencyHist.resize(MachineType_NUM); - m_IssueToInitialDelayHist.resize(MachineType_NUM); - m_InitialToForwardDelayHist.resize(MachineType_NUM); - m_ForwardToFirstResponseDelayHist.resize(MachineType_NUM); - m_FirstResponseToCompletionDelayHist.resize(MachineType_NUM); - m_IncompleteTimes.resize(MachineType_NUM); - for (int i = 0; i < MachineType_NUM; i++) { - m_missMachLatencyHist[i].clear(20); - m_hitMachLatencyHist[i].clear(20); + m_missMachLatencyHist[i]->reset(); + m_hitMachLatencyHist[i]->reset(); - m_IssueToInitialDelayHist[i].clear(20); - m_InitialToForwardDelayHist[i].clear(20); - m_ForwardToFirstResponseDelayHist[i].clear(20); - m_FirstResponseToCompletionDelayHist[i].clear(20); + m_IssueToInitialDelayHist[i]->reset(); + m_InitialToForwardDelayHist[i]->reset(); + m_ForwardToFirstResponseDelayHist[i]->reset(); + m_FirstResponseToCompletionDelayHist[i]->reset(); m_IncompleteTimes[i] = 0; } } -void -Sequencer::printStats(ostream & out) const -{ - out << "Sequencer: " << m_name << endl - << " store_waiting_on_load_cycles: " - << m_store_waiting_on_load_cycles << endl - << " store_waiting_on_store_cycles: " - << m_store_waiting_on_store_cycles << endl - << " load_waiting_on_load_cycles: " - << m_load_waiting_on_load_cycles << endl - << " load_waiting_on_store_cycles: " - << m_load_waiting_on_store_cycles << endl; -} - void Sequencer::printProgress(ostream& out) const { @@ -291,7 +240,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type) // Check if there is any outstanding read request for the same // cache line. if (m_readRequestTable.count(line_addr) > 0) { - m_store_waiting_on_load_cycles++; + m_store_waiting_on_load++; return RequestStatus_Aliased; } @@ -303,14 +252,14 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type) m_outstanding_count++; } else { // There is an outstanding write request for the cache line - m_store_waiting_on_store_cycles++; + m_store_waiting_on_store++; return RequestStatus_Aliased; } } else { // Check if there is any outstanding write request for the same // cache line. if (m_writeRequestTable.count(line_addr) > 0) { - m_load_waiting_on_store_cycles++; + m_load_waiting_on_store++; return RequestStatus_Aliased; } @@ -323,12 +272,12 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type) m_outstanding_count++; } else { // There is an outstanding read request for the cache line - m_load_waiting_on_load_cycles++; + m_load_waiting_on_load++; return RequestStatus_Aliased; } } - m_outstandReqHist.add(m_outstanding_count); + m_outstandReqHist.sample(m_outstanding_count); assert(m_outstanding_count == (m_writeRequestTable.size() + m_readRequestTable.size())); @@ -432,41 +381,41 @@ Sequencer::recordMissLatency(const Cycles cycles, const RubyRequestType type, Cycles forwardRequestTime, Cycles firstResponseTime, Cycles completionTime) { - m_latencyHist.add(cycles); - m_typeLatencyHist[type].add(cycles); + m_latencyHist.sample(cycles); + m_typeLatencyHist[type]->sample(cycles); if (isExternalHit) { - m_missLatencyHist.add(cycles); - m_missTypeLatencyHist[type].add(cycles); + m_missLatencyHist.sample(cycles); + m_missTypeLatencyHist[type]->sample(cycles); if (respondingMach != MachineType_NUM) { - m_missMachLatencyHist[respondingMach].add(cycles); - m_missTypeMachLatencyHist[type][respondingMach].add(cycles); + m_missMachLatencyHist[respondingMach]->sample(cycles); + m_missTypeMachLatencyHist[type][respondingMach]->sample(cycles); if ((issuedTime <= initialRequestTime) && (initialRequestTime <= forwardRequestTime) && (forwardRequestTime <= firstResponseTime) && (firstResponseTime <= completionTime)) { - m_IssueToInitialDelayHist[respondingMach].add( + m_IssueToInitialDelayHist[respondingMach]->sample( initialRequestTime - issuedTime); - m_InitialToForwardDelayHist[respondingMach].add( + m_InitialToForwardDelayHist[respondingMach]->sample( forwardRequestTime - initialRequestTime); - m_ForwardToFirstResponseDelayHist[respondingMach].add( + m_ForwardToFirstResponseDelayHist[respondingMach]->sample( firstResponseTime - forwardRequestTime); - m_FirstResponseToCompletionDelayHist[respondingMach].add( + m_FirstResponseToCompletionDelayHist[respondingMach]->sample( completionTime - firstResponseTime); } else { m_IncompleteTimes[respondingMach]++; } } } else { - m_hitLatencyHist.add(cycles); - m_hitTypeLatencyHist[type].add(cycles); + m_hitLatencyHist.sample(cycles); + m_hitTypeLatencyHist[type]->sample(cycles); if (respondingMach != MachineType_NUM) { - m_hitMachLatencyHist[respondingMach].add(cycles); - m_hitTypeMachLatencyHist[type][respondingMach].add(cycles); + m_hitMachLatencyHist[respondingMach]->sample(cycles); + m_hitTypeMachLatencyHist[type][respondingMach]->sample(cycles); } } } @@ -810,3 +759,76 @@ Sequencer::evictionCallback(const Address& address) { ruby_eviction_callback(address); } + +void +Sequencer::regStats() +{ + m_store_waiting_on_load + .name(name() + ".store_waiting_on_load") + .desc("Number of times a store aliased with a pending load") + .flags(Stats::nozero); + m_store_waiting_on_store + .name(name() + ".store_waiting_on_store") + .desc("Number of times a store aliased with a pending store") + .flags(Stats::nozero); + m_load_waiting_on_load + .name(name() + ".load_waiting_on_load") + .desc("Number of times a load aliased with a pending load") + .flags(Stats::nozero); + m_load_waiting_on_store + .name(name() + ".load_waiting_on_store") + .desc("Number of times a load aliased with a pending store") + .flags(Stats::nozero); + + // These statistical variables are not for display. + // The profiler will collate these across different + // sequencers and display those collated statistics. + m_outstandReqHist.init(10); + m_latencyHist.init(10); + m_hitLatencyHist.init(10); + m_missLatencyHist.init(10); + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist.push_back(new Stats::Histogram()); + m_typeLatencyHist[i]->init(10); + + m_hitTypeLatencyHist.push_back(new Stats::Histogram()); + m_hitTypeLatencyHist[i]->init(10); + + m_missTypeLatencyHist.push_back(new Stats::Histogram()); + m_missTypeLatencyHist[i]->init(10); + } + + for (int i = 0; i < MachineType_NUM; i++) { + m_hitMachLatencyHist.push_back(new Stats::Histogram()); + m_hitMachLatencyHist[i]->init(10); + + m_missMachLatencyHist.push_back(new Stats::Histogram()); + m_missMachLatencyHist[i]->init(10); + + m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHist[i]->init(10); + + m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHist[i]->init(10); + + m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHist[i]->init(10); + + m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHist[i]->init(10); + } + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_hitTypeMachLatencyHist.push_back(std::vector()); + m_missTypeMachLatencyHist.push_back(std::vector()); + + for (int j = 0; j < MachineType_NUM; j++) { + m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_hitTypeMachLatencyHist[i][j]->init(10); + + m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHist[i][j]->init(10); + } + } +} diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 86e6aa2a9..d7dc7d151 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -66,7 +66,9 @@ class Sequencer : public RubyPort // Public Methods void wakeup(); // Used only for deadlock detection void printProgress(std::ostream& out) const; - void clearStats(); + void resetStats(); + void collateStats(); + void regStats(); void writeCallback(const Address& address, DataBlock& data, @@ -95,7 +97,6 @@ class Sequencer : public RubyPort { deschedule(deadlockCheckEvent); } void print(std::ostream& out) const; - void printStats(std::ostream& out) const; void checkCoherence(const Address& address); void markRemoved(); @@ -104,45 +105,50 @@ class Sequencer : public RubyPort void invalidateSC(const Address& address); void recordRequestType(SequencerRequestType requestType); - Histogram& getOutstandReqHist() { return m_outstandReqHist; } + Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; } - Histogram& getLatencyHist() { return m_latencyHist; } - Histogram& getTypeLatencyHist(uint32_t t) - { return m_typeLatencyHist[t]; } + Stats::Histogram& getLatencyHist() { return m_latencyHist; } + Stats::Histogram& getTypeLatencyHist(uint32_t t) + { return *m_typeLatencyHist[t]; } - Histogram& getHitLatencyHist() { return m_hitLatencyHist; } - Histogram& getHitTypeLatencyHist(uint32_t t) - { return m_hitTypeLatencyHist[t]; } + Stats::Histogram& getHitLatencyHist() { return m_hitLatencyHist; } + Stats::Histogram& getHitTypeLatencyHist(uint32_t t) + { return *m_hitTypeLatencyHist[t]; } - Histogram& getHitMachLatencyHist(uint32_t t) - { return m_hitMachLatencyHist[t]; } + Stats::Histogram& getHitMachLatencyHist(uint32_t t) + { return *m_hitMachLatencyHist[t]; } - Histogram& getHitTypeMachLatencyHist(uint32_t r, uint32_t t) - { return m_hitTypeMachLatencyHist[r][t]; } + Stats::Histogram& getHitTypeMachLatencyHist(uint32_t r, uint32_t t) + { return *m_hitTypeMachLatencyHist[r][t]; } - Histogram& getMissLatencyHist() { return m_missLatencyHist; } - Histogram& getMissTypeLatencyHist(uint32_t t) - { return m_missTypeLatencyHist[t]; } + Stats::Histogram& getMissLatencyHist() + { return m_missLatencyHist; } + Stats::Histogram& getMissTypeLatencyHist(uint32_t t) + { return *m_missTypeLatencyHist[t]; } - Histogram& getMissMachLatencyHist(uint32_t t) - { return m_missMachLatencyHist[t]; } + Stats::Histogram& getMissMachLatencyHist(uint32_t t) const + { return *m_missMachLatencyHist[t]; } - Histogram& getMissTypeMachLatencyHist(uint32_t r, uint32_t t) - { return m_missTypeMachLatencyHist[r][t]; } + Stats::Histogram& + getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const + { return *m_missTypeMachLatencyHist[r][t]; } - Histogram& getIssueToInitialDelayHist(uint32_t t) - { return m_IssueToInitialDelayHist[t]; } + Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const + { return *m_IssueToInitialDelayHist[t]; } - Histogram& getInitialToForwardDelayHist(const MachineType t) - { return m_InitialToForwardDelayHist[t]; } + Stats::Histogram& + getInitialToForwardDelayHist(const MachineType t) const + { return *m_InitialToForwardDelayHist[t]; } - Histogram& getForwardRequestToFirstResponseHist(const MachineType t) - { return m_ForwardToFirstResponseDelayHist[t]; } + Stats::Histogram& + getForwardRequestToFirstResponseHist(const MachineType t) const + { return *m_ForwardToFirstResponseDelayHist[t]; } - Histogram& getFirstResponseToCompletionDelayHist(const MachineType t) - { return m_FirstResponseToCompletionDelayHist[t]; } + Stats::Histogram& + getFirstResponseToCompletionDelayHist(const MachineType t) const + { return *m_FirstResponseToCompletionDelayHist[t]; } - const uint64_t getIncompleteTimes(const MachineType t) const + Stats::Counter getIncompleteTimes(const MachineType t) const { return m_IncompleteTimes[t]; } private: @@ -183,46 +189,47 @@ class Sequencer : public RubyPort int m_outstanding_count; bool m_deadlock_check_scheduled; - uint32_t m_store_waiting_on_load_cycles; - uint32_t m_store_waiting_on_store_cycles; - uint32_t m_load_waiting_on_store_cycles; - uint32_t m_load_waiting_on_load_cycles; + //! Counters for recording aliasing information. + Stats::Scalar m_store_waiting_on_load; + Stats::Scalar m_store_waiting_on_store; + Stats::Scalar m_load_waiting_on_store; + Stats::Scalar m_load_waiting_on_load; bool m_usingNetworkTester; //! Histogram for number of outstanding requests per cycle. - Histogram m_outstandReqHist; + Stats::Histogram m_outstandReqHist; //! Histogram for holding latency profile of all requests. - Histogram m_latencyHist; - std::vector m_typeLatencyHist; + Stats::Histogram m_latencyHist; + std::vector m_typeLatencyHist; //! Histogram for holding latency profile of all requests that //! hit in the controller connected to this sequencer. - Histogram m_hitLatencyHist; - std::vector m_hitTypeLatencyHist; + Stats::Histogram m_hitLatencyHist; + std::vector m_hitTypeLatencyHist; //! Histograms for profiling the latencies for requests that //! did not required external messages. - std::vector m_hitMachLatencyHist; - std::vector< std::vector > m_hitTypeMachLatencyHist; + std::vector m_hitMachLatencyHist; + std::vector< std::vector > m_hitTypeMachLatencyHist; //! Histogram for holding latency profile of all requests that //! miss in the controller connected to this sequencer. - Histogram m_missLatencyHist; - std::vector m_missTypeLatencyHist; + Stats::Histogram m_missLatencyHist; + std::vector m_missTypeLatencyHist; //! Histograms for profiling the latencies for requests that //! required external messages. - std::vector m_missMachLatencyHist; - std::vector< std::vector > m_missTypeMachLatencyHist; + std::vector m_missMachLatencyHist; + std::vector< std::vector > m_missTypeMachLatencyHist; //! Histograms for recording the breakdown of miss latency - std::vector m_IssueToInitialDelayHist; - std::vector m_InitialToForwardDelayHist; - std::vector m_ForwardToFirstResponseDelayHist; - std::vector m_FirstResponseToCompletionDelayHist; - std::vector m_IncompleteTimes; + std::vector m_IssueToInitialDelayHist; + std::vector m_InitialToForwardDelayHist; + std::vector m_ForwardToFirstResponseDelayHist; + std::vector m_FirstResponseToCompletionDelayHist; + std::vector m_IncompleteTimes; class SequencerWakeupEvent : public Event diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index 016169bcc..b2f439178 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -37,7 +37,6 @@ #include "debug/RubySystem.hh" #include "mem/ruby/common/Address.hh" #include "mem/ruby/network/Network.hh" -#include "mem/ruby/profiler/Profiler.hh" #include "mem/ruby/system/System.hh" #include "sim/eventq.hh" #include "sim/simulate.hh" @@ -73,16 +72,12 @@ RubySystem::RubySystem(const Params *p) } if (p->no_mem_vec) { - m_mem_vec_ptr = NULL; + m_mem_vec = NULL; } else { - m_mem_vec_ptr = new MemoryVector; - m_mem_vec_ptr->resize(m_memory_size_bytes); + m_mem_vec = new MemoryVector; + m_mem_vec->resize(m_memory_size_bytes); } - // Print ruby configuration and stats at exit and when asked for - Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename, - this)); - m_warmup_enabled = false; m_cooldown_enabled = false; @@ -91,18 +86,17 @@ RubySystem::RubySystem(const Params *p) // Resize to the size of different machine types g_abs_controls.resize(MachineType_NUM); + + // Collate the statistics before they are printed. + Stats::registerDumpCallback(new RubyStatsCallback(this)); + // Create the profiler + m_profiler = new Profiler(p); } void RubySystem::registerNetwork(Network* network_ptr) { - m_network_ptr = network_ptr; -} - -void -RubySystem::registerProfiler(Profiler* profiler_ptr) -{ - m_profiler_ptr = profiler_ptr; + m_network = network_ptr; } void @@ -127,16 +121,10 @@ RubySystem::registerMemController(MemoryControl *mc) { RubySystem::~RubySystem() { - delete m_network_ptr; - delete m_profiler_ptr; - if (m_mem_vec_ptr) - delete m_mem_vec_ptr; -} - -void -RubySystem::printStats(ostream& out) -{ - m_profiler_ptr->printStats(out); + delete m_network; + delete m_profiler; + if (m_mem_vec) + delete m_mem_vec; } void @@ -223,8 +211,8 @@ RubySystem::serialize(std::ostream &os) uint8_t *raw_data = NULL; - if (m_mem_vec_ptr != NULL) { - uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data); + if (m_mem_vec != NULL) { + uint64 memory_trace_size = m_mem_vec->collatePages(raw_data); string memory_trace_file = name() + ".memory.gz"; writeCompressedTrace(raw_data, memory_trace_file, @@ -289,7 +277,7 @@ RubySystem::unserialize(Checkpoint *cp, const string §ion) { uint8_t *uncompressed_trace = NULL; - if (m_mem_vec_ptr != NULL) { + if (m_mem_vec != NULL) { string memory_trace_file; uint64 memory_trace_size = 0; @@ -299,7 +287,7 @@ RubySystem::unserialize(Checkpoint *cp, const string §ion) readCompressedTrace(memory_trace_file, uncompressed_trace, memory_trace_size); - m_mem_vec_ptr->populatePages(uncompressed_trace); + m_mem_vec->populatePages(uncompressed_trace); delete [] uncompressed_trace; uncompressed_trace = NULL; @@ -401,11 +389,6 @@ RubySystem::RubyEvent::process() void RubySystem::resetStats() { - m_profiler_ptr->clearStats(); - for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { - m_abs_cntrl_vec[cntrl]->clearStats(); - } - g_ruby_start = curCycle(); } @@ -552,7 +535,7 @@ RubySystem::functionalWrite(PacketPtr pkt) m_memory_controller_vec[i]->functionalWriteBuffers(pkt); } - num_functional_writes += m_network_ptr->functionalWrite(pkt); + num_functional_writes += m_network->functionalWrite(pkt); DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); return true; @@ -615,13 +598,3 @@ RubySystemParams::create() { return new RubySystem(this); } - -/** - * virtual process function that is invoked when the callback - * queue is executed. - */ -void -RubyDumpStatsCallback::process() -{ - ruby_system->printStats(*os); -} diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh index 474741bf7..de35116d4 100644 --- a/src/mem/ruby/system/System.hh +++ b/src/mem/ruby/system/System.hh @@ -39,16 +39,16 @@ #include "base/output.hh" #include "mem/packet.hh" #include "mem/ruby/common/Global.hh" +#include "mem/ruby/profiler/Profiler.hh" #include "mem/ruby/recorder/CacheRecorder.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/system/MemoryControl.hh" #include "mem/ruby/system/MemoryVector.hh" #include "mem/ruby/system/SparseMemory.hh" #include "params/RubySystem.hh" #include "sim/clocked_object.hh" class Network; -class Profiler; -class MemoryControl; class RubySystem : public ClockedObject { @@ -84,27 +84,27 @@ class RubySystem : public ClockedObject Network* getNetwork() { - assert(m_network_ptr != NULL); - return m_network_ptr; + assert(m_network != NULL); + return m_network; } Profiler* getProfiler() { - assert(m_profiler_ptr != NULL); - return m_profiler_ptr; + assert(m_profiler != NULL); + return m_profiler; } MemoryVector* getMemoryVector() { - assert(m_mem_vec_ptr != NULL); - return m_mem_vec_ptr; + assert(m_mem_vec != NULL); + return m_mem_vec; } - void printStats(std::ostream& out); + void regStats() { m_profiler->regStats(name()); } + void collateStats() { m_profiler->collateStats(); } void resetStats(); - void print(std::ostream& out) const; void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string §ion); @@ -114,7 +114,6 @@ class RubySystem : public ClockedObject bool functionalWrite(Packet *ptr); void registerNetwork(Network*); - void registerProfiler(Profiler*); void registerAbstractController(AbstractController*); void registerSparseMemory(SparseMemory*); void registerMemController(MemoryControl *mc); @@ -146,44 +145,28 @@ class RubySystem : public ClockedObject static uint64_t m_memory_size_bytes; static uint32_t m_memory_size_bits; - Network* m_network_ptr; + Network* m_network; std::vector m_memory_controller_vec; std::vector m_abs_cntrl_vec; public: - Profiler* m_profiler_ptr; - MemoryVector* m_mem_vec_ptr; + Profiler* m_profiler; + MemoryVector* m_mem_vec; bool m_warmup_enabled; bool m_cooldown_enabled; CacheRecorder* m_cache_recorder; std::vector m_sparse_memory_vector; }; -inline std::ostream& -operator<<(std::ostream& out, const RubySystem& obj) -{ - //obj.print(out); - out << std::flush; - return out; -} - -class RubyDumpStatsCallback : public Callback +class RubyStatsCallback : public Callback { private: - std::ostream *os; RubySystem *ruby_system; public: - virtual ~RubyDumpStatsCallback() {} - - RubyDumpStatsCallback(const std::string& _stats_filename, - RubySystem *system) - { - os = simout.create(_stats_filename); - ruby_system = system; - } - - void process(); + virtual ~RubyStatsCallback() {} + RubyStatsCallback(RubySystem *system) : ruby_system(system) {} + void process() { ruby_system->collateStats(); } }; #endif // __MEM_RUBY_SYSTEM_SYSTEM_HH__ diff --git a/src/mem/ruby/system/WireBuffer.cc b/src/mem/ruby/system/WireBuffer.cc index 8c7c9211e..f45bd5678 100644 --- a/src/mem/ruby/system/WireBuffer.cc +++ b/src/mem/ruby/system/WireBuffer.cc @@ -145,16 +145,6 @@ WireBuffer::print(ostream& out) const { } -void -WireBuffer::clearStats() const -{ -} - -void -WireBuffer::printStats(ostream& out) const -{ -} - void WireBuffer::wakeup() { diff --git a/src/mem/ruby/system/WireBuffer.hh b/src/mem/ruby/system/WireBuffer.hh index 3a8804798..9fb2d87a8 100644 --- a/src/mem/ruby/system/WireBuffer.hh +++ b/src/mem/ruby/system/WireBuffer.hh @@ -81,9 +81,6 @@ class WireBuffer : public SimObject bool areNSlotsAvailable(int n) { return true; }; // infinite queue length void print(std::ostream& out) const; - void clearStats() const; - void printStats(std::ostream& out) const; - uint64_t m_msg_counter; private: diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 89bb5dc0a..679d15f97 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -257,7 +257,7 @@ class $c_ident : public AbstractController void print(std::ostream& out) const; void wakeup(); - void clearStats(); + void resetStats(); void regStats(); void collateStats(); @@ -690,7 +690,7 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v code.dedent() code(''' AbstractController::init(); - clearStats(); + resetStats(); } ''') @@ -715,12 +715,15 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v void $c_ident::regStats() { + AbstractController::regStats(); + if (m_version == 0) { for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { Stats::Vector *t = new Stats::Vector(); t->init(m_num_controllers); - t->name(name() + "." + ${ident}_Event_to_string(event)); + t->name(g_system_ptr->name() + ".${c_ident}." + + ${ident}_Event_to_string(event)); t->flags(Stats::pdf | Stats::total | Stats::oneline | Stats::nozero); @@ -737,7 +740,8 @@ $c_ident::regStats() Stats::Vector *t = new Stats::Vector(); t->init(m_num_controllers); - t->name(name() + "." + ${ident}_State_to_string(state) + + t->name(g_system_ptr->name() + ".${c_ident}." + + ${ident}_State_to_string(state) + "." + ${ident}_Event_to_string(event)); t->flags(Stats::pdf | Stats::total | Stats::oneline | @@ -842,7 +846,7 @@ $c_ident::print(ostream& out) const out << "[$c_ident " << m_version << "]"; } -void $c_ident::clearStats() +void $c_ident::resetStats() { for (int state = 0; state < ${ident}_State_NUM; state++) { for (int event = 0; event < ${ident}_Event_NUM; event++) { @@ -854,7 +858,7 @@ void $c_ident::clearStats() m_event_counters[event] = 0; } - AbstractController::clearStats(); + AbstractController::resetStats(); } ''')