From bc1daae7fd528ca72ba14d669a0d00243f553be5 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sun, 10 Feb 2013 21:26:22 -0600 Subject: [PATCH] ruby: modifies histogram add() function This patch modifies the Histogram class' add() function so that it can add linear histograms as well. The function assumes that the left end point of the ranges of the two histograms are the same. It also assumes that when the ranges of the two histogram are changed to accomodate an element not in the range, the factor used in changing the range is same for both the histograms. This function is then used in removing one of the calls to the global profiler*. The histograms for recording the delays incurred in processing different requests are now maintained by the controllers. The profiler adds these histograms when it needs to print the stats. --- src/mem/ruby/common/Histogram.cc | 97 +++++++++++++------ src/mem/ruby/common/Histogram.hh | 30 +++--- src/mem/ruby/network/Network.hh | 2 +- src/mem/ruby/profiler/Profiler.cc | 62 ++++++------ src/mem/ruby/profiler/Profiler.hh | 7 +- .../slicc_interface/AbstractController.cc | 16 +++ .../slicc_interface/AbstractController.hh | 12 +++ .../RubySlicc_Profiler_interface.cc | 6 -- .../RubySlicc_Profiler_interface.hh | 1 - 9 files changed, 147 insertions(+), 86 deletions(-) diff --git a/src/mem/ruby/common/Histogram.cc b/src/mem/ruby/common/Histogram.cc index dcb723f1b..0558e5198 100644 --- a/src/mem/ruby/common/Histogram.cc +++ b/src/mem/ruby/common/Histogram.cc @@ -34,11 +34,10 @@ using namespace std; -Histogram::Histogram(int binsize, int bins) +Histogram::Histogram(int binsize, uint32_t bins) { m_binsize = binsize; - m_bins = bins; - clear(); + clear(bins); } Histogram::~Histogram() @@ -46,29 +45,43 @@ Histogram::~Histogram() } void -Histogram::clear(int binsize, int bins) +Histogram::clear(int binsize, uint32_t bins) { m_binsize = binsize; clear(bins); } void -Histogram::clear(int bins) +Histogram::clear(uint32_t bins) { - m_bins = bins; m_largest_bin = 0; m_max = 0; - m_data.resize(m_bins); - for (int i = 0; i < m_bins; i++) { + m_data.resize(bins); + for (uint32_t i = 0; i < bins; i++) { m_data[i] = 0; } + m_count = 0; m_max = 0; - m_sumSamples = 0; m_sumSquaredSamples = 0; } +void +Histogram::doubleBinSize() +{ + assert(m_binsize != -1); + uint32_t t_bins = m_data.size(); + + for (uint32_t i = 0; i < t_bins/2; i++) { + m_data[i] = m_data[i*2] + m_data[i*2 + 1]; + } + for (uint32_t i = t_bins/2; i < t_bins; i++) { + m_data[i] = 0; + } + + m_binsize *= 2; +} void Histogram::add(int64 value) @@ -80,7 +93,8 @@ Histogram::add(int64 value) m_sumSamples += value; m_sumSquaredSamples += (value*value); - int index; + uint32_t index; + if (m_binsize == -1) { // This is a log base 2 histogram if (value == 0) { @@ -93,37 +107,59 @@ Histogram::add(int64 value) } } else { // This is a linear histogram - while (m_max >= (m_bins * m_binsize)) { - for (int i = 0; i < m_bins/2; i++) { - m_data[i] = m_data[i*2] + m_data[i*2 + 1]; - } - for (int i = m_bins/2; i < m_bins; i++) { - m_data[i] = 0; - } - m_binsize *= 2; - } + uint32_t t_bins = m_data.size(); + + while (m_max >= (t_bins * m_binsize)) doubleBinSize(); index = value/m_binsize; } - assert(index >= 0); + + assert(index < m_data.size()); m_data[index]++; m_largest_bin = max(m_largest_bin, index); } void -Histogram::add(const Histogram& hist) +Histogram::add(Histogram& hist) { - assert(hist.getBins() == m_bins); - assert(hist.getBinSize() == -1); // assume log histogram - assert(m_binsize == -1); + uint32_t t_bins = m_data.size(); - for (int j = 0; j < hist.getData(0); j++) { - add(0); + if (hist.getBins() != t_bins) { + fatal("Histograms with different number of bins cannot be combined!"); } - for (int i = 1; i < m_bins; i++) { - for (int j = 0; j < hist.getData(i); j++) { - add(1<<(i-1)); // account for the + 1 index + m_max = max(m_max, hist.getMax()); + m_count += hist.size(); + m_sumSamples += hist.getTotal(); + m_sumSquaredSamples += hist.getSquaredTotal(); + + // Both histograms are log base 2. + if (hist.getBinSize() == -1 && m_binsize == -1) { + for (int j = 0; j < hist.getData(0); j++) { + add(0); } + + for (uint32_t i = 1; i < t_bins; i++) { + for (int j = 0; j < hist.getData(i); j++) { + add(1<<(i-1)); // account for the + 1 index + } + } + } else if (hist.getBinSize() >= 1 && m_binsize >= 1) { + // Both the histogram are linear. + // We are assuming that the two histograms have the same + // minimum value that they can store. + + while (m_binsize > hist.getBinSize()) hist.doubleBinSize(); + while (hist.getBinSize() > m_binsize) doubleBinSize(); + + assert(m_binsize == hist.getBinSize()); + + for (uint32_t i = 0; i < t_bins; i++) { + m_data[i] += hist.getData(i); + + if (m_data[i] > 0) m_largest_bin = i; + } + } else { + fatal("Don't know how to combine log and linear histograms!"); } } @@ -177,7 +213,8 @@ Histogram::printWithMultiplier(ostream& out, double multiplier) const << " | "; out << "standard deviation: " << getStandardDeviation() << " |"; } - for (int i = 0; i < m_bins && i <= m_largest_bin; i++) { + + for (uint32_t i = 0; i <= m_largest_bin; i++) { if (multiplier == 1.0) { out << " " << m_data[i]; } else { diff --git a/src/mem/ruby/common/Histogram.hh b/src/mem/ruby/common/Histogram.hh index bfc0e4293..c34e39af1 100644 --- a/src/mem/ruby/common/Histogram.hh +++ b/src/mem/ruby/common/Histogram.hh @@ -37,34 +37,38 @@ class Histogram { public: - Histogram(int binsize = 1, int bins = 50); + Histogram(int binsize = 1, uint32_t bins = 50); ~Histogram(); void add(int64 value); - void add(const Histogram& hist); - void clear() { clear(m_bins); } - void clear(int bins); - void clear(int binsize, int bins); - int64 size() const { return m_count; } - int getBins() const { return m_bins; } + void add(Histogram& hist); + void doubleBinSize(); + + void clear() { clear(m_data.size()); } + void clear(uint32_t bins); + void clear(int binsize, uint32_t bins); + + uint64_t size() const { return m_count; } + uint32_t getBins() const { return m_data.size(); } int getBinSize() const { return m_binsize; } int64 getTotal() const { return m_sumSamples; } - int64 getData(int index) const { return m_data[index]; } + uint64_t getSquaredTotal() const { return m_sumSquaredSamples; } + uint64_t getData(int index) const { return m_data[index]; } + int64 getMax() const { return m_max; } void printWithMultiplier(std::ostream& out, double multiplier) const; void printPercent(std::ostream& out) const; void print(std::ostream& out) const; private: - std::vector m_data; + std::vector m_data; int64 m_max; // the maximum value seen so far - int64 m_count; // the number of elements added + uint64_t m_count; // the number of elements added int m_binsize; // the size of each bucket - int m_bins; // the number of buckets - int m_largest_bin; // the largest bin used + uint32_t m_largest_bin; // the largest bin used int64 m_sumSamples; // the sum of all samples - int64 m_sumSquaredSamples; // the sum of the square of all samples + uint64_t m_sumSquaredSamples; // the sum of the square of all samples double getStandardDeviation() const; }; diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh index a59caebbd..9784af759 100644 --- a/src/mem/ruby/network/Network.hh +++ b/src/mem/ruby/network/Network.hh @@ -65,7 +65,7 @@ class Network : public ClockedObject virtual void init(); - static int getNumberOfVirtualNetworks() { return m_virtual_networks; } + static uint32_t getNumberOfVirtualNetworks() { return m_virtual_networks; } static uint32_t MessageSizeType_to_int(MessageSizeType size_type); // returns the queue requested for the given component diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 546934d52..165561fe8 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -223,6 +223,38 @@ Profiler::printRequestProfile(ostream &out) out << endl; } +void +Profiler::printDelayProfile(ostream &out) +{ + out << "Message Delayed Cycles" << endl; + out << "----------------------" << endl; + + uint32_t numVNets = Network::getNumberOfVirtualNetworks(); + Histogram delayHistogram; + std::vector delayVCHistogram(numVNets); + + for (uint32_t i = 0; i < MachineType_NUM; i++) { + for (map::iterator it = + g_abs_controls[i].begin(); + it != g_abs_controls[i].end(); ++it) { + + AbstractController *ctr = (*it).second; + delayHistogram.add(ctr->getDelayHist()); + + for (uint32_t i = 0; i < numVNets; i++) { + delayVCHistogram[i].add(ctr->getDelayVCHist(i)); + } + } + } + + out << "Total_delay_cycles: " << delayHistogram << endl; + + for (int i = 0; i < numVNets; i++) { + out << " virtual_network_" << i << "_delay_cycles: " + << delayVCHistogram[i] << endl; + } +} + void Profiler::printStats(ostream& out, bool short_stats) { @@ -435,16 +467,7 @@ Profiler::printStats(ostream& out, bool short_stats) } out << endl; - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; - out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; - out << "Total_nonPF_delay_cycles: " - << m_delayedCyclesNonPFHistogram << endl; - for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { - out << " virtual_network_" << i << "_delay_cycles: " - << m_delayedCyclesVCHistograms[i] << endl; - } - + printDelayProfile(out); printResourceUsage(out); } } @@ -488,14 +511,6 @@ Profiler::clearStats() m_busyBankCount = 0; - m_delayedCyclesHistogram.clear(); - m_delayedCyclesNonPFHistogram.clear(); - int size = Network::getNumberOfVirtualNetworks(); - m_delayedCyclesVCHistograms.resize(size); - for (int i = 0; i < size; i++) { - m_delayedCyclesVCHistograms[i].clear(); - } - m_missLatencyHistograms.resize(RubyRequestType_NUM); for (int i = 0; i < m_missLatencyHistograms.size(); i++) { m_missLatencyHistograms[i].clear(200); @@ -593,17 +608,6 @@ Profiler::profileSharing(const Address& addr, AccessType type, } } -void -Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles) -{ - assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); - m_delayedCyclesHistogram.add(delayCycles); - m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); - if (virtualNetwork != 0) { - m_delayedCyclesNonPFHistogram.add(delayCycles); - } -} - void Profiler::profilePFWait(Time waitTime) { diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 5b370de54..ecd57c035 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -152,8 +152,6 @@ class Profiler : public SimObject void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles); - void print(std::ostream& out) const; void rubyWatch(int proc); @@ -172,6 +170,7 @@ class Profiler : public SimObject private: void printRequestProfile(std::ostream &out); + void printDelayProfile(std::ostream &out); private: // Private copy constructor and assignment operator @@ -226,10 +225,6 @@ class Profiler : public SimObject std::vector m_SWPrefetchLatencyHistograms; std::vector m_SWPrefetchMachLatencyHistograms; - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - std::vector m_delayedCyclesVCHistograms; - Histogram m_outstanding_requests; Histogram m_outstanding_persistent_requests; diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index adf411f82..bcd09796a 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -52,6 +52,14 @@ AbstractController::clearStats() { m_requestProfileMap.clear(); m_request_count = 0; + + m_delayHistogram.clear(); + + uint32_t size = Network::getNumberOfVirtualNetworks(); + m_delayVCHistogram.resize(size); + for (uint32_t i = 0; i < size; i++) { + m_delayVCHistogram[i].clear(); + } } void @@ -63,3 +71,11 @@ AbstractController::profileRequest(const std::string &request) // default value which is 0 m_requestProfileMap[request]++; } + +void +AbstractController::profileMsgDelay(uint32_t virtualNetwork, Time delay) +{ + assert(virtualNetwork < m_delayVCHistogram.size()); + m_delayHistogram.add(delay); + m_delayVCHistogram[virtualNetwork].add(delay); +} diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 0e3af44a1..c452da723 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -36,6 +36,7 @@ #include "mem/ruby/common/Address.hh" #include "mem/ruby/common/Consumer.hh" #include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/Histogram.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/recorder/CacheRecorder.hh" #include "mem/ruby/system/MachineID.hh" @@ -92,9 +93,15 @@ class AbstractController : public ClockedObject, public Consumer const std::map& getRequestProfileMap() const { return m_requestProfileMap; } + Histogram& getDelayHist() { return m_delayHistogram; } + Histogram& getDelayVCHist(uint32_t index) + { return m_delayVCHistogram[index]; } + protected: //! Profiles original cache requests including PUTs void profileRequest(const std::string &request); + //! Profiles the delay associated with messages. + void profileMsgDelay(uint32_t virtualNetwork, Time delay); protected: int m_transitions_per_cycle; @@ -121,6 +128,11 @@ class AbstractController : public ClockedObject, public Consumer //! call requisite function for updating the count. std::map m_requestProfileMap; uint64_t m_request_count; + + //! Histogram for profiling delay for the messages this controller + //! cares for + Histogram m_delayHistogram; + std::vector m_delayVCHistogram; }; #endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__ diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc index b8503c2cb..a8d8198ca 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc +++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc @@ -55,12 +55,6 @@ profile_sharing(const Address& addr, AccessType type, NodeID requestor, profileSharing(addr, type, requestor, sharers, owner); } -void -profileMsgDelay(uint32_t virtualNetwork, Time delayCycles) -{ - g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles); -} - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh index 1796d9442..bfc0afd56 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh @@ -51,7 +51,6 @@ void profile_token_retry(const Address& addr, AccessType type, int count); void profile_filter_action(int action); void profile_persistent_prediction(const Address& addr, AccessType type); void profile_average_latency_estimate(int latency); -void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles); void profile_multicast_retry(const Address& addr, int count); void profileGetX(const Address& datablock, const Address& PC, const Set& owner,