diff --git a/src/mem/ruby/libruby.hh b/src/mem/ruby/libruby.hh index 94018e9b9..3b6e19c41 100644 --- a/src/mem/ruby/libruby.hh +++ b/src/mem/ruby/libruby.hh @@ -14,7 +14,8 @@ enum RubyRequestType { RubyRequestType_Locked_Read, RubyRequestType_Locked_Write, RubyRequestType_RMW_Read, - RubyRequestType_RMW_Write + RubyRequestType_RMW_Write, + RubyRequestType_NUM }; enum RubyAccessMode { diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index 9ff10dc90..9c9445de3 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -54,7 +54,6 @@ AddressProfiler::AddressProfiler() m_macroBlockAccessTrace = new Map; m_programCounterAccessTrace = new Map; m_retryProfileMap = new Map; - m_persistentPredictionProfileMap = new Map; clearStats(); } @@ -64,7 +63,6 @@ AddressProfiler::~AddressProfiler() delete m_macroBlockAccessTrace; delete m_programCounterAccessTrace; delete m_retryProfileMap; - delete m_persistentPredictionProfileMap; } void AddressProfiler::setHotLines(bool hot_lines){ @@ -125,31 +123,10 @@ void AddressProfiler::printStats(ostream& out) const m_retryProfileHisto.printPercent(out); out << endl; - out << "retry_histogram_per_instruction: "; - m_retryProfileHisto.printWithMultiplier(out, 1.0 / double(g_system_ptr->getProfiler()->getTotalInstructionsExecuted())); - out << endl; - printSorted(out, m_retryProfileMap, "block_address"); out << endl; } - if (m_persistentPredictionProfileHisto.size() > 0) { - out << "Persistent Prediction Profile" << endl; - out << "-------------" << endl; - out << endl; - out << "persistent prediction_histogram: " << m_persistentPredictionProfileHisto << endl; - - out << "persistent prediction_histogram_percent: "; - m_persistentPredictionProfileHisto.printPercent(out); - out << endl; - - out << "persistentPrediction_histogram_per_instruction: "; - m_persistentPredictionProfileHisto.printWithMultiplier(out, 1.0 / double(g_system_ptr->getProfiler()->getTotalInstructionsExecuted())); - out << endl; - - printSorted(out, m_persistentPredictionProfileMap, "block_address"); - out << endl; - } } void AddressProfiler::clearStats() @@ -233,12 +210,6 @@ void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, in } } -void AddressProfiler::profilePersistentPrediction(const Address& data_addr, AccessType type) -{ - m_persistentPredictionProfileHisto.add(1); - lookupTraceForAddress(data_addr, m_persistentPredictionProfileMap).addSample(1); -} - // ***** Normal Functions ****** static void printSorted(ostream& out, const Map* record_map, string description) diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 69bf1319a..24ee2af05 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -63,7 +63,6 @@ public: void clearStats(); void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss); - void profilePersistentPrediction(const Address& data_addr, AccessType type); void profileRetry(const Address& data_addr, AccessType type, int count); void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); @@ -87,8 +86,6 @@ private: Map* m_macroBlockAccessTrace; Map* m_programCounterAccessTrace; Map* m_retryProfileMap; - Map* m_persistentPredictionProfileMap; - Histogram m_persistentPredictionProfileHisto; Histogram m_retryProfileHisto; Histogram m_retryProfileHistoWrite; Histogram m_retryProfileHistoRead; diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index e8aa7edf9..d5c47825f 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -52,7 +52,6 @@ */ #include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/profiler/CacheProfiler.hh" #include "mem/ruby/profiler/AddressProfiler.hh" #include "mem/ruby/system/System.hh" #include "mem/ruby/network/Network.hh" @@ -73,27 +72,13 @@ static double process_memory_total(); static double process_memory_resident(); Profiler::Profiler(const string & name) - : m_conflicting_histogram(-1) { m_name = name; m_requestProfileMap_ptr = new Map; - m_L1D_cache_profiler_ptr = new CacheProfiler("L1D_cache"); - m_L1I_cache_profiler_ptr = new CacheProfiler("L1I_cache"); - - m_L2_cache_profiler_ptr = new CacheProfiler("L2_cache"); m_inst_profiler_ptr = NULL; m_address_profiler_ptr = NULL; -/* - m_address_profiler_ptr = new AddressProfiler; - m_inst_profiler_ptr = NULL; - if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler; - } -*/ - m_conflicting_map_ptr = new Map; - m_real_time_start_time = time(NULL); // Not reset in clearStats() m_stats_period = 1000000; // Default m_periodic_output_file_ptr = &cerr; @@ -105,12 +90,7 @@ Profiler::~Profiler() if (m_periodic_output_file_ptr != &cerr) { delete m_periodic_output_file_ptr; } - delete m_address_profiler_ptr; - delete m_L1D_cache_profiler_ptr; - delete m_L1I_cache_profiler_ptr; - delete m_L2_cache_profiler_ptr; delete m_requestProfileMap_ptr; - delete m_conflicting_map_ptr; } void Profiler::init(const vector & argv, vector memory_control_names) @@ -182,32 +162,24 @@ void Profiler::wakeup() { // FIXME - avoid the repeated code - Vector perProcInstructionCount; - perProcInstructionCount.setSize(RubySystem::getNumberOfSequencers()); - Vector perProcCycleCount; perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { - perProcInstructionCount[i] = g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1; perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; // The +1 allows us to avoid division by zero } integer_t total_misses = m_perProcTotalMisses.sum(); - integer_t instruction_executed = perProcInstructionCount.sum(); integer_t simics_cycles_executed = perProcCycleCount.sum(); integer_t transactions_started = m_perProcStartTransaction.sum(); integer_t transactions_ended = m_perProcEndTransaction.sum(); (*m_periodic_output_file_ptr) << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl; (*m_periodic_output_file_ptr) << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl; - (*m_periodic_output_file_ptr) << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; (*m_periodic_output_file_ptr) << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; (*m_periodic_output_file_ptr) << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; (*m_periodic_output_file_ptr) << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; - (*m_periodic_output_file_ptr) << "L1TBE_usage: " << m_L1tbeProfile << endl; - (*m_periodic_output_file_ptr) << "L2TBE_usage: " << m_L2tbeProfile << endl; (*m_periodic_output_file_ptr) << "mbytes_resident: " << process_memory_resident() << endl; (*m_periodic_output_file_ptr) << "mbytes_total: " << process_memory_total() << endl; if (process_memory_total() > 0) { @@ -292,7 +264,7 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "Virtual_time_in_seconds: " << seconds << endl; out << "Virtual_time_in_minutes: " << minutes << endl; out << "Virtual_time_in_hours: " << hours << endl; - out << "Virtual_time_in_days: " << hours << endl; + out << "Virtual_time_in_days: " << days << endl; out << endl; out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; @@ -308,43 +280,26 @@ void Profiler::printStats(ostream& out, bool short_stats) } out << endl; - if(m_num_BA_broadcasts + m_num_BA_unicasts != 0){ - out << endl; - out << "Broadcast_percent: " << (float)m_num_BA_broadcasts/(m_num_BA_broadcasts+m_num_BA_unicasts) << endl; - } } - Vector perProcInstructionCount; Vector perProcCycleCount; - Vector perProcCPI; - Vector perProcMissesPerInsn; - Vector perProcInsnPerTrans; Vector perProcCyclesPerTrans; Vector perProcMissesPerTrans; - perProcInstructionCount.setSize(RubySystem::getNumberOfSequencers()); - perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); - perProcCPI.setSize(RubySystem::getNumberOfSequencers()); - perProcMissesPerInsn.setSize(RubySystem::getNumberOfSequencers()); - perProcInsnPerTrans.setSize(RubySystem::getNumberOfSequencers()); + perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); perProcCyclesPerTrans.setSize(RubySystem::getNumberOfSequencers()); perProcMissesPerTrans.setSize(RubySystem::getNumberOfSequencers()); for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { - perProcInstructionCount[i] = g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1; perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; // The +1 allows us to avoid division by zero - perProcCPI[i] = double(ruby_cycles)/perProcInstructionCount[i]; - perProcMissesPerInsn[i] = 1000.0 * (double(m_perProcTotalMisses[i]) / double(perProcInstructionCount[i])); int trans = m_perProcEndTransaction[i]; if (trans == 0) { - perProcInsnPerTrans[i] = 0; perProcCyclesPerTrans[i] = 0; perProcMissesPerTrans[i] = 0; } else { - perProcInsnPerTrans[i] = perProcInstructionCount[i] / double(trans); perProcCyclesPerTrans[i] = ruby_cycles / double(trans); perProcMissesPerTrans[i] = m_perProcTotalMisses[i] / double(trans); } @@ -353,12 +308,10 @@ void Profiler::printStats(ostream& out, bool short_stats) integer_t total_misses = m_perProcTotalMisses.sum(); integer_t user_misses = m_perProcUserMisses.sum(); integer_t supervisor_misses = m_perProcSupervisorMisses.sum(); - integer_t instruction_executed = perProcInstructionCount.sum(); integer_t simics_cycles_executed = perProcCycleCount.sum(); integer_t transactions_started = m_perProcStartTransaction.sum(); integer_t transactions_ended = m_perProcEndTransaction.sum(); - double instructions_per_transaction = (transactions_ended != 0) ? double(instruction_executed) / double(transactions_ended) : 0; double cycles_per_transaction = (transactions_ended != 0) ? (RubySystem::getNumberOfSequencers() * double(ruby_cycles)) / double(transactions_ended) : 0; double misses_per_transaction = (transactions_ended != 0) ? double(total_misses) / double(transactions_ended) : 0; @@ -367,23 +320,15 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "user_misses: " << user_misses << " " << m_perProcUserMisses << endl; out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl; out << endl; - out << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; out << "ruby_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; - out << "cycles_per_instruction: " << (RubySystem::getNumberOfSequencers()*double(ruby_cycles))/double(instruction_executed) << " " << perProcCPI << endl; - out << "misses_per_thousand_instructions: " << 1000.0 * (double(total_misses) / double(instruction_executed)) << " " << perProcMissesPerInsn << endl; out << endl; out << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; out << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; - out << "instructions_per_transaction: " << instructions_per_transaction << " " << perProcInsnPerTrans << endl; out << "cycles_per_transaction: " << cycles_per_transaction << " " << perProcCyclesPerTrans << endl; out << "misses_per_transaction: " << misses_per_transaction << " " << perProcMissesPerTrans << endl; out << endl; - // m_L1D_cache_profiler_ptr->printStats(out); - // m_L1I_cache_profiler_ptr->printStats(out); - // m_L2_cache_profiler_ptr->printStats(out); - out << endl; vector::iterator it; @@ -409,7 +354,7 @@ void Profiler::printStats(ostream& out, bool short_stats) if (m_memReq || m_memRefresh) { // if there's a memory controller at all long long int total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; double stallsPerReq = total_stalls * 1.0 / m_memReq; - out << "Memory control:" << endl; + out << "Memory control " << (*it) << ":" << endl; out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes out << " memory_reads: " << m_memRead << endl; out << " memory_writes: " << m_memWrite << endl; @@ -463,12 +408,7 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "Busy Bank Count:" << m_busyBankCount << endl; out << endl; - out << "L1TBE_usage: " << m_L1tbeProfile << endl; - out << "L2TBE_usage: " << m_L2tbeProfile << endl; - out << "StopTable_usage: " << m_stopTableProfile << endl; out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; - out << "store_buffer_size: " << m_store_buffer_size << endl; - out << "unique_blocks_in_store_buffer: " << m_store_buffer_blocks << endl; out << endl; } @@ -486,7 +426,6 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; } } - out << "miss_latency_L2Miss: " << m_L2MissLatencyHistogram << endl; out << endl; @@ -505,12 +444,6 @@ void Profiler::printStats(ostream& out, bool short_stats) } out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; - out << "multicast_retries: " << m_multicast_retry_histogram << endl; - out << "gets_mask_prediction_count: " << m_gets_mask_prediction << endl; - out << "getx_mask_prediction_count: " << m_getx_mask_prediction << endl; - out << "explicit_training_mask: " << m_explicit_training_mask << endl; - out << endl; - if (m_all_sharing_histogram.size() > 0) { out << "all_sharing: " << m_all_sharing_histogram << endl; out << "read_sharing: " << m_read_sharing_histogram << endl; @@ -529,17 +462,8 @@ void Profiler::printStats(ostream& out, bool short_stats) out << endl; } - if (m_conflicting_histogram.size() > 0) { - out << "conflicting_histogram: " << m_conflicting_histogram << endl; - out << "conflicting_histogram_percent: "; m_conflicting_histogram.printPercent(out); out << endl; - out << endl; - } - if (m_outstanding_requests.size() > 0) { out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; - if (m_outstanding_persistent_requests.size() > 0) { - out << "outstanding_persistent_requests: "; m_outstanding_persistent_requests.printPercent(out); out << endl; - } out << endl; } } @@ -610,19 +534,13 @@ void Profiler::printResourceUsage(ostream& out) const void Profiler::clearStats() { - m_num_BA_unicasts = 0; - m_num_BA_broadcasts = 0; - m_ruby_start = g_eventQueue_ptr->getTime(); - m_instructions_executed_at_start.setSize(RubySystem::getNumberOfSequencers()); m_cycles_executed_at_start.setSize(RubySystem::getNumberOfSequencers()); for (int i=0; i < RubySystem::getNumberOfSequencers(); i++) { if (g_system_ptr == NULL) { - m_instructions_executed_at_start[i] = 0; m_cycles_executed_at_start[i] = 0; } else { - m_instructions_executed_at_start[i] = g_system_ptr->getInstructionCount(i); m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); } } @@ -657,11 +575,7 @@ void Profiler::clearStats() m_delayedCyclesVCHistograms[i].clear(); } - m_gets_mask_prediction.clear(); - m_getx_mask_prediction.clear(); - m_explicit_training_mask.clear(); - - m_missLatencyHistograms.setSize(CacheRequestType_NUM); + m_missLatencyHistograms.setSize(RubyRequestType_NUM); for(int i=0; iclear(); // count requests profiled m_requests = 0; - // Conflicting requests - m_conflicting_map_ptr->clear(); - m_conflicting_histogram.clear(); - m_outstanding_requests.clear(); m_outstanding_persistent_requests.clear(); - m_L1D_cache_profiler_ptr->clearStats(); - m_L1I_cache_profiler_ptr->clearStats(); - m_L2_cache_profiler_ptr->clearStats(); - - // for MemoryControl: -/* - m_memReq = 0; - m_memBankBusy = 0; - m_memBusBusy = 0; - m_memTfawBusy = 0; - m_memReadWriteBusy = 0; - m_memDataBusBusy = 0; - m_memRefresh = 0; - m_memRead = 0; - m_memWrite = 0; - m_memWaitCycles = 0; - m_memInputQ = 0; - m_memBankQ = 0; - m_memArbWait = 0; - m_memRandBusy = 0; - m_memNotOld = 0; - - for (int bank=0; bank < m_memBankCount.size(); bank++) { - m_memBankCount[bank] = 0; - } -*/ //added by SS vector::iterator it; @@ -773,19 +642,6 @@ void Profiler::clearStats() m_ruby_start = g_eventQueue_ptr->getTime(); } -void Profiler::profileConflictingRequests(const Address& addr) -{ - assert(addr == line_address(addr)); - Time last_time = m_ruby_start; - if (m_conflicting_map_ptr->exist(addr)) { - last_time = m_conflicting_map_ptr->lookup(addr); - } - Time current_time = g_eventQueue_ptr->getTime(); - assert (current_time - last_time > 0); - m_conflicting_histogram.add(current_time - last_time); - m_conflicting_map_ptr->add(addr, current_time); -} - void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) { if (msg.getType() != CacheRequestType_IFETCH) { @@ -843,27 +699,6 @@ void Profiler::profileRequest(const string& requestStr) } } -void Profiler::recordPrediction(bool wasGood, bool wasPredicted) -{ - m_predictionOpportunities++; - if(wasPredicted){ - m_predictions++; - if(wasGood){ - m_goodPredictions++; - } - } -} - -void Profiler::profileFilterAction(int action) -{ - m_filter_action_histogram.add(action); -} - -void Profiler::profileMulticastRetry(const Address& addr, int count) -{ - m_multicast_retry_histogram.add(count); -} - void Profiler::startTransaction(int cpu) { m_perProcStartTransaction[cpu]++; @@ -894,12 +729,6 @@ void Profiler::missLatency(Time t, RubyRequestType type) { m_allMissLatencyHistogram.add(t); m_missLatencyHistograms[type].add(t); - /* - m_machLatencyHistograms[respondingMach].add(t); - if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_L2MissLatencyHistogram.add(t); - } - */ } // non-zero cycle prefetch request @@ -968,41 +797,6 @@ static double process_memory_resident() return double(res_size_in_pages)*MULTIPLIER; // size in megabytes } -void Profiler::profileGetXMaskPrediction(const Set& pred_set) -{ - m_getx_mask_prediction.add(pred_set.count()); -} - -void Profiler::profileGetSMaskPrediction(const Set& pred_set) -{ - m_gets_mask_prediction.add(pred_set.count()); -} - -void Profiler::profileTrainingMask(const Set& pred_set) -{ - m_explicit_training_mask.add(pred_set.count()); -} - -int64 Profiler::getTotalInstructionsExecuted() const -{ - int64 sum = 1; // Starting at 1 allows us to avoid division by zero - for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { - sum += (g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i]); - } - return sum; -} - -int64 Profiler::getTotalTransactionsExecuted() const -{ - int64 sum = m_perProcEndTransaction.sum(); - if (sum > 0) { - return sum; - } else { - return 1; // Avoid division by zero errors - } -} - - void Profiler::rubyWatch(int id){ //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1"); uint64 tr = 0;//SIMICS_read_register(id, rn_g1); @@ -1029,6 +823,10 @@ bool Profiler::watchAddress(Address addr){ return false; } +int64 Profiler::getTotalTransactionsExecuted() const { + return m_perProcEndTransaction.sum(); +} + // For MemoryControl: void Profiler::profileMemReq(string name, int bank) { // printf("name is %s", name.c_str()); diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 4549e3ea7..673051db3 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -72,7 +72,6 @@ #include "mem/ruby/system/MemoryControl.hh" class CacheMsg; -class CacheProfiler; class AddressProfiler; template class Map; @@ -140,9 +139,6 @@ public: void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } - void countBAUnicast() { m_num_BA_unicasts++; } - void countBABroadcast() { m_num_BA_broadcasts++; } - void recordPrediction(bool wasGood, bool wasPredicted); void startTransaction(int cpu); @@ -153,15 +149,8 @@ public: void bankBusy(); void missLatency(Time t, RubyRequestType type); void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } - void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } - void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);} - void profileGetXMaskPrediction(const Set& pred_set); - void profileGetSMaskPrediction(const Set& pred_set); - void profileTrainingMask(const Set& pred_set); void profileTransition(const string& component, NodeID version, Address addr, const string& state, const string& event, const string& next_state, const string& note); @@ -169,7 +158,6 @@ public: void print(ostream& out) const; - int64 getTotalInstructionsExecuted() const; int64 getTotalTransactionsExecuted() const; void rubyWatch(int proc); @@ -209,9 +197,6 @@ private: Profiler& operator=(const Profiler& obj); // Data Members (m_ prefix) - CacheProfiler* m_L1D_cache_profiler_ptr; - CacheProfiler* m_L1I_cache_profiler_ptr; - CacheProfiler* m_L2_cache_profiler_ptr; AddressProfiler* m_address_profiler_ptr; AddressProfiler* m_inst_profiler_ptr; @@ -224,9 +209,6 @@ private: Time m_ruby_start; time_t m_real_time_start_time; - int m_num_BA_unicasts; - int m_num_BA_broadcasts; - Vector m_perProcTotalMisses; Vector m_perProcUserMisses; Vector m_perProcSupervisorMisses; @@ -236,16 +218,10 @@ private: integer_t m_busyBankCount; Histogram m_multicast_retry_histogram; - Histogram m_L1tbeProfile; - Histogram m_L2tbeProfile; - Histogram m_stopTableProfile; - Histogram m_filter_action_histogram; Histogram m_tbeProfile; Histogram m_sequencer_requests; - Histogram m_store_buffer_size; - Histogram m_store_buffer_blocks; Histogram m_read_sharing_histogram; Histogram m_write_sharing_histogram; Histogram m_all_sharing_histogram; @@ -256,7 +232,6 @@ private: Vector m_missLatencyHistograms; Vector m_machLatencyHistograms; - Histogram m_L2MissLatencyHistogram; Histogram m_allMissLatencyHistogram; Histogram m_allSWPrefetchLatencyHistogram; @@ -268,18 +243,6 @@ private: Histogram m_delayedCyclesNonPFHistogram; Vector m_delayedCyclesVCHistograms; - int m_predictions; - int m_predictionOpportunities; - int m_goodPredictions; - - Histogram m_gets_mask_prediction; - Histogram m_getx_mask_prediction; - Histogram m_explicit_training_mask; - - // For profiling possibly conflicting requests - Map* m_conflicting_map_ptr; - Histogram m_conflicting_histogram; - Histogram m_outstanding_requests; Histogram m_outstanding_persistent_requests; diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc index 883edd3c8..54d38c187 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc +++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc @@ -64,11 +64,6 @@ void profile_outstanding_request(int outstanding) g_system_ptr->getProfiler()->profileOutstandingRequest(outstanding); } -void profile_outstanding_persistent_request(int outstanding) -{ - g_system_ptr->getProfiler()->profileOutstandingPersistentRequest(outstanding); -} - void profile_average_latency_estimate(int latency) { g_system_ptr->getProfiler()->profileAverageLatencyEstimate(latency); @@ -84,26 +79,6 @@ void profileMsgDelay(int virtualNetwork, int delayCycles) g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles); } -void profile_token_retry(const Address& addr, AccessType type, int count) -{ - g_system_ptr->getProfiler()->getAddressProfiler()->profileRetry(addr, type, count); -} - -void profile_filter_action(int action) -{ - g_system_ptr->getProfiler()->profileFilterAction(action); -} - -void profile_persistent_prediction(const Address& addr, AccessType type) -{ - g_system_ptr->getProfiler()->getAddressProfiler()->profilePersistentPrediction(addr, type); -} - -void profile_multicast_retry(const Address& addr, int count) -{ - g_system_ptr->getProfiler()->profileMulticastRetry(addr, count); -} - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) { g_system_ptr->getProfiler()->getAddressProfiler()->profileGetX(datablock, PC, owner, sharers, requestor); diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh index cfaa229a5..7a46bd3a5 100644 --- a/src/mem/ruby/system/CacheMemory.hh +++ b/src/mem/ruby/system/CacheMemory.hh @@ -127,6 +127,7 @@ public: void print(ostream& out) const; void printData(ostream& out) const; + void clearStats() const; void printStats(ostream& out) const; private: @@ -561,6 +562,11 @@ void CacheMemory::printData(ostream& out) const out << "printData() not supported" << endl; } +inline void CacheMemory::clearStats() const +{ + m_profiler_ptr->clearStats(); +} + inline void CacheMemory::printStats(ostream& out) const { diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index 58ec7bb45..d29dba602 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -51,6 +51,7 @@ int64_t DMASequencer::makeRequest(const RubyRequest & request) case RubyRequestType_Locked_Write: case RubyRequestType_RMW_Read: case RubyRequestType_RMW_Write: + case RubyRequestType_NUM: assert(0); } diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index 2c24c9ade..ad67cdc80 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -347,15 +347,16 @@ void RubySystem::printStats(ostream& out) void RubySystem::clearStats() const { - /* m_profiler_ptr->clearStats(); - for (int i=0; iclearStats(); m_network_ptr->clearStats(); - for (int i=0; i < MachineType_base_level(MachineType_NUM); i++) - m_controllers[i][0]->clearStats(); - */ + for (map::const_iterator it = m_caches.begin(); + it != m_caches.end(); it++) { + (*it).second->clearStats(); + } + for (map::const_iterator it = m_controllers.begin(); + it != m_controllers.end(); it++) { + (*it).second->clearStats(); + } } void RubySystem::recordCacheContents(CacheRecorder& tr) const diff --git a/src/mem/ruby/system/TBETable.hh b/src/mem/ruby/system/TBETable.hh index 7d2daa55a..2b00f7a06 100644 --- a/src/mem/ruby/system/TBETable.hh +++ b/src/mem/ruby/system/TBETable.hh @@ -128,7 +128,6 @@ void TBETable::allocate(const Address& address) { assert(isPresent(address) == false); assert(m_map.size() < m_number_of_TBEs); - g_system_ptr->getProfiler()->L2tbeUsageSample(m_map.size()); m_map.add(address, ENTRY()); } diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/ruby.stats b/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/ruby.stats index 455d5bfef..f8b15caeb 100644 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/ruby.stats +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/ruby.stats @@ -22,7 +22,7 @@ Directory_Controller config: DirectoryController_0 directory_latency: 6 directory_name: DirectoryMemory_0 memory_controller_name: MemoryControl_0 - memory_latency: 158 + memory_latency: 1 number_of_TBEs: 256 recycle_latency: 10 to_mem_ctrl_latency: 1 @@ -376,46 +376,42 @@ periodic_stats_period: 1000000 ================ End RubySystem Configuration Print ================ -Real time: Jul/19/2009 15:34:56 +Real time: Jul/29/2009 15:40:36 Profiler Stats -------------- -Elapsed_time_in_seconds: 2553 -Elapsed_time_in_minutes: 42.55 -Elapsed_time_in_hours: 0.709167 -Elapsed_time_in_days: 0.0295486 +Elapsed_time_in_seconds: 1279 +Elapsed_time_in_minutes: 21.3167 +Elapsed_time_in_hours: 0.355278 +Elapsed_time_in_days: 0.0148032 -Virtual_time_in_seconds: 2552.07 -Virtual_time_in_minutes: 42.5345 -Virtual_time_in_hours: 0.708908 -Virtual_time_in_days: 0.708908 +Virtual_time_in_seconds: 1279.21 +Virtual_time_in_minutes: 21.3202 +Virtual_time_in_hours: 0.355336 +Virtual_time_in_days: 0.0148057 Ruby_current_time: 31814465 Ruby_start_time: 1 Ruby_cycles: 31814464 -mbytes_resident: 150.715 -mbytes_total: 1502.59 -resident_ratio: 0.100309 +mbytes_resident: 150.707 +mbytes_total: 1502.61 +resident_ratio: 0.100302 Total_misses: 0 total_misses: 0 [ 0 0 0 0 0 0 0 0 ] user_misses: 0 [ 0 0 0 0 0 0 0 0 ] supervisor_misses: 0 [ 0 0 0 0 0 0 0 0 ] -instruction_executed: 8 [ 1 1 1 1 1 1 1 1 ] ruby_cycles_executed: 254515720 [ 31814465 31814465 31814465 31814465 31814465 31814465 31814465 31814465 ] -cycles_per_instruction: 3.18145e+07 [ 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 ] -misses_per_thousand_instructions: 0 [ 0 0 0 0 0 0 0 0 ] transactions_started: 0 [ 0 0 0 0 0 0 0 0 ] transactions_ended: 0 [ 0 0 0 0 0 0 0 0 ] -instructions_per_transaction: 0 [ 0 0 0 0 0 0 0 0 ] cycles_per_transaction: 0 [ 0 0 0 0 0 0 0 0 ] misses_per_transaction: 0 [ 0 0 0 0 0 0 0 0 ] -Memory control: +Memory control MemoryControl_0: memory_total_requests: 1388468 memory_reads: 694293 memory_writes: 694043 @@ -443,29 +439,18 @@ DMA-0:0 Busy Bank Count:0 -L1TBE_usage: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] -L2TBE_usage: [binsize: 4 max: 134 count: 2136875 average: 36.3375 | standard deviation: 28.2827 | 23454 78361 154838 242576 298777 279946 206526 134119 41748 9990 11123 15179 20545 27694 35924 44642 53192 60320 64615 65514 62260 55913 47160 36948 26700 17667 10728 5693 2725 1295 483 159 53 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ] -StopTable_usage: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] sequencer_requests_outstanding: [binsize: 1 max: 16 count: 748260 average: 11.8029 | standard deviation: 3.40671 | 0 1091 2889 5609 9615 15772 23675 33311 44184 55041 64248 70323 72503 72248 68934 64870 143947 ] -store_buffer_size: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] -unique_blocks_in_store_buffer: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] All Non-Zero Cycle Demand Cache Accesses ---------------------------------------- miss_latency: [binsize: 128 max: 20559 count: 748171 average: 3866.31 | standard deviation: 2352.95 | 21417 1969 3723 6729 8868 8455 7676 8627 10203 11965 13796 13743 11900 13009 16352 17532 16234 15941 17304 16977 16916 18538 19194 16531 16082 17521 18191 15886 15702 16749 15616 14095 14916 15648 13793 11856 12863 13378 11663 10762 11443 11095 9691 9387 10128 9009 7817 8024 8496 7458 6302 6700 6887 5633 5066 5555 5357 4326 4220 4651 4016 3318 3403 3600 3054 2613 2796 2637 2141 2011 2128 1973 1548 1420 1531 1276 1047 1080 1093 914 741 749 732 584 493 515 525 388 363 345 325 251 268 277 202 190 183 189 147 117 143 119 90 93 91 82 60 58 58 49 51 48 39 28 34 36 30 17 16 21 24 23 12 17 16 9 12 16 12 13 7 4 7 8 7 8 5 7 5 8 4 4 6 5 3 3 2 1 4 1 2 1 0 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ] miss_latency_2: [binsize: 128 max: 20559 count: 486192 average: 3864.95 | standard deviation: 2353.73 | 13998 1281 2484 4424 5714 5472 5029 5648 6631 7775 8926 8800 7735 8448 10496 11466 10602 10387 11224 11076 10939 12065 12497 10830 10391 11396 11931 10259 10262 10939 10169 9130 9608 10113 8955 7714 8408 8711 7593 6973 7459 7162 6232 6134 6554 5848 5110 5134 5495 4860 4083 4319 4432 3674 3259 3647 3406 2774 2755 3099 2579 2160 2269 2367 1984 1705 1833 1725 1372 1293 1349 1289 1004 902 970 862 693 720 732 613 484 488 462 374 341 336 349 246 226 213 205 156 178 186 130 122 119 126 100 72 94 79 57 64 63 57 37 38 35 33 35 27 23 19 22 28 17 7 10 14 16 16 10 7 11 6 8 9 3 6 5 4 4 4 4 4 2 6 3 5 4 3 2 5 3 3 1 1 2 1 2 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ] miss_latency_3: [binsize: 128 max: 19863 count: 261979 average: 3868.82 | standard deviation: 2351.5 | 7419 688 1239 2305 3154 2983 2647 2979 3572 4190 4870 4943 4165 4561 5856 6066 5632 5554 6080 5901 5977 6473 6697 5701 5691 6125 6260 5627 5440 5810 5447 4965 5308 5535 4838 4142 4455 4667 4070 3789 3984 3933 3459 3253 3574 3161 2707 2890 3001 2598 2219 2381 2455 1959 1807 1908 1951 1552 1465 1552 1437 1158 1134 1233 1070 908 963 912 769 718 779 684 544 518 561 414 354 360 361 301 257 261 270 210 152 179 176 142 137 132 120 95 90 91 72 68 64 63 47 45 49 40 33 29 28 25 23 20 23 16 16 21 16 9 12 8 13 10 6 7 8 7 2 10 5 3 4 7 9 7 2 0 3 4 3 4 3 1 2 3 0 1 4 0 0 0 1 0 2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ] -miss_latency_L2Miss: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] All Non-Zero Cycle SW Prefetch Requests ------------------------------------ prefetch_latency: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] prefetch_latency_L2Miss:[binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] -multicast_retries: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] -gets_mask_prediction_count: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] -getx_mask_prediction_count: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] -explicit_training_mask: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ] - Request vs. RubySystem State Profile -------------------------------- @@ -486,9 +471,9 @@ Total_nonPF_delay_cycles: [binsize: 1 max: 18 count: 1496498 average: 0.0019285 Resource Usage -------------- page_size: 4096 -user_time: 2550 -system_time: 1 -page_reclaims: 39807 +user_time: 1279 +system_time: 0 +page_reclaims: 39805 page_faults: 0 swaps: 0 block_inputs: 0 @@ -650,7 +635,7 @@ l1u_0 cache stats: l1u_0_total_prefetches: 0 l1u_0_total_sw_prefetches: 0 l1u_0_total_hw_prefetches: 0 - l1u_0_misses_per_transaction: 93523 + l1u_0_misses_per_transaction: inf l1u_0_request_type_LD: 64.8311% l1u_0_request_type_ST: 35.1689% @@ -664,7 +649,7 @@ l1u_1 cache stats: l1u_1_total_prefetches: 0 l1u_1_total_sw_prefetches: 0 l1u_1_total_hw_prefetches: 0 - l1u_1_misses_per_transaction: 93506 + l1u_1_misses_per_transaction: inf l1u_1_request_type_LD: 64.8162% l1u_1_request_type_ST: 35.1838% @@ -678,7 +663,7 @@ l1u_2 cache stats: l1u_2_total_prefetches: 0 l1u_2_total_sw_prefetches: 0 l1u_2_total_hw_prefetches: 0 - l1u_2_misses_per_transaction: 93510 + l1u_2_misses_per_transaction: inf l1u_2_request_type_LD: 64.931% l1u_2_request_type_ST: 35.069% @@ -692,7 +677,7 @@ l1u_3 cache stats: l1u_3_total_prefetches: 0 l1u_3_total_sw_prefetches: 0 l1u_3_total_hw_prefetches: 0 - l1u_3_misses_per_transaction: 93558 + l1u_3_misses_per_transaction: inf l1u_3_request_type_LD: 64.9693% l1u_3_request_type_ST: 35.0307% @@ -706,7 +691,7 @@ l1u_4 cache stats: l1u_4_total_prefetches: 0 l1u_4_total_sw_prefetches: 0 l1u_4_total_hw_prefetches: 0 - l1u_4_misses_per_transaction: 93567 + l1u_4_misses_per_transaction: inf l1u_4_request_type_LD: 65.2474% l1u_4_request_type_ST: 34.7526% @@ -720,7 +705,7 @@ l1u_5 cache stats: l1u_5_total_prefetches: 0 l1u_5_total_sw_prefetches: 0 l1u_5_total_hw_prefetches: 0 - l1u_5_misses_per_transaction: 93561 + l1u_5_misses_per_transaction: inf l1u_5_request_type_LD: 65.0004% l1u_5_request_type_ST: 34.9996% @@ -734,7 +719,7 @@ l1u_6 cache stats: l1u_6_total_prefetches: 0 l1u_6_total_sw_prefetches: 0 l1u_6_total_hw_prefetches: 0 - l1u_6_misses_per_transaction: 93502 + l1u_6_misses_per_transaction: inf l1u_6_request_type_LD: 64.9569% l1u_6_request_type_ST: 35.0431% @@ -748,7 +733,7 @@ l1u_7 cache stats: l1u_7_total_prefetches: 0 l1u_7_total_sw_prefetches: 0 l1u_7_total_hw_prefetches: 0 - l1u_7_misses_per_transaction: 93509 + l1u_7_misses_per_transaction: inf l1u_7_request_type_LD: 65.1189% l1u_7_request_type_ST: 34.8811% diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/simout b/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/simout index 81934512d..511812c26 100755 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/simout +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/simout @@ -5,10 +5,10 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jul 19 2009 14:52:18 -M5 revision 544d33334ee1+ 6369+ default tip -M5 started Jul 19 2009 14:52:23 -M5 executing on clover-01.cs.wisc.edu +M5 compiled Jul 29 2009 15:19:07 +M5 revision a6e8795b73de+ 6384+ default tip +M5 started Jul 29 2009 15:19:16 +M5 executing on clover-02.cs.wisc.edu command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby -re tests/run.py build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/stats.txt b/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/stats.txt index b2eef7422..53437462a 100644 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/stats.txt +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest-ruby/stats.txt @@ -1,8 +1,8 @@ ---------- Begin Simulation Statistics ---------- -host_mem_usage 1538656 # Number of bytes of host memory used -host_seconds 2552.36 # Real time elapsed on the host -host_tick_rate 12465 # Simulator tick rate (ticks/s) +host_mem_usage 1538672 # Number of bytes of host memory used +host_seconds 1279.29 # Real time elapsed on the host +host_tick_rate 24869 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_seconds 0.000032 # Number of seconds simulated sim_ticks 31814464 # Number of ticks simulated