Automated merge with ssh://hg@m5sim.org/m5

Derek Hower 2009-08-03 11:39:08 -05:00
commit ac15e42c17
13 changed files with 57 additions and 360 deletions


@ -14,7 +14,8 @@ enum RubyRequestType {
RubyRequestType_Locked_Read,
RubyRequestType_Locked_Write,
RubyRequestType_RMW_Read,
RubyRequestType_RMW_Write
RubyRequestType_RMW_Write,
RubyRequestType_NUM
};
enum RubyAccessMode {
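
The new RubyRequestType_NUM enumerator added above acts as a terminal sentinel: assuming the enumerators take the default consecutive values starting at zero, its value equals the number of real request types, so per-type containers can be sized from it (as the Profiler::clearStats() hunk below now does with m_missLatencyHistograms) and every switch over the enum must cover it (as the DMASequencer hunk below does). A minimal, self-contained C++ sketch of that pattern, using an abbreviated hypothetical subset of the enum and a plain vector rather than Ruby's own types:

#include <cassert>
#include <vector>

// Abbreviated stand-in for the enum above (hypothetical subset of its values).
enum RubyRequestType {
    RubyRequestType_LD,
    RubyRequestType_ST,
    RubyRequestType_RMW_Read,
    RubyRequestType_RMW_Write,
    RubyRequestType_NUM   // sentinel: count of real request types
};

int main()
{
    // Size a per-type table directly from the sentinel, the same idea as
    // m_missLatencyHistograms.setSize(RubyRequestType_NUM) in the Profiler hunk.
    std::vector<long> latency_total(RubyRequestType_NUM, 0);

    RubyRequestType type = RubyRequestType_ST;
    latency_total[type] += 100;   // index by enum value

    // Switches over the enum now have to handle the sentinel explicitly,
    // as DMASequencer::makeRequest() does; reaching it is a logic error.
    switch (type) {
      case RubyRequestType_LD:
      case RubyRequestType_ST:
      case RubyRequestType_RMW_Read:
      case RubyRequestType_RMW_Write:
        break;
      case RubyRequestType_NUM:
        assert(0);
    }
    return 0;
}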


@ -54,7 +54,6 @@ AddressProfiler::AddressProfiler()
m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>;
m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>;
m_retryProfileMap = new Map<Address, AccessTraceForAddress>;
m_persistentPredictionProfileMap = new Map<Address, AccessTraceForAddress>;
clearStats();
}
@ -64,7 +63,6 @@ AddressProfiler::~AddressProfiler()
delete m_macroBlockAccessTrace;
delete m_programCounterAccessTrace;
delete m_retryProfileMap;
delete m_persistentPredictionProfileMap;
}
void AddressProfiler::setHotLines(bool hot_lines){
@ -125,31 +123,10 @@ void AddressProfiler::printStats(ostream& out) const
m_retryProfileHisto.printPercent(out);
out << endl;
out << "retry_histogram_per_instruction: ";
m_retryProfileHisto.printWithMultiplier(out, 1.0 / double(g_system_ptr->getProfiler()->getTotalInstructionsExecuted()));
out << endl;
printSorted(out, m_retryProfileMap, "block_address");
out << endl;
}
if (m_persistentPredictionProfileHisto.size() > 0) {
out << "Persistent Prediction Profile" << endl;
out << "-------------" << endl;
out << endl;
out << "persistent prediction_histogram: " << m_persistentPredictionProfileHisto << endl;
out << "persistent prediction_histogram_percent: ";
m_persistentPredictionProfileHisto.printPercent(out);
out << endl;
out << "persistentPrediction_histogram_per_instruction: ";
m_persistentPredictionProfileHisto.printWithMultiplier(out, 1.0 / double(g_system_ptr->getProfiler()->getTotalInstructionsExecuted()));
out << endl;
printSorted(out, m_persistentPredictionProfileMap, "block_address");
out << endl;
}
}
void AddressProfiler::clearStats()
@ -233,12 +210,6 @@ void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, in
}
}
void AddressProfiler::profilePersistentPrediction(const Address& data_addr, AccessType type)
{
m_persistentPredictionProfileHisto.add(1);
lookupTraceForAddress(data_addr, m_persistentPredictionProfileMap).addSample(1);
}
// ***** Normal Functions ******
static void printSorted(ostream& out, const Map<Address, AccessTraceForAddress>* record_map, string description)


@ -63,7 +63,6 @@ public:
void clearStats();
void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss);
void profilePersistentPrediction(const Address& data_addr, AccessType type);
void profileRetry(const Address& data_addr, AccessType type, int count);
void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor);
void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor);
@ -87,8 +86,6 @@ private:
Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace;
Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace;
Map<Address, AccessTraceForAddress>* m_retryProfileMap;
Map<Address, AccessTraceForAddress>* m_persistentPredictionProfileMap;
Histogram m_persistentPredictionProfileHisto;
Histogram m_retryProfileHisto;
Histogram m_retryProfileHistoWrite;
Histogram m_retryProfileHistoRead;


@ -52,7 +52,6 @@
*/
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/profiler/CacheProfiler.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/network/Network.hh"
@ -73,27 +72,13 @@ static double process_memory_total();
static double process_memory_resident();
Profiler::Profiler(const string & name)
: m_conflicting_histogram(-1)
{
m_name = name;
m_requestProfileMap_ptr = new Map<string, int>;
m_L1D_cache_profiler_ptr = new CacheProfiler("L1D_cache");
m_L1I_cache_profiler_ptr = new CacheProfiler("L1I_cache");
m_L2_cache_profiler_ptr = new CacheProfiler("L2_cache");
m_inst_profiler_ptr = NULL;
m_address_profiler_ptr = NULL;
/*
m_address_profiler_ptr = new AddressProfiler;
m_inst_profiler_ptr = NULL;
if (m_all_instructions) {
m_inst_profiler_ptr = new AddressProfiler;
}
*/
m_conflicting_map_ptr = new Map<Address, Time>;
m_real_time_start_time = time(NULL); // Not reset in clearStats()
m_stats_period = 1000000; // Default
m_periodic_output_file_ptr = &cerr;
@ -105,12 +90,7 @@ Profiler::~Profiler()
if (m_periodic_output_file_ptr != &cerr) {
delete m_periodic_output_file_ptr;
}
delete m_address_profiler_ptr;
delete m_L1D_cache_profiler_ptr;
delete m_L1I_cache_profiler_ptr;
delete m_L2_cache_profiler_ptr;
delete m_requestProfileMap_ptr;
delete m_conflicting_map_ptr;
}
void Profiler::init(const vector<string> & argv, vector<string> memory_control_names)
@ -182,32 +162,24 @@ void Profiler::wakeup()
{
// FIXME - avoid the repeated code
Vector<integer_t> perProcInstructionCount;
perProcInstructionCount.setSize(RubySystem::getNumberOfSequencers());
Vector<integer_t> perProcCycleCount;
perProcCycleCount.setSize(RubySystem::getNumberOfSequencers());
for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) {
perProcInstructionCount[i] = g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1;
perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
// The +1 allows us to avoid division by zero
}
integer_t total_misses = m_perProcTotalMisses.sum();
integer_t instruction_executed = perProcInstructionCount.sum();
integer_t simics_cycles_executed = perProcCycleCount.sum();
integer_t transactions_started = m_perProcStartTransaction.sum();
integer_t transactions_ended = m_perProcEndTransaction.sum();
(*m_periodic_output_file_ptr) << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl;
(*m_periodic_output_file_ptr) << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl;
(*m_periodic_output_file_ptr) << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl;
(*m_periodic_output_file_ptr) << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl;
(*m_periodic_output_file_ptr) << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl;
(*m_periodic_output_file_ptr) << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl;
(*m_periodic_output_file_ptr) << "L1TBE_usage: " << m_L1tbeProfile << endl;
(*m_periodic_output_file_ptr) << "L2TBE_usage: " << m_L2tbeProfile << endl;
(*m_periodic_output_file_ptr) << "mbytes_resident: " << process_memory_resident() << endl;
(*m_periodic_output_file_ptr) << "mbytes_total: " << process_memory_total() << endl;
if (process_memory_total() > 0) {
@ -292,7 +264,7 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "Virtual_time_in_seconds: " << seconds << endl;
out << "Virtual_time_in_minutes: " << minutes << endl;
out << "Virtual_time_in_hours: " << hours << endl;
out << "Virtual_time_in_days: " << hours << endl;
out << "Virtual_time_in_days: " << days << endl;
out << endl;
out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl;
@ -308,43 +280,26 @@ void Profiler::printStats(ostream& out, bool short_stats)
}
out << endl;
if(m_num_BA_broadcasts + m_num_BA_unicasts != 0){
out << endl;
out << "Broadcast_percent: " << (float)m_num_BA_broadcasts/(m_num_BA_broadcasts+m_num_BA_unicasts) << endl;
}
}
Vector<integer_t> perProcInstructionCount;
Vector<integer_t> perProcCycleCount;
Vector<double> perProcCPI;
Vector<double> perProcMissesPerInsn;
Vector<double> perProcInsnPerTrans;
Vector<double> perProcCyclesPerTrans;
Vector<double> perProcMissesPerTrans;
perProcInstructionCount.setSize(RubySystem::getNumberOfSequencers());
perProcCycleCount.setSize(RubySystem::getNumberOfSequencers());
perProcCPI.setSize(RubySystem::getNumberOfSequencers());
perProcMissesPerInsn.setSize(RubySystem::getNumberOfSequencers());
perProcInsnPerTrans.setSize(RubySystem::getNumberOfSequencers());
perProcCycleCount.setSize(RubySystem::getNumberOfSequencers());
perProcCyclesPerTrans.setSize(RubySystem::getNumberOfSequencers());
perProcMissesPerTrans.setSize(RubySystem::getNumberOfSequencers());
for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) {
perProcInstructionCount[i] = g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1;
perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
// The +1 allows us to avoid division by zero
perProcCPI[i] = double(ruby_cycles)/perProcInstructionCount[i];
perProcMissesPerInsn[i] = 1000.0 * (double(m_perProcTotalMisses[i]) / double(perProcInstructionCount[i]));
int trans = m_perProcEndTransaction[i];
if (trans == 0) {
perProcInsnPerTrans[i] = 0;
perProcCyclesPerTrans[i] = 0;
perProcMissesPerTrans[i] = 0;
} else {
perProcInsnPerTrans[i] = perProcInstructionCount[i] / double(trans);
perProcCyclesPerTrans[i] = ruby_cycles / double(trans);
perProcMissesPerTrans[i] = m_perProcTotalMisses[i] / double(trans);
}
@ -353,12 +308,10 @@ void Profiler::printStats(ostream& out, bool short_stats)
integer_t total_misses = m_perProcTotalMisses.sum();
integer_t user_misses = m_perProcUserMisses.sum();
integer_t supervisor_misses = m_perProcSupervisorMisses.sum();
integer_t instruction_executed = perProcInstructionCount.sum();
integer_t simics_cycles_executed = perProcCycleCount.sum();
integer_t transactions_started = m_perProcStartTransaction.sum();
integer_t transactions_ended = m_perProcEndTransaction.sum();
double instructions_per_transaction = (transactions_ended != 0) ? double(instruction_executed) / double(transactions_ended) : 0;
double cycles_per_transaction = (transactions_ended != 0) ? (RubySystem::getNumberOfSequencers() * double(ruby_cycles)) / double(transactions_ended) : 0;
double misses_per_transaction = (transactions_ended != 0) ? double(total_misses) / double(transactions_ended) : 0;
@ -367,23 +320,15 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "user_misses: " << user_misses << " " << m_perProcUserMisses << endl;
out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl;
out << endl;
out << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl;
out << "ruby_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl;
out << "cycles_per_instruction: " << (RubySystem::getNumberOfSequencers()*double(ruby_cycles))/double(instruction_executed) << " " << perProcCPI << endl;
out << "misses_per_thousand_instructions: " << 1000.0 * (double(total_misses) / double(instruction_executed)) << " " << perProcMissesPerInsn << endl;
out << endl;
out << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl;
out << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl;
out << "instructions_per_transaction: " << instructions_per_transaction << " " << perProcInsnPerTrans << endl;
out << "cycles_per_transaction: " << cycles_per_transaction << " " << perProcCyclesPerTrans << endl;
out << "misses_per_transaction: " << misses_per_transaction << " " << perProcMissesPerTrans << endl;
out << endl;
// m_L1D_cache_profiler_ptr->printStats(out);
// m_L1I_cache_profiler_ptr->printStats(out);
// m_L2_cache_profiler_ptr->printStats(out);
out << endl;
vector<string>::iterator it;
@ -409,7 +354,7 @@ void Profiler::printStats(ostream& out, bool short_stats)
if (m_memReq || m_memRefresh) { // if there's a memory controller at all
long long int total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles;
double stallsPerReq = total_stalls * 1.0 / m_memReq;
out << "Memory control:" << endl;
out << "Memory control " << (*it) << ":" << endl;
out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes
out << " memory_reads: " << m_memRead << endl;
out << " memory_writes: " << m_memWrite << endl;
@ -463,12 +408,7 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "Busy Bank Count:" << m_busyBankCount << endl;
out << endl;
out << "L1TBE_usage: " << m_L1tbeProfile << endl;
out << "L2TBE_usage: " << m_L2tbeProfile << endl;
out << "StopTable_usage: " << m_stopTableProfile << endl;
out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl;
out << "store_buffer_size: " << m_store_buffer_size << endl;
out << "unique_blocks_in_store_buffer: " << m_store_buffer_blocks << endl;
out << endl;
}
@ -486,7 +426,6 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl;
}
}
out << "miss_latency_L2Miss: " << m_L2MissLatencyHistogram << endl;
out << endl;
@ -505,12 +444,6 @@ void Profiler::printStats(ostream& out, bool short_stats)
}
out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl;
out << "multicast_retries: " << m_multicast_retry_histogram << endl;
out << "gets_mask_prediction_count: " << m_gets_mask_prediction << endl;
out << "getx_mask_prediction_count: " << m_getx_mask_prediction << endl;
out << "explicit_training_mask: " << m_explicit_training_mask << endl;
out << endl;
if (m_all_sharing_histogram.size() > 0) {
out << "all_sharing: " << m_all_sharing_histogram << endl;
out << "read_sharing: " << m_read_sharing_histogram << endl;
@ -529,17 +462,8 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << endl;
}
if (m_conflicting_histogram.size() > 0) {
out << "conflicting_histogram: " << m_conflicting_histogram << endl;
out << "conflicting_histogram_percent: "; m_conflicting_histogram.printPercent(out); out << endl;
out << endl;
}
if (m_outstanding_requests.size() > 0) {
out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl;
if (m_outstanding_persistent_requests.size() > 0) {
out << "outstanding_persistent_requests: "; m_outstanding_persistent_requests.printPercent(out); out << endl;
}
out << endl;
}
}
@ -610,19 +534,13 @@ void Profiler::printResourceUsage(ostream& out) const
void Profiler::clearStats()
{
m_num_BA_unicasts = 0;
m_num_BA_broadcasts = 0;
m_ruby_start = g_eventQueue_ptr->getTime();
m_instructions_executed_at_start.setSize(RubySystem::getNumberOfSequencers());
m_cycles_executed_at_start.setSize(RubySystem::getNumberOfSequencers());
for (int i=0; i < RubySystem::getNumberOfSequencers(); i++) {
if (g_system_ptr == NULL) {
m_instructions_executed_at_start[i] = 0;
m_cycles_executed_at_start[i] = 0;
} else {
m_instructions_executed_at_start[i] = g_system_ptr->getInstructionCount(i);
m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i);
}
}
@ -657,11 +575,7 @@ void Profiler::clearStats()
m_delayedCyclesVCHistograms[i].clear();
}
m_gets_mask_prediction.clear();
m_getx_mask_prediction.clear();
m_explicit_training_mask.clear();
m_missLatencyHistograms.setSize(CacheRequestType_NUM);
m_missLatencyHistograms.setSize(RubyRequestType_NUM);
for(int i=0; i<m_missLatencyHistograms.size(); i++) {
m_missLatencyHistograms[i].clear(200);
}
@ -670,7 +584,6 @@ void Profiler::clearStats()
m_machLatencyHistograms[i].clear(200);
}
m_allMissLatencyHistogram.clear(200);
m_L2MissLatencyHistogram.clear(200);
m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM);
for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) {
@ -681,67 +594,23 @@ void Profiler::clearStats()
m_SWPrefetchMachLatencyHistograms[i].clear(200);
}
m_allSWPrefetchLatencyHistogram.clear(200);
m_SWPrefetchL2MissLatencyHistogram.clear(200);
m_multicast_retry_histogram.clear();
m_L1tbeProfile.clear();
m_L2tbeProfile.clear();
m_stopTableProfile.clear();
m_filter_action_histogram.clear();
m_sequencer_requests.clear();
m_store_buffer_size.clear();
m_store_buffer_blocks.clear();
m_read_sharing_histogram.clear();
m_write_sharing_histogram.clear();
m_all_sharing_histogram.clear();
m_cache_to_cache = 0;
m_memory_to_cache = 0;
m_predictions = 0;
m_predictionOpportunities = 0;
m_goodPredictions = 0;
// clear HashMaps
m_requestProfileMap_ptr->clear();
// count requests profiled
m_requests = 0;
// Conflicting requests
m_conflicting_map_ptr->clear();
m_conflicting_histogram.clear();
m_outstanding_requests.clear();
m_outstanding_persistent_requests.clear();
m_L1D_cache_profiler_ptr->clearStats();
m_L1I_cache_profiler_ptr->clearStats();
m_L2_cache_profiler_ptr->clearStats();
// for MemoryControl:
/*
m_memReq = 0;
m_memBankBusy = 0;
m_memBusBusy = 0;
m_memTfawBusy = 0;
m_memReadWriteBusy = 0;
m_memDataBusBusy = 0;
m_memRefresh = 0;
m_memRead = 0;
m_memWrite = 0;
m_memWaitCycles = 0;
m_memInputQ = 0;
m_memBankQ = 0;
m_memArbWait = 0;
m_memRandBusy = 0;
m_memNotOld = 0;
for (int bank=0; bank < m_memBankCount.size(); bank++) {
m_memBankCount[bank] = 0;
}
*/
//added by SS
vector<string>::iterator it;
@ -773,19 +642,6 @@ void Profiler::clearStats()
m_ruby_start = g_eventQueue_ptr->getTime();
}
void Profiler::profileConflictingRequests(const Address& addr)
{
assert(addr == line_address(addr));
Time last_time = m_ruby_start;
if (m_conflicting_map_ptr->exist(addr)) {
last_time = m_conflicting_map_ptr->lookup(addr);
}
Time current_time = g_eventQueue_ptr->getTime();
assert (current_time - last_time > 0);
m_conflicting_histogram.add(current_time - last_time);
m_conflicting_map_ptr->add(addr, current_time);
}
void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id)
{
if (msg.getType() != CacheRequestType_IFETCH) {
@ -843,27 +699,6 @@ void Profiler::profileRequest(const string& requestStr)
}
}
void Profiler::recordPrediction(bool wasGood, bool wasPredicted)
{
m_predictionOpportunities++;
if(wasPredicted){
m_predictions++;
if(wasGood){
m_goodPredictions++;
}
}
}
void Profiler::profileFilterAction(int action)
{
m_filter_action_histogram.add(action);
}
void Profiler::profileMulticastRetry(const Address& addr, int count)
{
m_multicast_retry_histogram.add(count);
}
void Profiler::startTransaction(int cpu)
{
m_perProcStartTransaction[cpu]++;
@ -894,12 +729,6 @@ void Profiler::missLatency(Time t, RubyRequestType type)
{
m_allMissLatencyHistogram.add(t);
m_missLatencyHistograms[type].add(t);
/*
m_machLatencyHistograms[respondingMach].add(t);
if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) {
m_L2MissLatencyHistogram.add(t);
}
*/
}
// non-zero cycle prefetch request
@ -968,41 +797,6 @@ static double process_memory_resident()
return double(res_size_in_pages)*MULTIPLIER; // size in megabytes
}
void Profiler::profileGetXMaskPrediction(const Set& pred_set)
{
m_getx_mask_prediction.add(pred_set.count());
}
void Profiler::profileGetSMaskPrediction(const Set& pred_set)
{
m_gets_mask_prediction.add(pred_set.count());
}
void Profiler::profileTrainingMask(const Set& pred_set)
{
m_explicit_training_mask.add(pred_set.count());
}
int64 Profiler::getTotalInstructionsExecuted() const
{
int64 sum = 1; // Starting at 1 allows us to avoid division by zero
for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) {
sum += (g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i]);
}
return sum;
}
int64 Profiler::getTotalTransactionsExecuted() const
{
int64 sum = m_perProcEndTransaction.sum();
if (sum > 0) {
return sum;
} else {
return 1; // Avoid division by zero errors
}
}
void Profiler::rubyWatch(int id){
//int rn_g1 = 0;//SIMICS_get_register_number(id, "g1");
uint64 tr = 0;//SIMICS_read_register(id, rn_g1);
@ -1029,6 +823,10 @@ bool Profiler::watchAddress(Address addr){
return false;
}
int64 Profiler::getTotalTransactionsExecuted() const {
return m_perProcEndTransaction.sum();
}
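
Note the behavioural change in this hunk: the getTotalTransactionsExecuted() removed further above clamped its result to at least 1 to avoid division by zero, while the re-added version returns the raw sum. In a run that ends no transactions, per-transaction ratios derived from it become IEEE-754 infinities, which is consistent with the l1u_*_misses_per_transaction entries in the updated reference stats below now reading "inf". A minimal sketch (hypothetical names, not Ruby code) of the effect and of the guard a caller would need to restore the old behaviour:

#include <iostream>

int main()
{
    long long misses = 93523;      // e.g. one cache's miss count
    long long transactions = 0;    // nothing ended a transaction in this run

    // Floating-point division by zero does not trap; it yields +inf,
    // matching the "inf" values in the updated reference stats below.
    double per_transaction = double(misses) / double(transactions);
    std::cout << "misses_per_transaction: " << per_transaction << std::endl;

    // Guarded form, the same idiom Profiler::printStats() uses for its
    // own per-transaction statistics.
    double guarded = (transactions != 0)
                   ? double(misses) / double(transactions) : 0.0;
    std::cout << "guarded: " << guarded << std::endl;
    return 0;
}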
// For MemoryControl:
void Profiler::profileMemReq(string name, int bank) {
// printf("name is %s", name.c_str());


@ -72,7 +72,6 @@
#include "mem/ruby/system/MemoryControl.hh"
class CacheMsg;
class CacheProfiler;
class AddressProfiler;
template <class KEY_TYPE, class VALUE_TYPE> class Map;
@ -140,9 +139,6 @@ public:
void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
void countBAUnicast() { m_num_BA_unicasts++; }
void countBABroadcast() { m_num_BA_broadcasts++; }
void recordPrediction(bool wasGood, bool wasPredicted);
void startTransaction(int cpu);
@ -153,15 +149,8 @@ public:
void bankBusy();
void missLatency(Time t, RubyRequestType type);
void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}
void profileGetXMaskPrediction(const Set& pred_set);
void profileGetSMaskPrediction(const Set& pred_set);
void profileTrainingMask(const Set& pred_set);
void profileTransition(const string& component, NodeID version, Address addr,
const string& state, const string& event,
const string& next_state, const string& note);
@ -169,7 +158,6 @@ public:
void print(ostream& out) const;
int64 getTotalInstructionsExecuted() const;
int64 getTotalTransactionsExecuted() const;
void rubyWatch(int proc);
@ -209,9 +197,6 @@ private:
Profiler& operator=(const Profiler& obj);
// Data Members (m_ prefix)
CacheProfiler* m_L1D_cache_profiler_ptr;
CacheProfiler* m_L1I_cache_profiler_ptr;
CacheProfiler* m_L2_cache_profiler_ptr;
AddressProfiler* m_address_profiler_ptr;
AddressProfiler* m_inst_profiler_ptr;
@ -224,9 +209,6 @@ private:
Time m_ruby_start;
time_t m_real_time_start_time;
int m_num_BA_unicasts;
int m_num_BA_broadcasts;
Vector<integer_t> m_perProcTotalMisses;
Vector<integer_t> m_perProcUserMisses;
Vector<integer_t> m_perProcSupervisorMisses;
@ -236,16 +218,10 @@ private:
integer_t m_busyBankCount;
Histogram m_multicast_retry_histogram;
Histogram m_L1tbeProfile;
Histogram m_L2tbeProfile;
Histogram m_stopTableProfile;
Histogram m_filter_action_histogram;
Histogram m_tbeProfile;
Histogram m_sequencer_requests;
Histogram m_store_buffer_size;
Histogram m_store_buffer_blocks;
Histogram m_read_sharing_histogram;
Histogram m_write_sharing_histogram;
Histogram m_all_sharing_histogram;
@ -256,7 +232,6 @@ private:
Vector<Histogram> m_missLatencyHistograms;
Vector<Histogram> m_machLatencyHistograms;
Histogram m_L2MissLatencyHistogram;
Histogram m_allMissLatencyHistogram;
Histogram m_allSWPrefetchLatencyHistogram;
@ -268,18 +243,6 @@ private:
Histogram m_delayedCyclesNonPFHistogram;
Vector<Histogram> m_delayedCyclesVCHistograms;
int m_predictions;
int m_predictionOpportunities;
int m_goodPredictions;
Histogram m_gets_mask_prediction;
Histogram m_getx_mask_prediction;
Histogram m_explicit_training_mask;
// For profiling possibly conflicting requests
Map<Address, Time>* m_conflicting_map_ptr;
Histogram m_conflicting_histogram;
Histogram m_outstanding_requests;
Histogram m_outstanding_persistent_requests;


@ -64,11 +64,6 @@ void profile_outstanding_request(int outstanding)
g_system_ptr->getProfiler()->profileOutstandingRequest(outstanding);
}
void profile_outstanding_persistent_request(int outstanding)
{
g_system_ptr->getProfiler()->profileOutstandingPersistentRequest(outstanding);
}
void profile_average_latency_estimate(int latency)
{
g_system_ptr->getProfiler()->profileAverageLatencyEstimate(latency);
@ -84,26 +79,6 @@ void profileMsgDelay(int virtualNetwork, int delayCycles)
g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles);
}
void profile_token_retry(const Address& addr, AccessType type, int count)
{
g_system_ptr->getProfiler()->getAddressProfiler()->profileRetry(addr, type, count);
}
void profile_filter_action(int action)
{
g_system_ptr->getProfiler()->profileFilterAction(action);
}
void profile_persistent_prediction(const Address& addr, AccessType type)
{
g_system_ptr->getProfiler()->getAddressProfiler()->profilePersistentPrediction(addr, type);
}
void profile_multicast_retry(const Address& addr, int count)
{
g_system_ptr->getProfiler()->profileMulticastRetry(addr, count);
}
void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor)
{
g_system_ptr->getProfiler()->getAddressProfiler()->profileGetX(datablock, PC, owner, sharers, requestor);


@ -127,6 +127,7 @@ public:
void print(ostream& out) const;
void printData(ostream& out) const;
void clearStats() const;
void printStats(ostream& out) const;
private:
@ -561,6 +562,11 @@ void CacheMemory::printData(ostream& out) const
out << "printData() not supported" << endl;
}
inline void CacheMemory::clearStats() const
{
m_profiler_ptr->clearStats();
}
inline
void CacheMemory::printStats(ostream& out) const
{


@ -51,6 +51,7 @@ int64_t DMASequencer::makeRequest(const RubyRequest & request)
case RubyRequestType_Locked_Write:
case RubyRequestType_RMW_Read:
case RubyRequestType_RMW_Write:
case RubyRequestType_NUM:
assert(0);
}


@ -347,15 +347,16 @@ void RubySystem::printStats(ostream& out)
void RubySystem::clearStats() const
{
/*
m_profiler_ptr->clearStats();
for (int i=0; i<m_rubyRequestQueues.size(); i++)
for (int j=0;j<m_rubyRequestQueues[i].size(); j++)
m_rubyRequestQueues[i][j]->clearStats();
m_network_ptr->clearStats();
for (int i=0; i < MachineType_base_level(MachineType_NUM); i++)
m_controllers[i][0]->clearStats();
*/
for (map<string, CacheMemory*>::const_iterator it = m_caches.begin();
it != m_caches.end(); it++) {
(*it).second->clearStats();
}
for (map<string, AbstractController*>::const_iterator it = m_controllers.begin();
it != m_controllers.end(); it++) {
(*it).second->clearStats();
}
}
void RubySystem::recordCacheContents(CacheRecorder& tr) const


@ -128,7 +128,6 @@ void TBETable<ENTRY>::allocate(const Address& address)
{
assert(isPresent(address) == false);
assert(m_map.size() < m_number_of_TBEs);
g_system_ptr->getProfiler()->L2tbeUsageSample(m_map.size());
m_map.add(address, ENTRY());
}


@ -22,7 +22,7 @@ Directory_Controller config: DirectoryController_0
directory_latency: 6
directory_name: DirectoryMemory_0
memory_controller_name: MemoryControl_0
memory_latency: 158
memory_latency: 1
number_of_TBEs: 256
recycle_latency: 10
to_mem_ctrl_latency: 1
@ -376,46 +376,42 @@ periodic_stats_period: 1000000
================ End RubySystem Configuration Print ================
Real time: Jul/19/2009 15:34:56
Real time: Jul/29/2009 15:40:36
Profiler Stats
--------------
Elapsed_time_in_seconds: 2553
Elapsed_time_in_minutes: 42.55
Elapsed_time_in_hours: 0.709167
Elapsed_time_in_days: 0.0295486
Elapsed_time_in_seconds: 1279
Elapsed_time_in_minutes: 21.3167
Elapsed_time_in_hours: 0.355278
Elapsed_time_in_days: 0.0148032
Virtual_time_in_seconds: 2552.07
Virtual_time_in_minutes: 42.5345
Virtual_time_in_hours: 0.708908
Virtual_time_in_days: 0.708908
Virtual_time_in_seconds: 1279.21
Virtual_time_in_minutes: 21.3202
Virtual_time_in_hours: 0.355336
Virtual_time_in_days: 0.0148057
Ruby_current_time: 31814465
Ruby_start_time: 1
Ruby_cycles: 31814464
mbytes_resident: 150.715
mbytes_total: 1502.59
resident_ratio: 0.100309
mbytes_resident: 150.707
mbytes_total: 1502.61
resident_ratio: 0.100302
Total_misses: 0
total_misses: 0 [ 0 0 0 0 0 0 0 0 ]
user_misses: 0 [ 0 0 0 0 0 0 0 0 ]
supervisor_misses: 0 [ 0 0 0 0 0 0 0 0 ]
instruction_executed: 8 [ 1 1 1 1 1 1 1 1 ]
ruby_cycles_executed: 254515720 [ 31814465 31814465 31814465 31814465 31814465 31814465 31814465 31814465 ]
cycles_per_instruction: 3.18145e+07 [ 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 3.18145e+07 ]
misses_per_thousand_instructions: 0 [ 0 0 0 0 0 0 0 0 ]
transactions_started: 0 [ 0 0 0 0 0 0 0 0 ]
transactions_ended: 0 [ 0 0 0 0 0 0 0 0 ]
instructions_per_transaction: 0 [ 0 0 0 0 0 0 0 0 ]
cycles_per_transaction: 0 [ 0 0 0 0 0 0 0 0 ]
misses_per_transaction: 0 [ 0 0 0 0 0 0 0 0 ]
Memory control:
Memory control MemoryControl_0:
memory_total_requests: 1388468
memory_reads: 694293
memory_writes: 694043
@ -443,29 +439,18 @@ DMA-0:0
Busy Bank Count:0
L1TBE_usage: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
L2TBE_usage: [binsize: 4 max: 134 count: 2136875 average: 36.3375 | standard deviation: 28.2827 | 23454 78361 154838 242576 298777 279946 206526 134119 41748 9990 11123 15179 20545 27694 35924 44642 53192 60320 64615 65514 62260 55913 47160 36948 26700 17667 10728 5693 2725 1295 483 159 53 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
StopTable_usage: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
sequencer_requests_outstanding: [binsize: 1 max: 16 count: 748260 average: 11.8029 | standard deviation: 3.40671 | 0 1091 2889 5609 9615 15772 23675 33311 44184 55041 64248 70323 72503 72248 68934 64870 143947 ]
store_buffer_size: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
unique_blocks_in_store_buffer: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
All Non-Zero Cycle Demand Cache Accesses
----------------------------------------
miss_latency: [binsize: 128 max: 20559 count: 748171 average: 3866.31 | standard deviation: 2352.95 | 21417 1969 3723 6729 8868 8455 7676 8627 10203 11965 13796 13743 11900 13009 16352 17532 16234 15941 17304 16977 16916 18538 19194 16531 16082 17521 18191 15886 15702 16749 15616 14095 14916 15648 13793 11856 12863 13378 11663 10762 11443 11095 9691 9387 10128 9009 7817 8024 8496 7458 6302 6700 6887 5633 5066 5555 5357 4326 4220 4651 4016 3318 3403 3600 3054 2613 2796 2637 2141 2011 2128 1973 1548 1420 1531 1276 1047 1080 1093 914 741 749 732 584 493 515 525 388 363 345 325 251 268 277 202 190 183 189 147 117 143 119 90 93 91 82 60 58 58 49 51 48 39 28 34 36 30 17 16 21 24 23 12 17 16 9 12 16 12 13 7 4 7 8 7 8 5 7 5 8 4 4 6 5 3 3 2 1 4 1 2 1 0 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
miss_latency_2: [binsize: 128 max: 20559 count: 486192 average: 3864.95 | standard deviation: 2353.73 | 13998 1281 2484 4424 5714 5472 5029 5648 6631 7775 8926 8800 7735 8448 10496 11466 10602 10387 11224 11076 10939 12065 12497 10830 10391 11396 11931 10259 10262 10939 10169 9130 9608 10113 8955 7714 8408 8711 7593 6973 7459 7162 6232 6134 6554 5848 5110 5134 5495 4860 4083 4319 4432 3674 3259 3647 3406 2774 2755 3099 2579 2160 2269 2367 1984 1705 1833 1725 1372 1293 1349 1289 1004 902 970 862 693 720 732 613 484 488 462 374 341 336 349 246 226 213 205 156 178 186 130 122 119 126 100 72 94 79 57 64 63 57 37 38 35 33 35 27 23 19 22 28 17 7 10 14 16 16 10 7 11 6 8 9 3 6 5 4 4 4 4 4 2 6 3 5 4 3 2 5 3 3 1 1 2 1 2 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
miss_latency_3: [binsize: 128 max: 19863 count: 261979 average: 3868.82 | standard deviation: 2351.5 | 7419 688 1239 2305 3154 2983 2647 2979 3572 4190 4870 4943 4165 4561 5856 6066 5632 5554 6080 5901 5977 6473 6697 5701 5691 6125 6260 5627 5440 5810 5447 4965 5308 5535 4838 4142 4455 4667 4070 3789 3984 3933 3459 3253 3574 3161 2707 2890 3001 2598 2219 2381 2455 1959 1807 1908 1951 1552 1465 1552 1437 1158 1134 1233 1070 908 963 912 769 718 779 684 544 518 561 414 354 360 361 301 257 261 270 210 152 179 176 142 137 132 120 95 90 91 72 68 64 63 47 45 49 40 33 29 28 25 23 20 23 16 16 21 16 9 12 8 13 10 6 7 8 7 2 10 5 3 4 7 9 7 2 0 3 4 3 4 3 1 2 3 0 1 4 0 0 0 1 0 2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
miss_latency_L2Miss: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
All Non-Zero Cycle SW Prefetch Requests
------------------------------------
prefetch_latency: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
prefetch_latency_L2Miss:[binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
multicast_retries: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
gets_mask_prediction_count: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
getx_mask_prediction_count: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
explicit_training_mask: [binsize: 1 max: 0 count: 0 average: NaN |standard deviation: NaN | 0 ]
Request vs. RubySystem State Profile
--------------------------------
@ -486,9 +471,9 @@ Total_nonPF_delay_cycles: [binsize: 1 max: 18 count: 1496498 average: 0.0019285
Resource Usage
--------------
page_size: 4096
user_time: 2550
system_time: 1
page_reclaims: 39807
user_time: 1279
system_time: 0
page_reclaims: 39805
page_faults: 0
swaps: 0
block_inputs: 0
@ -650,7 +635,7 @@ l1u_0 cache stats:
l1u_0_total_prefetches: 0
l1u_0_total_sw_prefetches: 0
l1u_0_total_hw_prefetches: 0
l1u_0_misses_per_transaction: 93523
l1u_0_misses_per_transaction: inf
l1u_0_request_type_LD: 64.8311%
l1u_0_request_type_ST: 35.1689%
@ -664,7 +649,7 @@ l1u_1 cache stats:
l1u_1_total_prefetches: 0
l1u_1_total_sw_prefetches: 0
l1u_1_total_hw_prefetches: 0
l1u_1_misses_per_transaction: 93506
l1u_1_misses_per_transaction: inf
l1u_1_request_type_LD: 64.8162%
l1u_1_request_type_ST: 35.1838%
@ -678,7 +663,7 @@ l1u_2 cache stats:
l1u_2_total_prefetches: 0
l1u_2_total_sw_prefetches: 0
l1u_2_total_hw_prefetches: 0
l1u_2_misses_per_transaction: 93510
l1u_2_misses_per_transaction: inf
l1u_2_request_type_LD: 64.931%
l1u_2_request_type_ST: 35.069%
@ -692,7 +677,7 @@ l1u_3 cache stats:
l1u_3_total_prefetches: 0
l1u_3_total_sw_prefetches: 0
l1u_3_total_hw_prefetches: 0
l1u_3_misses_per_transaction: 93558
l1u_3_misses_per_transaction: inf
l1u_3_request_type_LD: 64.9693%
l1u_3_request_type_ST: 35.0307%
@ -706,7 +691,7 @@ l1u_4 cache stats:
l1u_4_total_prefetches: 0
l1u_4_total_sw_prefetches: 0
l1u_4_total_hw_prefetches: 0
l1u_4_misses_per_transaction: 93567
l1u_4_misses_per_transaction: inf
l1u_4_request_type_LD: 65.2474%
l1u_4_request_type_ST: 34.7526%
@ -720,7 +705,7 @@ l1u_5 cache stats:
l1u_5_total_prefetches: 0
l1u_5_total_sw_prefetches: 0
l1u_5_total_hw_prefetches: 0
l1u_5_misses_per_transaction: 93561
l1u_5_misses_per_transaction: inf
l1u_5_request_type_LD: 65.0004%
l1u_5_request_type_ST: 34.9996%
@ -734,7 +719,7 @@ l1u_6 cache stats:
l1u_6_total_prefetches: 0
l1u_6_total_sw_prefetches: 0
l1u_6_total_hw_prefetches: 0
l1u_6_misses_per_transaction: 93502
l1u_6_misses_per_transaction: inf
l1u_6_request_type_LD: 64.9569%
l1u_6_request_type_ST: 35.0431%
@ -748,7 +733,7 @@ l1u_7 cache stats:
l1u_7_total_prefetches: 0
l1u_7_total_sw_prefetches: 0
l1u_7_total_hw_prefetches: 0
l1u_7_misses_per_transaction: 93509
l1u_7_misses_per_transaction: inf
l1u_7_request_type_LD: 65.1189%
l1u_7_request_type_ST: 34.8811%


@ -5,10 +5,10 @@ The Regents of The University of Michigan
All Rights Reserved
M5 compiled Jul 19 2009 14:52:18
M5 revision 544d33334ee1+ 6369+ default tip
M5 started Jul 19 2009 14:52:23
M5 executing on clover-01.cs.wisc.edu
M5 compiled Jul 29 2009 15:19:07
M5 revision a6e8795b73de+ 6384+ default tip
M5 started Jul 29 2009 15:19:16
M5 executing on clover-02.cs.wisc.edu
command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby -re tests/run.py build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby
Global frequency set at 1000000000000 ticks per second
info: Entering event queue @ 0. Starting simulation...


@ -1,8 +1,8 @@
---------- Begin Simulation Statistics ----------
host_mem_usage 1538656 # Number of bytes of host memory used
host_seconds 2552.36 # Real time elapsed on the host
host_tick_rate 12465 # Simulator tick rate (ticks/s)
host_mem_usage 1538672 # Number of bytes of host memory used
host_seconds 1279.29 # Real time elapsed on the host
host_tick_rate 24869 # Simulator tick rate (ticks/s)
sim_freq 1000000000000 # Frequency of simulated ticks
sim_seconds 0.000032 # Number of seconds simulated
sim_ticks 31814464 # Number of ticks simulated