diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa index b1703221f..efff0eac7 100644 --- a/src/arch/alpha/isa/mem.isa +++ b/src/arch/alpha/isa/mem.isa @@ -275,7 +275,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -310,7 +309,6 @@ def template StoreCondExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, &write_result); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -344,7 +342,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } return fault; @@ -478,9 +475,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Some CPU models execute the memory operation as an atomic unit, # while others want to separate them into an effective address # computation and a memory access operation. As a result, we need diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index 0b0a4c9fa..2f66ca54e 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -172,7 +172,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -204,7 +203,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } // Need to write back any potential address register update diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index 161a52b06..411cc5fda 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -305,7 +305,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -342,7 +341,6 @@ def template StoreFPExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -377,7 +375,6 @@ def template StoreCondExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, &write_result); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -411,7 +408,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } return fault; @@ -435,8 +431,6 @@ def template StoreCompleteAcc {{ if (fault == NoFault) { %(op_wb)s; - - if (traceData) { traceData->setData(getMemData(xc, pkt)); } } return fault; @@ -459,8 +453,6 @@ def template StoreCompleteAcc {{ if (fault == NoFault) { %(op_wb)s; - - if (traceData) { traceData->setData(getMemData(xc, pkt)); } } return fault; diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index a6edffeda..708338074 100644 --- 
a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -38,9 +38,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Some CPU models execute the memory operation as an atomic unit, # while others want to separate them into an effective address # computation and a memory access operation. As a result, we need diff --git a/src/arch/power/isa/formats/mem.isa b/src/arch/power/isa/formats/mem.isa index 1be49c2f7..3bcf0633a 100644 --- a/src/arch/power/isa/formats/mem.isa +++ b/src/arch/power/isa/formats/mem.isa @@ -166,7 +166,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -196,7 +195,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } // Need to write back any potential address register update diff --git a/src/arch/power/isa/formats/util.isa b/src/arch/power/isa/formats/util.isa index ab1e530b2..8fd7f7daa 100644 --- a/src/arch/power/isa/formats/util.isa +++ b/src/arch/power/isa/formats/util.isa @@ -97,9 +97,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Generate InstObjParams for the memory access. iop = InstObjParams(name, Name, base_class, {'ea_code': ea_code, diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 376ea8d26..d12f11a2c 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -443,6 +443,10 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->traceData) { + inst->traceData->setAddr(addr); + } + if (inst->split2ndAccess) { dataSize = inst->split2ndSize; cache_req->splitAccess = true; @@ -541,6 +545,11 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->traceData) { + inst->traceData->setAddr(addr); + inst->traceData->setData(data); + } + if (inst->split2ndAccess) { dataSize = inst->split2ndSize; cache_req->splitAccess = true; diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 05b4ca3e2..d96adffd5 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -351,10 +351,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) } } - // This will need a new way to tell if it has a dcache attached. - if (req->isUncacheable()) - recordEvent("Uncached Read"); - //If there's a fault, return it if (fault != NoFault) { if (req->isPrefetch()) { @@ -451,6 +447,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (traceData) { traceData->setAddr(addr); + traceData->setData(data); } //The block size of our peer. 
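[Review note: the cache_unit.cc and atomic.cc hunks above, and the timing.cc hunks below, all apply one pattern: each CPU model's read()/write() accessor records the effective address (and, for stores, the data) into the execution trace as soon as it is entered, instead of after translation or in the per-ISA store templates, so the trace entry is populated even if the access later faults. A minimal self-contained sketch of that pattern, where TraceRecord and doWrite() are hypothetical stand-ins for gem5's traceData and a CPU write path, not gem5 API:

#include <cstdint>
#include <iostream>

// Stand-in for gem5's trace record ("traceData" in the hunks above).
struct TraceRecord {
    uint64_t addr = 0;
    uint64_t data = 0;
    void setAddr(uint64_t a) { addr = a; }
    void setData(uint64_t d) { data = d; }
};

// Record trace state up front; translation and the actual access would
// follow and may fault without losing the trace information.
int doWrite(TraceRecord *traceData, uint64_t addr, uint64_t data)
{
    if (traceData) {
        traceData->setAddr(addr);
        traceData->setData(data);
    }
    // ... translate addr, perform the store, return any fault ...
    return 0;
}

int main()
{
    TraceRecord tr;
    doWrite(&tr, 0x1000, 42);
    std::cout << std::hex << "0x" << tr.addr << " " << std::dec << tr.data
              << std::endl;
    return 0;
}

Moving the hooks to the entry point is also why the late setData(gtoh(data)) call is deleted in the next hunk: the value is already captured before the access is issued.]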
@@ -522,20 +519,10 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } } - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - //If there's a fault or we don't need to access a second cache line, //stop now. if (fault != NoFault || secondAddr <= addr) { - // If the write needs to have a fault on the access, consider - // calling changeStatus() and changing it to "bad addr write" - // or something. - if (traceData) { - traceData->setData(gtoh(data)); - } if (req->isLocked() && fault == NoFault) { assert(locked); locked = false; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 0104e1b1f..17ba6a10b 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -205,6 +205,27 @@ change_thread_state(ThreadID tid, int activate, int priority) { } +void +BaseSimpleCPU::prefetch(Addr addr, unsigned flags) +{ + if (traceData) { + traceData->setAddr(addr); + } + + // need to do this... +} + +void +BaseSimpleCPU::writeHint(Addr addr, int size, unsigned flags) +{ + if (traceData) { + traceData->setAddr(addr); + } + + // need to do this... +} + + Fault BaseSimpleCPU::copySrcTranslate(Addr src) { diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39961fb88..87e211521 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -232,16 +232,8 @@ class BaseSimpleCPU : public BaseCPU Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n"); M5_DUMMY_RETURN} - void prefetch(Addr addr, unsigned flags) - { - // need to do this... - } - - void writeHint(Addr addr, int size, unsigned flags) - { - // need to do this... - } - + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 221cb0d0d..b8fc5ab84 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -426,16 +426,16 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Read; + if (traceData) { + traceData->setAddr(addr); + } + RequestPtr req = new Request(asid, addr, data_size, flags, pc, _cpuId, tid); Addr split_addr = roundDown(addr + data_size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - _status = DTBWaitResponse; if (split_addr > addr) { RequestPtr req1, req2; @@ -460,11 +460,6 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) thread->dtb->translateTiming(req, tc, translation, mode); } - if (traceData) { - traceData->setData(data); - traceData->setAddr(addr); - } - return NoFault; } @@ -548,16 +543,17 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Write; + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + RequestPtr req = new Request(asid, addr, data_size, flags, pc, _cpuId, tid); Addr split_addr = roundDown(addr + data_size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - // This will need a new way to tell if it's hooked up to a cache or not. 
- if (req->isUncacheable()) - recordEvent("Uncached Write"); - T *dataP = new T; *dataP = TheISA::htog(data); _status = DTBWaitResponse; @@ -584,13 +580,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) thread->dtb->translateTiming(req, tc, translation, mode); } - if (traceData) { - traceData->setAddr(req->getVaddr()); - traceData->setData(data); - } - - // If the write needs to have a fault on the access, consider calling - // changeStatus() and changing it to "bad addr write" or something. + // Translation faults will be returned via finishTranslation() return NoFault; } diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh index 33e810710..983a748cf 100644 --- a/src/cpu/translation.hh +++ b/src/cpu/translation.hh @@ -35,6 +35,16 @@ #include "sim/tlb.hh" +/** + * This class captures the state of an address translation. A translation + * can be split in two if the ISA supports it and the memory access crosses + * a page boundary. In this case, this class is shared by two data + * translations (below). Otherwise it is used by a single data translation + * class. When each part of the translation is finished, the finish + * function is called which will indicate whether the whole translation is + * completed or not. There are also functions for accessing parts of the + * translation state which deal with the possible split correctly. + */ class WholeTranslationState { protected: @@ -50,7 +60,10 @@ class WholeTranslationState uint64_t *res; BaseTLB::Mode mode; - /** Single translation state. */ + /** + * Single translation state. We set the number of outstanding + * translations to one and indicate that it is not split. + */ WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL), @@ -60,7 +73,11 @@ class WholeTranslationState assert(mode == BaseTLB::Read || mode == BaseTLB::Write); } - /** Split translation state. */ + /** + * Split translation state. We copy all state into this class, set the + * number of outstanding translations to two and then mark this as a + * split translation. + */ WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow, RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) @@ -71,6 +88,13 @@ class WholeTranslationState assert(mode == BaseTLB::Read || mode == BaseTLB::Write); } + /** + * Finish part of a translation. If there is only one request then this + * translation is completed. If the request has been split in two then + * the outstanding count determines whether the translation is complete. + * In this case, flags from the split request are copied to the main + * request to make it easier to access them later on. + */ bool finish(Fault fault, int index) { @@ -89,6 +113,10 @@ class WholeTranslationState return outstanding == 0; } + /** + * Determine whether this translation produced a fault. Both parts of the + * translation must be checked if this is a split translation. + */ Fault getFault() const { @@ -102,36 +130,54 @@ class WholeTranslationState return NoFault; } + /** Remove all faults from the translation. */ void setNoFault() { faults[0] = faults[1] = NoFault; } + /** + * Check if this request is uncacheable. We only need to check the main + * request because the flags will have been copied here on a split + * translation. + */ bool isUncacheable() const { return mainReq->isUncacheable(); } + /** + * Check if this request is a prefetch. 
We only need to check the main + request because the flags will have been copied here on a + * split translation. + */ bool isPrefetch() const { return mainReq->isPrefetch(); } + /** Get the physical address of this request. */ Addr getPaddr() const { return mainReq->getPaddr(); } + /** + * Get the flags associated with this request. We only need to access + * the main request because the flags will have been copied here on a + * split translation. + */ unsigned getFlags() { return mainReq->getFlags(); } + /** Delete all requests that make up this translation. */ void deleteReqs() { @@ -143,6 +189,16 @@ class WholeTranslationState } }; + +/** + * This class represents part of a data address translation. All state for + * the translation is held in WholeTranslationState (above). Therefore this + * class does not need to know whether the translation is split or not. The + * index variable determines this but is simply passed on to the state class. + * When this part of the translation is completed, finish is called. If the + * translation state class indicates that the whole translation is complete + * then the execution context is informed. + */ template <class ExecContextPtr> class DataTranslation : public BaseTLB::Translation { @@ -163,6 +219,10 @@ class DataTranslation : public BaseTLB::Translation { } + /** + * Finish this part of the translation and indicate that the whole + * translation is complete if the state says so. + */ void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) diff --git a/src/mem/ruby/profiler/AccessTraceForAddress.cc b/src/mem/ruby/profiler/AccessTraceForAddress.cc index 48b28b735..978b72982 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.cc +++ b/src/mem/ruby/profiler/AccessTraceForAddress.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,100 +26,96 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" AccessTraceForAddress::AccessTraceForAddress() { - m_histogram_ptr = NULL; + m_histogram_ptr = NULL; } AccessTraceForAddress::AccessTraceForAddress(const Address& addr) { - m_addr = addr; - m_total = 0; - m_loads = 0; - m_stores = 0; - m_atomics = 0; - m_user = 0; - m_sharing = 0; - m_histogram_ptr = NULL; + m_addr = addr; + m_total = 0; + m_loads = 0; + m_stores = 0; + m_atomics = 0; + m_user = 0; + m_sharing = 0; + m_histogram_ptr = NULL; } AccessTraceForAddress::~AccessTraceForAddress() { - if (m_histogram_ptr != NULL) { - delete m_histogram_ptr; - m_histogram_ptr = NULL; - } + if (m_histogram_ptr != NULL) { + delete m_histogram_ptr; + m_histogram_ptr = NULL; + } } -void AccessTraceForAddress::print(ostream& out) const +void +AccessTraceForAddress::print(ostream& out) const { - out << m_addr; + out << m_addr; - if (m_histogram_ptr == NULL) { - out << " " << m_total; - out << " | " << m_loads; - out << " " << m_stores; - out << " " << m_atomics; - out << " | " << m_user; - out << " " << m_total-m_user; - out << " | " << m_sharing; - out << " | " << m_touched_by.count(); - } else { + if (m_histogram_ptr == NULL) { + out << " " << m_total; + out << " | " << m_loads; + out << " " << m_stores; + out << " " << m_atomics; + out << " | " << m_user; + out << " " << m_total-m_user; + out << " | " << m_sharing; + out << " | " << m_touched_by.count(); + } else { + assert(m_total == 0); + out << " " << (*m_histogram_ptr); + } +} + +void +AccessTraceForAddress::update(CacheRequestType type, + AccessModeType access_mode, NodeID cpu, + bool sharing_miss) +{ + m_touched_by.add(cpu); + m_total++; + if(type == CacheRequestType_ATOMIC) { + m_atomics++; + } else if(type == CacheRequestType_LD){ + m_loads++; + } else if (type == CacheRequestType_ST){ + m_stores++; + } else { + // ERROR_MSG("Trying to add invalid access to trace"); + } + + if (access_mode == AccessModeType_UserMode) { + m_user++; + } + + if (sharing_miss) { + m_sharing++; + } +} + +int +AccessTraceForAddress::getTotal() const +{ + if (m_histogram_ptr == NULL) { + return m_total; + } else { + return m_histogram_ptr->getTotal(); + } +} + +void +AccessTraceForAddress::addSample(int value) +{ assert(m_total == 0); - out << " " << (*m_histogram_ptr); - } -} - -void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss) -{ - m_touched_by.add(cpu); - m_total++; - if(type == CacheRequestType_ATOMIC) { - m_atomics++; - } else if(type == CacheRequestType_LD){ - m_loads++; - } else if (type == CacheRequestType_ST){ - m_stores++; - } else { - // ERROR_MSG("Trying to add invalid access to trace"); - } - - if (access_mode == AccessModeType_UserMode) { - m_user++; - } - - if (sharing_miss) { - m_sharing++; - } -} - -int AccessTraceForAddress::getTotal() const -{ - if (m_histogram_ptr == NULL) { - return m_total; - } else { - return m_histogram_ptr->getTotal(); - } -} - -void AccessTraceForAddress::addSample(int value) -{ - assert(m_total == 0); - if (m_histogram_ptr == NULL) { - m_histogram_ptr = new Histogram; - } - m_histogram_ptr->add(value); -} - -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2) -{ - return (n1->getTotal() > n2->getTotal()); + if (m_histogram_ptr == NULL) { + m_histogram_ptr = new Histogram; + } + m_histogram_ptr->add(value); } diff --git 
a/src/mem/ruby/profiler/AccessTraceForAddress.hh b/src/mem/ruby/profiler/AccessTraceForAddress.hh index 2761d6de8..53b96856e 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.hh +++ b/src/mem/ruby/profiler/AccessTraceForAddress.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,60 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - * Description: - * - */ +#ifndef __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ +#define __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ -#ifndef ACCESSTRACEFORADDRESS_H -#define ACCESSTRACEFORADDRESS_H - -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheRequestType.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/ruby/system/NodeID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Set.hh" +#include "mem/ruby/system/NodeID.hh" + class Histogram; -class AccessTraceForAddress { -public: - // Constructors - AccessTraceForAddress(); - explicit AccessTraceForAddress(const Address& addr); +class AccessTraceForAddress +{ + public: + AccessTraceForAddress(); + explicit AccessTraceForAddress(const Address& addr); + ~AccessTraceForAddress(); - // Destructor - ~AccessTraceForAddress(); + void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, + bool sharing_miss); + int getTotal() const; + int getSharing() const { return m_sharing; } + int getTouchedBy() const { return m_touched_by.count(); } + const Address& getAddress() const { return m_addr; } + void addSample(int value); - // Public Methods + void print(ostream& out) const; - void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss); - int getTotal() const; - int getSharing() const { return m_sharing; } - int getTouchedBy() const { return m_touched_by.count(); } - const Address& getAddress() const { return m_addr; } - void addSample(int value); - - void print(ostream& out) const; -private: - // Private Methods - - // Private copy constructor and assignment operator - // AccessTraceForAddress(const AccessTraceForAddress& obj); - // AccessTraceForAddress& operator=(const AccessTraceForAddress& obj); - - // Data Members (m_ prefix) - - Address m_addr; - uint64 m_loads; - uint64 m_stores; - uint64 m_atomics; - uint64 m_total; - uint64 m_user; - uint64 m_sharing; - Set m_touched_by; - Histogram* m_histogram_ptr; + private: + Address m_addr; + uint64 m_loads; + uint64 m_stores; + uint64 m_atomics; + uint64 m_total; + uint64 m_user; + uint64 m_sharing; + Set m_touched_by; + Histogram* m_histogram_ptr; }; -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2); - -// Output operator declaration -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj) +inline bool +node_less_then_eq(const AccessTraceForAddress* n1, + const AccessTraceForAddress* n2) { - obj.print(out); - out << flush; - return out; + return n1->getTotal() > n2->getTotal(); } -#endif //ACCESSTRACEFORADDRESS_H +inline ostream& +operator<<(ostream& out, const AccessTraceForAddress& obj) +{ + obj.print(out); + out << flush; + return out; +} + +#endif // __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ diff --git 
a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index c613431ca..2d7d655c0 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,272 +26,293 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * AddressProfiler.cc - * - * Description: See AddressProfiler.hh - * - * $Id$ - * - */ -#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/gems_common/Map.hh" +#include "mem/gems_common/PrioHeap.hh" #include "mem/protocol/CacheMsg.hh" #include "mem/ruby/profiler/AccessTraceForAddress.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/gems_common/Map.hh" -#include "mem/ruby/system/System.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" #include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" + +typedef AddressProfiler::AddressMap AddressMap; // Helper functions -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, - Map<Address, AccessTraceForAddress>* record_map); +AccessTraceForAddress& +lookupTraceForAddress(const Address& addr, AddressMap* record_map) +{ + if (!record_map->exist(addr)) { + record_map->add(addr, AccessTraceForAddress(addr)); + } + return record_map->lookup(addr); +} -static void printSorted(ostream& out, - int num_of_sequencers, - const Map<Address, AccessTraceForAddress>* record_map, - string description); +void +printSorted(ostream& out, int num_of_sequencers, const AddressMap* record_map, + string description) +{ + const int records_printed = 100; + + uint64 misses = 0; + PrioHeap<AccessTraceForAddress*> heap; + Vector<Address> keys = record_map->keys(); + for (int i = 0; i < keys.size(); i++) { + AccessTraceForAddress* record = &(record_map->lookup(keys[i])); + misses += record->getTotal(); + heap.insert(record); + } + + out << "Total_entries_" << description << ": " << keys.size() << endl; + if (g_system_ptr->getProfiler()->getAllInstructions()) + out << "Total_Instructions_" << description << ": " << misses << endl; + else + out << "Total_data_misses_" << description << ": " << misses << endl; + + out << "total | load store atomic | user supervisor | sharing | touched-by" + << endl; + + Histogram remaining_records(1, 100); + Histogram all_records(1, 100); + Histogram remaining_records_log(-1); + Histogram all_records_log(-1); + + // Allows us to track how many lines were touched by n processors + Vector<int> m_touched_vec; + Vector<int> m_touched_weighted_vec; + m_touched_vec.setSize(num_of_sequencers+1); + m_touched_weighted_vec.setSize(num_of_sequencers+1); + for (int i = 0; i < m_touched_vec.size(); i++) { + m_touched_vec[i] = 0; + m_touched_weighted_vec[i] = 0; + } + + int counter = 0; + while (heap.size() > 0 && counter < records_printed) { + AccessTraceForAddress* record = heap.extractMin(); + double percent = 100.0 * (record->getTotal() / double(misses)); + out << description << " | " << percent << " % " << *record << endl; + all_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + counter++; + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + + while (heap.size() > 0) { + AccessTraceForAddress* record = heap.extractMin(); + all_records.add(record->getTotal()); + remaining_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + remaining_records_log.add(record->getTotal()); + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + out << endl; + out << "all_records_" << description << ": " + << all_records << endl + << "all_records_log_" << description << ": " + << all_records_log << endl + << "remaining_records_" << description << ": " + << remaining_records << endl + << "remaining_records_log_" << description << ": " + << remaining_records_log << endl + << "touched_by_" << description << ": " + << m_touched_vec << endl + << "touched_by_weighted_" << description << ": " + << m_touched_weighted_vec << endl + << endl; +} AddressProfiler::AddressProfiler(int num_of_sequencers) { - m_dataAccessTrace = new Map<Address, AccessTraceForAddress>; - m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>; - m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>; - m_retryProfileMap = new Map<Address, AccessTraceForAddress>; - m_num_of_sequencers = num_of_sequencers; - clearStats(); + m_dataAccessTrace = new AddressMap; + m_macroBlockAccessTrace = new AddressMap; + m_programCounterAccessTrace = new AddressMap; + m_retryProfileMap = new AddressMap; + m_num_of_sequencers = num_of_sequencers; + clearStats(); } AddressProfiler::~AddressProfiler() { - delete m_dataAccessTrace; - delete m_macroBlockAccessTrace; - delete m_programCounterAccessTrace; - delete m_retryProfileMap; + delete m_dataAccessTrace; + delete m_macroBlockAccessTrace; + delete m_programCounterAccessTrace; + delete m_retryProfileMap; } -void AddressProfiler::setHotLines(bool hot_lines){ - m_hot_lines = hot_lines; -} -void AddressProfiler::setAllInstructions(bool all_instructions){ - m_all_instructions = all_instructions; -} - -void AddressProfiler::printStats(ostream& out) const +void +AddressProfiler::setHotLines(bool hot_lines) { - if (m_hot_lines) { - out << endl; - out << 
"AddressProfiler Stats" << endl; - out << "---------------------" << endl; - - out << endl; - out << "sharing_misses: " << m_sharing_miss_counter << endl; - out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; - out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - - out << endl; - out << "Hot Data Blocks" << endl; - out << "---------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_dataAccessTrace, "block_address"); - - out << endl; - out << "Hot MacroData Blocks" << endl; - out << "--------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, "macroblock_address"); - - out << "Hot Instructions" << endl; - out << "----------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - } - - if (m_all_instructions){ - out << endl; - out << "All Instructions Profile:" << endl; - out << "-------------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - out << endl; - } - - if (m_retryProfileHisto.size() > 0) { - out << "Retry Profile" << endl; - out << "-------------" << endl; - out << endl; - out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; - out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; - out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; - - out << "retry_histogram_percent: "; - m_retryProfileHisto.printPercent(out); - out << endl; - - printSorted(out, m_num_of_sequencers, m_retryProfileMap, "block_address"); - out << endl; - } - + m_hot_lines = hot_lines; } -void AddressProfiler::clearStats() +void +AddressProfiler::setAllInstructions(bool all_instructions) { - // Clear the maps - m_sharing_miss_counter = 0; - m_dataAccessTrace->clear(); - m_macroBlockAccessTrace->clear(); - m_programCounterAccessTrace->clear(); - m_retryProfileMap->clear(); - m_retryProfileHisto.clear(); - m_retryProfileHistoRead.clear(); - m_retryProfileHistoWrite.clear(); - m_getx_sharing_histogram.clear(); - m_gets_sharing_histogram.clear(); + m_all_instructions = all_instructions; } -void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) +void +AddressProfiler::printStats(ostream& out) const { - Set indirection_set; - indirection_set.addSet(sharers); - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = indirection_set.count(); + if (m_hot_lines) { + out << endl; + out << "AddressProfiler Stats" << endl; + out << "---------------------" << endl; - m_getx_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); + out << endl; + out << "sharing_misses: " << m_sharing_miss_counter << endl; + out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; + out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), requestor, indirection_miss); -} + out << endl; + out << "Hot Data Blocks" << endl; + out << "---------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_dataAccessTrace, + "block_address"); -void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) -{ - Set indirection_set; - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = 
indirection_set.count(); + out << endl; + out << "Hot MacroData Blocks" << endl; + out << "--------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, + "macroblock_address"); - m_gets_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); - - addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), requestor, indirection_miss); -} - -void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss) -{ - if (m_all_instructions) { - if (sharing_miss) { - m_sharing_miss_counter++; + out << "Hot Instructions" << endl; + out << "----------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); } - // record data address trace info - data_addr.makeLineAddress(); - lookupTraceForAddress(data_addr, m_dataAccessTrace).update(type, access_mode, id, sharing_miss); + if (m_all_instructions) { + out << endl; + out << "All Instructions Profile:" << endl; + out << "-------------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); + out << endl; + } - // record macro data address trace info - Address macro_addr(data_addr.maskLowOrderBits(10)); // 6 for datablock, 4 to make it 16x more coarse - lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).update(type, access_mode, id, sharing_miss); + if (m_retryProfileHisto.size() > 0) { + out << "Retry Profile" << endl; + out << "-------------" << endl; + out << endl; + out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; + out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; + out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } + out << "retry_histogram_percent: "; + m_retryProfileHisto.printPercent(out); + out << endl; - if (m_all_instructions) { - // This code is used if the address profiler is an all-instructions profiler - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } + printSorted(out, m_num_of_sequencers, m_retryProfileMap, + "block_address"); + out << endl; + } } -void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, int count) +void +AddressProfiler::clearStats() { - m_retryProfileHisto.add(count); - if (type == AccessType_Read) { - m_retryProfileHistoRead.add(count); - } else { - m_retryProfileHistoWrite.add(count); - } - if (count > 1) { - lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); - } + // Clear the maps + m_sharing_miss_counter = 0; + m_dataAccessTrace->clear(); + m_macroBlockAccessTrace->clear(); + m_programCounterAccessTrace->clear(); + m_retryProfileMap->clear(); + m_retryProfileHisto.clear(); + m_retryProfileHistoRead.clear(); + m_retryProfileHistoWrite.clear(); + m_getx_sharing_histogram.clear(); + m_gets_sharing_histogram.clear(); } -// ***** Normal Functions ****** - -static void printSorted(ostream& out, - int num_of_sequencers, - const Map<Address, AccessTraceForAddress>* record_map, - string description) +void +AddressProfiler::profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) { - const int records_printed = 100; + Set 
indirection_set; + indirection_set.addSet(sharers); + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); - uint64 misses = 0; - PrioHeap<AccessTraceForAddress*> heap; - Vector<Address> keys = record_map->keys(); - for(int i=0; i<keys.size(); i++) { - AccessTraceForAddress* record = &(record_map->lookup(keys[i])); - misses += record->getTotal(); - heap.insert(record); - } + m_getx_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); - out << "Total_entries_" << description << ": " << keys.size() << endl; - if (g_system_ptr->getProfiler()->getAllInstructions()) - out << "Total_Instructions_" << description << ": " << misses << endl; - else - out << "Total_data_misses_" << description << ": " << misses << endl; + addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), + requestor, indirection_miss); +} - out << "total | load store atomic | user supervisor | sharing | touched-by" << endl; - - Histogram remaining_records(1, 100); - Histogram all_records(1, 100); - Histogram remaining_records_log(-1); - Histogram all_records_log(-1); - - // Allows us to track how many lines where touched by n processors - Vector<int> m_touched_vec; - Vector<int> m_touched_weighted_vec; - m_touched_vec.setSize(num_of_sequencers+1); - m_touched_weighted_vec.setSize(num_of_sequencers+1); - for (int i=0; i<m_touched_vec.size(); i++) { - m_touched_vec[i] = 0; - m_touched_weighted_vec[i] = 0; - } - - int counter = 0; - while((heap.size() > 0) && (counter < records_printed)) { - AccessTraceForAddress* record = heap.extractMin(); - double percent = 100.0*(record->getTotal()/double(misses)); - out << description << " | " << percent << " % " << *record << endl; - all_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - counter++; - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - - while(heap.size() > 0) { - AccessTraceForAddress* record = heap.extractMin(); - all_records.add(record->getTotal()); - remaining_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - remaining_records_log.add(record->getTotal()); - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - out << endl; - out << "all_records_" << description << ": " << all_records << endl; - out << "all_records_log_" << description << ": " << all_records_log << endl; - out << "remaining_records_" << description << ": " << remaining_records << endl; - out << "remaining_records_log_" << description << ": " << remaining_records_log << endl; - out << "touched_by_" << description << ": " << m_touched_vec << endl; - out << "touched_by_weighted_" << description << ": " << m_touched_weighted_vec << endl; - out << endl; + +void +AddressProfiler::profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) +{ + Set indirection_set; + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); + + m_gets_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); + + addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), + requestor, indirection_miss); +} -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, Map<Address, AccessTraceForAddress>* record_map) +void +AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, + AccessModeType access_mode, NodeID id, + bool sharing_miss) { - if(record_map->exist(addr) == false){ - record_map->add(addr, AccessTraceForAddress(addr)); - } - return record_map->lookup(addr); + if (m_all_instructions) { + if (sharing_miss) { + m_sharing_miss_counter++; + } + + // record data address trace info + data_addr.makeLineAddress(); + lookupTraceForAddress(data_addr, m_dataAccessTrace). 
+ update(type, access_mode, id, sharing_miss); + + // record macro data address trace info + + // 6 for datablock, 4 to make it 16x more coarse + Address macro_addr(data_addr.maskLowOrderBits(10)); + lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace). + update(type, access_mode, id, sharing_miss); + + // record program counter address trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); + } + + if (m_all_instructions) { + // This code is used if the address profiler is an + // all-instructions profiler; record program counter address + // trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); + } +} + +void +AddressProfiler::profileRetry(const Address& data_addr, AccessType type, + int count) +{ + m_retryProfileHisto.add(count); + if (type == AccessType_Read) { + m_retryProfileHistoRead.add(count); + } else { + m_retryProfileHistoWrite.add(count); + } + if (count > 1) { + lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); + } } diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 177aa56d6..76dac323f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,89 +26,77 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * AddressProfiler.hh - * - * Description: - * - * $Id$ - * - */ +#ifndef __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ +#define __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ -#ifndef ADDRESSPROFILER_H -#define ADDRESSPROFILER_H -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheMsg.hh" #include "mem/protocol/AccessType.hh" +#include "mem/protocol/CacheMsg.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" class AccessTraceForAddress; class Set; template <class KEY_TYPE, class VALUE_TYPE> class Map; -class AddressProfiler { -public: - // Constructors - AddressProfiler(int num_of_sequencers); +class AddressProfiler +{ + public: + typedef Map<Address, AccessTraceForAddress> AddressMap; - // Destructor - ~AddressProfiler(); + public: + AddressProfiler(int num_of_sequencers); + ~AddressProfiler(); - // Public Methods - void printStats(ostream& out) const; - void clearStats(); + void printStats(ostream& out) const; + void clearStats(); - void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss); - void profileRetry(const Address& data_addr, AccessType type, int count); - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); - void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); + void addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, AccessModeType access_mode, + NodeID id, bool sharing_miss); + void profileRetry(const Address& data_addr, AccessType type, int count); + void profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); + void profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); - void print(ostream& out) const; + void print(ostream& out) const; - //added by SS - void setHotLines(bool hot_lines); - void setAllInstructions(bool all_instructions); -private: - // Private Methods + //added by SS + void setHotLines(bool hot_lines); + void setAllInstructions(bool all_instructions); - // Private copy constructor and assignment operator - AddressProfiler(const AddressProfiler& obj); - AddressProfiler& operator=(const AddressProfiler& obj); + private: + // Private copy constructor and assignment operator + AddressProfiler(const AddressProfiler& obj); + AddressProfiler& operator=(const AddressProfiler& obj); - // Data Members (m_ prefix) - int64 m_sharing_miss_counter; + int64 m_sharing_miss_counter; - Map<Address, AccessTraceForAddress>* m_dataAccessTrace; - Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace; - Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace; - Map<Address, AccessTraceForAddress>* m_retryProfileMap; - Histogram m_retryProfileHisto; - Histogram m_retryProfileHistoWrite; - Histogram m_retryProfileHistoRead; - Histogram m_getx_sharing_histogram; - Histogram m_gets_sharing_histogram; -//added by SS - bool m_hot_lines; - bool m_all_instructions; + AddressMap* m_dataAccessTrace; + AddressMap* m_macroBlockAccessTrace; + AddressMap* m_programCounterAccessTrace; + AddressMap* m_retryProfileMap; + Histogram m_retryProfileHisto; + Histogram m_retryProfileHistoWrite; + Histogram m_retryProfileHistoRead; + Histogram m_getx_sharing_histogram; + Histogram m_gets_sharing_histogram; - int m_num_of_sequencers; + //added by SS + bool m_hot_lines; + bool m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const AddressProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AddressProfiler& obj) +inline ostream& +operator<<(ostream& out, const AddressProfiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //ADDRESSPROFILER_H +#endif // __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ diff --git a/src/mem/ruby/profiler/CacheProfiler.cc b/src/mem/ruby/profiler/CacheProfiler.cc index 50581fcf9..9d12a46ab 100644 --- a/src/mem/ruby/profiler/CacheProfiler.cc +++ b/src/mem/ruby/profiler/CacheProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,111 +26,113 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * CacheProfiler.C - * - * Description: See CacheProfiler.hh - * - * $Id$ - * - */ - -#include "mem/ruby/profiler/CacheProfiler.hh" -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/gems_common/PrioHeap.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/profiler/Profiler.hh" #include "mem/gems_common/Vector.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" +#include "mem/ruby/profiler/CacheProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" CacheProfiler::CacheProfiler(const string& description) { - m_description = description; - m_requestTypeVec_ptr = new Vector<int>; - m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); + m_description = description; + m_requestTypeVec_ptr = new Vector<int>; + m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); - clearStats(); + clearStats(); } CacheProfiler::~CacheProfiler() { - delete m_requestTypeVec_ptr; + delete m_requestTypeVec_ptr; } -void CacheProfiler::printStats(ostream& out) const +void +CacheProfiler::printStats(ostream& out) const { - out << "Cache Stats: " << m_description << endl; - string description = " " + m_description; - - out << description << "_total_misses: " << m_misses << endl; - out << description << "_total_demand_misses: " << m_demand_misses << endl; - out << description << "_total_prefetches: " << m_prefetches << endl; - out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; - out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; - out << endl; - - int requests = 0; - - for(int i=0; i<int(CacheRequestType_NUM); i++) { - requests += m_requestTypeVec_ptr->ref(i); - } - - assert(m_misses == requests); - - if (requests > 0) { - for(int i=0; i<int(CacheRequestType_NUM); i++) { - if (m_requestTypeVec_ptr->ref(i) > 0) { - out << description << "_request_type_" << CacheRequestType_to_string(CacheRequestType(i)) << ": " - << (100.0 * double((m_requestTypeVec_ptr->ref(i)))) / double(requests) - << "%" << endl; - } - } + out << "Cache Stats: " << m_description << endl; + string description = " " + m_description; + out << description << "_total_misses: " << m_misses << endl; + out << description << "_total_demand_misses: " << m_demand_misses << endl; + out << description << "_total_prefetches: " << m_prefetches << endl; + out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; + out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; out << endl; - for(int i=0; i<AccessModeType_NUM; i++){ - if (m_accessModeTypeHistogram[i] > 0) { - out << description << "_access_mode_type_" << (AccessModeType) i << ": " << m_accessModeTypeHistogram[i] - << " " << (100.0 * m_accessModeTypeHistogram[i]) / requests << "%" << endl; - } + int requests = 0; + + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + requests += m_requestTypeVec_ptr->ref(i); } - } - out << description << "_request_size: " << m_requestSize << endl; - out << endl; + assert(m_misses == requests); + if (requests > 0) { + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + if (m_requestTypeVec_ptr->ref(i) > 0) { + out << description << "_request_type_" + << CacheRequestType_to_string(CacheRequestType(i)) + << ": " + << 100.0 * (double)m_requestTypeVec_ptr->ref(i) / + (double)requests + << "%" << endl; + } + } + + out << endl; + + for (int i = 0; i < AccessModeType_NUM; i++){ + if (m_accessModeTypeHistogram[i] > 0) { + out << description << "_access_mode_type_" + << (AccessModeType) i << ": " + << m_accessModeTypeHistogram[i] << " " + << 100.0 * m_accessModeTypeHistogram[i] / requests + << "%" << endl; + } + } + } + + out << description << "_request_size: " << m_requestSize << endl; + out << endl; } -void CacheProfiler::clearStats() +void +CacheProfiler::clearStats() { - for(int i=0; i<int(CacheRequestType_NUM); i++) { - m_requestTypeVec_ptr->ref(i) = 0; - } - m_requestSize.clear(); - m_misses = 0; - m_demand_misses = 0; - m_prefetches = 0; - m_sw_prefetches = 0; - m_hw_prefetches = 0; - for(int i=0; i<AccessModeType_NUM; i++) { - m_accessModeTypeHistogram[i] = 0; - } + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + m_requestTypeVec_ptr->ref(i) = 0; + } + m_requestSize.clear(); + m_misses = 0; + m_demand_misses = 0; + m_prefetches = 0; + m_sw_prefetches = 0; + m_hw_prefetches = 0; + for (int i = 0; i < AccessModeType_NUM; i++) { + m_accessModeTypeHistogram[i] = 0; + } } -void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit) +void +CacheProfiler::addStatSample(CacheRequestType requestType, + AccessModeType type, int msgSize, + PrefetchBit pfBit) { - m_misses++; + m_misses++; - m_requestTypeVec_ptr->ref(requestType)++; + m_requestTypeVec_ptr->ref(requestType)++; - m_accessModeTypeHistogram[type]++; - m_requestSize.add(msgSize); - if (pfBit == PrefetchBit_No) { - m_demand_misses++; - } else if (pfBit == PrefetchBit_Yes) { - m_prefetches++; - m_sw_prefetches++; - } else { // must be L1_HW || L2_HW prefetch - m_prefetches++; - m_hw_prefetches++; - } + m_accessModeTypeHistogram[type]++; + m_requestSize.add(msgSize); + if (pfBit == PrefetchBit_No) { + m_demand_misses++; + } else if (pfBit == PrefetchBit_Yes) { + m_prefetches++; + m_sw_prefetches++; + } else { + // must be L1_HW || L2_HW prefetch + m_prefetches++; + m_hw_prefetches++; + } } diff --git a/src/mem/ruby/profiler/CacheProfiler.hh b/src/mem/ruby/profiler/CacheProfiler.hh index 11f189148..7dcdf57f0 100644 --- a/src/mem/ruby/profiler/CacheProfiler.hh +++ b/src/mem/ruby/profiler/CacheProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,58 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * CacheProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef CACHEPROFILER_H -#define CACHEPROFILER_H +#ifndef __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ +#define __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ #include <iostream> #include <string> -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/protocol/PrefetchBit.hh" #include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/PrefetchBit.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" template <class TYPE> class Vector; -class CacheProfiler { -public: - // Constructors - CacheProfiler(const std::string& description); +class CacheProfiler +{ + public: + CacheProfiler(const std::string& description); + ~CacheProfiler(); - // Destructor - ~CacheProfiler(); + void printStats(std::ostream& out) const; + void clearStats(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void addStatSample(CacheRequestType requestType, AccessModeType type, + int msgSize, PrefetchBit pfBit); - void addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit); + void print(std::ostream& out) const; - void print(std::ostream& out) const; -private: - // Private Methods + private: + // Private copy constructor and assignment operator + CacheProfiler(const CacheProfiler& obj); + CacheProfiler& operator=(const CacheProfiler& obj); - // Private copy constructor and assignment operator - CacheProfiler(const CacheProfiler& obj); - CacheProfiler& operator=(const CacheProfiler& obj); + std::string m_description; + Histogram m_requestSize; + int64 m_misses; + int64 m_demand_misses; + int64 m_prefetches; + int64 m_sw_prefetches; + int64 m_hw_prefetches; + int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - // Data Members (m_ prefix) - std::string m_description; - Histogram m_requestSize; - int64 m_misses; - int64 m_demand_misses; - int64 m_prefetches; - int64 m_sw_prefetches; - int64 m_hw_prefetches; - int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - - Vector < int >* m_requestTypeVec_ptr; + Vector<int>* m_requestTypeVec_ptr; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const CacheProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //CACHEPROFILER_H +#endif // __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.cc b/src/mem/ruby/profiler/MemCntrlProfiler.cc index b41d7de78..e25719666 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.cc +++ b/src/mem/ruby/profiler/MemCntrlProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. 
@@ -32,19 +31,14 @@ using namespace std; MemCntrlProfiler::MemCntrlProfiler(const string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel) + int banks_per_rank, int ranks_per_dimm, int dimms_per_channel) { m_description = description; m_banks_per_rank = banks_per_rank; m_ranks_per_dimm = ranks_per_dimm; m_dimms_per_channel = dimms_per_channel; - int totalBanks = banks_per_rank * - ranks_per_dimm * - dimms_per_channel; - + int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel; m_memBankCount.setSize(totalBanks); clearStats(); @@ -54,50 +48,65 @@ MemCntrlProfiler::~MemCntrlProfiler() { } -void MemCntrlProfiler::printStats(ostream& out) const +void +MemCntrlProfiler::printStats(ostream& out) const { - if (m_memReq || m_memRefresh) { // if there's a memory controller at all - uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; - double stallsPerReq = total_stalls * 1.0 / m_memReq; - out << "Memory controller: " << m_description << ":" << endl; - out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes - out << " memory_reads: " << m_memRead << endl; - out << " memory_writes: " << m_memWrite << endl; - out << " memory_refreshes: " << m_memRefresh << endl; - out << " memory_total_request_delays: " << total_stalls << endl; - out << " memory_delays_per_request: " << stallsPerReq << endl; - out << " memory_delays_in_input_queue: " << m_memInputQ << endl; - out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl; - out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl; - // Note: The following "memory stalls" entries are a breakdown of the - // cycles which already showed up in m_memWaitCycles. The order is - // significant; it is the priority of attributing the cycles. - // For example, bank_busy is before arbitration because if the bank was - // busy, we didn't even check arbitration. - // Note: "not old enough" means that since we grouped waiting heads-of-queues - // into batches to avoid starvation, a request in a newer batch - // didn't try to arbitrate yet because there are older requests waiting. - out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; - out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; - out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; - out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; - out << " memory_stalls_for_bus: " << m_memBusBusy << endl; - out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; - out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl; - out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl; - out << " accesses_per_bank: "; - for (int bank=0; bank < m_memBankCount.size(); bank++) { - out << m_memBankCount[bank] << " "; - } - } else { + if (!m_memReq && !m_memRefresh) { out << "Memory Controller: " << m_description - << " no stats recorded." << endl; - } + << " no stats recorded." 
<< endl + << endl + << endl; + return; + } + + // if there's a memory controller at all + uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; + double stallsPerReq = total_stalls * 1.0 / m_memReq; + out << "Memory controller: " << m_description << ":" << endl; + + // does not include refreshes + out << " memory_total_requests: " << m_memReq << endl; + out << " memory_reads: " << m_memRead << endl; + out << " memory_writes: " << m_memWrite << endl; + out << " memory_refreshes: " << m_memRefresh << endl; + out << " memory_total_request_delays: " << total_stalls << endl; + out << " memory_delays_per_request: " << stallsPerReq << endl; + out << " memory_delays_in_input_queue: " << m_memInputQ << endl; + out << " memory_delays_behind_head_of_bank_queue: " + << m_memBankQ << endl; + out << " memory_delays_stalled_at_head_of_bank_queue: " + << m_memWaitCycles << endl; + + // Note: The following "memory stalls" entries are a breakdown of + // the cycles which already showed up in m_memWaitCycles. The + // order is significant; it is the priority of attributing the + // cycles. For example, bank_busy is before arbitration because + // if the bank was busy, we didn't even check arbitration. + // Note: "not old enough" means that since we grouped waiting + // heads-of-queues into batches to avoid starvation, a request in + // a newer batch didn't try to arbitrate yet because there are + // older requests waiting. + out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; + out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; + out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; + out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; + out << " memory_stalls_for_bus: " << m_memBusBusy << endl; + out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; + out << " memory_stalls_for_read_write_turnaround: " + << m_memReadWriteBusy << endl; + out << " memory_stalls_for_read_read_turnaround: " + << m_memDataBusBusy << endl; + out << " accesses_per_bank: "; + + for (int bank = 0; bank < m_memBankCount.size(); bank++) { + out << m_memBankCount[bank] << " "; + } out << endl; out << endl; } -void MemCntrlProfiler::clearStats() +void +MemCntrlProfiler::clearStats() { m_memReq = 0; m_memBankBusy = 0; @@ -115,72 +124,100 @@ void MemCntrlProfiler::clearStats() m_memRandBusy = 0; m_memNotOld = 0; - for (int bank=0; - bank < m_memBankCount.size(); - bank++) { + for (int bank = 0; bank < m_memBankCount.size(); bank++) { m_memBankCount[bank] = 0; } } -void MemCntrlProfiler::profileMemReq(int bank) { - m_memReq++; - m_memBankCount[bank]++; +void +MemCntrlProfiler::profileMemReq(int bank) +{ + m_memReq++; + m_memBankCount[bank]++; } -void MemCntrlProfiler::profileMemBankBusy() { - m_memBankBusy++; +void +MemCntrlProfiler::profileMemBankBusy() +{ + m_memBankBusy++; } -void MemCntrlProfiler::profileMemBusBusy() { - m_memBusBusy++; +void +MemCntrlProfiler::profileMemBusBusy() +{ + m_memBusBusy++; } -void MemCntrlProfiler::profileMemReadWriteBusy() { - m_memReadWriteBusy++; +void +MemCntrlProfiler::profileMemReadWriteBusy() +{ + m_memReadWriteBusy++; } -void MemCntrlProfiler::profileMemDataBusBusy() { - m_memDataBusBusy++; +void +MemCntrlProfiler::profileMemDataBusBusy() +{ + m_memDataBusBusy++; } -void MemCntrlProfiler::profileMemTfawBusy() { - m_memTfawBusy++; +void +MemCntrlProfiler::profileMemTfawBusy() +{ + m_memTfawBusy++; } -void MemCntrlProfiler::profileMemRefresh() { - m_memRefresh++; +void +MemCntrlProfiler::profileMemRefresh() +{ + 
m_memRefresh++; } -void MemCntrlProfiler::profileMemRead() { - m_memRead++; +void +MemCntrlProfiler::profileMemRead() +{ + m_memRead++; } -void MemCntrlProfiler::profileMemWrite() { - m_memWrite++; +void +MemCntrlProfiler::profileMemWrite() +{ + m_memWrite++; } -void MemCntrlProfiler::profileMemWaitCycles(int cycles) { - m_memWaitCycles += cycles; +void +MemCntrlProfiler::profileMemWaitCycles(int cycles) +{ + m_memWaitCycles += cycles; } -void MemCntrlProfiler::profileMemInputQ(int cycles) { - m_memInputQ += cycles; +void +MemCntrlProfiler::profileMemInputQ(int cycles) +{ + m_memInputQ += cycles; } -void MemCntrlProfiler::profileMemBankQ(int cycles) { - m_memBankQ += cycles; +void +MemCntrlProfiler::profileMemBankQ(int cycles) +{ + m_memBankQ += cycles; } -void MemCntrlProfiler::profileMemArbWait(int cycles) { - m_memArbWait += cycles; +void +MemCntrlProfiler::profileMemArbWait(int cycles) +{ + m_memArbWait += cycles; } -void MemCntrlProfiler::profileMemRandBusy() { - m_memRandBusy++; +void +MemCntrlProfiler::profileMemRandBusy() +{ + m_memRandBusy++; } -void MemCntrlProfiler::profileMemNotOld() { - m_memNotOld++; +void +MemCntrlProfiler::profileMemNotOld() +{ + m_memNotOld++; } diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.hh b/src/mem/ruby/profiler/MemCntrlProfiler.hh index ebedd5185..85c39e0ad 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.hh +++ b/src/mem/ruby/profiler/MemCntrlProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,17 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * MemCntrlProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef MEM_CNTRL_PROFILER_H -#define MEM_CNTRL_PROFILER_H +#ifndef __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ +#define __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ #include #include @@ -47,80 +37,67 @@ template class Vector; -class MemCntrlProfiler { -public: - // Constructors - MemCntrlProfiler(const std::string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel); +class MemCntrlProfiler +{ + public: + MemCntrlProfiler(const std::string& description, int banks_per_rank, + int ranks_per_dimm, int dimms_per_channel); + ~MemCntrlProfiler(); - // Destructor - ~MemCntrlProfiler(); + void printStats(std::ostream& out) const; + void clearStats(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void profileMemReq(int bank); + void profileMemBankBusy(); + void profileMemBusBusy(); + void profileMemTfawBusy(); + void profileMemReadWriteBusy(); + void profileMemDataBusBusy(); + void profileMemRefresh(); + void profileMemRead(); + void profileMemWrite(); + void profileMemWaitCycles(int cycles); + void profileMemInputQ(int cycles); + void profileMemBankQ(int cycles); + void profileMemArbWait(int cycles); + void profileMemRandBusy(); + void profileMemNotOld(); - void profileMemReq(int bank); - void profileMemBankBusy(); - void profileMemBusBusy(); - void profileMemTfawBusy(); - void profileMemReadWriteBusy(); - void profileMemDataBusBusy(); - void profileMemRefresh(); - void profileMemRead(); - void profileMemWrite(); - void profileMemWaitCycles(int cycles); - void profileMemInputQ(int cycles); - void profileMemBankQ(int cycles); - void profileMemArbWait(int cycles); - void profileMemRandBusy(); - void profileMemNotOld(); + void print(std::ostream& out) const; - void print(std::ostream& out) const; private: - // Private Methods + // Private copy constructor and assignment 
operator + MemCntrlProfiler(const MemCntrlProfiler& obj); + MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - // Private copy constructor and assignment operator - MemCntrlProfiler(const MemCntrlProfiler& obj); - MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - - // Data Members (m_ prefix) - std::string m_description; - uint64 m_memReq; - uint64 m_memBankBusy; - uint64 m_memBusBusy; - uint64 m_memTfawBusy; - uint64 m_memReadWriteBusy; - uint64 m_memDataBusBusy; - uint64 m_memRefresh; - uint64 m_memRead; - uint64 m_memWrite; - uint64 m_memWaitCycles; - uint64 m_memInputQ; - uint64 m_memBankQ; - uint64 m_memArbWait; - uint64 m_memRandBusy; - uint64 m_memNotOld; - Vector m_memBankCount; - int m_banks_per_rank; - int m_ranks_per_dimm; - int m_dimms_per_channel; + std::string m_description; + uint64 m_memReq; + uint64 m_memBankBusy; + uint64 m_memBusBusy; + uint64 m_memTfawBusy; + uint64 m_memReadWriteBusy; + uint64 m_memDataBusBusy; + uint64 m_memRefresh; + uint64 m_memRead; + uint64 m_memWrite; + uint64 m_memWaitCycles; + uint64 m_memInputQ; + uint64 m_memBankQ; + uint64 m_memArbWait; + uint64 m_memRandBusy; + uint64 m_memNotOld; + Vector m_memBankCount; + int m_banks_per_rank; + int m_ranks_per_dimm; + int m_dimms_per_channel; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const MemCntrlProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //MEM_CNTRL_PROFILER_H +#endif // __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 365f6cf42..2cc3eddfc 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -42,34 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.cc - * - * Description: See Profiler.hh - * - * $Id$ - * - */ - // Allows use of times() library call, which determines virtual runtime #include #include -#include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/profiler/AddressProfiler.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/network/Network.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/protocol/CacheMsg.hh" -#include "mem/protocol/Protocol.hh" -#include "mem/gems_common/util.hh" #include "mem/gems_common/Map.hh" -#include "mem/ruby/common/Debug.hh" +#include "mem/gems_common/PrioHeap.hh" +#include "mem/gems_common/util.hh" +#include "mem/protocol/CacheMsg.hh" #include "mem/protocol/MachineType.hh" - +#include "mem/protocol/Protocol.hh" +#include "mem/ruby/common/Debug.hh" +#include "mem/ruby/network/Network.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" #include "mem/ruby/system/System.hh" -extern std::ostream * debug_cout_ptr; +extern std::ostream* debug_cout_ptr; static double process_memory_total(); static double process_memory_resident(); @@ -77,570 +67,623 @@ static double process_memory_resident(); Profiler::Profiler(const Params *p) : SimObject(p) { - m_requestProfileMap_ptr = new Map; + m_requestProfileMap_ptr = new Map; - m_inst_profiler_ptr = NULL; - m_address_profiler_ptr = NULL; + m_inst_profiler_ptr = 
NULL; + m_address_profiler_ptr = NULL; - m_real_time_start_time = time(NULL); // Not reset in clearStats() - m_stats_period = 1000000; // Default - m_periodic_output_file_ptr = &cerr; + m_real_time_start_time = time(NULL); // Not reset in clearStats() + m_stats_period = 1000000; // Default + m_periodic_output_file_ptr = &cerr; - m_hot_lines = p->hot_lines; - m_all_instructions = p->all_instructions; + m_hot_lines = p->hot_lines; + m_all_instructions = p->all_instructions; - m_num_of_sequencers = p->num_of_sequencers; + m_num_of_sequencers = p->num_of_sequencers; - m_hot_lines = false; - m_all_instructions = false; + m_hot_lines = false; + m_all_instructions = false; - m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_address_profiler_ptr -> setHotLines(m_hot_lines); - m_address_profiler_ptr -> setAllInstructions(m_all_instructions); + m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_address_profiler_ptr->setHotLines(m_hot_lines); + m_address_profiler_ptr->setAllInstructions(m_all_instructions); - if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_inst_profiler_ptr -> setHotLines(m_hot_lines); - m_inst_profiler_ptr -> setAllInstructions(m_all_instructions); - } + if (m_all_instructions) { + m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_inst_profiler_ptr->setHotLines(m_hot_lines); + m_inst_profiler_ptr->setAllInstructions(m_all_instructions); + } } Profiler::~Profiler() { - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } - delete m_requestProfileMap_ptr; + delete m_requestProfileMap_ptr; } -void Profiler::wakeup() +void +Profiler::wakeup() { - // FIXME - avoid the repeated code + // FIXME - avoid the repeated code - Vector perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); + Vector perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } + for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } - (*m_periodic_output_file_ptr) << "ruby_cycles: " - << g_eventQueue_ptr->getTime()-m_ruby_start - << endl; + ostream &out = *m_periodic_output_file_ptr; - (*m_periodic_output_file_ptr) << "mbytes_resident: " - << process_memory_resident() - << endl; + out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl + << "mbytes_resident: " << process_memory_resident() << endl + << "mbytes_total: " << process_memory_total() << endl; - (*m_periodic_output_file_ptr) << "mbytes_total: " - << process_memory_total() - << endl; - - if (process_memory_total() > 0) { - (*m_periodic_output_file_ptr) << "resident_ratio: " - << process_memory_resident()/process_memory_total() - << endl; - } - - (*m_periodic_output_file_ptr) << "miss_latency: " - << m_allMissLatencyHistogram - << endl; - - *m_periodic_output_file_ptr << endl; - - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr); - } - - //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr); - g_eventQueue_ptr->scheduleEvent(this, m_stats_period); -} - -void Profiler::setPeriodicStatsFile(const string& filename) -{ - 
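    // Hedged aside: wakeup() above ends by re-arming itself with
    // scheduleEvent(this, m_stats_period), and the two setters below
    // prime that loop by scheduling the first event one cycle out.
    // A minimal sketch of the self-rescheduling pattern, assuming a
    // hypothetical Consumer subclass and the same event-queue API:
    //
    //     void PeriodicDumper::wakeup()
    //     {
    //         dump(*m_out);                                    // emit stats
    //         g_eventQueue_ptr->scheduleEvent(this, m_period); // re-arm
    //     }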
cout << "Recording periodic statistics to file '" << filename << "' every " - << m_stats_period << " Ruby cycles" << endl; - - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } - - m_periodic_output_file_ptr = new ofstream(filename.c_str()); - g_eventQueue_ptr->scheduleEvent(this, 1); -} - -void Profiler::setPeriodicStatsInterval(integer_t period) -{ - cout << "Recording periodic statistics every " << m_stats_period - << " Ruby cycles" << endl; - - m_stats_period = period; - g_eventQueue_ptr->scheduleEvent(this, 1); -} - -void Profiler::printConfig(ostream& out) const -{ - out << endl; - out << "Profiler Configuration" << endl; - out << "----------------------" << endl; - out << "periodic_stats_period: " << m_stats_period << endl; -} - -void Profiler::print(ostream& out) const -{ - out << "[Profiler]"; -} - -void Profiler::printStats(ostream& out, bool short_stats) -{ - out << endl; - if (short_stats) { - out << "SHORT "; - } - out << "Profiler Stats" << endl; - out << "--------------" << endl; - - time_t real_time_current = time(NULL); - double seconds = difftime(real_time_current, m_real_time_start_time); - double minutes = seconds/60.0; - double hours = minutes/60.0; - double days = hours/24.0; - Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; - - if (!short_stats) { - out << "Elapsed_time_in_seconds: " << seconds << endl; - out << "Elapsed_time_in_minutes: " << minutes << endl; - out << "Elapsed_time_in_hours: " << hours << endl; - out << "Elapsed_time_in_days: " << days << endl; - out << endl; - } - - // print the virtual runtimes as well - struct tms vtime; - times(&vtime); - seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; - minutes = seconds / 60.0; - hours = minutes / 60.0; - days = hours / 24.0; - out << "Virtual_time_in_seconds: " << seconds << endl; - out << "Virtual_time_in_minutes: " << minutes << endl; - out << "Virtual_time_in_hours: " << hours << endl; - out << "Virtual_time_in_days: " << days << endl; - out << endl; - - out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; - out << "Ruby_start_time: " << m_ruby_start << endl; - out << "Ruby_cycles: " << ruby_cycles << endl; - out << endl; - - if (!short_stats) { - out << "mbytes_resident: " << process_memory_resident() << endl; - out << "mbytes_total: " << process_memory_total() << endl; if (process_memory_total() > 0) { - out << "resident_ratio: " - << process_memory_resident()/process_memory_total() << endl; + out << "resident_ratio: " + << process_memory_resident() / process_memory_total() << endl; } - out << endl; - } - - Vector perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); - - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } - - out << "ruby_cycles_executed: " << perProcCycleCount << endl; - - out << endl; - - if (!short_stats) { - out << "Busy Controller Counts:" << endl; - for(int i=0; i < MachineType_NUM; i++) { - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - MachineID machID; - machID.type = (MachineType)i; - machID.num = j; - out << machID << ":" << m_busyControllerCount[i][j] << " "; - if ((j+1)%8 == 0) { - out << endl; - } - } - out << endl; - } - out << endl; - - out << "Busy Bank Count:" << m_busyBankCount << endl; - out << endl; - - out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; - out << endl; - } - - if (!short_stats) { - out << 
"All Non-Zero Cycle Demand Cache Accesses" << endl; - out << "----------------------------------------" << endl; out << "miss_latency: " << m_allMissLatencyHistogram << endl; - for(int i=0; i 0) { - out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl; - } - } - for(int i=0; i 0) { - out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; - } - } out << endl; - out << "All Non-Zero Cycle SW Prefetch Requests" << endl; - out << "------------------------------------" << endl; - out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; - for(int i=0; i 0) { - out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl; - } - } - for(int i=0; i 0) { - out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl; - } - } - out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; - - if (m_all_sharing_histogram.size() > 0) { - out << "all_sharing: " << m_all_sharing_histogram << endl; - out << "read_sharing: " << m_read_sharing_histogram << endl; - out << "write_sharing: " << m_write_sharing_histogram << endl; - - out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl; - out << "read_sharing_percent: "; m_read_sharing_histogram.printPercent(out); out << endl; - out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl; - - int64 total_miss = m_cache_to_cache + m_memory_to_cache; - out << "all_misses: " << total_miss << endl; - out << "cache_to_cache_misses: " << m_cache_to_cache << endl; - out << "memory_to_cache_misses: " << m_memory_to_cache << endl; - out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl; - out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl; - out << endl; - } - - if (m_outstanding_requests.size() > 0) { - out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; - out << endl; - } - } - - if (!short_stats) { - out << "Request vs. 
RubySystem State Profile" << endl; - out << "--------------------------------" << endl; - out << endl; - - Vector requestProfileKeys = m_requestProfileMap_ptr->keys(); - requestProfileKeys.sortVector(); - - for(int i=0; ilookup(requestProfileKeys[i]); - double percent = (100.0*double(temp_int))/double(m_requests); - while (requestProfileKeys[i] != "") { - out << setw(10) << string_split(requestProfileKeys[i], ':'); - } - out << setw(11) << temp_int; - out << setw(14) << percent << endl; - } - out << endl; - - out << "filter_action: " << m_filter_action_histogram << endl; - - if (!m_all_instructions) { - m_address_profiler_ptr->printStats(out); - } - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(out); + m_inst_profiler_ptr->printStats(out); } + //g_system_ptr->getNetwork()->printStats(out); + g_eventQueue_ptr->scheduleEvent(this, m_stats_period); +} + +void +Profiler::setPeriodicStatsFile(const string& filename) +{ + cout << "Recording periodic statistics to file '" << filename << "' every " + << m_stats_period << " Ruby cycles" << endl; + + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } + + m_periodic_output_file_ptr = new ofstream(filename.c_str()); + g_eventQueue_ptr->scheduleEvent(this, 1); +} + +void +Profiler::setPeriodicStatsInterval(integer_t period) +{ + cout << "Recording periodic statistics every " << m_stats_period + << " Ruby cycles" << endl; + + m_stats_period = period; + g_eventQueue_ptr->scheduleEvent(this, 1); +} + +void +Profiler::printConfig(ostream& out) const +{ + out << endl; + out << "Profiler Configuration" << endl; + out << "----------------------" << endl; + out << "periodic_stats_period: " << m_stats_period << endl; +} + +void +Profiler::print(ostream& out) const +{ + out << "[Profiler]"; +} + +void +Profiler::printStats(ostream& out, bool short_stats) +{ + out << endl; + if (short_stats) { + out << "SHORT "; + } + out << "Profiler Stats" << endl; + out << "--------------" << endl; + + time_t real_time_current = time(NULL); + double seconds = difftime(real_time_current, m_real_time_start_time); + double minutes = seconds / 60.0; + double hours = minutes / 60.0; + double days = hours / 24.0; + Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; + + if (!short_stats) { + out << "Elapsed_time_in_seconds: " << seconds << endl; + out << "Elapsed_time_in_minutes: " << minutes << endl; + out << "Elapsed_time_in_hours: " << hours << endl; + out << "Elapsed_time_in_days: " << days << endl; + out << endl; + } + + // print the virtual runtimes as well + struct tms vtime; + times(&vtime); + seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; + minutes = seconds / 60.0; + hours = minutes / 60.0; + days = hours / 24.0; + out << "Virtual_time_in_seconds: " << seconds << endl; + out << "Virtual_time_in_minutes: " << minutes << endl; + out << "Virtual_time_in_hours: " << hours << endl; + out << "Virtual_time_in_days: " << days << endl; + out << endl; + + out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; + out << "Ruby_start_time: " << m_ruby_start << endl; + out << "Ruby_cycles: " << ruby_cycles << endl; + out << endl; + + if (!short_stats) { + out << "mbytes_resident: " << process_memory_resident() << endl; + out << "mbytes_total: " << process_memory_total() << endl; + if (process_memory_total() > 0) { + out << "resident_ratio: " + << process_memory_resident()/process_memory_total() << endl; + } + out << endl; + } + + Vector perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); + + 
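    // Two hedged notes on the surrounding code, not changes made by the
    // patch: the virtual-runtime block above divides tms ticks by 100.0,
    // which assumes a 100 Hz tick rate; a portable variant would query
    // the system instead:
    //
    //     long ticks = sysconf(_SC_CLK_TCK);  // from <unistd.h>
    //     seconds = double(vtime.tms_utime + vtime.tms_stime) / ticks;
    //
    // And the +1 in the loop below biases every cycle count so a later
    // per-cycle ratio over perProcCycleCount[i] can never divide by zero.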
for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } + + out << "ruby_cycles_executed: " << perProcCycleCount << endl; + out << endl; - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; - out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; - out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl; - for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { - out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl; + + if (!short_stats) { + out << "Busy Controller Counts:" << endl; + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + for (int j = 0; j < size; j++) { + MachineID machID; + machID.type = (MachineType)i; + machID.num = j; + out << machID << ":" << m_busyControllerCount[i][j] << " "; + if ((j + 1) % 8 == 0) { + out << endl; + } + } + out << endl; + } + out << endl; + + out << "Busy Bank Count:" << m_busyBankCount << endl; + out << endl; + + out << "sequencer_requests_outstanding: " + << m_sequencer_requests << endl; + out << endl; } - printResourceUsage(out); - } + if (!short_stats) { + out << "All Non-Zero Cycle Demand Cache Accesses" << endl; + out << "----------------------------------------" << endl; + out << "miss_latency: " << m_allMissLatencyHistogram << endl; + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + if (m_missLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << RubyRequestType(i) << ": " + << m_missLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + if (m_machLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << GenericMachineType(i) << ": " + << m_machLatencyHistograms[i] << endl; + } + } + out << endl; + + out << "All Non-Zero Cycle SW Prefetch Requests" << endl; + out << "------------------------------------" << endl; + out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + if (m_SWPrefetchLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << CacheRequestType(i) << ": " + << m_SWPrefetchLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << GenericMachineType(i) << ": " + << m_SWPrefetchMachLatencyHistograms[i] << endl; + } + } + out << "prefetch_latency_L2Miss:" + << m_SWPrefetchL2MissLatencyHistogram << endl; + + if (m_all_sharing_histogram.size() > 0) { + out << "all_sharing: " << m_all_sharing_histogram << endl; + out << "read_sharing: " << m_read_sharing_histogram << endl; + out << "write_sharing: " << m_write_sharing_histogram << endl; + + out << "all_sharing_percent: "; + m_all_sharing_histogram.printPercent(out); + out << endl; + + out << "read_sharing_percent: "; + m_read_sharing_histogram.printPercent(out); + out << endl; + + out << "write_sharing_percent: "; + m_write_sharing_histogram.printPercent(out); + out << endl; + + int64 total_miss = m_cache_to_cache + m_memory_to_cache; + out << "all_misses: " << total_miss << endl; + out << "cache_to_cache_misses: " << m_cache_to_cache << endl; + out << "memory_to_cache_misses: " << m_memory_to_cache << endl; + out << "cache_to_cache_percent: " + << 100.0 * (double(m_cache_to_cache) / 
double(total_miss)) + << endl; + out << "memory_to_cache_percent: " + << 100.0 * (double(m_memory_to_cache) / double(total_miss)) + << endl; + out << endl; + } + + if (m_outstanding_requests.size() > 0) { + out << "outstanding_requests: "; + m_outstanding_requests.printPercent(out); + out << endl; + out << endl; + } + } + + if (!short_stats) { + out << "Request vs. RubySystem State Profile" << endl; + out << "--------------------------------" << endl; + out << endl; + + Vector requestProfileKeys = m_requestProfileMap_ptr->keys(); + requestProfileKeys.sortVector(); + + for (int i = 0; i < requestProfileKeys.size(); i++) { + int temp_int = + m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); + double percent = (100.0 * double(temp_int)) / double(m_requests); + while (requestProfileKeys[i] != "") { + out << setw(10) << string_split(requestProfileKeys[i], ':'); + } + out << setw(11) << temp_int; + out << setw(14) << percent << endl; + } + out << endl; + + out << "filter_action: " << m_filter_action_histogram << endl; + + if (!m_all_instructions) { + m_address_profiler_ptr->printStats(out); + } + + if (m_all_instructions) { + m_inst_profiler_ptr->printStats(out); + } + + out << endl; + out << "Message Delayed Cycles" << endl; + out << "----------------------" << endl; + out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; + out << "Total_nonPF_delay_cycles: " + << m_delayedCyclesNonPFHistogram << endl; + for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { + out << " virtual_network_" << i << "_delay_cycles: " + << m_delayedCyclesVCHistograms[i] << endl; + } + + printResourceUsage(out); + } } -void Profiler::printResourceUsage(ostream& out) const +void +Profiler::printResourceUsage(ostream& out) const { - out << endl; - out << "Resource Usage" << endl; - out << "--------------" << endl; + out << endl; + out << "Resource Usage" << endl; + out << "--------------" << endl; - integer_t pagesize = getpagesize(); // page size in bytes - out << "page_size: " << pagesize << endl; + integer_t pagesize = getpagesize(); // page size in bytes + out << "page_size: " << pagesize << endl; - rusage usage; - getrusage (RUSAGE_SELF, &usage); + rusage usage; + getrusage (RUSAGE_SELF, &usage); - out << "user_time: " << usage.ru_utime.tv_sec << endl; - out << "system_time: " << usage.ru_stime.tv_sec << endl; - out << "page_reclaims: " << usage.ru_minflt << endl; - out << "page_faults: " << usage.ru_majflt << endl; - out << "swaps: " << usage.ru_nswap << endl; - out << "block_inputs: " << usage.ru_inblock << endl; - out << "block_outputs: " << usage.ru_oublock << endl; + out << "user_time: " << usage.ru_utime.tv_sec << endl; + out << "system_time: " << usage.ru_stime.tv_sec << endl; + out << "page_reclaims: " << usage.ru_minflt << endl; + out << "page_faults: " << usage.ru_majflt << endl; + out << "swaps: " << usage.ru_nswap << endl; + out << "block_inputs: " << usage.ru_inblock << endl; + out << "block_outputs: " << usage.ru_oublock << endl; } -void Profiler::clearStats() +void +Profiler::clearStats() { - m_ruby_start = g_eventQueue_ptr->getTime(); + m_ruby_start = g_eventQueue_ptr->getTime(); - m_cycles_executed_at_start.setSize(m_num_of_sequencers); - for (int i=0; i < m_num_of_sequencers; i++) { - if (g_system_ptr == NULL) { - m_cycles_executed_at_start[i] = 0; + m_cycles_executed_at_start.setSize(m_num_of_sequencers); + for (int i = 0; i < m_num_of_sequencers; i++) { + if (g_system_ptr == NULL) { + m_cycles_executed_at_start[i] = 0; + } else { + m_cycles_executed_at_start[i] = 
g_system_ptr->getCycleCount(i); + } + } + + m_busyControllerCount.setSize(MachineType_NUM); // all machines + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + m_busyControllerCount[i].setSize(size); + for (int j = 0; j < size; j++) { + m_busyControllerCount[i][j] = 0; + } + } + m_busyBankCount = 0; + + m_delayedCyclesHistogram.clear(); + m_delayedCyclesNonPFHistogram.clear(); + int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks(); + m_delayedCyclesVCHistograms.setSize(size); + for (int i = 0; i < size; i++) { + m_delayedCyclesVCHistograms[i].clear(); + } + + m_missLatencyHistograms.setSize(RubyRequestType_NUM); + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + m_missLatencyHistograms[i].clear(200); + } + m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + m_machLatencyHistograms[i].clear(200); + } + m_allMissLatencyHistogram.clear(200); + + m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + m_SWPrefetchLatencyHistograms[i].clear(200); + } + m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + m_SWPrefetchMachLatencyHistograms[i].clear(200); + } + m_allSWPrefetchLatencyHistogram.clear(200); + + m_sequencer_requests.clear(); + m_read_sharing_histogram.clear(); + m_write_sharing_histogram.clear(); + m_all_sharing_histogram.clear(); + m_cache_to_cache = 0; + m_memory_to_cache = 0; + + // clear HashMaps + m_requestProfileMap_ptr->clear(); + + // count requests profiled + m_requests = 0; + + m_outstanding_requests.clear(); + m_outstanding_persistent_requests.clear(); + + // Flush the prefetches through the system - used so that there + // are no outstanding requests after stats are cleared + //g_eventQueue_ptr->triggerAllEvents(); + + // update the start time + m_ruby_start = g_eventQueue_ptr->getTime(); +} + +void +Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) +{ + if (msg.getType() != CacheRequestType_IFETCH) { + // Note: The following line should be commented out if you + // want to use the special profiling that is part of the GS320 + // protocol + + // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be + // profiled by the AddressProfiler + m_address_profiler_ptr-> + addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), + msg.getType(), msg.getAccessMode(), id, false); + } +} + +void +Profiler::profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner) +{ + Set set_contacted(owner); + if (type == AccessType_Write) { + set_contacted.addSet(sharers); + } + set_contacted.remove(requestor); + int number_contacted = set_contacted.count(); + + if (type == AccessType_Write) { + m_write_sharing_histogram.add(number_contacted); } else { - m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); + m_read_sharing_histogram.add(number_contacted); } - } + m_all_sharing_histogram.add(number_contacted); - m_busyControllerCount.setSize(MachineType_NUM); // all machines - for(int i=0; i < MachineType_NUM; i++) { - m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i)); - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - m_busyControllerCount[i][j] = 0; + if (number_contacted == 0) { + m_memory_to_cache++; + } else { + m_cache_to_cache++; } - } - m_busyBankCount = 0; - - 
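    // Usage sketch for the delayed-cycles histograms initialized here,
    // on the assumption that the network side is the caller:
    //
    //     profiler->profileMsgDelay(vnet, delayCycles);
    //
    // profileMsgDelay() further down asserts vnet is in range, adds the
    // delay to the per-network histogram, and skips virtual network 0
    // when accumulating Total_nonPF_delay_cycles, which suggests that
    // network carries the prefetch traffic.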
m_delayedCyclesHistogram.clear(); - m_delayedCyclesNonPFHistogram.clear(); - m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks()); - for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) { - m_delayedCyclesVCHistograms[i].clear(); - } - - m_missLatencyHistograms.setSize(RubyRequestType_NUM); - for(int i=0; iclear(); - - // count requests profiled - m_requests = 0; - - m_outstanding_requests.clear(); - m_outstanding_persistent_requests.clear(); - - // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared - //g_eventQueue_ptr->triggerAllEvents(); - - // update the start time - m_ruby_start = g_eventQueue_ptr->getTime(); } -void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) +void +Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - if (msg.getType() != CacheRequestType_IFETCH) { - - // Note: The following line should be commented out if you want to - // use the special profiling that is part of the GS320 protocol - - // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler - m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false); - } -} - -void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner) -{ - Set set_contacted(owner); - if (type == AccessType_Write) { - set_contacted.addSet(sharers); - } - set_contacted.remove(requestor); - int number_contacted = set_contacted.count(); - - if (type == AccessType_Write) { - m_write_sharing_histogram.add(number_contacted); - } else { - m_read_sharing_histogram.add(number_contacted); - } - m_all_sharing_histogram.add(number_contacted); - - if (number_contacted == 0) { - m_memory_to_cache++; - } else { - m_cache_to_cache++; - } - -} - -void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); - m_delayedCyclesHistogram.add(delayCycles); - m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); - if (virtualNetwork != 0) { - m_delayedCyclesNonPFHistogram.add(delayCycles); - } + assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); + m_delayedCyclesHistogram.add(delayCycles); + m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); + if (virtualNetwork != 0) { + m_delayedCyclesNonPFHistogram.add(delayCycles); + } } // profiles original cache requests including PUTs -void Profiler::profileRequest(const string& requestStr) +void +Profiler::profileRequest(const string& requestStr) { - m_requests++; + m_requests++; - if (m_requestProfileMap_ptr->exist(requestStr)) { - (m_requestProfileMap_ptr->lookup(requestStr))++; - } else { - m_requestProfileMap_ptr->add(requestStr, 1); - } + if (m_requestProfileMap_ptr->exist(requestStr)) { + (m_requestProfileMap_ptr->lookup(requestStr))++; + } else { + m_requestProfileMap_ptr->add(requestStr, 1); + } } -void Profiler::controllerBusy(MachineID machID) +void +Profiler::controllerBusy(MachineID machID) { - m_busyControllerCount[(int)machID.type][(int)machID.num]++; + m_busyControllerCount[(int)machID.type][(int)machID.num]++; } -void Profiler::profilePFWait(Time waitTime) +void +Profiler::profilePFWait(Time waitTime) { - m_prefetchWaitHistogram.add(waitTime); + m_prefetchWaitHistogram.add(waitTime); } -void Profiler::bankBusy() +void +Profiler::bankBusy() { - m_busyBankCount++; + m_busyBankCount++; 
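    // Worked example for profileSharing() above, with hypothetical
    // values: a write by requestor 2 with owner {0} and sharers {2, 3}
    // builds set_contacted = {0, 2, 3}, removes the requestor to get
    // {0, 3}, so number_contacted == 2; the write-sharing and
    // all-sharing histograms each record a 2, and m_cache_to_cache is
    // incremented because another cache, not memory, supplied the line.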
} // non-zero cycle demand request -void Profiler::missLatency(Time t, RubyRequestType type) +void +Profiler::missLatency(Time t, RubyRequestType type) { - m_allMissLatencyHistogram.add(t); - m_missLatencyHistograms[type].add(t); + m_allMissLatencyHistogram.add(t); + m_missLatencyHistograms[type].add(t); } // non-zero cycle prefetch request -void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) +void +Profiler::swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach) { - m_allSWPrefetchLatencyHistogram.add(t); - m_SWPrefetchLatencyHistograms[type].add(t); - m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); - if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_SWPrefetchL2MissLatencyHistogram.add(t); - } + m_allSWPrefetchLatencyHistogram.add(t); + m_SWPrefetchLatencyHistograms[type].add(t); + m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); + if (respondingMach == GenericMachineType_Directory || + respondingMach == GenericMachineType_NUM) { + m_SWPrefetchL2MissLatencyHistogram.add(t); + } } -void Profiler::profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note) +void +Profiler::profileTransition(const string& component, NodeID version, + Address addr, const string& state, const string& event, + const string& next_state, const string& note) { - const int EVENT_SPACES = 20; - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - const int COMP_SPACES = 10; - const int STATE_SPACES = 6; + const int EVENT_SPACES = 20; + const int ID_SPACES = 3; + const int TIME_SPACES = 7; + const int COMP_SPACES = 10; + const int STATE_SPACES = 6; - if ((g_debug_ptr->getDebugTime() > 0) && - (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) { - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << version << " "; - (* debug_cout_ptr) << setw(COMP_SPACES) << component; - (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " "; + if (g_debug_ptr->getDebugTime() <= 0 || + g_eventQueue_ptr->getTime() < g_debug_ptr->getDebugTime()) + return; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(STATE_SPACES) << state; - (* debug_cout_ptr) << ">"; - (* debug_cout_ptr).flags(ios::left); - (* debug_cout_ptr) << setw(STATE_SPACES) << next_state; + ostream &out = *debug_cout_ptr; + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << version << " "; + out << setw(COMP_SPACES) << component; + out << setw(EVENT_SPACES) << event << " "; - (* debug_cout_ptr) << " " << addr << " " << note; + out.flags(ios::right); + out << setw(STATE_SPACES) << state; + out << ">"; + out.flags(ios::left); + out << setw(STATE_SPACES) << next_state; - (* debug_cout_ptr) << endl; - } + out << " " << addr << " " << note; + + out << endl; } // Helper function -static double process_memory_total() +static double +process_memory_total() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(total_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 
1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(total_size_in_pages) * MULTIPLIER; // size in megabytes } -static double process_memory_resident() +static double +process_memory_resident() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(res_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(res_size_in_pages) * MULTIPLIER; // size in megabytes } -void Profiler::rubyWatch(int id){ +void +Profiler::rubyWatch(int id) +{ uint64 tr = 0; Address watch_address = Address(tr); const int ID_SPACES = 3; const int TIME_SPACES = 7; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << "RUBY WATCH " - << watch_address - << endl; + ostream &out = *debug_cout_ptr; - if(!m_watch_address_list_ptr->exist(watch_address)){ - m_watch_address_list_ptr->add(watch_address, 1); + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << id << " " + << "RUBY WATCH " << watch_address << endl; + + if (!m_watch_address_list_ptr->exist(watch_address)) { + m_watch_address_list_ptr->add(watch_address, 1); } } -bool Profiler::watchAddress(Address addr){ +bool +Profiler::watchAddress(Address addr) +{ if (m_watch_address_list_ptr->exist(addr)) - return true; + return true; else - return false; + return false; } Profiler * diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 3ae1f5e31..bf4bf8a50 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -42,35 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.hh - * - * Description: - * - * $Id$ - * - */ +#ifndef __MEM_RUBY_PROFILER_PROFILER_HH__ +#define __MEM_RUBY_PROFILER_PROFILER_HH__ -#ifndef PROFILER_H -#define PROFILER_H - -#include "mem/ruby/libruby.hh" - -#include "mem/ruby/common/Global.hh" -#include "mem/protocol/GenericMachineType.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Consumer.hh" #include "mem/protocol/AccessModeType.hh" #include "mem/protocol/AccessType.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/system/MachineID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/GenericMachineType.hh" +#include "mem/protocol/GenericRequestType.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" #include "mem/ruby/common/Set.hh" -#include "mem/protocol/CacheRequestType.hh" -#include "mem/protocol/GenericRequestType.hh" +#include "mem/ruby/libruby.hh" +#include 
"mem/ruby/system/MachineID.hh" #include "mem/ruby/system/MemoryControl.hh" - +#include "mem/ruby/system/NodeID.hh" #include "params/RubyProfiler.hh" #include "sim/sim_object.hh" @@ -79,155 +68,165 @@ class AddressProfiler; template class Map; -class Profiler : public SimObject, public Consumer { -public: - // Constructors +class Profiler : public SimObject, public Consumer +{ + public: typedef RubyProfilerParams Params; - Profiler(const Params *); + Profiler(const Params *); + ~Profiler(); - // Destructor - ~Profiler(); + void wakeup(); - // Public Methods - void wakeup(); + void setPeriodicStatsFile(const string& filename); + void setPeriodicStatsInterval(integer_t period); - void setPeriodicStatsFile(const string& filename); - void setPeriodicStatsInterval(integer_t period); + void printStats(ostream& out, bool short_stats=false); + void printShortStats(ostream& out) { printStats(out, true); } + void printTraceStats(ostream& out) const; + void clearStats(); + void printConfig(ostream& out) const; + void printResourceUsage(ostream& out) const; - void printStats(ostream& out, bool short_stats=false); - void printShortStats(ostream& out) { printStats(out, true); } - void printTraceStats(ostream& out) const; - void clearStats(); - void printConfig(ostream& out) const; - void printResourceUsage(ostream& out) const; + AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } + AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } - AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } - AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } + void addAddressTraceSample(const CacheMsg& msg, NodeID id); - void addAddressTraceSample(const CacheMsg& msg, NodeID id); + void profileRequest(const string& requestStr); + void profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner); - void profileRequest(const string& requestStr); - void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); + void profileMulticastRetry(const Address& addr, int count); - void profileMulticastRetry(const Address& addr, int count); + void profileFilterAction(int action); - void profileFilterAction(int action); + void profileConflictingRequests(const Address& addr); - void profileConflictingRequests(const Address& addr); - void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } - void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } - void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } + void + profileOutstandingRequest(int outstanding) + { + m_outstanding_requests.add(outstanding); + } - void recordPrediction(bool wasGood, bool wasPredicted); + void + profileOutstandingPersistentRequest(int outstanding) + { + m_outstanding_persistent_requests.add(outstanding); + } - void startTransaction(int cpu); - void endTransaction(int cpu); - void profilePFWait(Time waitTime); + void + profileAverageLatencyEstimate(int latency) + { + m_average_latency_estimate.add(latency); + } - void controllerBusy(MachineID machID); - void bankBusy(); - void missLatency(Time t, RubyRequestType type); - void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void sequencerRequests(int num) { m_sequencer_requests.add(num); } + void recordPrediction(bool wasGood, bool wasPredicted); - void 
profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note); - void profileMsgDelay(int virtualNetwork, int delayCycles); + void startTransaction(int cpu); + void endTransaction(int cpu); + void profilePFWait(Time waitTime); - void print(ostream& out) const; + void controllerBusy(MachineID machID); + void bankBusy(); + void missLatency(Time t, RubyRequestType type); + void swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach); + void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void rubyWatch(int proc); - bool watchAddress(Address addr); + void profileTransition(const string& component, NodeID version, + Address addr, const string& state, + const string& event, const string& next_state, + const string& note); + void profileMsgDelay(int virtualNetwork, int delayCycles); - // return Ruby's start time - Time getRubyStartTime(){ - return m_ruby_start; - } + void print(ostream& out) const; - //added by SS - bool getHotLines() { return m_hot_lines; } - bool getAllInstructions() { return m_all_instructions; } + void rubyWatch(int proc); + bool watchAddress(Address addr); -private: + // return Ruby's start time + Time + getRubyStartTime() + { + return m_ruby_start; + } - // Private copy constructor and assignment operator - Profiler(const Profiler& obj); - Profiler& operator=(const Profiler& obj); + // added by SS + bool getHotLines() { return m_hot_lines; } + bool getAllInstructions() { return m_all_instructions; } - // Data Members (m_ prefix) - AddressProfiler* m_address_profiler_ptr; - AddressProfiler* m_inst_profiler_ptr; + private: + // Private copy constructor and assignment operator + Profiler(const Profiler& obj); + Profiler& operator=(const Profiler& obj); - Vector m_instructions_executed_at_start; - Vector m_cycles_executed_at_start; + AddressProfiler* m_address_profiler_ptr; + AddressProfiler* m_inst_profiler_ptr; - ostream* m_periodic_output_file_ptr; - integer_t m_stats_period; + Vector m_instructions_executed_at_start; + Vector m_cycles_executed_at_start; - Time m_ruby_start; - time_t m_real_time_start_time; + ostream* m_periodic_output_file_ptr; + integer_t m_stats_period; - Vector < Vector < integer_t > > m_busyControllerCount; - integer_t m_busyBankCount; - Histogram m_multicast_retry_histogram; + Time m_ruby_start; + time_t m_real_time_start_time; - Histogram m_filter_action_histogram; - Histogram m_tbeProfile; + Vector > m_busyControllerCount; + integer_t m_busyBankCount; + Histogram m_multicast_retry_histogram; - Histogram m_sequencer_requests; - Histogram m_read_sharing_histogram; - Histogram m_write_sharing_histogram; - Histogram m_all_sharing_histogram; - int64 m_cache_to_cache; - int64 m_memory_to_cache; + Histogram m_filter_action_histogram; + Histogram m_tbeProfile; - Histogram m_prefetchWaitHistogram; + Histogram m_sequencer_requests; + Histogram m_read_sharing_histogram; + Histogram m_write_sharing_histogram; + Histogram m_all_sharing_histogram; + int64 m_cache_to_cache; + int64 m_memory_to_cache; - Vector m_missLatencyHistograms; - Vector m_machLatencyHistograms; - Histogram m_allMissLatencyHistogram; + Histogram m_prefetchWaitHistogram; - Histogram m_allSWPrefetchLatencyHistogram; - Histogram m_SWPrefetchL2MissLatencyHistogram; - Vector m_SWPrefetchLatencyHistograms; - Vector m_SWPrefetchMachLatencyHistograms; + Vector m_missLatencyHistograms; + Vector m_machLatencyHistograms; + Histogram 
m_allMissLatencyHistogram; - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - Vector m_delayedCyclesVCHistograms; + Histogram m_allSWPrefetchLatencyHistogram; + Histogram m_SWPrefetchL2MissLatencyHistogram; + Vector m_SWPrefetchLatencyHistograms; + Vector m_SWPrefetchMachLatencyHistograms; - Histogram m_outstanding_requests; - Histogram m_outstanding_persistent_requests; + Histogram m_delayedCyclesHistogram; + Histogram m_delayedCyclesNonPFHistogram; + Vector m_delayedCyclesVCHistograms; - Histogram m_average_latency_estimate; + Histogram m_outstanding_requests; + Histogram m_outstanding_persistent_requests; - Map* m_watch_address_list_ptr; - // counts all initiated cache request including PUTs - int m_requests; - Map * m_requestProfileMap_ptr; + Histogram m_average_latency_estimate; - //added by SS - bool m_hot_lines; - bool m_all_instructions; + Map* m_watch_address_list_ptr; + // counts all initiated cache request including PUTs + int m_requests; + Map * m_requestProfileMap_ptr; - int m_num_of_sequencers; + //added by SS + bool m_hot_lines; + bool m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const Profiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const Profiler& obj) +inline ostream& +operator<<(ostream& out, const Profiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //PROFILER_H +#endif // __MEM_RUBY_PROFILER_PROFILER_HH__ diff --git a/src/mem/ruby/profiler/StoreTrace.cc b/src/mem/ruby/profiler/StoreTrace.cc index 4d4e4798d..ce42560b6 100644 --- a/src/mem/ruby/profiler/StoreTrace.cc +++ b/src/mem/ruby/profiler/StoreTrace.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,132 +26,130 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/StoreTrace.hh" #include "mem/ruby/eventqueue/RubyEventQueue.hh" +#include "mem/ruby/profiler/StoreTrace.hh" -bool StoreTrace::s_init = false; // Total number of store lifetimes of all lines -int64 StoreTrace::s_total_samples = 0; // Total number of store lifetimes of all lines +bool StoreTrace::s_init = false; // Total number of store lifetimes of + // all lines +int64 StoreTrace::s_total_samples = 0; // Total number of store + // lifetimes of all lines Histogram* StoreTrace::s_store_count_ptr = NULL; Histogram* StoreTrace::s_store_first_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_last_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_first_to_last_ptr = NULL; -StoreTrace::StoreTrace(const Address& addr) : - m_store_count(-1), m_store_first_to_stolen(-1), m_store_last_to_stolen(-1), m_store_first_to_last(-1) +StoreTrace::StoreTrace(const Address& addr) + : m_store_count(-1), m_store_first_to_stolen(-1), + m_store_last_to_stolen(-1), m_store_first_to_last(-1) { - StoreTrace::initSummary(); - m_addr = addr; - m_total_samples = 0; - m_last_writer = -1; // Really -1 isn't valid, so this will trigger the initilization code - m_stores_this_interval = 0; + StoreTrace::initSummary(); + m_addr = addr; + m_total_samples = 0; + + // Really -1 isn't valid, so this will trigger the initilization code + m_last_writer = -1; + m_stores_this_interval = 0; } StoreTrace::~StoreTrace() { } -void StoreTrace::print(ostream& out) const +void +StoreTrace::print(ostream& out) const { - out << m_addr; - out << " total_samples: " << m_total_samples << endl; - out << "store_count: " << m_store_count << endl; - out << "store_first_to_stolen: " << m_store_first_to_stolen << endl; - out << "store_last_to_stolen: " << m_store_last_to_stolen << endl; - out << "store_first_to_last: " << m_store_first_to_last << endl; + out << m_addr + << " total_samples: " << m_total_samples << endl + << "store_count: " << m_store_count << endl + << "store_first_to_stolen: " << m_store_first_to_stolen << endl + << "store_last_to_stolen: " << m_store_last_to_stolen << endl + << "store_first_to_last: " << m_store_first_to_last << endl; } -// Class method -void StoreTrace::initSummary() +void +StoreTrace::initSummary() { - if (!s_init) { + if (!s_init) { + s_total_samples = 0; + s_store_count_ptr = new Histogram(-1); + s_store_first_to_stolen_ptr = new Histogram(-1); + s_store_last_to_stolen_ptr = new Histogram(-1); + s_store_first_to_last_ptr = new Histogram(-1); + } + s_init = true; +} + +void +StoreTrace::printSummary(ostream& out) +{ + out << "total_samples: " << s_total_samples << endl; + out << "store_count: " << (*s_store_count_ptr) << endl; + out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; + out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; + out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; +} + +void +StoreTrace::clearSummary() +{ + StoreTrace::initSummary(); s_total_samples = 0; - s_store_count_ptr = new Histogram(-1); - s_store_first_to_stolen_ptr = new Histogram(-1); - s_store_last_to_stolen_ptr = new Histogram(-1); - s_store_first_to_last_ptr = new Histogram(-1); - } - s_init = true; + s_store_count_ptr->clear(); + s_store_first_to_stolen_ptr->clear(); + s_store_last_to_stolen_ptr->clear(); + s_store_first_to_last_ptr->clear(); } -// Class method -void StoreTrace::printSummary(ostream& out) +void +StoreTrace::store(NodeID node) { - out << "total_samples: " << s_total_samples << endl; - out 
<< "store_count: " << (*s_store_count_ptr) << endl; - out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; - out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; - out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; -} - -// Class method -void StoreTrace::clearSummary() -{ - StoreTrace::initSummary(); - s_total_samples = 0; - s_store_count_ptr->clear(); - s_store_first_to_stolen_ptr->clear(); - s_store_last_to_stolen_ptr->clear(); - s_store_first_to_last_ptr->clear(); -} - -void StoreTrace::store(NodeID node) -{ - Time current = g_eventQueue_ptr->getTime(); - - assert((m_last_writer == -1) || (m_last_writer == node)); - - m_last_writer = node; - if (m_last_writer == -1) { - assert(m_stores_this_interval == 0); - } - - if (m_stores_this_interval == 0) { - // A new proessor just wrote the line, so reset the stats - m_first_store = current; - } - - m_last_store = current; - m_stores_this_interval++; -} - -void StoreTrace::downgrade(NodeID node) -{ - if (node == m_last_writer) { Time current = g_eventQueue_ptr->getTime(); - assert(m_stores_this_interval != 0); - assert(m_last_store != 0); - assert(m_first_store != 0); - assert(m_last_writer != -1); - // Per line stats - m_store_first_to_stolen.add(current - m_first_store); - m_store_count.add(m_stores_this_interval); - m_store_last_to_stolen.add(current - m_last_store); - m_store_first_to_last.add(m_last_store - m_first_store); - m_total_samples++; + assert((m_last_writer == -1) || (m_last_writer == node)); - // Global stats - assert(s_store_first_to_stolen_ptr != NULL); - s_store_first_to_stolen_ptr->add(current - m_first_store); - s_store_count_ptr->add(m_stores_this_interval); - s_store_last_to_stolen_ptr->add(current - m_last_store); - s_store_first_to_last_ptr->add(m_last_store - m_first_store); - s_total_samples++; + m_last_writer = node; + if (m_last_writer == -1) { + assert(m_stores_this_interval == 0); + } - // Initilize for next go round - m_stores_this_interval = 0; - m_last_store = 0; - m_first_store = 0; - m_last_writer = -1; - } + if (m_stores_this_interval == 0) { + // A new proessor just wrote the line, so reset the stats + m_first_store = current; + } + + m_last_store = current; + m_stores_this_interval++; } -bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2) +void +StoreTrace::downgrade(NodeID node) { - return (n1->getTotal() > n2->getTotal()); + if (node == m_last_writer) { + Time current = g_eventQueue_ptr->getTime(); + assert(m_stores_this_interval != 0); + assert(m_last_store != 0); + assert(m_first_store != 0); + assert(m_last_writer != -1); + + // Per line stats + m_store_first_to_stolen.add(current - m_first_store); + m_store_count.add(m_stores_this_interval); + m_store_last_to_stolen.add(current - m_last_store); + m_store_first_to_last.add(m_last_store - m_first_store); + m_total_samples++; + + // Global stats + assert(s_store_first_to_stolen_ptr != NULL); + s_store_first_to_stolen_ptr->add(current - m_first_store); + s_store_count_ptr->add(m_stores_this_interval); + s_store_last_to_stolen_ptr->add(current - m_last_store); + s_store_first_to_last_ptr->add(m_last_store - m_first_store); + s_total_samples++; + + // Initilize for next go round + m_stores_this_interval = 0; + m_last_store = 0; + m_first_store = 0; + m_last_writer = -1; + } } diff --git a/src/mem/ruby/profiler/StoreTrace.hh b/src/mem/ruby/profiler/StoreTrace.hh index 5cdf7ce41..8bddfe6c7 100644 --- a/src/mem/ruby/profiler/StoreTrace.hh +++ b/src/mem/ruby/profiler/StoreTrace.hh 
diff --git a/src/mem/ruby/profiler/StoreTrace.hh b/src/mem/ruby/profiler/StoreTrace.hh
index 5cdf7ce41..8bddfe6c7 100644
--- a/src/mem/ruby/profiler/StoreTrace.hh
+++ b/src/mem/ruby/profiler/StoreTrace.hh
@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,82 +26,63 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

-/*
- * $Id$
- *
- * Description:
- *
- */
+#ifndef __MEM_RUBY_PROFILER_STORETRACE_HH__
+#define __MEM_RUBY_PROFILER_STORETRACE_HH__

-#ifndef StoreTrace_H
-#define StoreTrace_H
-
-#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/common/Histogram.hh"

-class StoreTrace {
-public:
-  // Constructors
-  StoreTrace() { }
-  explicit StoreTrace(const Address& addr);
+class StoreTrace
+{
+  public:
+    StoreTrace() { }
+    explicit StoreTrace(const Address& addr);
+    ~StoreTrace();

-  // Destructor
-  ~StoreTrace();
+    void store(NodeID node);
+    void downgrade(NodeID node);
+    int getTotal() const { return m_total_samples; }
+    static void initSummary();
+    static void printSummary(ostream& out);
+    static void clearSummary();

-  // Public Methods
-  void store(NodeID node);
-  void downgrade(NodeID node);
-  int getTotal() const { return m_total_samples; }
-  static void initSummary();
-  static void printSummary(ostream& out);
-  static void clearSummary();
+    void print(ostream& out) const;

-  void print(ostream& out) const;
-private:
-  // Private Methods
+  private:
+    static bool s_init;
+    static int64 s_total_samples; // Total number of store lifetimes
+                                  // of all lines
+    static Histogram* s_store_count_ptr;
+    static Histogram* s_store_first_to_stolen_ptr;
+    static Histogram* s_store_last_to_stolen_ptr;
+    static Histogram* s_store_first_to_last_ptr;

-  // Private copy constructor and assignment operator
-  // StoreTrace(const StoreTrace& obj);
-  // StoreTrace& operator=(const StoreTrace& obj);
+    Address m_addr;
+    NodeID m_last_writer;
+    Time m_first_store;
+    Time m_last_store;
+    int m_stores_this_interval;

-  // Class Members (s_ prefix)
-  static bool s_init;
-  static int64 s_total_samples; // Total number of store lifetimes of all lines
-  static Histogram* s_store_count_ptr;
-  static Histogram* s_store_first_to_stolen_ptr;
-  static Histogram* s_store_last_to_stolen_ptr;
-  static Histogram* s_store_first_to_last_ptr;
-
-  // Data Members (m_ prefix)
-
-  Address m_addr;
-  NodeID m_last_writer;
-  Time m_first_store;
-  Time m_last_store;
-  int m_stores_this_interval;
-
-  int64 m_total_samples; // Total number of store lifetimes of this line
-  Histogram m_store_count;
-  Histogram m_store_first_to_stolen;
-  Histogram m_store_last_to_stolen;
-  Histogram m_store_first_to_last;
+    int64 m_total_samples; // Total number of store lifetimes of this line
+    Histogram m_store_count;
+    Histogram m_store_first_to_stolen;
+    Histogram m_store_last_to_stolen;
+    Histogram m_store_first_to_last;
 };

-bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2);
-
-// Output operator declaration
-ostream& operator<<(ostream& out, const StoreTrace& obj);
-
-// ******************* Definitions *******************
-
-// Output operator definition
-extern inline
-ostream& operator<<(ostream& out, const StoreTrace& obj)
+inline bool
+node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2)
 {
-  obj.print(out);
-  out << flush;
-  return out;
+    return n1->getTotal() > n2->getTotal();
 }

-#endif //StoreTrace_H
+inline ostream&
+operator<<(ostream& out, const StoreTrace& obj)
+{
+    obj.print(out);
+    out << flush;
+    return out;
+}
+
+#endif // __MEM_RUBY_PROFILER_STORETRACE_HH__
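One subtlety preserved by the header cleanup above: node_less_then_eq() compares with ">", so despite its name it is a descending comparator, and sorting StoreTrace pointers with it puts the most heavily sampled lines first. A hypothetical Python equivalent of that ordering (illustrative only, not part of this patch):

    traces = [('0x40', 3), ('0x80', 17), ('0xc0', 9)]  # (line, total samples)
    traces.sort(key=lambda t: t[1], reverse=True)      # what comparing with '>' achieves
    print(traces)  # [('0x80', 17), ('0xc0', 9), ('0x40', 3)]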
diff --git a/util/regress b/util/regress
index 1d0b9049a..a74bd09c3 100755
--- a/util/regress
+++ b/util/regress
@@ -36,28 +36,29 @@ from subprocess import call
 progname = os.path.basename(sys.argv[0])

 optparser = optparse.OptionParser()
-optparser.add_option('-v', '--verbose', dest='verbose', action='store_true',
-                     default=False,
-                     help='echo commands before executing')
-optparser.add_option('--builds', dest='builds',
-                     default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \
-                     'ALPHA_SE_MESI_CMP_directory,' \
-                     'ALPHA_SE_MOESI_CMP_directory,' \
-                     'ALPHA_SE_MOESI_CMP_token,' \
-                     'ALPHA_FS,MIPS_SE,' \
-                     'POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
-                     help='comma-separated list of build targets to test '
-                     " (default: '%default')" )
-optparser.add_option('--variants', dest='variants',
-                     default='fast',
-                     help='comma-separated list of build variants to test '
-                     " (default: '%default')" )
-optparser.add_option('--scons-opts', dest='scons_opts', default='',
-                     help='scons options', metavar='OPTS')
-optparser.add_option('-j', '--jobs', type='int', default=1,
-                     help='number of parallel jobs to use')
-optparser.add_option('-k', '--keep-going', action='store_true',
-                     help='keep going after errors')
+add_option = optparser.add_option
+add_option('-v', '--verbose', dest='verbose', action='store_true',
+           default=False,
+           help='echo commands before executing')
+add_option('--builds', dest='builds',
+           default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \
+           'ALPHA_SE_MESI_CMP_directory,' \
+           'ALPHA_SE_MOESI_CMP_directory,' \
+           'ALPHA_SE_MOESI_CMP_token,' \
+           'ALPHA_FS,MIPS_SE,POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
+           help="comma-separated build targets to test (default: '%default')")
+add_option('--variants', dest='variants', default='fast',
+           help="comma-separated build variants to test (default: '%default')")
+add_option('--scons-opts', dest='scons_opts', default='', metavar='OPTS',
+           help='scons options')
+add_option('-j', '--jobs', type='int', default=1,
+           help='number of parallel jobs to use')
+add_option('-k', '--keep-going', action='store_true',
+           help='keep going after errors')
+add_option('-D', '--build-dir', default='',
+           help='build directory location')
+add_option('-n', "--no-exec", default=False, action='store_true',
+           help="don't actually invoke scons, just echo SCons command line")

 (options, tests) = optparser.parse_args()

@@ -66,6 +67,8 @@ optparser.add_option('-k', '--keep-going', action='store_true',
 builds = options.builds.split(',')
 variants = options.variants.split(',')

+options.build_dir = os.path.join(options.build_dir, 'build')
+
 # Call os.system() and raise exception if return status is non-zero
 def system(cmd):
     try:
@@ -91,11 +94,11 @@ def shellquote(s):

 if not tests:
     print "No tests specified, just building binaries."
-    targets = ['build/%s/m5.%s' % (build, variant)
+    targets = ['%s/%s/m5.%s' % (options.build_dir, build, variant)
                for build in builds
                for variant in variants]
 elif 'all' in tests:
-    targets = ['build/%s/tests/%s' % (build, variant)
+    targets = ['%s/%s/tests/%s' % (options.build_dir, build, variant)
                for build in builds
                for variant in variants]
 else:
@@ -103,17 +106,36 @@ else:
     # If we ever get a quick SPARC_FS test, this code should be removed
     if 'quick' in tests and 'SPARC_FS' in builds:
         builds.remove('SPARC_FS')
-    targets = ['build/%s/tests/%s/%s' % (build, variant, test)
+    targets = ['%s/%s/tests/%s/%s' % (options.build_dir, build, variant, test)
               for build in builds
               for variant in variants
               for test in tests]

+def cpu_count():
+    if 'bsd' in sys.platform or sys.platform == 'darwin':
+        try:
+            return int(os.popen('sysctl -n hw.ncpu').read())
+        except ValueError:
+            pass
+    else:
+        try:
+            return os.sysconf('SC_NPROCESSORS_ONLN')
+        except (ValueError, OSError, AttributeError):
+            pass
+
+    raise NotImplementedError('cannot determine number of cpus')
+
 scons_opts = options.scons_opts
 if options.jobs != 1:
+    if options.jobs == 0:
+        options.jobs = cpu_count()
     scons_opts += ' -j %d' % options.jobs
 if options.keep_going:
     scons_opts += ' -k'

-system('scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets)))
-
-sys.exit(0)
+cmd = 'scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets))
+if options.no_exec:
+    print cmd
+else:
+    system(cmd)
+    sys.exit(0)
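A note on the new --build-dir behavior: because os.path.join() ignores an empty leading component, the default of '' reduces to plain 'build', so targets land exactly where they did before the patch; a non-empty -D value relocates the whole tree. A small illustrative check (hypothetical paths, not part of this patch):

    import os

    # Default (-D not given): os.path.join('', 'build') == 'build'.
    build_dir = os.path.join('', 'build')
    print('%s/%s/m5.%s' % (build_dir, 'ALPHA_SE', 'fast'))
    # -> build/ALPHA_SE/m5.fast, the pre-patch layout

    # With -D /scratch/m5, everything moves under that directory.
    build_dir = os.path.join('/scratch/m5', 'build')
    print('%s/%s/m5.%s' % (build_dir, 'ALPHA_SE', 'fast'))
    # -> /scratch/m5/build/ALPHA_SE/m5.fast

Alongside this, -j 0 now auto-sizes the SCons job count via cpu_count(), and -n echoes the assembled scons command line without executing it.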