m5: merge inorder updates

Korey Sewell 2010-03-27 02:23:00 -04:00
commit 1c98bc5a56
25 changed files with 1662 additions and 1607 deletions


@@ -275,7 +275,6 @@ def template StoreExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -310,7 +309,6 @@ def template StoreCondExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, &write_result);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -344,7 +342,6 @@ def template StoreInitiateAcc {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         return fault;
@@ -478,9 +475,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
 
-    # add hook to get effective addresses into execution trace output.
-    ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
-
     # Some CPU models execute the memory operation as an atomic unit,
     # while others want to separate them into an effective address
     # computation and a memory access operation.  As a result, we need


@@ -172,7 +172,6 @@ def template StoreExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -204,7 +203,6 @@ def template StoreInitiateAcc {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         // Need to write back any potential address register update


@@ -305,7 +305,6 @@ def template StoreExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -342,7 +341,6 @@ def template StoreFPExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -377,7 +375,6 @@ def template StoreCondExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, &write_result);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -411,7 +408,6 @@ def template StoreInitiateAcc {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         return fault;
@@ -435,8 +431,6 @@ def template StoreCompleteAcc {{
         if (fault == NoFault) {
             %(op_wb)s;
-
-            if (traceData) { traceData->setData(getMemData(xc, pkt)); }
         }
 
         return fault;
@@ -459,8 +453,6 @@ def template StoreCompleteAcc {{
         if (fault == NoFault) {
             %(op_wb)s;
-
-            if (traceData) { traceData->setData(getMemData(xc, pkt)); }
         }
 
         return fault;


@@ -38,9 +38,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
 
-    # add hook to get effective addresses into execution trace output.
-    ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
-
     # Some CPU models execute the memory operation as an atomic unit,
     # while others want to separate them into an effective address
     # computation and a memory access operation.  As a result, we need


@@ -166,7 +166,6 @@ def template StoreExecute {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         if (fault == NoFault) {
@@ -196,7 +195,6 @@ def template StoreInitiateAcc {{
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
                               memAccessFlags, NULL);
-            if (traceData) { traceData->setData(Mem); }
         }
 
         // Need to write back any potential address register update


@@ -97,9 +97,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
 
-    # add hook to get effective addresses into execution trace output.
-    ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
-
     # Generate InstObjParams for the memory access.
     iop = InstObjParams(name, Name, base_class,
                         {'ea_code': ea_code,


@@ -443,6 +443,10 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
     //The size of the data we're trying to read.
     int dataSize = sizeof(T);
 
+    if (inst->traceData) {
+        inst->traceData->setAddr(addr);
+    }
+
     if (inst->split2ndAccess) {
         dataSize = inst->split2ndSize;
         cache_req->splitAccess = true;
@@ -541,6 +545,11 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
     //The size of the data we're trying to read.
     int dataSize = sizeof(T);
 
+    if (inst->traceData) {
+        inst->traceData->setAddr(addr);
+        inst->traceData->setData(data);
+    }
+
     if (inst->split2ndAccess) {
         dataSize = inst->split2ndSize;
         cache_req->splitAccess = true;
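
Taken together with the ISA-template hunks above, this is one change seen from both sides: the store templates no longer inject trace calls into generated code, and each CPU model instead records the effective address (and store data) at its own read/write entry points. A minimal sketch of the resulting pattern; ToyCPU and TraceRecord are illustrative stand-ins rather than gem5 classes, and only the null-checked setAddr/setData calls mirror the diff:

    #include <cstdint>
    #include <iostream>

    // Illustrative stand-in for m5's execution trace record.
    struct TraceRecord
    {
        uint64_t addr = 0;
        uint64_t data = 0;

        void setAddr(uint64_t a) { addr = a; }
        void setData(uint64_t d) { data = d; }
    };

    // After this commit, the CPU model's memory-access entry point
    // records trace state exactly once, instead of every ISA template
    // emitting its own setAddr()/setData() calls.
    struct ToyCPU
    {
        TraceRecord *traceData = nullptr;   // null when tracing is off

        void write(uint64_t data, uint64_t addr)
        {
            if (traceData) {                // same guard as in the hunks
                traceData->setAddr(addr);
                traceData->setData(data);
            }
            // ... perform the actual access ...
        }
    };

    int main()
    {
        TraceRecord rec;
        ToyCPU cpu;
        cpu.traceData = &rec;
        cpu.write(0xbeef, 0x1000);
        std::cout << std::hex << rec.addr << " " << rec.data << std::endl;
        return 0;
    }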


@@ -351,10 +351,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
         }
     }
 
-    // This will need a new way to tell if it has a dcache attached.
-    if (req->isUncacheable())
-        recordEvent("Uncached Read");
-
     //If there's a fault, return it
     if (fault != NoFault) {
         if (req->isPrefetch()) {
@@ -451,6 +447,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
     if (traceData) {
         traceData->setAddr(addr);
+        traceData->setData(data);
     }
 
     //The block size of our peer.
@@ -522,20 +519,10 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         }
     }
 
-    // This will need a new way to tell if it's hooked up to a cache or not.
-    if (req->isUncacheable())
-        recordEvent("Uncached Write");
-
     //If there's a fault or we don't need to access a second cache line,
     //stop now.
     if (fault != NoFault || secondAddr <= addr)
     {
-        // If the write needs to have a fault on the access, consider
-        // calling changeStatus() and changing it to "bad addr write"
-        // or something.
-        if (traceData) {
-            traceData->setData(gtoh(data));
-        }
         if (req->isLocked() && fault == NoFault) {
             assert(locked);
             locked = false;


@@ -205,6 +205,27 @@ change_thread_state(ThreadID tid, int activate, int priority)
 {
 }
 
+void
+BaseSimpleCPU::prefetch(Addr addr, unsigned flags)
+{
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    // need to do this...
+}
+
+void
+BaseSimpleCPU::writeHint(Addr addr, int size, unsigned flags)
+{
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    // need to do this...
+}
+
 Fault
 BaseSimpleCPU::copySrcTranslate(Addr src)
 {


@@ -232,16 +232,8 @@ class BaseSimpleCPU : public BaseCPU
     Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n");
         M5_DUMMY_RETURN}
 
-    void prefetch(Addr addr, unsigned flags)
-    {
-        // need to do this...
-    }
-
-    void writeHint(Addr addr, int size, unsigned flags)
-    {
-        // need to do this...
-    }
+    void prefetch(Addr addr, unsigned flags);
+    void writeHint(Addr addr, int size, unsigned flags);
 
     Fault copySrcTranslate(Addr src);


@@ -426,16 +426,16 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
     int data_size = sizeof(T);
     BaseTLB::Mode mode = BaseTLB::Read;
 
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
     RequestPtr req = new Request(asid, addr, data_size,
                                  flags, pc, _cpuId, tid);
 
     Addr split_addr = roundDown(addr + data_size - 1, block_size);
     assert(split_addr <= addr || split_addr - addr < block_size);
 
-    // This will need a new way to tell if it's hooked up to a cache or not.
-    if (req->isUncacheable())
-        recordEvent("Uncached Write");
-
     _status = DTBWaitResponse;
     if (split_addr > addr) {
         RequestPtr req1, req2;
@@ -460,11 +460,6 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
         thread->dtb->translateTiming(req, tc, translation, mode);
     }
 
-    if (traceData) {
-        traceData->setData(data);
-        traceData->setAddr(addr);
-    }
-
     return NoFault;
 }
@@ -548,16 +543,17 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
     int data_size = sizeof(T);
     BaseTLB::Mode mode = BaseTLB::Write;
 
+    if (traceData) {
+        traceData->setAddr(addr);
+        traceData->setData(data);
+    }
+
     RequestPtr req = new Request(asid, addr, data_size,
                                  flags, pc, _cpuId, tid);
 
     Addr split_addr = roundDown(addr + data_size - 1, block_size);
     assert(split_addr <= addr || split_addr - addr < block_size);
 
-    // This will need a new way to tell if it's hooked up to a cache or not.
-    if (req->isUncacheable())
-        recordEvent("Uncached Write");
-
     T *dataP = new T;
     *dataP = TheISA::htog(data);
     _status = DTBWaitResponse;
@@ -584,13 +580,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         thread->dtb->translateTiming(req, tc, translation, mode);
     }
 
-    if (traceData) {
-        traceData->setAddr(req->getVaddr());
-        traceData->setData(data);
-    }
-
-    // If the write needs to have a fault on the access, consider calling
-    // changeStatus() and changing it to "bad addr write" or something.
+    // Translation faults will be returned via finishTranslation()
     return NoFault;
 }


@@ -35,6 +35,16 @@
 
 #include "sim/tlb.hh"
 
+/**
+ * This class captures the state of an address translation.  A translation
+ * can be split in two if the ISA supports it and the memory access crosses
+ * a page boundary.  In this case, this class is shared by two data
+ * translations (below).  Otherwise it is used by a single data translation
+ * class.  When each part of the translation is finished, the finish
+ * function is called which will indicate whether the whole translation is
+ * completed or not.  There are also functions for accessing parts of the
+ * translation state which deal with the possible split correctly.
+ */
 class WholeTranslationState
 {
   protected:
@@ -50,7 +60,10 @@ class WholeTranslationState
     uint64_t *res;
     BaseTLB::Mode mode;
 
-    /** Single translation state. */
+    /**
+     * Single translation state.  We set the number of outstanding
+     * translations to one and indicate that it is not split.
+     */
     WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res,
                           BaseTLB::Mode _mode)
         : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL),
@@ -60,7 +73,11 @@ class WholeTranslationState
         assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
     }
 
-    /** Split translation state. */
+    /**
+     * Split translation state.  We copy all state into this class, set the
+     * number of outstanding translations to two and then mark this as a
+     * split translation.
+     */
     WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow,
                           RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res,
                           BaseTLB::Mode _mode)
@@ -71,6 +88,13 @@ class WholeTranslationState
         assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
     }
 
+    /**
+     * Finish part of a translation.  If there is only one request then this
+     * translation is completed.  If the request has been split in two then
+     * the outstanding count determines whether the translation is complete.
+     * In this case, flags from the split request are copied to the main
+     * request to make it easier to access them later on.
+     */
     bool
     finish(Fault fault, int index)
     {
@@ -89,6 +113,10 @@ class WholeTranslationState
         return outstanding == 0;
     }
 
+    /**
+     * Determine whether this translation produced a fault.  Both parts of the
+     * translation must be checked if this is a split translation.
+     */
     Fault
     getFault() const
     {
@@ -102,36 +130,54 @@ class WholeTranslationState
         return NoFault;
     }
 
+    /** Remove all faults from the translation. */
     void
     setNoFault()
     {
         faults[0] = faults[1] = NoFault;
     }
 
+    /**
+     * Check if this request is uncacheable.  We only need to check the main
+     * request because the flags will have been copied here on a split
+     * translation.
+     */
     bool
     isUncacheable() const
     {
         return mainReq->isUncacheable();
     }
 
+    /**
+     * Check if this request is a prefetch.  We only need to check the main
+     * request because the flags will have been copied here on a split
+     * translation.
+     */
     bool
     isPrefetch() const
     {
         return mainReq->isPrefetch();
    }
 
+    /** Get the physical address of this request. */
     Addr
     getPaddr() const
     {
         return mainReq->getPaddr();
     }
 
+    /**
+     * Get the flags associated with this request.  We only need to access
+     * the main request because the flags will have been copied here on a
+     * split translation.
+     */
     unsigned
     getFlags()
     {
         return mainReq->getFlags();
     }
 
+    /** Delete all requests that make up this translation. */
     void
     deleteReqs()
     {
@@ -143,6 +189,16 @@ class WholeTranslationState
     }
 };
 
+/**
+ * This class represents part of a data address translation.  All state for
+ * the translation is held in WholeTranslationState (above).  Therefore this
+ * class does not need to know whether the translation is split or not.  The
+ * index variable determines this but is simply passed on to the state class.
+ * When this part of the translation is completed, finish is called.  If the
+ * translation state class indicate that the whole translation is complete
+ * then the execution context is informed.
+ */
 template <class ExecContext>
 class DataTranslation : public BaseTLB::Translation
 {
@@ -163,6 +219,10 @@ class DataTranslation : public BaseTLB::Translation
     {
     }
 
+    /**
+     * Finish this part of the translation and indicate that the whole
+     * translation is complete if the state says so.
+     */
     void
     finish(Fault fault, RequestPtr req, ThreadContext *tc,
            BaseTLB::Mode mode)
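
The comments added above spell out the split-translation protocol: a WholeTranslationState starts with one outstanding part, or two when the access crosses a page boundary; each DataTranslation calls finish(), and the whole translation is complete once the outstanding count reaches zero. A compact sketch of just that counting logic; ToyTranslationState is an illustrative reduction, not the gem5 class:

    #include <cassert>
    #include <iostream>

    // Reduction of WholeTranslationState's bookkeeping: one outstanding
    // part for a simple translation, two for a split one.
    struct ToyTranslationState
    {
        int outstanding;
        bool isSplit;

        explicit ToyTranslationState(bool split)
            : outstanding(split ? 2 : 1), isSplit(split) {}

        // Mirrors finish() returning "outstanding == 0": true only when
        // the last outstanding part has completed.
        bool finish()
        {
            assert(outstanding > 0);
            --outstanding;
            return outstanding == 0;
        }
    };

    int main()
    {
        ToyTranslationState split(true);
        std::cout << split.finish() << std::endl;  // 0: low half done
        std::cout << split.finish() << std::endl;  // 1: translation done
        return 0;
    }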


@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,100 +26,96 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * $Id$
- *
- */
-
-#include "mem/ruby/profiler/AccessTraceForAddress.hh"
 #include "mem/ruby/common/Histogram.hh"
+#include "mem/ruby/profiler/AccessTraceForAddress.hh"
 
 AccessTraceForAddress::AccessTraceForAddress()
 {
     m_histogram_ptr = NULL;
 }
 
 AccessTraceForAddress::AccessTraceForAddress(const Address& addr)
 {
     m_addr = addr;
     m_total = 0;
     m_loads = 0;
     m_stores = 0;
     m_atomics = 0;
     m_user = 0;
     m_sharing = 0;
     m_histogram_ptr = NULL;
 }
 
 AccessTraceForAddress::~AccessTraceForAddress()
 {
     if (m_histogram_ptr != NULL) {
         delete m_histogram_ptr;
         m_histogram_ptr = NULL;
     }
 }
 
-void AccessTraceForAddress::print(ostream& out) const
+void
+AccessTraceForAddress::print(ostream& out) const
 {
     out << m_addr;
     if (m_histogram_ptr == NULL) {
         out << " " << m_total;
         out << " | " << m_loads;
         out << " " << m_stores;
         out << " " << m_atomics;
         out << " | " << m_user;
         out << " " << m_total-m_user;
         out << " | " << m_sharing;
         out << " | " << m_touched_by.count();
     } else {
         assert(m_total == 0);
         out << " " << (*m_histogram_ptr);
     }
 }
 
-void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss)
+void
+AccessTraceForAddress::update(CacheRequestType type,
+                              AccessModeType access_mode, NodeID cpu,
+                              bool sharing_miss)
 {
     m_touched_by.add(cpu);
     m_total++;
     if(type == CacheRequestType_ATOMIC) {
         m_atomics++;
     } else if(type == CacheRequestType_LD){
         m_loads++;
     } else if (type == CacheRequestType_ST){
         m_stores++;
     } else {
         // ERROR_MSG("Trying to add invalid access to trace");
     }
 
     if (access_mode == AccessModeType_UserMode) {
         m_user++;
     }
 
     if (sharing_miss) {
         m_sharing++;
     }
 }
 
-int AccessTraceForAddress::getTotal() const
+int
+AccessTraceForAddress::getTotal() const
 {
     if (m_histogram_ptr == NULL) {
         return m_total;
     } else {
         return m_histogram_ptr->getTotal();
     }
 }
 
-void AccessTraceForAddress::addSample(int value)
+void
+AccessTraceForAddress::addSample(int value)
 {
     assert(m_total == 0);
     if (m_histogram_ptr == NULL) {
         m_histogram_ptr = new Histogram;
     }
     m_histogram_ptr->add(value);
 }
-
-bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2)
-{
-    return (n1->getTotal() > n2->getTotal());
-}


@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,77 +26,60 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * $Id$
- *
- * Description:
- *
- */
-
-#ifndef ACCESSTRACEFORADDRESS_H
-#define ACCESSTRACEFORADDRESS_H
+#ifndef __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__
+#define __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__
 
-#include "mem/ruby/common/Global.hh"
-#include "mem/ruby/common/Address.hh"
-#include "mem/protocol/CacheRequestType.hh"
 #include "mem/protocol/AccessModeType.hh"
-#include "mem/ruby/system/NodeID.hh"
+#include "mem/protocol/CacheRequestType.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/common/Set.hh"
+#include "mem/ruby/system/NodeID.hh"
 
 class Histogram;
 
-class AccessTraceForAddress {
-public:
-  // Constructors
-  AccessTraceForAddress();
-  explicit AccessTraceForAddress(const Address& addr);
-
-  // Destructor
-  ~AccessTraceForAddress();
-
-  // Public Methods
-  void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss);
-  int getTotal() const;
-  int getSharing() const { return m_sharing; }
-  int getTouchedBy() const { return m_touched_by.count(); }
-  const Address& getAddress() const { return m_addr; }
-  void addSample(int value);
-
-  void print(ostream& out) const;
-private:
-  // Private Methods
-
-  // Private copy constructor and assignment operator
-  // AccessTraceForAddress(const AccessTraceForAddress& obj);
-  // AccessTraceForAddress& operator=(const AccessTraceForAddress& obj);
-
-  // Data Members (m_ prefix)
-  Address m_addr;
-  uint64 m_loads;
-  uint64 m_stores;
-  uint64 m_atomics;
-  uint64 m_total;
-  uint64 m_user;
-  uint64 m_sharing;
-  Set m_touched_by;
-  Histogram* m_histogram_ptr;
+class AccessTraceForAddress
+{
+  public:
+    AccessTraceForAddress();
+    explicit AccessTraceForAddress(const Address& addr);
+    ~AccessTraceForAddress();
+
+    void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu,
+                bool sharing_miss);
+    int getTotal() const;
+    int getSharing() const { return m_sharing; }
+    int getTouchedBy() const { return m_touched_by.count(); }
+    const Address& getAddress() const { return m_addr; }
+    void addSample(int value);
+
+    void print(ostream& out) const;
+
+  private:
+    Address m_addr;
+    uint64 m_loads;
+    uint64 m_stores;
+    uint64 m_atomics;
+    uint64 m_total;
+    uint64 m_user;
+    uint64 m_sharing;
+    Set m_touched_by;
+    Histogram* m_histogram_ptr;
 };
 
-bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2);
-
-// Output operator declaration
-ostream& operator<<(ostream& out, const AccessTraceForAddress& obj);
-
-// ******************* Definitions *******************
-
-// Output operator definition
-extern inline
-ostream& operator<<(ostream& out, const AccessTraceForAddress& obj)
+inline bool
+node_less_then_eq(const AccessTraceForAddress* n1,
+                  const AccessTraceForAddress* n2)
+{
+    return n1->getTotal() > n2->getTotal();
+}
+
+inline ostream&
+operator<<(ostream& out, const AccessTraceForAddress& obj)
 {
     obj.print(out);
     out << flush;
     return out;
 }
 
-#endif //ACCESSTRACEFORADDRESS_H
+#endif // __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__
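
Note the ordering trick in node_less_then_eq above: the record with the larger access total compares as "smaller", so PrioHeap::extractMin() in AddressProfiler's printSorted (later in this diff) yields the hottest address first. A sketch of the same ranking, with std::priority_queue standing in for the gems_common PrioHeap and ToyTrace as an illustrative stand-in for AccessTraceForAddress:

    #include <iostream>
    #include <queue>
    #include <vector>

    // Stand-in for AccessTraceForAddress: just a name and access total.
    struct ToyTrace
    {
        const char *name;
        int total;
        int getTotal() const { return total; }
    };

    // Same idea as node_less_then_eq: order by descending total so the
    // hottest record is extracted first.
    struct HotterFirst
    {
        bool operator()(const ToyTrace *a, const ToyTrace *b) const
        {
            return a->getTotal() < b->getTotal();   // max-heap on total
        }
    };

    int main()
    {
        ToyTrace a{"0x1000", 5}, b{"0x2000", 42}, c{"0x3000", 17};
        std::priority_queue<const ToyTrace *,
                            std::vector<const ToyTrace *>,
                            HotterFirst> heap;
        heap.push(&a);
        heap.push(&b);
        heap.push(&c);
        while (!heap.empty()) {   // prints 0x2000, 0x3000, 0x1000
            std::cout << heap.top()->name << " " << heap.top()->getTotal()
                      << std::endl;
            heap.pop();
        }
        return 0;
    }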


@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,272 +26,293 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * AddressProfiler.cc
- *
- * Description: See AddressProfiler.hh
- *
- * $Id$
- *
- */
-
-#include "mem/ruby/profiler/AddressProfiler.hh"
+#include "mem/gems_common/Map.hh"
+#include "mem/gems_common/PrioHeap.hh"
 #include "mem/protocol/CacheMsg.hh"
 #include "mem/ruby/profiler/AccessTraceForAddress.hh"
-#include "mem/gems_common/PrioHeap.hh"
-#include "mem/gems_common/Map.hh"
-#include "mem/ruby/system/System.hh"
+#include "mem/ruby/profiler/AddressProfiler.hh"
 #include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/system/System.hh"
+
+typedef AddressProfiler::AddressMap AddressMap;
 
 // Helper functions
-static AccessTraceForAddress& lookupTraceForAddress(const Address& addr,
-                                                    Map<Address,
-                                                    AccessTraceForAddress>* record_map);
-
-static void printSorted(ostream& out,
-                        int num_of_sequencers,
-                        const Map<Address, AccessTraceForAddress>* record_map,
-                        string description);
+AccessTraceForAddress&
+lookupTraceForAddress(const Address& addr, AddressMap* record_map)
+{
+    if (!record_map->exist(addr)) {
+        record_map->add(addr, AccessTraceForAddress(addr));
+    }
+    return record_map->lookup(addr);
+}
+
+void
+printSorted(ostream& out, int num_of_sequencers, const AddressMap* record_map,
+            string description)
+{
+    const int records_printed = 100;
+
+    uint64 misses = 0;
+    PrioHeap<AccessTraceForAddress*> heap;
+    Vector<Address> keys = record_map->keys();
+    for (int i = 0; i < keys.size(); i++) {
+        AccessTraceForAddress* record = &(record_map->lookup(keys[i]));
+        misses += record->getTotal();
+        heap.insert(record);
+    }
+
+    out << "Total_entries_" << description << ": " << keys.size() << endl;
+    if (g_system_ptr->getProfiler()->getAllInstructions())
+        out << "Total_Instructions_" << description << ": " << misses << endl;
+    else
+        out << "Total_data_misses_" << description << ": " << misses << endl;
+
+    out << "total | load store atomic | user supervisor | sharing | touched-by"
+        << endl;
+
+    Histogram remaining_records(1, 100);
+    Histogram all_records(1, 100);
+    Histogram remaining_records_log(-1);
+    Histogram all_records_log(-1);
+
+    // Allows us to track how many lines where touched by n processors
+    Vector<int64> m_touched_vec;
+    Vector<int64> m_touched_weighted_vec;
+    m_touched_vec.setSize(num_of_sequencers+1);
+    m_touched_weighted_vec.setSize(num_of_sequencers+1);
+    for (int i = 0; i < m_touched_vec.size(); i++) {
+        m_touched_vec[i] = 0;
+        m_touched_weighted_vec[i] = 0;
+    }
+
+    int counter = 0;
+    while (heap.size() > 0 && counter < records_printed) {
+        AccessTraceForAddress* record = heap.extractMin();
+        double percent = 100.0 * (record->getTotal() / double(misses));
+        out << description << " | " << percent << " % " << *record << endl;
+        all_records.add(record->getTotal());
+        all_records_log.add(record->getTotal());
+        counter++;
+        m_touched_vec[record->getTouchedBy()]++;
+        m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
+    }
+
+    while (heap.size() > 0) {
+        AccessTraceForAddress* record = heap.extractMin();
+        all_records.add(record->getTotal());
+        remaining_records.add(record->getTotal());
+        all_records_log.add(record->getTotal());
+        remaining_records_log.add(record->getTotal());
+        m_touched_vec[record->getTouchedBy()]++;
+        m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
+    }
+
+    out << endl;
+    out << "all_records_" << description << ": "
+        << all_records << endl
+        << "all_records_log_" << description << ": "
+        << all_records_log << endl
+        << "remaining_records_" << description << ": "
+        << remaining_records << endl
+        << "remaining_records_log_" << description << ": "
+        << remaining_records_log << endl
+        << "touched_by_" << description << ": "
+        << m_touched_vec << endl
+        << "touched_by_weighted_" << description << ": "
+        << m_touched_weighted_vec << endl
+        << endl;
+}
 
 AddressProfiler::AddressProfiler(int num_of_sequencers)
 {
-  m_dataAccessTrace = new Map<Address, AccessTraceForAddress>;
-  m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>;
-  m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>;
-  m_retryProfileMap = new Map<Address, AccessTraceForAddress>;
+    m_dataAccessTrace = new AddressMap;
+    m_macroBlockAccessTrace = new AddressMap;
+    m_programCounterAccessTrace = new AddressMap;
+    m_retryProfileMap = new AddressMap;
     m_num_of_sequencers = num_of_sequencers;
     clearStats();
 }
 
 AddressProfiler::~AddressProfiler()
 {
     delete m_dataAccessTrace;
     delete m_macroBlockAccessTrace;
     delete m_programCounterAccessTrace;
     delete m_retryProfileMap;
 }
 
-void AddressProfiler::setHotLines(bool hot_lines){
+void
+AddressProfiler::setHotLines(bool hot_lines)
+{
     m_hot_lines = hot_lines;
 }
-void AddressProfiler::setAllInstructions(bool all_instructions){
+
+void
+AddressProfiler::setAllInstructions(bool all_instructions)
+{
     m_all_instructions = all_instructions;
 }
 
-void AddressProfiler::printStats(ostream& out) const
+void
+AddressProfiler::printStats(ostream& out) const
 {
     if (m_hot_lines) {
         out << endl;
         out << "AddressProfiler Stats" << endl;
         out << "---------------------" << endl;
 
         out << endl;
         out << "sharing_misses: " << m_sharing_miss_counter << endl;
         out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl;
         out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl;
 
         out << endl;
         out << "Hot Data Blocks" << endl;
         out << "---------------" << endl;
         out << endl;
-        printSorted(out, m_num_of_sequencers, m_dataAccessTrace, "block_address");
+        printSorted(out, m_num_of_sequencers, m_dataAccessTrace,
+                    "block_address");
 
         out << endl;
         out << "Hot MacroData Blocks" << endl;
         out << "--------------------" << endl;
         out << endl;
-        printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, "macroblock_address");
+        printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace,
+                    "macroblock_address");
 
         out << "Hot Instructions" << endl;
         out << "----------------" << endl;
         out << endl;
-        printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address");
+        printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace,
+                    "pc_address");
     }
 
-    if (m_all_instructions){
+    if (m_all_instructions) {
         out << endl;
         out << "All Instructions Profile:" << endl;
         out << "-------------------------" << endl;
         out << endl;
-        printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address");
+        printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace,
+                    "pc_address");
         out << endl;
     }
 
     if (m_retryProfileHisto.size() > 0) {
         out << "Retry Profile" << endl;
         out << "-------------" << endl;
         out << endl;
         out << "retry_histogram_absolute: " << m_retryProfileHisto << endl;
         out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl;
         out << "retry_histogram_read: " << m_retryProfileHistoRead << endl;
 
         out << "retry_histogram_percent: ";
         m_retryProfileHisto.printPercent(out);
         out << endl;
 
-        printSorted(out, m_num_of_sequencers, m_retryProfileMap, "block_address");
+        printSorted(out, m_num_of_sequencers, m_retryProfileMap,
+                    "block_address");
         out << endl;
     }
 }
 
-void AddressProfiler::clearStats()
+void
+AddressProfiler::clearStats()
 {
     // Clear the maps
     m_sharing_miss_counter = 0;
     m_dataAccessTrace->clear();
     m_macroBlockAccessTrace->clear();
     m_programCounterAccessTrace->clear();
     m_retryProfileMap->clear();
     m_retryProfileHisto.clear();
     m_retryProfileHistoRead.clear();
     m_retryProfileHistoWrite.clear();
     m_getx_sharing_histogram.clear();
     m_gets_sharing_histogram.clear();
 }
 
-void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor)
+void
+AddressProfiler::profileGetX(const Address& datablock, const Address& PC,
+                             const Set& owner, const Set& sharers,
+                             NodeID requestor)
 {
     Set indirection_set;
     indirection_set.addSet(sharers);
     indirection_set.addSet(owner);
     indirection_set.remove(requestor);
     int num_indirections = indirection_set.count();
 
     m_getx_sharing_histogram.add(num_indirections);
     bool indirection_miss = (num_indirections > 0);
 
-    addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), requestor, indirection_miss);
+    addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0),
+                   requestor, indirection_miss);
 }
 
-void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor)
+void
+AddressProfiler::profileGetS(const Address& datablock, const Address& PC,
+                             const Set& owner, const Set& sharers,
+                             NodeID requestor)
 {
     Set indirection_set;
     indirection_set.addSet(owner);
     indirection_set.remove(requestor);
     int num_indirections = indirection_set.count();
 
     m_gets_sharing_histogram.add(num_indirections);
     bool indirection_miss = (num_indirections > 0);
 
-    addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), requestor, indirection_miss);
+    addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0),
+                   requestor, indirection_miss);
 }
 
-void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss)
+void
+AddressProfiler::addTraceSample(Address data_addr, Address pc_addr,
+                                CacheRequestType type,
+                                AccessModeType access_mode, NodeID id,
+                                bool sharing_miss)
 {
     if (m_all_instructions) {
         if (sharing_miss) {
             m_sharing_miss_counter++;
         }
 
         // record data address trace info
         data_addr.makeLineAddress();
-        lookupTraceForAddress(data_addr, m_dataAccessTrace).update(type, access_mode, id, sharing_miss);
+        lookupTraceForAddress(data_addr, m_dataAccessTrace).
+            update(type, access_mode, id, sharing_miss);
 
         // record macro data address trace info
-        Address macro_addr(data_addr.maskLowOrderBits(10)); // 6 for datablock, 4 to make it 16x more coarse
-        lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).update(type, access_mode, id, sharing_miss);
+        // 6 for datablock, 4 to make it 16x more coarse
+        Address macro_addr(data_addr.maskLowOrderBits(10));
+        lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).
+            update(type, access_mode, id, sharing_miss);
 
         // record program counter address trace info
-        lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss);
+        lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).
+            update(type, access_mode, id, sharing_miss);
     }
 
     if (m_all_instructions) {
-        // This code is used if the address profiler is an all-instructions profiler
-        // record program counter address trace info
-        lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss);
+        // This code is used if the address profiler is an
+        // all-instructions profiler record program counter address
+        // trace info
+        lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).
+            update(type, access_mode, id, sharing_miss);
     }
 }
 
-void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, int count)
+void
+AddressProfiler::profileRetry(const Address& data_addr, AccessType type,
+                              int count)
 {
     m_retryProfileHisto.add(count);
     if (type == AccessType_Read) {
         m_retryProfileHistoRead.add(count);
     } else {
         m_retryProfileHistoWrite.add(count);
     }
     if (count > 1) {
         lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count);
     }
 }
-
-// ***** Normal Functions ******
-
-static void printSorted(ostream& out,
-                        int num_of_sequencers,
-                        const Map<Address, AccessTraceForAddress>* record_map,
-                        string description)
-{
-  const int records_printed = 100;
-
-  uint64 misses = 0;
-  PrioHeap<AccessTraceForAddress*> heap;
-  Vector<Address> keys = record_map->keys();
-  for(int i=0; i<keys.size(); i++){
-    AccessTraceForAddress* record = &(record_map->lookup(keys[i]));
-    misses += record->getTotal();
-    heap.insert(record);
-  }
-
-  out << "Total_entries_" << description << ": " << keys.size() << endl;
-  if (g_system_ptr->getProfiler()->getAllInstructions())
-    out << "Total_Instructions_" << description << ": " << misses << endl;
-  else
-    out << "Total_data_misses_" << description << ": " << misses << endl;
-
-  out << "total | load store atomic | user supervisor | sharing | touched-by" << endl;
-
-  Histogram remaining_records(1, 100);
-  Histogram all_records(1, 100);
-  Histogram remaining_records_log(-1);
-  Histogram all_records_log(-1);
-
-  // Allows us to track how many lines where touched by n processors
-  Vector<int64> m_touched_vec;
-  Vector<int64> m_touched_weighted_vec;
-  m_touched_vec.setSize(num_of_sequencers+1);
-  m_touched_weighted_vec.setSize(num_of_sequencers+1);
-  for (int i=0; i<m_touched_vec.size(); i++) {
-    m_touched_vec[i] = 0;
-    m_touched_weighted_vec[i] = 0;
-  }
-
-  int counter = 0;
-  while((heap.size() > 0) && (counter < records_printed)) {
-    AccessTraceForAddress* record = heap.extractMin();
-    double percent = 100.0*(record->getTotal()/double(misses));
-    out << description << " | " << percent << " % " << *record << endl;
-    all_records.add(record->getTotal());
-    all_records_log.add(record->getTotal());
-    counter++;
-    m_touched_vec[record->getTouchedBy()]++;
-    m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
-  }
-
-  while(heap.size() > 0) {
-    AccessTraceForAddress* record = heap.extractMin();
-    all_records.add(record->getTotal());
-    remaining_records.add(record->getTotal());
-    all_records_log.add(record->getTotal());
-    remaining_records_log.add(record->getTotal());
-    m_touched_vec[record->getTouchedBy()]++;
-    m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
-  }
-
-  out << endl;
-  out << "all_records_" << description << ": " << all_records << endl;
-  out << "all_records_log_" << description << ": " << all_records_log << endl;
-  out << "remaining_records_" << description << ": " << remaining_records << endl;
-  out << "remaining_records_log_" << description << ": " << remaining_records_log << endl;
-  out << "touched_by_" << description << ": " << m_touched_vec << endl;
-  out << "touched_by_weighted_" << description << ": " << m_touched_weighted_vec << endl;
-  out << endl;
-}
-
-static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, Map<Address, AccessTraceForAddress>* record_map)
-{
-  if(record_map->exist(addr) == false){
-    record_map->add(addr, AccessTraceForAddress(addr));
-  }
-  return record_map->lookup(addr);
-}


@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,89 +26,77 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * AddressProfiler.hh
- *
- * Description:
- *
- * $Id$
- *
- */
-
-#ifndef ADDRESSPROFILER_H
-#define ADDRESSPROFILER_H
+#ifndef __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__
+#define __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__
 
-#include "mem/ruby/common/Global.hh"
-#include "mem/ruby/system/NodeID.hh"
-#include "mem/ruby/common/Histogram.hh"
-#include "mem/ruby/common/Address.hh"
-#include "mem/protocol/CacheMsg.hh"
 #include "mem/protocol/AccessType.hh"
+#include "mem/protocol/CacheMsg.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Global.hh"
+#include "mem/ruby/common/Histogram.hh"
+#include "mem/ruby/system/NodeID.hh"
 
 class AccessTraceForAddress;
 class Set;
 template <class KEY_TYPE, class VALUE_TYPE> class Map;
 
-class AddressProfiler {
-public:
-  // Constructors
-  AddressProfiler(int num_of_sequencers);
-
-  // Destructor
-  ~AddressProfiler();
-
-  // Public Methods
-  void printStats(ostream& out) const;
-  void clearStats();
-
-  void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss);
-  void profileRetry(const Address& data_addr, AccessType type, int count);
-  void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor);
-  void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor);
-
-  void print(ostream& out) const;
-
-  //added by SS
-  void setHotLines(bool hot_lines);
-  void setAllInstructions(bool all_instructions);
-private:
-  // Private Methods
-
-  // Private copy constructor and assignment operator
-  AddressProfiler(const AddressProfiler& obj);
-  AddressProfiler& operator=(const AddressProfiler& obj);
-
-  // Data Members (m_ prefix)
-  int64 m_sharing_miss_counter;
-
-  Map<Address, AccessTraceForAddress>* m_dataAccessTrace;
-  Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace;
-  Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace;
-  Map<Address, AccessTraceForAddress>* m_retryProfileMap;
-  Histogram m_retryProfileHisto;
-  Histogram m_retryProfileHistoWrite;
-  Histogram m_retryProfileHistoRead;
-  Histogram m_getx_sharing_histogram;
-  Histogram m_gets_sharing_histogram;
-
-  //added by SS
-  bool m_hot_lines;
-  bool m_all_instructions;
-
-  int m_num_of_sequencers;
+class AddressProfiler
+{
+  public:
+    typedef Map<Address, AccessTraceForAddress> AddressMap;
+
+  public:
+    AddressProfiler(int num_of_sequencers);
+    ~AddressProfiler();
+
+    void printStats(ostream& out) const;
+    void clearStats();
+
+    void addTraceSample(Address data_addr, Address pc_addr,
+                        CacheRequestType type, AccessModeType access_mode,
+                        NodeID id, bool sharing_miss);
+    void profileRetry(const Address& data_addr, AccessType type, int count);
+    void profileGetX(const Address& datablock, const Address& PC,
+                     const Set& owner, const Set& sharers, NodeID requestor);
+    void profileGetS(const Address& datablock, const Address& PC,
+                     const Set& owner, const Set& sharers, NodeID requestor);
+
+    void print(ostream& out) const;
+
+    //added by SS
+    void setHotLines(bool hot_lines);
+    void setAllInstructions(bool all_instructions);
+
+  private:
+    // Private copy constructor and assignment operator
+    AddressProfiler(const AddressProfiler& obj);
+    AddressProfiler& operator=(const AddressProfiler& obj);
+
+    int64 m_sharing_miss_counter;
+
+    AddressMap* m_dataAccessTrace;
+    AddressMap* m_macroBlockAccessTrace;
+    AddressMap* m_programCounterAccessTrace;
+    AddressMap* m_retryProfileMap;
+    Histogram m_retryProfileHisto;
+    Histogram m_retryProfileHistoWrite;
+    Histogram m_retryProfileHistoRead;
+    Histogram m_getx_sharing_histogram;
+    Histogram m_gets_sharing_histogram;
+
+    //added by SS
+    bool m_hot_lines;
+    bool m_all_instructions;
+
+    int m_num_of_sequencers;
 };
 
-// Output operator declaration
-ostream& operator<<(ostream& out, const AddressProfiler& obj);
-
-// ******************* Definitions *******************
-
-// Output operator definition
-extern inline
-ostream& operator<<(ostream& out, const AddressProfiler& obj)
+inline ostream&
+operator<<(ostream& out, const AddressProfiler& obj)
 {
     obj.print(out);
     out << flush;
     return out;
 }
 
-#endif //ADDRESSPROFILER_H
+#endif // __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__


@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,111 +26,113 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * CacheProfiler.C
- *
- * Description: See CacheProfiler.hh
- *
- * $Id$
- *
- */
-
-#include "mem/ruby/profiler/CacheProfiler.hh"
-#include "mem/ruby/profiler/AccessTraceForAddress.hh"
 #include "mem/gems_common/PrioHeap.hh"
-#include "mem/ruby/system/System.hh"
-#include "mem/ruby/profiler/Profiler.hh"
 #include "mem/gems_common/Vector.hh"
+#include "mem/ruby/profiler/AccessTraceForAddress.hh"
+#include "mem/ruby/profiler/CacheProfiler.hh"
+#include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/system/System.hh"
 
 CacheProfiler::CacheProfiler(const string& description)
 {
     m_description = description;
     m_requestTypeVec_ptr = new Vector<int>;
     m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM));
 
     clearStats();
 }
 
 CacheProfiler::~CacheProfiler()
 {
     delete m_requestTypeVec_ptr;
 }
 
-void CacheProfiler::printStats(ostream& out) const
+void
+CacheProfiler::printStats(ostream& out) const
 {
     out << "Cache Stats: " << m_description << endl;
     string description = " " + m_description;
 
     out << description << "_total_misses: " << m_misses << endl;
     out << description << "_total_demand_misses: " << m_demand_misses << endl;
     out << description << "_total_prefetches: " << m_prefetches << endl;
     out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl;
     out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl;
     out << endl;
 
     int requests = 0;
 
-    for(int i=0; i<int(CacheRequestType_NUM); i++) {
+    for (int i = 0; i < int(CacheRequestType_NUM); i++) {
         requests += m_requestTypeVec_ptr->ref(i);
     }
 
     assert(m_misses == requests);
 
     if (requests > 0) {
-        for(int i=0; i<int(CacheRequestType_NUM); i++){
+        for (int i = 0; i < int(CacheRequestType_NUM); i++) {
             if (m_requestTypeVec_ptr->ref(i) > 0) {
-                out << description << "_request_type_" << CacheRequestType_to_string(CacheRequestType(i)) << ": "
-                    << (100.0 * double((m_requestTypeVec_ptr->ref(i)))) / double(requests)
-                    << "%" << endl;
+                out << description << "_request_type_"
+                    << CacheRequestType_to_string(CacheRequestType(i))
+                    << ": "
+                    << 100.0 * (double)m_requestTypeVec_ptr->ref(i) /
+                           (double)requests
+                    << "%" << endl;
             }
         }
 
         out << endl;
 
-        for(int i=0; i<AccessModeType_NUM; i++){
+        for (int i = 0; i < AccessModeType_NUM; i++){
             if (m_accessModeTypeHistogram[i] > 0) {
-                out << description << "_access_mode_type_" << (AccessModeType) i << ": " << m_accessModeTypeHistogram[i]
-                    << " " << (100.0 * m_accessModeTypeHistogram[i]) / requests << "%" << endl;
+                out << description << "_access_mode_type_"
+                    << (AccessModeType) i << ": "
+                    << m_accessModeTypeHistogram[i] << " "
+                    << 100.0 * m_accessModeTypeHistogram[i] / requests
+                    << "%" << endl;
             }
         }
     }
 
     out << description << "_request_size: " << m_requestSize << endl;
     out << endl;
 }
 
-void CacheProfiler::clearStats()
+void
+CacheProfiler::clearStats()
 {
-    for(int i=0; i<int(CacheRequestType_NUM); i++) {
+    for (int i = 0; i < int(CacheRequestType_NUM); i++) {
         m_requestTypeVec_ptr->ref(i) = 0;
     }
     m_requestSize.clear();
     m_misses = 0;
    m_demand_misses = 0;
     m_prefetches = 0;
     m_sw_prefetches = 0;
     m_hw_prefetches = 0;
-    for(int i=0; i<AccessModeType_NUM; i++){
+    for (int i = 0; i < AccessModeType_NUM; i++) {
         m_accessModeTypeHistogram[i] = 0;
     }
 }
 
-void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit)
+void
+CacheProfiler::addStatSample(CacheRequestType requestType,
+                             AccessModeType type, int msgSize,
+                             PrefetchBit pfBit)
 {
     m_misses++;
 
     m_requestTypeVec_ptr->ref(requestType)++;
     m_accessModeTypeHistogram[type]++;
     m_requestSize.add(msgSize);
     if (pfBit == PrefetchBit_No) {
         m_demand_misses++;
     } else if (pfBit == PrefetchBit_Yes) {
         m_prefetches++;
         m_sw_prefetches++;
-    } else { // must be L1_HW || L2_HW prefetch
+    } else {
+        // must be L1_HW || L2_HW prefetch
         m_prefetches++;
         m_hw_prefetches++;
     }
 }


@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,77 +26,58 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * CacheProfiler.hh
- *
- * Description:
- *
- * $Id$
- *
- */
-
-#ifndef CACHEPROFILER_H
-#define CACHEPROFILER_H
+#ifndef __MEM_RUBY_PROFILER_CACHEPROFILER_HH__
+#define __MEM_RUBY_PROFILER_CACHEPROFILER_HH__
 
 #include <iostream>
 #include <string>
 
-#include "mem/ruby/common/Global.hh"
-#include "mem/ruby/system/NodeID.hh"
-#include "mem/ruby/common/Histogram.hh"
 #include "mem/protocol/AccessModeType.hh"
-#include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/CacheRequestType.hh"
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/ruby/common/Global.hh"
+#include "mem/ruby/common/Histogram.hh"
+#include "mem/ruby/system/NodeID.hh"
 
 template <class TYPE> class Vector;
 
-class CacheProfiler {
-public:
-  // Constructors
-  CacheProfiler(const std::string& description);
-
-  // Destructor
-  ~CacheProfiler();
-
-  // Public Methods
-  void printStats(std::ostream& out) const;
-  void clearStats();
-
-  void addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit);
-
-  void print(std::ostream& out) const;
-private:
-  // Private Methods
-
-  // Private copy constructor and assignment operator
-  CacheProfiler(const CacheProfiler& obj);
-  CacheProfiler& operator=(const CacheProfiler& obj);
-
-  // Data Members (m_ prefix)
-  std::string m_description;
-  Histogram m_requestSize;
-  int64 m_misses;
-  int64 m_demand_misses;
-  int64 m_prefetches;
-  int64 m_sw_prefetches;
-  int64 m_hw_prefetches;
-  int64 m_accessModeTypeHistogram[AccessModeType_NUM];
-
-  Vector < int >* m_requestTypeVec_ptr;
+class CacheProfiler
+{
+  public:
+    CacheProfiler(const std::string& description);
+    ~CacheProfiler();
+
+    void printStats(std::ostream& out) const;
+    void clearStats();
+
+    void addStatSample(CacheRequestType requestType, AccessModeType type,
+                       int msgSize, PrefetchBit pfBit);
+
+    void print(std::ostream& out) const;
+
+  private:
+    // Private copy constructor and assignment operator
+    CacheProfiler(const CacheProfiler& obj);
+    CacheProfiler& operator=(const CacheProfiler& obj);
+
+    std::string m_description;
+    Histogram m_requestSize;
+    int64 m_misses;
+    int64 m_demand_misses;
+    int64 m_prefetches;
+    int64 m_sw_prefetches;
+    int64 m_hw_prefetches;
+    int64 m_accessModeTypeHistogram[AccessModeType_NUM];
+
+    Vector <int>* m_requestTypeVec_ptr;
 };
 
-// Output operator declaration
-std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj);
-
-// ******************* Definitions *******************
-
-// Output operator definition
-extern inline
-std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj)
+inline std::ostream&
+operator<<(std::ostream& out, const CacheProfiler& obj)
 {
     obj.print(out);
     out << std::flush;
     return out;
 }
 
-#endif //CACHEPROFILER_H
+#endif // __MEM_RUBY_PROFILER_CACHEPROFILER_HH__


@ -1,4 +1,3 @@
/* /*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved. * All rights reserved.
@ -32,19 +31,14 @@
using namespace std;

MemCntrlProfiler::MemCntrlProfiler(const string& description,
    int banks_per_rank, int ranks_per_dimm, int dimms_per_channel)
{
    m_description = description;
    m_banks_per_rank = banks_per_rank;
    m_ranks_per_dimm = ranks_per_dimm;
    m_dimms_per_channel = dimms_per_channel;

    int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel;
    m_memBankCount.setSize(totalBanks);

    clearStats();

@@ -54,50 +48,65 @@ MemCntrlProfiler::~MemCntrlProfiler()
{
}

void
MemCntrlProfiler::printStats(ostream& out) const
{
    if (!m_memReq && !m_memRefresh) {
        out << "Memory Controller: " << m_description
            << " no stats recorded." << endl
            << endl
            << endl;
        return;
    }

    // if there's a memory controller at all
    uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles;
    double stallsPerReq = total_stalls * 1.0 / m_memReq;
    out << "Memory controller: " << m_description << ":" << endl;

    // does not include refreshes
    out << " memory_total_requests: " << m_memReq << endl;
    out << " memory_reads: " << m_memRead << endl;
    out << " memory_writes: " << m_memWrite << endl;
    out << " memory_refreshes: " << m_memRefresh << endl;
    out << " memory_total_request_delays: " << total_stalls << endl;
    out << " memory_delays_per_request: " << stallsPerReq << endl;
    out << " memory_delays_in_input_queue: " << m_memInputQ << endl;
    out << " memory_delays_behind_head_of_bank_queue: "
        << m_memBankQ << endl;
    out << " memory_delays_stalled_at_head_of_bank_queue: "
        << m_memWaitCycles << endl;

    // Note: The following "memory stalls" entries are a breakdown of
    // the cycles which already showed up in m_memWaitCycles.  The
    // order is significant; it is the priority of attributing the
    // cycles.  For example, bank_busy is before arbitration because
    // if the bank was busy, we didn't even check arbitration.
    // Note: "not old enough" means that since we grouped waiting
    // heads-of-queues into batches to avoid starvation, a request in
    // a newer batch didn't try to arbitrate yet because there are
    // older requests waiting.
    out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl;
    out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl;
    out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl;
    out << " memory_stalls_for_arbitration: " << m_memArbWait << endl;
    out << " memory_stalls_for_bus: " << m_memBusBusy << endl;
    out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl;
    out << " memory_stalls_for_read_write_turnaround: "
        << m_memReadWriteBusy << endl;
    out << " memory_stalls_for_read_read_turnaround: "
        << m_memDataBusBusy << endl;
    out << " accesses_per_bank: ";

    for (int bank = 0; bank < m_memBankCount.size(); bank++) {
        out << m_memBankCount[bank] << " ";
    }
    out << endl;
    out << endl;
}
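As a quick sanity check on the accounting above, a self-contained toy example with made-up counts (nothing here comes from a real run):

#include <cstdint>
#include <iostream>

int main()
{
    // Made-up counts: 100 requests, 200 cycles in the input queue,
    // 300 cycles behind bank-queue heads, 500 cycles stalled at heads.
    uint64_t memReq = 100;
    uint64_t total_stalls = 200 + 300 + 500;   // memory_total_request_delays = 1000
    double stallsPerReq = total_stalls * 1.0 / memReq;
    std::cout << stallsPerReq << std::endl;    // memory_delays_per_request: 10
    return 0;
}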
void
MemCntrlProfiler::clearStats()
{
    m_memReq = 0;
    m_memBankBusy = 0;
@@ -115,72 +124,100 @@ void MemCntrlProfiler::clearStats()
    m_memRandBusy = 0;
    m_memNotOld = 0;

    for (int bank = 0; bank < m_memBankCount.size(); bank++) {
        m_memBankCount[bank] = 0;
    }
}

void
MemCntrlProfiler::profileMemReq(int bank)
{
    m_memReq++;
    m_memBankCount[bank]++;
}

void
MemCntrlProfiler::profileMemBankBusy()
{
    m_memBankBusy++;
}

void
MemCntrlProfiler::profileMemBusBusy()
{
    m_memBusBusy++;
}

void
MemCntrlProfiler::profileMemReadWriteBusy()
{
    m_memReadWriteBusy++;
}

void
MemCntrlProfiler::profileMemDataBusBusy()
{
    m_memDataBusBusy++;
}

void
MemCntrlProfiler::profileMemTfawBusy()
{
    m_memTfawBusy++;
}

void
MemCntrlProfiler::profileMemRefresh()
{
    m_memRefresh++;
}

void
MemCntrlProfiler::profileMemRead()
{
    m_memRead++;
}

void
MemCntrlProfiler::profileMemWrite()
{
    m_memWrite++;
}

void
MemCntrlProfiler::profileMemWaitCycles(int cycles)
{
    m_memWaitCycles += cycles;
}

void
MemCntrlProfiler::profileMemInputQ(int cycles)
{
    m_memInputQ += cycles;
}

void
MemCntrlProfiler::profileMemBankQ(int cycles)
{
    m_memBankQ += cycles;
}

void
MemCntrlProfiler::profileMemArbWait(int cycles)
{
    m_memArbWait += cycles;
}

void
MemCntrlProfiler::profileMemRandBusy()
{
    m_memRandBusy++;
}

void
MemCntrlProfiler::profileMemNotOld()
{
    m_memNotOld++;
}
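A hedged sketch of the intended call pattern from a memory-controller model; the construction parameters and the sequence of events are invented for illustration:

// Hypothetical controller activity (illustration only): one read
// request to bank 3 that waits two cycles for arbitration.
MemCntrlProfiler prof("MemoryControl0", 8, 2, 2); // banks/rank, ranks/dimm, dimms/channel
prof.profileMemReq(3);        // request enqueued, bumps bank 3's count
prof.profileMemArbWait(2);    // attribute the 2 stall cycles to arbitration
prof.profileMemWaitCycles(2); // ...and to the aggregate head-of-queue total
prof.profileMemRead();        // finally issued as a read
prof.printStats(std::cout);   // assumes <iostream> is available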
src/mem/ruby/profiler/MemCntrlProfiler.hh
@@ -1,4 +1,3 @@
/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
@@ -27,17 +26,8 @@
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__
#define __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__

#include <iostream>
#include <string>

@@ -47,80 +37,67 @@
template <class TYPE> class Vector;

class MemCntrlProfiler
{
  public:
    MemCntrlProfiler(const std::string& description, int banks_per_rank,
                     int ranks_per_dimm, int dimms_per_channel);
    ~MemCntrlProfiler();

    void printStats(std::ostream& out) const;
    void clearStats();

    void profileMemReq(int bank);
    void profileMemBankBusy();
    void profileMemBusBusy();
    void profileMemTfawBusy();
    void profileMemReadWriteBusy();
    void profileMemDataBusBusy();
    void profileMemRefresh();
    void profileMemRead();
    void profileMemWrite();
    void profileMemWaitCycles(int cycles);
    void profileMemInputQ(int cycles);
    void profileMemBankQ(int cycles);
    void profileMemArbWait(int cycles);
    void profileMemRandBusy();
    void profileMemNotOld();

    void print(std::ostream& out) const;

  private:
    // Private copy constructor and assignment operator
    MemCntrlProfiler(const MemCntrlProfiler& obj);
    MemCntrlProfiler& operator=(const MemCntrlProfiler& obj);

    std::string m_description;
    uint64 m_memReq;
    uint64 m_memBankBusy;
    uint64 m_memBusBusy;
    uint64 m_memTfawBusy;
    uint64 m_memReadWriteBusy;
    uint64 m_memDataBusBusy;
    uint64 m_memRefresh;
    uint64 m_memRead;
    uint64 m_memWrite;
    uint64 m_memWaitCycles;
    uint64 m_memInputQ;
    uint64 m_memBankQ;
    uint64 m_memArbWait;
    uint64 m_memRandBusy;
    uint64 m_memNotOld;
    Vector<uint64> m_memBankCount;
    int m_banks_per_rank;
    int m_ranks_per_dimm;
    int m_dimms_per_channel;
};

inline std::ostream&
operator<<(std::ostream& out, const MemCntrlProfiler& obj)
{
    obj.print(out);
    out << std::flush;
    return out;
}

#endif // __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__
(File diff suppressed because it is too large.)

src/mem/ruby/profiler/Profiler.hh
@@ -42,35 +42,24 @@
----------------------------------------------------------------------
*/

#ifndef __MEM_RUBY_PROFILER_PROFILER_HH__
#define __MEM_RUBY_PROFILER_PROFILER_HH__

#include "mem/protocol/AccessModeType.hh"
#include "mem/protocol/AccessType.hh"
#include "mem/protocol/CacheRequestType.hh"
#include "mem/protocol/GenericMachineType.hh"
#include "mem/protocol/GenericRequestType.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/common/Set.hh"
#include "mem/ruby/libruby.hh"
#include "mem/ruby/system/MachineID.hh"
#include "mem/ruby/system/MemoryControl.hh"
#include "mem/ruby/system/NodeID.hh"
#include "params/RubyProfiler.hh"
#include "sim/sim_object.hh"

@@ -79,155 +68,165 @@ class AddressProfiler;
template <class KEY_TYPE, class VALUE_TYPE> class Map;

class Profiler : public SimObject, public Consumer
{
  public:
    typedef RubyProfilerParams Params;
    Profiler(const Params *);
    ~Profiler();

    void wakeup();

    void setPeriodicStatsFile(const string& filename);
    void setPeriodicStatsInterval(integer_t period);

    void printStats(ostream& out, bool short_stats=false);
    void printShortStats(ostream& out) { printStats(out, true); }
    void printTraceStats(ostream& out) const;
    void clearStats();
    void printConfig(ostream& out) const;
    void printResourceUsage(ostream& out) const;

    AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
    AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }

    void addAddressTraceSample(const CacheMsg& msg, NodeID id);

    void profileRequest(const string& requestStr);
    void profileSharing(const Address& addr, AccessType type,
                        NodeID requestor, const Set& sharers,
                        const Set& owner);

    void profileMulticastRetry(const Address& addr, int count);

    void profileFilterAction(int action);

    void profileConflictingRequests(const Address& addr);

    void
    profileOutstandingRequest(int outstanding)
    {
        m_outstanding_requests.add(outstanding);
    }

    void
    profileOutstandingPersistentRequest(int outstanding)
    {
        m_outstanding_persistent_requests.add(outstanding);
    }

    void
    profileAverageLatencyEstimate(int latency)
    {
        m_average_latency_estimate.add(latency);
    }

    void recordPrediction(bool wasGood, bool wasPredicted);

    void startTransaction(int cpu);
    void endTransaction(int cpu);
    void profilePFWait(Time waitTime);

    void controllerBusy(MachineID machID);
    void bankBusy();
    void missLatency(Time t, RubyRequestType type);
    void swPrefetchLatency(Time t, CacheRequestType type,
                           GenericMachineType respondingMach);
    void sequencerRequests(int num) { m_sequencer_requests.add(num); }

    void profileTransition(const string& component, NodeID version,
                           Address addr, const string& state,
                           const string& event, const string& next_state,
                           const string& note);
    void profileMsgDelay(int virtualNetwork, int delayCycles);

    void print(ostream& out) const;

    void rubyWatch(int proc);
    bool watchAddress(Address addr);

    // return Ruby's start time
    Time
    getRubyStartTime()
    {
        return m_ruby_start;
    }

    // added by SS
    bool getHotLines() { return m_hot_lines; }
    bool getAllInstructions() { return m_all_instructions; }

  private:
    // Private copy constructor and assignment operator
    Profiler(const Profiler& obj);
    Profiler& operator=(const Profiler& obj);

    AddressProfiler* m_address_profiler_ptr;
    AddressProfiler* m_inst_profiler_ptr;

    Vector<int64> m_instructions_executed_at_start;
    Vector<int64> m_cycles_executed_at_start;

    ostream* m_periodic_output_file_ptr;
    integer_t m_stats_period;

    Time m_ruby_start;
    time_t m_real_time_start_time;

    Vector<Vector<integer_t> > m_busyControllerCount;
    integer_t m_busyBankCount;
    Histogram m_multicast_retry_histogram;

    Histogram m_filter_action_histogram;
    Histogram m_tbeProfile;

    Histogram m_sequencer_requests;
    Histogram m_read_sharing_histogram;
    Histogram m_write_sharing_histogram;
    Histogram m_all_sharing_histogram;
    int64 m_cache_to_cache;
    int64 m_memory_to_cache;

    Histogram m_prefetchWaitHistogram;

    Vector<Histogram> m_missLatencyHistograms;
    Vector<Histogram> m_machLatencyHistograms;
    Histogram m_allMissLatencyHistogram;

    Histogram m_allSWPrefetchLatencyHistogram;
    Histogram m_SWPrefetchL2MissLatencyHistogram;
    Vector<Histogram> m_SWPrefetchLatencyHistograms;
    Vector<Histogram> m_SWPrefetchMachLatencyHistograms;

    Histogram m_delayedCyclesHistogram;
    Histogram m_delayedCyclesNonPFHistogram;
    Vector<Histogram> m_delayedCyclesVCHistograms;

    Histogram m_outstanding_requests;
    Histogram m_outstanding_persistent_requests;

    Histogram m_average_latency_estimate;

    Map<Address, int>* m_watch_address_list_ptr;
    // counts all initiated cache requests, including PUTs
    int m_requests;
    Map<string, int>* m_requestProfileMap_ptr;

    // added by SS
    bool m_hot_lines;
    bool m_all_instructions;

    int m_num_of_sequencers;
};

inline ostream&
operator<<(ostream& out, const Profiler& obj)
{
    obj.print(out);
    out << flush;
    return out;
}

#endif // __MEM_RUBY_PROFILER_PROFILER_HH__
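For orientation, a minimal sketch of driving the periodic-stats hooks declared above; the file name, the interval, the existence of a configured `Profiler *profiler`, and the assumption that wakeup() flushes the periodic output are all mine, not the commit's:

// Hypothetical wiring (illustration only), given some Profiler
// *profiler built from the RubyProfiler params machinery.
profiler->setPeriodicStatsFile("ruby.periodic.stats"); // made-up file name
profiler->setPeriodicStatsInterval(1000000);           // made-up period, presumably cycles
profiler->printShortStats(std::cout);                  // same as printStats(out, true)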
src/mem/ruby/profiler/StoreTrace.cc
@@ -1,4 +1,3 @@
/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
@@ -27,132 +26,130 @@
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "mem/ruby/eventqueue/RubyEventQueue.hh"
#include "mem/ruby/profiler/StoreTrace.hh"

bool StoreTrace::s_init = false; // Has the summary state been initialized?
int64 StoreTrace::s_total_samples = 0; // Total number of store
                                       // lifetimes of all lines
Histogram* StoreTrace::s_store_count_ptr = NULL;
Histogram* StoreTrace::s_store_first_to_stolen_ptr = NULL;
Histogram* StoreTrace::s_store_last_to_stolen_ptr = NULL;
Histogram* StoreTrace::s_store_first_to_last_ptr = NULL;

StoreTrace::StoreTrace(const Address& addr)
    : m_store_count(-1), m_store_first_to_stolen(-1),
      m_store_last_to_stolen(-1), m_store_first_to_last(-1)
{
    StoreTrace::initSummary();
    m_addr = addr;
    m_total_samples = 0;

    // Really -1 isn't valid, so this will trigger the initialization code
    m_last_writer = -1;
    m_stores_this_interval = 0;
}

StoreTrace::~StoreTrace()
{
}

void
StoreTrace::print(ostream& out) const
{
    out << m_addr
        << " total_samples: " << m_total_samples << endl
        << "store_count: " << m_store_count << endl
        << "store_first_to_stolen: " << m_store_first_to_stolen << endl
        << "store_last_to_stolen: " << m_store_last_to_stolen << endl
        << "store_first_to_last: " << m_store_first_to_last << endl;
}

void
StoreTrace::initSummary()
{
    if (!s_init) {
        s_total_samples = 0;
        s_store_count_ptr = new Histogram(-1);
        s_store_first_to_stolen_ptr = new Histogram(-1);
        s_store_last_to_stolen_ptr = new Histogram(-1);
        s_store_first_to_last_ptr = new Histogram(-1);
    }
    s_init = true;
}

void
StoreTrace::printSummary(ostream& out)
{
    out << "total_samples: " << s_total_samples << endl;
    out << "store_count: " << (*s_store_count_ptr) << endl;
    out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl;
    out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl;
    out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl;
}

void
StoreTrace::clearSummary()
{
    StoreTrace::initSummary();
    s_total_samples = 0;
    s_store_count_ptr->clear();
    s_store_first_to_stolen_ptr->clear();
    s_store_last_to_stolen_ptr->clear();
    s_store_first_to_last_ptr->clear();
}

void
StoreTrace::store(NodeID node)
{
    Time current = g_eventQueue_ptr->getTime();

    assert((m_last_writer == -1) || (m_last_writer == node));

    m_last_writer = node;
    if (m_last_writer == -1) {
        assert(m_stores_this_interval == 0);
    }

    if (m_stores_this_interval == 0) {
        // A new processor just wrote the line, so reset the stats
        m_first_store = current;
    }

    m_last_store = current;
    m_stores_this_interval++;
}

void
StoreTrace::downgrade(NodeID node)
{
    if (node == m_last_writer) {
        Time current = g_eventQueue_ptr->getTime();
        assert(m_stores_this_interval != 0);
        assert(m_last_store != 0);
        assert(m_first_store != 0);
        assert(m_last_writer != -1);

        // Per line stats
        m_store_first_to_stolen.add(current - m_first_store);
        m_store_count.add(m_stores_this_interval);
        m_store_last_to_stolen.add(current - m_last_store);
        m_store_first_to_last.add(m_last_store - m_first_store);
        m_total_samples++;

        // Global stats
        assert(s_store_first_to_stolen_ptr != NULL);
        s_store_first_to_stolen_ptr->add(current - m_first_store);
        s_store_count_ptr->add(m_stores_this_interval);
        s_store_last_to_stolen_ptr->add(current - m_last_store);
        s_store_first_to_last_ptr->add(m_last_store - m_first_store);
        s_total_samples++;

        // Initialize for next go round
        m_stores_this_interval = 0;
        m_last_store = 0;
        m_first_store = 0;
        m_last_writer = -1;
    }
}
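To see how these two methods cooperate, a sketch of one store lifetime; the node ID and address are made up:

// Hypothetical lifetime (illustration only): node 1 writes the line
// three times, then loses it, which closes one sampling interval.
StoreTrace trace(Address(0x1000));  // made-up address
trace.store(1);      // first store of the interval: sets m_first_store
trace.store(1);      // later stores only advance m_last_store
trace.store(1);
trace.downgrade(1);  // line stolen from the last writer: samples
                     // store_count = 3 plus the three time histograms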
src/mem/ruby/profiler/StoreTrace.hh
@@ -1,4 +1,3 @@
/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
@@ -27,82 +26,63 @@
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __MEM_RUBY_PROFILER_STORETRACE_HH__
#define __MEM_RUBY_PROFILER_STORETRACE_HH__

#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Histogram.hh"

class StoreTrace
{
  public:
    StoreTrace() { }
    explicit StoreTrace(const Address& addr);
    ~StoreTrace();

    void store(NodeID node);
    void downgrade(NodeID node);
    int getTotal() const { return m_total_samples; }
    static void initSummary();
    static void printSummary(ostream& out);
    static void clearSummary();

    void print(ostream& out) const;

  private:
    static bool s_init;
    static int64 s_total_samples; // Total number of store lifetimes
                                  // of all lines
    static Histogram* s_store_count_ptr;
    static Histogram* s_store_first_to_stolen_ptr;
    static Histogram* s_store_last_to_stolen_ptr;
    static Histogram* s_store_first_to_last_ptr;

    Address m_addr;
    NodeID m_last_writer;
    Time m_first_store;
    Time m_last_store;
    int m_stores_this_interval;
    int64 m_total_samples; // Total number of store lifetimes of this line
    Histogram m_store_count;
    Histogram m_store_first_to_stolen;
    Histogram m_store_last_to_stolen;
    Histogram m_store_first_to_last;
};

inline bool
node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2)
{
    return n1->getTotal() > n2->getTotal();
}

inline ostream&
operator<<(ostream& out, const StoreTrace& obj)
{
    obj.print(out);
    out << flush;
    return out;
}

#endif // __MEM_RUBY_PROFILER_STORETRACE_HH__
util/regress
@@ -36,28 +36,29 @@ from subprocess import call
progname = os.path.basename(sys.argv[0])

optparser = optparse.OptionParser()
add_option = optparser.add_option
add_option('-v', '--verbose', dest='verbose', action='store_true',
           default=False,
           help='echo commands before executing')
add_option('--builds', dest='builds',
           default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \
           'ALPHA_SE_MESI_CMP_directory,' \
           'ALPHA_SE_MOESI_CMP_directory,' \
           'ALPHA_SE_MOESI_CMP_token,' \
           'ALPHA_FS,MIPS_SE,POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
           help="comma-separated build targets to test (default: '%default')")
add_option('--variants', dest='variants', default='fast',
           help="comma-separated build variants to test (default: '%default')")
add_option('--scons-opts', dest='scons_opts', default='', metavar='OPTS',
           help='scons options')
add_option('-j', '--jobs', type='int', default=1,
           help='number of parallel jobs to use')
add_option('-k', '--keep-going', action='store_true',
           help='keep going after errors')
add_option('-D', '--build-dir', default='',
           help='build directory location')
add_option('-n', "--no-exec", default=False, action='store_true',
           help="don't actually invoke scons, just echo SCons command line")

(options, tests) = optparser.parse_args()

@@ -66,6 +67,8 @@ optparser.add_option('-k', '--keep-going', action='store_true',
builds = options.builds.split(',')
variants = options.variants.split(',')

options.build_dir = os.path.join(options.build_dir, 'build')

# Call os.system() and raise exception if return status is non-zero
def system(cmd):
    try:
@@ -91,11 +94,11 @@ def shellquote(s):
if not tests:
    print "No tests specified, just building binaries."
    targets = ['%s/%s/m5.%s' % (options.build_dir, build, variant)
               for build in builds
               for variant in variants]
elif 'all' in tests:
    targets = ['%s/%s/tests/%s' % (options.build_dir, build, variant)
               for build in builds
               for variant in variants]
else:
@@ -103,17 +106,36 @@ else:
    # If we ever get a quick SPARC_FS test, this code should be removed
    if 'quick' in tests and 'SPARC_FS' in builds:
        builds.remove('SPARC_FS')
    targets = ['%s/%s/tests/%s/%s' % (options.build_dir, build, variant, test)
               for build in builds
               for variant in variants
               for test in tests]

def cpu_count():
    if 'bsd' in sys.platform or sys.platform == 'darwin':
        try:
            return int(os.popen('sysctl -n hw.ncpu').read())
        except ValueError:
            pass
    else:
        try:
            return os.sysconf('SC_NPROCESSORS_ONLN')
        except (ValueError, OSError, AttributeError):
            pass

    raise NotImplementedError('cannot determine number of cpus')

scons_opts = options.scons_opts
if options.jobs != 1:
    if options.jobs == 0:
        options.jobs = cpu_count()
    scons_opts += ' -j %d' % options.jobs
if options.keep_going:
    scons_opts += ' -k'

cmd = 'scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets))
if options.no_exec:
    print cmd
else:
    system(cmd)

sys.exit(0)