Merge yet again with the main repository.

This commit is contained in:
Gabe Black 2012-01-16 04:27:10 -08:00
commit da2a4acc26
190 changed files with 5557 additions and 5001 deletions

View file

@ -663,10 +663,14 @@ if not py_getvar('Py_ENABLE_SHARED'):
py_libs = []
for lib in py_getvar('LIBS').split() + py_getvar('SYSLIBS').split():
assert lib.startswith('-l')
lib = lib[2:]
if lib not in py_libs:
py_libs.append(lib)
if not lib.startswith('-l'):
# Python requires some special flags to link (e.g. -framework
# common on OS X systems), assume appending preserves order
main.Append(LINKFLAGS=[lib])
else:
lib = lib[2:]
if lib not in py_libs:
py_libs.append(lib)
py_libs.append(py_version)
main.Append(CPPPATH=py_includes)

View file

@ -483,6 +483,15 @@ def makeDualRoot(testSystem, driveSystem, dumpfile):
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
if hasattr(testSystem, 'realview'):
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
self.etherlink.int1 = Parent.drivesys.realview.ethernet.interface
elif hasattr(testSystem, 'tsunami'):
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
else:
fatal("Don't know how to connect these system together")
if dumpfile:
self.etherdump = EtherDump(file=dumpfile)
self.etherlink.dump = Parent.etherdump

View file

@ -27,7 +27,7 @@
# Authors: Lisa Hsu
# system options
parser.add_option("-c", "--cpu-type", type="choice", default="atomic",
parser.add_option("--cpu-type", type="choice", default="atomic",
choices = ["atomic", "timing", "detailed", "inorder"],
help = "type of cpu to run with")
parser.add_option("-n", "--num-cpus", type="int", default=1)
@ -63,7 +63,8 @@ parser.add_option("--work-end-exit-count", action="store", type="int",
help="exit at specified work end count")
parser.add_option("--work-begin-exit-count", action="store", type="int",
help="exit at specified work begin count")
parser.add_option("--init-param", action="store", type="int", default=0,
help="Parameter available in simulation with m5 initparam")
# Checkpointing options
###Note that performing checkpointing via python script files will override
@ -84,6 +85,10 @@ parser.add_option("--work-end-checkpoint-count", action="store", type="int",
help="checkpoint at specified work end count")
parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
help="checkpoint and exit when active cpu count is reached")
parser.add_option("--restore-with-cpu", action="store", type="choice",
default="atomic", choices = ["atomic", "timing",
"detailed", "inorder"],
help = "cpu type for restoring from a checkpoint")
# CPU Switching - default switch model goes from a checkpoint

View file

@ -60,7 +60,15 @@ def setCPUClass(options):
test_mem_mode = 'atomic'
if not atomic:
if options.checkpoint_restore != None or options.fast_forward:
if options.checkpoint_restore != None:
if options.restore_with_cpu != options.cpu_type:
CPUClass = TmpClass
class TmpClass(AtomicSimpleCPU): pass
else:
if options.restore_with_cpu != "atomic":
test_mem_mode = 'timing'
elif options.fast_forward:
CPUClass = TmpClass
class TmpClass(AtomicSimpleCPU): pass
else:

View file

@ -731,7 +731,7 @@ class vpr_route(vpr):
'-first_iter_pres_fac', '4', '-initial_pres_fac', '8' ]
output = 'route_log.out'
all = [ ammp, applu, apsi, art110, art470, equake, facerec, fma3d, galgel,
all = [ ammp, applu, apsi, art, art110, art470, equake, facerec, fma3d, galgel,
lucas, mesa, mgrid, sixtrack, swim, wupwise, bzip2_source,
bzip2_graphic, bzip2_program, crafty, eon_kajiya, eon_cook,
eon_rushmeier, gap, gcc_166, gcc_200, gcc_expr, gcc_integrate,

View file

@ -151,6 +151,8 @@ if options.kernel is not None:
if options.script is not None:
test_sys.readfile = options.script
test_sys.init_param = options.init_param
test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)]
CacheConfig.config_cache(options, test_sys)
@ -190,8 +192,8 @@ if len(bm) == 2:
elif buildEnv['TARGET_ISA'] == 'x86':
drive_sys = makeX86System(drive_mem_mode, np, bm[1])
elif buildEnv['TARGET_ISA'] == 'arm':
drive_sys = makeArmSystem(drive_mem_mode,
machine_options.machine_type, bm[1])
drive_sys = makeArmSystem(drive_mem_mode, options.machine_type, bm[1])
drive_sys.cpu = DriveCPUClass(cpu_id=0)
drive_sys.cpu.connectAllPorts(drive_sys.membus)
if options.fastmem:
@ -199,6 +201,7 @@ if len(bm) == 2:
if options.kernel is not None:
drive_sys.kernel = binary(options.kernel)
drive_sys.init_param = options.init_param
root = makeDualRoot(test_sys, drive_sys, options.etherdump)
elif len(bm) == 1:
root = Root(system=test_sys)

View file

@ -67,7 +67,8 @@ def define_options(parser):
def create_system(options, system, piobus = None, dma_devices = []):
system.ruby = RubySystem(clock = options.clock,
stats_filename = options.ruby_stats)
stats_filename = options.ruby_stats,
no_mem_vec = options.use_map)
ruby = system.ruby
protocol = buildEnv['PROTOCOL']
@ -154,11 +155,8 @@ def create_system(options, system, piobus = None, dma_devices = []):
ruby_profiler = RubyProfiler(ruby_system = ruby,
num_of_sequencers = len(cpu_sequencers))
ruby_tracer = RubyTracer(ruby_system = ruby)
ruby.network = network
ruby.profiler = ruby_profiler
ruby.tracer = ruby_tracer
ruby.mem_size = total_mem_size
ruby._cpu_ruby_ports = cpu_sequencers
ruby.random_seed = options.random_seed

View file

@ -91,7 +91,7 @@ ElfFile('libelf_msize.c')
m4env = main.Clone()
if m4env['GCC']:
major,minor,dot = [ int(x) for x in m4env['CXXVERSION'].split('.')]
major,minor,dot = [int(x) for x in m4env['GCC_VERSION'].split('.')]
if major >= 4:
m4env.Append(CCFLAGS=['-Wno-pointer-sign'])
m4env.Append(CCFLAGS=['-Wno-implicit'])

View file

@ -851,8 +851,8 @@ def makeEnv(label, objsfx, strip = False, **kwargs):
swig_env.Append(CCFLAGS='-Wno-uninitialized')
swig_env.Append(CCFLAGS='-Wno-sign-compare')
swig_env.Append(CCFLAGS='-Wno-parentheses')
swig_env.Append(CCFLAGS='-Wno-unused-label')
if compareVersions(env['GCC_VERSION'], '4.6.0') != -1:
swig_env.Append(CCFLAGS='-Wno-unused-label')
swig_env.Append(CCFLAGS='-Wno-unused-but-set-variable')
werror_env = new_env.Clone()

View file

@ -190,12 +190,15 @@ let {{
exec_output += PredOpExecute.subst(loadsymbolIop)
initparamCode = '''
Rt = PseudoInst::initParam(xc->tcBase());
uint64_t ip_val = PseudoInst::initParam(xc->tcBase());
R0 = bits(ip_val, 31, 0);
R1 = bits(ip_val, 63, 32);
'''
initparamIop = InstObjParams("initparam", "Initparam", "PredOp",
{ "code": initparamCode,
"predicate_test": predicateTest })
"predicate_test": predicateTest },
["IsNonSpeculative"])
header_output += BasicDeclare.subst(initparamIop)
decoder_output += BasicConstructor.subst(initparamIop)
exec_output += PredOpExecute.subst(initparamIop)

View file

@ -29,6 +29,8 @@
* Authors: Gabe Black
* Korey Sewell
* Jaidev Patwardhan
* Zhengxing Li
* Deyuan Guo
*/
#include "arch/mips/faults.hh"
@ -118,7 +120,7 @@ MipsFaultBase::setExceptionState(ThreadContext *tc, uint8_t excCode)
DPRINTF(MipsPRA, "PC: %s\n", pc);
bool delay_slot = pc.pc() + sizeof(MachInst) != pc.npc();
tc->setMiscRegNoEffect(MISCREG_EPC,
pc.pc() - delay_slot ? sizeof(MachInst) : 0);
pc.pc() - (delay_slot ? sizeof(MachInst) : 0));
// Set Cause_EXCCODE field
CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);

View file

@ -29,6 +29,8 @@
* Authors: Gabe Black
* Korey Sewell
* Jaidev Patwardhan
* Zhengxing Li
* Deyuan Guo
*/
#ifndef __MIPS_FAULTS_HH__
@ -88,7 +90,7 @@ class MipsFaultBase : public FaultBase
virtual FaultVect base(ThreadContext *tc) const
{
StatusReg status = tc->readMiscReg(MISCREG_STATUS);
if (status.bev)
if (!status.bev)
return tc->readMiscReg(MISCREG_EBASE);
else
return 0xbfc00200;
@ -167,7 +169,7 @@ class CoprocessorUnusableFault : public MipsFault<CoprocessorUnusableFault>
if (FullSystem) {
CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
cause.ce = coProcID;
tc->setMiscReg(MISCREG_CAUSE, cause);
tc->setMiscRegNoEffect(MISCREG_CAUSE, cause);
}
}
};
@ -179,7 +181,8 @@ class InterruptFault : public MipsFault<InterruptFault>
offset(ThreadContext *tc) const
{
CauseReg cause = tc->readMiscRegNoEffect(MISCREG_CAUSE);
return cause.iv ? 0x200 : 0x000;
// offset 0x200 for release 2, 0x180 for release 1.
return cause.iv ? 0x200 : 0x180;
}
};
@ -251,9 +254,10 @@ class TlbFault : public AddressFault<T>
StaticInstPtr inst = StaticInst::nullStaticInstPtr)
{
if (FullSystem) {
DPRINTF(MipsPRA, "Fault %s encountered.\n", name());
tc->pcState(this->vect(tc));
DPRINTF(MipsPRA, "Fault %s encountered.\n", this->name());
Addr vect = this->vect(tc);
setTlbExceptionState(tc, this->code());
tc->pcState(vect);
} else {
AddressFault<T>::invoke(tc, inst);
}

View file

@ -1253,7 +1253,7 @@ decode OPCODE_HI default Unknown::unknown() {
//When rs=L1
//Note: "1. Format type L is legal only if 64-bit
//floating point operations are enabled."
0x5: decode FUNCTION_HI {
0x5: decode FUNCTION {
format FloatConvertOp {
0x20: cvt_s_l({{ val = Fs_ud; }}, ToSingle);
0x21: cvt_d_l({{ val = Fs_ud; }}, ToDouble);

View file

@ -55,7 +55,7 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process,
strcpy(name->sysname, "Linux");
strcpy(name->nodename,"m5.eecs.umich.edu");
strcpy(name->release, "2.4.20");
strcpy(name->release, "2.6.35");
strcpy(name->version, "#1 Mon Aug 18 11:32:15 EDT 2003");
strcpy(name->machine, "mips");

View file

@ -55,7 +55,7 @@ const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; //HI & LO Regs
const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;//
const uint32_t MIPS32_QNAN = 0x7fbfffff;
const uint64_t MIPS64_QNAN = ULL(0x7fbfffffffffffff);
const uint64_t MIPS64_QNAN = ULL(0x7ff7ffffffffffff);
enum FPControlRegNums {
FLOATREG_FIR = NumFloatArchRegs,

View file

@ -29,6 +29,8 @@
* Authors: Nathan Binkert
* Steve Reinhardt
* Jaidev Patwardhan
* Zhengxing Li
* Deyuan Guo
*/
#include <string>
@ -310,18 +312,6 @@ Fault
TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
{
if (!FullSystem) {
//@TODO: This should actually use TLB instead of going directly
// to the page table in syscall mode.
/**
* Check for alignment faults
*/
if (req->getVaddr() & (req->getSize() - 1)) {
DPRINTF(TLB, "Alignment Fault on %#x, size = %d", req->getVaddr(),
req->getSize());
return new AddressErrorFault(req->getVaddr(), write);
}
Process * p = tc->getProcessPtr();
Fault fault = p->pTable->translate(req);

View file

@ -42,26 +42,34 @@ microcode = '''
def macroop IN_R_I {
.adjust_imm trimImm(8)
limm t1, imm, dataSize=asz
mfence
ld reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
mfence
};
def macroop IN_R_R {
zexti t2, regm, 15, dataSize=8
mfence
ld reg, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
mfence
};
def macroop OUT_I_R {
.adjust_imm trimImm(8)
limm t1, imm, dataSize=8
mfence
st reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
mfence
};
def macroop OUT_R_R {
zexti t2, reg, 15, dataSize=8
mfence
st regm, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
mfence
};
'''

View file

@ -45,9 +45,11 @@ def macroop INS_M_R {
zexti t2, reg, 15, dataSize=8
mfence
ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
st t6, es, [1, t0, rdi]
mfence
add rdi, rdi, t3, dataSize=asz
};
@ -63,6 +65,7 @@ def macroop INS_E_M_R {
zexti t2, reg, 15, dataSize=8
mfence
topOfLoop:
ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
@ -72,6 +75,7 @@ topOfLoop:
add rdi, rdi, t3, dataSize=asz
br label("topOfLoop"), flags=(nCEZF,)
end:
mfence
fault "NoFault"
};
@ -84,9 +88,11 @@ def macroop OUTS_R_M {
zexti t2, reg, 15, dataSize=8
mfence
ld t6, ds, [1, t0, rsi]
st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
nonSpec=True
mfence
add rsi, rsi, t3, dataSize=asz
};
@ -102,6 +108,7 @@ def macroop OUTS_E_R_M {
zexti t2, reg, 15, dataSize=8
mfence
topOfLoop:
ld t6, ds, [1, t0, rsi]
st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
@ -111,6 +118,7 @@ topOfLoop:
add rsi, rsi, t3, dataSize=asz
br label("topOfLoop"), flags=(nCEZF,)
end:
mfence
fault "NoFault"
};
'''

View file

@ -30,6 +30,12 @@
#include <unistd.h>
#ifdef __APPLE__
#include <mach/mach_init.h>
#include <mach/shared_region.h>
#include <mach/task.h>
#endif
#include <cctype>
#include <cerrno>
#include <cmath>
@ -82,7 +88,31 @@ procInfo(const char *filename, const char *target)
}
if (fp)
fclose(fp);
fclose(fp);
return 0;
}
/**
 * Report the simulator process' total virtual memory usage.
 *
 * When built with __APPLE__ defined, the figure comes from task_info() on
 * the current task; otherwise it is whatever procInfo() extracts for the
 * "VmSize:" entry of /proc/self/status.
 *
 * @return the memory usage figure (the declaration documents this as
 *         kilobytes), or 0 if the Darwin task_info() query fails.
 */
uint64_t
memUsage()
{
// For the Mach-based Darwin kernel, use the task_info of the self task
#ifdef __APPLE__
struct task_basic_info t_info;
// In/out count passed to task_info() alongside the info buffer.
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
if (KERN_SUCCESS != task_info(mach_task_self(),
TASK_BASIC_INFO, (task_info_t)&t_info,
&t_info_count)) {
// The query failed; report 0 rather than a stale or garbage value.
return 0;
}
// Mimic Darwin's implementation of top and subtract
// SHARED_REGION_SIZE from the task's virtual size to account for the
// shared memory submap that is incorporated into every process.
// The division by 1024 presumably converts bytes to kilobytes -- TODO
// confirm the unit of virtual_size.
// NOTE(review): if virtual_size could ever be smaller than
// SHARED_REGION_SIZE this subtraction would wrap (assuming unsigned
// operands) -- confirm that cannot happen.
return (t_info.virtual_size - SHARED_REGION_SIZE) / 1024;
#else
// Non-Apple (Linux) implementation: parse the VmSize entry from procfs.
return procInfo("/proc/self/status", "VmSize:");
#endif
}

View file

@ -39,7 +39,11 @@ std::string &hostname();
uint64_t procInfo(const char *filename, const char *target);
inline uint64_t memUsage()
{ return procInfo("/proc/self/status", "VmSize:"); }
/**
* Determine the simulator process' total virtual memory usage.
*
* @return virtual memory usage in kilobytes
*/
uint64_t memUsage();
#endif // __HOSTINFO_HH__

View file

@ -29,6 +29,7 @@
* Ali Saidi
*/
#include <limits>
#include "base/fenv.hh"
#include "base/intmath.hh"
#include "base/misc.hh"
@ -67,7 +68,10 @@ Random::genrand(uint32_t max)
{
if (max == 0)
return 0;
int log = ceilLog2(max) + 1;
if (max == std::numeric_limits<uint32_t>::max())
return genrand();
int log = ceilLog2(max + 1);
int shift = (sizeof(uint32_t) * 8 - log);
uint32_t random;
@ -83,7 +87,10 @@ Random::genrand(uint64_t max)
{
if (max == 0)
return 0;
int log = ceilLog2(max) + 1;
if (max == std::numeric_limits<uint64_t>::max())
return genrand();
int log = ceilLog2(max + 1);
int shift = (sizeof(uint64_t) * 8 - log);
uint64_t random;

View file

@ -1477,6 +1477,8 @@ class HistStor
/** The current sum. */
Counter sum;
/** The sum of logarithm of each sample, used to compute geometric mean. */
Counter logs;
/** The sum of squares. */
Counter squares;
/** The number of samples. */
@ -1528,6 +1530,7 @@ class HistStor
sum += val * number;
squares += val * val * number;
logs += log(val) * number;
samples += number;
}
@ -1567,6 +1570,7 @@ class HistStor
data.cvec[i] = cvec[i];
data.sum = sum;
data.logs = logs;
data.squares = squares;
data.samples = samples;
}
@ -1589,6 +1593,7 @@ class HistStor
sum = Counter();
squares = Counter();
samples = Counter();
logs = Counter();
}
};

View file

@ -183,6 +183,7 @@ struct DistData
VCounter cvec;
Counter sum;
Counter squares;
Counter logs;
Counter samples;
};

View file

@ -367,6 +367,12 @@ DistPrint::operator()(ostream &stream) const
print.value = data.samples ? data.sum / data.samples : NAN;
print(stream);
if (data.type == Hist) {
print.name = base + "gmean";
print.value = data.samples ? exp(data.logs / data.samples) : NAN;
print(stream);
}
Result stdev = NAN;
if (data.samples)
stdev = sqrt((data.samples * data.squares - data.sum * data.sum) /
@ -507,7 +513,14 @@ Text::visit(const Vector2dInfo &info)
bool havesub = false;
VectorPrint print;
print.subnames = info.y_subnames;
if (!info.y_subnames.empty()) {
for (off_type i = 0; i < info.y; ++i) {
if (!info.y_subnames[i].empty()) {
print.subnames = info.y_subnames;
}
break;
}
}
print.flags = info.flags;
print.separatorString = info.separatorString;
print.descriptions = descriptions;

View file

@ -284,17 +284,16 @@ class BaseCPU : public MemObject
void enableFunctionTrace();
void traceFunctionsInternal(Addr pc);
protected:
private:
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
public:
void traceFunctions(Addr pc)
{
if (functionTracingEnabled)
traceFunctionsInternal(pc);
}
private:
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
public:
static int numSimulatedCPUs() { return cpuList.size(); }
static Counter numSimulatedInstructions()
{

View file

@ -299,6 +299,7 @@ MultDivUnit::exeMulDiv(int slot_num)
}
mult_div_req->setProcessing(false);
cpu->wakeCPU();
}
void

View file

@ -51,6 +51,7 @@
#include "config/use_checker.hh"
#include "cpu/o3/commit.hh"
#include "cpu/o3/thread_state.hh"
#include "cpu/base.hh"
#include "cpu/exetrace.hh"
#include "cpu/timebuf.hh"
#include "debug/Activity.hh"
@ -987,6 +988,8 @@ DefaultCommit<Impl>::commitInsts()
// Updates misc. registers.
head_inst->updateMiscRegs();
cpu->traceFunctions(pc[tid].instAddr());
TheISA::advancePC(pc[tid], head_inst->staticInst);
// Keep track of the last sequence number committed

View file

@ -446,10 +446,6 @@ void
DefaultDecode<Impl>::sortInsts()
{
int insts_from_fetch = fromFetch->size;
#ifdef DEBUG
for (ThreadID tid = 0; tid < numThreads; tid++)
assert(insts[tid].empty());
#endif
for (int i = 0; i < insts_from_fetch; ++i) {
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
}

View file

@ -1340,10 +1340,10 @@ DefaultIEW<Impl>::executeInsts()
fetchRedirect[tid] = true;
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
inst->predInstAddr(), inst->predNextInstAddr());
DPRINTF(IEW, "Predicted target was PC: %s.\n",
inst->readPredTarg());
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %s.\n",
inst->pcState(), inst->nextInstAddr());
inst->pcState());
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst, tid);

View file

@ -766,10 +766,6 @@ void
DefaultRename<Impl>::sortInsts()
{
int insts_from_decode = fromDecode->size;
#ifdef DEBUG
for (ThreadID tid = 0; tid < numThreads; tid++)
assert(insts[tid].empty());
#endif
for (int i = 0; i < insts_from_decode; ++i) {
DynInstPtr inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);

View file

@ -83,7 +83,9 @@ PCEventQueue::schedule(PCEvent *event)
bool
PCEventQueue::doService(ThreadContext *tc)
{
Addr pc = tc->instAddr() & ~0x3;
// This will fail to break on Alpha PALcode addresses, but that is
// a rare use case.
Addr pc = tc->instAddr();
int serviced = 0;
range_t range = equal_range(pc);
for (iterator i = range.first; i != range.second; ++i) {
@ -91,7 +93,7 @@ PCEventQueue::doService(ThreadContext *tc)
// another event. This for example, prevents two invocations
// of the SkipFuncEvent. Maybe we should have separate PC
// event queues for each processor?
if (pc != (tc->instAddr() & ~0x3))
if (pc != tc->instAddr())
continue;
DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n",

View file

@ -43,7 +43,7 @@ class DirectedGenerator : public SimObject
virtual ~DirectedGenerator() {}
virtual bool initiate() = 0;
virtual void performCallback(uint proc, Addr address) = 0;
virtual void performCallback(uint32_t proc, Addr address) = 0;
void setDirectedTester(RubyDirectedTester* directed_tester);

View file

@ -103,7 +103,7 @@ InvalidateGenerator::initiate()
}
void
InvalidateGenerator::performCallback(uint proc, Addr address)
InvalidateGenerator::performCallback(uint32_t proc, Addr address)
{
assert(m_address == address);

View file

@ -49,14 +49,14 @@ class InvalidateGenerator : public DirectedGenerator
~InvalidateGenerator();
bool initiate();
void performCallback(uint proc, Addr address);
void performCallback(uint32_t proc, Addr address);
private:
InvalidateGeneratorStatus m_status;
Addr m_address;
uint m_active_read_node;
uint m_active_inv_node;
uint m_addr_increment_size;
uint32_t m_active_read_node;
uint32_t m_active_inv_node;
uint32_t m_addr_increment_size;
};
#endif //__CPU_DIRECTEDTEST_INVALIDATEGENERATOR_HH__

View file

@ -53,11 +53,11 @@ class RubyDirectedTester : public MemObject
RubyDirectedTester *tester;
public:
CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint _idx)
CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint32_t _idx)
: SimpleTimingPort(_name, _tester), tester(_tester), idx(_idx)
{}
uint idx;
uint32_t idx;
protected:
virtual bool recvTiming(PacketPtr pkt);

View file

@ -89,7 +89,7 @@ SeriesRequestGenerator::initiate()
}
void
SeriesRequestGenerator::performCallback(uint proc, Addr address)
SeriesRequestGenerator::performCallback(uint32_t proc, Addr address)
{
assert(m_active_node == proc);
assert(m_address == address);

View file

@ -49,13 +49,13 @@ class SeriesRequestGenerator : public DirectedGenerator
~SeriesRequestGenerator();
bool initiate();
void performCallback(uint proc, Addr address);
void performCallback(uint32_t proc, Addr address);
private:
SeriesRequestGeneratorStatus m_status;
Addr m_address;
uint m_active_node;
uint m_addr_increment_size;
uint32_t m_active_node;
uint32_t m_addr_increment_size;
bool m_issue_writes;
};

View file

@ -67,7 +67,7 @@ Pl111::Pl111(const Params *p)
{
pioSize = 0xFFFF;
pic = simout.create("framebuffer.bmp", true);
pic = simout.create(csprintf("%s.framebuffer.bmp", sys->name()), true);
dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)];

View file

@ -71,7 +71,7 @@ void
PioDevice::init()
{
if (!pioPort)
panic("Pio port %s not connected to anything!", name());
panic("Pio port of %s not connected to anything!", name());
pioPort->sendStatusChange(Port::RangeChange);
}

View file

@ -59,6 +59,7 @@ DebugFlag('MemoryAccess')
DebugFlag('ProtocolTrace')
DebugFlag('RubyCache')
DebugFlag('RubyCacheTrace')
DebugFlag('RubyDma')
DebugFlag('RubyGenerated')
DebugFlag('RubyMemory')
@ -67,9 +68,9 @@ DebugFlag('RubyPort')
DebugFlag('RubyQueue')
DebugFlag('RubySequencer')
DebugFlag('RubySlicc')
DebugFlag('RubyStorebuffer')
DebugFlag('RubySystem')
DebugFlag('RubyTester')
CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
'RubyGenerated', 'RubySlicc', 'RubyStorebuffer', 'RubyCache',
'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer'])
'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer', 'RubyCacheTrace'])

View file

@ -447,13 +447,6 @@ Bus::recvAtomic(PacketPtr pkt)
void
Bus::recvFunctional(PacketPtr pkt)
{
if (!pkt->isPrint()) {
// don't do DPRINTFs on PrintReq as it clutters up the output
DPRINTF(Bus,
"recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
pkt->getSrc(), pkt->getDest(), pkt->getAddr(),
pkt->cmdString());
}
assert(pkt->getDest() == Packet::Broadcast);
int port_id = findPort(pkt->getAddr());
@ -462,6 +455,14 @@ Bus::recvFunctional(PacketPtr pkt)
// id after each
int src_id = pkt->getSrc();
if (!pkt->isPrint()) {
// don't do DPRINTFs on PrintReq as it clutters up the output
DPRINTF(Bus,
"recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
src_id, port_id, pkt->getAddr(),
pkt->cmdString());
}
assert(pkt->isRequest()); // hasn't already been satisfied
SnoopIter s_end = snoopPorts.end();

View file

@ -1,4 +1,16 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2006 The Regents of The University of Michigan
* Copyright (c) 2010 Advanced Micro Devices, Inc.
* All rights reserved.
@ -192,14 +204,98 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data)
memcpy(getPtr<uint8_t>(), data + offset, getSize());
return true;
} else {
// In this case the timing packet only partially satisfies
// the request, so we would need more information to make
// this work. Like bytes valid in the packet or
// something, so the request could continue and get this
// bit of possibly newer data along with the older data
// not written to yet.
panic("Memory value only partially satisfies the functional "
"request. Now what?");
// Offsets and sizes to copy in case of partial overlap
int func_offset;
int val_offset;
int overlap_size;
// calculate offsets and copy sizes for the two byte arrays
if (val_start < func_start && val_end <= func_end) {
val_offset = func_start - val_start;
func_offset = 0;
overlap_size = val_end - func_start;
} else if (val_start >= func_start && val_end > func_end) {
val_offset = 0;
func_offset = val_start - func_start;
overlap_size = func_end - val_start;
} else if (val_start >= func_start && val_end <= func_end) {
val_offset = 0;
func_offset = val_start - func_start;
overlap_size = size;
} else {
panic("BUG: Missed a case for a partial functional request");
}
// Figure out how much of the partial overlap should be copied
// into the packet and not overwrite previously found bytes.
if (bytesValidStart == 0 && bytesValidEnd == 0) {
// No bytes have been copied yet, just set indices
// to found range
bytesValidStart = func_offset;
bytesValidEnd = func_offset + overlap_size;
} else {
// Some bytes have already been copied. Use bytesValid
// indices and offset values to figure out how much data
// to copy and where to copy it to.
// Index overlap conditions to check
int a = func_offset - bytesValidStart;
int b = (func_offset + overlap_size) - bytesValidEnd;
int c = func_offset - bytesValidEnd;
int d = (func_offset + overlap_size) - bytesValidStart;
if (a >= 0 && b <= 0) {
// bytes already in pkt data array are superset of
// found bytes, will not copy any bytes
overlap_size = 0;
} else if (a < 0 && d >= 0 && b <= 0) {
// found bytes will move bytesValidStart towards 0
overlap_size = bytesValidStart - func_offset;
bytesValidStart = func_offset;
} else if (b > 0 && c <= 0 && a >= 0) {
// found bytes will move bytesValidEnd
// towards end of pkt data array
overlap_size =
(func_offset + overlap_size) - bytesValidEnd;
val_offset += bytesValidEnd - func_offset;
func_offset = bytesValidEnd;
bytesValidEnd += overlap_size;
} else if (a < 0 && b > 0) {
// Found bytes are superset of copied range. Will move
// bytesValidStart towards 0 and bytesValidEnd towards
// end of pkt data array. Need to break copy into two
// pieces so as to not overwrite previously found data.
// copy the first half
uint8_t *dest = getPtr<uint8_t>() + func_offset;
uint8_t *src = data + val_offset;
memcpy(dest, src, (bytesValidStart - func_offset));
// re-calc the offsets and indices to do the copy
// required for the second half
val_offset += (bytesValidEnd - func_offset);
bytesValidStart = func_offset;
overlap_size =
(func_offset + overlap_size) - bytesValidEnd;
func_offset = bytesValidEnd;
bytesValidEnd += overlap_size;
} else if ((c > 0 && b > 0)
|| (a < 0 && d < 0)) {
// region to be copied is discontiguous! Not supported.
panic("BUG: Discontiguous bytes found"
"for functional copying!");
}
}
assert(bytesValidEnd <= getSize());
// copy partial data into the packet's data array
uint8_t *dest = getPtr<uint8_t>() + func_offset;
uint8_t *src = data + val_offset;
memcpy(dest, src, overlap_size);
// check if we're done filling the functional access
bool done = (bytesValidStart == 0) && (bytesValidEnd == getSize());
return done;
}
} else if (isWrite()) {
if (offset >= 0) {

View file

@ -299,6 +299,13 @@ class Packet : public FastAlloc, public Printable
*/
MemCmd origCmd;
/**
* These values specify the range of bytes found that satisfy a
* functional read.
*/
uint16_t bytesValidStart;
uint16_t bytesValidEnd;
public:
/// Used to calculate latencies for each packet.
Tick time;
@ -507,7 +514,8 @@ class Packet : public FastAlloc, public Printable
*/
Packet(Request *_req, MemCmd _cmd, NodeID _dest)
: flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
dest(_dest), time(curTick()), senderState(NULL)
dest(_dest), bytesValidStart(0), bytesValidEnd(0),
time(curTick()), senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr();
@ -526,7 +534,8 @@ class Packet : public FastAlloc, public Printable
*/
Packet(Request *_req, MemCmd _cmd, NodeID _dest, int _blkSize)
: flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
dest(_dest), time(curTick()), senderState(NULL)
dest(_dest), bytesValidStart(0), bytesValidEnd(0),
time(curTick()), senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr() & ~(_blkSize - 1);
@ -547,6 +556,7 @@ class Packet : public FastAlloc, public Printable
: cmd(pkt->cmd), req(pkt->req),
data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL),
addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest),
bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd),
time(curTick()), senderState(pkt->senderState)
{
if (!clearFlags)
@ -554,6 +564,7 @@ class Packet : public FastAlloc, public Printable
flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE|VALID_SRC|VALID_DST));
flags.set(pkt->flags & STATIC_DATA);
}
/**

View file

@ -1285,7 +1285,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
vv_allocateL2CacheBlock;
hp_copyFromTBEToL2;
s_deallocateTBE;
ka_wakeUpAllDependents;
}
transition(I, Trigger_L2_to_L1D, IT) {
@ -1566,7 +1565,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
k_popMandatoryQueue;
}
transition({MM, M, MMR}, Flush_line, MM_F) {
transition({MM, M, MMR, MR}, Flush_line, MM_F) {
i_allocateTBE;
bf_issueGETF;
p_decrementNumberOfMessagesByOne;

View file

@ -198,7 +198,11 @@ MessageBuffer::enqueue(MsgPtr message, Time delta)
m_last_arrival_time * g_eventQueue_ptr->getClock());
}
}
m_last_arrival_time = arrival_time;
// If running a cache trace, don't worry about the last arrival checks
if (!g_system_ptr->m_warmup_enabled) {
m_last_arrival_time = arrival_time;
}
// compute the delay cycles and set enqueue time
Message* msg_ptr = message.get();

View file

@ -58,7 +58,6 @@
#include <iostream>
#include "config/no_vector_bounds_checks.hh"
#include "mem/ruby/common/TypeDefines.hh"
#include "sim/eventq.hh"
@ -77,9 +76,6 @@ class RubyEventQueue : public EventManager
void scheduleEventAbsolute(Consumer* consumer, Time timeAbs);
void print(std::ostream& out) const;
void triggerEvents(Time t) { assert(0); }
void triggerAllEvents() { assert(0); }
private:
// Private copy constructor and assignment operator
RubyEventQueue(const RubyEventQueue& obj);

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
* Copyright (c) 2010 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -26,43 +27,154 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
#include "debug/RubyCacheTrace.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "gzstream.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "mem/ruby/system/System.hh"
using namespace std;
void
CacheRecorder::addRecord(Sequencer* sequencer, const Address& data_addr,
const Address& pc_addr, RubyRequestType type, Time time)
TraceRecord::print(ostream& out) const
{
TraceRecord rec(sequencer, data_addr, pc_addr, type, time);
m_records.push_back(rec);
out << "[TraceRecord: Node, " << m_cntrl_id << ", "
<< m_data_address << ", " << m_pc_address << ", "
<< m_type << ", Time: " << m_time << "]";
}
int
CacheRecorder::dumpRecords(string filename)
CacheRecorder::CacheRecorder()
: m_uncompressed_trace(NULL),
m_uncompressed_trace_size(0)
{
ogzstream out(filename.c_str());
if (out.fail()) {
cout << "Error: error opening file '" << filename << "'" << endl;
return 0;
}
/**
 * Construct a recorder around an existing uncompressed trace buffer.
 *
 * The constructor only stores its arguments and zeroes the progress
 * counters (bytes read, records read, records flushed); it issues no
 * requests itself.
 *
 * @param uncompressed_trace pointer to the trace bytes; the pointer itself
 *        is stored, the data is not copied here
 * @param uncompressed_trace_size size of that buffer (presumably in bytes
 *        -- TODO confirm against the caller)
 * @param seq_map sequencers used to initialise m_seq_map
 */
CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
uint64_t uncompressed_trace_size,
std::vector<Sequencer*>& seq_map)
: m_uncompressed_trace(uncompressed_trace),
m_uncompressed_trace_size(uncompressed_trace_size),
m_seq_map(seq_map), m_bytes_read(0), m_records_read(0),
m_records_flushed(0)
{
}
CacheRecorder::~CacheRecorder()
{
if (m_uncompressed_trace != NULL) {
delete m_uncompressed_trace;
m_uncompressed_trace = NULL;
}
std::sort(m_records.begin(), m_records.end(), greater<TraceRecord>());
int size = m_records.size();
for (int i = 0; i < size; ++i)
m_records[i].output(out);
m_records.clear();
return size;
m_seq_map.clear();
}
void
CacheRecorder::print(ostream& out) const
CacheRecorder::enqueueNextFlushRequest()
{
    // Issues a flush request for the next recorded block that has not been
    // flushed yet; does nothing once every entry of m_records was issued.
    if (m_records_flushed < m_records.size()) {
        TraceRecord* rec = m_records[m_records_flushed];
        m_records_flushed++;
        // One request per call, covering a whole Ruby block at the
        // recorded data address.
        Request* req = new Request(rec->m_data_address,
                                   RubySystem::getBlockSizeBytes(),0);
        MemCmd::Command requestType = MemCmd::FlushReq;
        Packet *pkt = new Packet(req, requestType, -1);

        // The controller id stored in the record indexes the sequencer map.
        Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id];
        assert(m_sequencer_ptr != NULL);
        // NOTE(review): req and pkt are not freed here; presumably the
        // completion path owns them -- confirm.
        m_sequencer_ptr->makeRequest(pkt);

        DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
    }
}
void
CacheRecorder::enqueueNextFetchRequest()
{
if (m_bytes_read < m_uncompressed_trace_size) {
TraceRecord* traceRecord = (TraceRecord*) (m_uncompressed_trace +
m_bytes_read);
DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord);
Request* req = new Request();
MemCmd::Command requestType;
if (traceRecord->m_type == RubyRequestType_LD) {
requestType = MemCmd::ReadReq;
req->setPhys(traceRecord->m_data_address,
RubySystem::getBlockSizeBytes(),0);
} else if (traceRecord->m_type == RubyRequestType_IFETCH) {
requestType = MemCmd::ReadReq;
req->setPhys(traceRecord->m_data_address,
RubySystem::getBlockSizeBytes(),
Request::INST_FETCH);
} else {
requestType = MemCmd::WriteReq;
req->setPhys(traceRecord->m_data_address,
RubySystem::getBlockSizeBytes(),0);
}
Packet *pkt = new Packet(req, requestType, -1);
pkt->dataStatic(traceRecord->m_data);
Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
assert(m_sequencer_ptr != NULL);
m_sequencer_ptr->makeRequest(pkt);
m_bytes_read += (sizeof(TraceRecord) +
RubySystem::getBlockSizeBytes());
m_records_read++;
}
}
void
CacheRecorder::addRecord(int cntrl, const physical_address_t data_addr,
const physical_address_t pc_addr,
RubyRequestType type, Time time, DataBlock& data)
{
TraceRecord* rec = (TraceRecord*)malloc(sizeof(TraceRecord) +
RubySystem::getBlockSizeBytes());
rec->m_cntrl_id = cntrl;
rec->m_time = time;
rec->m_data_address = data_addr;
rec->m_pc_address = pc_addr;
rec->m_type = type;
memcpy(rec->m_data, data.getData(0, RubySystem::getBlockSizeBytes()),
RubySystem::getBlockSizeBytes());
m_records.push_back(rec);
}
/**
 * Pack all recorded blocks into one contiguous buffer.
 *
 * The records are sorted with compareTraceRecords and copied back to back
 * into *buf. When the buffer is too small a larger one is allocated with
 * new[], the bytes written so far are moved over and the old buffer is
 * released with delete[]. Every record is freed and the record list is
 * emptied.
 *
 * @param buf        in/out pointer to the destination buffer
 * @param total_size capacity in bytes of the buffer passed in
 * @return number of bytes written to *buf
 */
uint64
CacheRecorder::aggregateRecords(uint8_t** buf, uint64 total_size)
{
    std::sort(m_records.begin(), m_records.end(), compareTraceRecords);

    int size = m_records.size();
    uint64 current_size = 0;
    const uint64 record_size =
        sizeof(TraceRecord) + RubySystem::getBlockSizeBytes();

    for (int i = 0; i < size; ++i) {
        // Determine if we need to expand the buffer size
        if (current_size + record_size > total_size) {
            // Keep doubling until the next record really fits. A single
            // doubling is not enough when the starting capacity is zero or
            // smaller than half a record.
            uint64 new_size = (total_size == 0) ? record_size : total_size;
            while (current_size + record_size > new_size) {
                new_size *= 2;
            }

            uint8_t* new_buf = new (nothrow) uint8_t[new_size];
            if (new_buf == NULL) {
                fatal("Unable to allocate buffer of size %s\n", new_size);
            }

            uint8_t* old_buf = *buf;
            if (current_size > 0) {
                memcpy(new_buf, old_buf, current_size);
            }
            *buf = new_buf;
            delete [] old_buf;
            total_size = new_size;
        }

        // Copy the current record into the buffer
        memcpy(&((*buf)[current_size]), m_records[i], record_size);
        current_size += record_size;

        free(m_records[i]);
        m_records[i] = NULL;
    }

    m_records.clear();
    return current_size;
}

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
* Copyright (c) 2010 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -34,37 +35,90 @@
#ifndef __MEM_RUBY_RECORDER_CACHERECORDER_HH__
#define __MEM_RUBY_RECORDER_CACHERECORDER_HH__
#include <iostream>
#include <string>
#include <vector>
#include "base/hashmap.hh"
#include "mem/protocol/RubyRequestType.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/recorder/TraceRecord.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/common/TypeDefines.hh"
class Address;
class TraceRecord;
class Sequencer;
/*!
 * Class for recording cache contents. Note that the last element of the
 * class is an array of length zero. It is used for creating variable
 * length object, so that while writing the data to a file one does not
 * need to copy the meta data and the actual data separately.
 *
 * Instances are not created with a constructor: storage for the header
 * plus one block of data is obtained in a single allocation and the
 * fields are then filled in one by one.
 */
class TraceRecord {
  public:
    // Id of the controller the block was recorded from.
    int m_cntrl_id;
    // Time stamp of the record; records are ordered by this value.
    Time m_time;
    // Physical address of the recorded data block.
    physical_address_t m_data_address;
    // Physical address of the pc associated with the record.
    physical_address_t m_pc_address;
    // Request type to use when the record is replayed.
    RubyRequestType m_type;
    // Start of the block data that trails the header (zero-length array).
    uint8_t m_data[0];

    // Writes a one-line human readable description of the record.
    void print(std::ostream& out) const;
};
class CacheRecorder
{
public:
void addRecord(Sequencer* sequencer, const Address& data_addr,
const Address& pc_addr, RubyRequestType type, Time time);
int dumpRecords(std::string filename);
CacheRecorder();
~CacheRecorder();
void print(std::ostream& out) const;
CacheRecorder(uint8_t* uncompressed_trace,
uint64_t uncompressed_trace_size,
std::vector<Sequencer*>& SequencerMap);
void addRecord(int cntrl, const physical_address_t data_addr,
const physical_address_t pc_addr, RubyRequestType type,
Time time, DataBlock& data);
uint64 aggregateRecords(uint8_t** data, uint64 size);
/*!
* Function for flushing the memory contents of the caches to the
* main memory. It goes through the recorded contents of the caches,
* and issues flush requests. Except for the first one, a flush request
* is issued only after the previous one has completed. This currently
* requires use of MOESI Hammer protocol since only that protocol
* supports flush requests.
*/
void enqueueNextFlushRequest();
/*!
* Function for warming up the memory and the caches. It goes
* through the recorded contents of the caches, as available in the
* checkpoint and issues fetch requests. Except for the first one, a
* fetch request is issued only after the previous one has completed.
* It should be possible to use this with any protocol.
*/
void enqueueNextFetchRequest();
private:
// Private copy constructor and assignment operator
CacheRecorder(const CacheRecorder& obj);
CacheRecorder& operator=(const CacheRecorder& obj);
std::vector<TraceRecord> m_records;
std::vector<TraceRecord*> m_records;
uint8_t* m_uncompressed_trace;
uint64_t m_uncompressed_trace_size;
std::vector<Sequencer*> m_seq_map;
uint64_t m_bytes_read;
uint64_t m_records_read;
uint64_t m_records_flushed;
};
inline bool
compareTraceRecords(const TraceRecord* n1, const TraceRecord* n2)
{
return n1->m_time > n2->m_time;
}
inline std::ostream&
operator<<(std::ostream& out, const CacheRecorder& obj)
operator<<(std::ostream& out, const TraceRecord& obj)
{
obj.print(out);
out << std::flush;

View file

@ -33,8 +33,4 @@ Import('*')
if env['PROTOCOL'] == 'None':
Return()
SimObject('Tracer.py')
Source('CacheRecorder.cc')
Source('Tracer.cc')
Source('TraceRecord.cc', Werror=False)

View file

@ -1,139 +0,0 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/recorder/TraceRecord.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "mem/ruby/system/System.hh"
#include "sim/sim_object.hh"
using namespace std;
// Builds a record for one request. Load-linked and store-conditional
// requests are stored as plain stores.
TraceRecord::TraceRecord(Sequencer* _sequencer, const Address& data_addr,
                         const Address& pc_addr, RubyRequestType type,
                         Time time)
    : m_sequencer_ptr(_sequencer), m_time(time),
      m_data_address(data_addr), m_pc_address(pc_addr), m_type(type)
{
    // Don't differentiate between store misses and atomic requests in
    // the trace
    const bool is_atomic = (m_type == RubyRequestType_Load_Linked) ||
                           (m_type == RubyRequestType_Store_Conditional);
    if (is_atomic) {
        m_type = RubyRequestType_ST;
    }
}
// Copy constructor; all the work is delegated to the assignment operator.
TraceRecord::TraceRecord(const TraceRecord& obj)
{
    operator=(obj);
}
// Member-wise assignment of every field of the record.
TraceRecord&
TraceRecord::operator=(const TraceRecord& obj)
{
    m_type = obj.m_type;
    m_pc_address = obj.m_pc_address;
    m_data_address = obj.m_data_address;
    m_time = obj.m_time;
    m_sequencer_ptr = obj.m_sequencer_ptr;
    return *this;
}
// Sends this record to its sequencer as a packet and waits, by repeatedly
// triggering events on the Ruby event queue, until the sequencer is empty
// both before and after the request is made.
void
TraceRecord::issueRequest() const
{
    assert(m_sequencer_ptr != NULL);
    // NOTE(review): req lives on the stack while pkt, which points at it,
    // is heap allocated and never deleted in this function -- confirm that
    // nothing keeps pkt after the drain loop below has finished.
    Request req(m_data_address.getAddress(), 0, 0);
    Packet *pkt = new Packet(&req, MemCmd(MemCmd::InvalidCmd), -1);

    // Clear out the sequencer
    while (!m_sequencer_ptr->empty()) {
        g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
    }

    m_sequencer_ptr->makeRequest(pkt);

    // Clear out the sequencer
    while (!m_sequencer_ptr->empty()) {
        g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
    }
}
// Writes a bracketed, human readable description of the record.
void
TraceRecord::print(ostream& out) const
{
    out << "[TraceRecord: Node, " << m_sequencer_ptr->name() << ", ";
    out << m_data_address << ", " << m_pc_address << ", ";
    out << m_type << ", Time: " << m_time << "]";
}
// Writes the record as one line of the trace file: sequencer name, data
// address, pc address and request type, separated by single spaces.
void
TraceRecord::output(ostream& out) const
{
    out << m_sequencer_ptr->name() << " ";
    m_data_address.output(out);
    out << " ";
    m_pc_address.output(out);
    out << " " << m_type << endl;
}
// Reads one record from a trace stream, in the field order written by
// output(). Returns false when end of file is reached after the two
// addresses have been read, true otherwise.
bool
TraceRecord::input(istream& in)
{
    string sequencer_name;
    in >> sequencer_name;

    // The SimObject find function is slow and iterates through the
    // simObjectList to find the sequencer pointer. Therefore, expect
    // trace playback to be slow.
    // NOTE(review): the lookup result is cast without any check -- confirm
    // what find() yields for an unknown or empty name.
    m_sequencer_ptr = (Sequencer*)SimObject::find(sequencer_name.c_str());

    m_data_address.input(in);
    m_pc_address.input(in);
    // End of file is only tested here, after the first three fields.
    if (in.eof())
        return false;

    string type;
    in >> type;
    m_type = string_to_RubyRequestType(type);

    // Ignore the rest of the line
    char c = '\0';
    while ((!in.eof()) && (c != '\n')) {
        in.get(c);
    }

    return true;
}

View file

@ -1,91 +0,0 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* An entry in the cache request record. It is aware of the ruby time
* and can issue the request back to the cache.
*/
#ifndef __MEM_RUBY_RECORDER_TRACERECORD_HH__
#define __MEM_RUBY_RECORDER_TRACERECORD_HH__
#include <iostream>
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/system/Sequencer.hh"
class CacheMsg;
// One entry of the cache request trace: which sequencer issued the request,
// when, for which data and pc addresses, and with which request type.
class TraceRecord
{
  public:
    // Builds a record from the given request description.
    TraceRecord(Sequencer* _sequencer, const Address& data_addr,
                const Address& pc_addr, RubyRequestType type, Time time);

    // Default record: no sequencer, time zero, NULL request type. The two
    // addresses are left to their own default construction.
    TraceRecord()
    {
        m_sequencer_ptr = NULL;
        m_time = 0;
        m_type = RubyRequestType_NULL;
    }

    TraceRecord(const TraceRecord& obj);
    TraceRecord& operator=(const TraceRecord& obj);

    // Replays the record through its sequencer.
    void issueRequest() const;

    // Human readable form, used by operator<<.
    void print(std::ostream& out) const;
    // Trace-file form: one line per record.
    void output(std::ostream& out) const;
    // Counterpart of output(): fills the record from a trace stream.
    bool input(std::istream& in);

  private:
    // operator> compares the private time stamps.
    friend bool operator>(const TraceRecord& n1, const TraceRecord& n2);

    Sequencer* m_sequencer_ptr;
    Time m_time;
    Address m_data_address;
    Address m_pc_address;
    RubyRequestType m_type;
};
// Orders records by time stamp: true when n1 carries the larger time.
inline bool
operator>(const TraceRecord& n1, const TraceRecord& n2)
{
    const Time first = n1.m_time;
    const Time second = n2.m_time;
    return first > second;
}
// Stream insertion: prints the record and flushes the stream.
inline std::ostream&
operator<<(std::ostream& out, const TraceRecord& obj)
{
    obj.print(out);
    return out << std::flush;
}
#endif // __MEM_RUBY_RECORDER_TRACERECORD_HH__

View file

@ -1,135 +0,0 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "base/cprintf.hh"
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
#include "mem/ruby/recorder/TraceRecord.hh"
#include "mem/ruby/recorder/Tracer.hh"
#include "mem/ruby/system/System.hh"
using namespace std;
// Creates a disabled tracer, takes the warmup length from the parameters
// and registers the tracer with the Ruby system named in the parameters.
Tracer::Tracer(const Params *p)
    : SimObject(p), m_enabled(false), m_warmup_length(p->warmup_length)
{
    assert(m_warmup_length > 0);

    p->ruby_system->registerTracer(this);
}
// Starts writing the request trace to the given file. A trace that is
// already running is stopped first; an empty file name starts nothing.
void
Tracer::startTrace(string filename)
{
    if (m_enabled)
        stopTrace();

    if (filename.empty())
        return;

    m_trace_file.open(filename.c_str());
    if (m_trace_file.fail()) {
        cprintf("Error: error opening file '%s'\n", filename);
        cprintf("Trace not enabled.\n");
        return;
    }

    cprintf("Request trace enabled to output file '%s'\n", filename);
    m_enabled = true;
}
// Closes the trace file and disables tracing; no-op when not tracing.
void
Tracer::stopTrace()
{
    if (!m_enabled)
        return;

    m_trace_file.close();
    cout << "Request trace file closed." << endl;
    m_enabled = false;
}
// Appends one request to the open trace file. Tracing must be enabled.
void
Tracer::traceRequest(Sequencer* sequencer, const Address& data_addr,
                     const Address& pc_addr, RubyRequestType type, Time time)
{
    assert(m_enabled);

    const TraceRecord entry(sequencer, data_addr, pc_addr, type, time);
    entry.output(m_trace_file);
}
// Replays every record of a gzip-compressed trace file and reports the
// wall-clock time the playback took.
//
// Returns the number of records issued, or 0 when the file could not be
// opened.
int
Tracer::playbackTrace(string filename)
{
    igzstream in(filename.c_str());
    if (in.fail()) {
        cprintf("Error: error opening file '%s'\n", filename);
        return 0;
    }

    time_t start_time = time(NULL);

    TraceRecord record;
    int counter = 0;
    // Read in the next TraceRecord
    bool ok = record.input(in);
    while (ok) {
        // Put it in the right cache
        record.issueRequest();
        counter++;

        // Read in the next TraceRecord
        ok = record.input(in);

        // Clear the statistics after warmup
        // (the test runs after the next record was read, so it fires
        // exactly once, when the issued count equals the warmup length)
        if (counter == m_warmup_length) {
            cprintf("Clearing stats after warmup of length %s\n",
                    m_warmup_length);
            g_system_ptr->clearStats();
        }
    }

    // Flush the prefetches through the system
    // FIXME - should be smarter
    g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 1000);

    time_t stop_time = time(NULL);
    double seconds = difftime(stop_time, start_time);
    double minutes = seconds / 60.0;
    cout << "playbackTrace: " << minutes << " minutes" << endl;

    return counter;
}
// Intentionally prints nothing; present so that operator<< can call it.
void
Tracer::print(ostream& out) const
{
}
// Factory hook of the parameter class: builds the Tracer described by
// these parameters. The caller receives a heap-allocated object.
Tracer *
RubyTracerParams::create()
{
    return new Tracer(this);
}

View file

@ -1,86 +0,0 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Controller class of the tracer. Can stop/start/playback the ruby
* cache requests trace.
*/
#ifndef __MEM_RUBY_RECORDER_TRACER_HH__
#define __MEM_RUBY_RECORDER_TRACER_HH__
#include <iostream>
#include <string>
#include "mem/protocol/RubyRequestType.hh"
#include "mem/ruby/common/Global.hh"
#include "params/RubyTracer.hh"
#include "sim/sim_object.hh"
#include "gzstream.hh"
class Address;
class TraceRecord;
class Sequencer;
// Controller of the request trace: starts and stops recording of requests
// to a gzip stream and plays a recorded trace back.
class Tracer : public SimObject
{
  public:
    typedef RubyTracerParams Params;
    Tracer(const Params *p);

    // Opens the trace file and enables tracing.
    void startTrace(std::string filename);
    // Closes the trace file and disables tracing.
    void stopTrace();
    bool traceEnabled() { return m_enabled; }
    // Writes one request to the trace; tracing must be enabled.
    void traceRequest(Sequencer* sequencer, const Address& data_addr,
                      const Address& pc_addr, RubyRequestType type, Time time);

    void print(std::ostream& out) const;

    // Replays a trace file; returns the number of records issued.
    int playbackTrace(std::string filename);

  private:
    // Private copy constructor and assignment operator
    Tracer(const Tracer& obj);
    Tracer& operator=(const Tracer& obj);

    // Output stream of the trace being recorded.
    ogzstream m_trace_file;
    // True while a trace file is open for recording.
    bool m_enabled;

    //added by SS
    // Number of replayed records after which statistics are cleared.
    int m_warmup_length;
};
// Stream insertion: prints the tracer and flushes the stream.
inline std::ostream&
operator<<(std::ostream& out, const Tracer& obj)
{
    obj.print(out);
    return out << std::flush;
}
#endif // __MEM_RUBY_RECORDER_TRACER_HH__

View file

@ -1,37 +0,0 @@
# Copyright (c) 2009 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
# Brad Beckmann
from m5.params import *
from m5.SimObject import SimObject
class RubyTracer(SimObject):
    # Configuration object for the Ruby request tracer.
    type = 'RubyTracer'
    # Name of the C++ class that implements this object.
    cxx_class = 'Tracer'
    # Number of replayed trace records after which statistics are cleared.
    warmup_length = Param.Int(100000, "")
    # Ruby system the tracer registers itself with.
    ruby_system = Param.RubySystem("")

View file

@ -33,12 +33,11 @@
#include <string>
#include "mem/protocol/AccessPermission.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "params/RubyController.hh"
#include "sim/sim_object.hh"
@ -68,6 +67,8 @@ class AbstractController : public SimObject, public Consumer
virtual void wakeup() = 0;
// virtual void dumpStats(std::ostream & out) = 0;
virtual void clearStats() = 0;
virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
virtual Sequencer* getSequencer() const = 0;
};
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -28,7 +28,9 @@
#include "base/intmath.hh"
#include "debug/RubyCache.hh"
#include "mem/protocol/AccessPermission.hh"
#include "mem/ruby/system/CacheMemory.hh"
#include "mem/ruby/system/System.hh"
using namespace std;
@ -364,31 +366,42 @@ CacheMemory::profileGenericRequest(GenericRequestType requestType,
}
void
CacheMemory::recordCacheContents(CacheRecorder& tr) const
CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
{
uint64 warmedUpBlocks = 0;
uint64 totalBlocks M5_VAR_USED = (uint64)m_cache_num_sets
* (uint64)m_cache_assoc;
for (int i = 0; i < m_cache_num_sets; i++) {
for (int j = 0; j < m_cache_assoc; j++) {
AccessPermission perm = m_cache[i][j]->m_Permission;
RubyRequestType request_type = RubyRequestType_NULL;
if (perm == AccessPermission_Read_Only) {
if (m_is_instruction_only_cache) {
request_type = RubyRequestType_IFETCH;
} else {
request_type = RubyRequestType_LD;
if (m_cache[i][j] != NULL) {
AccessPermission perm = m_cache[i][j]->m_Permission;
RubyRequestType request_type = RubyRequestType_NULL;
if (perm == AccessPermission_Read_Only) {
if (m_is_instruction_only_cache) {
request_type = RubyRequestType_IFETCH;
} else {
request_type = RubyRequestType_LD;
}
} else if (perm == AccessPermission_Read_Write) {
request_type = RubyRequestType_ST;
}
} else if (perm == AccessPermission_Read_Write) {
request_type = RubyRequestType_ST;
}
if (request_type != RubyRequestType_NULL) {
#if 0
tr.addRecord(m_chip_ptr->getID(), m_cache[i][j].m_Address,
Address(0), request_type,
m_replacementPolicy_ptr->getLastAccess(i, j));
#endif
if (request_type != RubyRequestType_NULL) {
tr->addRecord(cntrl, m_cache[i][j]->m_Address.getAddress(),
0, request_type,
m_replacementPolicy_ptr->getLastAccess(i, j),
m_cache[i][j]->getDataBlk());
warmedUpBlocks++;
}
}
}
}
DPRINTF(RubyCache, "%s: %lli blocks of %lli total blocks"
"recorded %.2f%% \n", name().c_str(), warmedUpBlocks,
(uint64)m_cache_num_sets * (uint64)m_cache_assoc,
(float(warmedUpBlocks)/float(totalBlocks))*100.0);
}
void

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -34,21 +34,15 @@
#include <vector>
#include "base/hashmap.hh"
#include "mem/protocol/AccessPermission.hh"
#include "mem/protocol/GenericRequestType.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/protocol/RubyRequestType.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/profiler/CacheProfiler.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
#include "mem/ruby/system/LRUPolicy.hh"
#include "mem/ruby/system/PseudoLRUPolicy.hh"
#include "mem/ruby/system/System.hh"
#include "params/RubyCache.hh"
#include "sim/sim_object.hh"
@ -100,12 +94,7 @@ class CacheMemory : public SimObject
int getLatency() const { return m_latency; }
// Hook for checkpointing the contents of the cache
void recordCacheContents(CacheRecorder& tr) const;
void
setAsInstructionCache(bool is_icache)
{
m_is_instruction_only_cache = is_icache;
}
void recordCacheContents(int cntrl, CacheRecorder* tr) const;
// Set this address to most recently used
void setMRU(const Address& address);
@ -146,7 +135,6 @@ class CacheMemory : public SimObject
// Data Members (m_prefix)
bool m_is_instruction_only_cache;
bool m_is_data_only_cache;
// The first index is the # of cache lines.
// The second index is the amount of associativity.

View file

@ -55,6 +55,9 @@ class DMASequencer : public RubyPort
/* external interface */
RequestStatus makeRequest(PacketPtr pkt);
// True while the sequencer is marked busy.
bool busy() { return m_is_busy;}
// Number of outstanding requests: 1 while busy, otherwise 0.
int outstandingCount() const { return (m_is_busy ? 1 : 0); }
// Always reports that no deadlock event is scheduled.
bool isDeadlockEventScheduled() const { return false; }
// Nothing to deschedule; provided as an empty operation.
void descheduleDeadlockEvent() {}
/* SLICC callback */
void dataCallback(const DataBlock & dblk);

View file

@ -58,6 +58,7 @@ DirectoryMemory::init()
if (m_use_map) {
m_sparseMemory = new SparseMemory(m_map_levels);
g_system_ptr->registerSparseMemory(m_sparseMemory);
} else {
m_entries = new AbstractEntry*[m_num_entries];
for (int i = 0; i < m_num_entries; i++)

View file

@ -29,6 +29,7 @@
#ifndef __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
#define __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
#include "base/trace.hh"
#include "mem/ruby/common/Address.hh"
class DirectoryMemory;
@ -48,6 +49,8 @@ class MemoryVector
void write(const Address & paddr, uint8* data, int len);
uint8* read(const Address & paddr, uint8* data, int len);
uint32 collatePages(uint8* &raw_data);
void populatePages(uint8* raw_data);
private:
uint8* getBlockPtr(const PhysAddress & addr);
@ -56,6 +59,7 @@ class MemoryVector
uint8** m_pages;
uint32 m_num_pages;
const uint32 m_page_offset_mask;
static const uint32 PAGE_SIZE = 4096;
};
inline
@ -97,7 +101,7 @@ MemoryVector::resize(uint32 size)
delete [] m_pages;
}
m_size = size;
assert(size%4096 == 0);
assert(size%PAGE_SIZE == 0);
m_num_pages = size >> 12;
m_pages = new uint8*[m_num_pages];
memset(m_pages, 0, m_num_pages * sizeof(uint8*));
@ -118,8 +122,8 @@ MemoryVector::write(const Address & paddr, uint8* data, int len)
}
if (all_zeros)
return;
m_pages[page_num] = new uint8[4096];
memset(m_pages[page_num], 0, 4096);
m_pages[page_num] = new uint8[PAGE_SIZE];
memset(m_pages[page_num], 0, PAGE_SIZE);
uint32 offset = paddr.getAddress() & m_page_offset_mask;
memcpy(&m_pages[page_num][offset], data, len);
} else {
@ -147,10 +151,82 @@ MemoryVector::getBlockPtr(const PhysAddress & paddr)
{
uint32 page_num = paddr.getAddress() >> 12;
if (m_pages[page_num] == 0) {
m_pages[page_num] = new uint8[4096];
memset(m_pages[page_num], 0, 4096);
m_pages[page_num] = new uint8[PAGE_SIZE];
memset(m_pages[page_num], 0, PAGE_SIZE);
}
return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
}
/*!
 * Serializes the whole physical memory into one freshly allocated buffer.
 *
 * Layout of the buffer: the number of pages (one uint32), then for every
 * page a single marker byte. A marker of 0 means the page pointer was NULL
 * and nothing follows; a marker of 1 is followed by the PAGE_SIZE bytes of
 * that page.
 *
 * The buffer is handed back through raw_data and the number of bytes
 * written is returned.
 */
inline uint32
MemoryVector::collatePages(uint8* &raw_data)
{
    // First pass: count the pages that actually hold data.
    uint32 populated_pages = 0;
    for (uint32 page = 0; page < m_num_pages; ++page) {
        if (m_pages[page] != 0)
            populated_pages++;
    }

    raw_data = new uint8[ sizeof(uint32) /* number of pages*/
                          + m_num_pages /* whether the page is all zeros */
                          + PAGE_SIZE * populated_pages];

    /* Write the number of pages to be stored. */
    memcpy(raw_data, &m_num_pages, sizeof(uint32));
    uint8* cursor = raw_data + sizeof(uint32);

    // Second pass: one marker byte per page, followed by the page itself
    // when it is present.
    for (uint32 page = 0; page < m_num_pages; ++page) {
        if (m_pages[page] != 0) {
            *cursor++ = 1;
            memcpy(cursor, m_pages[page], PAGE_SIZE);
            cursor += PAGE_SIZE;
        } else {
            *cursor++ = 0;
        }
    }

    return (uint32)(cursor - raw_data);
}
/*!
 * Rebuilds the pages of the memory from a buffer laid out as a page count
 * followed, for every page, by one marker byte and, when that byte is not
 * zero, by the PAGE_SIZE bytes of the page.
 *
 * The number of pages stored in the buffer must match the number of pages
 * of this memory, and no page may be allocated yet; both are asserted.
 */
inline void
MemoryVector::populatePages(uint8* raw_data)
{
    /* Read the number of pages that were stored. */
    uint32 stored_pages = 0;
    memcpy(&stored_pages, raw_data, sizeof(uint32));
    assert(stored_pages == m_num_pages);

    const uint8* cursor = raw_data + sizeof(uint32);
    for (uint32 page = 0; page < m_num_pages; ++page) {
        assert(m_pages[page] == 0);

        const bool has_data = (*cursor != 0);
        ++cursor;               // step over the marker byte
        if (!has_data)
            continue;

        m_pages[page] = new uint8[PAGE_SIZE];
        memcpy(m_pages[page], cursor, PAGE_SIZE);
        cursor += PAGE_SIZE;
    }
}
#endif // __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__

View file

@ -32,7 +32,6 @@
#include "base/hashmap.hh"
#include "mem/protocol/AccessPermission.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
template<class ENTRY>
struct PerfectCacheLineState
@ -57,10 +56,6 @@ class PerfectCacheMemory
static void printConfig(std::ostream& out);
// perform a cache access and see if we hit or not. Return true
// on a hit.
bool tryCacheAccess(const CacheMsg& msg, bool& block_stc, ENTRY*& entry);
// tests to see if an address is present in the cache
bool isTagPresent(const Address& address) const;
@ -118,15 +113,6 @@ PerfectCacheMemory<ENTRY>::printConfig(std::ostream& out)
{
}
// Placeholder: cache access is not implemented for the perfect cache, so
// any call ends in panic(). The return statement only satisfies the
// signature.
template<class ENTRY>
inline bool
PerfectCacheMemory<ENTRY>::tryCacheAccess(const CacheMsg& msg,
                                          bool& block_stc, ENTRY*& entry)
{
    panic("not implemented");
    return true;
}
// tests to see if an address is present in the cache
template<class ENTRY>
inline bool

View file

@ -27,11 +27,11 @@
*/
#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/Config.hh"
#include "debug/Ruby.hh"
#include "mem/protocol/AccessPermission.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/system/RubyPort.hh"
#include "mem/physical.hh"
RubyPort::RubyPort(const Params *p)
: MemObject(p)
@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p)
m_usingRubyTester = p->using_ruby_tester;
access_phys_mem = p->access_phys_mem;
drainEvent = NULL;
ruby_system = p->ruby_system;
waitingOnSequencer = false;
}
@ -66,8 +68,10 @@ Port *
RubyPort::getPort(const std::string &if_name, int idx)
{
if (if_name == "port") {
return new M5Port(csprintf("%s-port%d", name(), idx), this,
ruby_system, access_phys_mem);
M5Port* cpuPort = new M5Port(csprintf("%s-port%d", name(), idx),
this, ruby_system, access_phys_mem);
cpu_ports.push_back(cpuPort);
return cpuPort;
}
if (if_name == "pio_port") {
@ -508,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
(*i)->sendRetry();
}
}
testDrainComplete();
}
void
RubyPort::testDrainComplete()
{
//If we weren't able to drain before, we might be able to now.
if (drainEvent != NULL) {
unsigned int drainCount = getDrainCount(drainEvent);
DPRINTF(Config, "Drain count: %u\n", drainCount);
if (drainCount == 0) {
drainEvent->process();
// Clear the drain event once we're done with it.
drainEvent = NULL;
}
}
}
unsigned int
RubyPort::getDrainCount(Event *de)
{
    //
    // Requests still outstanding in the sequencer have to drain. Each of
    // them accounts for one time M5's timing port will process the drain
    // event, so the total starts with the outstanding count.
    //
    int pending = outstandingCount();
    DPRINTF(Config, "outstanding count %d\n", outstandingCount());

    // Draining is simpler if the sequencer's deadlock detection event has
    // already been descheduled, so that is required here.
    assert(isDeadlockEventScheduled() == false);

    if (pio_port != NULL) {
        pending += pio_port->drain(de);
        DPRINTF(Config, "count after pio check %d\n", pending);
    }

    if (physMemPort != NULL) {
        pending += physMemPort->drain(de);
        DPRINTF(Config, "count after physmem check %d\n", pending);
    }

    // Finally add whatever each of the recorded CPU ports reports.
    for (std::vector<M5Port*>::size_type idx = 0; idx < cpu_ports.size();
         ++idx) {
        pending += cpu_ports[idx]->drain(de);
        DPRINTF(Config, "count after cpu port check %d\n", pending);
    }

    DPRINTF(Config, "final count %d\n", pending);

    return pending;
}
unsigned int
RubyPort::drain(Event *de)
{
    // getDrainCount() requires the deadlock check event to be descheduled.
    if (isDeadlockEventScheduled())
        descheduleDeadlockEvent();

    const int pending = getDrainCount(de);

    if (pending == 0) {
        // Nothing is in flight, so the port is drained right away.
        changeState(SimObject::Drained);
        return 0;
    }

    // Remember the event so that testDrainComplete() can process it once
    // the count reaches zero.
    drainEvent = de;
    changeState(SimObject::Draining);
    return pending;
}
void

View file

@ -33,7 +33,6 @@
#include <string>
#include "mem/protocol/RequestStatus.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/system/System.hh"
#include "mem/mem_object.hh"
#include "mem/physical.hh"
@ -115,17 +114,23 @@ class RubyPort : public MemObject
Port *getPort(const std::string &if_name, int idx);
virtual RequestStatus makeRequest(PacketPtr pkt) = 0;
virtual int outstandingCount() const = 0;
virtual bool isDeadlockEventScheduled() const = 0;
virtual void descheduleDeadlockEvent() = 0;
//
// Called by the controller to give the sequencer a pointer.
// A pointer to the controller is needed for atomic support.
//
void setController(AbstractController* _cntrl) { m_controller = _cntrl; }
int getId() { return m_version; }
unsigned int drain(Event *de);
protected:
const std::string m_name;
void ruby_hit_callback(PacketPtr pkt);
void hit(PacketPtr pkt);
void testDrainComplete();
int m_version;
AbstractController* m_controller;
@ -143,11 +148,19 @@ class RubyPort : public MemObject
}
}
unsigned int getDrainCount(Event *de);
uint16_t m_port_id;
uint64_t m_request_cnt;
M5Port* physMemPort;
/*! Vector of CPU Port attached to this Ruby port. */
typedef std::vector<M5Port*>::iterator CpuPortIter;
std::vector<M5Port*> cpu_ports;
Event *drainEvent;
PhysicalMemory* physmem;
RubySystem* ruby_system;

View file

@ -40,9 +40,7 @@
#include "mem/protocol/RubyAccessMode.hh"
#include "mem/ruby/buffers/MessageBuffer.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/recorder/Tracer.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/system/CacheMemory.hh"
#include "mem/ruby/system/Sequencer.hh"
@ -521,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest,
}
// update the data
if (pkt->getPtr<uint8_t>(true) != NULL) {
if (g_system_ptr->m_warmup_enabled) {
assert(pkt->getPtr<uint8_t>(false) != NULL);
data.setData(pkt->getPtr<uint8_t>(false),
request_address.getOffset(), pkt->getSize());
} else if (pkt->getPtr<uint8_t>(true) != NULL) {
if ((type == RubyRequestType_LD) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
@ -553,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
testerSenderState->subBlock->mergeFrom(data);
}
ruby_hit_callback(pkt);
delete srequest;
if (g_system_ptr->m_warmup_enabled) {
delete pkt;
g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
} else if (g_system_ptr->m_cooldown_enabled) {
delete pkt;
g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
} else {
ruby_hit_callback(pkt);
}
}
bool

View file

@ -39,8 +39,6 @@
#include "mem/ruby/system/RubyPort.hh"
class DataBlock;
class CacheMsg;
class MachineID;
class CacheMemory;
class RubySequencerParams;
@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer
RequestStatus makeRequest(PacketPtr pkt);
bool empty() const;
// Returns the current value of m_outstanding_count.
int outstandingCount() const { return m_outstanding_count; }
// Returns true while deadlockCheckEvent is scheduled.
bool
isDeadlockEventScheduled() const
{
    return deadlockCheckEvent.scheduled();
}
// Deschedules deadlockCheckEvent.
// NOTE(review): presumably the event must currently be scheduled; the
// visible caller checks isDeadlockEventScheduled() first -- confirm.
void
descheduleDeadlockEvent()
{
    deschedule(deadlockCheckEvent);
}
void print(std::ostream& out) const;
void printStats(std::ostream& out) const;

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2009 Advanced Micro Devices, Inc.
* Copyright (c) 2012 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -26,6 +27,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <queue>
#include "debug/RubyCache.hh"
#include "mem/ruby/system/SparseMemory.hh"
#include "mem/ruby/system/System.hh"
@ -82,19 +85,19 @@ SparseMemory::recursivelyRemoveTables(SparseMapType* curTable, int curLevel)
SparseMapType::iterator iter;
for (iter = curTable->begin(); iter != curTable->end(); iter++) {
SparseMemEntry* entryStruct = &((*iter).second);
SparseMemEntry entry = (*iter).second;
if (curLevel != (m_number_of_levels - 1)) {
// If the not at the last level, analyze those lower level
// tables first, then delete those next tables
SparseMapType* nextTable = (SparseMapType*)(entryStruct->entry);
SparseMapType* nextTable = (SparseMapType*)(entry);
recursivelyRemoveTables(nextTable, (curLevel + 1));
delete nextTable;
} else {
// If at the last level, delete the directory entry
delete (AbstractEntry*)(entryStruct->entry);
delete (AbstractEntry*)(entry);
}
entryStruct->entry = NULL;
entry = NULL;
}
// Once all entries have been deleted, erase the entries
@ -134,7 +137,7 @@ SparseMemory::exist(const Address& address) const
// If the address is found, move on to the next level.
// Otherwise, return not found
if (curTable->count(curAddress) != 0) {
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
curTable = (SparseMapType*)((*curTable)[curAddress]);
} else {
DPRINTF(RubyCache, "Not found\n");
return false;
@ -156,7 +159,6 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
Address curAddress;
SparseMapType* curTable = m_map_head;
SparseMemEntry* entryStruct = NULL;
// Initiallize the high bit to be the total number of bits plus
// the block offset. However the highest bit index is one less
@ -179,7 +181,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
// if the address exists in the cur table, move on. Otherwise
// create a new table.
if (curTable->count(curAddress) != 0) {
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
curTable = (SparseMapType*)((*curTable)[curAddress]);
} else {
m_adds_per_level[level]++;
@ -194,9 +196,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
// Create the pointer container SparseMemEntry and add it
// to the table.
entryStruct = new SparseMemEntry;
entryStruct->entry = newEntry;
(*curTable)[curAddress] = *entryStruct;
(*curTable)[curAddress] = newEntry;
// Move to the next level of the heirarchy
curTable = (SparseMapType*)newEntry;
@ -215,7 +215,7 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
{
Address curAddress;
CurNextInfo nextInfo;
SparseMemEntry* entryStruct;
SparseMemEntry entry;
// create the appropriate address for this level
// Note: that set Address is inclusive of the specified range,
@ -231,11 +231,11 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
assert(curInfo.curTable->count(curAddress) != 0);
entryStruct = &((*(curInfo.curTable))[curAddress]);
entry = (*(curInfo.curTable))[curAddress];
if (curInfo.level < (m_number_of_levels - 1)) {
// set up next level's info
nextInfo.curTable = (SparseMapType*)(entryStruct->entry);
nextInfo.curTable = (SparseMapType*)(entry);
nextInfo.level = curInfo.level + 1;
nextInfo.highBit = curInfo.highBit -
@ -252,15 +252,15 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
if (tableSize == 0) {
m_removes_per_level[curInfo.level]++;
delete nextInfo.curTable;
entryStruct->entry = NULL;
entry = NULL;
curInfo.curTable->erase(curAddress);
}
} else {
// if this is the last level, we have reached the Directory
// Entry and thus we should delete it including the
// SparseMemEntry container struct.
delete (AbstractEntry*)(entryStruct->entry);
entryStruct->entry = NULL;
delete (AbstractEntry*)(entry);
entry = NULL;
curInfo.curTable->erase(curAddress);
m_removes_per_level[curInfo.level]++;
}
@ -331,7 +331,7 @@ SparseMemory::lookup(const Address& address)
// If the address is found, move on to the next level.
// Otherwise, return not found
if (curTable->count(curAddress) != 0) {
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
curTable = (SparseMapType*)((*curTable)[curAddress]);
} else {
DPRINTF(RubyCache, "Not found\n");
return NULL;
@ -344,6 +344,70 @@ SparseMemory::lookup(const Address& address)
return entry;
}
void
SparseMemory::recordBlocks(int cntrl_id, CacheRecorder* tr) const
{
queue<SparseMapType*> unexplored_nodes[2];
queue<physical_address_t> address_of_nodes[2];
unexplored_nodes[0].push(m_map_head);
address_of_nodes[0].push(0);
int parity_of_level = 0;
physical_address_t address, temp_address;
Address curAddress;
// Initiallize the high bit to be the total number of bits plus
// the block offset. However the highest bit index is one less
// than this value.
int highBit = m_total_number_of_bits + RubySystem::getBlockSizeBits();
int lowBit;
for (int cur_level = 0; cur_level < m_number_of_levels; cur_level++) {
// create the appropriate address for this level
// Note: that set Address is inclusive of the specified range,
// thus the high bit is one less than the total number of bits
// used to create the address.
lowBit = highBit - m_number_of_bits_per_level[cur_level];
while (!unexplored_nodes[parity_of_level].empty()) {
SparseMapType* node = unexplored_nodes[parity_of_level].front();
unexplored_nodes[parity_of_level].pop();
address = address_of_nodes[parity_of_level].front();
address_of_nodes[parity_of_level].pop();
SparseMapType::iterator iter;
for (iter = node->begin(); iter != node->end(); iter++) {
SparseMemEntry entry = (*iter).second;
curAddress = (*iter).first;
if (cur_level != (m_number_of_levels - 1)) {
// If not at the last level, put this node in the queue
unexplored_nodes[1 - parity_of_level].push(
(SparseMapType*)(entry));
address_of_nodes[1 - parity_of_level].push(address |
(curAddress.getAddress() << lowBit));
} else {
// If at the last level, add a trace record
temp_address = address | (curAddress.getAddress()
<< lowBit);
DataBlock block = ((AbstractEntry*)entry)->getDataBlk();
tr->addRecord(cntrl_id, temp_address, 0, RubyRequestType_ST, 0,
block);
}
}
}
// Adjust the highBit value for the next level
highBit -= m_number_of_bits_per_level[cur_level];
parity_of_level = 1 - parity_of_level;
}
}
void
SparseMemory::print(ostream& out) const
{

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2009 Advanced Micro Devices, Inc.
* Copyright (c) 2012 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,15 +33,11 @@
#include <iostream>
#include "base/hashmap.hh"
#include "mem/ruby/slicc_interface/AbstractEntry.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
struct SparseMemEntry
{
void* entry;
};
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "mem/ruby/slicc_interface/AbstractEntry.hh"
typedef void* SparseMemEntry;
typedef m5::hash_map<Address, SparseMemEntry> SparseMapType;
struct CurNextInfo
@ -63,6 +60,14 @@ class SparseMemory
void add(const Address& address, AbstractEntry*);
void remove(const Address& address);
/*!
* Function for recording the contents of memory. This function walks
* through all the levels of the sparse memory in a breadth first
* fashion. This might need more memory than a depth first approach.
* But breadth first seems easier to me than a depth first approach.
*/
void recordBlocks(int cntrl_id, CacheRecorder *) const;
AbstractEntry* lookup(const Address& address);
// Print cache contents
@ -95,12 +100,4 @@ class SparseMemory
uint64_t* m_removes_per_level;
};
inline std::ostream&
operator<<(std::ostream& out, const SparseMemEntry& obj)
{
out << "SparseMemEntry";
out << std::flush;
return out;
}
#endif // __MEM_RUBY_SYSTEM_SPARSEMEMORY_HH__

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -26,16 +26,19 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fcntl.h>
#include <zlib.h>
#include <cstdio>
#include "base/intmath.hh"
#include "base/output.hh"
#include "mem/ruby/buffers/MessageBuffer.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/recorder/Tracer.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/system/MemoryVector.hh"
#include "mem/ruby/system/System.hh"
#include "sim/simulate.hh"
using namespace std;
@ -49,7 +52,6 @@ int RubySystem::m_memory_size_bits;
Network* RubySystem::m_network_ptr;
Profiler* RubySystem::m_profiler_ptr;
Tracer* RubySystem::m_tracer_ptr;
MemoryVector* RubySystem::m_mem_vec_ptr;
RubySystem::RubySystem(const Params *p)
@ -88,6 +90,8 @@ RubySystem::RubySystem(const Params *p)
//
RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
registerExitCallback(rubyExitCB);
m_warmup_enabled = false;
m_cooldown_enabled = false;
}
void
@ -108,23 +112,22 @@ RubySystem::registerProfiler(Profiler* profiler_ptr)
m_profiler_ptr = profiler_ptr;
}
void
RubySystem::registerTracer(Tracer* tracer_ptr)
{
m_tracer_ptr = tracer_ptr;
}
// Appends the given controller to m_abs_cntrl_vec. serialize() and
// unserialize() index that vector by controller position.
void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);
}
// Appends the given sparse memory to m_sparse_memory_vector, whose
// elements serialize() asks to record their blocks when there is no
// memory vector.
void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}
RubySystem::~RubySystem()
{
delete m_network_ptr;
delete m_profiler_ptr;
delete m_tracer_ptr;
if (m_mem_vec_ptr)
delete m_mem_vec_ptr;
}
@ -166,10 +169,144 @@ RubySystem::printStats(ostream& out)
m_network_ptr->printStats(out);
}
/**
 * Write a raw trace buffer as a gzip compressed file into the checkpoint
 * directory and release the buffer afterwards.
 *
 * @param raw_data buffer holding the uncompressed trace. It has to be
 *        allocated with new[]; this function takes ownership and frees it.
 * @param filename name of the file to create, relative to
 *        Checkpoint::dir().
 * @param uncompressed_trace_size number of bytes of raw_data to write.
 *
 * Any failure to create, write or close the file is fatal.
 */
void
RubySystem::writeCompressedTrace(uint8* raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename.c_str();

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    // NOTE(review): gzwrite() takes an unsigned length and returns an int,
    // so traces of 2GB or more presumably need to be written in chunks --
    // confirm against the zlib manual.
    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }

    // The buffer is an array allocation (serialize() creates it with
    // new uint8_t[...]), so it has to be released with the array form of
    // delete; a scalar delete here is undefined behavior.
    delete[] raw_data;
}
/**
 * Checkpoint the Ruby memory system.
 *
 * The function records the cache contents of all registered controllers
 * in a CacheRecorder, runs a private simulation phase with
 * m_cooldown_enabled set (during which RubyEvent::process() asks the
 * recorder to enqueue flush requests), and finally writes the memory
 * contents and the recorded cache trace as compressed files whose names
 * and uncompressed sizes are stored in the checkpoint.
 */
void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    // Build a per-controller table of sequencers. sequencer_ptr ends up as
    // the first non-NULL sequencer and cntrl_id as the index of the
    // controller that owns it.
    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }
    // At least one controller has to provide a sequencer.
    assert(sequencer_ptr != NULL);
    // Controllers without a sequencer of their own are mapped to the first
    // sequencer that was found.
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }
    // Create the CacheRecorder and record the cache trace
    // NOTE(review): this function never deletes m_cache_recorder; confirm
    // whether it is released elsewhere.
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    // (the queue head is replaced by NULL while the cooldown runs)
    Event* eventq_head = eventq->replaceHead(NULL);
    // Schedule an event to start cache cooldown
    RubyEvent* e = new RubyEvent(this);
    schedule(e,curTick());
    simulate();
    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    curTick(curtick_original);
    uint8* raw_data = NULL;
    if (m_mem_vec_ptr != NULL) {
        // A memory vector exists: collate its pages into one buffer and
        // store it as <name>.memory.gz. writeCompressedTrace() frees the
        // buffer.
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);
        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);
    } else {
        // No memory vector: let every registered sparse memory add its
        // blocks to the cache recorder, attributed to cntrl_id.
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }
    // Aggregate the trace entries together into a single array
    // (raw_data is passed by address together with its initial size of
    // 4096 bytes)
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);
    m_cooldown_enabled = false;
}
/**
 * Read a gzip compressed trace file into a newly allocated buffer.
 *
 * @param filename path of the compressed trace file.
 * @param raw_data set to a buffer of uncompressed_trace_size bytes that is
 *        allocated with new[]; the caller owns it.
 * @param uncompressed_trace_size number of uncompressed bytes expected in
 *        the file.
 *
 * Failing to open, completely read or close the file is fatal.
 */
void
RubySystem::readCompressedTrace(string filename, uint8*& raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];

    // gzread() reports errors with -1. The result is kept in a signed
    // variable and tested for a negative value first: comparing it
    // directly against the unsigned size would convert -1 into a huge
    // unsigned number and let a failed read pass as a complete one.
    int bytes_read = gzread(compressedTrace, raw_data,
                            uncompressed_trace_size);
    if (bytes_read < 0 ||
        static_cast<uint64>(bytes_read) < uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}
void
@ -181,6 +318,88 @@ RubySystem::unserialize(Checkpoint *cp, const string &section)
// value of curTick()
//
clearStats();
uint8* uncompressed_trace = NULL;
if (m_mem_vec_ptr != NULL) {
string memory_trace_file;
uint64 memory_trace_size = 0;
UNSERIALIZE_SCALAR(memory_trace_file);
UNSERIALIZE_SCALAR(memory_trace_size);
memory_trace_file = cp->cptDir + "/" + memory_trace_file;
readCompressedTrace(memory_trace_file, uncompressed_trace,
memory_trace_size);
m_mem_vec_ptr->populatePages(uncompressed_trace);
delete uncompressed_trace;
uncompressed_trace = NULL;
}
string cache_trace_file;
uint64 cache_trace_size = 0;
UNSERIALIZE_SCALAR(cache_trace_file);
UNSERIALIZE_SCALAR(cache_trace_size);
cache_trace_file = cp->cptDir + "/" + cache_trace_file;
readCompressedTrace(cache_trace_file, uncompressed_trace,
cache_trace_size);
m_warmup_enabled = true;
vector<Sequencer*> sequencer_map;
Sequencer* t = NULL;
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
if(t == NULL) t = sequencer_map[cntrl];
}
assert(t != NULL);
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
if (sequencer_map[cntrl] == NULL) {
sequencer_map[cntrl] = t;
}
}
m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
sequencer_map);
}
/**
 * Run the cache warmup if unserialize() enabled it.
 *
 * With m_warmup_enabled set, the function runs a private simulation phase
 * starting at tick 0, during which RubyEvent::process() asks the cache
 * recorder to enqueue fetch requests. The recorder is deleted afterwards
 * and the previous tick and event queue head are restored. Without warmup
 * the function does nothing.
 */
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        // (the queue head is replaced by NULL while the warmup runs)
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0
        curTick(0);
        // Schedule an event to start cache warmup
        RubyEvent* e = new RubyEvent(this);
        schedule(e,curTick());
        simulate();
        // The warmup is over: drop the recorder and leave warmup mode.
        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;
        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick
        curTick(curtick_original);
    }
}
void
RubySystem::RubyEvent::process()
{
if (ruby_system->m_warmup_enabled) {
ruby_system->m_cache_recorder->enqueueNextFetchRequest();
} else if (ruby_system->m_cooldown_enabled) {
ruby_system->m_cache_recorder->enqueueNextFlushRequest();
}
}
void
@ -190,11 +409,6 @@ RubySystem::clearStats() const
m_network_ptr->clearStats();
}
void
RubySystem::recordCacheContents(CacheRecorder& tr) const
{
}
#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -38,21 +38,34 @@
#include "base/callback.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
#include "mem/ruby/system/RubyPort.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/system/MemoryVector.hh"
#include "mem/ruby/system/SparseMemory.hh"
#include "params/RubySystem.hh"
#include "sim/sim_object.hh"
class AbstractController;
class CacheRecorder;
class MemoryVector;
class Network;
class Profiler;
class Tracer;
class RubySystem : public SimObject
{
public:
/*!
 * Event used to start the cache warmup or cooldown of the RubySystem it
 * was created for.
 */
class RubyEvent : public Event
{
  public:
    RubyEvent(RubySystem* _ruby_system)
        : ruby_system(_ruby_system)
    {
    }

  private:
    void process();

    /*! The system this event works on. */
    RubySystem* ruby_system;
};
friend class RubyEvent;
typedef RubySystemParams Params;
RubySystem(const Params *p);
~RubySystem();
@ -86,13 +99,6 @@ class RubySystem : public SimObject
return m_profiler_ptr;
}
static Tracer*
getTracer()
{
assert(m_tracer_ptr != NULL);
return m_tracer_ptr;
}
static MemoryVector*
getMemoryVector()
{
@ -100,7 +106,6 @@ class RubySystem : public SimObject
return m_mem_vec_ptr;
}
void recordCacheContents(CacheRecorder& tr) const;
static void printConfig(std::ostream& out);
static void printStats(std::ostream& out);
void clearStats() const;
@ -114,13 +119,15 @@ class RubySystem : public SimObject
void print(std::ostream& out) const;
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
void serialize(std::ostream &os);
void unserialize(Checkpoint *cp, const std::string &section);
void process();
void startup();
void registerNetwork(Network*);
void registerProfiler(Profiler*);
void registerTracer(Tracer*);
void registerAbstractController(AbstractController*);
void registerSparseMemory(SparseMemory*);
private:
// Private copy constructor and assignment operator
@ -130,6 +137,11 @@ class RubySystem : public SimObject
void init();
static void printSystemConfig(std::ostream& out);
void readCompressedTrace(std::string filename,
uint8*& raw_data,
uint64& uncompressed_trace_size);
void writeCompressedTrace(uint8* raw_data, std::string file,
uint64 uncompressed_trace_size);
private:
// configuration parameters
@ -140,14 +152,16 @@ class RubySystem : public SimObject
static int m_block_size_bits;
static uint64 m_memory_size_bytes;
static int m_memory_size_bits;
static Network* m_network_ptr;
public:
static Profiler* m_profiler_ptr;
static Tracer* m_tracer_ptr;
static MemoryVector* m_mem_vec_ptr;
std::vector<AbstractController*> m_abs_cntrl_vec;
bool m_warmup_enabled;
bool m_cooldown_enabled;
CacheRecorder* m_cache_recorder;
std::vector<SparseMemory*> m_sparse_memory_vector;
};
inline std::ostream&

View file

@ -264,6 +264,8 @@ public:
void clearStats();
void blockOnQueue(Address addr, MessageBuffer* port);
void unblock(Address addr);
void recordCacheTrace(int cntrl, CacheRecorder* tr);
Sequencer* getSequencer() const;
private:
''')
@ -674,6 +676,12 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v
else:
mq_ident = "NULL"
seq_ident = "NULL"
for param in self.config_parameters:
if param.name == "sequencer":
assert(param.pointer)
seq_ident = "m_%s_ptr" % param.name
code('''
int
$c_ident::getNumControllers()
@ -687,6 +695,12 @@ $c_ident::getMandatoryQueue() const
return $mq_ident;
}
Sequencer*
$c_ident::getSequencer() const
{
return $seq_ident;
}
const int &
$c_ident::getVersion() const
{
@ -875,6 +889,23 @@ $c_ident::unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr)
code('''
void
$c_ident::recordCacheTrace(int cntrl, CacheRecorder* tr)
{
''')
#
# Record cache contents for all associated caches.
#
code.indent()
for param in self.config_parameters:
if param.type_ast.type.ident == "CacheMemory":
assert(param.pointer)
code('m_${{param.ident}}_ptr->recordCacheContents(cntrl, tr);')
code.dedent()
code('''
}
// Actions
''')
if self.TBEType != None and self.EntryType != None:

View file

@ -874,29 +874,62 @@ class SimObject(object):
if hasattr(self, 'type'):
print >>ini_file, 'type=%s' % self.type
child_names = self._children.keys()
child_names.sort()
if len(child_names):
if len(self._children.keys()):
print >>ini_file, 'children=%s' % \
' '.join(self._children[n].get_name() for n in child_names)
' '.join(self._children[n].get_name() \
for n in sorted(self._children.keys()))
param_names = self._params.keys()
param_names.sort()
for param in param_names:
for param in sorted(self._params.keys()):
value = self._values.get(param)
if value != None:
print >>ini_file, '%s=%s' % (param,
self._values[param].ini_str())
port_names = self._ports.keys()
port_names.sort()
for port_name in port_names:
for port_name in sorted(self._ports.keys()):
port = self._port_refs.get(port_name, None)
if port != None:
print >>ini_file, '%s=%s' % (port_name, port.ini_str())
print >>ini_file # blank line between objects
# generate a tree of dictionaries expressing all the parameters in the
# instantiated system for use by scripts that want to do power, thermal
# visualization, and other similar tasks
def get_config_as_dict(self):
    """Return this object's configuration as a tree of dictionaries.

    The result holds the object's type and C++ class (when present), one
    entry per parameter that has a value, one nested dictionary per child
    (keyed by the child's name) and one entry per connected port.
    """
    d = attrdict()
    if hasattr(self, 'type'):
        d.type = self.type
    if hasattr(self, 'cxx_class'):
        d.cxx_class = self.cxx_class

    # Types that JSON can represent natively. NoneType has to be spelled
    # type(None): the value None itself never equals the result of type().
    json_native_types = (str, unicode, int, long, float, bool, type(None))

    for param in sorted(self._params.keys()):
        value = self._values.get(param)
        # Parameters without a value are left out, as print_ini does,
        # instead of failing with a KeyError on the lookup.
        if value is None:
            continue
        try:
            # Use native type for those supported by JSON and
            # strings for everything else. skipkeys=True seems
            # to not work as well as one would hope
            if type(value.value) in json_native_types:
                d[param] = value.value
            else:
                d[param] = str(value)
        except AttributeError:
            # Values without a 'value' attribute are not reported.
            pass

    for n in sorted(self._children.keys()):
        child = self._children[n]
        d[child.get_name()] = child.get_config_as_dict()

    for port_name in sorted(self._ports.keys()):
        port = self._port_refs.get(port_name, None)
        if port != None:
            # Might want to actually make this reference the object
            # in the future, although execing the string problem would
            # get some of the way there
            d[port_name] = port.ini_str()

    return d
def getCCParams(self):
if self._ccParams:
return self._ccParams

View file

@ -87,6 +87,8 @@ def parse_options():
group("Configuration Options")
option("--dump-config", metavar="FILE", default="config.ini",
help="Dump configuration output file [Default: %default]")
option("--json-config", metavar="FILE", default="config.json",
help="Create JSON output of the configuration [Default: %default]")
# Debugging options
group("Debugging Options")
@ -121,7 +123,6 @@ def parse_options():
execfile(options_file, scope)
arguments = options.parse_args()
return options,arguments
def interact(scope):

View file

@ -228,6 +228,12 @@ class SimObjectVector(VectorParamValue):
for obj in v.descendants():
yield obj
def get_config_as_dict(self):
    """Return the configuration of every element, in element order."""
    return [element.get_config_as_dict() for element in self]
class VectorParamDesc(ParamDesc):
# Convert assigned value to appropriate type. If the RHS is not a
# list or tuple, it generates a single-element list.
@ -256,6 +262,9 @@ class VectorParamDesc(ParamDesc):
self.ptype.cxx_predecls(code)
code('%}')
code()
# Make sure the SWIGPY_SLICE_ARG is defined through this inclusion
code('%include "std_container.i"')
code()
self.ptype.swig_predecls(code)
code()
code('%include "std_vector.i"')
@ -961,6 +970,9 @@ class Time(ParamValue):
def ini_str(self):
return str(self)
def get_config_as_dict(self):
    """Return the string form of this value, as ini_str() does."""
    return str(self)
# Enumerated types are a little more complex. The user specifies the
# type as Enum(foo) where foo is either a list or dictionary of
# alternatives (typically strings, but not necessarily so). (In the

View file

@ -40,6 +40,7 @@ import SimObject
import ticks
import objects
from util import fatal
from util import attrdict
# define a MaxTick parameter
MaxTick = 2**63 - 1
@ -71,6 +72,17 @@ def instantiate(ckpt_dir=None):
obj.print_ini(ini_file)
ini_file.close()
if options.json_config:
try:
import json
json_file = file(os.path.join(options.outdir, options.json_config), 'w')
d = root.get_config_as_dict()
json.dump(d, json_file, indent=4)
json_file.close()
except ImportError:
pass
# Initialize the global statistics
stats.initSimStats()

View file

@ -54,8 +54,8 @@ class System(SimObject):
physmem = Param.PhysicalMemory("Physical Memory")
mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in")
memories = VectorParam.PhysicalMemory(Self.all, "All memories is the system")
work_item_id = Param.Int(-1, "specific work item id")
num_work_ids = Param.Int(16, "Number of distinct work item types")
work_begin_cpu_id_exit = Param.Int(-1,
"work started on specific id, now exit simulation")
work_begin_ckpt_count = Param.Counter(0,

View file

@ -417,6 +417,7 @@ workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid)
tc->getCpuPtr()->workItemBegin();
System *sys = tc->getSystemPtr();
const System::Params *params = sys->params();
sys->workItemBegin(threadid, workid);
DPRINTF(WorkItems, "Work Begin workid: %d, threadid %d\n", workid,
threadid);
@ -473,6 +474,7 @@ workend(ThreadContext *tc, uint64_t workid, uint64_t threadid)
tc->getCpuPtr()->workItemEnd();
System *sys = tc->getSystemPtr();
const System::Params *params = sys->params();
sys->workItemEnd(threadid, workid);
DPRINTF(WorkItems, "Work End workid: %d, threadid %d\n", workid, threadid);

View file

@ -1,4 +1,16 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2003-2006 The Regents of The University of Michigan
* Copyright (c) 2011 Regents of the University of California
* All rights reserved.
@ -43,6 +55,7 @@
#include "config/the_isa.hh"
#include "cpu/thread_context.hh"
#include "debug/Loader.hh"
#include "debug/WorkItems.hh"
#include "kern/kernel_stats.hh"
#include "mem/mem_object.hh"
#include "mem/physical.hh"
@ -68,8 +81,9 @@ System::System(Params *p)
memoryMode(p->mem_mode),
workItemsBegin(0),
workItemsEnd(0),
numWorkIds(p->num_work_ids),
_params(p),
totalNumInsts(0),
totalNumInsts(0),
instEventQueue("system instruction-based event queue")
{
// add self to global system list
@ -158,6 +172,9 @@ System::~System()
{
delete kernelSymtab;
delete kernel;
for (uint32_t j = 0; j < numWorkIds; j++)
delete workItemStats[j];
}
void
@ -319,6 +336,37 @@ System::unserialize(Checkpoint *cp, const string &section)
UNSERIALIZE_SCALAR(nextPID);
}
/**
 * Register one run-time histogram per work item type.
 *
 * A Stats::Histogram is allocated for every id in [0, numWorkIds) and
 * stored in workItemStats under that id. Each histogram is named
 * "<system name>.work_item_type<id>".
 */
void
System::regStats()
{
    for (uint32_t j = 0; j < numWorkIds ; j++) {
        workItemStats[j] = new Stats::Histogram();
        stringstream namestr;
        ccprintf(namestr, "work_item_type%d", j);
        // NOTE(review): 20 is presumably the histogram bucket count --
        // confirm against Stats::Histogram::init().
        workItemStats[j]->init(20)
                         .name(name() + "." + namestr.str())
                         // Keep the trailing space so the description reads
                         // "Run time stat for work_item_typeN".
                         .desc("Run time stat for " + namestr.str())
                         .prereq(*workItemStats[j]);
    }
}
void
System::workItemEnd(uint32_t tid, uint32_t workid)
{
std::pair<uint32_t,uint32_t> p(tid, workid);
if (!lastWorkItemStarted.count(p))
return;
Tick samp = curTick() - lastWorkItemStarted[p];
DPRINTF(WorkItems, "Work item end: %d\t%d\t%lld\n", tid, workid, samp);
if (workid >= numWorkIds)
fatal("Got workid greater than specified in system configuration\n");
workItemStats[workid]->sample(samp);
lastWorkItemStarted.erase(p);
}
void
System::printSystems()
{

View file

@ -157,14 +157,16 @@ class System : public SimObject
Enums::MemoryMode memoryMode;
uint64_t workItemsBegin;
uint64_t workItemsEnd;
uint32_t numWorkIds;
std::vector<bool> activeCpus;
public:
virtual void regStats();
/**
* Called by pseudo_inst to track the number of work items started by this
* system.
*/
uint64_t
uint64_t
incWorkItemsBegin()
{
return ++workItemsBegin;
@ -198,6 +200,14 @@ class System : public SimObject
return count;
}
/**
 * Record the current tick as the start time of a work item, keyed by the
 * (tid, workid) pair. A later start for the same pair overwrites the
 * earlier one.
 */
inline void workItemBegin(uint32_t tid, uint32_t workid)
{
    lastWorkItemStarted[std::make_pair(tid, workid)] = curTick();
}
void workItemEnd(uint32_t tid, uint32_t workid);
/**
* Fix up an address used to match PCs for hooking simulator
* events on to target function executions. See comment in
@ -285,6 +295,8 @@ class System : public SimObject
public:
Counter totalNumInsts;
EventQueue instEventQueue;
std::map<std::pair<uint32_t,uint32_t>, Tick> lastWorkItemStarted;
std::map<uint32_t, Stats::Histogram*> workItemStats;
////////////////////////////////////////////
//

View file

@ -500,7 +500,7 @@ egid=100
env=
errout=cerr
euid=100
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
gid=100
input=cin
max_stack_size=67108864

View file

@ -3,11 +3,10 @@ Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing/si
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
gem5 compiled Nov 16 2011 11:08:03
gem5 started Nov 17 2011 13:09:16
gem5 compiled Jan 9 2012 14:18:02
gem5 started Jan 9 2012 14:29:08
gem5 executing on ribera.cs.wisc.edu
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing
tests
Global frequency set at 1000000000000 ticks per second
info: Entering event queue @ 0. Starting simulation...
spec_init

View file

@ -3,26 +3,26 @@
sim_seconds 0.586294 # Number of seconds simulated
sim_ticks 586294224000 # Number of ticks simulated
sim_freq 1000000000000 # Frequency of simulated ticks
host_inst_rate 112274 # Simulator instruction rate (inst/s)
host_tick_rate 40595683 # Simulator tick rate (ticks/s)
host_mem_usage 244844 # Number of bytes of host memory used
host_seconds 14442.28 # Real time elapsed on the host
host_inst_rate 115446 # Simulator instruction rate (inst/s)
host_tick_rate 41742717 # Simulator tick rate (ticks/s)
host_mem_usage 244900 # Number of bytes of host memory used
host_seconds 14045.43 # Real time elapsed on the host
sim_insts 1621493982 # Number of instructions simulated
system.cpu.workload.num_syscalls 48 # Number of system calls
system.cpu.numCycles 1172588449 # number of cpu cycles simulated
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
system.cpu.BPredUnit.lookups 142448983 # Number of BP lookups
system.cpu.BPredUnit.condPredicted 142448983 # Number of conditional branches predicted
system.cpu.BPredUnit.lookups 142448982 # Number of BP lookups
system.cpu.BPredUnit.condPredicted 142448982 # Number of conditional branches predicted
system.cpu.BPredUnit.condIncorrect 7804844 # Number of conditional branches incorrect
system.cpu.BPredUnit.BTBLookups 134509889 # Number of BTB lookups
system.cpu.BPredUnit.BTBLookups 134509888 # Number of BTB lookups
system.cpu.BPredUnit.BTBHits 133615988 # Number of BTB hits
system.cpu.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
system.cpu.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target.
system.cpu.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions.
system.cpu.fetch.icacheStallCycles 143149229 # Number of cycles fetch is stalled on an Icache miss
system.cpu.fetch.Insts 1143761055 # Number of instructions fetch has processed
system.cpu.fetch.Branches 142448983 # Number of branches that fetch encountered
system.cpu.fetch.Insts 1143761054 # Number of instructions fetch has processed
system.cpu.fetch.Branches 142448982 # Number of branches that fetch encountered
system.cpu.fetch.predictedBranches 133615988 # Number of branches that fetch has predicted taken
system.cpu.fetch.Cycles 330199440 # Number of cycles fetch has run and was not squashing or blocked
system.cpu.fetch.SquashCycles 57554993 # Number of cycles fetch has spent squashing
@ -66,32 +66,32 @@ system.cpu.rename.RenamedInsts 2043122328 # Nu
system.cpu.rename.ROBFullEvents 2634 # Number of times rename has blocked due to ROB full
system.cpu.rename.IQFullEvents 278313629 # Number of times rename has blocked due to IQ full
system.cpu.rename.LSQFullEvents 129499394 # Number of times rename has blocked due to LSQ full
system.cpu.rename.RenamedOperands 2031527324 # Number of destination operands rename has renamed
system.cpu.rename.RenameLookups 4954653616 # Number of register rename lookups that rename has made
system.cpu.rename.int_rename_lookups 4954649396 # Number of integer rename lookups
system.cpu.rename.RenamedOperands 2031527322 # Number of destination operands rename has renamed
system.cpu.rename.RenameLookups 4954653611 # Number of register rename lookups that rename has made
system.cpu.rename.int_rename_lookups 4954649391 # Number of integer rename lookups
system.cpu.rename.fp_rename_lookups 4220 # Number of floating rename lookups
system.cpu.rename.CommittedMaps 1617994650 # Number of HB maps that are committed
system.cpu.rename.UndoneMaps 413532674 # Number of HB maps that are undone due to squashing
system.cpu.rename.UndoneMaps 413532672 # Number of HB maps that are undone due to squashing
system.cpu.rename.serializingInsts 91 # count of serializing insts renamed
system.cpu.rename.tempSerializingInsts 91 # count of temporary serializing insts renamed
system.cpu.rename.skidInsts 793190427 # count of insts added to the skid buffer
system.cpu.memDep0.insertedLoads 519090632 # Number of loads inserted to the mem dependence unit.
system.cpu.memDep0.insertedStores 226808407 # Number of stores inserted to the mem dependence unit.
system.cpu.memDep0.conflictingLoads 354951645 # Number of conflicting loads.
system.cpu.memDep0.conflictingStores 148937435 # Number of conflicting stores.
system.cpu.iq.iqInstsAdded 1986583518 # Number of instructions added to the IQ (excludes non-spec)
system.cpu.iq.iqNonSpecInstsAdded 216 # Number of non-speculative instructions added to the IQ
system.cpu.iq.iqInstsIssued 1781630005 # Number of instructions issued
system.cpu.memDep0.conflictingStores 148937436 # Number of conflicting stores.
system.cpu.iq.iqInstsAdded 1986583516 # Number of instructions added to the IQ (excludes non-spec)
system.cpu.iq.iqNonSpecInstsAdded 218 # Number of non-speculative instructions added to the IQ
system.cpu.iq.iqInstsIssued 1781630004 # Number of instructions issued
system.cpu.iq.iqSquashedInstsIssued 180825 # Number of squashed instructions issued
system.cpu.iq.iqSquashedInstsExamined 364939190 # Number of squashed instructions iterated over during squash; mainly for profiling
system.cpu.iq.iqSquashedOperandsExamined 670712331 # Number of squashed operands that are examined and possibly removed from graph
system.cpu.iq.iqSquashedNonSpecRemoved 166 # Number of squashed non-spec instructions that were removed
system.cpu.iq.iqSquashedOperandsExamined 670712329 # Number of squashed operands that are examined and possibly removed from graph
system.cpu.iq.iqSquashedNonSpecRemoved 168 # Number of squashed non-spec instructions that were removed
system.cpu.iq.issued_per_cycle::samples 1172439660 # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::mean 1.519592 # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::stdev 1.333662 # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::underflows 0 0.00% 0.00% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::0 271921708 23.19% 23.19% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::1 416937500 35.56% 58.75% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::0 271921709 23.19% 23.19% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::1 416937499 35.56% 58.75% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::2 234725234 20.02% 78.77% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::3 156776493 13.37% 92.15% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::4 54385701 4.64% 96.79% # Number of insts issued each cycle
@ -138,7 +138,7 @@ system.cpu.iq.fu_full::MemWrite 148998 5.73% 100.00% # at
system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
system.cpu.iq.FU_type_0::No_OpClass 26894248 1.51% 1.51% # Type of FU issued
system.cpu.iq.FU_type_0::IntAlu 1102052870 61.86% 63.37% # Type of FU issued
system.cpu.iq.FU_type_0::IntAlu 1102052869 61.86% 63.37% # Type of FU issued
system.cpu.iq.FU_type_0::IntMult 0 0.00% 63.37% # Type of FU issued
system.cpu.iq.FU_type_0::IntDiv 0 0.00% 63.37% # Type of FU issued
system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 63.37% # Type of FU issued
@ -171,17 +171,17 @@ system.cpu.iq.FU_type_0::MemRead 457985397 25.71% 89.07% # Ty
system.cpu.iq.FU_type_0::MemWrite 194697490 10.93% 100.00% # Type of FU issued
system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
system.cpu.iq.FU_type_0::total 1781630005 # Type of FU issued
system.cpu.iq.FU_type_0::total 1781630004 # Type of FU issued
system.cpu.iq.rate 1.519399 # Inst issue rate
system.cpu.iq.fu_busy_cnt 2598665 # FU busy when requested
system.cpu.iq.fu_busy_rate 0.001459 # FU busy rate (busy events/executed inst)
system.cpu.iq.int_inst_queue_reads 4738479065 # Number of integer instruction queue reads
system.cpu.iq.int_inst_queue_reads 4738479063 # Number of integer instruction queue reads
system.cpu.iq.int_inst_queue_writes 2351732069 # Number of integer instruction queue writes
system.cpu.iq.int_inst_queue_wakeup_accesses 1760053766 # Number of integer instruction queue wakeup accesses
system.cpu.iq.int_inst_queue_wakeup_accesses 1760053765 # Number of integer instruction queue wakeup accesses
system.cpu.iq.fp_inst_queue_reads 95 # Number of floating instruction queue reads
system.cpu.iq.fp_inst_queue_writes 542 # Number of floating instruction queue writes
system.cpu.iq.fp_inst_queue_wakeup_accesses 12 # Number of floating instruction queue wakeup accesses
system.cpu.iq.int_alu_accesses 1757334382 # Number of integer alu accesses
system.cpu.iq.int_alu_accesses 1757334381 # Number of integer alu accesses
system.cpu.iq.fp_alu_accesses 40 # Number of floating point alu accesses
system.cpu.iew.lsq.thread0.forwLoads 205665909 # Number of loads that had data forwarded from stores
system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
@ -208,7 +208,7 @@ system.cpu.iew.memOrderViolationEvents 216417 # Nu
system.cpu.iew.predictedTakenIncorrect 4603219 # Number of branches that were predicted taken incorrectly
system.cpu.iew.predictedNotTakenIncorrect 3388875 # Number of branches that were predicted not taken incorrectly
system.cpu.iew.branchMispredicts 7992094 # Number of branch mispredicts detected at execute
system.cpu.iew.iewExecutedInsts 1768232809 # Number of executed instructions
system.cpu.iew.iewExecutedInsts 1768232808 # Number of executed instructions
system.cpu.iew.iewExecLoadInsts 452047218 # Number of load instructions executed
system.cpu.iew.iewExecSquashedInsts 13397196 # Number of squashed instructions skipped in execute
system.cpu.iew.exec_swp 0 # number of swp insts executed
@ -217,8 +217,8 @@ system.cpu.iew.exec_refs 645919458 # nu
system.cpu.iew.exec_branches 112169596 # Number of branches executed
system.cpu.iew.exec_stores 193872240 # Number of stores executed
system.cpu.iew.exec_rate 1.507974 # Inst execution rate
system.cpu.iew.wb_sent 1766226830 # cumulative count of insts sent to commit
system.cpu.iew.wb_count 1760053778 # cumulative count of insts written-back
system.cpu.iew.wb_sent 1766226829 # cumulative count of insts sent to commit
system.cpu.iew.wb_count 1760053777 # cumulative count of insts written-back
system.cpu.iew.wb_producers 1336567337 # num instructions producing a value
system.cpu.iew.wb_consumers 2003494286 # num instructions consuming a value
system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
@ -268,9 +268,9 @@ system.cpu.cpi_total 0.723153 # CP
system.cpu.ipc 1.382833 # IPC: Instructions Per Cycle
system.cpu.ipc_total 1.382833 # IPC: Total IPC of All Threads
system.cpu.int_regfile_reads 3273039620 # number of integer regfile reads
system.cpu.int_regfile_writes 1756091293 # number of integer regfile writes
system.cpu.int_regfile_writes 1756091292 # number of integer regfile writes
system.cpu.fp_regfile_reads 12 # number of floating regfile reads
system.cpu.misc_regfile_reads 908871446 # number of misc regfile reads
system.cpu.misc_regfile_reads 908871445 # number of misc regfile reads
system.cpu.icache.replacements 12 # number of replacements
system.cpu.icache.tagsinuse 810.394392 # Cycle average of tags in use
system.cpu.icache.total_refs 137025977 # Total number of references to valid blocks.

View file

@ -9,6 +9,7 @@ time_sync_spin_threshold=100000000
type=System
children=cpu membus physmem
mem_mode=atomic
memories=system.physmem
physmem=system.physmem
work_begin_ckpt_count=0
work_begin_cpu_id_exit=-1
@ -61,12 +62,12 @@ type=ExeTracer
[system.cpu.workload]
type=LiveProcess
cmd=gzip input.log 1
cwd=build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic
cwd=build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic
egid=100
env=
errout=cerr
euid=100
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
gid=100
input=cin
max_stack_size=67108864

View file

@ -1,7 +1,4 @@
warn: Sockets disabled, not accepting gdb connections
For more information see: http://www.m5sim.org/warn/d946bea6
warn: instruction 'fnstcw_Mw' unimplemented
For more information see: http://www.m5sim.org/warn/437d5238
warn: instruction 'fldcw_Mw' unimplemented
For more information see: http://www.m5sim.org/warn/437d5238
hack: be nice to actually delete the event here

View file

@ -1,14 +1,12 @@
M5 Simulator System
Redirecting stdout to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic/simout
Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic/simerr
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
Copyright (c) 2001-2008
The Regents of The University of Michigan
All Rights Reserved
M5 compiled Apr 19 2011 12:22:33
M5 started Apr 19 2011 12:22:36
M5 executing on maize
command line: build/X86_SE/m5.fast -d build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic -re tests/run.py build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic
gem5 compiled Jan 9 2012 14:18:02
gem5 started Jan 9 2012 14:29:08
gem5 executing on ribera.cs.wisc.edu
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic
Global frequency set at 1000000000000 ticks per second
info: Entering event queue @ 0. Starting simulation...
spec_init

View file

@ -1,34 +1,34 @@
---------- Begin Simulation Statistics ----------
host_inst_rate 3280168 # Simulator instruction rate (inst/s)
host_mem_usage 202508 # Number of bytes of host memory used
host_seconds 494.33 # Real time elapsed on the host
host_tick_rate 1950088412 # Simulator tick rate (ticks/s)
sim_freq 1000000000000 # Frequency of simulated ticks
sim_insts 1621493983 # Number of instructions simulated
sim_seconds 0.963993 # Number of seconds simulated
sim_ticks 963992704000 # Number of ticks simulated
system.cpu.idle_fraction 0 # Percentage of idle cycles
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
sim_freq 1000000000000 # Frequency of simulated ticks
host_inst_rate 1220339 # Simulator instruction rate (inst/s)
host_tick_rate 725502264 # Simulator tick rate (ticks/s)
host_mem_usage 234168 # Number of bytes of host memory used
host_seconds 1328.72 # Real time elapsed on the host
sim_insts 1621493983 # Number of instructions simulated
system.cpu.workload.num_syscalls 48 # Number of system calls
system.cpu.numCycles 1927985409 # number of cpu cycles simulated
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
system.cpu.num_busy_cycles 1927985409 # Number of busy cycles
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
system.cpu.num_fp_insts 0 # number of float instructions
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
system.cpu.num_func_calls 0 # number of times a function call or return occured
system.cpu.num_idle_cycles 0 # Number of idle cycles
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
system.cpu.num_insts 1621493983 # Number of instructions executed
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
system.cpu.num_func_calls 0 # number of times a function call or return occured
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
system.cpu.num_int_insts 1621354493 # number of integer instructions
system.cpu.num_fp_insts 0 # number of float instructions
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
system.cpu.num_load_insts 419042125 # Number of load instructions
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
system.cpu.num_mem_refs 607228182 # number of memory refs
system.cpu.num_load_insts 419042125 # Number of load instructions
system.cpu.num_store_insts 188186057 # Number of store instructions
system.cpu.workload.num_syscalls 48 # Number of system calls
system.cpu.num_idle_cycles 0 # Number of idle cycles
system.cpu.num_busy_cycles 1927985409 # Number of busy cycles
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
system.cpu.idle_fraction 0 # Percentage of idle cycles
---------- End Simulation Statistics ----------

View file

@ -9,6 +9,7 @@ time_sync_spin_threshold=100000000
type=System
children=cpu membus physmem
mem_mode=atomic
memories=system.physmem
physmem=system.physmem
work_begin_ckpt_count=0
work_begin_cpu_id_exit=-1
@ -164,12 +165,12 @@ type=ExeTracer
[system.cpu.workload]
type=LiveProcess
cmd=gzip input.log 1
cwd=build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing
cwd=build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing
egid=100
env=
errout=cerr
euid=100
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
gid=100
input=cin
max_stack_size=67108864

View file

@ -1,7 +1,4 @@
warn: Sockets disabled, not accepting gdb connections
For more information see: http://www.m5sim.org/warn/d946bea6
warn: instruction 'fnstcw_Mw' unimplemented
For more information see: http://www.m5sim.org/warn/437d5238
warn: instruction 'fldcw_Mw' unimplemented
For more information see: http://www.m5sim.org/warn/437d5238
hack: be nice to actually delete the event here

View file

@ -1,14 +1,12 @@
M5 Simulator System
Redirecting stdout to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing/simout
Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing/simerr
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
Copyright (c) 2001-2008
The Regents of The University of Michigan
All Rights Reserved
M5 compiled Apr 19 2011 12:22:33
M5 started Apr 19 2011 12:23:09
M5 executing on maize
command line: build/X86_SE/m5.fast -d build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing -re tests/run.py build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing
gem5 compiled Jan 9 2012 14:18:02
gem5 started Jan 9 2012 14:29:08
gem5 executing on ribera.cs.wisc.edu
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing
Global frequency set at 1000000000000 ticks per second
info: Entering event queue @ 0. Starting simulation...
spec_init

View file

@ -1,223 +1,223 @@
---------- Begin Simulation Statistics ----------
host_inst_rate 2023797 # Simulator instruction rate (inst/s)
host_mem_usage 210248 # Number of bytes of host memory used
host_seconds 801.21 # Real time elapsed on the host
host_tick_rate 2250658484 # Simulator tick rate (ticks/s)
sim_freq 1000000000000 # Frequency of simulated ticks
sim_insts 1621493983 # Number of instructions simulated
sim_seconds 1.803259 # Number of seconds simulated
sim_ticks 1803258587000 # Number of ticks simulated
system.cpu.dcache.ReadReq_accesses 419042125 # number of ReadReq accesses(hits+misses)
system.cpu.dcache.ReadReq_avg_miss_latency 20490.305383 # average ReadReq miss latency
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 17490.305383 # average ReadReq mshr miss latency
system.cpu.dcache.ReadReq_hits 418844799 # number of ReadReq hits
system.cpu.dcache.ReadReq_miss_latency 4043270000 # number of ReadReq miss cycles
system.cpu.dcache.ReadReq_miss_rate 0.000471 # miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_misses 197326 # number of ReadReq misses
system.cpu.dcache.ReadReq_mshr_miss_latency 3451292000 # number of ReadReq MSHR miss cycles
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000471 # mshr miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_mshr_misses 197326 # number of ReadReq MSHR misses
system.cpu.dcache.WriteReq_accesses 188186057 # number of WriteReq accesses(hits+misses)
system.cpu.dcache.WriteReq_avg_miss_latency 23997.572756 # average WriteReq miss latency
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 20997.572756 # average WriteReq mshr miss latency
system.cpu.dcache.WriteReq_hits 187941335 # number of WriteReq hits
system.cpu.dcache.WriteReq_miss_latency 5872734000 # number of WriteReq miss cycles
system.cpu.dcache.WriteReq_miss_rate 0.001300 # miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_misses 244722 # number of WriteReq misses
system.cpu.dcache.WriteReq_mshr_miss_latency 5138568000 # number of WriteReq MSHR miss cycles
system.cpu.dcache.WriteReq_mshr_miss_rate 0.001300 # mshr miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_mshr_misses 244722 # number of WriteReq MSHR misses
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
system.cpu.dcache.avg_refs 1372.670239 # Average number of references to valid blocks.
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
system.cpu.dcache.cache_copies 0 # number of cache copies performed
system.cpu.dcache.demand_accesses 607228182 # number of demand (read+write) accesses
system.cpu.dcache.demand_avg_miss_latency 22431.962140 # average overall miss latency
system.cpu.dcache.demand_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
system.cpu.dcache.demand_hits 606786134 # number of demand (read+write) hits
system.cpu.dcache.demand_miss_latency 9916004000 # number of demand (read+write) miss cycles
system.cpu.dcache.demand_miss_rate 0.000728 # miss rate for demand accesses
system.cpu.dcache.demand_misses 442048 # number of demand (read+write) misses
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.dcache.demand_mshr_miss_latency 8589860000 # number of demand (read+write) MSHR miss cycles
system.cpu.dcache.demand_mshr_miss_rate 0.000728 # mshr miss rate for demand accesses
system.cpu.dcache.demand_mshr_misses 442048 # number of demand (read+write) MSHR misses
system.cpu.dcache.fast_writes 0 # number of fast writes performed
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.dcache.occ_blocks::0 4094.896939 # Average occupied blocks per context
system.cpu.dcache.occ_percent::0 0.999731 # Average percentage of cache occupancy
system.cpu.dcache.overall_accesses 607228182 # number of overall (read+write) accesses
system.cpu.dcache.overall_avg_miss_latency 22431.962140 # average overall miss latency
system.cpu.dcache.overall_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
system.cpu.dcache.overall_hits 606786134 # number of overall hits
system.cpu.dcache.overall_miss_latency 9916004000 # number of overall miss cycles
system.cpu.dcache.overall_miss_rate 0.000728 # miss rate for overall accesses
system.cpu.dcache.overall_misses 442048 # number of overall misses
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.dcache.overall_mshr_miss_latency 8589860000 # number of overall MSHR miss cycles
system.cpu.dcache.overall_mshr_miss_rate 0.000728 # mshr miss rate for overall accesses
system.cpu.dcache.overall_mshr_misses 442048 # number of overall MSHR misses
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.dcache.replacements 437952 # number of replacements
system.cpu.dcache.sampled_refs 442048 # Sample count of references to valid blocks.
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.dcache.tagsinuse 4094.896939 # Cycle average of tags in use
system.cpu.dcache.total_refs 606786134 # Total number of references to valid blocks.
system.cpu.dcache.warmup_cycle 778540000 # Cycle when the warmup percentage was hit.
system.cpu.dcache.writebacks 396372 # number of writebacks
system.cpu.icache.ReadReq_accesses 1186516740 # number of ReadReq accesses(hits+misses)
system.cpu.icache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency
system.cpu.icache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency
system.cpu.icache.ReadReq_hits 1186516018 # number of ReadReq hits
system.cpu.icache.ReadReq_miss_latency 40432000 # number of ReadReq miss cycles
system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses
system.cpu.icache.ReadReq_misses 722 # number of ReadReq misses
system.cpu.icache.ReadReq_mshr_miss_latency 38266000 # number of ReadReq MSHR miss cycles
system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses
system.cpu.icache.ReadReq_mshr_misses 722 # number of ReadReq MSHR misses
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
system.cpu.icache.avg_refs 1643373.986150 # Average number of references to valid blocks.
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
system.cpu.icache.cache_copies 0 # number of cache copies performed
system.cpu.icache.demand_accesses 1186516740 # number of demand (read+write) accesses
system.cpu.icache.demand_avg_miss_latency 56000 # average overall miss latency
system.cpu.icache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency
system.cpu.icache.demand_hits 1186516018 # number of demand (read+write) hits
system.cpu.icache.demand_miss_latency 40432000 # number of demand (read+write) miss cycles
system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses
system.cpu.icache.demand_misses 722 # number of demand (read+write) misses
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.icache.demand_mshr_miss_latency 38266000 # number of demand (read+write) MSHR miss cycles
system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses
system.cpu.icache.demand_mshr_misses 722 # number of demand (read+write) MSHR misses
system.cpu.icache.fast_writes 0 # number of fast writes performed
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.icache.occ_blocks::0 660.186297 # Average occupied blocks per context
system.cpu.icache.occ_percent::0 0.322357 # Average percentage of cache occupancy
system.cpu.icache.overall_accesses 1186516740 # number of overall (read+write) accesses
system.cpu.icache.overall_avg_miss_latency 56000 # average overall miss latency
system.cpu.icache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
system.cpu.icache.overall_hits 1186516018 # number of overall hits
system.cpu.icache.overall_miss_latency 40432000 # number of overall miss cycles
system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses
system.cpu.icache.overall_misses 722 # number of overall misses
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.icache.overall_mshr_miss_latency 38266000 # number of overall MSHR miss cycles
system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses
system.cpu.icache.overall_mshr_misses 722 # number of overall MSHR misses
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
sim_freq 1000000000000 # Frequency of simulated ticks
host_inst_rate 760773 # Simulator instruction rate (inst/s)
host_tick_rate 846053445 # Simulator tick rate (ticks/s)
host_mem_usage 242892 # Number of bytes of host memory used
host_seconds 2131.38 # Real time elapsed on the host
sim_insts 1621493983 # Number of instructions simulated
system.cpu.workload.num_syscalls 48 # Number of system calls
system.cpu.numCycles 3606517174 # number of cpu cycles simulated
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
system.cpu.num_insts 1621493983 # Number of instructions executed
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
system.cpu.num_func_calls 0 # number of times a function call or return occured
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
system.cpu.num_int_insts 1621354493 # number of integer instructions
system.cpu.num_fp_insts 0 # number of float instructions
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
system.cpu.num_mem_refs 607228182 # number of memory refs
system.cpu.num_load_insts 419042125 # Number of load instructions
system.cpu.num_store_insts 188186057 # Number of store instructions
system.cpu.num_idle_cycles 0 # Number of idle cycles
system.cpu.num_busy_cycles 3606517174 # Number of busy cycles
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
system.cpu.idle_fraction 0 # Percentage of idle cycles
system.cpu.icache.replacements 4 # number of replacements
system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks.
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.icache.tagsinuse 660.186297 # Cycle average of tags in use
system.cpu.icache.total_refs 1186516018 # Total number of references to valid blocks.
system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks.
system.cpu.icache.avg_refs 1643373.986150 # Average number of references to valid blocks.
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.icache.occ_blocks::0 660.186297 # Average occupied blocks per context
system.cpu.icache.occ_percent::0 0.322357 # Average percentage of cache occupancy
system.cpu.icache.ReadReq_hits 1186516018 # number of ReadReq hits
system.cpu.icache.demand_hits 1186516018 # number of demand (read+write) hits
system.cpu.icache.overall_hits 1186516018 # number of overall hits
system.cpu.icache.ReadReq_misses 722 # number of ReadReq misses
system.cpu.icache.demand_misses 722 # number of demand (read+write) misses
system.cpu.icache.overall_misses 722 # number of overall misses
system.cpu.icache.ReadReq_miss_latency 40432000 # number of ReadReq miss cycles
system.cpu.icache.demand_miss_latency 40432000 # number of demand (read+write) miss cycles
system.cpu.icache.overall_miss_latency 40432000 # number of overall miss cycles
system.cpu.icache.ReadReq_accesses 1186516740 # number of ReadReq accesses(hits+misses)
system.cpu.icache.demand_accesses 1186516740 # number of demand (read+write) accesses
system.cpu.icache.overall_accesses 1186516740 # number of overall (read+write) accesses
system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses
system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses
system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses
system.cpu.icache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency
system.cpu.icache.demand_avg_miss_latency 56000 # average overall miss latency
system.cpu.icache.overall_avg_miss_latency 56000 # average overall miss latency
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
system.cpu.icache.fast_writes 0 # number of fast writes performed
system.cpu.icache.cache_copies 0 # number of cache copies performed
system.cpu.icache.writebacks 0 # number of writebacks
system.cpu.idle_fraction 0 # Percentage of idle cycles
system.cpu.l2cache.ReadExReq_accesses 244722 # number of ReadExReq accesses(hits+misses)
system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency
system.cpu.l2cache.ReadExReq_hits 186469 # number of ReadExReq hits
system.cpu.l2cache.ReadExReq_miss_latency 3029156000 # number of ReadExReq miss cycles
system.cpu.l2cache.ReadExReq_miss_rate 0.238037 # miss rate for ReadExReq accesses
system.cpu.l2cache.ReadExReq_misses 58253 # number of ReadExReq misses
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2330120000 # number of ReadExReq MSHR miss cycles
system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.238037 # mshr miss rate for ReadExReq accesses
system.cpu.l2cache.ReadExReq_mshr_misses 58253 # number of ReadExReq MSHR misses
system.cpu.l2cache.ReadReq_accesses 198048 # number of ReadReq accesses(hits+misses)
system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency
system.cpu.l2cache.ReadReq_hits 166833 # number of ReadReq hits
system.cpu.l2cache.ReadReq_miss_latency 1623180000 # number of ReadReq miss cycles
system.cpu.l2cache.ReadReq_miss_rate 0.157613 # miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_misses 31215 # number of ReadReq misses
system.cpu.l2cache.ReadReq_mshr_miss_latency 1248600000 # number of ReadReq MSHR miss cycles
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.157613 # mshr miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_mshr_misses 31215 # number of ReadReq MSHR misses
system.cpu.l2cache.Writeback_accesses 396372 # number of Writeback accesses(hits+misses)
system.cpu.l2cache.Writeback_hits 396372 # number of Writeback hits
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.icache.ReadReq_mshr_misses 722 # number of ReadReq MSHR misses
system.cpu.icache.demand_mshr_misses 722 # number of demand (read+write) MSHR misses
system.cpu.icache.overall_mshr_misses 722 # number of overall MSHR misses
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.icache.ReadReq_mshr_miss_latency 38266000 # number of ReadReq MSHR miss cycles
system.cpu.icache.demand_mshr_miss_latency 38266000 # number of demand (read+write) MSHR miss cycles
system.cpu.icache.overall_mshr_miss_latency 38266000 # number of overall MSHR miss cycles
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses
system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses
system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses
system.cpu.icache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency
system.cpu.icache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.dcache.replacements 437952 # number of replacements
system.cpu.dcache.tagsinuse 4094.896939 # Cycle average of tags in use
system.cpu.dcache.total_refs 606786134 # Total number of references to valid blocks.
system.cpu.dcache.sampled_refs 442048 # Sample count of references to valid blocks.
system.cpu.dcache.avg_refs 1372.670239 # Average number of references to valid blocks.
system.cpu.dcache.warmup_cycle 778540000 # Cycle when the warmup percentage was hit.
system.cpu.dcache.occ_blocks::0 4094.896939 # Average occupied blocks per context
system.cpu.dcache.occ_percent::0 0.999731 # Average percentage of cache occupancy
system.cpu.dcache.ReadReq_hits 418844799 # number of ReadReq hits
system.cpu.dcache.WriteReq_hits 187941335 # number of WriteReq hits
system.cpu.dcache.demand_hits 606786134 # number of demand (read+write) hits
system.cpu.dcache.overall_hits 606786134 # number of overall hits
system.cpu.dcache.ReadReq_misses 197326 # number of ReadReq misses
system.cpu.dcache.WriteReq_misses 244722 # number of WriteReq misses
system.cpu.dcache.demand_misses 442048 # number of demand (read+write) misses
system.cpu.dcache.overall_misses 442048 # number of overall misses
system.cpu.dcache.ReadReq_miss_latency 4043270000 # number of ReadReq miss cycles
system.cpu.dcache.WriteReq_miss_latency 5872734000 # number of WriteReq miss cycles
system.cpu.dcache.demand_miss_latency 9916004000 # number of demand (read+write) miss cycles
system.cpu.dcache.overall_miss_latency 9916004000 # number of overall miss cycles
system.cpu.dcache.ReadReq_accesses 419042125 # number of ReadReq accesses(hits+misses)
system.cpu.dcache.WriteReq_accesses 188186057 # number of WriteReq accesses(hits+misses)
system.cpu.dcache.demand_accesses 607228182 # number of demand (read+write) accesses
system.cpu.dcache.overall_accesses 607228182 # number of overall (read+write) accesses
system.cpu.dcache.ReadReq_miss_rate 0.000471 # miss rate for ReadReq accesses
system.cpu.dcache.WriteReq_miss_rate 0.001300 # miss rate for WriteReq accesses
system.cpu.dcache.demand_miss_rate 0.000728 # miss rate for demand accesses
system.cpu.dcache.overall_miss_rate 0.000728 # miss rate for overall accesses
system.cpu.dcache.ReadReq_avg_miss_latency 20490.305383 # average ReadReq miss latency
system.cpu.dcache.WriteReq_avg_miss_latency 23997.572756 # average WriteReq miss latency
system.cpu.dcache.demand_avg_miss_latency 22431.962140 # average overall miss latency
system.cpu.dcache.overall_avg_miss_latency 22431.962140 # average overall miss latency
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
system.cpu.dcache.fast_writes 0 # number of fast writes performed
system.cpu.dcache.cache_copies 0 # number of cache copies performed
system.cpu.dcache.writebacks 396372 # number of writebacks
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.dcache.ReadReq_mshr_misses 197326 # number of ReadReq MSHR misses
system.cpu.dcache.WriteReq_mshr_misses 244722 # number of WriteReq MSHR misses
system.cpu.dcache.demand_mshr_misses 442048 # number of demand (read+write) MSHR misses
system.cpu.dcache.overall_mshr_misses 442048 # number of overall MSHR misses
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.dcache.ReadReq_mshr_miss_latency 3451292000 # number of ReadReq MSHR miss cycles
system.cpu.dcache.WriteReq_mshr_miss_latency 5138568000 # number of WriteReq MSHR miss cycles
system.cpu.dcache.demand_mshr_miss_latency 8589860000 # number of demand (read+write) MSHR miss cycles
system.cpu.dcache.overall_mshr_miss_latency 8589860000 # number of overall MSHR miss cycles
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000471 # mshr miss rate for ReadReq accesses
system.cpu.dcache.WriteReq_mshr_miss_rate 0.001300 # mshr miss rate for WriteReq accesses
system.cpu.dcache.demand_mshr_miss_rate 0.000728 # mshr miss rate for demand accesses
system.cpu.dcache.overall_mshr_miss_rate 0.000728 # mshr miss rate for overall accesses
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 17490.305383 # average ReadReq mshr miss latency
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 20997.572756 # average WriteReq mshr miss latency
system.cpu.dcache.demand_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.l2cache.replacements 71208 # number of replacements
system.cpu.l2cache.tagsinuse 18056.923092 # Cycle average of tags in use
system.cpu.l2cache.total_refs 423014 # Total number of references to valid blocks.
system.cpu.l2cache.sampled_refs 86793 # Sample count of references to valid blocks.
system.cpu.l2cache.avg_refs 4.873826 # Average number of references to valid blocks.
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
system.cpu.l2cache.demand_accesses 442770 # number of demand (read+write) accesses
system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency
system.cpu.l2cache.demand_hits 353302 # number of demand (read+write) hits
system.cpu.l2cache.demand_miss_latency 4652336000 # number of demand (read+write) miss cycles
system.cpu.l2cache.demand_miss_rate 0.202064 # miss rate for demand accesses
system.cpu.l2cache.demand_misses 89468 # number of demand (read+write) misses
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.l2cache.demand_mshr_miss_latency 3578720000 # number of demand (read+write) MSHR miss cycles
system.cpu.l2cache.demand_mshr_miss_rate 0.202064 # mshr miss rate for demand accesses
system.cpu.l2cache.demand_mshr_misses 89468 # number of demand (read+write) MSHR misses
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.l2cache.occ_blocks::0 1869.199731 # Average occupied blocks per context
system.cpu.l2cache.occ_blocks::1 16187.723361 # Average occupied blocks per context
system.cpu.l2cache.occ_percent::0 0.057043 # Average percentage of cache occupancy
system.cpu.l2cache.occ_percent::1 0.494010 # Average percentage of cache occupancy
system.cpu.l2cache.ReadReq_hits 166833 # number of ReadReq hits
system.cpu.l2cache.Writeback_hits 396372 # number of Writeback hits
system.cpu.l2cache.ReadExReq_hits 186469 # number of ReadExReq hits
system.cpu.l2cache.demand_hits 353302 # number of demand (read+write) hits
system.cpu.l2cache.overall_hits 353302 # number of overall hits
system.cpu.l2cache.ReadReq_misses 31215 # number of ReadReq misses
system.cpu.l2cache.ReadExReq_misses 58253 # number of ReadExReq misses
system.cpu.l2cache.demand_misses 89468 # number of demand (read+write) misses
system.cpu.l2cache.overall_misses 89468 # number of overall misses
system.cpu.l2cache.ReadReq_miss_latency 1623180000 # number of ReadReq miss cycles
system.cpu.l2cache.ReadExReq_miss_latency 3029156000 # number of ReadExReq miss cycles
system.cpu.l2cache.demand_miss_latency 4652336000 # number of demand (read+write) miss cycles
system.cpu.l2cache.overall_miss_latency 4652336000 # number of overall miss cycles
system.cpu.l2cache.ReadReq_accesses 198048 # number of ReadReq accesses(hits+misses)
system.cpu.l2cache.Writeback_accesses 396372 # number of Writeback accesses(hits+misses)
system.cpu.l2cache.ReadExReq_accesses 244722 # number of ReadExReq accesses(hits+misses)
system.cpu.l2cache.demand_accesses 442770 # number of demand (read+write) accesses
system.cpu.l2cache.overall_accesses 442770 # number of overall (read+write) accesses
system.cpu.l2cache.ReadReq_miss_rate 0.157613 # miss rate for ReadReq accesses
system.cpu.l2cache.ReadExReq_miss_rate 0.238037 # miss rate for ReadExReq accesses
system.cpu.l2cache.demand_miss_rate 0.202064 # miss rate for demand accesses
system.cpu.l2cache.overall_miss_rate 0.202064 # miss rate for overall accesses
system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency
system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency
system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency
system.cpu.l2cache.overall_avg_miss_latency 52000 # average overall miss latency
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
system.cpu.l2cache.writebacks 58007 # number of writebacks
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.l2cache.ReadReq_mshr_misses 31215 # number of ReadReq MSHR misses
system.cpu.l2cache.ReadExReq_mshr_misses 58253 # number of ReadExReq MSHR misses
system.cpu.l2cache.demand_mshr_misses 89468 # number of demand (read+write) MSHR misses
system.cpu.l2cache.overall_mshr_misses 89468 # number of overall MSHR misses
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.l2cache.ReadReq_mshr_miss_latency 1248600000 # number of ReadReq MSHR miss cycles
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2330120000 # number of ReadExReq MSHR miss cycles
system.cpu.l2cache.demand_mshr_miss_latency 3578720000 # number of demand (read+write) MSHR miss cycles
system.cpu.l2cache.overall_mshr_miss_latency 3578720000 # number of overall MSHR miss cycles
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.157613 # mshr miss rate for ReadReq accesses
system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.238037 # mshr miss rate for ReadExReq accesses
system.cpu.l2cache.demand_mshr_miss_rate 0.202064 # mshr miss rate for demand accesses
system.cpu.l2cache.overall_mshr_miss_rate 0.202064 # mshr miss rate for overall accesses
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_miss_latency 40000 # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
system.cpu.l2cache.overall_hits 353302 # number of overall hits
system.cpu.l2cache.overall_miss_latency 4652336000 # number of overall miss cycles
system.cpu.l2cache.overall_miss_rate 0.202064 # miss rate for overall accesses
system.cpu.l2cache.overall_misses 89468 # number of overall misses
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.l2cache.overall_mshr_miss_latency 3578720000 # number of overall MSHR miss cycles
system.cpu.l2cache.overall_mshr_miss_rate 0.202064 # mshr miss rate for overall accesses
system.cpu.l2cache.overall_mshr_misses 89468 # number of overall MSHR misses
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
system.cpu.l2cache.replacements 71208 # number of replacements
system.cpu.l2cache.sampled_refs 86793 # Sample count of references to valid blocks.
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.l2cache.tagsinuse 18056.923092 # Cycle average of tags in use
system.cpu.l2cache.total_refs 423014 # Total number of references to valid blocks.
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.l2cache.writebacks 58007 # number of writebacks
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
system.cpu.numCycles 3606517174 # number of cpu cycles simulated
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
system.cpu.num_busy_cycles 3606517174 # Number of busy cycles
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
system.cpu.num_fp_insts 0 # number of float instructions
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
system.cpu.num_func_calls 0 # number of times a function call or return occured
system.cpu.num_idle_cycles 0 # Number of idle cycles
system.cpu.num_insts 1621493983 # Number of instructions executed
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
system.cpu.num_int_insts 1621354493 # number of integer instructions
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
system.cpu.num_load_insts 419042125 # Number of load instructions
system.cpu.num_mem_refs 607228182 # number of memory refs
system.cpu.num_store_insts 188186057 # Number of store instructions
system.cpu.workload.num_syscalls 48 # Number of system calls
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
---------- End Simulation Statistics ----------

View file

@ -9,18 +9,19 @@ time_sync_spin_threshold=100000000
type=LinuxArmSystem
children=bridge cf0 cpu0 cpu1 intrctrl iobus iocache l2c membus nvmem physmem realview terminal toL2Bus vncserver
boot_cpu_frequency=500
boot_loader=/projects/pd/randd/dist/binaries/boot.arm
boot_loader=/dist/m5/system/binaries/boot.arm
boot_loader_mem=system.nvmem
boot_osflags=earlyprintk console=ttyAMA0 lpj=19988480 norandmaps rw loglevel=8 mem=128MB root=/dev/sda1
flags_addr=268435504
gic_cpu_addr=520093952
init_param=0
kernel=/projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
kernel=/dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
load_addr_mask=268435455
machine_type=RealView_PBX
mem_mode=timing
memories=system.nvmem system.physmem
midr_regval=890224640
num_work_ids=16
physmem=system.physmem
readfile=tests/halt.sh
symbolfile=
@ -63,7 +64,7 @@ table_size=65536
[system.cf0.image.child]
type=RawDiskImage
image_file=/projects/pd/randd/dist/disks/linux-arm-ael.img
image_file=/dist/m5/system/disks/linux-arm-ael.img
read_only=true
[system.cpu0]
@ -1495,6 +1496,7 @@ port=system.l2c.cpu_side system.cpu0.icache.mem_side system.cpu0.dcache.mem_side
[system.vncserver]
type=VncServer
frame_capture=false
number=0
port=5900

View file

@ -1,12 +1,14 @@
Redirecting stdout to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual/simout
Redirecting stderr to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual/simerr
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
gem5 compiled Nov 21 2011 16:32:34
gem5 started Nov 22 2011 02:00:14
gem5 executing on u200540-lin
gem5 compiled Jan 8 2012 22:12:58
gem5 started Jan 9 2012 03:33:38
gem5 executing on zizzer
command line: build/ARM_FS/gem5.opt -d build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual -re tests/run.py build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual
Global frequency set at 1000000000000 ticks per second
info: kernel located at: /projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
info: kernel located at: /dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
info: Using bootloader at address 0x80000000
info: Entering event queue @ 0. Starting simulation...
Exiting @ tick 2582494395500 because m5_exit instruction encountered

View file

@ -3,10 +3,10 @@
sim_seconds 2.582494 # Number of seconds simulated
sim_ticks 2582494395500 # Number of ticks simulated
sim_freq 1000000000000 # Frequency of simulated ticks
host_inst_rate 86259 # Simulator instruction rate (inst/s)
host_tick_rate 2789337609 # Simulator tick rate (ticks/s)
host_mem_usage 380504 # Number of bytes of host memory used
host_seconds 925.85 # Real time elapsed on the host
host_inst_rate 65512 # Simulator instruction rate (inst/s)
host_tick_rate 2118472138 # Simulator tick rate (ticks/s)
host_mem_usage 384260 # Number of bytes of host memory used
host_seconds 1219.04 # Real time elapsed on the host
sim_insts 79862069 # Number of instructions simulated
system.l2c.replacements 132200 # number of replacements
system.l2c.tagsinuse 27582.989225 # Cycle average of tags in use
@ -312,12 +312,12 @@ system.cpu0.rename.ROBFullEvents 1483 # Nu
system.cpu0.rename.IQFullEvents 580883 # Number of times rename has blocked due to IQ full
system.cpu0.rename.LSQFullEvents 3149232 # Number of times rename has blocked due to LSQ full
system.cpu0.rename.FullRegisterEvents 205 # Number of times there has been no free registers
system.cpu0.rename.RenamedOperands 54779836 # Number of destination operands rename has renamed
system.cpu0.rename.RenamedOperands 54779837 # Number of destination operands rename has renamed
system.cpu0.rename.RenameLookups 247536349 # Number of register rename lookups that rename has made
system.cpu0.rename.int_rename_lookups 247487579 # Number of integer rename lookups
system.cpu0.rename.fp_rename_lookups 48770 # Number of floating rename lookups
system.cpu0.rename.CommittedMaps 41441157 # Number of HB maps that are committed
system.cpu0.rename.UndoneMaps 13338678 # Number of HB maps that are undone due to squashing
system.cpu0.rename.UndoneMaps 13338679 # Number of HB maps that are undone due to squashing
system.cpu0.rename.serializingInsts 828868 # count of serializing insts renamed
system.cpu0.rename.tempSerializingInsts 763855 # count of temporary serializing insts renamed
system.cpu0.rename.skidInsts 8500592 # count of insts added to the skid buffer
@ -325,13 +325,13 @@ system.cpu0.memDep0.insertedLoads 11770384 # Nu
system.cpu0.memDep0.insertedStores 7686805 # Number of stores inserted to the mem dependence unit.
system.cpu0.memDep0.conflictingLoads 1443183 # Number of conflicting loads.
system.cpu0.memDep0.conflictingStores 1570137 # Number of conflicting stores.
system.cpu0.iq.iqInstsAdded 50961906 # Number of instructions added to the IQ (excludes non-spec)
system.cpu0.iq.iqNonSpecInstsAdded 1297751 # Number of non-speculative instructions added to the IQ
system.cpu0.iq.iqInstsIssued 80276175 # Number of instructions issued
system.cpu0.iq.iqInstsAdded 50961905 # Number of instructions added to the IQ (excludes non-spec)
system.cpu0.iq.iqNonSpecInstsAdded 1297752 # Number of non-speculative instructions added to the IQ
system.cpu0.iq.iqInstsIssued 80276174 # Number of instructions issued
system.cpu0.iq.iqSquashedInstsIssued 137636 # Number of squashed instructions issued
system.cpu0.iq.iqSquashedInstsExamined 9888896 # Number of squashed instructions iterated over during squash; mainly for profiling
system.cpu0.iq.iqSquashedOperandsExamined 22816025 # Number of squashed operands that are examined and possibly removed from graph
system.cpu0.iq.iqSquashedNonSpecRemoved 253323 # Number of squashed non-spec instructions that were removed
system.cpu0.iq.iqSquashedNonSpecRemoved 253324 # Number of squashed non-spec instructions that were removed
system.cpu0.iq.issued_per_cycle::samples 109741052 # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::mean 0.731505 # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::stdev 1.440076 # Number of insts issued each cycle
@ -340,8 +340,8 @@ system.cpu0.iq.issued_per_cycle::0 80125799 73.01% 73.01% # Nu
system.cpu0.iq.issued_per_cycle::1 10111373 9.21% 82.23% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::2 4133530 3.77% 85.99% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::3 3177611 2.90% 88.89% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::4 9954077 9.07% 97.96% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::5 1265280 1.15% 99.11% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::4 9954078 9.07% 97.96% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::5 1265279 1.15% 99.11% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::6 670333 0.61% 99.72% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::7 224189 0.20% 99.93% # Number of insts issued each cycle
system.cpu0.iq.issued_per_cycle::8 78860 0.07% 100.00% # Number of insts issued each cycle
@ -384,7 +384,7 @@ system.cpu0.iq.fu_full::MemWrite 285533 3.56% 100.00% # at
system.cpu0.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
system.cpu0.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
system.cpu0.iq.FU_type_0::No_OpClass 88461 0.11% 0.11% # Type of FU issued
system.cpu0.iq.FU_type_0::IntAlu 29731482 37.04% 37.15% # Type of FU issued
system.cpu0.iq.FU_type_0::IntAlu 29731481 37.04% 37.15% # Type of FU issued
system.cpu0.iq.FU_type_0::IntMult 62351 0.08% 37.22% # Type of FU issued
system.cpu0.iq.FU_type_0::IntDiv 0 0.00% 37.22% # Type of FU issued
system.cpu0.iq.FU_type_0::FloatAdd 0 0.00% 37.22% # Type of FU issued
@ -417,17 +417,17 @@ system.cpu0.iq.FU_type_0::MemRead 43135014 53.73% 90.96% # Ty
system.cpu0.iq.FU_type_0::MemWrite 7257159 9.04% 100.00% # Type of FU issued
system.cpu0.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
system.cpu0.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
system.cpu0.iq.FU_type_0::total 80276175 # Type of FU issued
system.cpu0.iq.FU_type_0::total 80276174 # Type of FU issued
system.cpu0.iq.rate 0.227757 # Inst issue rate
system.cpu0.iq.fu_busy_cnt 8028360 # FU busy when requested
system.cpu0.iq.fu_busy_rate 0.100009 # FU busy rate (busy events/executed inst)
system.cpu0.iq.int_inst_queue_reads 278513866 # Number of integer instruction queue reads
system.cpu0.iq.int_inst_queue_reads 278513864 # Number of integer instruction queue reads
system.cpu0.iq.int_inst_queue_writes 62161443 # Number of integer instruction queue writes
system.cpu0.iq.int_inst_queue_wakeup_accesses 46668616 # Number of integer instruction queue wakeup accesses
system.cpu0.iq.int_inst_queue_wakeup_accesses 46668615 # Number of integer instruction queue wakeup accesses
system.cpu0.iq.fp_inst_queue_reads 11568 # Number of floating instruction queue reads
system.cpu0.iq.fp_inst_queue_writes 6980 # Number of floating instruction queue writes
system.cpu0.iq.fp_inst_queue_wakeup_accesses 5172 # Number of floating instruction queue wakeup accesses
system.cpu0.iq.int_alu_accesses 88210043 # Number of integer alu accesses
system.cpu0.iq.int_alu_accesses 88210042 # Number of integer alu accesses
system.cpu0.iq.fp_alu_accesses 6031 # Number of floating point alu accesses
system.cpu0.iew.lsq.thread0.forwLoads 399886 # Number of loads that had data forwarded from stores
system.cpu0.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
@ -447,14 +447,14 @@ system.cpu0.iew.iewDispatchedInsts 52433539 # Nu
system.cpu0.iew.iewDispSquashedInsts 243567 # Number of squashed instructions skipped by dispatch
system.cpu0.iew.iewDispLoadInsts 11770384 # Number of dispatched load instructions
system.cpu0.iew.iewDispStoreInsts 7686805 # Number of dispatched store instructions
system.cpu0.iew.iewDispNonSpecInsts 865739 # Number of dispatched non-speculative instructions
system.cpu0.iew.iewDispNonSpecInsts 865740 # Number of dispatched non-speculative instructions
system.cpu0.iew.iewIQFullEvents 62160 # Number of times the IQ has become full, causing a stall
system.cpu0.iew.iewLSQFullEvents 5553 # Number of times the LSQ has become full, causing a stall
system.cpu0.iew.memOrderViolationEvents 20554 # Number of memory order violations
system.cpu0.iew.predictedTakenIncorrect 507509 # Number of branches that were predicted taken incorrectly
system.cpu0.iew.predictedNotTakenIncorrect 136100 # Number of branches that were predicted not taken incorrectly
system.cpu0.iew.branchMispredicts 643609 # Number of branch mispredicts detected at execute
system.cpu0.iew.iewExecutedInsts 79551296 # Number of executed instructions
system.cpu0.iew.iewExecutedInsts 79551295 # Number of executed instructions
system.cpu0.iew.iewExecLoadInsts 42843907 # Number of load instructions executed
system.cpu0.iew.iewExecSquashedInsts 724879 # Number of squashed instructions skipped in execute
system.cpu0.iew.exec_swp 0 # number of swp insts executed
@ -463,8 +463,8 @@ system.cpu0.iew.exec_refs 50011427 # nu
system.cpu0.iew.exec_branches 6433542 # Number of branches executed
system.cpu0.iew.exec_stores 7167520 # Number of stores executed
system.cpu0.iew.exec_rate 0.225700 # Inst execution rate
system.cpu0.iew.wb_sent 79133798 # cumulative count of insts sent to commit
system.cpu0.iew.wb_count 46673788 # cumulative count of insts written-back
system.cpu0.iew.wb_sent 79133797 # cumulative count of insts sent to commit
system.cpu0.iew.wb_count 46673787 # cumulative count of insts written-back
system.cpu0.iew.wb_producers 24793926 # num instructions producing a value
system.cpu0.iew.wb_consumers 46078393 # num instructions consuming a value
system.cpu0.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
@ -514,8 +514,8 @@ system.cpu0.cpi 8.431852 # CP
system.cpu0.cpi_total 8.431852 # CPI: Total CPI of All Threads
system.cpu0.ipc 0.118598 # IPC: Instructions Per Cycle
system.cpu0.ipc_total 0.118598 # IPC: Total IPC of All Threads
system.cpu0.int_regfile_reads 354175082 # number of integer regfile reads
system.cpu0.int_regfile_writes 46137252 # number of integer regfile writes
system.cpu0.int_regfile_reads 354175079 # number of integer regfile reads
system.cpu0.int_regfile_writes 46137251 # number of integer regfile writes
system.cpu0.fp_regfile_reads 4205 # number of floating regfile reads
system.cpu0.fp_regfile_writes 1348 # number of floating regfile writes
system.cpu0.misc_regfile_reads 65629786 # number of misc regfile reads

View file

@ -9,18 +9,19 @@ time_sync_spin_threshold=100000000
type=LinuxArmSystem
children=bridge cf0 cpu intrctrl iobus iocache l2c membus nvmem physmem realview terminal toL2Bus vncserver
boot_cpu_frequency=500
boot_loader=/projects/pd/randd/dist/binaries/boot.arm
boot_loader=/dist/m5/system/binaries/boot.arm
boot_loader_mem=system.nvmem
boot_osflags=earlyprintk console=ttyAMA0 lpj=19988480 norandmaps rw loglevel=8 mem=128MB root=/dev/sda1
flags_addr=268435504
gic_cpu_addr=520093952
init_param=0
kernel=/projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
kernel=/dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
load_addr_mask=268435455
machine_type=RealView_PBX
mem_mode=timing
memories=system.nvmem system.physmem
midr_regval=890224640
num_work_ids=16
physmem=system.physmem
readfile=tests/halt.sh
symbolfile=
@ -63,7 +64,7 @@ table_size=65536
[system.cf0.image.child]
type=RawDiskImage
image_file=/projects/pd/randd/dist/disks/linux-arm-ael.img
image_file=/dist/m5/system/disks/linux-arm-ael.img
read_only=true
[system.cpu]
@ -1041,6 +1042,7 @@ port=system.l2c.cpu_side system.cpu.icache.mem_side system.cpu.dcache.mem_side s
[system.vncserver]
type=VncServer
frame_capture=false
number=0
port=5900

View file

@ -1,12 +1,14 @@
Redirecting stdout to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3/simout
Redirecting stderr to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3/simerr
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
gem5 compiled Nov 21 2011 16:32:34
gem5 started Nov 22 2011 02:00:08
gem5 executing on u200540-lin
gem5 compiled Jan 8 2012 22:12:58
gem5 started Jan 9 2012 03:32:35
gem5 executing on zizzer
command line: build/ARM_FS/gem5.opt -d build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3 -re tests/run.py build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3
Global frequency set at 1000000000000 ticks per second
info: kernel located at: /projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
info: kernel located at: /dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
info: Using bootloader at address 0x80000000
info: Entering event queue @ 0. Starting simulation...
Exiting @ tick 2503566110500 because m5_exit instruction encountered

View file

@ -3,10 +3,10 @@
sim_seconds 2.503566 # Number of seconds simulated
sim_ticks 2503566110500 # Number of ticks simulated
sim_freq 1000000000000 # Frequency of simulated ticks
host_inst_rate 84156 # Simulator instruction rate (inst/s)
host_tick_rate 2743719152 # Simulator tick rate (ticks/s)
host_mem_usage 380536 # Number of bytes of host memory used
host_seconds 912.47 # Real time elapsed on the host
host_inst_rate 72389 # Simulator instruction rate (inst/s)
host_tick_rate 2360079964 # Simulator tick rate (ticks/s)
host_mem_usage 384076 # Number of bytes of host memory used
host_seconds 1060.80 # Real time elapsed on the host
sim_insts 76790007 # Number of instructions simulated
system.l2c.replacements 119509 # number of replacements
system.l2c.tagsinuse 25929.897253 # Cycle average of tags in use
@ -270,12 +270,12 @@ system.cpu.rename.ROBFullEvents 4400 # Nu
system.cpu.rename.IQFullEvents 914485 # Number of times rename has blocked due to IQ full
system.cpu.rename.LSQFullEvents 3979731 # Number of times rename has blocked due to LSQ full
system.cpu.rename.FullRegisterEvents 42252 # Number of times there has been no free registers
system.cpu.rename.RenamedOperands 118358542 # Number of destination operands rename has renamed
system.cpu.rename.RenamedOperands 118358543 # Number of destination operands rename has renamed
system.cpu.rename.RenameLookups 523323093 # Number of register rename lookups that rename has made
system.cpu.rename.int_rename_lookups 523225639 # Number of integer rename lookups
system.cpu.rename.fp_rename_lookups 97454 # Number of floating rename lookups
system.cpu.rename.CommittedMaps 77492718 # Number of HB maps that are committed
system.cpu.rename.UndoneMaps 40865823 # Number of HB maps that are undone due to squashing
system.cpu.rename.UndoneMaps 40865824 # Number of HB maps that are undone due to squashing
system.cpu.rename.serializingInsts 1204637 # count of serializing insts renamed
system.cpu.rename.tempSerializingInsts 1098724 # count of temporary serializing insts renamed
system.cpu.rename.skidInsts 12304657 # count of insts added to the skid buffer
@ -283,13 +283,13 @@ system.cpu.memDep0.insertedLoads 21982315 # Nu
system.cpu.memDep0.insertedStores 14168730 # Number of stores inserted to the mem dependence unit.
system.cpu.memDep0.conflictingLoads 1896802 # Number of conflicting loads.
system.cpu.memDep0.conflictingStores 2281380 # Number of conflicting stores.
system.cpu.iq.iqInstsAdded 102860212 # Number of instructions added to the IQ (excludes non-spec)
system.cpu.iq.iqNonSpecInstsAdded 1874615 # Number of non-speculative instructions added to the IQ
system.cpu.iq.iqInstsIssued 126873317 # Number of instructions issued
system.cpu.iq.iqInstsAdded 102860211 # Number of instructions added to the IQ (excludes non-spec)
system.cpu.iq.iqNonSpecInstsAdded 1874616 # Number of non-speculative instructions added to the IQ
system.cpu.iq.iqInstsIssued 126873316 # Number of instructions issued
system.cpu.iq.iqSquashedInstsIssued 252471 # Number of squashed instructions issued
system.cpu.iq.iqSquashedInstsExamined 26973483 # Number of squashed instructions iterated over during squash; mainly for profiling
system.cpu.iq.iqSquashedOperandsExamined 72956952 # Number of squashed operands that are examined and possibly removed from graph
system.cpu.iq.iqSquashedNonSpecRemoved 374922 # Number of squashed non-spec instructions that were removed
system.cpu.iq.iqSquashedNonSpecRemoved 374923 # Number of squashed non-spec instructions that were removed
system.cpu.iq.issued_per_cycle::samples 155542524 # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::mean 0.815683 # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::stdev 1.505358 # Number of insts issued each cycle
@ -298,8 +298,8 @@ system.cpu.iq.issued_per_cycle::0 108919716 70.03% 70.03% # Nu
system.cpu.iq.issued_per_cycle::1 15115277 9.72% 79.74% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::2 7538109 4.85% 84.59% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::3 6517896 4.19% 88.78% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::4 12766128 8.21% 96.99% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::5 2735747 1.76% 98.75% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::4 12766129 8.21% 96.99% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::5 2735746 1.76% 98.75% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::6 1395145 0.90% 99.64% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::7 422031 0.27% 99.91% # Number of insts issued each cycle
system.cpu.iq.issued_per_cycle::8 132475 0.09% 100.00% # Number of insts issued each cycle
@ -342,7 +342,7 @@ system.cpu.iq.fu_full::MemWrite 436630 4.91% 100.00% # at
system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
system.cpu.iq.FU_type_0::No_OpClass 106530 0.08% 0.08% # Type of FU issued
system.cpu.iq.FU_type_0::IntAlu 60069483 47.35% 47.43% # Type of FU issued
system.cpu.iq.FU_type_0::IntAlu 60069482 47.35% 47.43% # Type of FU issued
system.cpu.iq.FU_type_0::IntMult 96615 0.08% 47.51% # Type of FU issued
system.cpu.iq.FU_type_0::IntDiv 0 0.00% 47.51% # Type of FU issued
system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 47.51% # Type of FU issued
@ -375,17 +375,17 @@ system.cpu.iq.FU_type_0::MemRead 53942685 42.52% 90.02% # Ty
system.cpu.iq.FU_type_0::MemWrite 12655733 9.98% 100.00% # Type of FU issued
system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
system.cpu.iq.FU_type_0::total 126873317 # Type of FU issued
system.cpu.iq.FU_type_0::total 126873316 # Type of FU issued
system.cpu.iq.rate 0.305048 # Inst issue rate
system.cpu.iq.fu_busy_cnt 8900311 # FU busy when requested
system.cpu.iq.fu_busy_rate 0.070151 # FU busy rate (busy events/executed inst)
system.cpu.iq.int_inst_queue_reads 418533130 # Number of integer instruction queue reads
system.cpu.iq.int_inst_queue_reads 418533128 # Number of integer instruction queue reads
system.cpu.iq.int_inst_queue_writes 131726191 # Number of integer instruction queue writes
system.cpu.iq.int_inst_queue_wakeup_accesses 87292109 # Number of integer instruction queue wakeup accesses
system.cpu.iq.int_inst_queue_wakeup_accesses 87292108 # Number of integer instruction queue wakeup accesses
system.cpu.iq.fp_inst_queue_reads 24017 # Number of floating instruction queue reads
system.cpu.iq.fp_inst_queue_writes 13690 # Number of floating instruction queue writes
system.cpu.iq.fp_inst_queue_wakeup_accesses 10446 # Number of floating instruction queue wakeup accesses
system.cpu.iq.int_alu_accesses 135654306 # Number of integer alu accesses
system.cpu.iq.int_alu_accesses 135654305 # Number of integer alu accesses
system.cpu.iq.fp_alu_accesses 12792 # Number of floating point alu accesses
system.cpu.iew.lsq.thread0.forwLoads 614767 # Number of loads that had data forwarded from stores
system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
@ -405,14 +405,14 @@ system.cpu.iew.iewDispatchedInsts 104949442 # Nu
system.cpu.iew.iewDispSquashedInsts 473979 # Number of squashed instructions skipped by dispatch
system.cpu.iew.iewDispLoadInsts 21982315 # Number of dispatched load instructions
system.cpu.iew.iewDispStoreInsts 14168730 # Number of dispatched store instructions
system.cpu.iew.iewDispNonSpecInsts 1228030 # Number of dispatched non-speculative instructions
system.cpu.iew.iewDispNonSpecInsts 1228031 # Number of dispatched non-speculative instructions
system.cpu.iew.iewIQFullEvents 85187 # Number of times the IQ has become full, causing a stall
system.cpu.iew.iewLSQFullEvents 7556 # Number of times the LSQ has become full, causing a stall
system.cpu.iew.memOrderViolationEvents 32657 # Number of memory order violations
system.cpu.iew.predictedTakenIncorrect 850397 # Number of branches that were predicted taken incorrectly
system.cpu.iew.predictedNotTakenIncorrect 257130 # Number of branches that were predicted not taken incorrectly
system.cpu.iew.branchMispredicts 1107527 # Number of branch mispredicts detected at execute
system.cpu.iew.iewExecutedInsts 123429780 # Number of executed instructions
system.cpu.iew.iewExecutedInsts 123429779 # Number of executed instructions
system.cpu.iew.iewExecLoadInsts 52914304 # Number of load instructions executed
system.cpu.iew.iewExecSquashedInsts 3443537 # Number of squashed instructions skipped in execute
system.cpu.iew.exec_swp 0 # number of swp insts executed
@ -421,8 +421,8 @@ system.cpu.iew.exec_refs 65401525 # nu
system.cpu.iew.exec_branches 11705842 # Number of branches executed
system.cpu.iew.exec_stores 12487221 # Number of stores executed
system.cpu.iew.exec_rate 0.296769 # Inst execution rate
system.cpu.iew.wb_sent 121771134 # cumulative count of insts sent to commit
system.cpu.iew.wb_count 87302555 # cumulative count of insts written-back
system.cpu.iew.wb_sent 121771133 # cumulative count of insts sent to commit
system.cpu.iew.wb_count 87302554 # cumulative count of insts written-back
system.cpu.iew.wb_producers 47043389 # num instructions producing a value
system.cpu.iew.wb_consumers 86638668 # num instructions consuming a value
system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
@ -472,8 +472,8 @@ system.cpu.cpi 5.416227 # CP
system.cpu.cpi_total 5.416227 # CPI: Total CPI of All Threads
system.cpu.ipc 0.184630 # IPC: Instructions Per Cycle
system.cpu.ipc_total 0.184630 # IPC: Total IPC of All Threads
system.cpu.int_regfile_reads 559625789 # number of integer regfile reads
system.cpu.int_regfile_writes 89694790 # number of integer regfile writes
system.cpu.int_regfile_reads 559625786 # number of integer regfile reads
system.cpu.int_regfile_writes 89694789 # number of integer regfile writes
system.cpu.fp_regfile_reads 8322 # number of floating regfile reads
system.cpu.fp_regfile_writes 2832 # number of floating regfile writes
system.cpu.misc_regfile_reads 137256850 # number of misc regfile reads

View file

@ -15,10 +15,11 @@ e820_table=system.e820_table
init_param=0
intel_mp_pointer=system.intel_mp_pointer
intel_mp_table=system.intel_mp_table
kernel=/projects/pd/randd/dist/binaries/x86_64-vmlinux-2.6.22.9
kernel=/scratch/nilay/GEM5/system/binaries/x86_64-vmlinux-2.6.22.9
load_addr_mask=18446744073709551615
mem_mode=timing
memories=system.physmem
num_work_ids=16
physmem=system.physmem
readfile=tests/halt.sh
smbios_table=system.smbios_table
@ -1301,7 +1302,7 @@ table_size=65536
[system.pc.south_bridge.ide.disks0.image.child]
type=RawDiskImage
image_file=/projects/pd/randd/dist/disks/linux-x86.img
image_file=/scratch/nilay/GEM5/system/disks/linux-x86.img
read_only=true
[system.pc.south_bridge.ide.disks1]
@ -1321,7 +1322,7 @@ table_size=65536
[system.pc.south_bridge.ide.disks1.image.child]
type=RawDiskImage
image_file=/projects/pd/randd/dist/disks/linux-bigswap2.img
image_file=/scratch/nilay/GEM5/system/disks/linux-bigswap2.img
read_only=true
[system.pc.south_bridge.int_lines0]

View file

@ -1,13 +1,12 @@
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
gem5 compiled Nov 21 2011 16:24:08
gem5 started Nov 21 2011 23:30:30
gem5 executing on u200540-lin
command line: build/X86_FS/gem5.opt -d build/X86_FS/tests/opt/long/10.linux-boot/x86/linux/pc-o3-timing -re tests/run.py build/X86_FS/tests/opt/long/10.linux-boot/x86/linux/pc-o3-timing
gem5 compiled Jan 9 2012 20:47:38
gem5 started Jan 9 2012 21:13:16
gem5 executing on ribera.cs.wisc.edu
command line: build/X86_FS/gem5.fast -d build/X86_FS/tests/fast/long/10.linux-boot/x86/linux/pc-o3-timing -re tests/run.py build/X86_FS/tests/fast/long/10.linux-boot/x86/linux/pc-o3-timing
warning: add_child('terminal'): child 'terminal' already has parent
Global frequency set at 1000000000000 ticks per second
info: kernel located at: /projects/pd/randd/dist/binaries/x86_64-vmlinux-2.6.22.9
0: rtc: Real-time clock set to Sun Jan 1 00:00:00 2012
info: kernel located at: /scratch/nilay/GEM5/system/binaries/x86_64-vmlinux-2.6.22.9
info: Entering event queue @ 0. Starting simulation...
Exiting @ tick 5145286546500 because m5_exit instruction encountered
Exiting @ tick 5161177988500 because m5_exit instruction encountered

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more