Merge yet again with the main repository.
This commit is contained in:
commit
da2a4acc26
190 changed files with 5557 additions and 5001 deletions
|
@ -663,7 +663,11 @@ if not py_getvar('Py_ENABLE_SHARED'):
|
|||
|
||||
py_libs = []
|
||||
for lib in py_getvar('LIBS').split() + py_getvar('SYSLIBS').split():
|
||||
assert lib.startswith('-l')
|
||||
if not lib.startswith('-l'):
|
||||
# Python requires some special flags to link (e.g. -framework
|
||||
# common on OS X systems), assume appending preserves order
|
||||
main.Append(LINKFLAGS=[lib])
|
||||
else:
|
||||
lib = lib[2:]
|
||||
if lib not in py_libs:
|
||||
py_libs.append(lib)
|
||||
|
|
|
@ -483,6 +483,15 @@ def makeDualRoot(testSystem, driveSystem, dumpfile):
|
|||
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
|
||||
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
|
||||
|
||||
if hasattr(testSystem, 'realview'):
|
||||
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
|
||||
self.etherlink.int1 = Parent.drivesys.realview.ethernet.interface
|
||||
elif hasattr(testSystem, 'tsunami'):
|
||||
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
|
||||
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
|
||||
else:
|
||||
fatal("Don't know how to connect these system together")
|
||||
|
||||
if dumpfile:
|
||||
self.etherdump = EtherDump(file=dumpfile)
|
||||
self.etherlink.dump = Parent.etherdump
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
# Authors: Lisa Hsu
|
||||
|
||||
# system options
|
||||
parser.add_option("-c", "--cpu-type", type="choice", default="atomic",
|
||||
parser.add_option("--cpu-type", type="choice", default="atomic",
|
||||
choices = ["atomic", "timing", "detailed", "inorder"],
|
||||
help = "type of cpu to run with")
|
||||
parser.add_option("-n", "--num-cpus", type="int", default=1)
|
||||
|
@ -63,7 +63,8 @@ parser.add_option("--work-end-exit-count", action="store", type="int",
|
|||
help="exit at specified work end count")
|
||||
parser.add_option("--work-begin-exit-count", action="store", type="int",
|
||||
help="exit at specified work begin count")
|
||||
|
||||
parser.add_option("--init-param", action="store", type="int", default=0,
|
||||
help="Parameter available in simulation with m5 initparam")
|
||||
|
||||
# Checkpointing options
|
||||
###Note that performing checkpointing via python script files will override
|
||||
|
@ -84,6 +85,10 @@ parser.add_option("--work-end-checkpoint-count", action="store", type="int",
|
|||
help="checkpoint at specified work end count")
|
||||
parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
|
||||
help="checkpoint and exit when active cpu count is reached")
|
||||
parser.add_option("--restore-with-cpu", action="store", type="choice",
|
||||
default="atomic", choices = ["atomic", "timing",
|
||||
"detailed", "inorder"],
|
||||
help = "cpu type for restoring from a checkpoint")
|
||||
|
||||
|
||||
# CPU Switching - default switch model goes from a checkpoint
|
||||
|
|
|
@ -60,7 +60,15 @@ def setCPUClass(options):
|
|||
test_mem_mode = 'atomic'
|
||||
|
||||
if not atomic:
|
||||
if options.checkpoint_restore != None or options.fast_forward:
|
||||
if options.checkpoint_restore != None:
|
||||
if options.restore_with_cpu != options.cpu_type:
|
||||
CPUClass = TmpClass
|
||||
class TmpClass(AtomicSimpleCPU): pass
|
||||
else:
|
||||
if options.restore_with_cpu != "atomic":
|
||||
test_mem_mode = 'timing'
|
||||
|
||||
elif options.fast_forward:
|
||||
CPUClass = TmpClass
|
||||
class TmpClass(AtomicSimpleCPU): pass
|
||||
else:
|
||||
|
|
|
@ -731,7 +731,7 @@ class vpr_route(vpr):
|
|||
'-first_iter_pres_fac', '4', '-initial_pres_fac', '8' ]
|
||||
output = 'route_log.out'
|
||||
|
||||
all = [ ammp, applu, apsi, art110, art470, equake, facerec, fma3d, galgel,
|
||||
all = [ ammp, applu, apsi, art, art110, art470, equake, facerec, fma3d, galgel,
|
||||
lucas, mesa, mgrid, sixtrack, swim, wupwise, bzip2_source,
|
||||
bzip2_graphic, bzip2_program, crafty, eon_kajiya, eon_cook,
|
||||
eon_rushmeier, gap, gcc_166, gcc_200, gcc_expr, gcc_integrate,
|
||||
|
|
|
@ -151,6 +151,8 @@ if options.kernel is not None:
|
|||
if options.script is not None:
|
||||
test_sys.readfile = options.script
|
||||
|
||||
test_sys.init_param = options.init_param
|
||||
|
||||
test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)]
|
||||
|
||||
CacheConfig.config_cache(options, test_sys)
|
||||
|
@ -190,8 +192,8 @@ if len(bm) == 2:
|
|||
elif buildEnv['TARGET_ISA'] == 'x86':
|
||||
drive_sys = makeX86System(drive_mem_mode, np, bm[1])
|
||||
elif buildEnv['TARGET_ISA'] == 'arm':
|
||||
drive_sys = makeArmSystem(drive_mem_mode,
|
||||
machine_options.machine_type, bm[1])
|
||||
drive_sys = makeArmSystem(drive_mem_mode, options.machine_type, bm[1])
|
||||
|
||||
drive_sys.cpu = DriveCPUClass(cpu_id=0)
|
||||
drive_sys.cpu.connectAllPorts(drive_sys.membus)
|
||||
if options.fastmem:
|
||||
|
@ -199,6 +201,7 @@ if len(bm) == 2:
|
|||
if options.kernel is not None:
|
||||
drive_sys.kernel = binary(options.kernel)
|
||||
|
||||
drive_sys.init_param = options.init_param
|
||||
root = makeDualRoot(test_sys, drive_sys, options.etherdump)
|
||||
elif len(bm) == 1:
|
||||
root = Root(system=test_sys)
|
||||
|
|
|
@ -67,7 +67,8 @@ def define_options(parser):
|
|||
def create_system(options, system, piobus = None, dma_devices = []):
|
||||
|
||||
system.ruby = RubySystem(clock = options.clock,
|
||||
stats_filename = options.ruby_stats)
|
||||
stats_filename = options.ruby_stats,
|
||||
no_mem_vec = options.use_map)
|
||||
ruby = system.ruby
|
||||
|
||||
protocol = buildEnv['PROTOCOL']
|
||||
|
@ -154,11 +155,8 @@ def create_system(options, system, piobus = None, dma_devices = []):
|
|||
|
||||
ruby_profiler = RubyProfiler(ruby_system = ruby,
|
||||
num_of_sequencers = len(cpu_sequencers))
|
||||
ruby_tracer = RubyTracer(ruby_system = ruby)
|
||||
|
||||
ruby.network = network
|
||||
ruby.profiler = ruby_profiler
|
||||
ruby.tracer = ruby_tracer
|
||||
ruby.mem_size = total_mem_size
|
||||
ruby._cpu_ruby_ports = cpu_sequencers
|
||||
ruby.random_seed = options.random_seed
|
||||
|
|
|
@ -91,7 +91,7 @@ ElfFile('libelf_msize.c')
|
|||
|
||||
m4env = main.Clone()
|
||||
if m4env['GCC']:
|
||||
major,minor,dot = [ int(x) for x in m4env['CXXVERSION'].split('.')]
|
||||
major,minor,dot = [int(x) for x in m4env['GCC_VERSION'].split('.')]
|
||||
if major >= 4:
|
||||
m4env.Append(CCFLAGS=['-Wno-pointer-sign'])
|
||||
m4env.Append(CCFLAGS=['-Wno-implicit'])
|
||||
|
|
|
@ -851,8 +851,8 @@ def makeEnv(label, objsfx, strip = False, **kwargs):
|
|||
swig_env.Append(CCFLAGS='-Wno-uninitialized')
|
||||
swig_env.Append(CCFLAGS='-Wno-sign-compare')
|
||||
swig_env.Append(CCFLAGS='-Wno-parentheses')
|
||||
if compareVersions(env['GCC_VERSION'], '4.6.0') != -1:
|
||||
swig_env.Append(CCFLAGS='-Wno-unused-label')
|
||||
if compareVersions(env['GCC_VERSION'], '4.6.0') != -1:
|
||||
swig_env.Append(CCFLAGS='-Wno-unused-but-set-variable')
|
||||
|
||||
werror_env = new_env.Clone()
|
||||
|
|
|
@ -190,12 +190,15 @@ let {{
|
|||
exec_output += PredOpExecute.subst(loadsymbolIop)
|
||||
|
||||
initparamCode = '''
|
||||
Rt = PseudoInst::initParam(xc->tcBase());
|
||||
uint64_t ip_val = PseudoInst::initParam(xc->tcBase());
|
||||
R0 = bits(ip_val, 31, 0);
|
||||
R1 = bits(ip_val, 63, 32);
|
||||
'''
|
||||
|
||||
initparamIop = InstObjParams("initparam", "Initparam", "PredOp",
|
||||
{ "code": initparamCode,
|
||||
"predicate_test": predicateTest })
|
||||
"predicate_test": predicateTest },
|
||||
["IsNonSpeculative"])
|
||||
header_output += BasicDeclare.subst(initparamIop)
|
||||
decoder_output += BasicConstructor.subst(initparamIop)
|
||||
exec_output += PredOpExecute.subst(initparamIop)
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
* Authors: Gabe Black
|
||||
* Korey Sewell
|
||||
* Jaidev Patwardhan
|
||||
* Zhengxing Li
|
||||
* Deyuan Guo
|
||||
*/
|
||||
|
||||
#include "arch/mips/faults.hh"
|
||||
|
@ -118,7 +120,7 @@ MipsFaultBase::setExceptionState(ThreadContext *tc, uint8_t excCode)
|
|||
DPRINTF(MipsPRA, "PC: %s\n", pc);
|
||||
bool delay_slot = pc.pc() + sizeof(MachInst) != pc.npc();
|
||||
tc->setMiscRegNoEffect(MISCREG_EPC,
|
||||
pc.pc() - delay_slot ? sizeof(MachInst) : 0);
|
||||
pc.pc() - (delay_slot ? sizeof(MachInst) : 0));
|
||||
|
||||
// Set Cause_EXCCODE field
|
||||
CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
* Authors: Gabe Black
|
||||
* Korey Sewell
|
||||
* Jaidev Patwardhan
|
||||
* Zhengxing Li
|
||||
* Deyuan Guo
|
||||
*/
|
||||
|
||||
#ifndef __MIPS_FAULTS_HH__
|
||||
|
@ -88,7 +90,7 @@ class MipsFaultBase : public FaultBase
|
|||
virtual FaultVect base(ThreadContext *tc) const
|
||||
{
|
||||
StatusReg status = tc->readMiscReg(MISCREG_STATUS);
|
||||
if (status.bev)
|
||||
if (!status.bev)
|
||||
return tc->readMiscReg(MISCREG_EBASE);
|
||||
else
|
||||
return 0xbfc00200;
|
||||
|
@ -167,7 +169,7 @@ class CoprocessorUnusableFault : public MipsFault<CoprocessorUnusableFault>
|
|||
if (FullSystem) {
|
||||
CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
|
||||
cause.ce = coProcID;
|
||||
tc->setMiscReg(MISCREG_CAUSE, cause);
|
||||
tc->setMiscRegNoEffect(MISCREG_CAUSE, cause);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -179,7 +181,8 @@ class InterruptFault : public MipsFault<InterruptFault>
|
|||
offset(ThreadContext *tc) const
|
||||
{
|
||||
CauseReg cause = tc->readMiscRegNoEffect(MISCREG_CAUSE);
|
||||
return cause.iv ? 0x200 : 0x000;
|
||||
// offset 0x200 for release 2, 0x180 for release 1.
|
||||
return cause.iv ? 0x200 : 0x180;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -251,9 +254,10 @@ class TlbFault : public AddressFault<T>
|
|||
StaticInstPtr inst = StaticInst::nullStaticInstPtr)
|
||||
{
|
||||
if (FullSystem) {
|
||||
DPRINTF(MipsPRA, "Fault %s encountered.\n", name());
|
||||
tc->pcState(this->vect(tc));
|
||||
DPRINTF(MipsPRA, "Fault %s encountered.\n", this->name());
|
||||
Addr vect = this->vect(tc);
|
||||
setTlbExceptionState(tc, this->code());
|
||||
tc->pcState(vect);
|
||||
} else {
|
||||
AddressFault<T>::invoke(tc, inst);
|
||||
}
|
||||
|
|
|
@ -1253,7 +1253,7 @@ decode OPCODE_HI default Unknown::unknown() {
|
|||
//When rs=L1
|
||||
//Note: "1. Format type L is legal only if 64-bit
|
||||
//floating point operations are enabled."
|
||||
0x5: decode FUNCTION_HI {
|
||||
0x5: decode FUNCTION {
|
||||
format FloatConvertOp {
|
||||
0x20: cvt_s_l({{ val = Fs_ud; }}, ToSingle);
|
||||
0x21: cvt_d_l({{ val = Fs_ud; }}, ToDouble);
|
||||
|
|
|
@ -55,7 +55,7 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process,
|
|||
|
||||
strcpy(name->sysname, "Linux");
|
||||
strcpy(name->nodename,"m5.eecs.umich.edu");
|
||||
strcpy(name->release, "2.4.20");
|
||||
strcpy(name->release, "2.6.35");
|
||||
strcpy(name->version, "#1 Mon Aug 18 11:32:15 EDT 2003");
|
||||
strcpy(name->machine, "mips");
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; //HI & LO Regs
|
|||
const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;//
|
||||
|
||||
const uint32_t MIPS32_QNAN = 0x7fbfffff;
|
||||
const uint64_t MIPS64_QNAN = ULL(0x7fbfffffffffffff);
|
||||
const uint64_t MIPS64_QNAN = ULL(0x7ff7ffffffffffff);
|
||||
|
||||
enum FPControlRegNums {
|
||||
FLOATREG_FIR = NumFloatArchRegs,
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
* Authors: Nathan Binkert
|
||||
* Steve Reinhardt
|
||||
* Jaidev Patwardhan
|
||||
* Zhengxing Li
|
||||
* Deyuan Guo
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
@ -310,18 +312,6 @@ Fault
|
|||
TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
|
||||
{
|
||||
if (!FullSystem) {
|
||||
//@TODO: This should actually use TLB instead of going directly
|
||||
// to the page table in syscall mode.
|
||||
/**
|
||||
* Check for alignment faults
|
||||
*/
|
||||
if (req->getVaddr() & (req->getSize() - 1)) {
|
||||
DPRINTF(TLB, "Alignment Fault on %#x, size = %d", req->getVaddr(),
|
||||
req->getSize());
|
||||
return new AddressErrorFault(req->getVaddr(), write);
|
||||
}
|
||||
|
||||
|
||||
Process * p = tc->getProcessPtr();
|
||||
|
||||
Fault fault = p->pTable->translate(req);
|
||||
|
|
|
@ -42,26 +42,34 @@ microcode = '''
|
|||
def macroop IN_R_I {
|
||||
.adjust_imm trimImm(8)
|
||||
limm t1, imm, dataSize=asz
|
||||
mfence
|
||||
ld reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
|
||||
def macroop IN_R_R {
|
||||
zexti t2, regm, 15, dataSize=8
|
||||
mfence
|
||||
ld reg, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
|
||||
def macroop OUT_I_R {
|
||||
.adjust_imm trimImm(8)
|
||||
limm t1, imm, dataSize=8
|
||||
mfence
|
||||
st reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
|
||||
def macroop OUT_R_R {
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
mfence
|
||||
st regm, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
'''
|
||||
|
|
|
@ -45,9 +45,11 @@ def macroop INS_M_R {
|
|||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
st t6, es, [1, t0, rdi]
|
||||
mfence
|
||||
|
||||
add rdi, rdi, t3, dataSize=asz
|
||||
};
|
||||
|
@ -63,6 +65,7 @@ def macroop INS_E_M_R {
|
|||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
topOfLoop:
|
||||
ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
|
@ -72,6 +75,7 @@ topOfLoop:
|
|||
add rdi, rdi, t3, dataSize=asz
|
||||
br label("topOfLoop"), flags=(nCEZF,)
|
||||
end:
|
||||
mfence
|
||||
fault "NoFault"
|
||||
};
|
||||
|
||||
|
@ -84,9 +88,11 @@ def macroop OUTS_R_M {
|
|||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
ld t6, ds, [1, t0, rsi]
|
||||
st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
|
||||
add rsi, rsi, t3, dataSize=asz
|
||||
};
|
||||
|
@ -102,6 +108,7 @@ def macroop OUTS_E_R_M {
|
|||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
topOfLoop:
|
||||
ld t6, ds, [1, t0, rsi]
|
||||
st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
|
@ -111,6 +118,7 @@ topOfLoop:
|
|||
add rsi, rsi, t3, dataSize=asz
|
||||
br label("topOfLoop"), flags=(nCEZF,)
|
||||
end:
|
||||
mfence
|
||||
fault "NoFault"
|
||||
};
|
||||
'''
|
||||
|
|
|
@ -30,6 +30,12 @@
|
|||
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/shared_region.h>
|
||||
#include <mach/task.h>
|
||||
#endif
|
||||
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
#include <cmath>
|
||||
|
@ -86,3 +92,27 @@ procInfo(const char *filename, const char *target)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Report the virtual memory usage of the simulator process.
 *
 * When built with __APPLE__ defined, the value is taken from task_info()
 * on the current task: virtual_size minus SHARED_REGION_SIZE, divided by
 * 1024. If task_info() does not return KERN_SUCCESS, 0 is returned.
 * On every other build the value is whatever procInfo() extracts for the
 * "VmSize:" entry of /proc/self/status.
 *
 * @return virtual memory usage; the divide by 1024 suggests kilobytes
 *         (NOTE(review): assumes virtual_size is in bytes -- confirm)
 */
uint64_t
|
||||
memUsage()
|
||||
{
|
||||
// For the Mach-based Darwin kernel, use the task_info of the self task
|
||||
#ifdef __APPLE__
|
||||
struct task_basic_info t_info;
|
||||
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
|
||||
|
||||
if (KERN_SUCCESS != task_info(mach_task_self(),
|
||||
TASK_BASIC_INFO, (task_info_t)&t_info,
|
||||
&t_info_count)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Mimic Darwin's implementation of top and subtract
|
||||
// SHARED_REGION_SIZE from the task's virtual size to account for the
|
||||
// shared memory submap that is incorporated into every process.
|
||||
return (t_info.virtual_size - SHARED_REGION_SIZE) / 1024;
|
||||
#else
|
||||
// Linux implementation
|
||||
return procInfo("/proc/self/status", "VmSize:");
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -39,7 +39,11 @@ std::string &hostname();
|
|||
|
||||
uint64_t procInfo(const char *filename, const char *target);
|
||||
|
||||
inline uint64_t memUsage()
|
||||
{ return procInfo("/proc/self/status", "VmSize:"); }
|
||||
/**
|
||||
* Determine the simulator process' total virtual memory usage.
|
||||
*
|
||||
* @return virtual memory usage in kilobytes
|
||||
*/
|
||||
uint64_t memUsage();
|
||||
|
||||
#endif // __HOSTINFO_HH__
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
* Ali Saidi
|
||||
*/
|
||||
|
||||
#include <limits>
|
||||
#include "base/fenv.hh"
|
||||
#include "base/intmath.hh"
|
||||
#include "base/misc.hh"
|
||||
|
@ -67,7 +68,10 @@ Random::genrand(uint32_t max)
|
|||
{
|
||||
if (max == 0)
|
||||
return 0;
|
||||
int log = ceilLog2(max) + 1;
|
||||
if (max == std::numeric_limits<uint32_t>::max())
|
||||
return genrand();
|
||||
|
||||
int log = ceilLog2(max + 1);
|
||||
int shift = (sizeof(uint32_t) * 8 - log);
|
||||
uint32_t random;
|
||||
|
||||
|
@ -83,7 +87,10 @@ Random::genrand(uint64_t max)
|
|||
{
|
||||
if (max == 0)
|
||||
return 0;
|
||||
int log = ceilLog2(max) + 1;
|
||||
if (max == std::numeric_limits<uint64_t>::max())
|
||||
return genrand();
|
||||
|
||||
int log = ceilLog2(max + 1);
|
||||
int shift = (sizeof(uint64_t) * 8 - log);
|
||||
uint64_t random;
|
||||
|
||||
|
|
|
@ -1477,6 +1477,8 @@ class HistStor
|
|||
|
||||
/** The current sum. */
|
||||
Counter sum;
|
||||
/** The sum of the logarithms of the samples, used to compute the geometric mean. */
|
||||
Counter logs;
|
||||
/** The sum of squares. */
|
||||
Counter squares;
|
||||
/** The number of samples. */
|
||||
|
@ -1528,6 +1530,7 @@ class HistStor
|
|||
|
||||
sum += val * number;
|
||||
squares += val * val * number;
|
||||
logs += log(val) * number;
|
||||
samples += number;
|
||||
}
|
||||
|
||||
|
@ -1567,6 +1570,7 @@ class HistStor
|
|||
data.cvec[i] = cvec[i];
|
||||
|
||||
data.sum = sum;
|
||||
data.logs = logs;
|
||||
data.squares = squares;
|
||||
data.samples = samples;
|
||||
}
|
||||
|
@ -1589,6 +1593,7 @@ class HistStor
|
|||
sum = Counter();
|
||||
squares = Counter();
|
||||
samples = Counter();
|
||||
logs = Counter();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -183,6 +183,7 @@ struct DistData
|
|||
VCounter cvec;
|
||||
Counter sum;
|
||||
Counter squares;
|
||||
Counter logs;
|
||||
Counter samples;
|
||||
};
|
||||
|
||||
|
|
|
@ -367,6 +367,12 @@ DistPrint::operator()(ostream &stream) const
|
|||
print.value = data.samples ? data.sum / data.samples : NAN;
|
||||
print(stream);
|
||||
|
||||
if (data.type == Hist) {
|
||||
print.name = base + "gmean";
|
||||
print.value = data.samples ? exp(data.logs / data.samples) : NAN;
|
||||
print(stream);
|
||||
}
|
||||
|
||||
Result stdev = NAN;
|
||||
if (data.samples)
|
||||
stdev = sqrt((data.samples * data.squares - data.sum * data.sum) /
|
||||
|
@ -507,7 +513,14 @@ Text::visit(const Vector2dInfo &info)
|
|||
bool havesub = false;
|
||||
VectorPrint print;
|
||||
|
||||
if (!info.y_subnames.empty()) {
|
||||
for (off_type i = 0; i < info.y; ++i) {
|
||||
if (!info.y_subnames[i].empty()) {
|
||||
print.subnames = info.y_subnames;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
print.flags = info.flags;
|
||||
print.separatorString = info.separatorString;
|
||||
print.descriptions = descriptions;
|
||||
|
|
|
@ -284,17 +284,16 @@ class BaseCPU : public MemObject
|
|||
void enableFunctionTrace();
|
||||
void traceFunctionsInternal(Addr pc);
|
||||
|
||||
protected:
|
||||
private:
|
||||
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
|
||||
|
||||
public:
|
||||
void traceFunctions(Addr pc)
|
||||
{
|
||||
if (functionTracingEnabled)
|
||||
traceFunctionsInternal(pc);
|
||||
}
|
||||
|
||||
private:
|
||||
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
|
||||
|
||||
public:
|
||||
static int numSimulatedCPUs() { return cpuList.size(); }
|
||||
static Counter numSimulatedInstructions()
|
||||
{
|
||||
|
|
|
@ -299,6 +299,7 @@ MultDivUnit::exeMulDiv(int slot_num)
|
|||
}
|
||||
|
||||
mult_div_req->setProcessing(false);
|
||||
cpu->wakeCPU();
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
#include "config/use_checker.hh"
|
||||
#include "cpu/o3/commit.hh"
|
||||
#include "cpu/o3/thread_state.hh"
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/exetrace.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "debug/Activity.hh"
|
||||
|
@ -987,6 +988,8 @@ DefaultCommit<Impl>::commitInsts()
|
|||
// Updates misc. registers.
|
||||
head_inst->updateMiscRegs();
|
||||
|
||||
cpu->traceFunctions(pc[tid].instAddr());
|
||||
|
||||
TheISA::advancePC(pc[tid], head_inst->staticInst);
|
||||
|
||||
// Keep track of the last sequence number committed
|
||||
|
|
|
@ -446,10 +446,6 @@ void
|
|||
DefaultDecode<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_fetch = fromFetch->size;
|
||||
#ifdef DEBUG
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++)
|
||||
assert(insts[tid].empty());
|
||||
#endif
|
||||
for (int i = 0; i < insts_from_fetch; ++i) {
|
||||
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
|
||||
}
|
||||
|
|
|
@ -1340,10 +1340,10 @@ DefaultIEW<Impl>::executeInsts()
|
|||
fetchRedirect[tid] = true;
|
||||
|
||||
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
|
||||
DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
|
||||
inst->predInstAddr(), inst->predNextInstAddr());
|
||||
DPRINTF(IEW, "Predicted target was PC: %s.\n",
|
||||
inst->readPredTarg());
|
||||
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %s.\n",
|
||||
inst->pcState(), inst->nextInstAddr());
|
||||
inst->pcState());
|
||||
// If incorrect, then signal the ROB that it must be squashed.
|
||||
squashDueToBranch(inst, tid);
|
||||
|
||||
|
|
|
@ -766,10 +766,6 @@ void
|
|||
DefaultRename<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_decode = fromDecode->size;
|
||||
#ifdef DEBUG
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++)
|
||||
assert(insts[tid].empty());
|
||||
#endif
|
||||
for (int i = 0; i < insts_from_decode; ++i) {
|
||||
DynInstPtr inst = fromDecode->insts[i];
|
||||
insts[inst->threadNumber].push_back(inst);
|
||||
|
|
|
@ -83,7 +83,9 @@ PCEventQueue::schedule(PCEvent *event)
|
|||
bool
|
||||
PCEventQueue::doService(ThreadContext *tc)
|
||||
{
|
||||
Addr pc = tc->instAddr() & ~0x3;
|
||||
// This will fail to break on Alpha PALcode addresses, but that is
|
||||
// a rare use case.
|
||||
Addr pc = tc->instAddr();
|
||||
int serviced = 0;
|
||||
range_t range = equal_range(pc);
|
||||
for (iterator i = range.first; i != range.second; ++i) {
|
||||
|
@ -91,7 +93,7 @@ PCEventQueue::doService(ThreadContext *tc)
|
|||
// another event. This, for example, prevents two invocations
|
||||
// of the SkipFuncEvent. Maybe we should have separate PC
|
||||
// event queues for each processor?
|
||||
if (pc != (tc->instAddr() & ~0x3))
|
||||
if (pc != tc->instAddr())
|
||||
continue;
|
||||
|
||||
DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n",
|
||||
|
|
|
@ -43,7 +43,7 @@ class DirectedGenerator : public SimObject
|
|||
virtual ~DirectedGenerator() {}
|
||||
|
||||
virtual bool initiate() = 0;
|
||||
virtual void performCallback(uint proc, Addr address) = 0;
|
||||
virtual void performCallback(uint32_t proc, Addr address) = 0;
|
||||
|
||||
void setDirectedTester(RubyDirectedTester* directed_tester);
|
||||
|
||||
|
|
|
@ -103,7 +103,7 @@ InvalidateGenerator::initiate()
|
|||
}
|
||||
|
||||
void
|
||||
InvalidateGenerator::performCallback(uint proc, Addr address)
|
||||
InvalidateGenerator::performCallback(uint32_t proc, Addr address)
|
||||
{
|
||||
assert(m_address == address);
|
||||
|
||||
|
|
|
@ -49,14 +49,14 @@ class InvalidateGenerator : public DirectedGenerator
|
|||
~InvalidateGenerator();
|
||||
|
||||
bool initiate();
|
||||
void performCallback(uint proc, Addr address);
|
||||
void performCallback(uint32_t proc, Addr address);
|
||||
|
||||
private:
|
||||
InvalidateGeneratorStatus m_status;
|
||||
Addr m_address;
|
||||
uint m_active_read_node;
|
||||
uint m_active_inv_node;
|
||||
uint m_addr_increment_size;
|
||||
uint32_t m_active_read_node;
|
||||
uint32_t m_active_inv_node;
|
||||
uint32_t m_addr_increment_size;
|
||||
};
|
||||
|
||||
#endif //__CPU_DIRECTEDTEST_INVALIDATEGENERATOR_HH__
|
||||
|
|
|
@ -53,11 +53,11 @@ class RubyDirectedTester : public MemObject
|
|||
RubyDirectedTester *tester;
|
||||
|
||||
public:
|
||||
CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint _idx)
|
||||
CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint32_t _idx)
|
||||
: SimpleTimingPort(_name, _tester), tester(_tester), idx(_idx)
|
||||
{}
|
||||
|
||||
uint idx;
|
||||
uint32_t idx;
|
||||
|
||||
protected:
|
||||
virtual bool recvTiming(PacketPtr pkt);
|
||||
|
|
|
@ -89,7 +89,7 @@ SeriesRequestGenerator::initiate()
|
|||
}
|
||||
|
||||
void
|
||||
SeriesRequestGenerator::performCallback(uint proc, Addr address)
|
||||
SeriesRequestGenerator::performCallback(uint32_t proc, Addr address)
|
||||
{
|
||||
assert(m_active_node == proc);
|
||||
assert(m_address == address);
|
||||
|
|
|
@ -49,13 +49,13 @@ class SeriesRequestGenerator : public DirectedGenerator
|
|||
~SeriesRequestGenerator();
|
||||
|
||||
bool initiate();
|
||||
void performCallback(uint proc, Addr address);
|
||||
void performCallback(uint32_t proc, Addr address);
|
||||
|
||||
private:
|
||||
SeriesRequestGeneratorStatus m_status;
|
||||
Addr m_address;
|
||||
uint m_active_node;
|
||||
uint m_addr_increment_size;
|
||||
uint32_t m_active_node;
|
||||
uint32_t m_addr_increment_size;
|
||||
bool m_issue_writes;
|
||||
};
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ Pl111::Pl111(const Params *p)
|
|||
{
|
||||
pioSize = 0xFFFF;
|
||||
|
||||
pic = simout.create("framebuffer.bmp", true);
|
||||
pic = simout.create(csprintf("%s.framebuffer.bmp", sys->name()), true);
|
||||
|
||||
dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)];
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ void
|
|||
PioDevice::init()
|
||||
{
|
||||
if (!pioPort)
|
||||
panic("Pio port %s not connected to anything!", name());
|
||||
panic("Pio port of %s not connected to anything!", name());
|
||||
pioPort->sendStatusChange(Port::RangeChange);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ DebugFlag('MemoryAccess')
|
|||
|
||||
DebugFlag('ProtocolTrace')
|
||||
DebugFlag('RubyCache')
|
||||
DebugFlag('RubyCacheTrace')
|
||||
DebugFlag('RubyDma')
|
||||
DebugFlag('RubyGenerated')
|
||||
DebugFlag('RubyMemory')
|
||||
|
@ -67,9 +68,9 @@ DebugFlag('RubyPort')
|
|||
DebugFlag('RubyQueue')
|
||||
DebugFlag('RubySequencer')
|
||||
DebugFlag('RubySlicc')
|
||||
DebugFlag('RubyStorebuffer')
|
||||
DebugFlag('RubySystem')
|
||||
DebugFlag('RubyTester')
|
||||
|
||||
CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
|
||||
'RubyGenerated', 'RubySlicc', 'RubyStorebuffer', 'RubyCache',
|
||||
'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer'])
|
||||
'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
|
||||
'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer', 'RubyCacheTrace'])
|
||||
|
|
|
@ -447,13 +447,6 @@ Bus::recvAtomic(PacketPtr pkt)
|
|||
void
|
||||
Bus::recvFunctional(PacketPtr pkt)
|
||||
{
|
||||
if (!pkt->isPrint()) {
|
||||
// don't do DPRINTFs on PrintReq as it clutters up the output
|
||||
DPRINTF(Bus,
|
||||
"recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
|
||||
pkt->getSrc(), pkt->getDest(), pkt->getAddr(),
|
||||
pkt->cmdString());
|
||||
}
|
||||
assert(pkt->getDest() == Packet::Broadcast);
|
||||
|
||||
int port_id = findPort(pkt->getAddr());
|
||||
|
@ -462,6 +455,14 @@ Bus::recvFunctional(PacketPtr pkt)
|
|||
// id after each
|
||||
int src_id = pkt->getSrc();
|
||||
|
||||
if (!pkt->isPrint()) {
|
||||
// don't do DPRINTFs on PrintReq as it clutters up the output
|
||||
DPRINTF(Bus,
|
||||
"recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
|
||||
src_id, port_id, pkt->getAddr(),
|
||||
pkt->cmdString());
|
||||
}
|
||||
|
||||
assert(pkt->isRequest()); // hasn't already been satisfied
|
||||
|
||||
SnoopIter s_end = snoopPorts.end();
|
||||
|
|
|
@ -1,4 +1,16 @@
|
|||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
|
@ -192,14 +204,98 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data)
|
|||
memcpy(getPtr<uint8_t>(), data + offset, getSize());
|
||||
return true;
|
||||
} else {
|
||||
// In this case the timing packet only partially satisfies
|
||||
// the request, so we would need more information to make
|
||||
// this work. Like bytes valid in the packet or
|
||||
// something, so the request could continue and get this
|
||||
// bit of possibly newer data along with the older data
|
||||
// not written to yet.
|
||||
panic("Memory value only partially satisfies the functional "
|
||||
"request. Now what?");
|
||||
// Offsets and sizes to copy in case of partial overlap
|
||||
int func_offset;
|
||||
int val_offset;
|
||||
int overlap_size;
|
||||
|
||||
// calculate offsets and copy sizes for the two byte arrays
|
||||
if (val_start < func_start && val_end <= func_end) {
|
||||
val_offset = func_start - val_start;
|
||||
func_offset = 0;
|
||||
overlap_size = val_end - func_start;
|
||||
} else if (val_start >= func_start && val_end > func_end) {
|
||||
val_offset = 0;
|
||||
func_offset = val_start - func_start;
|
||||
overlap_size = func_end - val_start;
|
||||
} else if (val_start >= func_start && val_end <= func_end) {
|
||||
val_offset = 0;
|
||||
func_offset = val_start - func_start;
|
||||
overlap_size = size;
|
||||
} else {
|
||||
panic("BUG: Missed a case for a partial functional request");
|
||||
}
|
||||
|
||||
// Figure out how much of the partial overlap should be copied
|
||||
// into the packet and not overwrite previously found bytes.
|
||||
if (bytesValidStart == 0 && bytesValidEnd == 0) {
|
||||
// No bytes have been copied yet, just set indices
|
||||
// to found range
|
||||
bytesValidStart = func_offset;
|
||||
bytesValidEnd = func_offset + overlap_size;
|
||||
} else {
|
||||
// Some bytes have already been copied. Use bytesValid
|
||||
// indices and offset values to figure out how much data
|
||||
// to copy and where to copy it to.
|
||||
|
||||
// Index overlap conditions to check
|
||||
int a = func_offset - bytesValidStart;
|
||||
int b = (func_offset + overlap_size) - bytesValidEnd;
|
||||
int c = func_offset - bytesValidEnd;
|
||||
int d = (func_offset + overlap_size) - bytesValidStart;
|
||||
|
||||
if (a >= 0 && b <= 0) {
|
||||
// bytes already in pkt data array are superset of
|
||||
// found bytes, will not copy any bytes
|
||||
overlap_size = 0;
|
||||
} else if (a < 0 && d >= 0 && b <= 0) {
|
||||
// found bytes will move bytesValidStart towards 0
|
||||
overlap_size = bytesValidStart - func_offset;
|
||||
bytesValidStart = func_offset;
|
||||
} else if (b > 0 && c <= 0 && a >= 0) {
|
||||
// found bytes will move bytesValidEnd
|
||||
// towards end of pkt data array
|
||||
overlap_size =
|
||||
(func_offset + overlap_size) - bytesValidEnd;
|
||||
val_offset += bytesValidEnd - func_offset;
|
||||
func_offset = bytesValidEnd;
|
||||
bytesValidEnd += overlap_size;
|
||||
} else if (a < 0 && b > 0) {
|
||||
// Found bytes are superset of copied range. Will move
|
||||
// bytesValidStart towards 0 and bytesValidEnd towards
|
||||
// end of pkt data array. Need to break copy into two
|
||||
// pieces so as to not overwrite previously found data.
|
||||
|
||||
// copy the first half
|
||||
uint8_t *dest = getPtr<uint8_t>() + func_offset;
|
||||
uint8_t *src = data + val_offset;
|
||||
memcpy(dest, src, (bytesValidStart - func_offset));
|
||||
|
||||
// re-calc the offsets and indices to do the copy
|
||||
// required for the second half
|
||||
val_offset += (bytesValidEnd - func_offset);
|
||||
bytesValidStart = func_offset;
|
||||
overlap_size =
|
||||
(func_offset + overlap_size) - bytesValidEnd;
|
||||
func_offset = bytesValidEnd;
|
||||
bytesValidEnd += overlap_size;
|
||||
} else if ((c > 0 && b > 0)
|
||||
|| (a < 0 && d < 0)) {
|
||||
// region to be copied is discontiguous! Not supported.
|
||||
panic("BUG: Discontiguous bytes found"
|
||||
"for functional copying!");
|
||||
}
|
||||
}
|
||||
assert(bytesValidEnd <= getSize());
|
||||
|
||||
// copy partial data into the packet's data array
|
||||
uint8_t *dest = getPtr<uint8_t>() + func_offset;
|
||||
uint8_t *src = data + val_offset;
|
||||
memcpy(dest, src, overlap_size);
|
||||
|
||||
// check if we're done filling the functional access
|
||||
bool done = (bytesValidStart == 0) && (bytesValidEnd == getSize());
|
||||
return done;
|
||||
}
|
||||
} else if (isWrite()) {
|
||||
if (offset >= 0) {
|
||||
|
|
|
@ -299,6 +299,13 @@ class Packet : public FastAlloc, public Printable
|
|||
*/
|
||||
MemCmd origCmd;
|
||||
|
||||
/**
|
||||
* These values specify the range of bytes found that satisfy a
|
||||
* functional read.
|
||||
*/
|
||||
uint16_t bytesValidStart;
|
||||
uint16_t bytesValidEnd;
|
||||
|
||||
public:
|
||||
/// Used to calculate latencies for each packet.
|
||||
Tick time;
|
||||
|
@ -507,7 +514,8 @@ class Packet : public FastAlloc, public Printable
|
|||
*/
|
||||
Packet(Request *_req, MemCmd _cmd, NodeID _dest)
|
||||
: flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
|
||||
dest(_dest), time(curTick()), senderState(NULL)
|
||||
dest(_dest), bytesValidStart(0), bytesValidEnd(0),
|
||||
time(curTick()), senderState(NULL)
|
||||
{
|
||||
if (req->hasPaddr()) {
|
||||
addr = req->getPaddr();
|
||||
|
@ -526,7 +534,8 @@ class Packet : public FastAlloc, public Printable
|
|||
*/
|
||||
Packet(Request *_req, MemCmd _cmd, NodeID _dest, int _blkSize)
|
||||
: flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
|
||||
dest(_dest), time(curTick()), senderState(NULL)
|
||||
dest(_dest), bytesValidStart(0), bytesValidEnd(0),
|
||||
time(curTick()), senderState(NULL)
|
||||
{
|
||||
if (req->hasPaddr()) {
|
||||
addr = req->getPaddr() & ~(_blkSize - 1);
|
||||
|
@ -547,6 +556,7 @@ class Packet : public FastAlloc, public Printable
|
|||
: cmd(pkt->cmd), req(pkt->req),
|
||||
data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL),
|
||||
addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest),
|
||||
bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd),
|
||||
time(curTick()), senderState(pkt->senderState)
|
||||
{
|
||||
if (!clearFlags)
|
||||
|
@ -554,6 +564,7 @@ class Packet : public FastAlloc, public Printable
|
|||
|
||||
flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE|VALID_SRC|VALID_DST));
|
||||
flags.set(pkt->flags & STATIC_DATA);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1285,7 +1285,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
vv_allocateL2CacheBlock;
|
||||
hp_copyFromTBEToL2;
|
||||
s_deallocateTBE;
|
||||
ka_wakeUpAllDependents;
|
||||
}
|
||||
|
||||
transition(I, Trigger_L2_to_L1D, IT) {
|
||||
|
@ -1566,7 +1565,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
|||
k_popMandatoryQueue;
|
||||
}
|
||||
|
||||
transition({MM, M, MMR}, Flush_line, MM_F) {
|
||||
transition({MM, M, MMR, MR}, Flush_line, MM_F) {
|
||||
i_allocateTBE;
|
||||
bf_issueGETF;
|
||||
p_decrementNumberOfMessagesByOne;
|
||||
|
|
|
@ -198,7 +198,11 @@ MessageBuffer::enqueue(MsgPtr message, Time delta)
|
|||
m_last_arrival_time * g_eventQueue_ptr->getClock());
|
||||
}
|
||||
}
|
||||
|
||||
// If running a cache trace, don't worry about the last arrival checks
|
||||
if (!g_system_ptr->m_warmup_enabled) {
|
||||
m_last_arrival_time = arrival_time;
|
||||
}
|
||||
|
||||
// compute the delay cycles and set enqueue time
|
||||
Message* msg_ptr = message.get();
|
||||
|
|
|
@ -58,7 +58,6 @@
|
|||
|
||||
#include <iostream>
|
||||
|
||||
#include "config/no_vector_bounds_checks.hh"
|
||||
#include "mem/ruby/common/TypeDefines.hh"
|
||||
#include "sim/eventq.hh"
|
||||
|
||||
|
@ -77,9 +76,6 @@ class RubyEventQueue : public EventManager
|
|||
void scheduleEventAbsolute(Consumer* consumer, Time timeAbs);
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
void triggerEvents(Time t) { assert(0); }
|
||||
void triggerAllEvents() { assert(0); }
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
RubyEventQueue(const RubyEventQueue& obj);
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -26,43 +27,154 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
|
||||
#include "debug/RubyCacheTrace.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "gzstream.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
CacheRecorder::addRecord(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time)
|
||||
TraceRecord::print(ostream& out) const
|
||||
{
|
||||
TraceRecord rec(sequencer, data_addr, pc_addr, type, time);
|
||||
m_records.push_back(rec);
|
||||
out << "[TraceRecord: Node, " << m_cntrl_id << ", "
|
||||
<< m_data_address << ", " << m_pc_address << ", "
|
||||
<< m_type << ", Time: " << m_time << "]";
|
||||
}
|
||||
|
||||
int
|
||||
CacheRecorder::dumpRecords(string filename)
|
||||
CacheRecorder::CacheRecorder()
|
||||
: m_uncompressed_trace(NULL),
|
||||
m_uncompressed_trace_size(0)
|
||||
{
|
||||
ogzstream out(filename.c_str());
|
||||
if (out.fail()) {
|
||||
cout << "Error: error opening file '" << filename << "'" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::sort(m_records.begin(), m_records.end(), greater<TraceRecord>());
|
||||
// Construct a recorder around an already-loaded trace.
//
//   uncompressed_trace       pointer to the trace bytes; only the pointer
//                            is stored, the bytes are not copied
//   uncompressed_trace_size  length of that trace in bytes
//   seq_map                  sequencers, copied into the recorder and
//                            later indexed by a record's m_cntrl_id
//
// All progress counters (bytes read, records read, records flushed)
// start at zero.
CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
                             uint64_t uncompressed_trace_size,
                             std::vector<Sequencer*>& seq_map)
    : m_uncompressed_trace(uncompressed_trace),
      m_uncompressed_trace_size(uncompressed_trace_size),
      m_seq_map(seq_map),
      m_bytes_read(0),
      m_records_read(0),
      m_records_flushed(0)
{
    // Nothing else to set up; all state lives in the initializer list.
}
|
||||
|
||||
int size = m_records.size();
|
||||
for (int i = 0; i < size; ++i)
|
||||
m_records[i].output(out);
|
||||
|
||||
m_records.clear();
|
||||
|
||||
return size;
|
||||
// Release everything the recorder still holds: any records that were
// added but never aggregated, the uncompressed trace buffer, and the
// sequencer map entries (the sequencers themselves are not deleted).
CacheRecorder::~CacheRecorder()
{
    // Records are malloc'ed individually when they are added; free the
    // ones that were never handed to aggregateRecords(). Entries may be
    // NULL, which free() accepts.
    for (size_t i = 0; i < m_records.size(); ++i) {
        free(m_records[i]);
    }
    m_records.clear();

    if (m_uncompressed_trace != NULL) {
        // The trace is a raw byte array, so it has to be released with
        // the array form of delete; scalar delete on an array is
        // undefined behaviour.
        // NOTE(review): assumes the caller allocated the buffer with
        // new uint8_t[] -- confirm at the construction site.
        delete [] m_uncompressed_trace;
        m_uncompressed_trace = NULL;
    }
    m_seq_map.clear();
}
|
||||
|
||||
void
|
||||
CacheRecorder::print(ostream& out) const
|
||||
CacheRecorder::enqueueNextFlushRequest()
{
    // Issue a flush for the next recorded block, if any remain.
    // Each call issues at most one request.
    if (m_records_flushed >= m_records.size()) {
        return;
    }

    // Take the next record and advance the flush cursor.
    TraceRecord* rec = m_records[m_records_flushed++];

    // Build a block-sized flush request for the recorded data address.
    Request* flush_req = new Request(rec->m_data_address,
                                     RubySystem::getBlockSizeBytes(), 0);
    Packet* flush_pkt = new Packet(flush_req, MemCmd::FlushReq, -1);

    // Hand the packet to the sequencer registered for the record's
    // controller id.
    Sequencer* seq = m_seq_map[rec->m_cntrl_id];
    assert(seq != NULL);
    seq->makeRequest(flush_pkt);

    DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
}
|
||||
|
||||
void
|
||||
CacheRecorder::enqueueNextFetchRequest()
|
||||
{
|
||||
if (m_bytes_read < m_uncompressed_trace_size) {
|
||||
TraceRecord* traceRecord = (TraceRecord*) (m_uncompressed_trace +
|
||||
m_bytes_read);
|
||||
|
||||
DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord);
|
||||
Request* req = new Request();
|
||||
MemCmd::Command requestType;
|
||||
|
||||
if (traceRecord->m_type == RubyRequestType_LD) {
|
||||
requestType = MemCmd::ReadReq;
|
||||
req->setPhys(traceRecord->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),0);
|
||||
} else if (traceRecord->m_type == RubyRequestType_IFETCH) {
|
||||
requestType = MemCmd::ReadReq;
|
||||
req->setPhys(traceRecord->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),
|
||||
Request::INST_FETCH);
|
||||
} else {
|
||||
requestType = MemCmd::WriteReq;
|
||||
req->setPhys(traceRecord->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),0);
|
||||
}
|
||||
|
||||
Packet *pkt = new Packet(req, requestType, -1);
|
||||
pkt->dataStatic(traceRecord->m_data);
|
||||
|
||||
Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
|
||||
assert(m_sequencer_ptr != NULL);
|
||||
m_sequencer_ptr->makeRequest(pkt);
|
||||
|
||||
m_bytes_read += (sizeof(TraceRecord) +
|
||||
RubySystem::getBlockSizeBytes());
|
||||
m_records_read++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CacheRecorder::addRecord(int cntrl, const physical_address_t data_addr,
|
||||
const physical_address_t pc_addr,
|
||||
RubyRequestType type, Time time, DataBlock& data)
|
||||
{
|
||||
TraceRecord* rec = (TraceRecord*)malloc(sizeof(TraceRecord) +
|
||||
RubySystem::getBlockSizeBytes());
|
||||
rec->m_cntrl_id = cntrl;
|
||||
rec->m_time = time;
|
||||
rec->m_data_address = data_addr;
|
||||
rec->m_pc_address = pc_addr;
|
||||
rec->m_type = type;
|
||||
memcpy(rec->m_data, data.getData(0, RubySystem::getBlockSizeBytes()),
|
||||
RubySystem::getBlockSizeBytes());
|
||||
|
||||
m_records.push_back(rec);
|
||||
}
|
||||
|
||||
// Serialize all recorded blocks into one contiguous byte buffer.
//
//   buf         in/out: address of a heap array (released here with
//               delete [] when it has to grow); on return it points at
//               the buffer holding the serialized records
//   total_size  current capacity of *buf in bytes
//
// Returns the number of bytes written. The records are first sorted
// with compareTraceRecords, then copied back to back (header plus data
// block each) and freed; the record list is empty afterwards.
//
// NOTE: total_size is passed by value, so the caller is not told the
// new capacity when the buffer grows -- only the number of bytes used.
uint64
CacheRecorder::aggregateRecords(uint8_t** buf, uint64 total_size)
{
    std::sort(m_records.begin(), m_records.end(), compareTraceRecords);

    const uint64 record_size =
        sizeof(TraceRecord) + RubySystem::getBlockSizeBytes();
    const size_t num_records = m_records.size();
    uint64 current_size = 0;

    for (size_t i = 0; i < num_records; ++i) {
        // Grow the buffer when the next record does not fit.
        if (current_size + record_size > total_size) {
            // Keep doubling until the record really fits. A single
            // doubling is not enough when the initial capacity is zero
            // or smaller than half a record, and would let the copy
            // below run past the end of the buffer.
            uint64 new_size = (total_size > 0) ? total_size : record_size;
            while (current_size + record_size > new_size) {
                new_size *= 2;
            }

            uint8_t* new_buf = new (nothrow) uint8_t[new_size];
            if (new_buf == NULL) {
                fatal("Unable to allocate buffer of size %s\n",
                      new_size);
            }

            uint8_t* old_buf = *buf;
            if (current_size > 0) {
                memcpy(new_buf, old_buf, current_size);
            }
            *buf = new_buf;
            total_size = new_size;
            delete [] old_buf;
        }

        // Copy the current record (header and data) into the buffer.
        memcpy(*buf + current_size, m_records[i], record_size);
        current_size += record_size;

        // The record came from malloc in addRecord().
        free(m_records[i]);
        m_records[i] = NULL;
    }

    m_records.clear();
    return current_size;
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -34,37 +35,90 @@
|
|||
#ifndef __MEM_RUBY_RECORDER_CACHERECORDER_HH__
|
||||
#define __MEM_RUBY_RECORDER_CACHERECORDER_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/recorder/TraceRecord.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/common/TypeDefines.hh"
|
||||
|
||||
class Address;
|
||||
class TraceRecord;
|
||||
class Sequencer;
|
||||
|
||||
/*!
|
||||
* Class for recording cache contents. Note that the last element of the
|
||||
* class is an array of length zero. It is used for creating variable
|
||||
* length object, so that while writing the data to a file one does not
|
||||
* need to copy the meta data and the actual data separately.
|
||||
*/
|
||||
class TraceRecord {
|
||||
public:
|
||||
int m_cntrl_id;
|
||||
Time m_time;
|
||||
physical_address_t m_data_address;
|
||||
physical_address_t m_pc_address;
|
||||
RubyRequestType m_type;
|
||||
uint8_t m_data[0];
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
};
|
||||
|
||||
class CacheRecorder
|
||||
{
|
||||
public:
|
||||
void addRecord(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time);
|
||||
int dumpRecords(std::string filename);
|
||||
CacheRecorder();
|
||||
~CacheRecorder();
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
CacheRecorder(uint8_t* uncompressed_trace,
|
||||
uint64_t uncompressed_trace_size,
|
||||
std::vector<Sequencer*>& SequencerMap);
|
||||
void addRecord(int cntrl, const physical_address_t data_addr,
|
||||
const physical_address_t pc_addr, RubyRequestType type,
|
||||
Time time, DataBlock& data);
|
||||
|
||||
uint64 aggregateRecords(uint8_t** data, uint64 size);
|
||||
|
||||
/*!
|
||||
* Function for flushing the memory contents of the caches to the
|
||||
* main memory. It goes through the recorded contents of the caches,
|
||||
* and issues flush requests. Except for the first one, a flush request
|
||||
* is issued only after the previous one has completed. This currently
|
||||
* requires use of MOESI Hammer protocol since only that protocol
|
||||
* supports flush requests.
|
||||
*/
|
||||
void enqueueNextFlushRequest();
|
||||
|
||||
/*!
|
||||
* Function for warming up the memory and the caches. It goes
|
||||
* through the recorded contents of the caches, as available in the
|
||||
* checkpoint and issues fetch requests. Except for the first one, a
|
||||
* fetch request is issued only after the previous one has completed.
|
||||
* It should be possible to use this with any protocol.
|
||||
*/
|
||||
void enqueueNextFetchRequest();
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
CacheRecorder(const CacheRecorder& obj);
|
||||
CacheRecorder& operator=(const CacheRecorder& obj);
|
||||
|
||||
std::vector<TraceRecord> m_records;
|
||||
std::vector<TraceRecord*> m_records;
|
||||
uint8_t* m_uncompressed_trace;
|
||||
uint64_t m_uncompressed_trace_size;
|
||||
std::vector<Sequencer*> m_seq_map;
|
||||
uint64_t m_bytes_read;
|
||||
uint64_t m_records_read;
|
||||
uint64_t m_records_flushed;
|
||||
};
|
||||
|
||||
inline bool
|
||||
compareTraceRecords(const TraceRecord* n1, const TraceRecord* n2)
|
||||
{
|
||||
return n1->m_time > n2->m_time;
|
||||
}
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const CacheRecorder& obj)
|
||||
operator<<(std::ostream& out, const TraceRecord& obj)
|
||||
{
|
||||
obj.print(out);
|
||||
out << std::flush;
|
||||
|
|
|
@ -33,8 +33,4 @@ Import('*')
|
|||
if env['PROTOCOL'] == 'None':
|
||||
Return()
|
||||
|
||||
SimObject('Tracer.py')
|
||||
|
||||
Source('CacheRecorder.cc')
|
||||
Source('Tracer.cc')
|
||||
Source('TraceRecord.cc', Werror=False)
|
||||
|
|
|
@ -1,139 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "mem/protocol/RubyRequest.hh"
|
||||
#include "mem/ruby/recorder/TraceRecord.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Build a record for one request seen by the given sequencer.
// Load-linked and store-conditional requests are stored as plain
// stores, so the trace does not distinguish atomics from store misses.
TraceRecord::TraceRecord(Sequencer* _sequencer, const Address& data_addr,
                         const Address& pc_addr, RubyRequestType type,
                         Time time)
{
    const bool is_atomic = (type == RubyRequestType_Load_Linked) ||
                           (type == RubyRequestType_Store_Conditional);

    m_type = is_atomic ? RubyRequestType_ST : type;
    m_time = time;
    m_pc_address = pc_addr;
    m_data_address = data_addr;
    m_sequencer_ptr = _sequencer;
}
|
||||
|
||||
// Copy constructor: members are default-initialized and then filled in
// by the assignment operator.
TraceRecord::TraceRecord(const TraceRecord& obj)
{
    operator=(obj);
}
|
||||
|
||||
// Member-wise assignment; the sequencer pointer is copied, not the
// sequencer it points to.
TraceRecord&
TraceRecord::operator=(const TraceRecord& obj)
{
    m_type = obj.m_type;
    m_pc_address = obj.m_pc_address;
    m_data_address = obj.m_data_address;
    m_time = obj.m_time;
    m_sequencer_ptr = obj.m_sequencer_ptr;
    return *this;
}
|
||||
|
||||
void
|
||||
TraceRecord::issueRequest() const
|
||||
{
|
||||
assert(m_sequencer_ptr != NULL);
|
||||
Request req(m_data_address.getAddress(), 0, 0);
|
||||
Packet *pkt = new Packet(&req, MemCmd(MemCmd::InvalidCmd), -1);
|
||||
|
||||
// Clear out the sequencer
|
||||
while (!m_sequencer_ptr->empty()) {
|
||||
g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
|
||||
}
|
||||
|
||||
m_sequencer_ptr->makeRequest(pkt);
|
||||
|
||||
// Clear out the sequencer
|
||||
while (!m_sequencer_ptr->empty()) {
|
||||
g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TraceRecord::print(ostream& out) const
|
||||
{
|
||||
out << "[TraceRecord: Node, " << m_sequencer_ptr->name() << ", "
|
||||
<< m_data_address << ", " << m_pc_address << ", "
|
||||
<< m_type << ", Time: " << m_time << "]";
|
||||
}
|
||||
|
||||
void
|
||||
TraceRecord::output(ostream& out) const
|
||||
{
|
||||
out << m_sequencer_ptr->name() << " ";
|
||||
m_data_address.output(out);
|
||||
out << " ";
|
||||
m_pc_address.output(out);
|
||||
out << " ";
|
||||
out << m_type;
|
||||
out << endl;
|
||||
}
|
||||
|
||||
bool
|
||||
TraceRecord::input(istream& in)
|
||||
{
|
||||
string sequencer_name;
|
||||
in >> sequencer_name;
|
||||
|
||||
// The SimObject find function is slow and iterates through the
|
||||
// simObjectList to find the sequencer pointer. Therefore, expect
|
||||
// trace playback to be slow.
|
||||
m_sequencer_ptr = (Sequencer*)SimObject::find(sequencer_name.c_str());
|
||||
|
||||
m_data_address.input(in);
|
||||
m_pc_address.input(in);
|
||||
if (in.eof())
|
||||
return false;
|
||||
|
||||
string type;
|
||||
in >> type;
|
||||
m_type = string_to_RubyRequestType(type);
|
||||
|
||||
// Ignore the rest of the line
|
||||
char c = '\0';
|
||||
while ((!in.eof()) && (c != '\n')) {
|
||||
in.get(c);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
|
@ -1,91 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* An entry in the cache request record. It is aware of the ruby time
|
||||
* and can issue the request back to the cache.
|
||||
*/
|
||||
|
||||
#ifndef __MEM_RUBY_RECORDER_TRACERECORD_HH__
|
||||
#define __MEM_RUBY_RECORDER_TRACERECORD_HH__
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
|
||||
class CacheMsg;
|
||||
|
||||
class TraceRecord
|
||||
{
|
||||
public:
|
||||
TraceRecord(Sequencer* _sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time);
|
||||
|
||||
TraceRecord()
|
||||
{
|
||||
m_sequencer_ptr = NULL;
|
||||
m_time = 0;
|
||||
m_type = RubyRequestType_NULL;
|
||||
}
|
||||
|
||||
TraceRecord(const TraceRecord& obj);
|
||||
TraceRecord& operator=(const TraceRecord& obj);
|
||||
|
||||
void issueRequest() const;
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
void output(std::ostream& out) const;
|
||||
bool input(std::istream& in);
|
||||
|
||||
private:
|
||||
friend bool operator>(const TraceRecord& n1, const TraceRecord& n2);
|
||||
|
||||
Sequencer* m_sequencer_ptr;
|
||||
Time m_time;
|
||||
Address m_data_address;
|
||||
Address m_pc_address;
|
||||
RubyRequestType m_type;
|
||||
};
|
||||
|
||||
inline bool
|
||||
operator>(const TraceRecord& n1, const TraceRecord& n2)
|
||||
{
|
||||
return n1.m_time > n2.m_time;
|
||||
}
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const TraceRecord& obj)
|
||||
{
|
||||
obj.print(out);
|
||||
out << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_RECORDER_TRACERECORD_HH__
|
|
@ -1,135 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "base/cprintf.hh"
|
||||
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
|
||||
#include "mem/ruby/recorder/TraceRecord.hh"
|
||||
#include "mem/ruby/recorder/Tracer.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Create a disabled tracer, take the warmup length from the
// parameters (it must be positive) and register with the Ruby system.
Tracer::Tracer(const Params *p)
    : SimObject(p), m_enabled(false), m_warmup_length(p->warmup_length)
{
    assert(m_warmup_length > 0);
    p->ruby_system->registerTracer(this);
}
|
||||
|
||||
void
|
||||
Tracer::startTrace(string filename)
|
||||
{
|
||||
if (m_enabled)
|
||||
stopTrace();
|
||||
|
||||
if (filename != "") {
|
||||
m_trace_file.open(filename.c_str());
|
||||
if (m_trace_file.fail()) {
|
||||
cprintf("Error: error opening file '%s'\n", filename);
|
||||
cprintf("Trace not enabled.\n");
|
||||
return;
|
||||
}
|
||||
cprintf("Request trace enabled to output file '%s'\n", filename);
|
||||
m_enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Tracer::stopTrace()
|
||||
{
|
||||
if (m_enabled) {
|
||||
m_trace_file.close();
|
||||
cout << "Request trace file closed." << endl;
|
||||
m_enabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Tracer::traceRequest(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time)
|
||||
{
|
||||
assert(m_enabled);
|
||||
TraceRecord tr(sequencer, data_addr, pc_addr, type, time);
|
||||
tr.output(m_trace_file);
|
||||
}
|
||||
|
||||
int
|
||||
Tracer::playbackTrace(string filename)
|
||||
{
|
||||
igzstream in(filename.c_str());
|
||||
if (in.fail()) {
|
||||
cprintf("Error: error opening file '%s'\n", filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
time_t start_time = time(NULL);
|
||||
|
||||
TraceRecord record;
|
||||
int counter = 0;
|
||||
// Read in the next TraceRecord
|
||||
bool ok = record.input(in);
|
||||
while (ok) {
|
||||
// Put it in the right cache
|
||||
record.issueRequest();
|
||||
counter++;
|
||||
|
||||
// Read in the next TraceRecord
|
||||
ok = record.input(in);
|
||||
|
||||
// Clear the statistics after warmup
|
||||
if (counter == m_warmup_length) {
|
||||
cprintf("Clearing stats after warmup of length %s\n",
|
||||
m_warmup_length);
|
||||
g_system_ptr->clearStats();
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the prefetches through the system
|
||||
// FIXME - should be smarter
|
||||
g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 1000);
|
||||
|
||||
time_t stop_time = time(NULL);
|
||||
double seconds = difftime(stop_time, start_time);
|
||||
double minutes = seconds / 60.0;
|
||||
cout << "playbackTrace: " << minutes << " minutes" << endl;
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
void
|
||||
Tracer::print(ostream& out) const
|
||||
{
|
||||
}
|
||||
|
||||
// Factory hook: build a Tracer from this parameter object. The caller
// receives a newly allocated object.
Tracer *
RubyTracerParams::create()
{
    Tracer *tracer = new Tracer(this);
    return tracer;
}
|
|
@ -1,86 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Controller class of the tracer. Can stop/start/playback the ruby
|
||||
* cache requests trace.
|
||||
*/
|
||||
|
||||
#ifndef __MEM_RUBY_RECORDER_TRACER_HH__
|
||||
#define __MEM_RUBY_RECORDER_TRACER_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "params/RubyTracer.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
#include "gzstream.hh"
|
||||
|
||||
class Address;
|
||||
class TraceRecord;
|
||||
class Sequencer;
|
||||
|
||||
class Tracer : public SimObject
|
||||
{
|
||||
public:
|
||||
typedef RubyTracerParams Params;
|
||||
Tracer(const Params *p);
|
||||
|
||||
void startTrace(std::string filename);
|
||||
void stopTrace();
|
||||
bool traceEnabled() { return m_enabled; }
|
||||
void traceRequest(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time);
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
int playbackTrace(std::string filename);
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
Tracer(const Tracer& obj);
|
||||
Tracer& operator=(const Tracer& obj);
|
||||
|
||||
ogzstream m_trace_file;
|
||||
bool m_enabled;
|
||||
|
||||
//added by SS
|
||||
int m_warmup_length;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const Tracer& obj)
|
||||
{
|
||||
obj.print(out);
|
||||
out << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_RECORDER_TRACER_HH__
|
|
@ -1,37 +0,0 @@
|
|||
# Copyright (c) 2009 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Steve Reinhardt
|
||||
# Brad Beckmann
|
||||
|
||||
from m5.params import *
|
||||
from m5.SimObject import SimObject
|
||||
|
||||
class RubyTracer(SimObject):
|
||||
type = 'RubyTracer'
|
||||
cxx_class = 'Tracer'
|
||||
warmup_length = Param.Int(100000, "")
|
||||
ruby_system = Param.RubySystem("")
|
|
@ -33,12 +33,11 @@
|
|||
#include <string>
|
||||
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/protocol/MachineType.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Consumer.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/network/Network.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "params/RubyController.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
|
@ -68,6 +67,8 @@ class AbstractController : public SimObject, public Consumer
|
|||
virtual void wakeup() = 0;
|
||||
// virtual void dumpStats(std::ostream & out) = 0;
|
||||
virtual void clearStats() = 0;
|
||||
virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
|
||||
virtual Sequencer* getSequencer() const = 0;
|
||||
};
|
||||
|
||||
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -28,7 +28,9 @@
|
|||
|
||||
#include "base/intmath.hh"
|
||||
#include "debug/RubyCache.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/ruby/system/CacheMemory.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -364,10 +366,15 @@ CacheMemory::profileGenericRequest(GenericRequestType requestType,
|
|||
}
|
||||
|
||||
void
|
||||
CacheMemory::recordCacheContents(CacheRecorder& tr) const
|
||||
CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
|
||||
{
|
||||
uint64 warmedUpBlocks = 0;
|
||||
uint64 totalBlocks M5_VAR_USED = (uint64)m_cache_num_sets
|
||||
* (uint64)m_cache_assoc;
|
||||
|
||||
for (int i = 0; i < m_cache_num_sets; i++) {
|
||||
for (int j = 0; j < m_cache_assoc; j++) {
|
||||
if (m_cache[i][j] != NULL) {
|
||||
AccessPermission perm = m_cache[i][j]->m_Permission;
|
||||
RubyRequestType request_type = RubyRequestType_NULL;
|
||||
if (perm == AccessPermission_Read_Only) {
|
||||
|
@ -381,16 +388,22 @@ CacheMemory::recordCacheContents(CacheRecorder& tr) const
|
|||
}
|
||||
|
||||
if (request_type != RubyRequestType_NULL) {
|
||||
#if 0
|
||||
tr.addRecord(m_chip_ptr->getID(), m_cache[i][j].m_Address,
|
||||
Address(0), request_type,
|
||||
m_replacementPolicy_ptr->getLastAccess(i, j));
|
||||
#endif
|
||||
tr->addRecord(cntrl, m_cache[i][j]->m_Address.getAddress(),
|
||||
0, request_type,
|
||||
m_replacementPolicy_ptr->getLastAccess(i, j),
|
||||
m_cache[i][j]->getDataBlk());
|
||||
warmedUpBlocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Report how many of this cache's blocks were written to the recorder.
// The two adjacent string literals are concatenated by the compiler, so the
// first one must end in a space; otherwise the message reads
// "total blocksrecorded".
DPRINTF(RubyCache, "%s: %lli blocks of %lli total blocks "
        "recorded %.2f%% \n", name().c_str(), warmedUpBlocks,
        (uint64)m_cache_num_sets * (uint64)m_cache_assoc,
        (float(warmedUpBlocks)/float(totalBlocks))*100.0);
|
||||
}
|
||||
|
||||
void
|
||||
CacheMemory::print(ostream& out) const
|
||||
{
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -34,21 +34,15 @@
|
|||
#include <vector>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/protocol/GenericRequestType.hh"
|
||||
#include "mem/protocol/RubyRequest.hh"
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/profiler/CacheProfiler.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
|
||||
#include "mem/ruby/system/LRUPolicy.hh"
|
||||
#include "mem/ruby/system/PseudoLRUPolicy.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "params/RubyCache.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
|
@ -100,12 +94,7 @@ class CacheMemory : public SimObject
|
|||
int getLatency() const { return m_latency; }
|
||||
|
||||
// Hook for checkpointing the contents of the cache
|
||||
void recordCacheContents(CacheRecorder& tr) const;
|
||||
void
|
||||
setAsInstructionCache(bool is_icache)
|
||||
{
|
||||
m_is_instruction_only_cache = is_icache;
|
||||
}
|
||||
void recordCacheContents(int cntrl, CacheRecorder* tr) const;
|
||||
|
||||
// Set this address to most recently used
|
||||
void setMRU(const Address& address);
|
||||
|
@ -146,7 +135,6 @@ class CacheMemory : public SimObject
|
|||
|
||||
// Data Members (m_prefix)
|
||||
bool m_is_instruction_only_cache;
|
||||
bool m_is_data_only_cache;
|
||||
|
||||
// The first index is the # of cache lines.
|
||||
// The second index is the amount of associativity.
|
||||
|
|
|
@ -55,6 +55,9 @@ class DMASequencer : public RubyPort
|
|||
/* external interface */
|
||||
RequestStatus makeRequest(PacketPtr pkt);
|
||||
bool busy() { return m_is_busy;}
|
||||
int outstandingCount() const { return (m_is_busy ? 1 : 0); }
|
||||
bool isDeadlockEventScheduled() const { return false; }
|
||||
void descheduleDeadlockEvent() {}
|
||||
|
||||
/* SLICC callback */
|
||||
void dataCallback(const DataBlock & dblk);
|
||||
|
|
|
@ -58,6 +58,7 @@ DirectoryMemory::init()
|
|||
|
||||
if (m_use_map) {
|
||||
m_sparseMemory = new SparseMemory(m_map_levels);
|
||||
g_system_ptr->registerSparseMemory(m_sparseMemory);
|
||||
} else {
|
||||
m_entries = new AbstractEntry*[m_num_entries];
|
||||
for (int i = 0; i < m_num_entries; i++)
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#ifndef __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
|
||||
#define __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
|
||||
|
||||
#include "base/trace.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
|
||||
class DirectoryMemory;
|
||||
|
@ -48,6 +49,8 @@ class MemoryVector
|
|||
|
||||
void write(const Address & paddr, uint8* data, int len);
|
||||
uint8* read(const Address & paddr, uint8* data, int len);
|
||||
uint32 collatePages(uint8* &raw_data);
|
||||
void populatePages(uint8* raw_data);
|
||||
|
||||
private:
|
||||
uint8* getBlockPtr(const PhysAddress & addr);
|
||||
|
@ -56,6 +59,7 @@ class MemoryVector
|
|||
uint8** m_pages;
|
||||
uint32 m_num_pages;
|
||||
const uint32 m_page_offset_mask;
|
||||
static const uint32 PAGE_SIZE = 4096;
|
||||
};
|
||||
|
||||
inline
|
||||
|
@ -97,7 +101,7 @@ MemoryVector::resize(uint32 size)
|
|||
delete [] m_pages;
|
||||
}
|
||||
m_size = size;
|
||||
assert(size%4096 == 0);
|
||||
assert(size%PAGE_SIZE == 0);
|
||||
m_num_pages = size >> 12;
|
||||
m_pages = new uint8*[m_num_pages];
|
||||
memset(m_pages, 0, m_num_pages * sizeof(uint8*));
|
||||
|
@ -118,8 +122,8 @@ MemoryVector::write(const Address & paddr, uint8* data, int len)
|
|||
}
|
||||
if (all_zeros)
|
||||
return;
|
||||
m_pages[page_num] = new uint8[4096];
|
||||
memset(m_pages[page_num], 0, 4096);
|
||||
m_pages[page_num] = new uint8[PAGE_SIZE];
|
||||
memset(m_pages[page_num], 0, PAGE_SIZE);
|
||||
uint32 offset = paddr.getAddress() & m_page_offset_mask;
|
||||
memcpy(&m_pages[page_num][offset], data, len);
|
||||
} else {
|
||||
|
@ -147,10 +151,82 @@ MemoryVector::getBlockPtr(const PhysAddress & paddr)
|
|||
{
|
||||
uint32 page_num = paddr.getAddress() >> 12;
|
||||
if (m_pages[page_num] == 0) {
|
||||
m_pages[page_num] = new uint8[4096];
|
||||
memset(m_pages[page_num], 0, 4096);
|
||||
m_pages[page_num] = new uint8[PAGE_SIZE];
|
||||
memset(m_pages[page_num], 0, PAGE_SIZE);
|
||||
}
|
||||
return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Function for collating all the pages of the physical memory together.
|
||||
* In case a pointer for a page is NULL, this page needs only a single byte
|
||||
* to represent that the pointer is NULL. Otherwise, it needs 1 + PAGE_SIZE
|
||||
* bytes. The first represents that the page pointer is not NULL, and rest of
|
||||
* the bytes represent the data on the page.
|
||||
*/
|
||||
|
||||
inline uint32
MemoryVector::collatePages(uint8* &raw_data)
{
    // First pass: count the pages that actually hold data so the output
    // buffer can be sized exactly.
    uint32 allocated_pages = 0;
    for (uint32 page = 0; page < m_num_pages; ++page) {
        if (m_pages[page] != 0)
            allocated_pages++;
    }

    // Layout: [page count][flag byte per page, followed by PAGE_SIZE bytes
    // of data when the flag is 1].
    raw_data = new uint8[sizeof(uint32)              /* number of pages */
                         + m_num_pages               /* one flag per page */
                         + PAGE_SIZE * allocated_pages];

    // Header: the number of pages being stored.
    uint32 offset = 0;
    memcpy(raw_data + offset, &m_num_pages, sizeof(uint32));
    offset += sizeof(uint32);

    // Second pass: emit the flag byte and, for allocated pages, the data.
    for (uint32 page = 0; page < m_num_pages; ++page) {
        uint8* src = m_pages[page];
        if (src != 0) {
            raw_data[offset++] = 1;
            memcpy(raw_data + offset, src, PAGE_SIZE);
            offset += PAGE_SIZE;
        } else {
            raw_data[offset++] = 0;
        }
    }

    // Total number of bytes written into raw_data.
    return offset;
}
|
||||
|
||||
/*!
|
||||
* Function for populating the pages of the memory using the available raw
|
||||
* data. Each page has a byte associated with it, which represents whether the
|
||||
* page was NULL or not, when all the pages were collated. The function assumes
|
||||
* that the number of pages in the memory are same as those that were recorded
|
||||
* in the checkpoint.
|
||||
*/
|
||||
inline void
|
||||
MemoryVector::populatePages(uint8* raw_data)
|
||||
{
|
||||
uint32 data_size = 0;
|
||||
uint32 num_pages = 0;
|
||||
|
||||
/* Read the number of pages that were stored. */
|
||||
memcpy(&num_pages, raw_data, sizeof(uint32));
|
||||
data_size = sizeof(uint32);
|
||||
assert(num_pages == m_num_pages);
|
||||
|
||||
for (uint32 i = 0;i < m_num_pages; ++i)
|
||||
{
|
||||
assert(m_pages[i] == 0);
|
||||
if (raw_data[data_size] != 0) {
|
||||
m_pages[i] = new uint8[PAGE_SIZE];
|
||||
memcpy(m_pages[i], raw_data + data_size + 1, PAGE_SIZE);
|
||||
data_size += PAGE_SIZE;
|
||||
}
|
||||
data_size += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include "base/hashmap.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
|
||||
template<class ENTRY>
|
||||
struct PerfectCacheLineState
|
||||
|
@ -57,10 +56,6 @@ class PerfectCacheMemory
|
|||
|
||||
static void printConfig(std::ostream& out);
|
||||
|
||||
// perform a cache access and see if we hit or not. Return true
|
||||
// on a hit.
|
||||
bool tryCacheAccess(const CacheMsg& msg, bool& block_stc, ENTRY*& entry);
|
||||
|
||||
// tests to see if an address is present in the cache
|
||||
bool isTagPresent(const Address& address) const;
|
||||
|
||||
|
@ -118,15 +113,6 @@ PerfectCacheMemory<ENTRY>::printConfig(std::ostream& out)
|
|||
{
|
||||
}
|
||||
|
||||
template<class ENTRY>
|
||||
inline bool
|
||||
PerfectCacheMemory<ENTRY>::tryCacheAccess(const CacheMsg& msg,
|
||||
bool& block_stc, ENTRY*& entry)
|
||||
{
|
||||
panic("not implemented");
|
||||
return true;
|
||||
}
|
||||
|
||||
// tests to see if an address is present in the cache
|
||||
template<class ENTRY>
|
||||
inline bool
|
||||
|
|
|
@ -27,11 +27,11 @@
|
|||
*/
|
||||
|
||||
#include "cpu/testers/rubytest/RubyTester.hh"
|
||||
#include "debug/Config.hh"
|
||||
#include "debug/Ruby.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
#include "mem/physical.hh"
|
||||
|
||||
RubyPort::RubyPort(const Params *p)
|
||||
: MemObject(p)
|
||||
|
@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p)
|
|||
m_usingRubyTester = p->using_ruby_tester;
|
||||
access_phys_mem = p->access_phys_mem;
|
||||
|
||||
drainEvent = NULL;
|
||||
|
||||
ruby_system = p->ruby_system;
|
||||
waitingOnSequencer = false;
|
||||
}
|
||||
|
@ -66,8 +68,10 @@ Port *
|
|||
RubyPort::getPort(const std::string &if_name, int idx)
|
||||
{
|
||||
if (if_name == "port") {
|
||||
return new M5Port(csprintf("%s-port%d", name(), idx), this,
|
||||
ruby_system, access_phys_mem);
|
||||
M5Port* cpuPort = new M5Port(csprintf("%s-port%d", name(), idx),
|
||||
this, ruby_system, access_phys_mem);
|
||||
cpu_ports.push_back(cpuPort);
|
||||
return cpuPort;
|
||||
}
|
||||
|
||||
if (if_name == "pio_port") {
|
||||
|
@ -508,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
|
|||
(*i)->sendRetry();
|
||||
}
|
||||
}
|
||||
|
||||
testDrainComplete();
|
||||
}
|
||||
|
||||
void
|
||||
RubyPort::testDrainComplete()
|
||||
{
|
||||
//If we weren't able to drain before, we might be able to now.
|
||||
if (drainEvent != NULL) {
|
||||
unsigned int drainCount = getDrainCount(drainEvent);
|
||||
DPRINTF(Config, "Drain count: %u\n", drainCount);
|
||||
if (drainCount == 0) {
|
||||
drainEvent->process();
|
||||
// Clear the drain event once we're done with it.
|
||||
drainEvent = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int
|
||||
RubyPort::getDrainCount(Event *de)
|
||||
{
|
||||
int count = 0;
|
||||
//
|
||||
// If the sequencer is not empty, then requests need to drain.
|
||||
// The outstandingCount is the number of requests outstanding and thus the
|
||||
// number of times M5's timing port will process the drain event.
|
||||
//
|
||||
count += outstandingCount();
|
||||
|
||||
DPRINTF(Config, "outstanding count %d\n", outstandingCount());
|
||||
|
||||
// To simplify the draining process, the sequencer's deadlock detection
|
||||
// event should have been descheduled.
|
||||
assert(isDeadlockEventScheduled() == false);
|
||||
|
||||
if (pio_port != NULL) {
|
||||
count += pio_port->drain(de);
|
||||
DPRINTF(Config, "count after pio check %d\n", count);
|
||||
}
|
||||
if (physMemPort != NULL) {
|
||||
count += physMemPort->drain(de);
|
||||
DPRINTF(Config, "count after physmem check %d\n", count);
|
||||
}
|
||||
|
||||
for (CpuPortIter p_iter = cpu_ports.begin(); p_iter != cpu_ports.end();
|
||||
p_iter++) {
|
||||
M5Port* cpu_port = *p_iter;
|
||||
count += cpu_port->drain(de);
|
||||
DPRINTF(Config, "count after cpu port check %d\n", count);
|
||||
}
|
||||
|
||||
DPRINTF(Config, "final count %d\n", count);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
RubyPort::drain(Event *de)
|
||||
{
|
||||
if (isDeadlockEventScheduled()) {
|
||||
descheduleDeadlockEvent();
|
||||
}
|
||||
|
||||
int count = getDrainCount(de);
|
||||
|
||||
// Set status
|
||||
if (count != 0) {
|
||||
drainEvent = de;
|
||||
|
||||
changeState(SimObject::Draining);
|
||||
return count;
|
||||
}
|
||||
|
||||
changeState(SimObject::Drained);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
#include <string>
|
||||
|
||||
#include "mem/protocol/RequestStatus.hh"
|
||||
#include "mem/ruby/slicc_interface/RubyRequest.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/physical.hh"
|
||||
|
@ -115,17 +114,23 @@ class RubyPort : public MemObject
|
|||
Port *getPort(const std::string &if_name, int idx);
|
||||
|
||||
virtual RequestStatus makeRequest(PacketPtr pkt) = 0;
|
||||
virtual int outstandingCount() const = 0;
|
||||
virtual bool isDeadlockEventScheduled() const = 0;
|
||||
virtual void descheduleDeadlockEvent() = 0;
|
||||
|
||||
//
|
||||
// Called by the controller to give the sequencer a pointer.
|
||||
// A pointer to the controller is needed for atomic support.
|
||||
//
|
||||
void setController(AbstractController* _cntrl) { m_controller = _cntrl; }
|
||||
int getId() { return m_version; }
|
||||
unsigned int drain(Event *de);
|
||||
|
||||
protected:
|
||||
const std::string m_name;
|
||||
void ruby_hit_callback(PacketPtr pkt);
|
||||
void hit(PacketPtr pkt);
|
||||
void testDrainComplete();
|
||||
|
||||
int m_version;
|
||||
AbstractController* m_controller;
|
||||
|
@ -143,11 +148,19 @@ class RubyPort : public MemObject
|
|||
}
|
||||
}
|
||||
|
||||
unsigned int getDrainCount(Event *de);
|
||||
|
||||
uint16_t m_port_id;
|
||||
uint64_t m_request_cnt;
|
||||
|
||||
M5Port* physMemPort;
|
||||
|
||||
/*! Vector of CPU Port attached to this Ruby port. */
|
||||
typedef std::vector<M5Port*>::iterator CpuPortIter;
|
||||
std::vector<M5Port*> cpu_ports;
|
||||
|
||||
Event *drainEvent;
|
||||
|
||||
PhysicalMemory* physmem;
|
||||
RubySystem* ruby_system;
|
||||
|
||||
|
|
|
@ -40,9 +40,7 @@
|
|||
#include "mem/protocol/RubyAccessMode.hh"
|
||||
#include "mem/ruby/buffers/MessageBuffer.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/common/SubBlock.hh"
|
||||
#include "mem/ruby/profiler/Profiler.hh"
|
||||
#include "mem/ruby/recorder/Tracer.hh"
|
||||
#include "mem/ruby/slicc_interface/RubyRequest.hh"
|
||||
#include "mem/ruby/system/CacheMemory.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
|
@ -521,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest,
|
|||
}
|
||||
|
||||
// update the data
|
||||
if (pkt->getPtr<uint8_t>(true) != NULL) {
|
||||
if (g_system_ptr->m_warmup_enabled) {
|
||||
assert(pkt->getPtr<uint8_t>(false) != NULL);
|
||||
data.setData(pkt->getPtr<uint8_t>(false),
|
||||
request_address.getOffset(), pkt->getSize());
|
||||
} else if (pkt->getPtr<uint8_t>(true) != NULL) {
|
||||
if ((type == RubyRequestType_LD) ||
|
||||
(type == RubyRequestType_IFETCH) ||
|
||||
(type == RubyRequestType_RMW_Read) ||
|
||||
|
@ -553,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
|
|||
testerSenderState->subBlock->mergeFrom(data);
|
||||
}
|
||||
|
||||
ruby_hit_callback(pkt);
|
||||
delete srequest;
|
||||
|
||||
if (g_system_ptr->m_warmup_enabled) {
|
||||
delete pkt;
|
||||
g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
|
||||
} else if (g_system_ptr->m_cooldown_enabled) {
|
||||
delete pkt;
|
||||
g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
|
||||
} else {
|
||||
ruby_hit_callback(pkt);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
#include "mem/ruby/system/RubyPort.hh"
|
||||
|
||||
class DataBlock;
|
||||
class CacheMsg;
|
||||
class MachineID;
|
||||
class CacheMemory;
|
||||
|
||||
class RubySequencerParams;
|
||||
|
@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer
|
|||
|
||||
RequestStatus makeRequest(PacketPtr pkt);
|
||||
bool empty() const;
|
||||
int outstandingCount() const { return m_outstanding_count; }
|
||||
bool
|
||||
isDeadlockEventScheduled() const
|
||||
{
|
||||
return deadlockCheckEvent.scheduled();
|
||||
}
|
||||
|
||||
void
|
||||
descheduleDeadlockEvent()
|
||||
{
|
||||
deschedule(deadlockCheckEvent);
|
||||
}
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
void printStats(std::ostream& out) const;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2009 Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -26,6 +27,8 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "debug/RubyCache.hh"
|
||||
#include "mem/ruby/system/SparseMemory.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
@ -82,19 +85,19 @@ SparseMemory::recursivelyRemoveTables(SparseMapType* curTable, int curLevel)
|
|||
SparseMapType::iterator iter;
|
||||
|
||||
for (iter = curTable->begin(); iter != curTable->end(); iter++) {
|
||||
SparseMemEntry* entryStruct = &((*iter).second);
|
||||
SparseMemEntry entry = (*iter).second;
|
||||
|
||||
if (curLevel != (m_number_of_levels - 1)) {
|
||||
// If not at the last level, analyze those lower level
|
||||
// tables first, then delete those next tables
|
||||
SparseMapType* nextTable = (SparseMapType*)(entryStruct->entry);
|
||||
SparseMapType* nextTable = (SparseMapType*)(entry);
|
||||
recursivelyRemoveTables(nextTable, (curLevel + 1));
|
||||
delete nextTable;
|
||||
} else {
|
||||
// If at the last level, delete the directory entry
|
||||
delete (AbstractEntry*)(entryStruct->entry);
|
||||
delete (AbstractEntry*)(entry);
|
||||
}
|
||||
entryStruct->entry = NULL;
|
||||
entry = NULL;
|
||||
}
|
||||
|
||||
// Once all entries have been deleted, erase the entries
|
||||
|
@ -134,7 +137,7 @@ SparseMemory::exist(const Address& address) const
|
|||
// If the address is found, move on to the next level.
|
||||
// Otherwise, return not found
|
||||
if (curTable->count(curAddress) != 0) {
|
||||
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
|
||||
curTable = (SparseMapType*)((*curTable)[curAddress]);
|
||||
} else {
|
||||
DPRINTF(RubyCache, "Not found\n");
|
||||
return false;
|
||||
|
@ -156,7 +159,6 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
|
|||
|
||||
Address curAddress;
|
||||
SparseMapType* curTable = m_map_head;
|
||||
SparseMemEntry* entryStruct = NULL;
|
||||
|
||||
// Initialize the high bit to be the total number of bits plus
|
||||
// the block offset. However the highest bit index is one less
|
||||
|
@ -179,7 +181,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
|
|||
// if the address exists in the cur table, move on. Otherwise
|
||||
// create a new table.
|
||||
if (curTable->count(curAddress) != 0) {
|
||||
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
|
||||
curTable = (SparseMapType*)((*curTable)[curAddress]);
|
||||
} else {
|
||||
m_adds_per_level[level]++;
|
||||
|
||||
|
@ -194,9 +196,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
|
|||
|
||||
// Create the pointer container SparseMemEntry and add it
|
||||
// to the table.
|
||||
entryStruct = new SparseMemEntry;
|
||||
entryStruct->entry = newEntry;
|
||||
(*curTable)[curAddress] = *entryStruct;
|
||||
(*curTable)[curAddress] = newEntry;
|
||||
|
||||
// Move to the next level of the hierarchy
|
||||
curTable = (SparseMapType*)newEntry;
|
||||
|
@ -215,7 +215,7 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
|
|||
{
|
||||
Address curAddress;
|
||||
CurNextInfo nextInfo;
|
||||
SparseMemEntry* entryStruct;
|
||||
SparseMemEntry entry;
|
||||
|
||||
// create the appropriate address for this level
|
||||
// Note: that set Address is inclusive of the specified range,
|
||||
|
@ -231,11 +231,11 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
|
|||
|
||||
assert(curInfo.curTable->count(curAddress) != 0);
|
||||
|
||||
entryStruct = &((*(curInfo.curTable))[curAddress]);
|
||||
entry = (*(curInfo.curTable))[curAddress];
|
||||
|
||||
if (curInfo.level < (m_number_of_levels - 1)) {
|
||||
// set up next level's info
|
||||
nextInfo.curTable = (SparseMapType*)(entryStruct->entry);
|
||||
nextInfo.curTable = (SparseMapType*)(entry);
|
||||
nextInfo.level = curInfo.level + 1;
|
||||
|
||||
nextInfo.highBit = curInfo.highBit -
|
||||
|
@ -252,15 +252,15 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
|
|||
if (tableSize == 0) {
|
||||
m_removes_per_level[curInfo.level]++;
|
||||
delete nextInfo.curTable;
|
||||
entryStruct->entry = NULL;
|
||||
entry = NULL;
|
||||
curInfo.curTable->erase(curAddress);
|
||||
}
|
||||
} else {
|
||||
// if this is the last level, we have reached the Directory
|
||||
// Entry and thus we should delete it including the
|
||||
// SparseMemEntry container struct.
|
||||
delete (AbstractEntry*)(entryStruct->entry);
|
||||
entryStruct->entry = NULL;
|
||||
delete (AbstractEntry*)(entry);
|
||||
entry = NULL;
|
||||
curInfo.curTable->erase(curAddress);
|
||||
m_removes_per_level[curInfo.level]++;
|
||||
}
|
||||
|
@ -331,7 +331,7 @@ SparseMemory::lookup(const Address& address)
|
|||
// If the address is found, move on to the next level.
|
||||
// Otherwise, return not found
|
||||
if (curTable->count(curAddress) != 0) {
|
||||
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
|
||||
curTable = (SparseMapType*)((*curTable)[curAddress]);
|
||||
} else {
|
||||
DPRINTF(RubyCache, "Not found\n");
|
||||
return NULL;
|
||||
|
@ -344,6 +344,70 @@ SparseMemory::lookup(const Address& address)
|
|||
return entry;
|
||||
}
|
||||
|
||||
void
|
||||
SparseMemory::recordBlocks(int cntrl_id, CacheRecorder* tr) const
|
||||
{
|
||||
queue<SparseMapType*> unexplored_nodes[2];
|
||||
queue<physical_address_t> address_of_nodes[2];
|
||||
|
||||
unexplored_nodes[0].push(m_map_head);
|
||||
address_of_nodes[0].push(0);
|
||||
|
||||
int parity_of_level = 0;
|
||||
physical_address_t address, temp_address;
|
||||
Address curAddress;
|
||||
|
||||
// Initiallize the high bit to be the total number of bits plus
|
||||
// the block offset. However the highest bit index is one less
|
||||
// than this value.
|
||||
int highBit = m_total_number_of_bits + RubySystem::getBlockSizeBits();
|
||||
int lowBit;
|
||||
|
||||
for (int cur_level = 0; cur_level < m_number_of_levels; cur_level++) {
|
||||
|
||||
// create the appropriate address for this level
|
||||
// Note: that set Address is inclusive of the specified range,
|
||||
// thus the high bit is one less than the total number of bits
|
||||
// used to create the address.
|
||||
lowBit = highBit - m_number_of_bits_per_level[cur_level];
|
||||
|
||||
while (!unexplored_nodes[parity_of_level].empty()) {
|
||||
|
||||
SparseMapType* node = unexplored_nodes[parity_of_level].front();
|
||||
unexplored_nodes[parity_of_level].pop();
|
||||
|
||||
address = address_of_nodes[parity_of_level].front();
|
||||
address_of_nodes[parity_of_level].pop();
|
||||
|
||||
SparseMapType::iterator iter;
|
||||
|
||||
for (iter = node->begin(); iter != node->end(); iter++) {
|
||||
SparseMemEntry entry = (*iter).second;
|
||||
curAddress = (*iter).first;
|
||||
|
||||
if (cur_level != (m_number_of_levels - 1)) {
|
||||
// If not at the last level, put this node in the queue
|
||||
unexplored_nodes[1 - parity_of_level].push(
|
||||
(SparseMapType*)(entry));
|
||||
address_of_nodes[1 - parity_of_level].push(address |
|
||||
(curAddress.getAddress() << lowBit));
|
||||
} else {
|
||||
// If at the last level, add a trace record
|
||||
temp_address = address | (curAddress.getAddress()
|
||||
<< lowBit);
|
||||
DataBlock block = ((AbstractEntry*)entry)->getDataBlk();
|
||||
tr->addRecord(cntrl_id, temp_address, 0, RubyRequestType_ST, 0,
|
||||
block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust the highBit value for the next level
|
||||
highBit -= m_number_of_bits_per_level[cur_level];
|
||||
parity_of_level = 1 - parity_of_level;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SparseMemory::print(ostream& out) const
|
||||
{
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2009 Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -32,15 +33,11 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractEntry.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
|
||||
struct SparseMemEntry
|
||||
{
|
||||
void* entry;
|
||||
};
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractEntry.hh"
|
||||
|
||||
typedef void* SparseMemEntry;
|
||||
typedef m5::hash_map<Address, SparseMemEntry> SparseMapType;
|
||||
|
||||
struct CurNextInfo
|
||||
|
@ -63,6 +60,14 @@ class SparseMemory
|
|||
void add(const Address& address, AbstractEntry*);
|
||||
void remove(const Address& address);
|
||||
|
||||
/*!
|
||||
* Function for recording the contents of memory. This function walks
|
||||
* through all the levels of the sparse memory in a breadth first
|
||||
* fashion. This might need more memory than a depth first approach.
|
||||
* But breadth first seems easier to me than a depth first approach.
|
||||
*/
|
||||
void recordBlocks(int cntrl_id, CacheRecorder *) const;
|
||||
|
||||
AbstractEntry* lookup(const Address& address);
|
||||
|
||||
// Print cache contents
|
||||
|
@ -95,12 +100,4 @@ class SparseMemory
|
|||
uint64_t* m_removes_per_level;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const SparseMemEntry& obj)
|
||||
{
|
||||
out << "SparseMemEntry";
|
||||
out << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_SYSTEM_SPARSEMEMORY_HH__
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -26,16 +26,19 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "base/output.hh"
|
||||
#include "mem/ruby/buffers/MessageBuffer.hh"
|
||||
#include "debug/RubySystem.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/network/Network.hh"
|
||||
#include "mem/ruby/profiler/Profiler.hh"
|
||||
#include "mem/ruby/recorder/Tracer.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/system/MemoryVector.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "sim/simulate.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -49,7 +52,6 @@ int RubySystem::m_memory_size_bits;
|
|||
|
||||
Network* RubySystem::m_network_ptr;
|
||||
Profiler* RubySystem::m_profiler_ptr;
|
||||
Tracer* RubySystem::m_tracer_ptr;
|
||||
MemoryVector* RubySystem::m_mem_vec_ptr;
|
||||
|
||||
RubySystem::RubySystem(const Params *p)
|
||||
|
@ -88,6 +90,8 @@ RubySystem::RubySystem(const Params *p)
|
|||
//
|
||||
RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
|
||||
registerExitCallback(rubyExitCB);
|
||||
m_warmup_enabled = false;
|
||||
m_cooldown_enabled = false;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -108,23 +112,22 @@ RubySystem::registerProfiler(Profiler* profiler_ptr)
|
|||
m_profiler_ptr = profiler_ptr;
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::registerTracer(Tracer* tracer_ptr)
|
||||
{
|
||||
m_tracer_ptr = tracer_ptr;
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::registerAbstractController(AbstractController* cntrl)
|
||||
{
|
||||
m_abs_cntrl_vec.push_back(cntrl);
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::registerSparseMemory(SparseMemory* s)
|
||||
{
|
||||
m_sparse_memory_vector.push_back(s);
|
||||
}
|
||||
|
||||
// Release the class-wide network, profiler, tracer and memory-vector
// objects.
RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    delete m_tracer_ptr;
    // delete on a null pointer is a no-op, so no explicit guard is needed.
    delete m_mem_vec_ptr;
}
|
||||
|
@ -166,10 +169,144 @@ RubySystem::printStats(ostream& out)
|
|||
m_network_ptr->printStats(out);
|
||||
}
|
||||
|
||||
/**
 * Write a raw trace buffer as a gzip-compressed file inside the checkpoint
 * directory, then release the buffer.
 *
 * @param raw_data buffer holding the uncompressed trace; ownership is taken
 *                 and the buffer is freed before returning. The allocation
 *                 visible in serialize() uses new uint8_t[...], so it is
 *                 released with delete[].
 * @param filename file name, relative to Checkpoint::dir()
 * @param uncompressed_trace_size number of bytes of raw_data to write
 *
 * Any failure to create, write or close the file ends in fatal().
 */
void
RubySystem::writeCompressedTrace(uint8* raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename.c_str();

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    // Layer a gzip stream on top of the freshly created descriptor.
    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }

    // The buffer is an array allocation; scalar delete on it is undefined
    // behaviour, so use the array form.
    delete [] raw_data;
}
|
||||
|
||||
void
|
||||
RubySystem::serialize(std::ostream &os)
|
||||
{
|
||||
m_cooldown_enabled = true;
|
||||
|
||||
vector<Sequencer*> sequencer_map;
|
||||
Sequencer* sequencer_ptr = NULL;
|
||||
int cntrl_id = -1;
|
||||
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
|
||||
if (sequencer_ptr == NULL) {
|
||||
sequencer_ptr = sequencer_map[cntrl];
|
||||
cntrl_id = cntrl;
|
||||
}
|
||||
}
|
||||
|
||||
assert(sequencer_ptr != NULL);
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
if (sequencer_map[cntrl] == NULL) {
|
||||
sequencer_map[cntrl] = sequencer_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the CacheRecorder and record the cache trace
|
||||
m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
|
||||
}
|
||||
|
||||
// save the current tick value
|
||||
Tick curtick_original = curTick();
|
||||
// save the event queue head
|
||||
Event* eventq_head = eventq->replaceHead(NULL);
|
||||
|
||||
// Schedule an event to start cache cooldown
|
||||
RubyEvent* e = new RubyEvent(this);
|
||||
schedule(e,curTick());
|
||||
simulate();
|
||||
|
||||
// Restore eventq head
|
||||
eventq_head = eventq->replaceHead(eventq_head);
|
||||
// Restore curTick
|
||||
curTick(curtick_original);
|
||||
|
||||
uint8* raw_data = NULL;
|
||||
|
||||
if (m_mem_vec_ptr != NULL) {
|
||||
uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
|
||||
|
||||
string memory_trace_file = name() + ".memory.gz";
|
||||
writeCompressedTrace(raw_data, memory_trace_file,
|
||||
memory_trace_size);
|
||||
|
||||
SERIALIZE_SCALAR(memory_trace_file);
|
||||
SERIALIZE_SCALAR(memory_trace_size);
|
||||
|
||||
} else {
|
||||
for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
|
||||
m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
|
||||
m_cache_recorder);
|
||||
}
|
||||
}
|
||||
|
||||
// Aggergate the trace entries together into a single array
|
||||
raw_data = new uint8_t[4096];
|
||||
uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
|
||||
4096);
|
||||
string cache_trace_file = name() + ".cache.gz";
|
||||
writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
|
||||
|
||||
SERIALIZE_SCALAR(cache_trace_file);
|
||||
SERIALIZE_SCALAR(cache_trace_size);
|
||||
|
||||
m_cooldown_enabled = false;
|
||||
}
|
||||
|
||||
/**
 * Read a gzip-compressed trace file into a freshly allocated buffer.
 *
 * @param filename path of the compressed trace file
 * @param raw_data set to a new uint8_t[uncompressed_trace_size] buffer
 *                 holding the decompressed data; the caller owns it
 * @param uncompressed_trace_size number of bytes expected after
 *                 decompression (used as given; not modified here)
 *
 * Any failure to open, fully read or close the file ends in fatal().
 */
void
RubySystem::readCompressedTrace(string filename, uint8*& raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];

    // gzread() returns an int and reports errors as -1. Comparing that
    // directly against the unsigned size would convert -1 to a huge value
    // and let the error slip through, so check the sign first.
    int bytes_read = gzread(compressedTrace, raw_data,
                            uncompressed_trace_size);
    if (bytes_read < 0 ||
        static_cast<uint64>(bytes_read) < uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}
|
||||
|
||||
void
|
||||
|
@ -181,6 +318,88 @@ RubySystem::unserialize(Checkpoint *cp, const string §ion)
|
|||
// value of curTick()
|
||||
//
|
||||
clearStats();
|
||||
uint8* uncompressed_trace = NULL;
|
||||
|
||||
if (m_mem_vec_ptr != NULL) {
|
||||
string memory_trace_file;
|
||||
uint64 memory_trace_size = 0;
|
||||
|
||||
UNSERIALIZE_SCALAR(memory_trace_file);
|
||||
UNSERIALIZE_SCALAR(memory_trace_size);
|
||||
memory_trace_file = cp->cptDir + "/" + memory_trace_file;
|
||||
|
||||
readCompressedTrace(memory_trace_file, uncompressed_trace,
|
||||
memory_trace_size);
|
||||
m_mem_vec_ptr->populatePages(uncompressed_trace);
|
||||
|
||||
delete uncompressed_trace;
|
||||
uncompressed_trace = NULL;
|
||||
}
|
||||
|
||||
string cache_trace_file;
|
||||
uint64 cache_trace_size = 0;
|
||||
|
||||
UNSERIALIZE_SCALAR(cache_trace_file);
|
||||
UNSERIALIZE_SCALAR(cache_trace_size);
|
||||
cache_trace_file = cp->cptDir + "/" + cache_trace_file;
|
||||
|
||||
readCompressedTrace(cache_trace_file, uncompressed_trace,
|
||||
cache_trace_size);
|
||||
m_warmup_enabled = true;
|
||||
|
||||
vector<Sequencer*> sequencer_map;
|
||||
Sequencer* t = NULL;
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
|
||||
if(t == NULL) t = sequencer_map[cntrl];
|
||||
}
|
||||
|
||||
assert(t != NULL);
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
if (sequencer_map[cntrl] == NULL) {
|
||||
sequencer_map[cntrl] = t;
|
||||
}
|
||||
}
|
||||
|
||||
m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
|
||||
sequencer_map);
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::startup()
|
||||
{
|
||||
if (m_warmup_enabled) {
|
||||
// save the current tick value
|
||||
Tick curtick_original = curTick();
|
||||
// save the event queue head
|
||||
Event* eventq_head = eventq->replaceHead(NULL);
|
||||
// set curTick to 0
|
||||
curTick(0);
|
||||
|
||||
// Schedule an event to start cache warmup
|
||||
RubyEvent* e = new RubyEvent(this);
|
||||
schedule(e,curTick());
|
||||
simulate();
|
||||
|
||||
delete m_cache_recorder;
|
||||
m_cache_recorder = NULL;
|
||||
m_warmup_enabled = false;
|
||||
// Restore eventq head
|
||||
eventq_head = eventq->replaceHead(eventq_head);
|
||||
// Restore curTick
|
||||
curTick(curtick_original);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::RubyEvent::process()
|
||||
{
|
||||
if (ruby_system->m_warmup_enabled) {
|
||||
ruby_system->m_cache_recorder->enqueueNextFetchRequest();
|
||||
} else if (ruby_system->m_cooldown_enabled) {
|
||||
ruby_system->m_cache_recorder->enqueueNextFlushRequest();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -190,11 +409,6 @@ RubySystem::clearStats() const
|
|||
m_network_ptr->clearStats();
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::recordCacheContents(CacheRecorder& tr) const
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CHECK_COHERENCE
|
||||
// This code will check for cases if the given cache block is exclusive in
|
||||
// one node and shared in another-- a coherence violation
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -38,21 +38,34 @@
|
|||
#include "base/callback.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/system/MemoryVector.hh"
|
||||
#include "mem/ruby/system/SparseMemory.hh"
|
||||
#include "params/RubySystem.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
class AbstractController;
|
||||
class CacheRecorder;
|
||||
class MemoryVector;
|
||||
class Network;
|
||||
class Profiler;
|
||||
class Tracer;
|
||||
|
||||
class RubySystem : public SimObject
|
||||
{
|
||||
public:
|
||||
class RubyEvent : public Event
|
||||
{
|
||||
public:
|
||||
RubyEvent(RubySystem* _ruby_system)
|
||||
{
|
||||
ruby_system = _ruby_system;
|
||||
}
|
||||
private:
|
||||
void process();
|
||||
|
||||
RubySystem* ruby_system;
|
||||
};
|
||||
|
||||
friend class RubyEvent;
|
||||
|
||||
typedef RubySystemParams Params;
|
||||
RubySystem(const Params *p);
|
||||
~RubySystem();
|
||||
|
@ -86,13 +99,6 @@ class RubySystem : public SimObject
|
|||
return m_profiler_ptr;
|
||||
}
|
||||
|
||||
static Tracer*
|
||||
getTracer()
|
||||
{
|
||||
assert(m_tracer_ptr != NULL);
|
||||
return m_tracer_ptr;
|
||||
}
|
||||
|
||||
static MemoryVector*
|
||||
getMemoryVector()
|
||||
{
|
||||
|
@ -100,7 +106,6 @@ class RubySystem : public SimObject
|
|||
return m_mem_vec_ptr;
|
||||
}
|
||||
|
||||
void recordCacheContents(CacheRecorder& tr) const;
|
||||
static void printConfig(std::ostream& out);
|
||||
static void printStats(std::ostream& out);
|
||||
void clearStats() const;
|
||||
|
@ -114,13 +119,15 @@ class RubySystem : public SimObject
|
|||
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
virtual void serialize(std::ostream &os);
|
||||
virtual void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
void serialize(std::ostream &os);
|
||||
void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
void process();
|
||||
void startup();
|
||||
|
||||
void registerNetwork(Network*);
|
||||
void registerProfiler(Profiler*);
|
||||
void registerTracer(Tracer*);
|
||||
void registerAbstractController(AbstractController*);
|
||||
void registerSparseMemory(SparseMemory*);
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
|
@ -130,6 +137,11 @@ class RubySystem : public SimObject
|
|||
void init();
|
||||
|
||||
static void printSystemConfig(std::ostream& out);
|
||||
void readCompressedTrace(std::string filename,
|
||||
uint8*& raw_data,
|
||||
uint64& uncompressed_trace_size);
|
||||
void writeCompressedTrace(uint8* raw_data, std::string file,
|
||||
uint64 uncompressed_trace_size);
|
||||
|
||||
private:
|
||||
// configuration parameters
|
||||
|
@ -140,14 +152,16 @@ class RubySystem : public SimObject
|
|||
static int m_block_size_bits;
|
||||
static uint64 m_memory_size_bytes;
|
||||
static int m_memory_size_bits;
|
||||
|
||||
static Network* m_network_ptr;
|
||||
|
||||
public:
|
||||
static Profiler* m_profiler_ptr;
|
||||
static Tracer* m_tracer_ptr;
|
||||
static MemoryVector* m_mem_vec_ptr;
|
||||
std::vector<AbstractController*> m_abs_cntrl_vec;
|
||||
bool m_warmup_enabled;
|
||||
bool m_cooldown_enabled;
|
||||
CacheRecorder* m_cache_recorder;
|
||||
std::vector<SparseMemory*> m_sparse_memory_vector;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
|
|
|
@ -264,6 +264,8 @@ public:
|
|||
void clearStats();
|
||||
void blockOnQueue(Address addr, MessageBuffer* port);
|
||||
void unblock(Address addr);
|
||||
void recordCacheTrace(int cntrl, CacheRecorder* tr);
|
||||
Sequencer* getSequencer() const;
|
||||
|
||||
private:
|
||||
''')
|
||||
|
@ -674,6 +676,12 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v
|
|||
else:
|
||||
mq_ident = "NULL"
|
||||
|
||||
seq_ident = "NULL"
|
||||
for param in self.config_parameters:
|
||||
if param.name == "sequencer":
|
||||
assert(param.pointer)
|
||||
seq_ident = "m_%s_ptr" % param.name
|
||||
|
||||
code('''
|
||||
int
|
||||
$c_ident::getNumControllers()
|
||||
|
@ -687,6 +695,12 @@ $c_ident::getMandatoryQueue() const
|
|||
return $mq_ident;
|
||||
}
|
||||
|
||||
Sequencer*
|
||||
$c_ident::getSequencer() const
|
||||
{
|
||||
return $seq_ident;
|
||||
}
|
||||
|
||||
const int &
|
||||
$c_ident::getVersion() const
|
||||
{
|
||||
|
@ -875,6 +889,23 @@ $c_ident::unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr)
|
|||
|
||||
code('''
|
||||
|
||||
void
|
||||
$c_ident::recordCacheTrace(int cntrl, CacheRecorder* tr)
|
||||
{
|
||||
''')
|
||||
#
|
||||
# Record cache contents for all associated caches.
|
||||
#
|
||||
code.indent()
|
||||
for param in self.config_parameters:
|
||||
if param.type_ast.type.ident == "CacheMemory":
|
||||
assert(param.pointer)
|
||||
code('m_${{param.ident}}_ptr->recordCacheContents(cntrl, tr);')
|
||||
|
||||
code.dedent()
|
||||
code('''
|
||||
}
|
||||
|
||||
// Actions
|
||||
''')
|
||||
if self.TBEType != None and self.EntryType != None:
|
||||
|
|
|
@ -874,29 +874,62 @@ class SimObject(object):
|
|||
if hasattr(self, 'type'):
|
||||
print >>ini_file, 'type=%s' % self.type
|
||||
|
||||
child_names = self._children.keys()
|
||||
child_names.sort()
|
||||
if len(child_names):
|
||||
if len(self._children.keys()):
|
||||
print >>ini_file, 'children=%s' % \
|
||||
' '.join(self._children[n].get_name() for n in child_names)
|
||||
' '.join(self._children[n].get_name() \
|
||||
for n in sorted(self._children.keys()))
|
||||
|
||||
param_names = self._params.keys()
|
||||
param_names.sort()
|
||||
for param in param_names:
|
||||
for param in sorted(self._params.keys()):
|
||||
value = self._values.get(param)
|
||||
if value != None:
|
||||
print >>ini_file, '%s=%s' % (param,
|
||||
self._values[param].ini_str())
|
||||
|
||||
port_names = self._ports.keys()
|
||||
port_names.sort()
|
||||
for port_name in port_names:
|
||||
for port_name in sorted(self._ports.keys()):
|
||||
port = self._port_refs.get(port_name, None)
|
||||
if port != None:
|
||||
print >>ini_file, '%s=%s' % (port_name, port.ini_str())
|
||||
|
||||
print >>ini_file # blank line between objects
|
||||
|
||||
# generate a tree of dictionaries expressing all the parameters in the
|
||||
# instantiated system for use by scripts that want to do power, thermal
|
||||
# visualization, and other similar tasks
|
||||
def get_config_as_dict(self):
    """Return this object's configuration as a nested attrdict.

    The result holds 'type' and 'cxx_class' when the object has them, one
    entry per parameter, one entry per child (keyed by the child's
    get_name(), valued by the child's own get_config_as_dict()), and one
    entry per connected port (the port's ini_str()).
    """
    d = attrdict()
    if hasattr(self, 'type'):
        d.type = self.type
    if hasattr(self, 'cxx_class'):
        d.cxx_class = self.cxx_class

    # Types that are stored natively. type(None) is required here: a bare
    # None in this collection never equals any type, so None values would
    # be stringified instead of being passed through.
    native_types = (str, unicode, int, long, float, bool, type(None))

    for param in sorted(self._params.keys()):
        try:
            # Use native type for those supported by JSON and
            # strings for everything else. skipkeys=True seems
            # to not work as well as one would hope
            if type(self._values[param].value) in native_types:
                d[param] = self._values[param].value
            else:
                d[param] = str(self._values[param])

        except AttributeError:
            # Values without a .value attribute are left out.
            pass

    for n in sorted(self._children.keys()):
        d[self._children[n].get_name()] = self._children[n].get_config_as_dict()

    for port_name in sorted(self._ports.keys()):
        port = self._port_refs.get(port_name, None)
        if port != None:
            # Might want to actually make this reference the object
            # in the future, although execing the string problem would
            # get some of the way there
            d[port_name] = port.ini_str()

    return d
|
||||
|
||||
def getCCParams(self):
|
||||
if self._ccParams:
|
||||
return self._ccParams
|
||||
|
|
|
@ -87,6 +87,8 @@ def parse_options():
|
|||
group("Configuration Options")
|
||||
option("--dump-config", metavar="FILE", default="config.ini",
|
||||
help="Dump configuration output file [Default: %default]")
|
||||
option("--json-config", metavar="FILE", default="config.json",
|
||||
help="Create JSON output of the configuration [Default: %default]")
|
||||
|
||||
# Debugging options
|
||||
group("Debugging Options")
|
||||
|
@ -121,7 +123,6 @@ def parse_options():
|
|||
execfile(options_file, scope)
|
||||
|
||||
arguments = options.parse_args()
|
||||
|
||||
return options,arguments
|
||||
|
||||
def interact(scope):
|
||||
|
|
|
@ -228,6 +228,12 @@ class SimObjectVector(VectorParamValue):
|
|||
for obj in v.descendants():
|
||||
yield obj
|
||||
|
||||
def get_config_as_dict(self):
    """Return a list with each element's get_config_as_dict() result."""
    return [member.get_config_as_dict() for member in self]
|
||||
|
||||
class VectorParamDesc(ParamDesc):
|
||||
# Convert assigned value to appropriate type. If the RHS is not a
|
||||
# list or tuple, it generates a single-element list.
|
||||
|
@ -256,6 +262,9 @@ class VectorParamDesc(ParamDesc):
|
|||
self.ptype.cxx_predecls(code)
|
||||
code('%}')
|
||||
code()
|
||||
# Make sure the SWIGPY_SLICE_ARG is defined through this inclusion
|
||||
code('%include "std_container.i"')
|
||||
code()
|
||||
self.ptype.swig_predecls(code)
|
||||
code()
|
||||
code('%include "std_vector.i"')
|
||||
|
@ -961,6 +970,9 @@ class Time(ParamValue):
|
|||
def ini_str(self):
|
||||
return str(self)
|
||||
|
||||
def get_config_as_dict(self):
    """Return the str() form of this value for the configuration dict."""
    text = str(self)
    return text
|
||||
|
||||
# Enumerated types are a little more complex. The user specifies the
|
||||
# type as Enum(foo) where foo is either a list or dictionary of
|
||||
# alternatives (typically strings, but not necessarily so). (In the
|
||||
|
|
|
@ -40,6 +40,7 @@ import SimObject
|
|||
import ticks
|
||||
import objects
|
||||
from util import fatal
|
||||
from util import attrdict
|
||||
|
||||
# define a MaxTick parameter
|
||||
MaxTick = 2**63 - 1
|
||||
|
@ -71,6 +72,17 @@ def instantiate(ckpt_dir=None):
|
|||
obj.print_ini(ini_file)
|
||||
ini_file.close()
|
||||
|
||||
if options.json_config:
|
||||
try:
|
||||
import json
|
||||
json_file = file(os.path.join(options.outdir, options.json_config), 'w')
|
||||
d = root.get_config_as_dict()
|
||||
json.dump(d, json_file, indent=4)
|
||||
json_file.close()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# Initialize the global statistics
|
||||
stats.initSimStats()
|
||||
|
||||
|
|
|
@ -54,8 +54,8 @@ class System(SimObject):
|
|||
physmem = Param.PhysicalMemory("Physical Memory")
|
||||
mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in")
|
||||
memories = VectorParam.PhysicalMemory(Self.all, "All memories is the system")
|
||||
|
||||
work_item_id = Param.Int(-1, "specific work item id")
|
||||
num_work_ids = Param.Int(16, "Number of distinct work item types")
|
||||
work_begin_cpu_id_exit = Param.Int(-1,
|
||||
"work started on specific id, now exit simulation")
|
||||
work_begin_ckpt_count = Param.Counter(0,
|
||||
|
|
|
@ -417,6 +417,7 @@ workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid)
|
|||
tc->getCpuPtr()->workItemBegin();
|
||||
System *sys = tc->getSystemPtr();
|
||||
const System::Params *params = sys->params();
|
||||
sys->workItemBegin(threadid, workid);
|
||||
|
||||
DPRINTF(WorkItems, "Work Begin workid: %d, threadid %d\n", workid,
|
||||
threadid);
|
||||
|
@ -473,6 +474,7 @@ workend(ThreadContext *tc, uint64_t workid, uint64_t threadid)
|
|||
tc->getCpuPtr()->workItemEnd();
|
||||
System *sys = tc->getSystemPtr();
|
||||
const System::Params *params = sys->params();
|
||||
sys->workItemEnd(threadid, workid);
|
||||
|
||||
DPRINTF(WorkItems, "Work End workid: %d, threadid %d\n", workid, threadid);
|
||||
|
||||
|
|
|
@ -1,4 +1,16 @@
|
|||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2003-2006 The Regents of The University of Michigan
|
||||
* Copyright (c) 2011 Regents of the University of California
|
||||
* All rights reserved.
|
||||
|
@ -43,6 +55,7 @@
|
|||
#include "config/the_isa.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "debug/Loader.hh"
|
||||
#include "debug/WorkItems.hh"
|
||||
#include "kern/kernel_stats.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/physical.hh"
|
||||
|
@ -68,6 +81,7 @@ System::System(Params *p)
|
|||
memoryMode(p->mem_mode),
|
||||
workItemsBegin(0),
|
||||
workItemsEnd(0),
|
||||
numWorkIds(p->num_work_ids),
|
||||
_params(p),
|
||||
totalNumInsts(0),
|
||||
instEventQueue("system instruction-based event queue")
|
||||
|
@ -158,6 +172,9 @@ System::~System()
|
|||
{
|
||||
delete kernelSymtab;
|
||||
delete kernel;
|
||||
|
||||
for (uint32_t j = 0; j < numWorkIds; j++)
|
||||
delete workItemStats[j];
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -319,6 +336,37 @@ System::unserialize(Checkpoint *cp, const string §ion)
|
|||
UNSERIALIZE_SCALAR(nextPID);
|
||||
}
|
||||
|
||||
void
|
||||
System::regStats()
|
||||
{
|
||||
for (uint32_t j = 0; j < numWorkIds ; j++) {
|
||||
workItemStats[j] = new Stats::Histogram();
|
||||
stringstream namestr;
|
||||
ccprintf(namestr, "work_item_type%d", j);
|
||||
workItemStats[j]->init(20)
|
||||
.name(name() + "." + namestr.str())
|
||||
.desc("Run time stat for" + namestr.str())
|
||||
.prereq(*workItemStats[j]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
System::workItemEnd(uint32_t tid, uint32_t workid)
|
||||
{
|
||||
std::pair<uint32_t,uint32_t> p(tid, workid);
|
||||
if (!lastWorkItemStarted.count(p))
|
||||
return;
|
||||
|
||||
Tick samp = curTick() - lastWorkItemStarted[p];
|
||||
DPRINTF(WorkItems, "Work item end: %d\t%d\t%lld\n", tid, workid, samp);
|
||||
|
||||
if (workid >= numWorkIds)
|
||||
fatal("Got workid greater than specified in system configuration\n");
|
||||
|
||||
workItemStats[workid]->sample(samp);
|
||||
lastWorkItemStarted.erase(p);
|
||||
}
|
||||
|
||||
void
|
||||
System::printSystems()
|
||||
{
|
||||
|
|
|
@ -157,9 +157,11 @@ class System : public SimObject
|
|||
Enums::MemoryMode memoryMode;
|
||||
uint64_t workItemsBegin;
|
||||
uint64_t workItemsEnd;
|
||||
uint32_t numWorkIds;
|
||||
std::vector<bool> activeCpus;
|
||||
|
||||
public:
|
||||
virtual void regStats();
|
||||
/**
|
||||
* Called by pseudo_inst to track the number of work items started by this
|
||||
* system.
|
||||
|
@ -198,6 +200,14 @@ class System : public SimObject
|
|||
return count;
|
||||
}
|
||||
|
||||
inline void workItemBegin(uint32_t tid, uint32_t workid)
|
||||
{
|
||||
std::pair<uint32_t,uint32_t> p(tid, workid);
|
||||
lastWorkItemStarted[p] = curTick();
|
||||
}
|
||||
|
||||
void workItemEnd(uint32_t tid, uint32_t workid);
|
||||
|
||||
/**
|
||||
* Fix up an address used to match PCs for hooking simulator
|
||||
* events on to target function executions. See comment in
|
||||
|
@ -285,6 +295,8 @@ class System : public SimObject
|
|||
public:
|
||||
Counter totalNumInsts;
|
||||
EventQueue instEventQueue;
|
||||
std::map<std::pair<uint32_t,uint32_t>, Tick> lastWorkItemStarted;
|
||||
std::map<uint32_t, Stats::Histogram*> workItemStats;
|
||||
|
||||
////////////////////////////////////////////
|
||||
//
|
||||
|
|
|
@ -500,7 +500,7 @@ egid=100
|
|||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
|
|
@ -3,11 +3,10 @@ Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing/si
|
|||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 16 2011 11:08:03
|
||||
gem5 started Nov 17 2011 13:09:16
|
||||
gem5 compiled Jan 9 2012 14:18:02
|
||||
gem5 started Jan 9 2012 14:29:08
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing
|
||||
tests
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
spec_init
|
||||
|
|
|
@ -3,26 +3,26 @@
|
|||
sim_seconds 0.586294 # Number of seconds simulated
|
||||
sim_ticks 586294224000 # Number of ticks simulated
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 112274 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 40595683 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 244844 # Number of bytes of host memory used
|
||||
host_seconds 14442.28 # Real time elapsed on the host
|
||||
host_inst_rate 115446 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 41742717 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 244900 # Number of bytes of host memory used
|
||||
host_seconds 14045.43 # Real time elapsed on the host
|
||||
sim_insts 1621493982 # Number of instructions simulated
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.numCycles 1172588449 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.BPredUnit.lookups 142448983 # Number of BP lookups
|
||||
system.cpu.BPredUnit.condPredicted 142448983 # Number of conditional branches predicted
|
||||
system.cpu.BPredUnit.lookups 142448982 # Number of BP lookups
|
||||
system.cpu.BPredUnit.condPredicted 142448982 # Number of conditional branches predicted
|
||||
system.cpu.BPredUnit.condIncorrect 7804844 # Number of conditional branches incorrect
|
||||
system.cpu.BPredUnit.BTBLookups 134509889 # Number of BTB lookups
|
||||
system.cpu.BPredUnit.BTBLookups 134509888 # Number of BTB lookups
|
||||
system.cpu.BPredUnit.BTBHits 133615988 # Number of BTB hits
|
||||
system.cpu.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
|
||||
system.cpu.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target.
|
||||
system.cpu.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions.
|
||||
system.cpu.fetch.icacheStallCycles 143149229 # Number of cycles fetch is stalled on an Icache miss
|
||||
system.cpu.fetch.Insts 1143761055 # Number of instructions fetch has processed
|
||||
system.cpu.fetch.Branches 142448983 # Number of branches that fetch encountered
|
||||
system.cpu.fetch.Insts 1143761054 # Number of instructions fetch has processed
|
||||
system.cpu.fetch.Branches 142448982 # Number of branches that fetch encountered
|
||||
system.cpu.fetch.predictedBranches 133615988 # Number of branches that fetch has predicted taken
|
||||
system.cpu.fetch.Cycles 330199440 # Number of cycles fetch has run and was not squashing or blocked
|
||||
system.cpu.fetch.SquashCycles 57554993 # Number of cycles fetch has spent squashing
|
||||
|
@ -66,32 +66,32 @@ system.cpu.rename.RenamedInsts 2043122328 # Nu
|
|||
system.cpu.rename.ROBFullEvents 2634 # Number of times rename has blocked due to ROB full
|
||||
system.cpu.rename.IQFullEvents 278313629 # Number of times rename has blocked due to IQ full
|
||||
system.cpu.rename.LSQFullEvents 129499394 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu.rename.RenamedOperands 2031527324 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenameLookups 4954653616 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.int_rename_lookups 4954649396 # Number of integer rename lookups
|
||||
system.cpu.rename.RenamedOperands 2031527322 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenameLookups 4954653611 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.int_rename_lookups 4954649391 # Number of integer rename lookups
|
||||
system.cpu.rename.fp_rename_lookups 4220 # Number of floating rename lookups
|
||||
system.cpu.rename.CommittedMaps 1617994650 # Number of HB maps that are committed
|
||||
system.cpu.rename.UndoneMaps 413532674 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.UndoneMaps 413532672 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.serializingInsts 91 # count of serializing insts renamed
|
||||
system.cpu.rename.tempSerializingInsts 91 # count of temporary serializing insts renamed
|
||||
system.cpu.rename.skidInsts 793190427 # count of insts added to the skid buffer
|
||||
system.cpu.memDep0.insertedLoads 519090632 # Number of loads inserted to the mem dependence unit.
|
||||
system.cpu.memDep0.insertedStores 226808407 # Number of stores inserted to the mem dependence unit.
|
||||
system.cpu.memDep0.conflictingLoads 354951645 # Number of conflicting loads.
|
||||
system.cpu.memDep0.conflictingStores 148937435 # Number of conflicting stores.
|
||||
system.cpu.iq.iqInstsAdded 1986583518 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 216 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 1781630005 # Number of instructions issued
|
||||
system.cpu.memDep0.conflictingStores 148937436 # Number of conflicting stores.
|
||||
system.cpu.iq.iqInstsAdded 1986583516 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 218 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 1781630004 # Number of instructions issued
|
||||
system.cpu.iq.iqSquashedInstsIssued 180825 # Number of squashed instructions issued
|
||||
system.cpu.iq.iqSquashedInstsExamined 364939190 # Number of squashed instructions iterated over during squash; mainly for profiling
|
||||
system.cpu.iq.iqSquashedOperandsExamined 670712331 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 166 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.iqSquashedOperandsExamined 670712329 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 168 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.issued_per_cycle::samples 1172439660 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::mean 1.519592 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::stdev 1.333662 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::underflows 0 0.00% 0.00% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::0 271921708 23.19% 23.19% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::1 416937500 35.56% 58.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::0 271921709 23.19% 23.19% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::1 416937499 35.56% 58.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::2 234725234 20.02% 78.77% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::3 156776493 13.37% 92.15% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::4 54385701 4.64% 96.79% # Number of insts issued each cycle
|
||||
|
@ -138,7 +138,7 @@ system.cpu.iq.fu_full::MemWrite 148998 5.73% 100.00% # at
|
|||
system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.FU_type_0::No_OpClass 26894248 1.51% 1.51% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 1102052870 61.86% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 1102052869 61.86% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntMult 0 0.00% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntDiv 0 0.00% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 63.37% # Type of FU issued
|
||||
|
@ -171,17 +171,17 @@ system.cpu.iq.FU_type_0::MemRead 457985397 25.71% 89.07% # Ty
|
|||
system.cpu.iq.FU_type_0::MemWrite 194697490 10.93% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 1781630005 # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 1781630004 # Type of FU issued
|
||||
system.cpu.iq.rate 1.519399 # Inst issue rate
|
||||
system.cpu.iq.fu_busy_cnt 2598665 # FU busy when requested
|
||||
system.cpu.iq.fu_busy_rate 0.001459 # FU busy rate (busy events/executed inst)
|
||||
system.cpu.iq.int_inst_queue_reads 4738479065 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_reads 4738479063 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_writes 2351732069 # Number of integer instruction queue writes
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 1760053766 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 1760053765 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.fp_inst_queue_reads 95 # Number of floating instruction queue reads
|
||||
system.cpu.iq.fp_inst_queue_writes 542 # Number of floating instruction queue writes
|
||||
system.cpu.iq.fp_inst_queue_wakeup_accesses 12 # Number of floating instruction queue wakeup accesses
|
||||
system.cpu.iq.int_alu_accesses 1757334382 # Number of integer alu accesses
|
||||
system.cpu.iq.int_alu_accesses 1757334381 # Number of integer alu accesses
|
||||
system.cpu.iq.fp_alu_accesses 40 # Number of floating point alu accesses
|
||||
system.cpu.iew.lsq.thread0.forwLoads 205665909 # Number of loads that had data forwarded from stores
|
||||
system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
|
||||
|
@ -208,7 +208,7 @@ system.cpu.iew.memOrderViolationEvents 216417 # Nu
|
|||
system.cpu.iew.predictedTakenIncorrect 4603219 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu.iew.predictedNotTakenIncorrect 3388875 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu.iew.branchMispredicts 7992094 # Number of branch mispredicts detected at execute
|
||||
system.cpu.iew.iewExecutedInsts 1768232809 # Number of executed instructions
|
||||
system.cpu.iew.iewExecutedInsts 1768232808 # Number of executed instructions
|
||||
system.cpu.iew.iewExecLoadInsts 452047218 # Number of load instructions executed
|
||||
system.cpu.iew.iewExecSquashedInsts 13397196 # Number of squashed instructions skipped in execute
|
||||
system.cpu.iew.exec_swp 0 # number of swp insts executed
|
||||
|
@ -217,8 +217,8 @@ system.cpu.iew.exec_refs 645919458 # nu
|
|||
system.cpu.iew.exec_branches 112169596 # Number of branches executed
|
||||
system.cpu.iew.exec_stores 193872240 # Number of stores executed
|
||||
system.cpu.iew.exec_rate 1.507974 # Inst execution rate
|
||||
system.cpu.iew.wb_sent 1766226830 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 1760053778 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_sent 1766226829 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 1760053777 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_producers 1336567337 # num instructions producing a value
|
||||
system.cpu.iew.wb_consumers 2003494286 # num instructions consuming a value
|
||||
system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
|
@ -268,9 +268,9 @@ system.cpu.cpi_total 0.723153 # CP
|
|||
system.cpu.ipc 1.382833 # IPC: Instructions Per Cycle
|
||||
system.cpu.ipc_total 1.382833 # IPC: Total IPC of All Threads
|
||||
system.cpu.int_regfile_reads 3273039620 # number of integer regfile reads
|
||||
system.cpu.int_regfile_writes 1756091293 # number of integer regfile writes
|
||||
system.cpu.int_regfile_writes 1756091292 # number of integer regfile writes
|
||||
system.cpu.fp_regfile_reads 12 # number of floating regfile reads
|
||||
system.cpu.misc_regfile_reads 908871446 # number of misc regfile reads
|
||||
system.cpu.misc_regfile_reads 908871445 # number of misc regfile reads
|
||||
system.cpu.icache.replacements 12 # number of replacements
|
||||
system.cpu.icache.tagsinuse 810.394392 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 137025977 # Total number of references to valid blocks.
|
||||
|
|
|
@ -9,6 +9,7 @@ time_sync_spin_threshold=100000000
|
|||
type=System
|
||||
children=cpu membus physmem
|
||||
mem_mode=atomic
|
||||
memories=system.physmem
|
||||
physmem=system.physmem
|
||||
work_begin_ckpt_count=0
|
||||
work_begin_cpu_id_exit=-1
|
||||
|
@ -61,12 +62,12 @@ type=ExeTracer
|
|||
[system.cpu.workload]
|
||||
type=LiveProcess
|
||||
cmd=gzip input.log 1
|
||||
cwd=build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic
|
||||
cwd=build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic
|
||||
egid=100
|
||||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
warn: Sockets disabled, not accepting gdb connections
|
||||
For more information see: http://www.m5sim.org/warn/d946bea6
|
||||
warn: instruction 'fnstcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
warn: instruction 'fldcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
hack: be nice to actually delete the event here
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
M5 Simulator System
|
||||
Redirecting stdout to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic/simout
|
||||
Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
Copyright (c) 2001-2008
|
||||
The Regents of The University of Michigan
|
||||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Apr 19 2011 12:22:33
|
||||
M5 started Apr 19 2011 12:22:36
|
||||
M5 executing on maize
|
||||
command line: build/X86_SE/m5.fast -d build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic -re tests/run.py build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic
|
||||
gem5 compiled Jan 9 2012 14:18:02
|
||||
gem5 started Jan 9 2012 14:29:08
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
spec_init
|
||||
|
|
|
@ -1,34 +1,34 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 3280168 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 202508 # Number of bytes of host memory used
|
||||
host_seconds 494.33 # Real time elapsed on the host
|
||||
host_tick_rate 1950088412 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
sim_seconds 0.963993 # Number of seconds simulated
|
||||
sim_ticks 963992704000 # Number of ticks simulated
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 1220339 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 725502264 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 234168 # Number of bytes of host memory used
|
||||
host_seconds 1328.72 # Real time elapsed on the host
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.numCycles 1927985409 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.num_busy_cycles 1927985409 # Number of busy cycles
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.num_insts 1621493983 # Number of instructions executed
|
||||
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_int_insts 1621354493 # number of integer instructions
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
|
||||
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_mem_refs 607228182 # number of memory refs
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_store_insts 188186057 # Number of store instructions
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.num_busy_cycles 1927985409 # Number of busy cycles
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
|
|
@ -9,6 +9,7 @@ time_sync_spin_threshold=100000000
|
|||
type=System
|
||||
children=cpu membus physmem
|
||||
mem_mode=atomic
|
||||
memories=system.physmem
|
||||
physmem=system.physmem
|
||||
work_begin_ckpt_count=0
|
||||
work_begin_cpu_id_exit=-1
|
||||
|
@ -164,12 +165,12 @@ type=ExeTracer
|
|||
[system.cpu.workload]
|
||||
type=LiveProcess
|
||||
cmd=gzip input.log 1
|
||||
cwd=build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing
|
||||
cwd=build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing
|
||||
egid=100
|
||||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
warn: Sockets disabled, not accepting gdb connections
|
||||
For more information see: http://www.m5sim.org/warn/d946bea6
|
||||
warn: instruction 'fnstcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
warn: instruction 'fldcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
hack: be nice to actually delete the event here
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
M5 Simulator System
|
||||
Redirecting stdout to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing/simout
|
||||
Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
Copyright (c) 2001-2008
|
||||
The Regents of The University of Michigan
|
||||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Apr 19 2011 12:22:33
|
||||
M5 started Apr 19 2011 12:23:09
|
||||
M5 executing on maize
|
||||
command line: build/X86_SE/m5.fast -d build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing -re tests/run.py build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing
|
||||
gem5 compiled Jan 9 2012 14:18:02
|
||||
gem5 started Jan 9 2012 14:29:08
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
spec_init
|
||||
|
|
|
@ -1,223 +1,223 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 2023797 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 210248 # Number of bytes of host memory used
|
||||
host_seconds 801.21 # Real time elapsed on the host
|
||||
host_tick_rate 2250658484 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
sim_seconds 1.803259 # Number of seconds simulated
|
||||
sim_ticks 1803258587000 # Number of ticks simulated
|
||||
system.cpu.dcache.ReadReq_accesses 419042125 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 20490.305383 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 17490.305383 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 418844799 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 4043270000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.000471 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 197326 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 3451292000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000471 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 197326 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_accesses 188186057 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 23997.572756 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 20997.572756 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 187941335 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 5872734000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.001300 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 244722 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 5138568000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.001300 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 244722 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_refs 1372.670239 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 607228182 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 606786134 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 9916004000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.000728 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 442048 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 8589860000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.000728 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 442048 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.occ_blocks::0 4094.896939 # Average occupied blocks per context
|
||||
system.cpu.dcache.occ_percent::0 0.999731 # Average percentage of cache occupancy
|
||||
system.cpu.dcache.overall_accesses 607228182 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 606786134 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 9916004000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.000728 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 442048 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 8589860000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.000728 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 442048 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.dcache.replacements 437952 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 442048 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 4094.896939 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 606786134 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 778540000 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 396372 # number of writebacks
|
||||
system.cpu.icache.ReadReq_accesses 1186516740 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 1186516018 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 40432000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 722 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 38266000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 722 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 1643373.986150 # Average number of references to valid blocks.
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 1186516740 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 1186516018 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 40432000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 722 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 38266000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 722 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.occ_blocks::0 660.186297 # Average occupied blocks per context
|
||||
system.cpu.icache.occ_percent::0 0.322357 # Average percentage of cache occupancy
|
||||
system.cpu.icache.overall_accesses 1186516740 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 1186516018 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 40432000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 722 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 38266000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 722 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 760773 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 846053445 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 242892 # Number of bytes of host memory used
|
||||
host_seconds 2131.38 # Real time elapsed on the host
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.numCycles 3606517174 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.num_insts 1621493983 # Number of instructions executed
|
||||
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_int_insts 1621354493 # number of integer instructions
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
|
||||
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_mem_refs 607228182 # number of memory refs
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_store_insts 188186057 # Number of store instructions
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.num_busy_cycles 3606517174 # Number of busy cycles
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
system.cpu.icache.replacements 4 # number of replacements
|
||||
system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 660.186297 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 1186516018 # Total number of references to valid blocks.
|
||||
system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.avg_refs 1643373.986150 # Average number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.occ_blocks::0 660.186297 # Average occupied blocks per context
|
||||
system.cpu.icache.occ_percent::0 0.322357 # Average percentage of cache occupancy
|
||||
system.cpu.icache.ReadReq_hits 1186516018 # number of ReadReq hits
|
||||
system.cpu.icache.demand_hits 1186516018 # number of demand (read+write) hits
|
||||
system.cpu.icache.overall_hits 1186516018 # number of overall hits
|
||||
system.cpu.icache.ReadReq_misses 722 # number of ReadReq misses
|
||||
system.cpu.icache.demand_misses 722 # number of demand (read+write) misses
|
||||
system.cpu.icache.overall_misses 722 # number of overall misses
|
||||
system.cpu.icache.ReadReq_miss_latency 40432000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.demand_miss_latency 40432000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.overall_miss_latency 40432000 # number of overall miss cycles
|
||||
system.cpu.icache.ReadReq_accesses 1186516740 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.demand_accesses 1186516740 # number of demand (read+write) accesses
|
||||
system.cpu.icache.overall_accesses 1186516740 # number of overall (read+write) accesses
|
||||
system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses
|
||||
system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency
|
||||
system.cpu.icache.demand_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
system.cpu.l2cache.ReadExReq_accesses 244722 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_hits 186469 # number of ReadExReq hits
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 3029156000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 0.238037 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_misses 58253 # number of ReadExReq misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2330120000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.238037 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 58253 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.ReadReq_accesses 198048 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 166833 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 1623180000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.157613 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 31215 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1248600000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.157613 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 31215 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.Writeback_accesses 396372 # number of Writeback accesses(hits+misses)
|
||||
system.cpu.l2cache.Writeback_hits 396372 # number of Writeback hits
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_misses 722 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.demand_mshr_misses 722 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.overall_mshr_misses 722 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 38266000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_latency 38266000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_latency 38266000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.replacements 437952 # number of replacements
|
||||
system.cpu.dcache.tagsinuse 4094.896939 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 606786134 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.sampled_refs 442048 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.avg_refs 1372.670239 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 778540000 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.occ_blocks::0 4094.896939 # Average occupied blocks per context
|
||||
system.cpu.dcache.occ_percent::0 0.999731 # Average percentage of cache occupancy
|
||||
system.cpu.dcache.ReadReq_hits 418844799 # number of ReadReq hits
|
||||
system.cpu.dcache.WriteReq_hits 187941335 # number of WriteReq hits
|
||||
system.cpu.dcache.demand_hits 606786134 # number of demand (read+write) hits
|
||||
system.cpu.dcache.overall_hits 606786134 # number of overall hits
|
||||
system.cpu.dcache.ReadReq_misses 197326 # number of ReadReq misses
|
||||
system.cpu.dcache.WriteReq_misses 244722 # number of WriteReq misses
|
||||
system.cpu.dcache.demand_misses 442048 # number of demand (read+write) misses
|
||||
system.cpu.dcache.overall_misses 442048 # number of overall misses
|
||||
system.cpu.dcache.ReadReq_miss_latency 4043270000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 5872734000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.demand_miss_latency 9916004000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.overall_miss_latency 9916004000 # number of overall miss cycles
|
||||
system.cpu.dcache.ReadReq_accesses 419042125 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_accesses 188186057 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.demand_accesses 607228182 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.overall_accesses 607228182 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.000471 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.001300 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.demand_miss_rate 0.000728 # miss rate for demand accesses
|
||||
system.cpu.dcache.overall_miss_rate 0.000728 # miss rate for overall accesses
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 20490.305383 # average ReadReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 23997.572756 # average WriteReq miss latency
|
||||
system.cpu.dcache.demand_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.writebacks 396372 # number of writebacks
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.ReadReq_mshr_misses 197326 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 244722 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.demand_mshr_misses 442048 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.overall_mshr_misses 442048 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 3451292000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 5138568000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_latency 8589860000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_latency 8589860000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000471 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.001300 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.000728 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.000728 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 17490.305383 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 20997.572756 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.replacements 71208 # number of replacements
|
||||
system.cpu.l2cache.tagsinuse 18056.923092 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 423014 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.sampled_refs 86793 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.avg_refs 4.873826 # Average number of references to valid blocks.
|
||||
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 442770 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 353302 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 4652336000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.202064 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 89468 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 3578720000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.202064 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 89468 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.occ_blocks::0 1869.199731 # Average occupied blocks per context
|
||||
system.cpu.l2cache.occ_blocks::1 16187.723361 # Average occupied blocks per context
|
||||
system.cpu.l2cache.occ_percent::0 0.057043 # Average percentage of cache occupancy
|
||||
system.cpu.l2cache.occ_percent::1 0.494010 # Average percentage of cache occupancy
|
||||
system.cpu.l2cache.ReadReq_hits 166833 # number of ReadReq hits
|
||||
system.cpu.l2cache.Writeback_hits 396372 # number of Writeback hits
|
||||
system.cpu.l2cache.ReadExReq_hits 186469 # number of ReadExReq hits
|
||||
system.cpu.l2cache.demand_hits 353302 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.overall_hits 353302 # number of overall hits
|
||||
system.cpu.l2cache.ReadReq_misses 31215 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadExReq_misses 58253 # number of ReadExReq misses
|
||||
system.cpu.l2cache.demand_misses 89468 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.overall_misses 89468 # number of overall misses
|
||||
system.cpu.l2cache.ReadReq_miss_latency 1623180000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 3029156000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.demand_miss_latency 4652336000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.overall_miss_latency 4652336000 # number of overall miss cycles
|
||||
system.cpu.l2cache.ReadReq_accesses 198048 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.Writeback_accesses 396372 # number of Writeback accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_accesses 244722 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.demand_accesses 442770 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.overall_accesses 442770 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.157613 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 0.238037 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.demand_miss_rate 0.202064 # miss rate for demand accesses
|
||||
system.cpu.l2cache.overall_miss_rate 0.202064 # miss rate for overall accesses
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52000 # average overall miss latency
|
||||
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.writebacks 58007 # number of writebacks
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 31215 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 58253 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.demand_mshr_misses 89468 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_misses 89468 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1248600000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2330120000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 3578720000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 3578720000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.157613 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.238037 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.202064 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.202064 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40000 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 353302 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 4652336000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.202064 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 89468 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 3578720000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.202064 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 89468 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.l2cache.replacements 71208 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 86793 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 18056.923092 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 423014 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 58007 # number of writebacks
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
system.cpu.numCycles 3606517174 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.num_busy_cycles 3606517174 # Number of busy cycles
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.num_insts 1621493983 # Number of instructions executed
|
||||
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
|
||||
system.cpu.num_int_insts 1621354493 # number of integer instructions
|
||||
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
|
||||
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_mem_refs 607228182 # number of memory refs
|
||||
system.cpu.num_store_insts 188186057 # Number of store instructions
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
|
|
@ -9,18 +9,19 @@ time_sync_spin_threshold=100000000
|
|||
type=LinuxArmSystem
|
||||
children=bridge cf0 cpu0 cpu1 intrctrl iobus iocache l2c membus nvmem physmem realview terminal toL2Bus vncserver
|
||||
boot_cpu_frequency=500
|
||||
boot_loader=/projects/pd/randd/dist/binaries/boot.arm
|
||||
boot_loader=/dist/m5/system/binaries/boot.arm
|
||||
boot_loader_mem=system.nvmem
|
||||
boot_osflags=earlyprintk console=ttyAMA0 lpj=19988480 norandmaps rw loglevel=8 mem=128MB root=/dev/sda1
|
||||
flags_addr=268435504
|
||||
gic_cpu_addr=520093952
|
||||
init_param=0
|
||||
kernel=/projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
kernel=/dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
load_addr_mask=268435455
|
||||
machine_type=RealView_PBX
|
||||
mem_mode=timing
|
||||
memories=system.nvmem system.physmem
|
||||
midr_regval=890224640
|
||||
num_work_ids=16
|
||||
physmem=system.physmem
|
||||
readfile=tests/halt.sh
|
||||
symbolfile=
|
||||
|
@ -63,7 +64,7 @@ table_size=65536
|
|||
|
||||
[system.cf0.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-arm-ael.img
|
||||
image_file=/dist/m5/system/disks/linux-arm-ael.img
|
||||
read_only=true
|
||||
|
||||
[system.cpu0]
|
||||
|
@ -1495,6 +1496,7 @@ port=system.l2c.cpu_side system.cpu0.icache.mem_side system.cpu0.dcache.mem_side
|
|||
|
||||
[system.vncserver]
|
||||
type=VncServer
|
||||
frame_capture=false
|
||||
number=0
|
||||
port=5900
|
||||
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
Redirecting stdout to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual/simout
|
||||
Redirecting stderr to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 21 2011 16:32:34
|
||||
gem5 started Nov 22 2011 02:00:14
|
||||
gem5 executing on u200540-lin
|
||||
gem5 compiled Jan 8 2012 22:12:58
|
||||
gem5 started Jan 9 2012 03:33:38
|
||||
gem5 executing on zizzer
|
||||
command line: build/ARM_FS/gem5.opt -d build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual -re tests/run.py build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: kernel located at: /projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: kernel located at: /dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: Using bootloader at address 0x80000000
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
Exiting @ tick 2582494395500 because m5_exit instruction encountered
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
sim_seconds 2.582494 # Number of seconds simulated
|
||||
sim_ticks 2582494395500 # Number of ticks simulated
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 86259 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2789337609 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 380504 # Number of bytes of host memory used
|
||||
host_seconds 925.85 # Real time elapsed on the host
|
||||
host_inst_rate 65512 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2118472138 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 384260 # Number of bytes of host memory used
|
||||
host_seconds 1219.04 # Real time elapsed on the host
|
||||
sim_insts 79862069 # Number of instructions simulated
|
||||
system.l2c.replacements 132200 # number of replacements
|
||||
system.l2c.tagsinuse 27582.989225 # Cycle average of tags in use
|
||||
|
@ -312,12 +312,12 @@ system.cpu0.rename.ROBFullEvents 1483 # Nu
|
|||
system.cpu0.rename.IQFullEvents 580883 # Number of times rename has blocked due to IQ full
|
||||
system.cpu0.rename.LSQFullEvents 3149232 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu0.rename.FullRegisterEvents 205 # Number of times there has been no free registers
|
||||
system.cpu0.rename.RenamedOperands 54779836 # Number of destination operands rename has renamed
|
||||
system.cpu0.rename.RenamedOperands 54779837 # Number of destination operands rename has renamed
|
||||
system.cpu0.rename.RenameLookups 247536349 # Number of register rename lookups that rename has made
|
||||
system.cpu0.rename.int_rename_lookups 247487579 # Number of integer rename lookups
|
||||
system.cpu0.rename.fp_rename_lookups 48770 # Number of floating rename lookups
|
||||
system.cpu0.rename.CommittedMaps 41441157 # Number of HB maps that are committed
|
||||
system.cpu0.rename.UndoneMaps 13338678 # Number of HB maps that are undone due to squashing
|
||||
system.cpu0.rename.UndoneMaps 13338679 # Number of HB maps that are undone due to squashing
|
||||
system.cpu0.rename.serializingInsts 828868 # count of serializing insts renamed
|
||||
system.cpu0.rename.tempSerializingInsts 763855 # count of temporary serializing insts renamed
|
||||
system.cpu0.rename.skidInsts 8500592 # count of insts added to the skid buffer
|
||||
|
@ -325,13 +325,13 @@ system.cpu0.memDep0.insertedLoads 11770384 # Nu
|
|||
system.cpu0.memDep0.insertedStores 7686805 # Number of stores inserted to the mem dependence unit.
|
||||
system.cpu0.memDep0.conflictingLoads 1443183 # Number of conflicting loads.
|
||||
system.cpu0.memDep0.conflictingStores 1570137 # Number of conflicting stores.
|
||||
system.cpu0.iq.iqInstsAdded 50961906 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu0.iq.iqNonSpecInstsAdded 1297751 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu0.iq.iqInstsIssued 80276175 # Number of instructions issued
|
||||
system.cpu0.iq.iqInstsAdded 50961905 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu0.iq.iqNonSpecInstsAdded 1297752 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu0.iq.iqInstsIssued 80276174 # Number of instructions issued
|
||||
system.cpu0.iq.iqSquashedInstsIssued 137636 # Number of squashed instructions issued
|
||||
system.cpu0.iq.iqSquashedInstsExamined 9888896 # Number of squashed instructions iterated over during squash; mainly for profiling
|
||||
system.cpu0.iq.iqSquashedOperandsExamined 22816025 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu0.iq.iqSquashedNonSpecRemoved 253323 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu0.iq.iqSquashedNonSpecRemoved 253324 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu0.iq.issued_per_cycle::samples 109741052 # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::mean 0.731505 # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::stdev 1.440076 # Number of insts issued each cycle
|
||||
|
@ -340,8 +340,8 @@ system.cpu0.iq.issued_per_cycle::0 80125799 73.01% 73.01% # Nu
|
|||
system.cpu0.iq.issued_per_cycle::1 10111373 9.21% 82.23% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::2 4133530 3.77% 85.99% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::3 3177611 2.90% 88.89% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::4 9954077 9.07% 97.96% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::5 1265280 1.15% 99.11% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::4 9954078 9.07% 97.96% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::5 1265279 1.15% 99.11% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::6 670333 0.61% 99.72% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::7 224189 0.20% 99.93% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::8 78860 0.07% 100.00% # Number of insts issued each cycle
|
||||
|
@ -384,7 +384,7 @@ system.cpu0.iq.fu_full::MemWrite 285533 3.56% 100.00% # at
|
|||
system.cpu0.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu0.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu0.iq.FU_type_0::No_OpClass 88461 0.11% 0.11% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntAlu 29731482 37.04% 37.15% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntAlu 29731481 37.04% 37.15% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntMult 62351 0.08% 37.22% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntDiv 0 0.00% 37.22% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::FloatAdd 0 0.00% 37.22% # Type of FU issued
|
||||
|
@ -417,17 +417,17 @@ system.cpu0.iq.FU_type_0::MemRead 43135014 53.73% 90.96% # Ty
|
|||
system.cpu0.iq.FU_type_0::MemWrite 7257159 9.04% 100.00% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::total 80276175 # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::total 80276174 # Type of FU issued
|
||||
system.cpu0.iq.rate 0.227757 # Inst issue rate
|
||||
system.cpu0.iq.fu_busy_cnt 8028360 # FU busy when requested
|
||||
system.cpu0.iq.fu_busy_rate 0.100009 # FU busy rate (busy events/executed inst)
|
||||
system.cpu0.iq.int_inst_queue_reads 278513866 # Number of integer instruction queue reads
|
||||
system.cpu0.iq.int_inst_queue_reads 278513864 # Number of integer instruction queue reads
|
||||
system.cpu0.iq.int_inst_queue_writes 62161443 # Number of integer instruction queue writes
|
||||
system.cpu0.iq.int_inst_queue_wakeup_accesses 46668616 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu0.iq.int_inst_queue_wakeup_accesses 46668615 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu0.iq.fp_inst_queue_reads 11568 # Number of floating instruction queue reads
|
||||
system.cpu0.iq.fp_inst_queue_writes 6980 # Number of floating instruction queue writes
|
||||
system.cpu0.iq.fp_inst_queue_wakeup_accesses 5172 # Number of floating instruction queue wakeup accesses
|
||||
system.cpu0.iq.int_alu_accesses 88210043 # Number of integer alu accesses
|
||||
system.cpu0.iq.int_alu_accesses 88210042 # Number of integer alu accesses
|
||||
system.cpu0.iq.fp_alu_accesses 6031 # Number of floating point alu accesses
|
||||
system.cpu0.iew.lsq.thread0.forwLoads 399886 # Number of loads that had data forwarded from stores
|
||||
system.cpu0.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
|
||||
|
@ -447,14 +447,14 @@ system.cpu0.iew.iewDispatchedInsts 52433539 # Nu
|
|||
system.cpu0.iew.iewDispSquashedInsts 243567 # Number of squashed instructions skipped by dispatch
|
||||
system.cpu0.iew.iewDispLoadInsts 11770384 # Number of dispatched load instructions
|
||||
system.cpu0.iew.iewDispStoreInsts 7686805 # Number of dispatched store instructions
|
||||
system.cpu0.iew.iewDispNonSpecInsts 865739 # Number of dispatched non-speculative instructions
|
||||
system.cpu0.iew.iewDispNonSpecInsts 865740 # Number of dispatched non-speculative instructions
|
||||
system.cpu0.iew.iewIQFullEvents 62160 # Number of times the IQ has become full, causing a stall
|
||||
system.cpu0.iew.iewLSQFullEvents 5553 # Number of times the LSQ has become full, causing a stall
|
||||
system.cpu0.iew.memOrderViolationEvents 20554 # Number of memory order violations
|
||||
system.cpu0.iew.predictedTakenIncorrect 507509 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu0.iew.predictedNotTakenIncorrect 136100 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu0.iew.branchMispredicts 643609 # Number of branch mispredicts detected at execute
|
||||
system.cpu0.iew.iewExecutedInsts 79551296 # Number of executed instructions
|
||||
system.cpu0.iew.iewExecutedInsts 79551295 # Number of executed instructions
|
||||
system.cpu0.iew.iewExecLoadInsts 42843907 # Number of load instructions executed
|
||||
system.cpu0.iew.iewExecSquashedInsts 724879 # Number of squashed instructions skipped in execute
|
||||
system.cpu0.iew.exec_swp 0 # number of swp insts executed
|
||||
|
@ -463,8 +463,8 @@ system.cpu0.iew.exec_refs 50011427 # nu
|
|||
system.cpu0.iew.exec_branches 6433542 # Number of branches executed
|
||||
system.cpu0.iew.exec_stores 7167520 # Number of stores executed
|
||||
system.cpu0.iew.exec_rate 0.225700 # Inst execution rate
|
||||
system.cpu0.iew.wb_sent 79133798 # cumulative count of insts sent to commit
|
||||
system.cpu0.iew.wb_count 46673788 # cumulative count of insts written-back
|
||||
system.cpu0.iew.wb_sent 79133797 # cumulative count of insts sent to commit
|
||||
system.cpu0.iew.wb_count 46673787 # cumulative count of insts written-back
|
||||
system.cpu0.iew.wb_producers 24793926 # num instructions producing a value
|
||||
system.cpu0.iew.wb_consumers 46078393 # num instructions consuming a value
|
||||
system.cpu0.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
|
@ -514,8 +514,8 @@ system.cpu0.cpi 8.431852 # CP
|
|||
system.cpu0.cpi_total 8.431852 # CPI: Total CPI of All Threads
|
||||
system.cpu0.ipc 0.118598 # IPC: Instructions Per Cycle
|
||||
system.cpu0.ipc_total 0.118598 # IPC: Total IPC of All Threads
|
||||
system.cpu0.int_regfile_reads 354175082 # number of integer regfile reads
|
||||
system.cpu0.int_regfile_writes 46137252 # number of integer regfile writes
|
||||
system.cpu0.int_regfile_reads 354175079 # number of integer regfile reads
|
||||
system.cpu0.int_regfile_writes 46137251 # number of integer regfile writes
|
||||
system.cpu0.fp_regfile_reads 4205 # number of floating regfile reads
|
||||
system.cpu0.fp_regfile_writes 1348 # number of floating regfile writes
|
||||
system.cpu0.misc_regfile_reads 65629786 # number of misc regfile reads
|
||||
|
|
Binary file not shown.
|
@ -9,18 +9,19 @@ time_sync_spin_threshold=100000000
|
|||
type=LinuxArmSystem
|
||||
children=bridge cf0 cpu intrctrl iobus iocache l2c membus nvmem physmem realview terminal toL2Bus vncserver
|
||||
boot_cpu_frequency=500
|
||||
boot_loader=/projects/pd/randd/dist/binaries/boot.arm
|
||||
boot_loader=/dist/m5/system/binaries/boot.arm
|
||||
boot_loader_mem=system.nvmem
|
||||
boot_osflags=earlyprintk console=ttyAMA0 lpj=19988480 norandmaps rw loglevel=8 mem=128MB root=/dev/sda1
|
||||
flags_addr=268435504
|
||||
gic_cpu_addr=520093952
|
||||
init_param=0
|
||||
kernel=/projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
kernel=/dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
load_addr_mask=268435455
|
||||
machine_type=RealView_PBX
|
||||
mem_mode=timing
|
||||
memories=system.nvmem system.physmem
|
||||
midr_regval=890224640
|
||||
num_work_ids=16
|
||||
physmem=system.physmem
|
||||
readfile=tests/halt.sh
|
||||
symbolfile=
|
||||
|
@ -63,7 +64,7 @@ table_size=65536
|
|||
|
||||
[system.cf0.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-arm-ael.img
|
||||
image_file=/dist/m5/system/disks/linux-arm-ael.img
|
||||
read_only=true
|
||||
|
||||
[system.cpu]
|
||||
|
@ -1041,6 +1042,7 @@ port=system.l2c.cpu_side system.cpu.icache.mem_side system.cpu.dcache.mem_side s
|
|||
|
||||
[system.vncserver]
|
||||
type=VncServer
|
||||
frame_capture=false
|
||||
number=0
|
||||
port=5900
|
||||
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
Redirecting stdout to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3/simout
|
||||
Redirecting stderr to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 21 2011 16:32:34
|
||||
gem5 started Nov 22 2011 02:00:08
|
||||
gem5 executing on u200540-lin
|
||||
gem5 compiled Jan 8 2012 22:12:58
|
||||
gem5 started Jan 9 2012 03:32:35
|
||||
gem5 executing on zizzer
|
||||
command line: build/ARM_FS/gem5.opt -d build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3 -re tests/run.py build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: kernel located at: /projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: kernel located at: /dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: Using bootloader at address 0x80000000
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
Exiting @ tick 2503566110500 because m5_exit instruction encountered
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
sim_seconds 2.503566 # Number of seconds simulated
|
||||
sim_ticks 2503566110500 # Number of ticks simulated
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 84156 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2743719152 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 380536 # Number of bytes of host memory used
|
||||
host_seconds 912.47 # Real time elapsed on the host
|
||||
host_inst_rate 72389 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2360079964 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 384076 # Number of bytes of host memory used
|
||||
host_seconds 1060.80 # Real time elapsed on the host
|
||||
sim_insts 76790007 # Number of instructions simulated
|
||||
system.l2c.replacements 119509 # number of replacements
|
||||
system.l2c.tagsinuse 25929.897253 # Cycle average of tags in use
|
||||
|
@ -270,12 +270,12 @@ system.cpu.rename.ROBFullEvents 4400 # Nu
|
|||
system.cpu.rename.IQFullEvents 914485 # Number of times rename has blocked due to IQ full
|
||||
system.cpu.rename.LSQFullEvents 3979731 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu.rename.FullRegisterEvents 42252 # Number of times there has been no free registers
|
||||
system.cpu.rename.RenamedOperands 118358542 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenamedOperands 118358543 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenameLookups 523323093 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.int_rename_lookups 523225639 # Number of integer rename lookups
|
||||
system.cpu.rename.fp_rename_lookups 97454 # Number of floating rename lookups
|
||||
system.cpu.rename.CommittedMaps 77492718 # Number of HB maps that are committed
|
||||
system.cpu.rename.UndoneMaps 40865823 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.UndoneMaps 40865824 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.serializingInsts 1204637 # count of serializing insts renamed
|
||||
system.cpu.rename.tempSerializingInsts 1098724 # count of temporary serializing insts renamed
|
||||
system.cpu.rename.skidInsts 12304657 # count of insts added to the skid buffer
|
||||
|
@ -283,13 +283,13 @@ system.cpu.memDep0.insertedLoads 21982315 # Nu
|
|||
system.cpu.memDep0.insertedStores 14168730 # Number of stores inserted to the mem dependence unit.
|
||||
system.cpu.memDep0.conflictingLoads 1896802 # Number of conflicting loads.
|
||||
system.cpu.memDep0.conflictingStores 2281380 # Number of conflicting stores.
|
||||
system.cpu.iq.iqInstsAdded 102860212 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 1874615 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 126873317 # Number of instructions issued
|
||||
system.cpu.iq.iqInstsAdded 102860211 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 1874616 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 126873316 # Number of instructions issued
|
||||
system.cpu.iq.iqSquashedInstsIssued 252471 # Number of squashed instructions issued
|
||||
system.cpu.iq.iqSquashedInstsExamined 26973483 # Number of squashed instructions iterated over during squash; mainly for profiling
|
||||
system.cpu.iq.iqSquashedOperandsExamined 72956952 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 374922 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 374923 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.issued_per_cycle::samples 155542524 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::mean 0.815683 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::stdev 1.505358 # Number of insts issued each cycle
|
||||
|
@ -298,8 +298,8 @@ system.cpu.iq.issued_per_cycle::0 108919716 70.03% 70.03% # Nu
|
|||
system.cpu.iq.issued_per_cycle::1 15115277 9.72% 79.74% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::2 7538109 4.85% 84.59% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::3 6517896 4.19% 88.78% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::4 12766128 8.21% 96.99% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::5 2735747 1.76% 98.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::4 12766129 8.21% 96.99% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::5 2735746 1.76% 98.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::6 1395145 0.90% 99.64% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::7 422031 0.27% 99.91% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::8 132475 0.09% 100.00% # Number of insts issued each cycle
|
||||
|
@ -342,7 +342,7 @@ system.cpu.iq.fu_full::MemWrite 436630 4.91% 100.00% # at
|
|||
system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.FU_type_0::No_OpClass 106530 0.08% 0.08% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 60069483 47.35% 47.43% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 60069482 47.35% 47.43% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntMult 96615 0.08% 47.51% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntDiv 0 0.00% 47.51% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 47.51% # Type of FU issued
|
||||
|
@ -375,17 +375,17 @@ system.cpu.iq.FU_type_0::MemRead 53942685 42.52% 90.02% # Ty
|
|||
system.cpu.iq.FU_type_0::MemWrite 12655733 9.98% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 126873317 # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 126873316 # Type of FU issued
|
||||
system.cpu.iq.rate 0.305048 # Inst issue rate
|
||||
system.cpu.iq.fu_busy_cnt 8900311 # FU busy when requested
|
||||
system.cpu.iq.fu_busy_rate 0.070151 # FU busy rate (busy events/executed inst)
|
||||
system.cpu.iq.int_inst_queue_reads 418533130 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_reads 418533128 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_writes 131726191 # Number of integer instruction queue writes
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 87292109 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 87292108 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.fp_inst_queue_reads 24017 # Number of floating instruction queue reads
|
||||
system.cpu.iq.fp_inst_queue_writes 13690 # Number of floating instruction queue writes
|
||||
system.cpu.iq.fp_inst_queue_wakeup_accesses 10446 # Number of floating instruction queue wakeup accesses
|
||||
system.cpu.iq.int_alu_accesses 135654306 # Number of integer alu accesses
|
||||
system.cpu.iq.int_alu_accesses 135654305 # Number of integer alu accesses
|
||||
system.cpu.iq.fp_alu_accesses 12792 # Number of floating point alu accesses
|
||||
system.cpu.iew.lsq.thread0.forwLoads 614767 # Number of loads that had data forwarded from stores
|
||||
system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
|
||||
|
@ -405,14 +405,14 @@ system.cpu.iew.iewDispatchedInsts 104949442 # Nu
|
|||
system.cpu.iew.iewDispSquashedInsts 473979 # Number of squashed instructions skipped by dispatch
|
||||
system.cpu.iew.iewDispLoadInsts 21982315 # Number of dispatched load instructions
|
||||
system.cpu.iew.iewDispStoreInsts 14168730 # Number of dispatched store instructions
|
||||
system.cpu.iew.iewDispNonSpecInsts 1228030 # Number of dispatched non-speculative instructions
|
||||
system.cpu.iew.iewDispNonSpecInsts 1228031 # Number of dispatched non-speculative instructions
|
||||
system.cpu.iew.iewIQFullEvents 85187 # Number of times the IQ has become full, causing a stall
|
||||
system.cpu.iew.iewLSQFullEvents 7556 # Number of times the LSQ has become full, causing a stall
|
||||
system.cpu.iew.memOrderViolationEvents 32657 # Number of memory order violations
|
||||
system.cpu.iew.predictedTakenIncorrect 850397 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu.iew.predictedNotTakenIncorrect 257130 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu.iew.branchMispredicts 1107527 # Number of branch mispredicts detected at execute
|
||||
system.cpu.iew.iewExecutedInsts 123429780 # Number of executed instructions
|
||||
system.cpu.iew.iewExecutedInsts 123429779 # Number of executed instructions
|
||||
system.cpu.iew.iewExecLoadInsts 52914304 # Number of load instructions executed
|
||||
system.cpu.iew.iewExecSquashedInsts 3443537 # Number of squashed instructions skipped in execute
|
||||
system.cpu.iew.exec_swp 0 # number of swp insts executed
|
||||
|
@ -421,8 +421,8 @@ system.cpu.iew.exec_refs 65401525 # nu
|
|||
system.cpu.iew.exec_branches 11705842 # Number of branches executed
|
||||
system.cpu.iew.exec_stores 12487221 # Number of stores executed
|
||||
system.cpu.iew.exec_rate 0.296769 # Inst execution rate
|
||||
system.cpu.iew.wb_sent 121771134 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 87302555 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_sent 121771133 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 87302554 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_producers 47043389 # num instructions producing a value
|
||||
system.cpu.iew.wb_consumers 86638668 # num instructions consuming a value
|
||||
system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
|
@ -472,8 +472,8 @@ system.cpu.cpi 5.416227 # CP
|
|||
system.cpu.cpi_total 5.416227 # CPI: Total CPI of All Threads
|
||||
system.cpu.ipc 0.184630 # IPC: Instructions Per Cycle
|
||||
system.cpu.ipc_total 0.184630 # IPC: Total IPC of All Threads
|
||||
system.cpu.int_regfile_reads 559625789 # number of integer regfile reads
|
||||
system.cpu.int_regfile_writes 89694790 # number of integer regfile writes
|
||||
system.cpu.int_regfile_reads 559625786 # number of integer regfile reads
|
||||
system.cpu.int_regfile_writes 89694789 # number of integer regfile writes
|
||||
system.cpu.fp_regfile_reads 8322 # number of floating regfile reads
|
||||
system.cpu.fp_regfile_writes 2832 # number of floating regfile writes
|
||||
system.cpu.misc_regfile_reads 137256850 # number of misc regfile reads
|
||||
|
|
Binary file not shown.
|
@ -15,10 +15,11 @@ e820_table=system.e820_table
|
|||
init_param=0
|
||||
intel_mp_pointer=system.intel_mp_pointer
|
||||
intel_mp_table=system.intel_mp_table
|
||||
kernel=/projects/pd/randd/dist/binaries/x86_64-vmlinux-2.6.22.9
|
||||
kernel=/scratch/nilay/GEM5/system/binaries/x86_64-vmlinux-2.6.22.9
|
||||
load_addr_mask=18446744073709551615
|
||||
mem_mode=timing
|
||||
memories=system.physmem
|
||||
num_work_ids=16
|
||||
physmem=system.physmem
|
||||
readfile=tests/halt.sh
|
||||
smbios_table=system.smbios_table
|
||||
|
@ -1301,7 +1302,7 @@ table_size=65536
|
|||
|
||||
[system.pc.south_bridge.ide.disks0.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-x86.img
|
||||
image_file=/scratch/nilay/GEM5/system/disks/linux-x86.img
|
||||
read_only=true
|
||||
|
||||
[system.pc.south_bridge.ide.disks1]
|
||||
|
@ -1321,7 +1322,7 @@ table_size=65536
|
|||
|
||||
[system.pc.south_bridge.ide.disks1.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-bigswap2.img
|
||||
image_file=/scratch/nilay/GEM5/system/disks/linux-bigswap2.img
|
||||
read_only=true
|
||||
|
||||
[system.pc.south_bridge.int_lines0]
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 21 2011 16:24:08
|
||||
gem5 started Nov 21 2011 23:30:30
|
||||
gem5 executing on u200540-lin
|
||||
command line: build/X86_FS/gem5.opt -d build/X86_FS/tests/opt/long/10.linux-boot/x86/linux/pc-o3-timing -re tests/run.py build/X86_FS/tests/opt/long/10.linux-boot/x86/linux/pc-o3-timing
|
||||
gem5 compiled Jan 9 2012 20:47:38
|
||||
gem5 started Jan 9 2012 21:13:16
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_FS/gem5.fast -d build/X86_FS/tests/fast/long/10.linux-boot/x86/linux/pc-o3-timing -re tests/run.py build/X86_FS/tests/fast/long/10.linux-boot/x86/linux/pc-o3-timing
|
||||
warning: add_child('terminal'): child 'terminal' already has parent
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: kernel located at: /projects/pd/randd/dist/binaries/x86_64-vmlinux-2.6.22.9
|
||||
0: rtc: Real-time clock set to Sun Jan 1 00:00:00 2012
|
||||
info: kernel located at: /scratch/nilay/GEM5/system/binaries/x86_64-vmlinux-2.6.22.9
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
Exiting @ tick 5145286546500 because m5_exit instruction encountered
|
||||
Exiting @ tick 5161177988500 because m5_exit instruction encountered
|
||||
|
|
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue