Totally re-do/reorganize the python part of the statistics code

Make the database creation/removal/cleanup code use python
Make formulas work with the database
Add support to do some graphing, but needs more work
Still need to work on vectors, 2d vectors, dists and vectordists

--HG--
extra : convert_revision : 1a88320dcc036a3751e8a036770766dce76a568c
This commit is contained in:
Nathan Binkert 2004-08-09 21:20:52 -04:00
parent 2c5356835d
commit 2771abb6ed
7 changed files with 2292 additions and 0 deletions

415
util/stats/db.py Normal file
View file

@ -0,0 +1,415 @@
import MySQLdb, re, string
def statcmp(a, b):
    """Three-way comparison of two dotted statistic names.

    Both names are split on '.'.  All components before the last one that
    both names have are compared pairwise; the first difference decides.
    If those are equal, names with the same number of components are
    ordered by that last shared component, otherwise the name with fewer
    components sorts first.

    Returns -1, 0 or 1, like the Python 2 cmp() builtin.
    """
    def compare(x, y):
        # Portable replacement for cmp(), which no longer exists in Python 3.
        return (x > y) - (x < y)

    v1 = a.split('.')
    v2 = b.split('.')

    last = min(len(v1), len(v2)) - 1
    for i, j in zip(v1[:last], v2[:last]):
        if i != j:
            return compare(i, j)

    # Special compare for last element.
    if len(v1) == len(v2):
        return compare(v1[last], v2[last])
    return compare(len(v1), len(v2))
class RunData:
    """One row of the runs table.

    Database.connect() selects (rn_id, rn_name, rn_sample, rn_user,
    rn_project), so for five-column rows the user and project live at
    indexes 3 and 4.  Four-column rows (id, name, user, project) are still
    accepted for backward compatibility; their sample is None.
    """

    def __init__(self, row):
        self.run = int(row[0])
        self.name = row[1]
        if len(row) >= 5:
            self.sample = row[2]
            self.user = row[3]
            self.project = row[4]
        else:
            self.sample = None
            self.user = row[2]
            self.project = row[3]
class SubData:
    """One row of the subdata table: (stat id, x, y, name, description).

    The first three columns are converted to int; name and description are
    stored as given.
    """

    def __init__(self, row):
        self.stat, self.x, self.y = [int(row[col]) for col in (0, 1, 2)]
        self.name = row[3]
        self.descr = row[4]
class Data:
    """One result row of a data query: (stat, run, x, y, data).

    Raises ValueError when the row does not have exactly five columns.
    """

    def __init__(self, row):
        if len(row) != 5:
            # String exceptions are not valid in any supported Python;
            # raise a real exception carrying the same message.
            raise ValueError('stat db error')
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % (
            self.stat, self.run, self.x, self.y, self.data)
class StatData(object):
    """Description of one statistic, built from a full row of the stats table.

    Column positions follow the CREATE TABLE order: id, name, description,
    type, print, prereq, precision, nozero, nonan, total, pdf, cdf, then the
    distribution parameters min, max, bucket size and size.

    Formula stats look their text up through the class attribute ``db``,
    which Database.connect() assigns before constructing any StatData.
    """

    def __init__(self, row):
        self.stat = int(row[0])
        self.name = row[1]
        self.desc = row[2]
        self.type = row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        import flags
        # (column index, flag bit) for every boolean column of the row.
        flag_columns = (
            (4, flags.printable),
            (7, flags.nozero),
            (8, flags.nonan),
            (9, flags.total),
            (10, flags.pdf),
            (11, flags.cdf),
        )
        self.flags = 0
        for column, bit in flag_columns:
            if int(row[column]):
                self.flags |= bit

        # Only distribution types carry bucket information.
        if self.type in ('DIST', 'VECTORDIST'):
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            self.formula = self.db.allFormulas[self.stat]
class Node(object):
    """Interior node of the dotted stat-name tree built by Database.append().

    Child nodes and stats are attached directly as instance attributes.
    """

    def __init__(self, name):
        self.name = name

    def __str__(self):
        # The bare name ``name`` is not defined here; the node's own name
        # is what has to be returned.
        return self.name
class Database(object):
    """Client-side view of the statistics database.

    connect() opens the MySQL connection and caches the runs, bins,
    subdata, formulas and stats tables in the ``all*`` attributes.  Stats
    are additionally attached to this object as a tree of attributes that
    follows their dotted names (see append()).

    The pseudo-attribute ``get`` selects how data() aggregates values:
    it reads as one of the strings 'sum', 'avg' or 'stdev' and may only be
    assigned one of those strings.
    """

    def __init__(self):
        self.host = 'zizzer.pool'
        self.user = ''
        self.passwd = ''
        self.db = 'm5stats'
        self.cursor = None

        self.allStats = []
        self.allStatIds = {}
        self.allStatNames = {}
        self.allSubData = {}

        self.allRuns = []
        self.allRunIds = {}
        self.allRunNames = {}

        self.allBins = []
        self.allBinIds = {}
        self.allBinNames = {}

        self.allFormulas = {}

        self.stattop = {}
        self.statdict = {}
        self.statlist = []

        self.mode = 'sum'
        self.runs = None
        self.bins = None
        self.ticks = None
        # Stored directly in __dict__ because __setattr__ only accepts the
        # strings 'sum', 'avg' and 'stdev' for 'get'.
        self.__dict__['get'] = type(self).sum

    def query(self, sql):
        """Execute *sql* on the current cursor; read results from self.cursor."""
        self.cursor.execute(sql)

    def update_dict(self, dict):
        """Copy the top-level stat namespace (self.stattop) into *dict*."""
        dict.update(self.stattop)

    def append(self, stat):
        """Register *stat* under its dotted name.

        ':' in the name is replaced by '__'.  Every component but the last
        becomes (or reuses) a Node attribute, starting at this object; the
        stat itself is stored as the attribute named by the last component.
        The top-level entry is also recorded in self.stattop.
        """
        statname = re.sub(':', '__', stat.name)
        path = statname.split('.')
        pathtop = path[0]
        fullname = ''

        x = self
        while len(path) > 1:
            name = path.pop(0)
            if name not in x.__dict__:
                x.__dict__[name] = Node(fullname + name)
            x = x.__dict__[name]
            fullname = '%s%s.' % (fullname, name)

        name = path.pop(0)
        x.__dict__[name] = stat

        self.stattop[pathtop] = self.__dict__[pathtop]
        self.statdict[statname] = stat
        self.statlist.append(statname)

    def connect(self):
        """Open the connection and load all metadata tables into memory."""
        # connect
        self.thedb = MySQLdb.connect(db=self.db,
                                     host=self.host,
                                     user=self.user,
                                     passwd=self.passwd)

        # create a cursor
        self.cursor = self.thedb.cursor()

        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
from runs''')
        for result in self.cursor.fetchall():
            run = RunData(result)
            self.allRuns.append(run)
            self.allRunIds[run.run] = run
            self.allRunNames[run.name] = run

        self.query('select * from bins')
        for bin_id, name in self.cursor.fetchall():
            self.allBinIds[int(bin_id)] = name
            self.allBinNames[name] = int(bin_id)

        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
        for result in self.cursor.fetchall():
            subdata = SubData(result)
            self.allSubData.setdefault(subdata.stat, []).append(subdata)

        self.query('select * from formulas')
        for stat_id, formula in self.cursor.fetchall():
            self.allFormulas[int(stat_id)] = formula

        # StatData reads formula text through this class attribute.
        StatData.db = self
        self.query('select * from stats')
        import info
        for result in self.cursor.fetchall():
            stat = info.NewStat(StatData(result))
            self.append(stat)
            self.allStats.append(stat)
            self.allStatIds[stat.stat] = stat
            self.allStatNames[stat.name] = stat

    def _sortedStats(self):
        """Return all stats ordered by name using statcmp()."""
        from functools import cmp_to_key
        names = sorted([stat.name for stat in self.allStats],
                       key=cmp_to_key(statcmp))
        return [self.allStatNames[name] for name in names]

    def listBins(self, regex='.*'):
        """Print all bins whose name matches *regex* (default: every bin)."""
        print('%-50s %-10s' % ('bin name', 'id'))
        print('-' * 61)
        rx = re.compile(regex)
        for name in sorted(self.allBinNames.keys()):
            if rx.match(name):
                print('%-50s %-10d' % (name, self.allBinNames[name]))

    def listRuns(self, user=None):
        """Print all runs belonging to *user*, or every run when user is None."""
        print('%-40s %-10s %-5s' % ('run name', 'user', 'id'))
        print('-' * 62)
        for run in self.allRuns:
            if user is None or user == run.user:
                print('%-40s %-10s %-10d' % (run.name, run.user, run.run))

    def listTicks(self, run=None):
        """Print the distinct ticks recorded for stat id 1950.

        NOTE(review): the stat id is hard-coded and *run* is currently
        ignored; the run filter was disabled in the original code.
        """
        print("tick")
        print("----------------------------------------")
        sql = 'select distinct dt_tick from data where dt_stat=1950'
        self.query(sql)
        for r in self.cursor.fetchall():
            print(r[0])

    def listStats(self, regex=None):
        """Print name, id and type of every stat, optionally only those
        whose name matches the regular expression *regex*."""
        print('%-60s %-8s %-10s' % ('stat name', 'id', 'type'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        for stat in self._sortedStats():
            if rx is None or rx.match(stat.name):
                print('%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type))

    def listFormulas(self, regex=None):
        """Print every FORMULA stat with its formula text, optionally only
        those whose name matches the regular expression *regex*."""
        print('%-60s %s' % ('formula name', 'formula'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        for stat in self._sortedStats():
            if stat.type == 'FORMULA' and (rx is None or rx.match(stat.name)):
                print('%-60s %s' % (stat.name, self.allFormulas[stat.stat]))

    def getStat(self, stats):
        """Resolve stat ids and/or name patterns to stat objects.

        *stats* is one item or a list of items.  An int is looked up as a
        stat id (KeyError if unknown); a str is compiled as a regular
        expression and selects every stat whose name it matches.  Other
        item types are ignored.
        """
        if not isinstance(stats, list):
            stats = [stats]

        ret = []
        for wanted in stats:
            if isinstance(wanted, int):
                ret.append(self.allStatIds[wanted])
            elif isinstance(wanted, str):
                rx = re.compile(wanted)
                ret.extend([s for s in self.allStats if rx.match(s.name)])
        return ret

    def getBin(self, bins):
        """Resolve bin ids, bin names and/or compiled patterns to bin ids.

        *bins* is one item or a list of items.  An int is taken as a bin
        id, a str as an exact bin name (KeyError if unknown); anything else
        must provide match() and selects every bin name it matches.
        """
        if not isinstance(bins, list):
            bins = [bins]

        ret = []
        for wanted in bins:
            if isinstance(wanted, int):
                ret.append(wanted)
            elif isinstance(wanted, str):
                ret.append(self.allBinNames[wanted])
            else:
                for name, bin_id in self.allBinNames.items():
                    if wanted.match(name):
                        ret.append(bin_id)
        return ret

    def getNotBin(self, bin):
        """Return the ids of all known bins *not* selected by *bin*
        (same argument forms as getBin())."""
        excluded = {}
        for bin_id in self.getBin(bin):
            excluded[bin_id] = 1

        return [bin_id for bin_id in self.allBinIds.keys()
                if bin_id not in excluded]

    #########################################
    # get the data
    #
    def inner(self, op, stat, bins, ticks, group=False):
        """Build the SQL that applies aggregate *op* to dt_data.

        Rows are restricted to *stat* (one stat object or a list of them),
        to self.runs, *bins* and *ticks* when those are non-empty, and are
        grouped by stat, run, x and y -- plus tick when *group* is true.
        """
        sql = 'select '
        sql += 'dt_stat as stat, '
        sql += 'dt_run as run, '
        sql += 'dt_x as x, '
        sql += 'dt_y as y, '
        if group:
            sql += 'dt_tick as tick, '
        sql += '%s(dt_data) as data ' % op
        sql += 'from data '
        sql += 'where '

        if isinstance(stat, list):
            val = ' or '.join(['dt_stat=%d' % s.stat for s in stat])
            sql += ' (%s)' % val
        else:
            sql += ' dt_stat=%d' % stat.stat

        if self.runs is not None and len(self.runs):
            val = ' or '.join(['dt_run=%d' % r for r in self.runs])
            sql += ' and (%s)' % val

        if bins is not None and len(bins):
            val = ' or '.join(['dt_bin=%d' % b for b in bins])
            sql += ' and (%s)' % val

        if ticks is not None and len(ticks):
            val = ' or '.join(['dt_tick=%d' % s for s in ticks])
            sql += ' and (%s)' % val

        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
        if group:
            sql += ',dt_tick'
        return sql

    def outer(self, op_out, op_in, stat, bins, ticks):
        """Build SQL that applies *op_in* per tick (see inner()) and then
        *op_out* across ticks, grouped by stat, run, x and y."""
        sql = self.inner(op_in, stat, bins, ticks, True)
        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
        sql += 'group by stat,run,x,y'
        return sql

    def sum(self, stat, bins, ticks):
        """Return SQL summing the data over all selected bins and ticks."""
        return self.inner('sum', stat, bins, ticks)

    def avg(self, stat, bins, ticks):
        """Return SQL that sums the selected bins within each tick and then
        averages those sums across ticks."""
        return self.outer('avg', 'sum', stat, bins, ticks)

    def stdev(self, stat, bins, ticks):
        """Return SQL that sums the selected bins within each tick and then
        takes the standard deviation of those sums across ticks."""
        return self.outer('stddev', 'sum', stat, bins, ticks)

    def __getattribute__(self, attr):
        # Everything except 'get' behaves normally; 'get' reads back as the
        # name of the currently selected aggregation.
        if attr != 'get':
            return super(Database, self).__getattribute__(attr)

        if self.__dict__['get'] == type(self).sum:
            return 'sum'
        elif self.__dict__['get'] == type(self).avg:
            return 'avg'
        elif self.__dict__['get'] == type(self).stdev:
            return 'stdev'
        else:
            return ''

    def __setattr__(self, attr, value):
        # 'get' may only be set to the name of an aggregation; the matching
        # SQL builder is what actually gets stored.
        if attr != 'get':
            super(Database, self).__setattr__(attr, value)
            return

        if value == 'sum':
            self.__dict__['get'] = type(self).sum
        elif value == 'avg':
            self.__dict__['get'] = type(self).avg
        elif value == 'stdev':
            self.__dict__['get'] = type(self).stdev
        else:
            raise AttributeError("can only set get to: sum | avg | stdev")

    def data(self, stat, bins=None, ticks=None):
        """Fetch the values of *stat* using the selected aggregation.

        *bins* and *ticks* default to self.bins and self.ticks.  Returns a
        nested dict: result[run][x][y] -> float value.
        """
        if bins is None:
            bins = self.bins
        if ticks is None:
            ticks = self.ticks
        sql = self.__dict__['get'](self, stat, bins, ticks)
        self.query(sql)

        runs = {}
        for row in self.cursor.fetchall():
            data = Data(row)
            runs.setdefault(data.run, {}).setdefault(data.x, {})[data.y] = \
                data.data
        return runs

388
util/stats/dbinit.py Normal file
View file

@ -0,0 +1,388 @@
import MySQLdb
class MyDB(object):
    """Administrative helper that creates, drops, populates and cleans the
    statistics database.

    *options* must provide the attributes db, host, user and passwd.

    NOTE(review): the CREATE TABLE statements use the ``TYPE=InnoDB`` table
    option; newer MySQL servers spell this ``ENGINE=InnoDB`` -- confirm
    against the server version in use before changing the DDL.
    """

    def __init__(self, options):
        self.name = options.db
        self.host = options.host
        self.user = options.user
        self.passwd = options.passwd
        self.mydb = None
        self.cursor = None

    def admin(self):
        """(Re)connect to the 'mysql' database instead of the stats one."""
        self.close()
        self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
                                    passwd=self.passwd)
        self.cursor = self.mydb.cursor()

    def connect(self):
        """(Re)connect to the statistics database named by the options."""
        self.close()
        self.mydb = MySQLdb.connect(db=self.name, host=self.host,
                                    user=self.user, passwd=self.passwd)
        self.cursor = self.mydb.cursor()

    def close(self):
        """Close the current connection, if any, and forget the cursor."""
        if self.mydb is not None:
            self.mydb.close()
            # Forget the closed connection so a repeated close() does not
            # try to close it a second time.
            self.mydb = None
        self.cursor = None

    def query(self, sql):
        """Execute *sql* on the current cursor."""
        self.cursor.execute(sql)

    def drop(self):
        """Drop the statistics database if it exists."""
        self.query('DROP DATABASE IF EXISTS %s' % self.name)

    def create(self):
        """Create the (empty) statistics database."""
        self.query('CREATE DATABASE %s' % self.name)

    def populate(self):
        """Create every table of the statistics schema."""
        #
        # Each run (or simulation) gets its own entry in the runs table to
        # group stats by where they were generated
        #
        # COLUMNS:
        #   'id' is a unique identifier for each run to be used in other
        #       tables.
        #   'name' is the user designated name for the data generated.  It is
        #       configured in the simulator.
        #   'user' identifies the user that generated the data for the given
        #       run.
        #   'project' another name to identify runs for a specific goal
        #   'date' is a timestamp for when the data was generated.  It can be
        #       used to easily expire data that was generated in the past.
        #   'expire' is a timestamp for when the data should be removed from
        #       the database so we don't have years worth of junk.
        #
        # INDEXES:
        #   'run' is indexed so you can find out details of a run if the run
        #       was retrieved from the data table.
        #   'name' is indexed (together with the sample) so that run names
        #       are forced to be unique
        #
        self.query('''
CREATE TABLE runs(
rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
rn_name VARCHAR(200) NOT NULL,
rn_sample VARCHAR(32) NOT NULL,
rn_user VARCHAR(32) NOT NULL,
rn_project VARCHAR(100) NOT NULL,
rn_date TIMESTAMP NOT NULL,
rn_expire TIMESTAMP NOT NULL,
PRIMARY KEY (rn_id),
UNIQUE (rn_name,rn_sample)
) TYPE=InnoDB''')

        #
        # We keep the bin names separate so that the data table doesn't get
        # huge since bin names are frequently repeated.
        #
        # COLUMNS:
        #   'id' is the unique bin identifier.
        #   'name' is the string name for the bin.
        #
        # INDEXES:
        #   'bin' is indexed to get the name of a bin when data is retrieved
        #       via the data table.
        #   'name' is indexed to get the bin id for a named bin when you want
        #       to search the data table based on a specific bin.
        #
        self.query('''
CREATE TABLE bins(
bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
bn_name VARCHAR(255) NOT NULL,
PRIMARY KEY(bn_id),
UNIQUE (bn_name)
) TYPE=InnoDB''')

        #
        # The stat table gives us all of the data for a particular stat.
        #
        # COLUMNS:
        #   'stat' is a unique identifier for each stat to be used in other
        #       tables for references.
        #   'name' is simply the simulator derived name for a given
        #       statistic.
        #   'descr' is the description of the statistic and what it tells
        #       you.
        #   'type' defines what the stat tells you.  Types are:
        #       SCALAR: A simple scalar statistic that holds one value
        #       VECTOR: An array of statistic values.  Such as something
        #           that is generated per-thread.  Vectors exist to give
        #           averages, pdfs, cdfs, means, standard deviations, etc
        #           across the stat values.
        #       DIST: Is a distribution of data.  When the statistic value is
        #           sampled, its value is counted in a particular bucket.
        #           Useful for keeping track of utilization of a resource.
        #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
        #       VECTORDIST: Can be used when the distribution needs to be
        #           factored out into a per-thread distribution of data for
        #           example.  It can still be summed across threads to find
        #           the total distribution.
        #       VECTOR2D: Can be used when you have a stat that is not only
        #           per-thread, but it is per-something else.  Like
        #           per-message type.
        #       FORMULA: This statistic is a formula, and its data must be
        #           looked up in the formula table, for indicating how to
        #           present its values.
        #   'subdata' is potentially used by any of the vector types to
        #       give a specific name to all of the data elements within a
        #       stat.
        #   'print' indicates whether this stat should be printed ever.
        #       (Unnamed stats don't usually get printed)
        #   'prereq' only print the stat if the prereq is not zero.
        #   'prec' number of decimal places to print
        #   'nozero' don't print zero values
        #   'nonan' don't print NaN values
        #   'total' for vector type stats, print the total.
        #   'pdf' for vector type stats, print the pdf.
        #   'cdf' for vector type stats, print the cdf.
        #
        #   The Following are for dist type stats:
        #   'min' is the minimum bucket value. Anything less is an underflow.
        #   'max' is the maximum bucket value. Anything more is an overflow.
        #   'bktsize' is the approximate number of entries in each bucket.
        #   'size' is the number of buckets. equal to (min/max)/bktsize.
        #
        # INDEXES:
        #   'stat' is indexed so that you can find out details about a stat
        #       if the stat id was retrieved from the data table.
        #   'name' is indexed so that you can simply look up data about a
        #       named stat.
        #
        self.query('''
CREATE TABLE stats(
st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
st_name VARCHAR(255) NOT NULL,
st_descr TEXT NOT NULL,
st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
"VECTOR2D", "FORMULA") NOT NULL,
st_print BOOL NOT NULL,
st_prereq SMALLINT UNSIGNED NOT NULL,
st_prec TINYINT NOT NULL,
st_nozero BOOL NOT NULL,
st_nonan BOOL NOT NULL,
st_total BOOL NOT NULL,
st_pdf BOOL NOT NULL,
st_cdf BOOL NOT NULL,
st_min DOUBLE NOT NULL,
st_max DOUBLE NOT NULL,
st_bktsize DOUBLE NOT NULL,
st_size SMALLINT UNSIGNED NOT NULL,
PRIMARY KEY (st_id),
UNIQUE (st_name)
) TYPE=InnoDB''')

        #
        # This is the main table of data for stats.
        #
        # COLUMNS:
        #   'stat' refers to the stat field given in the stat table.
        #
        #   'x' refers to the first dimension of a multi-dimensional stat. For
        #       a vector, x will start at 0 and increase for each vector
        #       element.
        #       For a distribution:
        #       -1: sum (for calculating standard deviation)
        #       -2: sum of squares (for calculating standard deviation)
        #       -3: total number of samples taken (for calculating
        #           standard deviation)
        #       -4: minimum value
        #       -5: maximum value
        #       -6: underflow
        #       -7: overflow
        #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
        #       dimension.
        #   'run' is the run that the data was generated from.  Details up in
        #       the run table
        #   'tick' is a timestamp generated by the simulator.
        #   'bin' is the name of the bin that the data was generated in, if
        #       any.
        #   'data' is the actual stat value.
        #
        # INDEXES:
        #   'stat' is indexed so that a user can find all of the data for a
        #       particular stat. It is not unique, because that specific stat
        #       can be found in many runs, bins, and samples, in addition to
        #       having entries for the multidimensional cases.
        #   'run' is indexed to allow a user to remove all of the data for a
        #       particular execution run.  It can also be used to allow the
        #       user to print out all of the data for a given run.
        #
        self.query('''
CREATE TABLE data(
dt_stat SMALLINT UNSIGNED NOT NULL,
dt_x SMALLINT NOT NULL,
dt_y SMALLINT NOT NULL,
dt_run SMALLINT UNSIGNED NOT NULL,
dt_tick BIGINT UNSIGNED NOT NULL,
dt_bin SMALLINT UNSIGNED NOT NULL,
dt_data DOUBLE NOT NULL,
INDEX (dt_stat),
INDEX (dt_run),
UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
) TYPE=InnoDB;''')

        #
        # Names and descriptions for multi-dimensional stats (vectors, etc.)
        # are stored here instead of having their own entry in the statistics
        # table. This allows all parts of a single stat to easily share a
        # single id.
        #
        # COLUMNS:
        #   'stat' is the unique stat identifier from the stat table.
        #   'x' is the first dimension for multi-dimensional stats
        #       corresponding to the data table above.
        #   'y' is the second dimension for multi-dimensional stats
        #       corresponding to the data table above.
        #   'name' is the specific subname for the unique stat,x,y combination.
        #   'descr' is the specific description for the unique stat,x,y
        #       combination.
        #
        # INDEXES:
        #   'stat' is indexed so you can get the subdata for a specific stat.
        #
        self.query('''
CREATE TABLE subdata(
sd_stat SMALLINT UNSIGNED NOT NULL,
sd_x SMALLINT NOT NULL,
sd_y SMALLINT NOT NULL,
sd_name VARCHAR(255) NOT NULL,
sd_descr TEXT,
UNIQUE (sd_stat,sd_x,sd_y)
) TYPE=InnoDB''')

        #
        # The formula table is maintained separately from the data table
        # because formula data, unlike other stat data cannot be represented
        # there.
        #
        # COLUMNS:
        #   'stat' refers to the stat field generated in the stat table.
        #   'formula' is the actual string representation of the formula
        #       itself.
        #
        # INDEXES:
        #   'stat' is indexed so that you can just look up a formula.
        #
        self.query('''
CREATE TABLE formulas(
fm_stat SMALLINT UNSIGNED NOT NULL,
fm_formula BLOB NOT NULL,
PRIMARY KEY(fm_stat)
) TYPE=InnoDB''')

        #
        # Each stat used in each formula is kept in this table.  This way, if
        # you want to print out a particular formula, you can simply find out
        # which stats you need by looking in this table.  Additionally, when
        # you remove a stat from the stats table and data table, you remove
        # any references to the formula in this table.  When a formula is no
        # longer referred to, you remove its entry.
        #
        # COLUMNS:
        #   'stat' is the stat id from the stat table above.
        #   'child' is the stat id of a stat that is used for this formula.
        #       There may be many children for any given 'stat' (formula)
        #
        # INDEXES:
        #   'stat' is indexed so you can look up all of the children for a
        #       particular stat.
        #   'child' is indexed so that you can remove an entry when a stat is
        #       removed.
        #
        # NOTE(review): the description above speaks of a 'child' column,
        # but the table created below stores (fr_stat, fr_run), and clean()
        # prunes it by run -- the comment looks out of date.
        #
        self.query('''
CREATE TABLE formula_ref(
fr_stat SMALLINT UNSIGNED NOT NULL,
fr_run SMALLINT UNSIGNED NOT NULL,
UNIQUE (fr_stat,fr_run),
INDEX (fr_stat),
INDEX (fr_run)
) TYPE=InnoDB''')

        # COLUMNS:
        #   'event' is the unique event id from the event_desc table
        #   'run' is simulation run id that this event took place in
        #   'tick' is the tick when the event happened
        #
        # INDEXES:
        #   'event' is indexed so you can look up all occurrences of a
        #       specific event
        #   'run' is indexed so you can find all events in a run
        #   'tick' is indexed because we want the unique thing anyway
        #   'event,run,tick' is unique combination
        self.query('''
CREATE TABLE events(
ev_event SMALLINT UNSIGNED NOT NULL,
ev_run SMALLINT UNSIGNED NOT NULL,
ev_tick BIGINT UNSIGNED NOT NULL,
INDEX(ev_event),
INDEX(ev_run),
INDEX(ev_tick),
UNIQUE(ev_event,ev_run,ev_tick)
) TYPE=InnoDB''')

        # COLUMNS:
        #   'id' is the unique description id
        #   'name' is the name of the event that occurred
        #
        # INDEXES:
        #   'id' is indexed because it is the primary key and is what you use
        #       to look up the descriptions
        #   'name' is indexed so one can find the event based on name
        #
        self.query('''
CREATE TABLE event_names(
en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
en_name VARCHAR(255) NOT NULL,
PRIMARY KEY (en_id),
UNIQUE (en_name)
) TYPE=InnoDB''')

    def clean(self):
        """Delete rows that are no longer referenced.

        In order: data and formula references of missing runs, formulas
        without references, stats and subdata without data, bins without
        data, events of missing runs and event names without events.
        """
        self.query('''
DELETE data
FROM data
LEFT JOIN runs ON dt_run=rn_id
WHERE rn_id IS NULL''')

        self.query('''
DELETE formula_ref
FROM formula_ref
LEFT JOIN runs ON fr_run=rn_id
WHERE rn_id IS NULL''')

        self.query('''
DELETE formulas
FROM formulas
LEFT JOIN formula_ref ON fm_stat=fr_stat
WHERE fr_stat IS NULL''')

        self.query('''
DELETE stats
FROM stats
LEFT JOIN data ON st_id=dt_stat
WHERE dt_stat IS NULL''')

        self.query('''
DELETE subdata
FROM subdata
LEFT JOIN data ON sd_stat=dt_stat
WHERE dt_stat IS NULL''')

        self.query('''
DELETE bins
FROM bins
LEFT JOIN data ON bn_id=dt_bin
WHERE dt_bin IS NULL''')

        self.query('''
DELETE events
FROM events
LEFT JOIN runs ON ev_run=rn_id
WHERE rn_id IS NULL''')

        self.query('''
DELETE event_names
FROM event_names
LEFT JOIN events ON en_id=ev_event
WHERE ev_event IS NULL''')

124
util/stats/display.py Normal file
View file

@ -0,0 +1,124 @@
class Value:
    """Formats a single statistic value for printing.

    value     -- a number, or a string such as 'nan' / 'inf'
    precision -- number of decimals; a negative precision prints whole
                 numbers without decimals and everything else with '%f'
    percent   -- when true, numbers are multiplied by 100 and '%' is
                 appended to the result
    """

    def __init__(self, value, precision, percent = False):
        self.value = value
        self.precision = precision
        self.percent = percent

    def __str__(self):
        if isinstance(self.value, str):
            lowered = self.value.lower()
            if lowered == 'nan':
                value = 'NaN'
            elif lowered == 'inf':
                value = 'Inf'
            else:
                # Any other string used to leave 'value' unbound and crash;
                # print it unchanged instead.
                value = self.value
        else:
            if self.precision >= 0:
                format = "%%.%df" % self.precision
            elif self.value == 0.0:
                format = "%.0f"
            elif self.value % 1.0 == 0.0:
                format = "%.0f"
            else:
                format = "%f"
            value = self.value
            if self.percent:
                value = value * 100.0
            value = format % value

        if self.percent:
            value = value + "%"

        return value
class Print:
    """One printable output line: name, value and optional pdf/cdf columns.

    All fields (name, value, precision, flags, desc, pdf, cdf, ...) are
    plain instance attributes, either passed as keyword arguments or
    assigned afterwards.
    """

    def __init__(self, **vals):
        self.__dict__.update(vals)

    def __str__(self):
        fields = self.__dict__
        value = Value(self.value, self.precision)

        # pdf/cdf columns stay empty unless the attribute has been set.
        pdf = ''
        if 'pdf' in fields:
            pdf = Value(self.pdf, 2, True)
        cdf = ''
        if 'cdf' in fields:
            cdf = Value(self.cdf, 2, True)

        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)

        if descriptions and 'desc' in fields and self.desc:
            output = "%s # %s" % (output, self.desc)

        return output

    def doprint(self):
        """Return whether this line should be printed at all."""
        if display_all:
            return True

        # Suppress zeros / NaNs when the stat asks for it.
        if self.value == 0.0 and (self.flags & flags_nozero):
            return False
        if (isinstance(self.value, str) and self.value == 'NaN'
                and (self.flags & flags_nonan)):
            return False

        return True

    def display(self):
        """Print the line unless doprint() suppresses it."""
        if self.doprint():
            print(self)
class VectorDisplay:
    """Prints a scalar or vector statistic, one line per element.

    The caller assigns name, desc, value, flags and precision (and
    optionally subnames / subdescs) as instance attributes before calling
    display().
    """

    def display(self):
        p = Print()
        p.flags = self.flags
        p.precision = self.precision

        # Non-sequence values are printed as a single line.
        if not issequence(self.value):
            p.name = self.name
            p.desc = self.desc
            p.value = self.value
            p.display()
            return

        value = self.value
        if not len(value):
            return

        # Running float sum of all elements; a single element is used as is.
        mytotal = value[0]
        for each in value[1:]:
            mytotal = float(mytotal) + float(each)
        mycdf = 0.0

        if display_all:
            subnames = ['[%d]' % i for i in range(len(value))]
        else:
            subnames = [''] * len(value)

        # zip() against the index range stops at len(value), so surplus
        # subnames/subdescs are ignored instead of raising IndexError.
        if 'subnames' in self.__dict__:
            for i, each in zip(range(len(value)), self.subnames):
                if len(each) > 0:
                    subnames[i] = '.%s' % each

        subdescs = [self.desc] * len(value)
        if 'subdescs' in self.__dict__:
            for i, each in zip(range(len(value)), self.subdescs):
                subdescs[i] = each

        # The three lists all have len(value) elements.
        for val, sname, sdesc in zip(value, subnames, subdescs):
            if mytotal > 0.0:
                mypdf = float(val) / float(mytotal)
                mycdf += mypdf
                if (self.flags & flags_pdf):
                    p.pdf = mypdf
                    p.cdf = mycdf

            # Elements without a subname are accumulated but not printed.
            if len(sname) == 0:
                continue

            p.name = self.name + sname
            p.desc = sdesc
            p.value = val
            p.display()

        if (self.flags & flags_total):
            # The total line carries no pdf/cdf columns.
            if 'pdf' in p.__dict__:
                del p.__dict__['pdf']
            if 'cdf' in p.__dict__:
                del p.__dict__['cdf']
            p.name = self.name + '.total'
            p.desc = self.desc
            p.value = mytotal
            p.display()

36
util/stats/flags.py Normal file
View file

@ -0,0 +1,36 @@
# Copyright (c) 2004 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
# Bit flags describing a statistic.  They are combined with bitwise OR into
# a stat's ``flags`` value and tested with bitwise AND.
init = 0x00000001
# The stat should be printed (set from the st_print column).
printable = 0x00000002
# For vector type stats: print the total / the pdf / the cdf.
total = 0x00000010
pdf = 0x00000020
cdf = 0x00000040
dist = 0x00000080
# Don't print zero values / don't print NaN values.
nozero = 0x00000100
nonan = 0x00000200

724
util/stats/info.py Normal file
View file

@ -0,0 +1,724 @@
from __future__ import division
import operator, re, types
# Data source used by the statistic classes in this module: they call
# source.data(...) to fetch values and evaluate formulas in source.stattop.
# Must be assigned before any stat value is read.
source = None
# Run id whose values are used when a stat is converted to float or
# displayed.
display_run = 0
def issequence(t):
    """Return True when *t* is a tuple or a list, False otherwise."""
    return isinstance(t, (tuple, list))
def total(f):
    """Wrap the total of *f* in a new FormulaStat.

    *f* may be a FormulaStat (its value is used) or a plain value.  A tuple
    or list is reduced with '+'; anything else is passed through unchanged.
    """
    if isinstance(f, FormulaStat):
        operand = f.value
    else:
        operand = f

    result = FormulaStat()
    if issequence(operand):
        result.value = reduce(operator.add, operand)
    else:
        result.value = operand
    return result
def unaryop(op, f):
    """Apply the one-argument callable *op* to *f*.

    *f* may be a FormulaStat (its value is used) or a plain value.  For a
    tuple or list, *op* is mapped over the elements; otherwise the result
    of op(value) is returned.
    """
    if isinstance(f, FormulaStat):
        operand = f.value
    else:
        operand = f

    if not issequence(operand):
        return op(operand)
    return map(op, operand)
def zerodiv(lv, rv):
    """True division that yields 0.0 instead of failing when *rv* is zero."""
    if rv == 0.0:
        return 0.0
    return operator.truediv(lv, rv)
def wrapop(op, lv, rv):
    """Apply op(lv, rv) unless an operand is a string.

    A string operand is returned unchanged instead of being operated on;
    the left operand takes precedence.
    """
    for operand in (lv, rv):
        if isinstance(operand, str):
            return operand
    return op(lv, rv)
def _samekeys(label, lhs, rhs):
    """Return True when mappings *lhs* and *rhs* have exactly the same keys;
    otherwise print both key lists, tagged with *label*, and return False."""
    lkeys = sorted(lhs.keys())
    rkeys = sorted(rhs.keys())
    if lkeys == rkeys:
        return True
    print('l%s != r%s' % (label, label))
    print(lkeys)
    print(rkeys)
    return False

def same(lv, rv):
    """Return True when two run -> x -> y value mappings have identical keys
    at every level.

    Keys are compared as sorted lists, so the result does not depend on
    dictionary ordering, and a mapping with extra or missing keys is
    reported as different.  A diagnostic is printed for the first mismatch.
    """
    if not _samekeys('run', lv, rv):
        return False
    for run in lv.keys():
        if not _samekeys('x', lv[run], rv[run]):
            return False
        for x in lv[run].keys():
            if not _samekeys('y', lv[run][x], rv[run][x]):
                return False
    return True
def binaryop(op, lf, rf):
    """Apply the two-argument callable *op* element-wise.

    When both operands are FormulaStats their run -> x -> y value mappings
    must have identical keys (AttributeError otherwise) and are combined
    element by element.  When only one operand is a FormulaStat, the other
    is applied as a constant to every element.  When neither is, an empty
    dict is returned.  Individual elements go through wrapop().
    """
    left_is_stat = isinstance(lf, FormulaStat)
    result = {}

    if left_is_stat and isinstance(rf, FormulaStat):
        lv = lf.value
        rv = rf.value
        if not same(lv, rv):
            raise AttributeError("run,x,y not identical")
        for run, xs in lv.items():
            result[run] = {}
            for x, ys in xs.items():
                result[run][x] = {}
                for y, lval in ys.items():
                    result[run][x][y] = wrapop(op, lval, rv[run][x][y])
    elif left_is_stat:
        # Constant on the right-hand side.
        for run, xs in lf.value.items():
            result[run] = {}
            for x, ys in xs.items():
                result[run][x] = {}
                for y, lval in ys.items():
                    result[run][x][y] = wrapop(op, lval, rf)
    elif isinstance(rf, FormulaStat):
        # Constant on the left-hand side.
        for run, xs in rf.value.items():
            result[run] = {}
            for x, ys in xs.items():
                result[run][x] = {}
                for y, rval in ys.items():
                    result[run][x][y] = wrapop(op, lf, rval)

    return result
def sums(x, y):
    """Add *x* and *y*; when *x* is a tuple or list, add element by element
    and return the list of sums."""
    if not isinstance(x, (tuple, list)):
        return x + y
    return list(map(operator.add, x, y))
def alltrue(list):
    """Fold *list* with 'and': the result is the first false element, or the
    last element when every element is true."""
    def conjunction(lhs, rhs):
        if lhs:
            return rhs
        return lhs
    return reduce(conjunction, list)
def allfalse(list):
    """Return True when no element of *list* is true."""
    def disjunction(lhs, rhs):
        if lhs:
            return lhs
        return rhs
    return not reduce(disjunction, list)
def enumerate(list):
    """Pair every element of *list* with its index: (0, first), (1, second), ..."""
    indexes = range(len(list))
    return zip(indexes, list)
def cmp(a, b):
    """Three-way comparison: -1 when a < b, 0 when a == b, otherwise 1."""
    if a < b:
        return -1
    if a == b:
        return 0
    return 1
class Statistic(object):
    """Base class of all statistics.

    The attributes of the *data* object passed to the constructor are
    copied onto the instance.  ``value`` is computed lazily by getValue()
    on first access and cached; assigning ``bins`` or ``ticks`` discards
    the cached value.
    """

    def __init__(self, data):
        self.__dict__.update(data.__dict__)
        for attr in ('value', 'bins', 'ticks'):
            if attr not in self.__dict__:
                self.__dict__[attr] = None

    def __getattribute__(self, attr):
        if attr != 'value':
            return super(Statistic, self).__getattribute__(attr)

        # Lazily compute and cache the value.
        if self.__dict__['value'] is None:
            self.__dict__['value'] = self.getValue()
        return self.__dict__['value']

    def __setattr__(self, attr, value):
        if attr != 'bins' and attr != 'ticks':
            super(Statistic, self).__setattr__(attr, value)
            return

        if attr == 'bins':
            if value is not None:
                # NOTE(review): 'db' is a module global that is not defined
                # in the part of this file visible here -- confirm it is
                # set before bins are assigned.
                value = db.getBin(value)
        elif type(value) is str:
            # Ticks given as a whitespace separated string become a list
            # of ints.  (This branch used to test for 'samples', which can
            # never be the attribute here, so it never ran.)
            value = [int(x) for x in value.split()]

        self.__dict__[attr] = value
        # Any cached value was computed for the old selection.
        self.__dict__['value'] = None

    def getValue(self):
        """Compute the value; must be provided by subclasses."""
        raise AttributeError('getValue() must be defined')

    def zero(self):
        return False

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        # Relies on the subclass providing a float conversion.
        return '%f' % (float(self))
class FormulaStat(object):
    """A value that supports formula arithmetic.

    ``value`` is a nested mapping run -> x -> y -> number.  The arithmetic
    operators return a new FormulaStat whose value is computed element-wise
    by binaryop() / unaryop(); division by zero yields 0.0 (see zerodiv()).
    """

    def _binary(op, lhs, rhs):
        # Shared body of all binary operators.
        result = FormulaStat()
        result.value = binaryop(op, lhs, rhs)
        return result
    _binary = staticmethod(_binary)

    def __add__(self, other):
        return FormulaStat._binary(operator.add, self, other)

    def __sub__(self, other):
        return FormulaStat._binary(operator.sub, self, other)

    def __mul__(self, other):
        return FormulaStat._binary(operator.mul, self, other)

    def __truediv__(self, other):
        return FormulaStat._binary(zerodiv, self, other)

    def __mod__(self, other):
        return FormulaStat._binary(operator.mod, self, other)

    # Reflected variants: the FormulaStat is the right-hand operand.
    def __radd__(self, other):
        return FormulaStat._binary(operator.add, other, self)

    def __rsub__(self, other):
        return FormulaStat._binary(operator.sub, other, self)

    def __rmul__(self, other):
        return FormulaStat._binary(operator.mul, other, self)

    def __rtruediv__(self, other):
        return FormulaStat._binary(zerodiv, other, self)

    def __rmod__(self, other):
        return FormulaStat._binary(operator.mod, other, self)

    def __neg__(self):
        result = FormulaStat()
        result.value = unaryop(operator.neg, self)
        return result

    def __getitem__(self, idx):
        """Select x index *idx* (at y index 0) of every run; the result
        holds that single element at position [0][0] of each run."""
        source_value = self.value
        picked = FormulaStat()
        picked.value = {}
        for run in source_value.keys():
            picked.value[run] = {0: {0: source_value[run][idx][0]}}
        return picked

    def __float__(self):
        """Return the value for the module-level display_run as a float;
        infinity when there is no data for that run."""
        current = self.value
        if isinstance(current, FormulaStat):
            return float(current)
        if display_run not in current:
            return (1e300*1e300)

        run_values = current[display_run]
        if len(run_values) == 1:
            return run_values[0][0]
        # NOTE(review): for multi-element values the element at x index 4
        # is returned; the index is hard-coded in the original code.
        return run_values[4][0]

    def display(self):
        """Print the values of display_run using a VectorDisplay."""
        import display
        d = display.VectorDisplay()
        d.flags = 0
        d.precision = 1
        d.name = 'formula'
        d.desc = 'formula'
        val = self.value[display_run]
        d.value = [val[x][0] for x in val.keys()]
        d.display()
class Scalar(Statistic,FormulaStat):
    """A statistic holding a single value per run."""

    def getValue(self):
        return source.data(self, self.bins)

    def display(self):
        """Print the stat when it is printable (or everything is shown)."""
        import display
        # The printable bit lives in the project's flags module, which is
        # not imported at the top of this file as far as visible here.
        import flags
        p = display.Print()
        p.name = self.name
        p.desc = self.desc
        p.value = float(self)
        p.flags = self.flags
        p.precision = self.precision
        # NOTE(review): 'display.all' is not defined in the part of
        # display.py visible in this change -- confirm the attribute name.
        if display.all or (self.flags & flags.printable):
            p.display()

    def comparable(self, other):
        return self.name == other.name

    def __eq__(self, other):
        return self.value == other.value

    # NOTE(review): the in-place operators apply -=, += and /= directly to
    # self.value; confirm the value type supports them.
    def __isub__(self, other):
        self.value -= other.value
        return self

    def __iadd__(self, other):
        self.value += other.value
        return self

    def __itruediv__(self, other):
        if not other:
            return self
        self.value /= other
        return self
class Vector(Statistic,FormulaStat):
    """Statistic whose value is normally a sequence.

    Every operation also copes with a non-sequence value, as decided by
    the ``issequence`` helper.
    """

    def getValue(self):
        """Return ``source.data(self, self.bins)`` from the global source."""
        return source.data(self, self.bins)

    def display(self):
        """Print the vector unless it is hidden.

        Hidden means ``display.all`` is off and the ``flags.printable`` bit
        is not set.  All instance attributes are copied onto a
        ``display.VectorDisplay`` which does the actual printing.
        """
        import display
        if not display.all and not (self.flags & flags.printable):
            return

        d = display.VectorDisplay()
        d.__dict__.update(self.__dict__)
        d.display()

    def comparable(self, other):
        """Comparable when names match and the values have equal length."""
        return self.name == other.name and \
               len(self.value) == len(other.value)

    def __eq__(self, other):
        """Compare values element-wise (or directly for non-sequences)."""
        is_seq = issequence(self.value)
        if is_seq != issequence(other.value):
            # Was `return false`, which raised NameError instead of
            # answering "not equal".
            return False

        if not is_seq:
            return self.value == other.value

        if len(self.value) != len(other.value):
            return False
        for v1, v2 in zip(self.value, other.value):
            if v1 != v2:
                return False
        return True

    def __isub__(self, other):
        """Replace the value with ``binaryop(operator.sub, ...)`` of both."""
        self.value = binaryop(operator.sub, self.value, other.value)
        return self

    def __iadd__(self, other):
        """Replace the value with ``binaryop(operator.add, ...)`` of both."""
        self.value = binaryop(operator.add, self.value, other.value)
        return self

    def __itruediv__(self, other):
        """Divide every element by *other* in place; falsy divisor is a no-op."""
        if not other:
            return self
        if issequence(self.value):
            # Index-based so the existing sequence object is mutated.
            for i in range(len(self.value)):
                self.value[i] /= other
        else:
            self.value /= other
        return self
class Formula(Vector):
    """Statistic whose value is computed by evaluating a formula string."""

    def getValue(self):
        """Evaluate ``self.formula`` and return the result's ``value``.

        Every ':' in the formula is rewritten to '__' before the text is
        evaluated with ``source.stattop`` as the globals dictionary.
        """
        # NOTE(review): eval() runs whatever text is stored in the formula;
        # only safe while the formula source is trusted.
        expression = self.formula.replace(':', '__')
        result = eval(expression, source.stattop)
        return result.value

    def comparable(self, other):
        """Comparable when names match and ``compare`` accepts both dists."""
        # NOTE(review): neither `compare` nor a `dist` attribute is set up
        # in this class -- confirm both exist before relying on this.
        if self.name != other.name:
            return False
        return compare(self.dist, other.dist)

    def __eq__(self, other):
        """Equality is decided by the stored values only."""
        return self.value == other.value

    def __isub__(self, other):
        """In-place subtraction is a no-op for formulas."""
        return self

    def __iadd__(self, other):
        """In-place addition is a no-op for formulas."""
        return self

    def __itruediv__(self, other):
        """In-place division is a no-op for formulas."""
        # The truthiness test is kept even though both paths return self.
        if not other:
            return self
        return self
class SimpleDist(object):
    """Running totals of a distribution: sum, sum of squares, sample count."""

    def __init__(self, sums, squares, samples):
        self.sums, self.squares, self.samples = sums, squares, samples

    def getValue(self):
        """A distribution has no single value; always 0.0."""
        return 0.0

    def display(self, name, desc, flags, precision):
        """Print ``<name>.mean``, ``<name>.stdev`` and ``<name>.samples``.

        *desc* is accepted but not used.  The standard deviation is the
        sample (n-1) form, 0.0 for a single sample and the string 'NaN'
        when the computed variance is negative.
        """
        import display
        p = display.Print()
        p.flags = flags
        p.precision = precision

        # NOTE(review): the nesting below was reconstructed (the source's
        # indentation was lost); confirm that .stdev is meant to be
        # skipped together with .mean when there are no samples.
        if self.samples > 0:
            p.name = name + ".mean"
            p.value = self.sums / self.samples
            p.display()

            if self.samples > 1:
                n = self.samples
                var = (n * self.squares - self.sums ** 2) / (n * (n - 1))
                if var >= 0:
                    stdev = math.sqrt(var)
                else:
                    stdev = 'NaN'
            else:
                stdev = 0.0
            p.name = name + ".stdev"
            p.value = stdev
            p.display()

        p.name = name + ".samples"
        p.value = self.samples
        p.display()

    def comparable(self, other):
        """Any two simple distributions can be compared."""
        return True

    def __eq__(self, other):
        """Equal when all three running totals match."""
        if self.sums != other.sums:
            return False
        if self.squares != other.squares:
            return False
        return self.samples == other.samples

    def __isub__(self, other):
        """Subtract *other*'s totals in place."""
        self.sums -= other.sums
        self.squares -= other.squares
        self.samples -= other.samples
        return self

    def __iadd__(self, other):
        """Add *other*'s totals in place."""
        self.sums += other.sums
        self.squares += other.squares
        self.samples += other.samples
        return self

    def __itruediv__(self, other):
        """Divide all totals by *other* in place; falsy divisor is a no-op."""
        if other:
            self.sums /= other
            self.squares /= other
            self.samples /= other
        return self
class FullDist(SimpleDist):
    """Bucketed distribution: SimpleDist totals plus a histogram.

    Besides sums/squares/samples it records the observed ``minval`` and
    ``maxval``, ``under``/``over`` (printed as underflow/overflow), the
    per-bucket values ``vec`` and the bucket layout (``min``, ``max``,
    ``bsize``, ``size``).
    """

    def __init__(self, sums, squares, samples, minval, maxval,
                 under, vec, over, min, max, bsize, size):
        self.sums = sums
        self.squares = squares
        self.samples = samples
        self.minval = minval
        self.maxval = maxval
        self.under = under
        self.vec = vec
        self.over = over
        self.min = min
        self.max = max
        self.bsize = bsize
        self.size = size

    def getValue(self):
        """A distribution has no single value; always 0.0."""
        return 0.0

    def display(self, name, desc, flags, precision):
        """Print min/max, underflow, every bucket, overflow, then the
        SimpleDist summary (mean/stdev/samples)."""
        import display
        p = display.Print()
        p.flags = flags
        p.precision = precision

        p.name = name + '.min_val'
        p.value = self.minval
        p.display()

        p.name = name + '.max_val'
        p.value = self.maxval
        p.display()

        p.name = name + '.underflow'
        p.value = self.under
        p.display()

        # All buckets but the last span exactly bsize values ...
        i = self.min
        for val in self.vec[:-1]:
            p.name = name + '[%d:%d]' % (i, i + self.bsize - 1)
            p.value = val
            p.display()
            i += self.bsize

        # ... the last one is labelled as running up to self.max.
        p.name = name + '[%d:%d]' % (i, self.max)
        p.value = self.vec[-1]
        p.display()

        p.name = name + '.overflow'
        p.value = self.over
        p.display()

        SimpleDist.display(self, name, desc, flags, precision)

    def comparable(self, other):
        """Comparable when both use the same bucket layout."""
        return self.min == other.min and self.max == other.max and \
               self.bsize == other.bsize and self.size == other.size

    def __eq__(self, other):
        """Equal when the three running totals match (buckets are ignored)."""
        return self.sums == other.sums and self.squares == other.squares and \
               self.samples == other.samples

    def __isub__(self, other):
        """Subtract *other* in place, bucket by bucket."""
        self.sums -= other.sums
        self.squares -= other.squares
        self.samples -= other.samples

        if other.samples:
            self.minval = min(self.minval, other.minval)
            self.maxval = max(self.maxval, other.maxval)
            # Was `self.under -= under` / `self.over -= over`: bare names
            # that do not exist here and raised NameError.
            self.under -= other.under
            self.vec = [x - y for x, y in zip(self.vec, other.vec)]
            self.over -= other.over
        return self

    def __iadd__(self, other):
        """Add *other* in place, bucket by bucket.

        When this distribution is empty and *other* is not, *other* itself
        is returned, so the augmented assignment rebinds the target to
        *other* (the two names then share one object).
        """
        if not self.samples and other.samples:
            return other

        self.sums += other.sums
        self.squares += other.squares
        self.samples += other.samples

        if other.samples:
            self.minval = min(self.minval, other.minval)
            self.maxval = max(self.maxval, other.maxval)
            self.under += other.under
            self.vec = [x + y for x, y in zip(self.vec, other.vec)]
            self.over += other.over
        return self

    def __itruediv__(self, other):
        """Divide totals and buckets by *other*; falsy divisor is a no-op."""
        if not other:
            return self

        self.sums /= other
        self.squares /= other
        self.samples /= other

        if self.samples:
            self.under /= other
            # Index-based so the existing vec list is updated in place.
            for i in range(len(self.vec)):
                self.vec[i] /= other
            self.over /= other
        return self
class Dist(Statistic):
    """Statistic wrapping one distribution object held in ``self.dist``."""

    def getValue(self):
        """A distribution has no single value; always 0.0."""
        return 0.0

    def display(self):
        """Print the wrapped distribution unless the stat is hidden.

        Hidden means ``display.all`` is off and the ``flags.printable`` bit
        is not set.
        """
        import display
        if not display.all and not (self.flags & flags.printable):
            return

        self.dist.display(self.name, self.desc, self.flags, self.precision)

    def comparable(self, other):
        """Comparable when names match and the distributions agree."""
        # Was `compareable`, a method that does not exist on the
        # distribution classes (AttributeError).
        return self.name == other.name and \
               self.dist.comparable(other.dist)

    def __eq__(self, other):
        """Equality is delegated to the wrapped distributions."""
        return self.dist == other.dist

    def __isub__(self, other):
        """Subtract *other*'s distribution in place."""
        self.dist -= other.dist
        return self

    def __iadd__(self, other):
        """Add *other*'s distribution in place."""
        self.dist += other.dist
        return self

    def __itruediv__(self, other):
        """Divide the distribution by *other*; falsy divisor is a no-op."""
        if not other:
            return self
        self.dist /= other
        return self
class VectorDist(Statistic):
    """Statistic holding a sequence of distributions in ``self.dist``."""

    def getValue(self):
        """A distribution has no single value; always 0.0."""
        return 0.0

    def display(self):
        """Print every distribution followed by their combined total.

        Each entry is named ``<name>.<subname>`` when it has a non-empty
        subname and ``<name>[<index>]`` otherwise; its description falls
        back to the stat's own.  Nothing is printed when the stat is
        hidden or when ``self.dist`` is a single distribution object.
        """
        import display
        if not display.all and not (self.flags & flags.printable):
            return
        if isinstance(self.dist, SimpleDist):
            return

        # Pad short name/description lists so every distribution is shown;
        # the previous map(None, ...) padded with None and then crashed in
        # len().
        count = len(self.dist)
        subnames = list(self.subnames) + [''] * (count - len(self.subnames))
        subdescs = list(self.subdescs) + [''] * (count - len(self.subdescs))

        for i, (dist, sn, sd) in enumerate(zip(self.dist, subnames, subdescs)):
            if len(sn) > 0:
                name = '%s.%s' % (self.name, sn)
            else:
                name = '%s[%d]' % (self.name, i)

            if len(sd) > 0:
                desc = sd
            else:
                desc = self.desc

            dist.display(name, desc, self.flags, self.precision)

        if not count:
            # Nothing to total.
            return

        # The `or 1` means the total is printed regardless of flags.total.
        if (self.flags & flags.total) or 1:
            first = self.dist[0]
            # FullDist must be tested first: it subclasses SimpleDist, so
            # the previous isinstance(..., SimpleDist) test always matched
            # and the FullDist total was unreachable.
            if isinstance(first, FullDist):
                # NOTE(review): assumes `sums` also adds the bucket lists
                # (d.vec) element-wise -- confirm.
                disttotal = FullDist(
                    reduce(sums, [d.sums for d in self.dist]),
                    reduce(sums, [d.squares for d in self.dist]),
                    reduce(sums, [d.samples for d in self.dist]),
                    min([d.minval for d in self.dist]),
                    max([d.maxval for d in self.dist]),
                    reduce(sums, [d.under for d in self.dist]),
                    reduce(sums, [d.vec for d in self.dist]),
                    reduce(sums, [d.over for d in self.dist]),
                    # Was dist[0].min etc.: that indexed the loop variable
                    # (a single distribution), not the sequence.
                    first.min,
                    first.max,
                    first.bsize,
                    first.size)
            else:
                disttotal = SimpleDist(
                    reduce(sums, [d.sums for d in self.dist]),
                    reduce(sums, [d.squares for d in self.dist]),
                    reduce(sums, [d.samples for d in self.dist]))

            name = '%s.total' % (self.name)
            desc = self.desc
            disttotal.display(name, desc, self.flags, self.precision)

    def comparable(self, other):
        """Comparable when names match and every pair of dists agrees."""
        return self.name == other.name and \
               alltrue(map(lambda x, y : x.comparable(y),
                           self.dist,
                           other.dist))

    def __eq__(self, other):
        """Equal when every pair of distributions is equal."""
        return alltrue(map(lambda x, y : x == y, self.dist, other.dist))

    def __isub__(self, other):
        """Subtract *other*'s distributions pairwise, in place."""
        if issequence(self.dist) and issequence(other.dist):
            for i, (sd, od) in enumerate(zip(self.dist, other.dist)):
                sd -= od
                # An in-place operator may hand back a different object;
                # store it, otherwise the result is silently dropped.
                if sd is not self.dist[i]:
                    self.dist[i] = sd
        else:
            self.dist -= other.dist
        return self

    def __iadd__(self, other):
        """Add *other*'s distributions pairwise, in place."""
        if issequence(self.dist) and issequence(other.dist):
            for i, (sd, od) in enumerate(zip(self.dist, other.dist)):
                sd += od
                # FullDist.__iadd__ returns *other* when self is empty, so
                # the result has to be written back.
                if sd is not self.dist[i]:
                    self.dist[i] = sd
        else:
            self.dist += other.dist
        return self

    def __itruediv__(self, other):
        """Divide every distribution by *other*; falsy divisor is a no-op."""
        if not other:
            return self
        if issequence(self.dist):
            for dist in self.dist:
                dist /= other
        else:
            self.dist /= other
        return self
class Vector2d(Statistic):
    """Two-dimensional statistic stored flat in ``self.value``.

    Row ``x`` occupies ``value[x * self.y : (x + 1) * self.y]``; there are
    ``self.x`` rows of ``self.y`` entries each.
    """

    def getValue(self):
        """A 2d vector has no single value; always 0.0."""
        return 0.0

    def display(self):
        """Print each row as a vector, then the column totals if requested.

        Rows are labelled ``<name>[<ysubname>]``; missing ysubnames are
        padded with '' and, with no ysubnames at all, the row index is
        used.  Totals are printed when the ``flags.total`` bit is set.
        """
        import display
        if not display.all and not (self.flags & flags.printable):
            return

        d = display.VectorDisplay()
        d.__dict__.update(self.__dict__)

        if 'ysubnames' in self.__dict__:
            ysubnames = list(self.ysubnames)
            slack = self.x - len(ysubnames)
            if slack > 0:
                ysubnames.extend(['']*slack)
        else:
            ysubnames = range(self.x)

        for x,sname in enumerate(ysubnames):
            o = x * self.y
            d.value = self.value[o:o+self.y]
            d.name = '%s[%s]' % (self.name, sname)
            d.display()

        if self.flags & flags.total:
            d.value = []
            for y in range(self.y):
                xtot = 0.0
                for x in range(self.x):
                    # Rows are self.y wide (see the slicing above); the
                    # old `x * self.x` stride read the wrong cells unless
                    # the matrix happened to be square.
                    xtot += self.value[y + x * self.y]
                d.value.append(xtot)

            d.name = self.name + '.total'
            d.display()

    def comparable(self, other):
        """Comparable when names and both dimensions match."""
        return self.name == other.name and self.x == other.x and \
               self.y == other.y

    def __eq__(self, other):
        """Values are not compared; always True."""
        return True

    def __isub__(self, other):
        """In-place subtraction is a no-op."""
        return self

    def __iadd__(self, other):
        """In-place addition is a no-op."""
        return self

    def __itruediv__(self, other):
        """In-place division is a no-op."""
        if not other:
            return self
        return self
def NewStat(data):
    """Build the statistic object matching ``data.type``.

    Recognised types are SCALAR, VECTOR, DIST, VECTORDIST, VECTOR2D and
    FORMULA; any other type yields None.
    """
    constructors = {
        'SCALAR': Scalar,
        'VECTOR': Vector,
        'DIST': Dist,
        'VECTORDIST': VectorDist,
        'VECTOR2D': Vector2d,
        'FORMULA': Formula,
    }
    make = constructors.get(data.type)
    if make is None:
        return None
    return make(data)

127
util/stats/print.py Normal file
View file

@ -0,0 +1,127 @@
# Module-level display switches.
# NOTE(review): `all` shadows the builtin of the same name; it is presumably
# the switch the stat classes read as `display.all` -- confirm.
all = False
# When true, Print.__str__ appends " # <description>" to each output line.
descriptions = False
class Value:
    """Formats one statistic value for printing.

    value     -- a number, or a string such as 'NaN' / 'Inf'
    precision -- digits after the decimal point; a negative precision
                 picks "%.0f" for whole numbers and "%f" otherwise
    percent   -- when true, numbers are scaled by 100 and '%' is appended
    """

    def __init__(self, value, precision, percent = False):
        self.value = value
        self.precision = precision
        self.percent = percent

    def __str__(self):
        if isinstance(self.value, str):
            lowered = self.value.lower()
            if lowered == 'nan':
                value = 'NaN'
            elif lowered == 'inf':
                value = 'Inf'
            else:
                # Any other string is shown verbatim; previously `value`
                # was left unbound here and str() raised
                # UnboundLocalError.
                value = self.value
        else:
            if self.precision >= 0:
                format = "%%.%df" % self.precision
            elif self.value == 0.0:
                format = "%.0f"
            elif self.value % 1.0 == 0.0:
                format = "%.0f"
            else:
                format = "%f"
            value = self.value
            if self.percent:
                value = value * 100.0
            value = format % value

        if self.percent:
            value = value + "%"

        return value
class Print:
    """One printable output line: name, value and optional pdf/cdf/desc.

    Attributes are supplied as keyword arguments or assigned afterwards;
    __str__ needs ``name``, ``value`` and ``precision``, and doprint may
    additionally read ``flags``.
    """

    def __init__(self, **vals):
        self.__dict__.update(vals)

    def __str__(self):
        """Format the line; pdf/cdf columns are blank unless set."""
        attrs = self.__dict__
        value = Value(self.value, self.precision)

        pdf = ''
        if 'pdf' in attrs:
            pdf = Value(self.pdf, 2, True)

        cdf = ''
        if 'cdf' in attrs:
            cdf = Value(self.cdf, 2, True)

        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)

        # The description is appended only when the module-level
        # `descriptions` switch is on and a non-empty desc was provided.
        if descriptions and 'desc' in attrs and self.desc:
            output = "%s # %s" % (output, self.desc)

        return output

    def doprint(self):
        """Return True when this line should be shown.

        Zero values are suppressed by the nozero flag and the string 'NaN'
        by the nonan flag, unless everything is being displayed.
        """
        # NOTE(review): display_all, flags_nozero and flags_nonan are not
        # defined in the visible part of this module (which only defines
        # `all`) -- confirm where they come from.
        if display_all:
            return True
        if self.value == 0.0 and (self.flags & flags_nozero):
            return False
        if isinstance(self.value, str) and self.value == 'NaN' \
               and (self.flags & flags_nonan):
            return False
        return True

    def display(self):
        """Print the line to stdout if doprint() allows it."""
        if not self.doprint():
            return
        print(self)
class VectorDisplay:
    """Prints a vector-valued statistic, one line per element.

    Expects ``name``, ``desc``, ``flags``, ``precision`` and ``value`` to
    have been assigned; ``subnames`` and ``subdescs`` are optional.
    """

    def display(self):
        """Print every named element, then the total if requested.

        A non-sequence value is printed as one plain line.  An empty
        sequence prints nothing.  Elements whose sub-name is empty are
        skipped, but still contribute to the running cdf.
        """
        p = Print()
        p.flags = self.flags
        p.precision = self.precision

        if not issequence(self.value):
            p.name = self.name
            p.desc = self.desc
            p.value = self.value
            p.display()
            return

        if not len(self.value):
            return

        value = self.value
        count = len(value)
        mytotal = reduce(lambda x,y: float(x) + float(y), self.value)
        mycdf = 0.0

        # Default labels: "[i]" when everything is displayed, otherwise
        # blank (blank entries are skipped below).
        if display_all:
            subnames = [ '[%d]' % i for i in range(count) ]
        else:
            subnames = [''] * count

        if 'subnames' in self.__dict__:
            for i,each in enumerate(self.subnames):
                if len(each) > 0:
                    subnames[i] = '.%s' % each

        subdescs = [self.desc] * count
        if 'subdescs' in self.__dict__:
            for i in range(min(count, len(self.subdescs))):
                subdescs[i] = self.subdescs[i]

        # All three lists have `count` entries, so zip pairs them fully.
        for val,sname,sdesc in zip(value, subnames, subdescs):
            # NOTE(review): nesting reconstructed (the source's indentation
            # was lost); pdf/cdf are only attached when the total is
            # positive -- confirm.
            if mytotal > 0.0:
                mypdf = float(val) / float(mytotal)
                mycdf += mypdf
                if (self.flags & flags_pdf):
                    p.pdf = mypdf
                    p.cdf = mycdf

            if len(sname) == 0:
                continue

            p.name = self.name + sname
            p.desc = sdesc
            p.value = val
            p.display()

        if (self.flags & flags_total):
            # The total line carries no pdf/cdf columns.
            p.__dict__.pop('pdf', None)
            p.__dict__.pop('cdf', None)
            p.name = self.name + '.total'
            p.desc = self.desc
            p.value = mytotal
            p.display()

478
util/stats/stats.py Executable file
View file

@ -0,0 +1,478 @@
#!/usr/bin/env python
from __future__ import division
import re, sys
def usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    # -B and -G are accepted by the option parser but were missing from
    # the synopsis.
    print('''\
Usage: %s [-B] [-E] [-F] [-G] [-d <db> ] [-g <get> ] [-h <host>] [-p]
       [-s <system>] [-r <runs> ] [-u <username>] <command> [command args]
''' % sys.argv[0])
    sys.exit(1)
def getopts(list, flags):
    """Parse *list* with getopt using the short-option string *flags*.

    Returns the ``(opts, args)`` pair from ``getopt.getopt``.  A parse
    error is reported through usage().
    """
    import getopt
    try:
        parsed = getopt.getopt(list, flags)
    except getopt.GetoptError:
        usage()
    options_found, remaining = parsed
    return options_found, remaining
def printval(name, value, invert = False):
    """Print ``name: value`` using the format selected by printval.mode.

    With *invert* a non-zero value is replaced by its reciprocal first.
    The value ``1e300*1e300`` (+inf) is treated as "no data" and nothing
    is printed.  Mode 'G' uses %g; mode 'F' always uses %f; any other mode
    uses %0.5e for values above 1e6 and %f otherwise.
    """
    if invert and value != 0.0:
        value = 1 / value

    if value == (1e300*1e300):
        return

    mode = printval.mode
    if mode == 'G':
        template = '%s: %g'
    elif mode != 'F' and value > 1e6:
        template = '%s: %0.5e'
    else:
        template = '%s: %f'
    print(template % (name, value))

# Default output mode; the -E and -F command-line flags override it.
printval.mode = 'G'
def unique(list):
    """Return the distinct items of *list* (as dictionary keys)."""
    seen = {}
    for item in list:
        seen[item] = None
    return seen.keys()
def graphdata(runs, tag, label, value,
              datadir = '/n/ziff/z/binkertn/graph/data.ibm'):
    """Write one graph data file per benchmark/dma/cache/system/checkpoint.

    Each file ``<datadir>/<tag>.<base>.dat`` gets a small '#set' header
    (title, axis labels, configuration names) followed by one row per
    clock speed holding ``float(value)`` for every configuration.

    runs    -- accepted but not used; runs are looked up by name
    tag     -- file name prefix
    label   -- y-axis label
    value   -- object converted with float() after info.display_run has
               been pointed at the run of interest
    datadir -- output directory (defaults to the previously hard-coded
               location)

    Side effect: leaves info.display_run set to the last run visited.
    """
    import info
    configs = ['std', 'csa', 'ht1', 'ht4', 'htx', 'ocm', 'occ', 'ocp' ]
    benchmarks = [ 'm', 's' ]
    dmas = [ 'x', 'd', 'b' ]
    caches = [ '1', '2', '3', '4', '5' ]
    systems = [ 'M' ]
    checkpoints = [ '1' ]

    # The loop variable was called `sys`, hiding the sys module inside
    # this function; renamed to keep the module reachable.
    names = [ [bench, dma, cache, system, cpt]
              for bench in benchmarks
              for dma in dmas
              for cache in caches
              for system in systems
              for cpt in checkpoints ]

    for bench,dma,cache,system,cpt in names:
        base = '%s.%s.%s.%s.%s' % (bench, dma, cache, system, cpt)
        fname = '%s/%s.%s.dat' % (datadir, tag, base)
        f = open(fname, 'w')
        # try/finally so the file is closed even if a run name is missing
        # or float(value) fails part-way through.
        try:
            print >>f, '#set TITLE = %s' % base
            print >>f, '#set xlbl = Configuration'
            print >>f, '#set ylbl = %s' % label
            print >>f, '#set sublabels = %s' % ' '.join(configs)

            for speed,freq in zip(['s', 'q'],['4GHz','10GHz']):
                print >>f, '"%s"' % freq,
                for conf in configs:
                    name = '%s.%s.%s.%s.%s.%s.%s' % (conf, bench, dma, speed,
                                                     cache, system, cpt)
                    run = info.source.allRunNames[name]
                    info.display_run = run.run
                    val = float(value)
                    # 1e300*1e300 (+inf) is the "no data" sentinel.
                    if val == 1e300*1e300:
                        print >>f, 0.0,
                    else:
                        print >>f, "%f" % val,
                print >>f
        finally:
            f.close()
def printdata(runs, value, invert = False):
    """Print ``float(value)`` once per run, labelled with the run's name.

    Before each conversion info.display_run is set to the run's id, which
    is what float(value) reads to pick the data.  With *invert* the
    reciprocal is printed.

    Side effect: leaves info.display_run set to the last run.
    """
    import info
    for run in runs:
        info.display_run = run.run
        val = float(value)
        # `invert` used to be dropped here, so the inverted commands
        # (tpp, tpb) printed the same numbers as ppt/bpt.
        printval(run.name, val, invert)
class CommandException(Exception):
    """Raised by commands() for an unknown command or bad arguments."""
def commands(options, command, args):
if command == 'database':
if len(args) == 0: raise CommandException
import dbinit
mydb = dbinit.MyDB(options)
if args[0] == 'drop':
if len(args) > 2: raise CommandException
mydb.admin()
mydb.drop()
if len(args) == 2 and args[1] == 'init':
mydb.create()
mydb.connect()
mydb.populate()
mydb.close()
return
if args[0] == 'init':
if len(args) > 1: raise CommandException
mydb.admin()
mydb.create()
mydb.connect()
mydb.populate()
mydb.close()
return
if args[0] == 'clean':
if len(args) > 1: raise CommandException
mydb.connect()
mydb.clean()
return
raise CommandException
import db, info
info.source = db.Database()
info.source.host = options.host
info.source.db = options.db
info.source.passwd = options.passwd
info.source.user = options.user
info.source.connect()
info.source.update_dict(globals())
system = info.source.__dict__[options.system]
if type(options.get) is str:
info.source.get = options.get
if options.runs is None:
runs = info.source.allRuns
else:
rx = re.compile(options.runs)
runs = []
for run in info.source.allRuns:
if rx.match(run.name):
runs.append(run)
info.display_run = runs[0].run
if command == 'runs':
user = None
opts, args = getopts(args, '-u')
if len(args):
raise CommandException
for o,a in opts:
if o == '-u':
user = a
info.source.listRuns(user)
return
if command == 'stats':
if len(args) == 0:
info.source.listStats()
elif len(args) == 1:
info.source.listStats(args[0])
else:
raise CommandException
return
if command == 'stat':
if len(args) != 1:
raise CommandException
stats = info.source.getStat(args[0])
for stat in stats:
if graph:
graphdata(runs, stat.name, stat.name, stat)
else:
print stat.name
printdata(runs, stat)
return
if command == 'bins':
if len(args) == 0:
info.source.listBins()
elif len(args) == 1:
info.source.listBins(args[0])
else:
raise CommandException
return
if command == 'formulas':
if len(args) == 0:
info.source.listFormulas()
elif len(args) == 1:
info.source.listFormulas(args[0])
else:
raise CommandException
return
if command == 'samples':
if len(args):
raise CommandException
info.source.listTicks(runs)
return
if len(args):
raise CommandException
if command == 'usertime':
import copy
kernel = copy.copy(system.full_cpu.numCycles)
kernel.bins = 'kernel'
user = copy.copy(system.full_cpu.numCycles)
user.bins = 'user'
if graph:
graphdata(runs, 'usertime', 'User Fraction',
user / system.full_cpu.numCycles)
else:
printdata(runs, user / system.full_cpu.numCycles)
return
if command == 'ticks':
if binned:
print 'kernel ticks'
system.full_cpu.numCycles.bins = 'kernel'
printdata(runs, system.full_cpu.numCycles)
print 'idle ticks'
system.full_cpu.numCycles.bins = 'idle'
printdata(runs, system.full_cpu.numCycles)
print 'user ticks'
system.full_cpu.numCycles.bins = 'user'
printdata(runs, system.full_cpu.numCycles)
print 'total ticks'
system.full_cpu.numCycles.bins = None
printdata(runs, system.full_cpu.numCycles)
return
if command == 'packets':
packets = system.tsunami.nsgige.rxPackets
if graph:
graphdata(runs, 'packets', 'Packets', packets)
else:
printdata(runs, packets)
return
if command == 'ppt' or command == 'tpp':
ppt = system.tsunami.nsgige.rxPackets / sim_ticks
printdata(runs, ppt, command == 'tpp')
return
if command == 'pps':
pps = system.tsunami.nsgige.rxPackets / sim_seconds
if graph:
graphdata(runs, 'pps', 'Packets/s', pps)
else:
printdata(runs, pps)
return
if command == 'bpt' or command == 'tpb':
bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
bpt = bytes / sim_ticks * 8
if graph:
graphdata(runs, 'bpt', 'bps / Hz', bpt)
else:
printdata(runs, bpt, command == 'tpb')
return
if command == 'bptb' or command == 'tpbb':
bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
print 'kernel stats'
bytes.bins = 'kernel'
printdata(runs, bytes / ticks)
print 'idle stats'
bytes.bins = 'idle'
printdata(runs, bytes / ticks)
print 'user stats'
bytes.bins = 'user'
printdata(runs, bytes / ticks)
return
if command == 'bytes':
stat = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
if binned:
print '%s kernel stats' % stat.name
stat.bins = 'kernel'
printdata(runs, stat)
print '%s idle stats' % stat.name
stat.bins = 'idle'
printdata(runs, stat)
print '%s user stats' % stat.name
stat.bins = 'user'
printdata(runs, stat)
print '%s total stats' % stat.name
stat.bins = None
printdata(runs, stat)
return
if command == 'rxbps':
gbps = system.tsunami.nsgige.rxBandwidth / 1e9
if graph:
graphdata(runs, 'rxbps', 'Bandwidth (Gbps)', gbps)
else:
printdata(runs, gbps)
return
if command == 'txbps':
gbps = system.tsunami.nsgige.txBandwidth / 1e9
if graph:
graphdata(runs, 'txbps', 'Bandwidth (Gbps)', gbps)
else:
printdata(runs, gbps)
return
if command == 'bps':
rxbps = system.tsunami.nsgige.rxBandwidth
txbps = system.tsunami.nsgige.txBandwidth
gbps = (rxbps + txbps) / 1e9
if graph:
graphdata(runs, 'bps', 'Bandwidth (Gbps)', gbps)
else:
printdata(runs, gbps)
return
if command == 'misses':
stat = system.L3.overall_mshr_misses
if binned:
print '%s kernel stats' % stat.name
stat.bins = 'kernel'
printdata(runs, stat)
print '%s idle stats' % stat.name
stat.bins = 'idle'
printdata(runs, stat)
print '%s user stats' % stat.name
stat.bins = 'user'
printdata(runs, stat)
print '%s total stats' % stat.name
stat.bins = None
if graph:
graphdata(runs, 'misses', 'Overall MSHR Misses', stat)
else:
printdata(runs, stat)
return
if command == 'mpkb':
misses = system.L3.overall_mshr_misses
rxbytes = system.tsunami.nsgige.rxBytes
txbytes = system.tsunami.nsgige.txBytes
if binned:
print 'mpkb kernel stats'
misses.bins = 'kernel'
mpkb = misses / ((rxbytes + txbytes) / 1024)
printdata(runs, mpkb)
print 'mpkb idle stats'
misses.bins = 'idle'
mpkb = misses / ((rxbytes + txbytes) / 1024)
printdata(runs, mpkb)
print 'mpkb user stats'
misses.bins = 'user'
mpkb = misses / ((rxbytes + txbytes) / 1024)
printdata(runs, mpkb)
print 'mpkb total stats'
mpkb = misses / ((rxbytes + txbytes) / 1024)
misses.bins = None
if graph:
graphdata(runs, 'mpkb', 'Misses / KB', mpkb)
else:
printdata(runs, mpkb)
return
if command == 'execute':
printdata(runs, system.full_cpu.ISSUE__count)
return
if command == 'commit':
printdata(runs, system.full_cpu.COM__count)
return
if command == 'fetch':
printdata(runs, system.full_cpu.FETCH__count)
return
if command == 'rxbpp':
bpp = system.tsunami.nsgige.rxBytes / system.tsunami.nsgige.rxPackets
printdata(run, 8 * bpp)
return
if command == 'txbpp':
bpp = system.tsunami.nsgige.txBytes / system.tsunami.nsgige.txPackets
printdata(run, 8 * bpp)
return
raise CommandException
# Module-level switches read by commands():
#   graph  -- send results to graphdata() instead of printdata()
#   binned -- additionally print the per-bin (kernel/idle/user) breakdowns
graph = False
binned = False
class Options:
    """Plain attribute container for the parsed command-line settings."""
# Script entry point: parse the global flags, then hand the command and
# its arguments to commands().
if __name__ == '__main__':
    import getpass

    # Defaults; each can be overridden by a flag below.
    options = Options()
    options.host = 'zizzer.pool'
    options.db = None
    options.passwd = ''
    options.user = getpass.getuser()
    options.runs = None
    options.system = 'client'
    options.get = None
    options.binned = False
    options.graph = False

    opts, args = getopts(sys.argv[1:], '-BEFGd:g:h:pr:s:u:')
    for o,a in opts:
        if o == '-B':
            options.binned = True
            # commands() reads the module-level switch; setting only the
            # option attribute left -B without any effect.
            binned = True
        if o == '-E':
            printval.mode = 'E'
        if o == '-F':
            printval.mode = 'F'
        if o == '-G':
            options.graph = True
            # Same as -B: commands() looks at the module-level `graph`.
            graph = True
        if o == '-d':
            options.db = a
        if o == '-g':
            options.get = a
        if o == '-h':
            options.host = a
        if o == '-p':
            # -p prompts for the password instead of taking it as text.
            options.passwd = getpass.getpass()
        if o == '-r':
            options.runs = a
        if o == '-u':
            options.user = a
        if o == '-s':
            options.system = a

    if len(args) == 0:
        usage()

    command = args[0]
    args = args[1:]

    try:
        commands(options, command, args)
    except CommandException:
        usage()