Nathan Binkert 2771abb6ed Totally re-do/reorganize the python part of the statistics code
Make the database creation/removal/cleanup code use python
Make formulas work with the database
Add support to do some graphing, but needs more work
Still need to work on vectors, 2d vectors, dists and vectordists

extra : convert_revision : 1a88320dcc036a3751e8a036770766dce76a568c
2004-08-09 21:20:52 -04:00

415 lines
13 KiB

import MySQLdb, re, string
def statcmp(a, b):
    """Three-way compare of two dotted statistic names.

    The names are split on '.'.  All components before the last index
    of the shorter name are compared in order; the first difference
    decides.  If those are all equal, names with the same number of
    components are ordered by their final component, otherwise the
    name with fewer components sorts first.

    Returns -1, 0 or 1, so it can be used as an old-style comparison
    function (e.g. through functools.cmp_to_key).
    """
    def _three_way(x, y):
        # Portable stand-in for the cmp() builtin, which Python 3 removed.
        return (x > y) - (x < y)

    v1 = a.split('.')
    v2 = b.split('.')

    last = min(len(v1), len(v2)) - 1
    for i, j in zip(v1[:last], v2[:last]):
        if i != j:
            return _three_way(i, j)

    # Special compare for last element.
    if len(v1) == len(v2):
        return _three_way(v1[last], v2[last])
    return _three_way(len(v1), len(v2))
class RunData:
    """One simulation run, built from a database row.

    The row is read positionally: id, name, user, project.
    """

    def __init__(self, row):
        self.run = int(row[0])   # numeric run id
        self.name = row[1]
        self.user = row[2]
        self.project = row[3]
class SubData:
    """Name and description of one (x, y) element of a statistic.

    The row is read positionally: stat id, x, y, name, description.
    """

    def __init__(self, row):
        self.stat = int(row[0])
        self.x = int(row[1])
        self.y = int(row[2])
        self.name = row[3]
        self.descr = row[4]
class Data:
    """One aggregated data point: (stat, run, x, y) -> value.

    The row must have exactly five columns: stat id, run id, x, y and
    the data value.

    Raises:
        ValueError: if the row does not have five columns.
    """

    def __init__(self, row):
        if len(row) != 5:
            # String exceptions are not valid in modern Python; raise a
            # real exception carrying the same message.
            raise ValueError('stat db error')
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % (
            self.stat, self.run, self.x, self.y, self.data)
class StatData(object):
    """Metadata for one statistic, built from a row of the stats table.

    Columns are read positionally: 0 id, 1 name, 2 description, 3 type,
    4 print flag, 5 prerequisite stat id, 6 precision, 7-11 further
    flag columns, and, for distribution types only, 12 min, 13 max,
    14 bucket size, 15 size.

    The class attribute ``db`` must be set to the owning Database
    before a 'FORMULA' row is constructed, because the formula text is
    looked up in ``db.allFormulas``.
    """

    def __init__(self, row):
        self.stat = int(row[0])
        self.name = row[1]
        self.desc = row[2]
        self.type = row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        # Project-local module; imported here as in the original code.
        import flags

        # Map each boolean column to the flag bit it switches on.
        # NOTE(review): the flag for column 9 was missing from the
        # source; flags.total is assumed -- confirm against flags.py.
        flag_columns = (
            (4, flags.printable),
            (7, flags.nozero),
            (8, flags.nonan),
            (9, flags.total),
            (10, flags.pdf),
            (11, flags.cdf),
        )
        self.flags = 0
        for column, bit in flag_columns:
            if int(row[column]):
                self.flags |= bit

        if self.type in ('DIST', 'VECTORDIST'):
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            self.formula = self.db.allFormulas[self.stat]
class Node(object):
    """Named interior node of the dotted statistic-name hierarchy.

    Child nodes and statistics are attached to an instance as plain
    attributes by whoever builds the hierarchy.
    """

    def __init__(self, name):
        self.name = name

    def __str__(self):
        # Must read the attribute: a bare `name` is undefined here.
        return self.name
class Database(object):
    """Connection to the statistics MySQL database plus cached metadata.

    connect() fills the ``all*`` attributes with the runs, bins,
    sub-data, formulas and stats read from the database.  ``runs``,
    ``bins`` and ``ticks`` are optional filters applied when building
    data queries.

    The pseudo-attribute ``get`` selects how data() aggregates: reading
    it yields 'sum', 'avg' or 'stdev', and assigning one of those names
    switches the aggregation.  Any other value raises AttributeError.
    """

    def __init__(self):
        self.host = 'zizzer.pool'
        self.user = ''
        self.passwd = ''
        self.db = 'm5stats'
        self.cursor = None

        self.allStats = []
        self.allStatIds = {}
        self.allStatNames = {}

        self.allSubData = {}

        self.allRuns = []
        self.allRunIds = {}
        self.allRunNames = {}

        self.allBins = []
        self.allBinIds = {}
        self.allBinNames = {}

        self.allFormulas = {}

        self.stattop = {}
        self.statdict = {}
        self.statlist = []

        self.mode = 'sum'
        self.runs = None
        self.bins = None
        self.ticks = None
        # Written straight into __dict__ because assignments to 'get'
        # are intercepted by __setattr__.
        self.__dict__['get'] = type(self).sum

    def query(self, sql):
        """Execute *sql* on the current cursor (connect() must run first)."""
        self.cursor.execute(sql)

    def update_dict(self, dict):
        """Add the top-level stat hierarchy entries to *dict*.

        NOTE(review): the body was missing from the source; merging
        ``stattop`` into the given mapping is assumed -- confirm.
        """
        dict.update(self.stattop)

    def append(self, stat):
        """Register *stat* under its dotted name.

        ':' in the name is replaced by '__'.  Each leading component
        becomes a Node attribute (created on demand) and the final
        component is bound to *stat*, so the stat is reachable as
        ``db.a.b.c``.  The stat is also recorded in ``stattop``,
        ``statdict`` and ``statlist``.
        """
        statname = re.sub(':', '__', stat.name)
        path = statname.split('.')
        pathtop = path[0]
        fullname = ''

        x = self
        while len(path) > 1:
            name = path.pop(0)
            if name not in x.__dict__:
                x.__dict__[name] = Node(fullname + name)
            x = x.__dict__[name]
            fullname = '%s%s.' % (fullname, name)

        name = path.pop(0)
        x.__dict__[name] = stat

        self.stattop[pathtop] = self.__dict__[pathtop]
        self.statdict[statname] = stat
        self.statlist.append(statname)

    def connect(self):
        """Open the MySQL connection and load all metadata tables."""
        # connect
        self.thedb = MySQLdb.connect(db=self.db,
                                     host=self.host,
                                     user=self.user,
                                     passwd=self.passwd)
        # create a cursor
        self.cursor = self.thedb.cursor()

        # Select exactly the columns RunData reads positionally (id,
        # name, user, project).  The earlier query also selected
        # rn_sample in third position, which shifted user and project.
        self.query('''select rn_id,rn_name,rn_user,rn_project
                   from runs''')
        for result in self.cursor.fetchall():
            run = RunData(result)
            self.allRuns.append(run)
            self.allRunIds[run.run] = run
            self.allRunNames[run.name] = run

        self.query('select * from bins')
        for id, name in self.cursor.fetchall():
            self.allBinIds[int(id)] = name
            self.allBinNames[name] = int(id)

        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
        for result in self.cursor.fetchall():
            subdata = SubData(result)
            self.allSubData.setdefault(subdata.stat, []).append(subdata)

        self.query('select * from formulas')
        for id, formula in self.cursor.fetchall():
            self.allFormulas[int(id)] = formula

        # StatData looks formulas up through this class attribute.
        StatData.db = self
        self.query('select * from stats')
        import info
        for result in self.cursor.fetchall():
            stat = info.NewStat(StatData(result))
            self.append(stat)
            self.allStats.append(stat)
            self.allStatIds[stat.stat] = stat
            self.allStatNames[stat.name] = stat

    def listBins(self, regex='.*'):
        """Print all bins whose name matches *regex*, sorted by name.

        The default pattern lists every bin.
        """
        print('%-50s %-10s' % ('bin name', 'id'))
        print('-' * 61)
        rx = re.compile(regex)
        for name in sorted(self.allBinNames):
            if rx.match(name):
                print('%-50s %-10d' % (name, self.allBinNames[name]))

    def listRuns(self, user=None):
        """Print all runs owned by *user*, or every run if user is None."""
        print('%-40s %-10s %-5s' % ('run name', 'user', 'id'))
        print('-' * 62)
        for run in self.allRuns:
            if user is None or user == run.user:
                print('%-40s %-10s %-10d' % (run.name, run.user, run.run))

    def listTicks(self, run=None):
        """Print the distinct sample ticks, optionally for one run id.

        NOTE(review): the query is pinned to dt_stat=1950, as in the
        original code -- confirm that stat id is still the right probe.
        """
        print("tick")
        print("----------------------------------------")
        sql = 'select distinct dt_tick from data where dt_stat=1950'
        if run is not None:
            sql += ' and dt_run=%d' % run
        self.query(sql)
        for r in self.cursor.fetchall():
            print(r[0])

    def listStats(self, regex=None):
        """Print name, id and type of every stat, ordered by statcmp.

        If *regex* is given, only stats whose name matches it are shown.
        """
        from functools import cmp_to_key

        print('%-60s %-8s %-10s' % ('stat name', 'id', 'type'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        names = sorted((stat.name for stat in self.allStats),
                       key=cmp_to_key(statcmp))
        for name in names:
            stat = self.allStatNames[name]
            if rx is None or rx.match(stat.name):
                print('%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type))

    def listFormulas(self, regex=None):
        """Print every formula stat with its formula, ordered by statcmp.

        If *regex* is given, only formulas whose name matches are shown.
        """
        from functools import cmp_to_key

        print('%-60s %s' % ('formula name', 'formula'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        names = sorted((stat.name for stat in self.allStats),
                       key=cmp_to_key(statcmp))
        for name in names:
            stat = self.allStatNames[name]
            if stat.type != 'FORMULA':
                continue
            if rx is None or rx.match(stat.name):
                print('%-60s %s' % (stat.name, self.allFormulas[stat.stat]))

    def getStat(self, stats):
        """Resolve stat ids and name patterns to stat objects.

        *stats* is one selector or a list of them.  An int is looked up
        as a stat id; a str is compiled as a regular expression and
        selects every stat whose name matches.  Returns a list.
        """
        if type(stats) is not list:
            stats = [stats]

        ret = []
        for selector in stats:
            if isinstance(selector, int):
                ret.append(self.allStatIds[selector])
            if isinstance(selector, str):
                rx = re.compile(selector)
                ret.extend(s for s in self.allStats if rx.match(s.name))
        return ret

    def getBin(self, bins):
        """Resolve bin selectors to a list of bin ids.

        *bins* is one selector or a list of them.  An int is taken as a
        bin id, a str as an exact bin name, and anything else is used
        as a compiled pattern whose match() selects bin names.

        NOTE(review): the int and str branch bodies were missing from
        the source and are reconstructed -- confirm.
        """
        if type(bins) is not list:
            bins = [bins]

        ret = []
        for selector in bins:
            if isinstance(selector, int):
                ret.append(selector)
            elif isinstance(selector, str):
                ret.append(self.allBinNames[selector])
            else:
                for name, id in self.allBinNames.items():
                    if selector.match(name):
                        ret.append(id)
        return ret

    def getNotBin(self, bin):
        """Return the ids of all bins NOT selected by *bin* (see getBin)."""
        excluded = set(self.getBin(bin))
        return [id for id in self.allBinIds if id not in excluded]

    # get the data
    def inner(self, op, stat, bins, ticks, group=False):
        """Build the SQL that aggregates dt_data with *op*.

        *stat* is a stat object or a list of them (their ``stat`` ids
        are used).  ``self.runs``, *bins* and *ticks* add optional
        filters when non-empty.  Rows are grouped by stat, run, x and
        y, and additionally by tick when *group* is true.  Returns the
        SQL string; nothing is executed.
        """
        sql = 'select '
        sql += 'dt_stat as stat, '
        sql += 'dt_run as run, '
        sql += 'dt_x as x, '
        sql += 'dt_y as y, '
        if group:
            sql += 'dt_tick as tick, '
        sql += '%s(dt_data) as data ' % op
        sql += 'from data '
        sql += 'where '

        if isinstance(stat, list):
            val = ' or '.join(['dt_stat=%d' % s.stat for s in stat])
            sql += ' (%s)' % val
        else:
            sql += ' dt_stat=%d' % stat.stat

        if self.runs:
            val = ' or '.join(['dt_run=%d' % r for r in self.runs])
            sql += ' and (%s)' % val

        if bins:
            val = ' or '.join(['dt_bin=%d' % b for b in bins])
            sql += ' and (%s)' % val

        if ticks:
            val = ' or '.join(['dt_tick=%d' % s for s in ticks])
            sql += ' and (%s)' % val

        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
        if group:
            sql += ',dt_tick'
        return sql

    def outer(self, op_out, op_in, stat, bins, ticks):
        """Build SQL applying *op_out* across ticks to per-tick *op_in*.

        The inner query aggregates with *op_in* per tick; the outer
        query combines those per-tick values with *op_out*.
        """
        sql = self.inner(op_in, stat, bins, ticks, True)
        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
        sql += 'group by stat,run,x,y'
        return sql

    def sum(self, stat, bins, ticks):
        """Return SQL summing the data over all selected bins and ticks."""
        return self.inner('sum', stat, bins, ticks)

    def avg(self, stat, bins, ticks):
        """Return SQL summing per tick, then averaging across ticks."""
        return self.outer('avg', 'sum', stat, bins, ticks)

    def stdev(self, stat, bins, ticks):
        """Return SQL summing per tick, then taking stddev across ticks."""
        return self.outer('stddev', 'sum', stat, bins, ticks)

    def __getattribute__(self, attr):
        """Report ``get`` as the current mode name; pass others through."""
        if attr != 'get':
            return super(Database, self).__getattribute__(attr)

        current = self.__dict__['get']
        for mode in ('sum', 'avg', 'stdev'):
            if current == getattr(type(self), mode):
                return mode
        return ''

    def __setattr__(self, attr, value):
        """Accept only 'sum', 'avg' or 'stdev' for ``get``.

        Raises:
            AttributeError: if ``get`` is assigned any other value.
        """
        if attr != 'get':
            super(Database, self).__setattr__(attr, value)
            return

        if value not in ('sum', 'avg', 'stdev'):
            raise AttributeError("can only set get to: sum | avg | stdev")
        self.__dict__['get'] = getattr(type(self), value)

    def data(self, stat, bins=None, ticks=None):
        """Fetch aggregated data for *stat* using the current ``get`` mode.

        *bins* and *ticks* default to ``self.bins`` and ``self.ticks``.
        Returns a nested dict: ``result[run][x][y] = value``.
        """
        if bins is None:
            bins = self.bins
        if ticks is None:
            ticks = self.ticks
        sql = self.__dict__['get'](self, stat, bins, ticks)
        self.query(sql)

        runs = {}
        for row in self.cursor.fetchall():
            data = Data(row)
            runs.setdefault(data.run, {}).setdefault(data.x, {})[data.y] = \
                data.data
        return runs