cb0cf2dd8a
--HG-- extra : convert_revision : 77f475b156d81c03a2811818fa23593d5615c685
416 lines
17 KiB
Python
416 lines
17 KiB
Python
# Copyright (c) 2003-2004 The Regents of The University of Michigan
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met: redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer;
|
|
# redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution;
|
|
# neither the name of the copyright holders nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# Authors: Nathan Binkert
|
|
|
|
import MySQLdb
|
|
|
|
class MyDB(object):
|
|
def __init__(self, options):
|
|
self.name = options.db
|
|
self.host = options.host
|
|
self.user = options.user
|
|
self.passwd = options.passwd
|
|
self.mydb = None
|
|
self.cursor = None
|
|
|
|
def admin(self):
|
|
self.close()
|
|
self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
|
|
passwd=self.passwd)
|
|
self.cursor = self.mydb.cursor()
|
|
|
|
def connect(self):
|
|
self.close()
|
|
self.mydb = MySQLdb.connect(db=self.name, host=self.host,
|
|
user=self.user, passwd=self.passwd)
|
|
self.cursor = self.mydb.cursor()
|
|
|
|
def close(self):
|
|
if self.mydb is not None:
|
|
self.mydb.close()
|
|
self.cursor = None
|
|
|
|
def query(self, sql):
|
|
self.cursor.execute(sql)
|
|
|
|
def drop(self):
|
|
self.query('DROP DATABASE IF EXISTS %s' % self.name)
|
|
|
|
def create(self):
|
|
self.query('CREATE DATABASE %s' % self.name)
|
|
|
|
def populate(self):
|
|
#
|
|
# Each run (or simulation) gets its own entry in the runs table to
|
|
# group stats by where they were generated
|
|
#
|
|
# COLUMNS:
|
|
# 'id' is a unique identifier for each run to be used in other
|
|
# tables.
|
|
# 'name' is the user designated name for the data generated. It is
|
|
# configured in the simulator.
|
|
# 'user' identifies the user that generated the data for the given
|
|
# run.
|
|
# 'project' another name to identify runs for a specific goal
|
|
# 'date' is a timestamp for when the data was generated. It can be
|
|
# used to easily expire data that was generated in the past.
|
|
# 'expire' is a timestamp for when the data should be removed from
|
|
# the database so we don't have years worth of junk.
|
|
#
|
|
# INDEXES:
|
|
# 'run' is indexed so you can find out details of a run if the run
|
|
# was retreived from the data table.
|
|
# 'name' is indexed so that two all run names are forced to be unique
|
|
#
|
|
self.query('''
|
|
CREATE TABLE runs(
|
|
rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
|
|
rn_name VARCHAR(200) NOT NULL,
|
|
rn_sample VARCHAR(32) NOT NULL,
|
|
rn_user VARCHAR(32) NOT NULL,
|
|
rn_project VARCHAR(100) NOT NULL,
|
|
rn_date TIMESTAMP NOT NULL,
|
|
rn_expire TIMESTAMP NOT NULL,
|
|
PRIMARY KEY (rn_id),
|
|
UNIQUE (rn_name,rn_sample)
|
|
) TYPE=InnoDB''')
|
|
|
|
#
|
|
# We keep the bin names separate so that the data table doesn't get
|
|
# huge since bin names are frequently repeated.
|
|
#
|
|
# COLUMNS:
|
|
# 'id' is the unique bin identifer.
|
|
# 'name' is the string name for the bin.
|
|
#
|
|
# INDEXES:
|
|
# 'bin' is indexed to get the name of a bin when data is retrieved
|
|
# via the data table.
|
|
# 'name' is indexed to get the bin id for a named bin when you want
|
|
# to search the data table based on a specific bin.
|
|
#
|
|
self.query('''
|
|
CREATE TABLE bins(
|
|
bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
|
|
bn_name VARCHAR(255) NOT NULL,
|
|
PRIMARY KEY(bn_id),
|
|
UNIQUE (bn_name)
|
|
) TYPE=InnoDB''')
|
|
|
|
#
|
|
# The stat table gives us all of the data for a particular stat.
|
|
#
|
|
# COLUMNS:
|
|
# 'stat' is a unique identifier for each stat to be used in other
|
|
# tables for references.
|
|
# 'name' is simply the simulator derived name for a given
|
|
# statistic.
|
|
# 'descr' is the description of the statistic and what it tells
|
|
# you.
|
|
# 'type' defines what the stat tells you. Types are:
|
|
# SCALAR: A simple scalar statistic that holds one value
|
|
# VECTOR: An array of statistic values. Such a something that
|
|
# is generated per-thread. Vectors exist to give averages,
|
|
# pdfs, cdfs, means, standard deviations, etc across the
|
|
# stat values.
|
|
# DIST: Is a distribution of data. When the statistic value is
|
|
# sampled, its value is counted in a particular bucket.
|
|
# Useful for keeping track of utilization of a resource.
|
|
# (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
|
|
# VECTORDIST: Can be used when the distribution needs to be
|
|
# factored out into a per-thread distribution of data for
|
|
# example. It can still be summed across threads to find
|
|
# the total distribution.
|
|
# VECTOR2D: Can be used when you have a stat that is not only
|
|
# per-thread, but it is per-something else. Like
|
|
# per-message type.
|
|
# FORMULA: This statistic is a formula, and its data must be
|
|
# looked up in the formula table, for indicating how to
|
|
# present its values.
|
|
# 'subdata' is potentially used by any of the vector types to
|
|
# give a specific name to all of the data elements within a
|
|
# stat.
|
|
# 'print' indicates whether this stat should be printed ever.
|
|
# (Unnamed stats don't usually get printed)
|
|
# 'prereq' only print the stat if the prereq is not zero.
|
|
# 'prec' number of decimal places to print
|
|
# 'nozero' don't print zero values
|
|
# 'nonan' don't print NaN values
|
|
# 'total' for vector type stats, print the total.
|
|
# 'pdf' for vector type stats, print the pdf.
|
|
# 'cdf' for vector type stats, print the cdf.
|
|
#
|
|
# The Following are for dist type stats:
|
|
# 'min' is the minimum bucket value. Anything less is an underflow.
|
|
# 'max' is the maximum bucket value. Anything more is an overflow.
|
|
# 'bktsize' is the approximate number of entries in each bucket.
|
|
# 'size' is the number of buckets. equal to (min/max)/bktsize.
|
|
#
|
|
# INDEXES:
|
|
# 'stat' is indexed so that you can find out details about a stat
|
|
# if the stat id was retrieved from the data table.
|
|
# 'name' is indexed so that you can simply look up data about a
|
|
# named stat.
|
|
#
|
|
self.query('''
|
|
CREATE TABLE stats(
|
|
st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
|
|
st_name VARCHAR(255) NOT NULL,
|
|
st_descr TEXT NOT NULL,
|
|
st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
|
|
"VECTOR2D", "FORMULA") NOT NULL,
|
|
st_print BOOL NOT NULL,
|
|
st_prereq SMALLINT UNSIGNED NOT NULL,
|
|
st_prec TINYINT NOT NULL,
|
|
st_nozero BOOL NOT NULL,
|
|
st_nonan BOOL NOT NULL,
|
|
st_total BOOL NOT NULL,
|
|
st_pdf BOOL NOT NULL,
|
|
st_cdf BOOL NOT NULL,
|
|
st_min DOUBLE NOT NULL,
|
|
st_max DOUBLE NOT NULL,
|
|
st_bktsize DOUBLE NOT NULL,
|
|
st_size SMALLINT UNSIGNED NOT NULL,
|
|
PRIMARY KEY (st_id),
|
|
UNIQUE (st_name)
|
|
) TYPE=InnoDB''')
|
|
|
|
#
|
|
# This is the main table of data for stats.
|
|
#
|
|
# COLUMNS:
|
|
# 'stat' refers to the stat field given in the stat table.
|
|
#
|
|
# 'x' referrs to the first dimension of a multi-dimensional stat. For
|
|
# a vector, x will start at 0 and increase for each vector
|
|
# element.
|
|
# For a distribution:
|
|
# -1: sum (for calculating standard deviation)
|
|
# -2: sum of squares (for calculating standard deviation)
|
|
# -3: total number of samples taken (for calculating
|
|
# standard deviation)
|
|
# -4: minimum value
|
|
# -5: maximum value
|
|
# -6: underflow
|
|
# -7: overflow
|
|
# 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
|
|
# dimension.
|
|
# 'run' is the run that the data was generated from. Details up in
|
|
# the run table
|
|
# 'tick' is a timestamp generated by the simulator.
|
|
# 'bin' is the name of the bin that the data was generated in, if
|
|
# any.
|
|
# 'data' is the actual stat value.
|
|
#
|
|
# INDEXES:
|
|
# 'stat' is indexed so that a user can find all of the data for a
|
|
# particular stat. It is not unique, because that specific stat
|
|
# can be found in many runs, bins, and samples, in addition to
|
|
# having entries for the mulidimensional cases.
|
|
# 'run' is indexed to allow a user to remove all of the data for a
|
|
# particular execution run. It can also be used to allow the
|
|
# user to print out all of the data for a given run.
|
|
#
|
|
self.query('''
|
|
CREATE TABLE data(
|
|
dt_stat SMALLINT UNSIGNED NOT NULL,
|
|
dt_x SMALLINT NOT NULL,
|
|
dt_y SMALLINT NOT NULL,
|
|
dt_run SMALLINT UNSIGNED NOT NULL,
|
|
dt_tick BIGINT UNSIGNED NOT NULL,
|
|
dt_bin SMALLINT UNSIGNED NOT NULL,
|
|
dt_data DOUBLE NOT NULL,
|
|
INDEX (dt_stat),
|
|
INDEX (dt_run),
|
|
UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
|
|
) TYPE=InnoDB;''')
|
|
|
|
#
|
|
# Names and descriptions for multi-dimensional stats (vectors, etc.)
|
|
# are stored here instead of having their own entry in the statistics
|
|
# table. This allows all parts of a single stat to easily share a
|
|
# single id.
|
|
#
|
|
# COLUMNS:
|
|
# 'stat' is the unique stat identifier from the stat table.
|
|
# 'x' is the first dimension for multi-dimensional stats
|
|
# corresponding to the data table above.
|
|
# 'y' is the second dimension for multi-dimensional stats
|
|
# corresponding to the data table above.
|
|
# 'name' is the specific subname for the unique stat,x,y combination.
|
|
# 'descr' is the specific description for the uniqe stat,x,y
|
|
# combination.
|
|
#
|
|
# INDEXES:
|
|
# 'stat' is indexed so you can get the subdata for a specific stat.
|
|
#
|
|
self.query('''
|
|
CREATE TABLE subdata(
|
|
sd_stat SMALLINT UNSIGNED NOT NULL,
|
|
sd_x SMALLINT NOT NULL,
|
|
sd_y SMALLINT NOT NULL,
|
|
sd_name VARCHAR(255) NOT NULL,
|
|
sd_descr TEXT,
|
|
UNIQUE (sd_stat,sd_x,sd_y)
|
|
) TYPE=InnoDB''')
|
|
|
|
|
|
#
|
|
# The formula table is maintained separately from the data table
|
|
# because formula data, unlike other stat data cannot be represented
|
|
# there.
|
|
#
|
|
# COLUMNS:
|
|
# 'stat' refers to the stat field generated in the stat table.
|
|
# 'formula' is the actual string representation of the formula
|
|
# itself.
|
|
#
|
|
# INDEXES:
|
|
# 'stat' is indexed so that you can just look up a formula.
|
|
#
|
|
self.query('''
|
|
CREATE TABLE formulas(
|
|
fm_stat SMALLINT UNSIGNED NOT NULL,
|
|
fm_formula BLOB NOT NULL,
|
|
PRIMARY KEY(fm_stat)
|
|
) TYPE=InnoDB''')
|
|
|
|
#
|
|
# Each stat used in each formula is kept in this table. This way, if
|
|
# you want to print out a particular formula, you can simply find out
|
|
# which stats you need by looking in this table. Additionally, when
|
|
# you remove a stat from the stats table and data table, you remove
|
|
# any references to the formula in this table. When a formula is no
|
|
# longer referred to, you remove its entry.
|
|
#
|
|
# COLUMNS:
|
|
# 'stat' is the stat id from the stat table above.
|
|
# 'child' is the stat id of a stat that is used for this formula.
|
|
# There may be many children for any given 'stat' (formula)
|
|
#
|
|
# INDEXES:
|
|
# 'stat' is indexed so you can look up all of the children for a
|
|
# particular stat.
|
|
# 'child' is indexed so that you can remove an entry when a stat is
|
|
# removed.
|
|
#
|
|
self.query('''
|
|
CREATE TABLE formula_ref(
|
|
fr_stat SMALLINT UNSIGNED NOT NULL,
|
|
fr_run SMALLINT UNSIGNED NOT NULL,
|
|
UNIQUE (fr_stat,fr_run),
|
|
INDEX (fr_stat),
|
|
INDEX (fr_run)
|
|
) TYPE=InnoDB''')
|
|
|
|
# COLUMNS:
|
|
# 'event' is the unique event id from the event_desc table
|
|
# 'run' is simulation run id that this event took place in
|
|
# 'tick' is the tick when the event happened
|
|
#
|
|
# INDEXES:
|
|
# 'event' is indexed so you can look up all occurences of a
|
|
# specific event
|
|
# 'run' is indexed so you can find all events in a run
|
|
# 'tick' is indexed because we want the unique thing anyway
|
|
# 'event,run,tick' is unique combination
|
|
self.query('''
|
|
CREATE TABLE events(
|
|
ev_event SMALLINT UNSIGNED NOT NULL,
|
|
ev_run SMALLINT UNSIGNED NOT NULL,
|
|
ev_tick BIGINT UNSIGNED NOT NULL,
|
|
INDEX(ev_event),
|
|
INDEX(ev_run),
|
|
INDEX(ev_tick),
|
|
UNIQUE(ev_event,ev_run,ev_tick)
|
|
) TYPE=InnoDB''')
|
|
|
|
# COLUMNS:
|
|
# 'id' is the unique description id
|
|
# 'name' is the name of the event that occurred
|
|
#
|
|
# INDEXES:
|
|
# 'id' is indexed because it is the primary key and is what you use
|
|
# to look up the descriptions
|
|
# 'name' is indexed so one can find the event based on name
|
|
#
|
|
self.query('''
|
|
CREATE TABLE event_names(
|
|
en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
|
|
en_name VARCHAR(255) NOT NULL,
|
|
PRIMARY KEY (en_id),
|
|
UNIQUE (en_name)
|
|
) TYPE=InnoDB''')
|
|
|
|
def clean(self):
|
|
self.query('''
|
|
DELETE data
|
|
FROM data
|
|
LEFT JOIN runs ON dt_run=rn_id
|
|
WHERE rn_id IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE formula_ref
|
|
FROM formula_ref
|
|
LEFT JOIN runs ON fr_run=rn_id
|
|
WHERE rn_id IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE formulas
|
|
FROM formulas
|
|
LEFT JOIN formula_ref ON fm_stat=fr_stat
|
|
WHERE fr_stat IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE stats
|
|
FROM stats
|
|
LEFT JOIN data ON st_id=dt_stat
|
|
WHERE dt_stat IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE subdata
|
|
FROM subdata
|
|
LEFT JOIN data ON sd_stat=dt_stat
|
|
WHERE dt_stat IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE bins
|
|
FROM bins
|
|
LEFT JOIN data ON bn_id=dt_bin
|
|
WHERE dt_bin IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE events
|
|
FROM events
|
|
LEFT JOIN runs ON ev_run=rn_id
|
|
WHERE rn_id IS NULL''')
|
|
|
|
self.query('''
|
|
DELETE event_names
|
|
FROM event_names
|
|
LEFT JOIN events ON en_id=ev_event
|
|
WHERE ev_event IS NULL''')
|