gem5/util/stats/dbinit.py

# Copyright (c) 2003-2004 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert

import MySQLdb

class MyDB(object):
    def __init__(self, options):
        self.name = options.db
        self.host = options.host
        self.user = options.user
        self.passwd = options.passwd
        self.mydb = None
        self.cursor = None

    def admin(self):
        self.close()
        self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
                                    passwd=self.passwd)
        self.cursor = self.mydb.cursor()

    def connect(self):
        self.close()
        self.mydb = MySQLdb.connect(db=self.name, host=self.host,
                                    user=self.user, passwd=self.passwd)
        self.cursor = self.mydb.cursor()

    def close(self):
        if self.mydb is not None:
            self.mydb.close()
        self.cursor = None

    def query(self, sql):
        self.cursor.execute(sql)

    def drop(self):
        self.query('DROP DATABASE IF EXISTS %s' % self.name)

    def create(self):
        self.query('CREATE DATABASE %s' % self.name)

    def populate(self):
        #
        # Each run (or simulation) gets its own entry in the runs table to
        # group stats by where they were generated
        #
        # COLUMNS:
        #   'id' is a unique identifier for each run to be used in other
        #       tables.
        #   'name' is the user designated name for the data generated.  It is
        #       configured in the simulator.
        #   'user' identifies the user that generated the data for the given
        #       run.
        #   'project' another name to identify runs for a specific goal
        #   'date' is a timestamp for when the data was generated.  It can be
        #       used to easily expire data that was generated in the past.
        #   'expire' is a timestamp for when the data should be removed from
        #       the database so we don't have years worth of junk.
        #
        # INDEXES:
        #   'run' is indexed so you can find out details of a run if the run
        #       was retreived from the data table.
        #   'name' is indexed so that two all run names are forced to be unique
        #
        self.query('''
        CREATE TABLE runs(
            rn_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
            rn_name	VARCHAR(200)		NOT NULL,
            rn_sample	VARCHAR(32)		NOT NULL,
            rn_user	VARCHAR(32)		NOT NULL,
            rn_project	VARCHAR(100)            NOT NULL,
            rn_date	TIMESTAMP		NOT NULL,
            rn_expire	TIMESTAMP               NOT NULL,
            PRIMARY KEY (rn_id),
            UNIQUE (rn_name,rn_sample)
        ) TYPE=InnoDB''')

        #
        # The stat table gives us all of the data for a particular stat.
        #
        # COLUMNS:
        #   'stat' is a unique identifier for each stat to be used in other
        #       tables for references.
        #   'name' is simply the simulator derived name for a given
        #       statistic.
        #   'descr' is the description of the statistic and what it tells
        #       you.
        #   'type' defines what the stat tells you.  Types are:
        #       SCALAR: A simple scalar statistic that holds one value
        #       VECTOR: An array of statistic values.  Such a something that
        #           is generated per-thread.  Vectors exist to give averages,
        #	     pdfs, cdfs, means, standard deviations, etc across the
        #           stat values.
        #       DIST: Is a distribution of data.  When the statistic value is
        #	     sampled, its value is counted in a particular bucket.
        #           Useful for keeping track of utilization of a resource.
        #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
        #       VECTORDIST: Can be used when the distribution needs to be
        #	     factored out into a per-thread distribution of data for
        #	     example.  It can still be summed across threads to find
        #           the total distribution.
        #       VECTOR2D: Can be used when you have a stat that is not only
        #           per-thread, but it is per-something else.  Like
        #           per-message type.
        #       FORMULA: This statistic is a formula, and its data must be
        #	     looked up in the formula table, for indicating how to
        #           present its values.
        #   'subdata' is potentially used by any of the vector types to
        #       give a specific name to all of the data elements within a
        #       stat.
        #   'print' indicates whether this stat should be printed ever.
        #       (Unnamed stats don't usually get printed)
        #   'prereq' only print the stat if the prereq is not zero.
        #   'prec' number of decimal places to print
        #   'nozero' don't print zero values
        #   'nonan' don't print NaN values
        #   'total' for vector type stats, print the total.
        #   'pdf' for vector type stats, print the pdf.
        #   'cdf' for vector type stats, print the cdf.
        #
        #   The Following are for dist type stats:
        #   'min' is the minimum bucket value. Anything less is an underflow.
        #   'max' is the maximum bucket value. Anything more is an overflow.
        #   'bktsize' is the approximate number of entries in each bucket.
        #   'size' is the number of buckets. equal to (min/max)/bktsize.
        #
        # INDEXES:
        #   'stat' is indexed so that you can find out details about a stat
        #       if the stat id was retrieved from the data table.
        #   'name' is indexed so that you can simply look up data about a
        #       named stat.
        #
        self.query('''
        CREATE TABLE stats(
            st_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
            st_name	VARCHAR(255)		NOT NULL,
            st_descr	TEXT			NOT NULL,
            st_type	ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
                "VECTOR2D", "FORMULA")	NOT NULL,
            st_print	BOOL			NOT NULL,
            st_prereq	SMALLINT UNSIGNED	NOT NULL,
            st_prec	TINYINT			NOT NULL,
            st_nozero	BOOL			NOT NULL,
            st_nonan	BOOL			NOT NULL,
            st_total	BOOL			NOT NULL,
            st_pdf	BOOL			NOT NULL,
            st_cdf	BOOL			NOT NULL,
            st_min	DOUBLE			NOT NULL,
            st_max	DOUBLE			NOT NULL,
            st_bktsize	DOUBLE			NOT NULL,
            st_size	SMALLINT UNSIGNED	NOT NULL,
            PRIMARY KEY (st_id),
            UNIQUE (st_name)
        ) TYPE=InnoDB''')

        #
        # This is the main table of data for stats.
        #
        # COLUMNS:
        #   'stat' refers to the stat field given in the stat table.
        #
        #   'x' referrs to the first dimension of a multi-dimensional stat. For
        #       a vector, x will start at 0 and increase for each vector
        #       element.
        #       For a distribution:
        #       -1: sum (for calculating standard deviation)
        #       -2: sum of squares (for calculating standard deviation)
        #       -3: total number of samples taken (for calculating
        #           standard deviation)
        #       -4: minimum value
        #       -5: maximum value
        #       -6: underflow
        #       -7: overflow
        #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
        #       dimension.
        #   'run' is the run that the data was generated from.  Details up in
        #       the run table
        #   'tick' is a timestamp generated by the simulator.
        #   'data' is the actual stat value.
        #
        # INDEXES:
        #   'stat' is indexed so that a user can find all of the data for a
        #       particular stat. It is not unique, because that specific stat
        #       can be found in many runs and samples, in addition to
        #       having entries for the mulidimensional cases.
        #   'run' is indexed to allow a user to remove all of the data for a
        #       particular execution run.  It can also be used to allow the
        #       user to print out all of the data for a given run.
        #
        self.query('''
        CREATE TABLE data(
            dt_stat	SMALLINT UNSIGNED	NOT NULL,
            dt_x	SMALLINT		NOT NULL,
            dt_y	SMALLINT		NOT NULL,
            dt_run	SMALLINT UNSIGNED	NOT NULL,
            dt_tick	BIGINT UNSIGNED		NOT NULL,
            dt_data	DOUBLE			NOT NULL,
            INDEX (dt_stat),
            INDEX (dt_run),
            UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)
        ) TYPE=InnoDB;''')

        #
        # Names and descriptions for multi-dimensional stats (vectors, etc.)
        # are stored here instead of having their own entry in the statistics
        # table. This allows all parts of a single stat to easily share a
        # single id.
        #
        # COLUMNS:
        #   'stat' is the unique stat identifier from the stat table.
        #   'x' is the first dimension for multi-dimensional stats
        #       corresponding to the data table above.
        #   'y' is the second dimension for multi-dimensional stats
        #       corresponding to the data table above.
        #   'name' is the specific subname for the unique stat,x,y combination.
        #   'descr' is the specific description for the uniqe stat,x,y
        #        combination.
        #
        # INDEXES:
        #   'stat' is indexed so you can get the subdata for a specific stat.
        #
        self.query('''
        CREATE TABLE subdata(
            sd_stat	SMALLINT UNSIGNED	NOT NULL,
            sd_x	SMALLINT		NOT NULL,
            sd_y	SMALLINT		NOT NULL,
            sd_name	VARCHAR(255)		NOT NULL,
            sd_descr	TEXT,
            UNIQUE (sd_stat,sd_x,sd_y)
        ) TYPE=InnoDB''')


        #
        # The formula table is maintained separately from the data table
        # because formula data, unlike other stat data cannot be represented
        # there.
        #
        # COLUMNS:
        #   'stat' refers to the stat field generated in the stat table.
        #   'formula' is the actual string representation of the formula
        #       itself.
        #
        # INDEXES:
        #   'stat' is indexed so that you can just look up a formula.
        #
        self.query('''
        CREATE TABLE formulas(
            fm_stat	SMALLINT UNSIGNED	NOT NULL,
            fm_formula	BLOB			NOT NULL,
            PRIMARY KEY(fm_stat)
        ) TYPE=InnoDB''')

        #
        # Each stat used in each formula is kept in this table.  This way, if
        # you want to print out a particular formula, you can simply find out
        # which stats you need by looking in this table.  Additionally, when
        # you remove a stat from the stats table and data table, you remove
        # any references to the formula in this table.  When a formula is no
        # longer referred to, you remove its entry.
        #
        # COLUMNS:
        #   'stat' is the stat id from the stat table above.
        #   'child' is the stat id of a stat that is used for this formula.
        #       There may be many children for any given 'stat' (formula)
        #
        # INDEXES:
        #   'stat' is indexed so you can look up all of the children for a
        #       particular stat.
        #   'child' is indexed so that you can remove an entry when a stat is
        #       removed.
        #
        self.query('''
        CREATE TABLE formula_ref(
            fr_stat	SMALLINT UNSIGNED	NOT NULL,
            fr_run	SMALLINT UNSIGNED	NOT NULL,
            UNIQUE (fr_stat,fr_run),
            INDEX (fr_stat),
            INDEX (fr_run)
        ) TYPE=InnoDB''')

        # COLUMNS:
        #   'event' is the unique event id from the event_desc table
        #   'run' is simulation run id that this event took place in
        #   'tick' is the tick when the event happened
        #
        # INDEXES:
        #   'event' is indexed so you can look up all occurences of a
        #       specific event
        #   'run' is indexed so you can find all events in a run
        #   'tick' is indexed because we want the unique thing anyway
        #   'event,run,tick' is unique combination
        self.query('''
        CREATE TABLE events(
            ev_event	SMALLINT UNSIGNED	NOT NULL,
            ev_run	SMALLINT UNSIGNED	NOT NULL,
            ev_tick	BIGINT   UNSIGNED       NOT NULL,
            INDEX(ev_event),
            INDEX(ev_run),
            INDEX(ev_tick),
            UNIQUE(ev_event,ev_run,ev_tick)
        ) TYPE=InnoDB''')

        # COLUMNS:
        #   'id' is the unique description id
        #   'name' is the name of the event that occurred
        #
        # INDEXES:
        #   'id' is indexed because it is the primary key and is what you use
        #       to look up the descriptions
        #   'name' is indexed so one can find the event based on name
        #
        self.query('''
        CREATE TABLE event_names(
            en_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
            en_name	VARCHAR(255)		NOT NULL,
            PRIMARY KEY (en_id),
            UNIQUE (en_name)
        ) TYPE=InnoDB''')

    def clean(self):
        self.query('''
        DELETE data
        FROM data
        LEFT JOIN runs ON dt_run=rn_id
        WHERE rn_id IS NULL''')

        self.query('''
        DELETE formula_ref
        FROM formula_ref
        LEFT JOIN runs ON fr_run=rn_id
        WHERE rn_id IS NULL''')

        self.query('''
        DELETE formulas
        FROM formulas
        LEFT JOIN formula_ref ON fm_stat=fr_stat
        WHERE fr_stat IS NULL''')

        self.query('''
        DELETE stats
        FROM stats
        LEFT JOIN data ON st_id=dt_stat
        WHERE dt_stat IS NULL''')

        self.query('''
        DELETE subdata
        FROM subdata
        LEFT JOIN data ON sd_stat=dt_stat
        WHERE dt_stat IS NULL''')

        self.query('''
        DELETE events
        FROM events
        LEFT JOIN runs ON ev_run=rn_id
        WHERE rn_id IS NULL''')

        self.query('''
        DELETE event_names
        FROM event_names
        LEFT JOIN events ON en_id=ev_event
        WHERE ev_event IS NULL''')
added copyright kern/linux/sched.hh: kern/linux/thread_info.hh: got rid of everything but exactly what we needed util/categories.py: newest version from one of my repositories --HG-- extra : convert_revision : c4328e5938d421d60493c0da07022bfa9e92c404 2005-06-05 10:08:05 +02:00			`# Copyright (c) 2003-2004 The Regents of The University of Michigan`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are`
			`# met: redistributions of source code must retain the above copyright`
			`# notice, this list of conditions and the following disclaimer;`
			`# redistributions in binary form must reproduce the above copyright`
			`# notice, this list of conditions and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution;`
			`# neither the name of the copyright holders nor the names of its`
			`# contributors may be used to endorse or promote products derived from`
			`# this software without specific prior written permission.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
Updated Authors from bk prs info --HG-- extra : convert_revision : 77f475b156d81c03a2811818fa23593d5615c685 2006-06-01 01:26:56 +02:00			`#`
			`# Authors: Nathan Binkert`
added copyright kern/linux/sched.hh: kern/linux/thread_info.hh: got rid of everything but exactly what we needed util/categories.py: newest version from one of my repositories --HG-- extra : convert_revision : c4328e5938d421d60493c0da07022bfa9e92c404 2005-06-05 10:08:05 +02:00
Totally re-do/reorganize the python part of the statistics code Make the database creation/removal/cleanup code use python Make formulas work with the database Add support to do some graphing, but needs more work Still need to work on vectors, 2d vectors, dists and vectordists --HG-- extra : convert_revision : 1a88320dcc036a3751e8a036770766dce76a568c 2004-08-10 03:20:52 +02:00			`import MySQLdb`

			`class MyDB(object):`
			`def __init__(self, options):`
			`self.name = options.db`
			`self.host = options.host`
			`self.user = options.user`
			`self.passwd = options.passwd`
			`self.mydb = None`
			`self.cursor = None`

			`def admin(self):`
			`self.close()`
			`self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,`
			`passwd=self.passwd)`
			`self.cursor = self.mydb.cursor()`

			`def connect(self):`
			`self.close()`
			`self.mydb = MySQLdb.connect(db=self.name, host=self.host,`
			`user=self.user, passwd=self.passwd)`
			`self.cursor = self.mydb.cursor()`

			`def close(self):`
			`if self.mydb is not None:`
			`self.mydb.close()`
			`self.cursor = None`

			`def query(self, sql):`
			`self.cursor.execute(sql)`

			`def drop(self):`
			`self.query('DROP DATABASE IF EXISTS %s' % self.name)`

			`def create(self):`
			`self.query('CREATE DATABASE %s' % self.name)`

			`def populate(self):`
			`#`
			`# Each run (or simulation) gets its own entry in the runs table to`
			`# group stats by where they were generated`
			`#`
			`# COLUMNS:`
			`# 'id' is a unique identifier for each run to be used in other`
			`# tables.`
			`# 'name' is the user designated name for the data generated. It is`
			`# configured in the simulator.`
			`# 'user' identifies the user that generated the data for the given`
			`# run.`
			`# 'project' another name to identify runs for a specific goal`
			`# 'date' is a timestamp for when the data was generated. It can be`
			`# used to easily expire data that was generated in the past.`
			`# 'expire' is a timestamp for when the data should be removed from`
			`# the database so we don't have years worth of junk.`
			`#`
			`# INDEXES:`
			`# 'run' is indexed so you can find out details of a run if the run`
			`# was retreived from the data table.`
			`# 'name' is indexed so that two all run names are forced to be unique`
			`#`
			`self.query('''`
			`CREATE TABLE runs(`
			`rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,`
			`rn_name VARCHAR(200) NOT NULL,`
			`rn_sample VARCHAR(32) NOT NULL,`
			`rn_user VARCHAR(32) NOT NULL,`
			`rn_project VARCHAR(100) NOT NULL,`
			`rn_date TIMESTAMP NOT NULL,`
			`rn_expire TIMESTAMP NOT NULL,`
			`PRIMARY KEY (rn_id),`
			`UNIQUE (rn_name,rn_sample)`
			`) TYPE=InnoDB''')`

			`#`
			`# The stat table gives us all of the data for a particular stat.`
			`#`
			`# COLUMNS:`
			`# 'stat' is a unique identifier for each stat to be used in other`
			`# tables for references.`
			`# 'name' is simply the simulator derived name for a given`
			`# statistic.`
			`# 'descr' is the description of the statistic and what it tells`
			`# you.`
			`# 'type' defines what the stat tells you. Types are:`
			`# SCALAR: A simple scalar statistic that holds one value`
			`# VECTOR: An array of statistic values. Such a something that`
			`# is generated per-thread. Vectors exist to give averages,`
			`# pdfs, cdfs, means, standard deviations, etc across the`
			`# stat values.`
			`# DIST: Is a distribution of data. When the statistic value is`
			`# sampled, its value is counted in a particular bucket.`
			`# Useful for keeping track of utilization of a resource.`
			`# (e.g. fraction of time it is 25% used vs. 50% vs. 100%)`
			`# VECTORDIST: Can be used when the distribution needs to be`
			`# factored out into a per-thread distribution of data for`
			`# example. It can still be summed across threads to find`
			`# the total distribution.`
			`# VECTOR2D: Can be used when you have a stat that is not only`
			`# per-thread, but it is per-something else. Like`
			`# per-message type.`
			`# FORMULA: This statistic is a formula, and its data must be`
			`# looked up in the formula table, for indicating how to`
			`# present its values.`
			`# 'subdata' is potentially used by any of the vector types to`
			`# give a specific name to all of the data elements within a`
			`# stat.`
			`# 'print' indicates whether this stat should be printed ever.`
			`# (Unnamed stats don't usually get printed)`
			`# 'prereq' only print the stat if the prereq is not zero.`
			`# 'prec' number of decimal places to print`
			`# 'nozero' don't print zero values`
			`# 'nonan' don't print NaN values`
			`# 'total' for vector type stats, print the total.`
			`# 'pdf' for vector type stats, print the pdf.`
			`# 'cdf' for vector type stats, print the cdf.`
			`#`
			`# The Following are for dist type stats:`
			`# 'min' is the minimum bucket value. Anything less is an underflow.`
			`# 'max' is the maximum bucket value. Anything more is an overflow.`
			`# 'bktsize' is the approximate number of entries in each bucket.`
			`# 'size' is the number of buckets. equal to (min/max)/bktsize.`
			`#`
			`# INDEXES:`
			`# 'stat' is indexed so that you can find out details about a stat`
			`# if the stat id was retrieved from the data table.`
			`# 'name' is indexed so that you can simply look up data about a`
			`# named stat.`
			`#`
			`self.query('''`
			`CREATE TABLE stats(`
			`st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,`
			`st_name VARCHAR(255) NOT NULL,`
			`st_descr TEXT NOT NULL,`
			`st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",`
			`"VECTOR2D", "FORMULA") NOT NULL,`
			`st_print BOOL NOT NULL,`
			`st_prereq SMALLINT UNSIGNED NOT NULL,`
			`st_prec TINYINT NOT NULL,`
			`st_nozero BOOL NOT NULL,`
			`st_nonan BOOL NOT NULL,`
			`st_total BOOL NOT NULL,`
			`st_pdf BOOL NOT NULL,`
			`st_cdf BOOL NOT NULL,`
			`st_min DOUBLE NOT NULL,`
			`st_max DOUBLE NOT NULL,`
			`st_bktsize DOUBLE NOT NULL,`
			`st_size SMALLINT UNSIGNED NOT NULL,`
			`PRIMARY KEY (st_id),`
			`UNIQUE (st_name)`
			`) TYPE=InnoDB''')`

			`#`
			`# This is the main table of data for stats.`
			`#`
			`# COLUMNS:`
			`# 'stat' refers to the stat field given in the stat table.`
			`#`
			`# 'x' referrs to the first dimension of a multi-dimensional stat. For`
			`# a vector, x will start at 0 and increase for each vector`
			`# element.`
			`# For a distribution:`
			`# -1: sum (for calculating standard deviation)`
			`# -2: sum of squares (for calculating standard deviation)`
			`# -3: total number of samples taken (for calculating`
			`# standard deviation)`
			`# -4: minimum value`
			`# -5: maximum value`
			`# -6: underflow`
			`# -7: overflow`
			`# 'y' is used by a VECTORDIST and the VECTOR2D to describe the second`
			`# dimension.`
			`# 'run' is the run that the data was generated from. Details up in`
			`# the run table`
			`# 'tick' is a timestamp generated by the simulator.`
			`# 'data' is the actual stat value.`
			`#`
			`# INDEXES:`
			`# 'stat' is indexed so that a user can find all of the data for a`
			`# particular stat. It is not unique, because that specific stat`
Remove all binning stuff --HG-- extra : convert_revision : 6da2b3b0b6c2824f2064d42670fd8383edb7c718 2006-06-10 19:08:43 +02:00			`# can be found in many runs and samples, in addition to`
Totally re-do/reorganize the python part of the statistics code Make the database creation/removal/cleanup code use python Make formulas work with the database Add support to do some graphing, but needs more work Still need to work on vectors, 2d vectors, dists and vectordists --HG-- extra : convert_revision : 1a88320dcc036a3751e8a036770766dce76a568c 2004-08-10 03:20:52 +02:00			`# having entries for the mulidimensional cases.`
			`# 'run' is indexed to allow a user to remove all of the data for a`
			`# particular execution run. It can also be used to allow the`
			`# user to print out all of the data for a given run.`
			`#`
			`self.query('''`
			`CREATE TABLE data(`
			`dt_stat SMALLINT UNSIGNED NOT NULL,`
			`dt_x SMALLINT NOT NULL,`
			`dt_y SMALLINT NOT NULL,`
			`dt_run SMALLINT UNSIGNED NOT NULL,`
			`dt_tick BIGINT UNSIGNED NOT NULL,`
			`dt_data DOUBLE NOT NULL,`
			`INDEX (dt_stat),`
			`INDEX (dt_run),`
Remove all binning stuff --HG-- extra : convert_revision : 6da2b3b0b6c2824f2064d42670fd8383edb7c718 2006-06-10 19:08:43 +02:00			`UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)`
Totally re-do/reorganize the python part of the statistics code Make the database creation/removal/cleanup code use python Make formulas work with the database Add support to do some graphing, but needs more work Still need to work on vectors, 2d vectors, dists and vectordists --HG-- extra : convert_revision : 1a88320dcc036a3751e8a036770766dce76a568c 2004-08-10 03:20:52 +02:00			`) TYPE=InnoDB;''')`

			`#`
			`# Names and descriptions for multi-dimensional stats (vectors, etc.)`
			`# are stored here instead of having their own entry in the statistics`
			`# table. This allows all parts of a single stat to easily share a`
			`# single id.`
			`#`
			`# COLUMNS:`
			`# 'stat' is the unique stat identifier from the stat table.`
			`# 'x' is the first dimension for multi-dimensional stats`
			`# corresponding to the data table above.`
			`# 'y' is the second dimension for multi-dimensional stats`
			`# corresponding to the data table above.`
			`# 'name' is the specific subname for the unique stat,x,y combination.`
			`# 'descr' is the specific description for the uniqe stat,x,y`
			`# combination.`
			`#`
			`# INDEXES:`
			`# 'stat' is indexed so you can get the subdata for a specific stat.`
			`#`
			`self.query('''`
			`CREATE TABLE subdata(`
			`sd_stat SMALLINT UNSIGNED NOT NULL,`
			`sd_x SMALLINT NOT NULL,`
			`sd_y SMALLINT NOT NULL,`
			`sd_name VARCHAR(255) NOT NULL,`
			`sd_descr TEXT,`
			`UNIQUE (sd_stat,sd_x,sd_y)`
			`) TYPE=InnoDB''')`


			`#`
			`# The formula table is maintained separately from the data table`
			`# because formula data, unlike other stat data cannot be represented`
			`# there.`
			`#`
			`# COLUMNS:`
			`# 'stat' refers to the stat field generated in the stat table.`
			`# 'formula' is the actual string representation of the formula`
			`# itself.`
			`#`
			`# INDEXES:`
			`# 'stat' is indexed so that you can just look up a formula.`
			`#`
			`self.query('''`
			`CREATE TABLE formulas(`
			`fm_stat SMALLINT UNSIGNED NOT NULL,`
			`fm_formula BLOB NOT NULL,`
			`PRIMARY KEY(fm_stat)`
			`) TYPE=InnoDB''')`

			`#`
			`# Each stat used in each formula is kept in this table. This way, if`
			`# you want to print out a particular formula, you can simply find out`
			`# which stats you need by looking in this table. Additionally, when`
			`# you remove a stat from the stats table and data table, you remove`
			`# any references to the formula in this table. When a formula is no`
			`# longer referred to, you remove its entry.`
			`#`
			`# COLUMNS:`
			`# 'stat' is the stat id from the stat table above.`
			`# 'child' is the stat id of a stat that is used for this formula.`
			`# There may be many children for any given 'stat' (formula)`
			`#`
			`# INDEXES:`
			`# 'stat' is indexed so you can look up all of the children for a`
			`# particular stat.`
			`# 'child' is indexed so that you can remove an entry when a stat is`
			`# removed.`
			`#`
			`self.query('''`
			`CREATE TABLE formula_ref(`
			`fr_stat SMALLINT UNSIGNED NOT NULL,`
			`fr_run SMALLINT UNSIGNED NOT NULL,`
			`UNIQUE (fr_stat,fr_run),`
			`INDEX (fr_stat),`
			`INDEX (fr_run)`
			`) TYPE=InnoDB''')`

			`# COLUMNS:`
			`# 'event' is the unique event id from the event_desc table`
			`# 'run' is simulation run id that this event took place in`
			`# 'tick' is the tick when the event happened`
			`#`
			`# INDEXES:`
			`# 'event' is indexed so you can look up all occurences of a`
			`# specific event`
			`# 'run' is indexed so you can find all events in a run`
			`# 'tick' is indexed because we want the unique thing anyway`
			`# 'event,run,tick' is unique combination`
			`self.query('''`
			`CREATE TABLE events(`
			`ev_event SMALLINT UNSIGNED NOT NULL,`
			`ev_run SMALLINT UNSIGNED NOT NULL,`
			`ev_tick BIGINT UNSIGNED NOT NULL,`
			`INDEX(ev_event),`
			`INDEX(ev_run),`
			`INDEX(ev_tick),`
			`UNIQUE(ev_event,ev_run,ev_tick)`
			`) TYPE=InnoDB''')`

			`# COLUMNS:`
			`# 'id' is the unique description id`
			`# 'name' is the name of the event that occurred`
			`#`
			`# INDEXES:`
			`# 'id' is indexed because it is the primary key and is what you use`
			`# to look up the descriptions`
			`# 'name' is indexed so one can find the event based on name`
			`#`
			`self.query('''`
			`CREATE TABLE event_names(`
			`en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,`
			`en_name VARCHAR(255) NOT NULL,`
			`PRIMARY KEY (en_id),`
			`UNIQUE (en_name)`
			`) TYPE=InnoDB''')`

			`def clean(self):`
			`self.query('''`
			`DELETE data`
			`FROM data`
			`LEFT JOIN runs ON dt_run=rn_id`
			`WHERE rn_id IS NULL''')`

			`self.query('''`
			`DELETE formula_ref`
			`FROM formula_ref`
			`LEFT JOIN runs ON fr_run=rn_id`
			`WHERE rn_id IS NULL''')`

			`self.query('''`
			`DELETE formulas`
			`FROM formulas`
			`LEFT JOIN formula_ref ON fm_stat=fr_stat`
			`WHERE fr_stat IS NULL''')`

			`self.query('''`
			`DELETE stats`
			`FROM stats`
			`LEFT JOIN data ON st_id=dt_stat`
			`WHERE dt_stat IS NULL''')`

			`self.query('''`
			`DELETE subdata`
			`FROM subdata`
			`LEFT JOIN data ON sd_stat=dt_stat`
			`WHERE dt_stat IS NULL''')`

			`self.query('''`
			`DELETE events`
			`FROM events`
			`LEFT JOIN runs ON ev_run=rn_id`
			`WHERE rn_id IS NULL''')`

			`self.query('''`
			`DELETE event_names`
			`FROM event_names`
			`LEFT JOIN events ON en_id=ev_event`
			`WHERE ev_event IS NULL''')`