Support for compiling and testing on pool via 'qdo' script.

For this to work, qdo must be on your path.  I've copied it into
/usr/local/bin on zizzer.

build/SConstruct:
    Add BATCH and BATCH_CMD options to support compiling/testing
    on pool via qdo.
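
    As a rough example (the target name here is only illustrative, not
    part of this change), a pool build could then be started with:

        scons BATCH=True BATCH_CMD=qdo m5.opt

    Since BATCH is a sticky option, the setting is saved back to the
    options file and stays in effect for later scons runs until it is
    turned off again.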

--HG--
extra : convert_revision : b7fc46465e897f7f15ed4a67f6735886917a6c4b
Steve Reinhardt 2005-09-22 15:27:42 -04:00
parent e1c61e5b2f
commit b15a7aaf5e
2 changed files with 214 additions and 1 deletion

build/SConstruct

@@ -239,7 +239,9 @@ sticky_opts.AddOptions(
     BoolOption('USE_MYSQL', 'Use MySQL for stats output', have_mysql),
     BoolOption('USE_FENV', 'Use <fenv.h> IEEE mode control', have_fenv),
     ('CC', 'C compiler', os.environ.get('CC', env['CC'])),
-    ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX']))
+    ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])),
+    BoolOption('BATCH', 'Use batch pool for build and tests', False),
+    ('BATCH_CMD', 'Batch pool submission command name', 'qdo')
     )
 
 # Non-sticky options only apply to the current build.
@@ -354,6 +356,12 @@ for build_dir in build_dirs:
     # Save sticky option settings back to file
     sticky_opts.Save(options_file, env)
 
+    # Do this after we save setting back, or else we'll tack on an
+    # extra 'qdo' every time we run scons.
+    if env['BATCH']:
+        env['CC'] = env['BATCH_CMD'] + ' ' + env['CC']
+        env['CXX'] = env['BATCH_CMD'] + ' ' + env['CXX']
+
     # The m5/SConscript file sets up the build rules in 'env' according
     # to the configured options.  It returns a list of environments,
     # one for each variant build (debug, opt, etc.)
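
The net effect (sketched here with a made-up compile line, not output from
an actual build) is that with BATCH enabled every compiler invocation is
prefixed with the submission command, so a step that would normally run as

    g++ -c foo.cc -o foo.o

is instead handed to the pool as

    qdo g++ -c foo.cc -o foo.o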

util/qdo (new executable file)

@@ -0,0 +1,205 @@
#! /usr/bin/env python

# Copyright (c) 2004-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import os
import re
import time
import optparse

import pexpect

progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"

optparser = optparse.OptionParser(usage=usage)
optparser.allow_interspersed_args = False
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save qsub output log file')
optparser.add_option('-q', dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

if cmd == []:
    print >>sys.stderr, "%s: missing command" % progname
    sys.exit(1)

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

if not cwd.startswith('/n/poolfs/'):
    print >>sys.stderr, "Error: current directory must be under /n/poolfs."
    sys.exit(1)

# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile('qdo\$ ')

    def __init__(self, cmd):
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect, exc:
            print "%s:" % progname, exc
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect('\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            print >>sys.stderr, "%s: qsub timed out." % progname
            self.kill(15)
            self.close(wait=True)
            sys.exit(1)
        self.do_command('PS1="qdo$ "')

    # version of expect that updates full_output too
    def expect(self, regexp, timeout = -1):
        pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after

    # Just issue a command and wait for the next prompt.
    # Returns a string containing the output of the command.
    def do_bare_command(self, cmd, timeout = -1):
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        output = self.before.rstrip()
        return output

    # Issue a command, then query its exit status.
    # Returns a (string, int) tuple with the command output and the status.
    def do_command(self, cmd, timeout = -1):
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    # Check to see if the given directory exists.
    def dir_exists(self, dirname):
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0

# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print "%s: running %s on poolfs" % (progname, cmd[0])
else:
    shell_cmd = 'qsub -I -S /bin/sh'

shell = Shell(shell_cmd)

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)
    if status != 0:
        raise OSError, "Can't chdir to %s" % cwd

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 45:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 10:
            print "waited", secs_waited, "seconds for", output_dir

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]

    if options.stderr_file:
        cmd += ['2>', options.stderr_file]

    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        print >>sys.stderr, "%s: command timed out after %d seconds." \
              % (progname, options.cmd_timeout)
        shell.sendline('~.')  # qsub/ssh termination escape sequence
        shell.close(wait=True)
        status = 3

    if output:
        print output

finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        shell.expect('qsub: job .* completed\r\n')
        shell.close(wait=True)

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        log = file('qdo-log.' + str(os.getpid()), 'w')
        log.write(shell.full_output)
        log.close()

del shell

sys.exit(status)
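
For reference, a minimal sketch of using qdo by hand (the file names and
timeout are made up for illustration; the script still requires qsub and a
working directory under /n/poolfs):

    qdo gcc -o hello hello.c
    qdo -l -t 3600 ./hello

Here -l keeps the qdo-log.<pid> session log even when the command succeeds,
and -t caps the command's run time at 3600 seconds.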