#! /usr/bin/env python

# Copyright (c) 2004-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import time
|
|
|
|
import optparse
|
|
|
|
|
|
|
|
import pexpect
|
|
|
|
|
|
|
|
# Name this script was invoked as; used as the prefix of diagnostics.
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument: everything from
# there on is the command to run, including its own flags.
optparser.disable_interspersed_args()

# Option table, in the order the options are registered:
# (flag, keyword arguments handed to add_option()).
for _flag, _settings in (
    ('-e', dict(dest='stderr_file',
                help='command stderr output file')),
    ('-o', dict(dest='stdout_file',
                help='command stdout output file')),
    ('-l', dict(dest='save_log', action='store_true',
                help='save qsub output log file')),
    ('-N', dict(dest='job_name',
                help='qsub job name')),
    ('-q', dict(dest='dest_queue',
                help='qsub destination queue')),
    ('--qwait', dict(dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30 * 60)),
    ('-t', dict(dest='cmd_timeout', type='int',
                help='command execution timeout', default=600 * 60)),
):
    optparser.add_option(_flag, **_settings)

(options, cmd) = optparser.parse_args()

# A command to run is mandatory.
if not cmd:
    sys.stderr.write(("%s: missing command" % progname) + "\n")
    sys.exit(1)

# Without an explicit -N, name the job after the command itself.
if not options.job_name:
    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
_automount_root = '/.automount/'
if cwd.startswith(_automount_root):
    cwd = '/n/' + cwd[len(_automount_root):]

# Refuse to run from anywhere outside /n/poolfs.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs." + "\n")
    sys.exit(1)
|
|
|
|
|
|
|
|
# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """pexpect session talking to an interactive Bourne/bash shell.

    On construction the shell's prompt is replaced by a fixed,
    distinctive one ("qdo$ ") so that the end of each command's output
    can be detected reliably.  Everything matched by expect() is
    accumulated in self.full_output.

    Relies on the module-level names `progname` (for diagnostics) and
    `options` (for qsub_timeout).
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn `cmd`, wait for its first prompt, then install our prompt.

        Exits the process with status 1 if `cmd` cannot be spawned or
        if no "$ " prompt appears within options.qsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect:
            # sys.exc_info() rather than "except ..., exc" so that this
            # parses on both old and new Python versions.
            exc = sys.exc_info()[1]
            # Diagnostics go to stderr, like every other error message
            # this script prints.
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write(("%s: qsub timed out." % progname) + "\n")
            self.kill(15)  # signal 15 (SIGTERM)
            self.close(wait=True)
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    # version of expect that updates full_output too
    def expect(self, regexp, timeout = -1):
        """Wait for `regexp`, recording the consumed text in full_output.

        Returns whatever pexpect.spawn.expect() returns (the index of
        the pattern that matched), so this override stays a drop-in
        replacement for the base-class method.  Exceptions raised by
        the base class propagate unchanged; in that case full_output
        is not updated.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    # Just issue a command and wait for the next prompt.
    # Returns a string containing the output of the command.
    def do_bare_command(self, cmd, timeout = -1):
        """Send `cmd` and return its output with trailing whitespace removed.

        `timeout` is passed through to expect() while waiting for the
        next prompt.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    # Issue a command, then query its exit status.
    # Returns a (string, int) tuple with the command output and the status.
    def do_command(self, cmd, timeout = -1):
        """Run `cmd` and return (output, exit_status).

        The exit status is obtained by running "echo $?" right after
        the command, using the short quick_timeout.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    # Check to see if the given directory exists.
    def dir_exists(self, dirname):
        """Return True if `[ -d dirname ]` succeeds in the remote shell.

        `dirname` is interpolated into the shell command unquoted.
        """
        # Use this session (self), not whatever module-level object
        # happens to be called "shell".
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0
|
|
|
|
|
|
|
|
|
|
|
|
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    sys.stdout.write(("%s: running %s on poolfs" % (progname, cmd[0])) + "\n")
else:
    shell_cmd = 'qsub -I -S /bin/sh'
    shell_cmd += ' -N "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue

shell = Shell(shell_cmd)

# Pessimistic defaults: if anything below raises before a real exit
# status has been collected, the cleanup code still has a (failing)
# status to act on instead of tripping over an unbound name.
status = 1
output = ''

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, giving up after 45 seconds.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 45:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 10:
            sys.stdout.write(' '.join(["waited", str(secs_waited),
                                       "seconds for", output_dir]) + "\n")

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write(("%s: command timed out after %d seconds."
                          % (progname, options.cmd_timeout)) + "\n")
        shell.sendline('~.') # qsub/ssh termination escape sequence
        shell.close(wait=True)
        status = 3
        # The command produced no result; don't echo the leftover
        # output of the earlier 'cd'.
        output = ''
    if output:
        sys.stdout.write(output + "\n")

finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        try:
            shell.expect('qsub: job .* completed\r\n')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # The completion banner never showed up.  Carry on with the
            # cleanup anyway so the shell gets closed and the log below
            # still gets written.
            pass
        shell.close(wait=True)

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        log = open('qdo-log.' + str(os.getpid()), 'w')
        try:
            log.write(shell.full_output)
        finally:
            log.close()

del shell

sys.exit(status)
|