#! /usr/bin/env python

# Copyright (c) 2004-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import os
import re
import time
import optparse

import pexpect

progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that options
# meant for the wrapped command are not consumed here.
optparser.allow_interspersed_args = False
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save qsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='qsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='qsub destination queue')
optparser.add_option('--qwait', dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# Default the job name to the name of the command being run.
if not options.job_name:
    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)


# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """pexpect.spawn wrapper for driving an interactive Bourne/bash shell.

    Instance attributes set by __init__:
      full_output   -- accumulated text of the session, appended to by
                       expect()
      quick_timeout -- timeout in seconds used for short bookkeeping
                       commands such as "echo $?"
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn cmd, wait for its first prompt, then install our prompt.

        Exits the program with status 1 if the process cannot be
        spawned or if no prompt appears within options.qsub_timeout
        seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: qsub timed out.\n" % progname)
            self.kill(15)
            self.close(wait=True)
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns (the index of the
        matched pattern).  pexpect.TIMEOUT and pexpect.EOF propagate to
        the caller unchanged.
        """
        try:
            index = pexpect.spawn.expect(self, regexp, timeout)
        except (pexpect.TIMEOUT, pexpect.EOF):
            # Keep whatever was read before the failure so that the
            # session log still shows the output leading up to it.
            # In this case self.after is not text, so only self.before
            # is recorded.
            if isinstance(self.before, str):
                self.full_output += self.before
            raise
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command with
        trailing whitespace removed.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the remote side.

        NOTE(review): dirname is interpolated into the shell command
        unquoted, so names containing whitespace or shell
        metacharacters will not be tested correctly.
        """
        # Use this instance rather than relying on a module-level
        # variable that happens to be called 'shell'.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0


# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'qsub -I -S /bin/sh'
    shell_cmd += ' -N "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue

shell = Shell(shell_cmd)

# Pessimistic defaults: the 'finally' clause below reads 'status', so it
# must be bound even if an exception escapes before the first command
# returns.  A non-zero value also makes sure the session log is saved in
# that case.
status = 1
output = None

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, giving up after 45 seconds.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 45:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 10:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    # NOTE(review): the arguments are joined with spaces and handed to
    # the remote shell unquoted, so the redirections below work but
    # arguments containing whitespace or metacharacters are
    # re-interpreted by that shell.
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd),
                                            options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.')  # qsub/ssh termination escape sequence
        shell.close(wait=True)
        status = 3
    if output:
        print(output)

finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        try:
            shell.expect('qsub: job .* completed\r\n')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # The completion banner never showed up.  Carry on anyway
            # so that the log is still written and the command's own
            # exit status is still reported.
            pass
        shell.close(wait=True)

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)

del shell

sys.exit(status)