Make qdo work with oar. I don't know if this catches every case, but it appears
to be working at the moment. --HG-- extra : convert_revision : 90a5b0e2a06087259c97ff88b94852ddea8ea7b2
This commit is contained in:
parent
917d82eab9
commit
c648044100
1 changed files with 37 additions and 20 deletions
57
util/qdo
57
util/qdo
|
@ -1,6 +1,6 @@
|
||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
|
|
||||||
# Copyright (c) 2004-2005 The Regents of The University of Michigan
|
# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -27,6 +27,15 @@
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#
|
#
|
||||||
# Authors: Steve Reinhardt
|
# Authors: Steve Reinhardt
|
||||||
|
# Ali Saidi
|
||||||
|
|
||||||
|
# Important!
|
||||||
|
# This script expects a simple $ prompt. If you are using a shell other than
|
||||||
|
# sh (which defaults to this prompt), you'll need to add something like the following
|
||||||
|
# to your bashrc/bash_profile script:
|
||||||
|
#if [ "$OAR_USER" = "xxxx" ]; then
|
||||||
|
# PS1='$ '
#fi
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
@ -46,13 +55,13 @@ optparser.add_option('-e', dest='stderr_file',
|
||||||
optparser.add_option('-o', dest='stdout_file',
|
optparser.add_option('-o', dest='stdout_file',
|
||||||
help='command stdout output file')
|
help='command stdout output file')
|
||||||
optparser.add_option('-l', dest='save_log', action='store_true',
|
optparser.add_option('-l', dest='save_log', action='store_true',
|
||||||
help='save qsub output log file')
|
help='save oarsub output log file')
|
||||||
optparser.add_option('-N', dest='job_name',
|
optparser.add_option('-N', dest='job_name',
|
||||||
help='qsub job name')
|
help='oarsub job name')
|
||||||
optparser.add_option('-q', dest='dest_queue',
|
optparser.add_option('-q', dest='dest_queue',
|
||||||
help='qsub destination queue')
|
help='oarsub destination queue')
|
||||||
optparser.add_option('--qwait', dest='qsub_timeout', type='int',
|
optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
|
||||||
help='qsub queue wait timeout', default=30*60)
|
help='oarsub queue wait timeout', default=30*60)
|
||||||
optparser.add_option('-t', dest='cmd_timeout', type='int',
|
optparser.add_option('-t', dest='cmd_timeout', type='int',
|
||||||
help='command execution timeout', default=600*60)
|
help='command execution timeout', default=600*60)
|
||||||
|
|
||||||
|
@ -63,7 +72,7 @@ if cmd == []:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# If we want to do this, need to add check here to make sure cmd[0] is
|
# If we want to do this, need to add check here to make sure cmd[0] is
|
||||||
# a valid PBS job name, else qsub will die on us.
|
# a valid OAR job name, else oarsub will die on us.
|
||||||
#
|
#
|
||||||
#if not options.job_name:
|
#if not options.job_name:
|
||||||
# options.job_name = cmd[0]
|
# options.job_name = cmd[0]
|
||||||
|
@ -98,11 +107,11 @@ class Shell(pexpect.spawn):
|
||||||
self.quick_timeout = 15
|
self.quick_timeout = 15
|
||||||
# wait for a prompt, then change it
|
# wait for a prompt, then change it
|
||||||
try:
|
try:
|
||||||
self.expect('\$ ', options.qsub_timeout)
|
self.expect('\$ ', options.oarsub_timeout)
|
||||||
except pexpect.TIMEOUT:
|
except pexpect.TIMEOUT:
|
||||||
print >>sys.stderr, "%s: qsub timed out." % progname
|
print >>sys.stderr, "%s: oarsub timed out." % progname
|
||||||
self.kill(9)
|
self.kill(9)
|
||||||
self.close(wait=True)
|
self.safe_close()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
|
self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
|
||||||
|
|
||||||
|
@ -137,8 +146,17 @@ class Shell(pexpect.spawn):
|
||||||
(output, status) = shell.do_command('[ -d %s ]' % dirname,
|
(output, status) = shell.do_command('[ -d %s ]' % dirname,
|
||||||
self.quick_timeout)
|
self.quick_timeout)
|
||||||
return status == 0
|
return status == 0
|
||||||
|
|
||||||
|
# Don't actually try to close it.. just wait until it closes by itself
|
||||||
|
# We can't actually kill the pid which is what it's trying to do, and if
|
||||||
|
# we call wait we could be in an unfortunate situation of it printing input
|
||||||
|
# right as we call wait, so the input is never read and the process never ends
|
||||||
|
def safe_close(self, max_wait=10, poll_interval=1):
    """Wait for the child to exit by itself, then close without forcing.

    The child is polled with ``isalive()``; while it is still running we
    sleep ``poll_interval`` seconds between polls, giving up after
    ``max_wait`` polls.  Afterwards ``close(force=False)`` is called so
    that no attempt is made to kill the child.

    max_wait      -- maximum number of sleep/poll rounds (default 10)
    poll_interval -- seconds to sleep between polls (default 1)
    """
    polls = 0
    while self.isalive() and polls < max_wait:
        time.sleep(poll_interval)
        # The counter has to advance, otherwise the bound above is never
        # reached and a child that never exits would hang us forever.
        polls += 1
    self.close(force=False)
|
||||||
|
|
||||||
# Spawn the interactive pool job.
|
# Spawn the interactive pool job.
|
||||||
|
|
||||||
# Hack to do link on poolfs... disabled for now since
|
# Hack to do link on poolfs... disabled for now since
|
||||||
|
@ -148,11 +166,12 @@ if False and len(cmd) > 50:
|
||||||
shell_cmd = 'ssh -t poolfs /bin/sh -l'
|
shell_cmd = 'ssh -t poolfs /bin/sh -l'
|
||||||
print "%s: running %s on poolfs" % (progname, cmd[0])
|
print "%s: running %s on poolfs" % (progname, cmd[0])
|
||||||
else:
|
else:
|
||||||
shell_cmd = 'qsub -I -S /bin/sh'
|
shell_cmd = 'oarsub -I'
|
||||||
if options.job_name:
|
if options.job_name:
|
||||||
shell_cmd += ' -N "%s"' % options.job_name
|
shell_cmd += ' -n "%s"' % options.job_name
|
||||||
if options.dest_queue:
|
if options.dest_queue:
|
||||||
shell_cmd += ' -q ' + options.dest_queue
|
shell_cmd += ' -q ' + options.dest_queue
|
||||||
|
shell_cmd += ' -d %s' % cwd
|
||||||
|
|
||||||
shell = Shell(shell_cmd)
|
shell = Shell(shell_cmd)
|
||||||
|
|
||||||
|
@ -197,25 +216,23 @@ try:
|
||||||
except pexpect.TIMEOUT:
|
except pexpect.TIMEOUT:
|
||||||
print >>sys.stderr, "%s: command timed out after %d seconds." \
|
print >>sys.stderr, "%s: command timed out after %d seconds." \
|
||||||
% (progname, options.cmd_timeout)
|
% (progname, options.cmd_timeout)
|
||||||
shell.sendline('~.') # qsub/ssh termination escape sequence
|
shell.sendline('~.') # oarsub/ssh termination escape sequence
|
||||||
shell.close(wait=True)
|
shell.safe_close()
|
||||||
status = 3
|
status = 3
|
||||||
if output:
|
if output:
|
||||||
print output
|
print output
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# end job
|
# end job
|
||||||
if shell.isalive():
|
if shell.isalive():
|
||||||
shell.sendline('exit')
|
shell.sendline('exit')
|
||||||
shell.expect('qsub: job .* completed\r\n')
|
shell.expect('Disconnected from OAR job .*')
|
||||||
shell.close(wait=True)
|
shell.safe_close()
|
||||||
|
|
||||||
# if there was an error, log the output even if not requested
|
# if there was an error, log the output even if not requested
|
||||||
if status != 0 or options.save_log:
|
if status != 0 or options.save_log:
|
||||||
log = file('qdo-log.' + str(os.getpid()), 'w')
|
log = file('qdo-log.' + str(os.getpid()), 'w')
|
||||||
log.write(shell.full_output)
|
log.write(shell.full_output)
|
||||||
log.close()
|
log.close()
|
||||||
|
|
||||||
del shell
|
del shell
|
||||||
|
|
||||||
sys.exit(status)
|
sys.exit(status)
|
||||||
|
|
Loading…
Reference in a new issue