Better pool job handling
util/pbs/job.py: the default jobfile is now Test.py in the root of the jobs directory.
util/pbs/pbs.py: clean up the qsub options handling and add job dependencies.
util/pbs/send.py: the default jobfile is now Test.py in the root of the jobs directory; add a flag to depend on your checkpoint; add a flag to specify your node type; create the base directory if it doesn't exist.
--HG-- extra : convert_revision : dfffa4a5b0e68b2550a28fbb06b9d6a208ea1f2e
This commit is contained in:
parent
31d13e9a9b
commit
357ee7a845
3 changed files with 37 additions and 20 deletions
|
@ -149,7 +149,7 @@ if __name__ == '__main__':
|
||||||
pbs_jobname = env['PBS_JOBNAME']
|
pbs_jobname = env['PBS_JOBNAME']
|
||||||
basedir = joinpath(rootdir, 'Base')
|
basedir = joinpath(rootdir, 'Base')
|
||||||
jobname = env.setdefault('JOBNAME', pbs_jobname)
|
jobname = env.setdefault('JOBNAME', pbs_jobname)
|
||||||
jobfile = env.setdefault('JOBFILE', joinpath(basedir, 'test.py'))
|
jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py'))
|
||||||
outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname))
|
outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname))
|
||||||
env['POOLJOB'] = 'True'
|
env['POOLJOB'] = 'True'
|
||||||
|
|
||||||
|
|
|
@ -91,19 +91,20 @@ class MyPOpen(object):
|
||||||
|
|
||||||
class qsub:
|
class qsub:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.afterok = None
|
||||||
self.hold = False
|
self.hold = False
|
||||||
self.join = False
|
self.join = False
|
||||||
self.keep_stdout = False
|
self.keep_stdout = False
|
||||||
self.keep_stderr = False
|
self.keep_stderr = False
|
||||||
self.node_type = ''
|
self.node_type = None
|
||||||
self.mail_abort = False
|
self.mail_abort = False
|
||||||
self.mail_begin = False
|
self.mail_begin = False
|
||||||
self.mail_end = False
|
self.mail_end = False
|
||||||
self.name = ''
|
self.name = None
|
||||||
self.stdout = ''
|
self.stdout = None
|
||||||
self.priority = 0
|
self.priority = None
|
||||||
self.queue = ''
|
self.queue = None
|
||||||
self.pbshost = ''
|
self.pbshost = None
|
||||||
self.qsub = 'qsub'
|
self.qsub = 'qsub'
|
||||||
self.env = {}
|
self.env = {}
|
||||||
|
|
||||||
|
@ -118,7 +119,7 @@ class qsub:
|
||||||
if self.hold:
|
if self.hold:
|
||||||
self.cmd.append('-h')
|
self.cmd.append('-h')
|
||||||
|
|
||||||
if len(self.stdout):
|
if self.stdout:
|
||||||
self.cmd.append('-olocalhost:' + self.stdout)
|
self.cmd.append('-olocalhost:' + self.stdout)
|
||||||
|
|
||||||
if self.keep_stdout and self.keep_stderr:
|
if self.keep_stdout and self.keep_stderr:
|
||||||
|
@ -133,7 +134,7 @@ class qsub:
|
||||||
if self.join:
|
if self.join:
|
||||||
self.cmd.append('-joe')
|
self.cmd.append('-joe')
|
||||||
|
|
||||||
if len(self.node_type):
|
if self.node_type:
|
||||||
self.cmd.append('-lnodes=' + self.node_type)
|
self.cmd.append('-lnodes=' + self.node_type)
|
||||||
|
|
||||||
if self.mail_abort or self.mail_begin or self.mail_end:
|
if self.mail_abort or self.mail_begin or self.mail_end:
|
||||||
|
@ -147,15 +148,18 @@ class qsub:
|
||||||
if len(flags):
|
if len(flags):
|
||||||
self.cmd.append('-m ' + flags)
|
self.cmd.append('-m ' + flags)
|
||||||
|
|
||||||
if len(self.name):
|
if self.name:
|
||||||
self.cmd.append("-N%s" % self.name)
|
self.cmd.append("-N%s" % self.name)
|
||||||
|
|
||||||
if self.priority != 0:
|
if self.priority:
|
||||||
self.cmd.append('-p' + self.priority)
|
self.cmd.append('-p' + self.priority)
|
||||||
|
|
||||||
if len(self.queue):
|
if self.queue:
|
||||||
self.cmd.append('-q' + self.queue)
|
self.cmd.append('-q' + self.queue)
|
||||||
|
|
||||||
|
if self.afterok:
|
||||||
|
# Use the afterok attribute set in __init__ and tested just above; 'self.after' is never defined.
self.cmd.append('-Wdepend=afterok:%s' % self.afterok)
|
||||||
|
|
||||||
self.cmd.extend(args)
|
self.cmd.extend(args)
|
||||||
self.script = script
|
self.script = script
|
||||||
self.command = ' '.join(self.cmd + [ self.script ])
|
self.command = ' '.join(self.cmd + [ self.script ])
|
||||||
|
|
|
@ -82,11 +82,11 @@ Usage:
|
||||||
-e only echo pbs command info, don't actually send the job
|
-e only echo pbs command info, don't actually send the job
|
||||||
-f force the job to run regardless of state
|
-f force the job to run regardless of state
|
||||||
-q <queue> submit job to the named queue
|
-q <queue> submit job to the named queue
|
||||||
-j <jobfile> specify the jobfile (default is <basedir>/test.py)
|
-j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
|
||||||
-v be verbose
|
-v be verbose
|
||||||
|
|
||||||
%(progname)s [-j <jobfile>] -l [-v] <regexp>
|
%(progname)s [-j <jobfile>] -l [-v] <regexp>
|
||||||
-j <jobfile> specify the jobfile (default is <basedir>/test.py)
|
-j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
|
||||||
-l list job names, don't submit
|
-l list job names, don't submit
|
||||||
-v be verbose (list job parameters)
|
-v be verbose (list job parameters)
|
||||||
|
|
||||||
|
@ -96,10 +96,11 @@ Usage:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import getopt
|
import getopt
|
||||||
opts, args = getopt.getopt(sys.argv[1:], '-CRcd:efhj:lq:v')
|
opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lq:Rt:v')
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
sys.exit(usage)
|
sys.exit(usage)
|
||||||
|
|
||||||
|
depend = False
|
||||||
clean = False
|
clean = False
|
||||||
onlyecho = False
|
onlyecho = False
|
||||||
exprs = []
|
exprs = []
|
||||||
|
@ -107,18 +108,19 @@ force = False
|
||||||
listonly = False
|
listonly = False
|
||||||
queue = ''
|
queue = ''
|
||||||
verbose = False
|
verbose = False
|
||||||
jfile = 'Base/test.py'
|
jfile = 'Test.py'
|
||||||
docpts = False
|
docpts = False
|
||||||
doruns = True
|
doruns = True
|
||||||
runflag = False
|
runflag = False
|
||||||
|
node_type = 'FAST'
|
||||||
|
|
||||||
for opt,arg in opts:
|
for opt,arg in opts:
|
||||||
if opt == '-C':
|
if opt == '-C':
|
||||||
docpts = True
|
docpts = True
|
||||||
if opt == '-R':
|
|
||||||
runflag = True
|
|
||||||
if opt == '-c':
|
if opt == '-c':
|
||||||
clean = True
|
clean = True
|
||||||
|
if opt == '-d':
|
||||||
|
depend = True
|
||||||
if opt == '-e':
|
if opt == '-e':
|
||||||
onlyecho = True
|
onlyecho = True
|
||||||
if opt == '-f':
|
if opt == '-f':
|
||||||
|
@ -132,6 +134,10 @@ for opt,arg in opts:
|
||||||
listonly = True
|
listonly = True
|
||||||
if opt == '-q':
|
if opt == '-q':
|
||||||
queue = arg
|
queue = arg
|
||||||
|
if opt == '-R':
|
||||||
|
runflag = True
|
||||||
|
if opt == '-t':
|
||||||
|
node_type = arg
|
||||||
if opt == '-v':
|
if opt == '-v':
|
||||||
verbose = True
|
verbose = True
|
||||||
|
|
||||||
|
@ -149,6 +155,8 @@ conf = jobfile.JobFile(jfile)
|
||||||
if not listonly and not onlyecho and isdir(conf.linkdir):
|
if not listonly and not onlyecho and isdir(conf.linkdir):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Checking for outdated files in Link directory'
|
print 'Checking for outdated files in Link directory'
|
||||||
|
if not isdir(conf.basedir):
|
||||||
|
os.mkdir(conf.basedir)
|
||||||
syncdir(conf.linkdir, conf.basedir)
|
syncdir(conf.linkdir, conf.basedir)
|
||||||
|
|
||||||
jobnames = {}
|
jobnames = {}
|
||||||
|
@ -237,6 +245,9 @@ namehack = NameHack()
|
||||||
|
|
||||||
for job in joblist:
|
for job in joblist:
|
||||||
jobdir = JobDir(joinpath(conf.rootdir, job.name))
|
jobdir = JobDir(joinpath(conf.rootdir, job.name))
|
||||||
|
if depend:
|
||||||
|
cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
|
||||||
|
cptjob = cptdir.readval('.pbs_jobid')
|
||||||
|
|
||||||
if not onlyecho:
|
if not onlyecho:
|
||||||
jobdir.create()
|
jobdir.create()
|
||||||
|
@ -249,10 +260,12 @@ for job in joblist:
|
||||||
qsub.stdout = jobdir.file('jobout')
|
qsub.stdout = jobdir.file('jobout')
|
||||||
qsub.name = job.name[:15]
|
qsub.name = job.name[:15]
|
||||||
qsub.join = True
|
qsub.join = True
|
||||||
qsub.node_type = 'FAST'
|
qsub.node_type = node_type
|
||||||
qsub.env['ROOTDIR'] = conf.rootdir
|
qsub.env['ROOTDIR'] = conf.rootdir
|
||||||
qsub.env['JOBNAME'] = job.name
|
qsub.env['JOBNAME'] = job.name
|
||||||
if len(queue):
|
if depend:
|
||||||
|
qsub.afterok = cptjob
|
||||||
|
if queue:
|
||||||
qsub.queue = queue
|
qsub.queue = queue
|
||||||
qsub.build(joinpath(progpath, 'job.py'))
|
qsub.build(joinpath(progpath, 'job.py'))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue