Updates to job scripts to accept more than 15 characters of jobname

Make the Link directory even more useful by working with
sub-directories.

util/pbs/job.py:
    Expose JOBNAME as a separate parameter from PBS_JOBNAME.  If the
    former exists, it is used as the jobname for starting the job; if
    it doesn't exist, PBS_JOBNAME is used.  This is to get around the
    15-character maximum pbs job name length.  While we're at it,
    shuffle things around to hopefully make things a bit clearer.
util/pbs/send.py:
    Make the Link directory functionality more sophisticated: copy
    sub-directories and links to directories.  (We still don't copy
    dotfiles, though.)
    Add the setname() function to contact pbs and use raj's hack to
    tell the webpage about longer jobnames.  (It's gross, don't look.)
    Truncate the pbs job name to 15 characters so that it works.

--HG--
extra : convert_revision : 4a76b1a1c33721c7ca93e2fbb761f95bc3a2ac69
This commit is contained in:
Nathan Binkert 2005-08-16 11:27:49 -04:00
parent 38da461fd7
commit 09bb203484
2 changed files with 74 additions and 35 deletions

View file

@ -83,37 +83,35 @@ def readval(filename):
if __name__ == '__main__': if __name__ == '__main__':
rootdir = env.setdefault('ROOTDIR', os.getcwd()) rootdir = env.setdefault('ROOTDIR', os.getcwd())
jobid = env['PBS_JOBID'] pbs_jobid = env['PBS_JOBID']
jobname = env['PBS_JOBNAME'] pbs_jobname = env['PBS_JOBNAME']
jobdir = joinpath(rootdir, jobname)
basedir = joinpath(rootdir, 'Base') basedir = joinpath(rootdir, 'Base')
user = env['USER'] jobname = env.setdefault('JOBNAME', pbs_jobname)
jobfile = env.setdefault('JOBFILE', joinpath(basedir, 'test.py'))
outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname))
env['POOLJOB'] = 'True' env['POOLJOB'] = 'True'
env['OUTPUT_DIR'] = jobdir
env['JOBFILE'] = joinpath(basedir, 'test.py')
env['JOBNAME'] = jobname
def echofile(filename, string):
    """Write `string` followed by a newline to `filename` inside jobdir.

    The file is created or truncated.  Any IOError raised while opening
    or writing terminates the script via sys.exit() with the exception
    as the exit message.
    """
    try:
        f = open(joinpath(jobdir, filename), 'w')
        try:
            f.write('%s\n' % (string,))
        finally:
            # Always release the handle, even when the write fails;
            # close() also flushes, so no explicit flush is needed.
            f.close()
    except IOError:
        sys.exit(sys.exc_info()[1])
if os.path.isdir("/work"): if os.path.isdir("/work"):
workbase = "/work" workbase = "/work"
else: else:
workbase = "/tmp/" workbase = "/tmp/"
workdir = joinpath(workbase, '%s.%s' % (user, jobid)) workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid))
def echofile(filename, string):
    """Write `string` followed by a newline to `filename` inside outdir.

    The file is created or truncated.  Any IOError raised while opening
    or writing terminates the script via sys.exit() with the exception
    as the exit message.
    """
    try:
        f = open(joinpath(outdir, filename), 'w')
        try:
            f.write('%s\n' % (string,))
        finally:
            # Always release the handle, even when the write fails;
            # close() also flushes, so no explicit flush is needed.
            f.close()
    except IOError:
        sys.exit(sys.exc_info()[1])
os.umask(0022) os.umask(0022)
echofile('.start', date()) echofile('.start', date())
echofile('.jobid', jobid) echofile('.pbs_jobid', pbs_jobid)
# Record the PBS job *name* here; the job id already goes to .pbs_jobid.
echofile('.pbs_jobname', pbs_jobname)
echofile('.host', socket.gethostname()) echofile('.host', socket.gethostname())
if os.path.isdir(workdir): if os.path.isdir(workdir):
@ -132,7 +130,7 @@ if __name__ == '__main__':
except OSError,e: except OSError,e:
sys.exit(e) sys.exit(e)
os.symlink(joinpath(jobdir, 'output'), 'status.out') os.symlink(joinpath(outdir, 'output'), 'status.out')
args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ] args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ]
if not len(args): if not len(args):
@ -147,7 +145,7 @@ if __name__ == '__main__':
if not childpid: if not childpid:
# Execute command # Execute command
sys.stdin.close() sys.stdin.close()
fd = os.open(joinpath(jobdir, "output"), fd = os.open(joinpath(outdir, "output"),
os.O_WRONLY | os.O_CREAT | os.O_TRUNC) os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
os.dup2(fd, sys.stdout.fileno()) os.dup2(fd, sys.stdout.fileno())
os.dup2(fd, sys.stderr.fileno()) os.dup2(fd, sys.stderr.fileno())

View file

@ -30,9 +30,9 @@
import os, os.path, re, socket, sys import os, os.path, re, socket, sys
from os import environ as env, listdir from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from filecmp import cmp as filecmp from filecmp import cmp as filecmp
from shutil import copyfile from shutil import copy
def nfspath(dir): def nfspath(dir):
if dir.startswith('/.automount/'): if dir.startswith('/.automount/'):
@ -41,6 +41,38 @@ def nfspath(dir):
dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir) dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
return dir return dir
def syncdir(srcdir, destdir):
    """Mirror the files and sub-directories of srcdir into destdir.

    Sub-directories whose names start with '.' and directories named
    'SCCS' are skipped entirely.  Missing destination sub-directories
    are created, and a file is copied whenever the destination file does
    not exist or filecmp reports it as different from the source.  Each
    mkdir/copy performed is reported on stdout.

    Exits the program via sys.exit() if destdir is not an existing
    directory.
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Reduce root to a path relative to srcdir so it can be re-rooted
        # under destdir.
        root = normpath(root)
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]

        # Prune in place so os.walk does not descend into hidden or SCCS
        # directories.
        dirs[:] = [d for d in dirs
                   if not (d.startswith('.') or d == 'SCCS')]

        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % (newdir,))

        # os.walk does not follow symlinks to directories by default;
        # rewriting 'd' as 'd/.' makes the entry no longer look like a
        # link, so the walk descends into the linked directory.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            dest = normpath(joinpath(destdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Report in the same order as the copy: source, then
                # destination.
                print('copy %s %s' % (src, dest))
                copy(src, dest)
progpath = nfspath(sys.path[0]) progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0]) progname = basename(sys.argv[0])
usage = """\ usage = """\
@ -107,16 +139,7 @@ for arg in args:
if not listonly and not onlyecho and isdir(linkdir): if not listonly and not onlyecho and isdir(linkdir):
if verbose: if verbose:
print 'Checking for outdated files in Link directory' print 'Checking for outdated files in Link directory'
entries = listdir(linkdir) syncdir(linkdir, basedir)
for entry in entries:
link = joinpath(linkdir, entry)
if not islink(link) or not isfile(link):
continue
base = joinpath(basedir, entry)
if not isfile(base) or not filecmp(link, base):
print 'Base/%s is different than Link/%s: copying' % (entry, entry)
copyfile(link, base)
import job, jobfile, pbs import job, jobfile, pbs
@ -164,6 +187,21 @@ if not onlyecho:
jl.append(jobname) jl.append(jobname)
joblist = jl joblist = jl
def setname(jobid, jobname):
    """Report the full job name for jobid to the pbs.pool name service.

    Does nothing when jobname is 15 characters or shorter.  Otherwise
    sends the line "<jobid> <jobname>" over a TCP connection to
    pbs.pool:24465.  Socket errors propagate to the caller.
    """
    # since pbs can handle jobnames of 15 characters or less, don't
    # use the raj hack.
    if len(jobname) <= 15:
        return

    import socket
    s = socket.socket()
    try:
        # Connect to pbs.pool and send the jobid/jobname pair to port
        # 24465 (Raj didn't realize that there are only 64k ports and
        # setup inetd to point to port 90001, which wraps around to
        # 90001 - 65536 = 24465)
        s.connect(("pbs.pool", 24465))
        # sendall() keeps writing until the whole line is transmitted;
        # a bare send() may stop after a partial write.
        s.sendall("%s %s\n" % (jobid, jobname))
    finally:
        # Release the socket even when connect/send fails.
        s.close()
for jobname in joblist: for jobname in joblist:
jobdir = joinpath(rootdir, jobname) jobdir = joinpath(rootdir, jobname)
@ -176,10 +214,11 @@ for jobname in joblist:
qsub = pbs.qsub() qsub = pbs.qsub()
qsub.pbshost = 'simpool.eecs.umich.edu' qsub.pbshost = 'simpool.eecs.umich.edu'
qsub.stdout = joinpath(jobdir, 'jobout') qsub.stdout = joinpath(jobdir, 'jobout')
qsub.name = jobname qsub.name = jobname[:15]
qsub.join = True qsub.join = True
qsub.node_type = 'FAST' qsub.node_type = 'FAST'
qsub.env['ROOTDIR'] = rootdir qsub.env['ROOTDIR'] = rootdir
qsub.env['JOBNAME'] = jobname
if len(queue): if len(queue):
qsub.queue = queue qsub.queue = queue
qsub.build(joinpath(progpath, 'job.py')) qsub.build(joinpath(progpath, 'job.py'))
@ -190,6 +229,8 @@ for jobname in joblist:
if not onlyecho: if not onlyecho:
ec = qsub.do() ec = qsub.do()
if ec == 0: if ec == 0:
print 'PBS Jobid: %s' % qsub.result jobid = qsub.result
print 'PBS Jobid: %s' % jobid
setname(jobid, jobname)
else: else:
print 'PBS Failed' print 'PBS Failed'