gem5/util/qdo

#! /usr/bin/env python

# Copyright (c) 2004-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import os
import re
import time
import optparse

import pexpect

# Name this script was invoked as; used as a prefix on diagnostics.
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that options
# following the command name (e.g. a compiler's own '-o') are passed
# through as part of the command instead of being consumed here.
optparser.disable_interspersed_args()
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save qsub output log file')
# Both timeouts are in seconds: 30 minutes to get through the queue,
# 10 hours for the command itself.
optparser.add_option('-q', dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

# 'cmd' is the list of leftover arguments: the command to run followed
# by that command's own arguments.
(options, cmd) = optparser.parse_args()

if not cmd:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    sys.exit("%s: missing command" % progname)

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

# The command is run after a 'cd' to this same path inside the job, so
# refuse to start from anywhere outside /n/poolfs.
if not cwd.startswith('/n/poolfs/'):
    sys.exit("Error: current directory must be under /n/poolfs.")

# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
#
# It relies on the module-level names 'progname' (for diagnostics) and
# 'options' (for the qsub queue-wait timeout).
class Shell(pexpect.spawn):
    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    # Spawn 'cmd', wait for its first shell prompt, then replace the
    # prompt with the fixed one that prompt_re matches.
    # Exits the whole script with status 1 if the command cannot be
    # spawned or no prompt appears within options.qsub_timeout seconds.
    def __init__(self, cmd):
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (in seconds) for commands that should return at once
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: qsub timed out.\n" % progname)
            self.kill(15)  # 15 == SIGTERM
            self.close(wait=True)
            sys.exit(1)
        # PROMPT_COMMAND is unset in case the system sets it for us.
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    # version of expect that updates full_output too
    # Returns whatever pexpect.spawn.expect returns (the index of the
    # pattern that matched); exceptions from it propagate unchanged.
    def expect(self, regexp, timeout = -1):
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before
        # When an EOF/TIMEOUT sentinel in a pattern list is what matched,
        # pexpect stores the sentinel class itself in 'after' rather than
        # text, so there is nothing to append in that case.
        if self.after not in (pexpect.EOF, pexpect.TIMEOUT):
            self.full_output += self.after
        return index

    # Just issue a command and wait for the next prompt.
    # Returns a string containing the output of the command, with
    # trailing whitespace stripped.
    def do_bare_command(self, cmd, timeout = -1):
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    # Issue a command, then query its exit status.
    # Returns a (string, int) tuple with the command output and the status.
    # 'timeout' applies to the command itself; the status query always
    # uses quick_timeout.
    def do_command(self, cmd, timeout = -1):
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status; int() raises ValueError if the shell printed
        # anything other than a number in response
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    # Check to see if the given directory exists, as seen by the shell
    # on the other end.  'dirname' is interpolated into the test command
    # as-is, without any quoting.
    def dir_exists(self, dirname):
        (_, status) = self.do_command('[ -d %s ]' % dirname,
                                      self.quick_timeout)
        return status == 0


# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'qsub -I -S /bin/sh'

shell = Shell(shell_cmd)

# Defaults so that the cleanup code below always has values to work
# with, even if an exception escapes before the command has been run.
# A non-zero status also makes sure the session log is saved then.
output = ''
status = 1

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option (compilers) or '-d' option (m5)
    # indicating an output file/directory and waits for the
    # corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, giving up after 45 seconds; the command
        # is run regardless of whether the directory ever showed up.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 45:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 10:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    # The redirections are appended as plain words; they take effect
    # because the joined command line is interpreted by the job's shell.
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # qsub/ssh termination escape sequence
        shell.close(wait=True)
        status = 3
    if output:
        print(output)

finally:
    try:
        # end job
        if shell.isalive():
            shell.sendline('exit')
            shell.expect('qsub: job .* completed\r\n')
            shell.close(wait=True)
    finally:
        # if there was an error, log the output even if not requested.
        # This sits in its own 'finally' so the log is still written
        # when ending the job fails too.
        if status != 0 or options.save_log:
            log = open('qdo-log.' + str(os.getpid()), 'w')
            try:
                log.write(shell.full_output)
            finally:
                log.close()

del shell

sys.exit(status)
Support for compiling and testing on pool via 'qdo' script. For this to work qdo must be on your path. I've copied it into /usr/local/bin on zizzer. build/SConstruct: Add BATCH and BATCH_CMD options to support compiling/testing on pool via qdo. --HG-- extra : convert_revision : b7fc46465e897f7f15ed4a67f6735886917a6c4b 2005-09-22 21:27:42 +02:00			`#! /usr/bin/env python`

			`# Copyright (c) 2004-2005 The Regents of The University of Michigan`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are`
			`# met: redistributions of source code must retain the above copyright`
			`# notice, this list of conditions and the following disclaimer;`
			`# redistributions in binary form must reproduce the above copyright`
			`# notice, this list of conditions and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution;`
			`# neither the name of the copyright holders nor the names of its`
			`# contributors may be used to endorse or promote products derived from`
			`# this software without specific prior written permission.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`

			`import sys`
			`import os`
			`import re`
			`import time`
			`import optparse`

			`import pexpect`

			`progname = os.path.basename(sys.argv[0])`

			`usage = "%prog [options] command [command arguments]"`
			`optparser = optparse.OptionParser(usage=usage)`
			`optparser.allow_interspersed_args=False`
			`optparser.add_option('-e', dest='stderr_file',`
			`help='command stderr output file')`
			`optparser.add_option('-o', dest='stdout_file',`
			`help='command stdout output file')`
			`optparser.add_option('-l', dest='save_log', action='store_true',`
			`help='save qsub output log file')`
			`optparser.add_option('-q', dest='qsub_timeout', type='int',`
			`help='qsub queue wait timeout', default=30*60)`
			`optparser.add_option('-t', dest='cmd_timeout', type='int',`
			`help='command execution timeout', default=600*60)`

			`(options, cmd) = optparser.parse_args()`

			`if cmd == []:`
			`print >>sys.stderr, "%s: missing command" % progname`
			`sys.exit(1)`

			`cwd = os.getcwd()`

			`# Deal with systems where /n is a symlink to /.automount`
			`if cwd.startswith('/.automount/'):`
			`cwd = cwd.replace('/.automount/', '/n/', 1)`

			`if not cwd.startswith('/n/poolfs/'):`
			`print >>sys.stderr, "Error: current directory must be under /n/poolfs."`
			`sys.exit(1)`

			`# The Shell class wraps pexpect.spawn with some handy functions that`
			`# assume the thing on the other end is a Bourne/bash shell.`
			`class Shell(pexpect.spawn):`
			`# Regexp to match the shell prompt. We change the prompt to`
			`# something fixed and distinctive to make it easier to match`
			`# reliably.`
			`prompt_re = re.compile('qdo\$ ')`

			`def __init__(self, cmd):`
			`# initialize base pexpect.spawn object`
			`try:`
			`pexpect.spawn.__init__(self, cmd)`
			`except pexpect.ExceptionPexpect, exc:`
			`print "%s:" % progname, exc`
			`sys.exit(1)`
			`# full_output accumulates the full output of the session`
			`self.full_output = ""`
			`self.quick_timeout = 15`
			`# wait for a prompt, then change it`
			`try:`
			`self.expect('\$ ', options.qsub_timeout)`
			`except pexpect.TIMEOUT:`
			`print >>sys.stderr, "%s: qsub timed out." % progname`
			`self.kill(15)`
			`self.close(wait=True)`
			`sys.exit(1)`
Fix for qdo & new pool node build. util/qdo: unset PROMPT_COMMAND in shell in case system sets it for us. --HG-- extra : convert_revision : f9f20f7dc6b9585b60f3ce53aadd06e7d64f5179 2005-09-29 22:03:36 +02:00			`self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')`
Support for compiling and testing on pool via 'qdo' script. For this to work qdo must be on your path. I've copied it into /usr/local/bin on zizzer. build/SConstruct: Add BATCH and BATCH_CMD options to support compiling/testing on pool via qdo. --HG-- extra : convert_revision : b7fc46465e897f7f15ed4a67f6735886917a6c4b 2005-09-22 21:27:42 +02:00
			`# version of expect that updates full_output too`
			`def expect(self, regexp, timeout = -1):`
			`pexpect.spawn.expect(self, regexp, timeout)`
			`self.full_output += self.before + self.after`

			`# Just issue a command and wait for the next prompt.`
			`# Returns a string containing the output of the command.`
			`def do_bare_command(self, cmd, timeout = -1):`
			`global full_output`
			`self.sendline(cmd)`
			`# read back the echo of the command`
			`self.readline()`
			`# wait for the next prompt`
			`self.expect(self.prompt_re, timeout)`
			`output = self.before.rstrip()`
			`return output`

			`# Issue a command, then query its exit status.`
			`# Returns a (string, int) tuple with the command output and the status.`
			`def do_command(self, cmd, timeout = -1):`
			`# do the command itself`
			`output = self.do_bare_command(cmd, timeout)`
			`# collect status`
			`status = int(self.do_bare_command("echo $?", self.quick_timeout))`
			`return (output, status)`

			`# Check to see if the given directory exists.`
			`def dir_exists(self, dirname):`
			`(output, status) = shell.do_command('[ -d %s ]' % dirname,`
			`self.quick_timeout)`
			`return status == 0`


			`# Spawn the interactive pool job.`

			`# Hack to do link on poolfs... disabled for now since`
			`# compiler/linker/library versioning problems between poolfs and`
			`# nodes. May never work since poolfs is x86-64 and nodes are 32-bit.`
			`if False and len(cmd) > 50:`
			`shell_cmd = 'ssh -t poolfs /bin/sh -l'`
			`print "%s: running %s on poolfs" % (progname, cmd[0])`
			`else:`
			`shell_cmd = 'qsub -I -S /bin/sh'`

			`shell = Shell(shell_cmd)`

			`try:`
			`# chdir to cwd`
			`(output, status) = shell.do_command('cd ' + cwd)`

			`if status != 0:`
			`raise OSError, "Can't chdir to %s" % cwd`

			`# wacky hack: sometimes scons will create an output directory then`
			`# fork a job to generate files in that directory, and the job will`
			`# get run before the directory creation propagates through NFS.`
			`# This hack looks for a '-o' option indicating an output file and`
			`# waits for the corresponding directory to appear if necessary.`
			`try:`
			`if 'cc' in cmd[0] or 'g++' in cmd[0]:`
			`output_dir = os.path.dirname(cmd[cmd.index('-o')+1])`
			`elif 'm5' in cmd[0]:`
			`output_dir = cmd[cmd.index('-d')+1]`
			`else:`
			`output_dir = None`
			`except (ValueError, IndexError):`
			`# no big deal if there's no '-o'/'-d' or if it's the final argument`
			`output_dir = None`

			`if output_dir:`
			`secs_waited = 0`
			`while not shell.dir_exists(output_dir) and secs_waited < 45:`
			`time.sleep(5)`
			`secs_waited += 5`
			`if secs_waited > 10:`
			`print "waited", secs_waited, "seconds for", output_dir`

			`# run command`
			`if options.stdout_file:`
			`cmd += ['>', options.stdout_file]`
			`if options.stderr_file:`
			`cmd += ['2>', options.stderr_file]`
			`try:`
			`(output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)`
			`except pexpect.TIMEOUT:`
			`print >>sys.stderr, "%s: command timed out after %d seconds." \`
			`% (progname, options.cmd_timeout)`
			`shell.sendline('~.') # qsub/ssh termination escape sequence`
			`shell.close(wait=True)`
			`status = 3`
			`if output:`
			`print output`

			`finally:`
			`# end job`
			`if shell.isalive():`
			`shell.sendline('exit')`
			`shell.expect('qsub: job .* completed\r\n')`
			`shell.close(wait=True)`

			`# if there was an error, log the output even if not requested`
			`if status != 0 or options.save_log:`
			`log = file('qdo-log.' + str(os.getpid()), 'w')`
			`log.write(shell.full_output)`
			`log.close()`

			`del shell`

			`sys.exit(status)`