qdo revision 5147:a7b91336a3fc
111308Santhony.gutierrez@amd.com#! /usr/bin/env python
211308Santhony.gutierrez@amd.com
311308Santhony.gutierrez@amd.com# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
411308Santhony.gutierrez@amd.com# All rights reserved.
511308Santhony.gutierrez@amd.com#
611308Santhony.gutierrez@amd.com# Redistribution and use in source and binary forms, with or without
711308Santhony.gutierrez@amd.com# modification, are permitted provided that the following conditions are
811308Santhony.gutierrez@amd.com# met: redistributions of source code must retain the above copyright
911308Santhony.gutierrez@amd.com# notice, this list of conditions and the following disclaimer;
1011308Santhony.gutierrez@amd.com# redistributions in binary form must reproduce the above copyright
1111308Santhony.gutierrez@amd.com# notice, this list of conditions and the following disclaimer in the
1211308Santhony.gutierrez@amd.com# documentation and/or other materials provided with the distribution;
1311308Santhony.gutierrez@amd.com# neither the name of the copyright holders nor the names of its
1411308Santhony.gutierrez@amd.com# contributors may be used to endorse or promote products derived from
1511308Santhony.gutierrez@amd.com# this software without specific prior written permission.
1611308Santhony.gutierrez@amd.com#
1711308Santhony.gutierrez@amd.com# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1811308Santhony.gutierrez@amd.com# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1911308Santhony.gutierrez@amd.com# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2011308Santhony.gutierrez@amd.com# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2111308Santhony.gutierrez@amd.com# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2211308Santhony.gutierrez@amd.com# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2311308Santhony.gutierrez@amd.com# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2411308Santhony.gutierrez@amd.com# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2511308Santhony.gutierrez@amd.com# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2611308Santhony.gutierrez@amd.com# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2711308Santhony.gutierrez@amd.com# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2811308Santhony.gutierrez@amd.com#
2911308Santhony.gutierrez@amd.com# Authors: Steve Reinhardt
3011308Santhony.gutierrez@amd.com#          Ali Saidi
3111308Santhony.gutierrez@amd.com
# Important!
# This script expects a simple "$ " prompt.  If you are using a shell other
# than sh (which defaults to that prompt), you'll need to add something like
# the following to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#   PS1='$ '
#fi
3811308Santhony.gutierrez@amd.com
3911308Santhony.gutierrez@amd.com
4011308Santhony.gutierrez@amd.comimport sys
4111308Santhony.gutierrez@amd.comimport os
4211308Santhony.gutierrez@amd.comimport re
4311308Santhony.gutierrez@amd.comimport time
4411308Santhony.gutierrez@amd.comimport optparse
4511308Santhony.gutierrez@amd.com
4611308Santhony.gutierrez@amd.comimport pexpect
4711308Santhony.gutierrez@amd.com
# Name under which this script was invoked; prefixes every diagnostic.
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that flags
# belonging to the wrapped command land in `cmd` rather than `options`.
optparser.allow_interspersed_args = False

# (flag, add_option keyword arguments), in the order they are registered.
_option_table = (
    ('-e', dict(dest='stderr_file',
                help='command stderr output file')),
    ('-o', dict(dest='stdout_file',
                help='command stdout output file')),
    ('-l', dict(dest='save_log', action='store_true',
                help='save oarsub output log file')),
    ('-N', dict(dest='job_name',
                help='oarsub job name')),
    ('-q', dict(dest='dest_queue',
                help='oarsub destination queue')),
    ('--qwait', dict(dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)),
    ('-t', dict(dest='cmd_timeout', type='int',
                help='command execution timeout', default=600*60)),
)
for _flag, _kwargs in _option_table:
    optparser.add_option(_flag, **_kwargs)

# `cmd` is the list of leftover arguments: the command to run in the job.
(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# Defaulting the job name to the command name is deliberately disabled:
# cmd[0] would first have to be checked for being a valid job name,
# otherwise oarsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
_automount_prefix = '/.automount/'
if cwd.startswith(_automount_prefix):
    cwd = '/n/' + cwd[len(_automount_prefix):]

# The job is started in this directory, so it must live on the pool
# file system.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)
8911308Santhony.gutierrez@amd.com
class Shell(pexpect.spawn):
    """pexpect.spawn wrapper with helpers for driving a remote shell.

    The helpers assume the thing on the other end is a Bourne/bash shell.
    Everything matched by expect() is accumulated in `full_output` so the
    whole session can be written to a log file afterwards.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn `cmd`, wait for its first '$ ' prompt and install our own.

        Exits the process with status 1 if the command cannot be spawned
        or if no prompt shows up within options.oarsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns (the index of the
        matched pattern) so the override is a drop-in replacement.
        Exceptions raised by pexpect propagate unchanged; in that case
        full_output is not updated.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command with
        trailing whitespace removed.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.  `timeout` applies to the command itself; the status
        query always uses quick_timeout.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the remote side."""
        # Use this session (self), not whatever module-level object
        # happens to be named `shell`.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0

    # Don't actually try to close it.. just wait until it closes by itself
    # We can't actually kill the pid which is what it's trying to do, and if
    # we call wait we could be in an unfortunate situation of it printing
    # input right as we call wait, so the input is never read and the
    # process never ends
    def safe_close(self):
        """Give the child up to ten 1-second polls to exit, then close."""
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            # without this increment the loop never terminates while the
            # child stays alive
            count += 1
        self.close(force=False)
15911308Santhony.gutierrez@amd.com        
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Defaults so that the cleanup code in the `finally` clause is well
# defined even when an exception is raised before any command has
# returned a result; a non-zero status also forces the log to be saved.
status = 1
output = ''

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        # do not print the stale output of the earlier 'cd' command
        output = ''
        status = 3
    if output:
        print(output)
finally:
    # end job
    if shell.isalive():
        try:
            shell.sendline('exit')
            shell.expect('Disconnected from OAR job .*')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # Keep going: the log below must still be written, and an
            # exception raised here would mask one already in flight.
            sys.stderr.write("%s: did not see OAR disconnect message.\n"
                             % progname)
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
del shell

sys.exit(status)
23911308Santhony.gutierrez@amd.com