qdo revision 13540
113540Sandrea.mondelli@ucf.edu#! /usr/bin/env python2.7
21884Sstever@eecs.umich.edu
35147Ssaidi@eecs.umich.edu# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
41884Sstever@eecs.umich.edu# All rights reserved.
51884Sstever@eecs.umich.edu#
61884Sstever@eecs.umich.edu# Redistribution and use in source and binary forms, with or without
71884Sstever@eecs.umich.edu# modification, are permitted provided that the following conditions are
81884Sstever@eecs.umich.edu# met: redistributions of source code must retain the above copyright
91884Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer;
101884Sstever@eecs.umich.edu# redistributions in binary form must reproduce the above copyright
111884Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the
121884Sstever@eecs.umich.edu# documentation and/or other materials provided with the distribution;
131884Sstever@eecs.umich.edu# neither the name of the copyright holders nor the names of its
141884Sstever@eecs.umich.edu# contributors may be used to endorse or promote products derived from
151884Sstever@eecs.umich.edu# this software without specific prior written permission.
161884Sstever@eecs.umich.edu#
171884Sstever@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
181884Sstever@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
191884Sstever@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
201884Sstever@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
211884Sstever@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
221884Sstever@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
231884Sstever@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
241884Sstever@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
251884Sstever@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
261884Sstever@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
271884Sstever@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
282665Ssaidi@eecs.umich.edu#
292665Ssaidi@eecs.umich.edu# Authors: Steve Reinhardt
305147Ssaidi@eecs.umich.edu#          Ali Saidi
315147Ssaidi@eecs.umich.edu
# Important!
# This script expects a plain '$ ' shell prompt.  If your login shell is
# anything other than sh (which uses that prompt by default), you will need
# to add something like the following to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#   PS1='$ '
#fi
385147Ssaidi@eecs.umich.edu
391884Sstever@eecs.umich.edu
401884Sstever@eecs.umich.eduimport sys
411884Sstever@eecs.umich.eduimport os
421884Sstever@eecs.umich.eduimport re
431884Sstever@eecs.umich.eduimport time
441884Sstever@eecs.umich.eduimport optparse
451884Sstever@eecs.umich.edu
461884Sstever@eecs.umich.eduimport pexpect
471884Sstever@eecs.umich.edu
# Name used as the prefix of every diagnostic this script prints.
progname = os.path.basename(sys.argv[0])

# Command-line interface.  Everything after the first positional argument
# belongs to the command being run, so interspersed-option parsing is
# turned off.
usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
optparser.disable_interspersed_args()
for _flag, _settings in (
    ('-e', dict(dest='stderr_file', help='command stderr output file')),
    ('-o', dict(dest='stdout_file', help='command stdout output file')),
    ('-l', dict(dest='save_log', action='store_true',
                help='save oarsub output log file')),
    ('-N', dict(dest='job_name', help='oarsub job name')),
    ('-q', dict(dest='dest_queue', help='oarsub destination queue')),
    ('--qwait', dict(dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)),
    ('-t', dict(dest='cmd_timeout', type='int',
                help='command execution timeout', default=600*60)),
):
    optparser.add_option(_flag, **_settings)
del _flag, _settings

# 'cmd' is the list of leftover arguments: the command and its arguments.
(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# Defaulting the job name to cmd[0] is deliberately left disabled: doing
# so would first require checking that cmd[0] is a valid PBS job name,
# else oarsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = '/n/' + cwd[len('/.automount/'):]

# Jobs may only be launched from somewhere under /n/poolfs.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)
891884Sstever@eecs.umich.edu
901884Sstever@eecs.umich.edu# The Shell class wraps pexpect.spawn with some handy functions that
911884Sstever@eecs.umich.edu# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """pexpect.spawn wrapper with helpers for driving a Bourne/bash shell.

    The constructor spawns ``cmd``, waits for a ``$ `` prompt and then
    switches the prompt to the fixed string ``qdo$ `` so later matches are
    reliable.  Everything matched by expect() is accumulated in
    ``full_output`` so the caller can write a session log.

    The constructor reads the module-level names ``progname`` and
    ``options`` (for ``options.oarsub_timeout``) and terminates the
    process with exit status 1 if the spawn fails or no prompt appears
    within that timeout.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) used for commands expected to return at once
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout = -1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns (the index of the
        matched pattern) so callers can still tell patterns apart.
        Exceptions from the base class propagate unchanged; in that case
        full_output is not updated.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout = -1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout = -1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.  ``timeout`` applies to the command itself; the status
        query uses ``quick_timeout``.  Raises ValueError if the reply to
        ``echo $?`` is not an integer.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the remote side."""
        # Run the test through this Shell instance rather than through
        # whatever module-level object happens to be called 'shell'.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0

    def safe_close(self):
        """Wait (up to about 10 seconds) for the child to exit, then close.

        Don't actually try to close it right away; just wait until it
        closes by itself.  We can't actually kill the pid, which is what
        a forced close tries to do, and if we call wait we could be in the
        unfortunate situation of it printing output right as we call wait,
        so the output is never read and the process never ends.
        """
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            # Advance the counter so the 10-iteration bound takes effect;
            # without this the loop never ends if the child stays alive.
            count += 1
        self.close(force=False)
15911320Ssteve.reinhardt@amd.com
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Give 'status' and 'output' defined values before the try block so the
# cleanup code below can always consult them.  Treat the run as failed
# until a command has actually reported its exit status.
status = 1
output = None

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # poll every 5 seconds, giving up after 90 seconds
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        status = 3
        # 'output' still holds the result of the earlier 'cd'; do not
        # print that as if it came from the command that timed out.
        output = None
    if output:
        print(output)
finally:
    cleanup_ok = False
    try:
        # end job
        if shell.isalive():
            shell.sendline('exit')
            shell.expect('Disconnected from OAR job .*')
            shell.safe_close()
        cleanup_ok = True
    finally:
        # if there was an error (including one while ending the job),
        # log the output even if not requested
        if status != 0 or options.save_log or not cleanup_ok:
            with open('qdo-log.' + str(os.getpid()), 'w') as log:
                log.write(shell.full_output)
del shell

sys.exit(status)
239