qdo revision 13540
113540Sandrea.mondelli@ucf.edu#! /usr/bin/env python2.7 21884Sstever@eecs.umich.edu 35147Ssaidi@eecs.umich.edu# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan 41884Sstever@eecs.umich.edu# All rights reserved. 51884Sstever@eecs.umich.edu# 61884Sstever@eecs.umich.edu# Redistribution and use in source and binary forms, with or without 71884Sstever@eecs.umich.edu# modification, are permitted provided that the following conditions are 81884Sstever@eecs.umich.edu# met: redistributions of source code must retain the above copyright 91884Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer; 101884Sstever@eecs.umich.edu# redistributions in binary form must reproduce the above copyright 111884Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the 121884Sstever@eecs.umich.edu# documentation and/or other materials provided with the distribution; 131884Sstever@eecs.umich.edu# neither the name of the copyright holders nor the names of its 141884Sstever@eecs.umich.edu# contributors may be used to endorse or promote products derived from 151884Sstever@eecs.umich.edu# this software without specific prior written permission. 161884Sstever@eecs.umich.edu# 171884Sstever@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 181884Sstever@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 191884Sstever@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 201884Sstever@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 211884Sstever@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 221884Sstever@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 231884Sstever@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 241884Sstever@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 251884Sstever@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 261884Sstever@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 271884Sstever@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 282665Ssaidi@eecs.umich.edu# 292665Ssaidi@eecs.umich.edu# Authors: Steve Reinhardt 305147Ssaidi@eecs.umich.edu# Ali Saidi 315147Ssaidi@eecs.umich.edu 325147Ssaidi@eecs.umich.edu# Important! 3311320Ssteve.reinhardt@amd.com# This script expects a simple $ prompt, if you are using a shell other than 345147Ssaidi@eecs.umich.edu# sh which defaults to this you'll need to add something like the following 355147Ssaidi@eecs.umich.edu# to your bashrc/bash_profile script: 365147Ssaidi@eecs.umich.edu#if [ "$OAR_USER" = "xxxx" ]; then 375147Ssaidi@eecs.umich.edu# PS1='$ ' 385147Ssaidi@eecs.umich.edu 391884Sstever@eecs.umich.edu 401884Sstever@eecs.umich.eduimport sys 411884Sstever@eecs.umich.eduimport os 421884Sstever@eecs.umich.eduimport re 431884Sstever@eecs.umich.eduimport time 441884Sstever@eecs.umich.eduimport optparse 451884Sstever@eecs.umich.edu 461884Sstever@eecs.umich.eduimport pexpect 471884Sstever@eecs.umich.edu 481884Sstever@eecs.umich.eduprogname = os.path.basename(sys.argv[0]) 491884Sstever@eecs.umich.edu 501884Sstever@eecs.umich.eduusage = "%prog [options] command [command arguments]" 511884Sstever@eecs.umich.eduoptparser = optparse.OptionParser(usage=usage) 521884Sstever@eecs.umich.eduoptparser.allow_interspersed_args=False 531884Sstever@eecs.umich.eduoptparser.add_option('-e', dest='stderr_file', 541884Sstever@eecs.umich.edu help='command stderr output file') 551884Sstever@eecs.umich.eduoptparser.add_option('-o', dest='stdout_file', 561884Sstever@eecs.umich.edu help='command stdout output file') 571884Sstever@eecs.umich.eduoptparser.add_option('-l', dest='save_log', action='store_true', 585147Ssaidi@eecs.umich.edu help='save oarsub output log file') 591930Sstever@eecs.umich.eduoptparser.add_option('-N', dest='job_name', 605147Ssaidi@eecs.umich.edu help='oarsub job name') 611930Sstever@eecs.umich.eduoptparser.add_option('-q', dest='dest_queue', 625147Ssaidi@eecs.umich.edu help='oarsub destination queue') 635147Ssaidi@eecs.umich.eduoptparser.add_option('--qwait', dest='oarsub_timeout', type='int', 645147Ssaidi@eecs.umich.edu help='oarsub queue wait timeout', default=30*60) 651884Sstever@eecs.umich.eduoptparser.add_option('-t', dest='cmd_timeout', type='int', 661884Sstever@eecs.umich.edu help='command execution timeout', default=600*60) 671884Sstever@eecs.umich.edu 681884Sstever@eecs.umich.edu(options, cmd) = optparser.parse_args() 691884Sstever@eecs.umich.edu 701884Sstever@eecs.umich.eduif cmd == []: 711884Sstever@eecs.umich.edu print >>sys.stderr, "%s: missing command" % progname 721884Sstever@eecs.umich.edu sys.exit(1) 731884Sstever@eecs.umich.edu 741940Sstever@eecs.umich.edu# If we want to do this, need to add check here to make sure cmd[0] is 755147Ssaidi@eecs.umich.edu# a valid PBS job name, else oarsub will die on us. 761940Sstever@eecs.umich.edu# 771940Sstever@eecs.umich.edu#if not options.job_name: 781940Sstever@eecs.umich.edu# options.job_name = cmd[0] 791930Sstever@eecs.umich.edu 801884Sstever@eecs.umich.educwd = os.getcwd() 811884Sstever@eecs.umich.edu 821884Sstever@eecs.umich.edu# Deal with systems where /n is a symlink to /.automount 831884Sstever@eecs.umich.eduif cwd.startswith('/.automount/'): 841884Sstever@eecs.umich.edu cwd = cwd.replace('/.automount/', '/n/', 1) 851884Sstever@eecs.umich.edu 861884Sstever@eecs.umich.eduif not cwd.startswith('/n/poolfs/'): 871884Sstever@eecs.umich.edu print >>sys.stderr, "Error: current directory must be under /n/poolfs." 881884Sstever@eecs.umich.edu sys.exit(1) 891884Sstever@eecs.umich.edu 901884Sstever@eecs.umich.edu# The Shell class wraps pexpect.spawn with some handy functions that 911884Sstever@eecs.umich.edu# assume the thing on the other end is a Bourne/bash shell. 921884Sstever@eecs.umich.educlass Shell(pexpect.spawn): 931884Sstever@eecs.umich.edu # Regexp to match the shell prompt. We change the prompt to 941884Sstever@eecs.umich.edu # something fixed and distinctive to make it easier to match 951884Sstever@eecs.umich.edu # reliably. 961884Sstever@eecs.umich.edu prompt_re = re.compile('qdo\$ ') 971884Sstever@eecs.umich.edu 981884Sstever@eecs.umich.edu def __init__(self, cmd): 991884Sstever@eecs.umich.edu # initialize base pexpect.spawn object 10011320Ssteve.reinhardt@amd.com try: 1011884Sstever@eecs.umich.edu pexpect.spawn.__init__(self, cmd) 10211320Ssteve.reinhardt@amd.com except pexpect.ExceptionPexpect, exc: 10311320Ssteve.reinhardt@amd.com print "%s:" % progname, exc 10411320Ssteve.reinhardt@amd.com sys.exit(1) 1051884Sstever@eecs.umich.edu # full_output accumulates the full output of the session 1061884Sstever@eecs.umich.edu self.full_output = "" 1071884Sstever@eecs.umich.edu self.quick_timeout = 15 1081884Sstever@eecs.umich.edu # wait for a prompt, then change it 1091884Sstever@eecs.umich.edu try: 1105147Ssaidi@eecs.umich.edu self.expect('\$ ', options.oarsub_timeout) 1111884Sstever@eecs.umich.edu except pexpect.TIMEOUT: 1125147Ssaidi@eecs.umich.edu print >>sys.stderr, "%s: oarsub timed out." % progname 1131964Sstever@eecs.umich.edu self.kill(9) 1145147Ssaidi@eecs.umich.edu self.safe_close() 1151884Sstever@eecs.umich.edu sys.exit(1) 1161891Sstever@eecs.umich.edu self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "') 1171884Sstever@eecs.umich.edu 1181884Sstever@eecs.umich.edu # version of expect that updates full_output too 1191884Sstever@eecs.umich.edu def expect(self, regexp, timeout = -1): 1201884Sstever@eecs.umich.edu pexpect.spawn.expect(self, regexp, timeout) 1211884Sstever@eecs.umich.edu self.full_output += self.before + self.after 1221884Sstever@eecs.umich.edu 1231884Sstever@eecs.umich.edu # Just issue a command and wait for the next prompt. 1241884Sstever@eecs.umich.edu # Returns a string containing the output of the command. 1251884Sstever@eecs.umich.edu def do_bare_command(self, cmd, timeout = -1): 1261884Sstever@eecs.umich.edu global full_output 1271884Sstever@eecs.umich.edu self.sendline(cmd) 1281884Sstever@eecs.umich.edu # read back the echo of the command 1291884Sstever@eecs.umich.edu self.readline() 1301884Sstever@eecs.umich.edu # wait for the next prompt 1311884Sstever@eecs.umich.edu self.expect(self.prompt_re, timeout) 1321884Sstever@eecs.umich.edu output = self.before.rstrip() 1331884Sstever@eecs.umich.edu return output 1341884Sstever@eecs.umich.edu 1351884Sstever@eecs.umich.edu # Issue a command, then query its exit status. 1361884Sstever@eecs.umich.edu # Returns a (string, int) tuple with the command output and the status. 1371884Sstever@eecs.umich.edu def do_command(self, cmd, timeout = -1): 1381884Sstever@eecs.umich.edu # do the command itself 1391884Sstever@eecs.umich.edu output = self.do_bare_command(cmd, timeout) 1401884Sstever@eecs.umich.edu # collect status 1411884Sstever@eecs.umich.edu status = int(self.do_bare_command("echo $?", self.quick_timeout)) 1421884Sstever@eecs.umich.edu return (output, status) 1431884Sstever@eecs.umich.edu 1441884Sstever@eecs.umich.edu # Check to see if the given directory exists. 1451884Sstever@eecs.umich.edu def dir_exists(self, dirname): 1461884Sstever@eecs.umich.edu (output, status) = shell.do_command('[ -d %s ]' % dirname, 1471884Sstever@eecs.umich.edu self.quick_timeout) 1481884Sstever@eecs.umich.edu return status == 0 14911320Ssteve.reinhardt@amd.com 1505147Ssaidi@eecs.umich.edu # Don't actually try to close it.. just wait until it closes by itself 15111320Ssteve.reinhardt@amd.com # We can't actually kill the pid which is what it's trying to do, and if 15211320Ssteve.reinhardt@amd.com # we call wait we could be in an unfortunate situation of it printing input 1535147Ssaidi@eecs.umich.edu # right as we call wait, so the input is never read and the process never ends 1545147Ssaidi@eecs.umich.edu def safe_close(self): 1555147Ssaidi@eecs.umich.edu count = 0 1565147Ssaidi@eecs.umich.edu while self.isalive() and count < 10: 1575147Ssaidi@eecs.umich.edu time.sleep(1) 1585147Ssaidi@eecs.umich.edu self.close(force=False) 15911320Ssteve.reinhardt@amd.com 1601884Sstever@eecs.umich.edu# Spawn the interactive pool job. 1611884Sstever@eecs.umich.edu 1621884Sstever@eecs.umich.edu# Hack to do link on poolfs... disabled for now since 1631884Sstever@eecs.umich.edu# compiler/linker/library versioning problems between poolfs and 1641884Sstever@eecs.umich.edu# nodes. May never work since poolfs is x86-64 and nodes are 32-bit. 1651884Sstever@eecs.umich.eduif False and len(cmd) > 50: 1661884Sstever@eecs.umich.edu shell_cmd = 'ssh -t poolfs /bin/sh -l' 1671884Sstever@eecs.umich.edu print "%s: running %s on poolfs" % (progname, cmd[0]) 1681884Sstever@eecs.umich.eduelse: 1695147Ssaidi@eecs.umich.edu shell_cmd = 'oarsub -I' 1701940Sstever@eecs.umich.edu if options.job_name: 1715147Ssaidi@eecs.umich.edu shell_cmd += ' -n "%s"' % options.job_name 1721930Sstever@eecs.umich.edu if options.dest_queue: 1731930Sstever@eecs.umich.edu shell_cmd += ' -q ' + options.dest_queue 1745147Ssaidi@eecs.umich.edu shell_cmd += ' -d %s' % cwd 1751884Sstever@eecs.umich.edu 1761884Sstever@eecs.umich.edushell = Shell(shell_cmd) 1771884Sstever@eecs.umich.edu 1781884Sstever@eecs.umich.edutry: 1791884Sstever@eecs.umich.edu # chdir to cwd 1801884Sstever@eecs.umich.edu (output, status) = shell.do_command('cd ' + cwd) 1811884Sstever@eecs.umich.edu 1821884Sstever@eecs.umich.edu if status != 0: 1831884Sstever@eecs.umich.edu raise OSError, "Can't chdir to %s" % cwd 1841884Sstever@eecs.umich.edu 1851884Sstever@eecs.umich.edu # wacky hack: sometimes scons will create an output directory then 1861884Sstever@eecs.umich.edu # fork a job to generate files in that directory, and the job will 1871884Sstever@eecs.umich.edu # get run before the directory creation propagates through NFS. 1881884Sstever@eecs.umich.edu # This hack looks for a '-o' option indicating an output file and 1891884Sstever@eecs.umich.edu # waits for the corresponding directory to appear if necessary. 1901884Sstever@eecs.umich.edu try: 1911884Sstever@eecs.umich.edu if 'cc' in cmd[0] or 'g++' in cmd[0]: 1921884Sstever@eecs.umich.edu output_dir = os.path.dirname(cmd[cmd.index('-o')+1]) 1931884Sstever@eecs.umich.edu elif 'm5' in cmd[0]: 1941884Sstever@eecs.umich.edu output_dir = cmd[cmd.index('-d')+1] 1951884Sstever@eecs.umich.edu else: 1961884Sstever@eecs.umich.edu output_dir = None 1971884Sstever@eecs.umich.edu except (ValueError, IndexError): 1981884Sstever@eecs.umich.edu # no big deal if there's no '-o'/'-d' or if it's the final argument 1991884Sstever@eecs.umich.edu output_dir = None 2001884Sstever@eecs.umich.edu 2011884Sstever@eecs.umich.edu if output_dir: 2021884Sstever@eecs.umich.edu secs_waited = 0 2032441Sstever@eecs.umich.edu while not shell.dir_exists(output_dir) and secs_waited < 90: 2041884Sstever@eecs.umich.edu time.sleep(5) 2051884Sstever@eecs.umich.edu secs_waited += 5 2062441Sstever@eecs.umich.edu if secs_waited > 30: 2071884Sstever@eecs.umich.edu print "waited", secs_waited, "seconds for", output_dir 2081884Sstever@eecs.umich.edu 2091884Sstever@eecs.umich.edu # run command 2101884Sstever@eecs.umich.edu if options.stdout_file: 2111884Sstever@eecs.umich.edu cmd += ['>', options.stdout_file] 2121884Sstever@eecs.umich.edu if options.stderr_file: 2131884Sstever@eecs.umich.edu cmd += ['2>', options.stderr_file] 2141884Sstever@eecs.umich.edu try: 2151884Sstever@eecs.umich.edu (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout) 2161884Sstever@eecs.umich.edu except pexpect.TIMEOUT: 2171884Sstever@eecs.umich.edu print >>sys.stderr, "%s: command timed out after %d seconds." \ 2181884Sstever@eecs.umich.edu % (progname, options.cmd_timeout) 2195147Ssaidi@eecs.umich.edu shell.sendline('~.') # oarsub/ssh termination escape sequence 2205147Ssaidi@eecs.umich.edu shell.safe_close() 2211884Sstever@eecs.umich.edu status = 3 2221884Sstever@eecs.umich.edu if output: 2231884Sstever@eecs.umich.edu print output 2241884Sstever@eecs.umich.edufinally: 2251884Sstever@eecs.umich.edu # end job 2261884Sstever@eecs.umich.edu if shell.isalive(): 2271884Sstever@eecs.umich.edu shell.sendline('exit') 2285147Ssaidi@eecs.umich.edu shell.expect('Disconnected from OAR job .*') 2295147Ssaidi@eecs.umich.edu shell.safe_close() 2301884Sstever@eecs.umich.edu 2311884Sstever@eecs.umich.edu # if there was an error, log the output even if not requested 2321884Sstever@eecs.umich.edu if status != 0 or options.save_log: 2331884Sstever@eecs.umich.edu log = file('qdo-log.' + str(os.getpid()), 'w') 2341884Sstever@eecs.umich.edu log.write(shell.full_output) 2351884Sstever@eecs.umich.edu log.close() 2361884Sstever@eecs.umich.edudel shell 2371884Sstever@eecs.umich.edu 2381884Sstever@eecs.umich.edusys.exit(status) 239