#! /usr/bin/env python

# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
#          Ali Saidi
#
# (Text recovered from a repository web view: qdo revision 5147:a7b91336a3fc.)

# Important!
# This script expects a simple '$ ' prompt from the shell it drives.  If
# you are using a shell other than sh (which uses that prompt by default),
# you will need to add something like the following to your
# bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#   PS1='$ '
#fi


import sys
import os
import re
import time
import optparse

import pexpect

# Name this script was invoked as; used as the prefix of diagnostics.
progname = os.path.basename(sys.argv[0])

# Usage string handed to optparse ("%prog" is expanded by optparse).
usage = "%prog [options] command [command arguments]"
# Command-line interface.  Interspersed arguments are disabled so option
# parsing stops at the first positional argument; everything from there on
# (including the wrapped command's own flags) ends up in `cmd`.
optparser = optparse.OptionParser(usage=usage)
optparser.allow_interspersed_args = False
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save oarsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='oarsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='oarsub destination queue')
# Both timeouts are in seconds: 30 minutes to get a node from the queue,
# 10 hours for the command itself.
optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

# `options` holds the parsed flags; `cmd` is the command line to run on
# the node, as a list of words.
(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else oarsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

# Directory the job will run in; it is passed to oarsub and cd'ed into on
# the node, so it must name a path that is valid there as well.
cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)


# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
#
# Instances read the module-level `options` (for the queue-wait timeout)
# and `progname` (for diagnostics).
class Shell(pexpect.spawn):
    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    # Spawn `cmd` (a command line that yields an interactive shell), wait
    # for its first '$ ' prompt, then switch the prompt to 'qdo$ '.
    # Exits the whole script with status 1 if the spawn fails or no prompt
    # shows up within options.oarsub_timeout seconds.
    def __init__(self, cmd):
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands that should return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    # Version of expect that updates full_output too.
    # Returns whatever pexpect.spawn.expect returns (the index of the
    # pattern that matched) so callers can use it like the base method.
    # On TIMEOUT/EOF the base method raises and full_output is untouched.
    def expect(self, regexp, timeout=-1):
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    # Just issue a command and wait for the next prompt.
    # Returns a string containing the output of the command.
    def do_bare_command(self, cmd, timeout=-1):
        self.sendline(cmd)
        # read back the echo of the command (it is discarded, so it does
        # not appear in the returned output)
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    # Issue a command, then query its exit status.
    # Returns a (string, int) tuple with the command output and the status.
    def do_command(self, cmd, timeout=-1):
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status; int() raises ValueError if the shell printed
        # anything other than a number in response
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    # Check to see if the given directory exists (as seen by the remote
    # shell).  Note that dirname is interpolated unquoted.
    def dir_exists(self, dirname):
        # Use this session rather than the module-level `shell` global so
        # the method works on any instance.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0

    # Don't actually try to close it.. just wait until it closes by itself
    # We can't actually kill the pid which is what it's trying to do, and if
    # we call wait we could be in an unfortunate situation of it printing input
    # right as we call wait, so the input is never read and the process never
    # ends
    #
    # Waits at most 10 seconds for the child to exit, then closes without
    # forcing.  NOTE(review): if the child is still alive at that point,
    # pexpect's close() may raise -- confirm against the installed pexpect.
    def safe_close(self):
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            # without this increment the 10-second bound never triggers
            count += 1
        self.close(force=False)

# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    # Deliberately unreachable (see the note above); kept for reference.
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    # Interactive OAR job, started in the (validated) current directory.
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Exit status of this script.  Pre-set to a failure value so the cleanup
# code below can always read it, even if the very first remote command
# raises before a real status has been collected.
status = 1

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, giving up (and running anyway) after 90.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command; redirections are appended as words so that the remote
    # shell performs them
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        status = 3
        # Don't echo the leftover output of the earlier 'cd' as if it
        # belonged to the command that timed out.
        output = ''
    if output:
        print(output)
finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        try:
            shell.expect('Disconnected from OAR job .*')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # Missing the banner must not prevent the log from being
            # written below; report it and carry on with the shutdown.
            sys.stderr.write("%s: did not see OAR disconnect message.\n"
                             % progname)
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
del shell

sys.exit(status)