#! /usr/bin/env python
# qdo revision 11320

# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
#          Ali Saidi

# Important!
# This script expects a simple "$ " prompt.  If your login shell is not a
# plain sh (which defaults to that prompt), add something like the
# following to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#   PS1='$ '
#fi


import sys
import os
import re
import time
import optparse

import pexpect

progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that the options
# of the wrapped command are passed through untouched.
optparser.allow_interspersed_args = False
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save oarsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='oarsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='oarsub destination queue')
optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else oarsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)


# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """Interactive remote shell driven through pexpect.

    On construction the spawned command is expected to print a "$ "
    prompt; the prompt is then changed to the fixed string "qdo$ " so
    that later commands can be delimited reliably.  Everything matched
    through expect() is accumulated in self.full_output.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn cmd, wait for its first prompt and install ours.

        Exits the script with status 1 if the command cannot be spawned
        or if no prompt shows up within options.oarsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands that should return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of pexpect.spawn.expect that updates full_output too.

        Returns whatever the base-class expect() returns.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status reported by "echo $?".
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Return True if dirname is a directory as seen by this shell."""
        # Run the test through *this* shell rather than through whatever
        # module-level object happens to be named 'shell'.
        _, status = self.do_command('[ -d %s ]' % dirname,
                                    self.quick_timeout)
        return status == 0

    # Don't actually try to close it.. just wait until it closes by itself.
    # We can't actually kill the pid which is what it's trying to do, and if
    # we call wait we could be in an unfortunate situation of it printing
    # input right as we call wait, so the input is never read and the
    # process never ends.
    def safe_close(self):
        """Give the child up to 10 seconds to exit, then close gently."""
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            # Without this increment the 10-second bound never triggers
            # and a child that refuses to die hangs the script forever.
            count += 1
        self.close(force=False)


# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Assume failure until a command reports otherwise, so that the 'finally'
# clause below can always read 'status' even if an exception escapes
# before the first assignment.
status = 1

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %s seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd),
                                            options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.')  # oarsub/ssh termination escape sequence
        shell.safe_close()
        status = 3
    if output:
        print(output)
finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        shell.expect('Disconnected from OAR job .*')
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
del shell

sys.exit(status)