qdo revision 2665
#! /usr/bin/env python

# Copyright (c) 2004-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt

# qdo: run a single command inside an interactive PBS (qsub -I) job.
#
# The script spawns an interactive shell via qsub, changes to the
# caller's current directory, runs the requested command there, and
# exits with that command's exit status.  The full session transcript
# is written to 'qdo-log.<pid>' when the command fails or when -l is
# given.
#
# The syntax used here is valid under both Python 2.6+ and Python 3.

import sys
import os
import re
import time
import optparse

try:
    from shlex import quote as shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as shell_quote  # Python 2

import pexpect

progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that options
# belonging to the wrapped command are not consumed by qdo itself.
optparser.allow_interspersed_args = False
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save qsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='qsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='qsub destination queue')
optparser.add_option('--qwait', dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else qsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)


# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    # Spawn 'cmd', wait up to options.qsub_timeout seconds for the
    # first shell prompt, then install the fixed 'qdo$ ' prompt.
    # Exits the program with status 1 if the spawn fails or the
    # initial prompt never shows up.
    def __init__(self, cmd):
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: qsub timed out.\n" % progname)
            self.kill(9)
            self.close(wait=True)
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    # Version of expect that updates full_output too.
    # Returns the match index reported by pexpect.spawn.expect.
    def expect(self, regexp, timeout=-1):
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    # Just issue a command and wait for the next prompt.
    # Returns a string containing the output of the command.
    def do_bare_command(self, cmd, timeout=-1):
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    # Issue a command, then query its exit status.
    # Returns a (string, int) tuple with the command output and the status.
    def do_command(self, cmd, timeout=-1):
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    # Check to see if the given directory exists.
    # 'dirname' is passed to the remote shell unquoted, so it is
    # interpreted the same way as it will be on the command line that
    # is run later.
    def dir_exists(self, dirname):
        # Run the test on this session (not on the module-level
        # 'shell' object) so the method works for any instance.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0


# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'qsub -I -S /bin/sh'
    if options.job_name:
        shell_cmd += ' -N "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue

shell = Shell(shell_cmd)

# Defaults used by the 'finally' clause if an exception escapes before
# the command has produced a real exit status.
status = 1
output = ''

try:
    # chdir to cwd (quoted so that paths containing spaces or shell
    # metacharacters reach 'cd' intact)
    (output, status) = shell.do_command('cd ' + shell_quote(cwd))

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # poll every 5 seconds, giving up after 90 seconds
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    # forget the output of 'cd' so it is not printed if the command
    # itself times out
    output = ''
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        # The expect() wrapper raised before it could record anything,
        # so save whatever was read before the timeout for the log.
        if isinstance(shell.before, str):
            shell.full_output += shell.before
        shell.sendline('~.')  # qsub/ssh termination escape sequence
        shell.close(wait=True)
        status = 3
    if output:
        print(output)

finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        try:
            shell.expect('qsub: job .* completed\r\n')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # Don't let a missing completion message abort the cleanup
            # (and the log write below); report it as a failure instead.
            sys.stderr.write("%s: did not see qsub job completion message.\n"
                             % progname)
            if status == 0:
                status = 1
        shell.close(wait=True)

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)

del shell

sys.exit(status)