qdo (2665:a124942bacb8) | qdo (5147:a7b91336a3fc) |
---|---|
1#! /usr/bin/env python 2 | 1#! /usr/bin/env python 2 |
3# Copyright (c) 2004-2005 The Regents of The University of Michigan | 3# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan |
4# All rights reserved. 5# 6# Redistribution and use in source and binary forms, with or without 7# modification, are permitted provided that the following conditions are 8# met: redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer; 10# redistributions in binary form must reproduce the above copyright 11# notice, this list of conditions and the following disclaimer in the --- 10 unchanged lines hidden (view full) --- 22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28# 29# Authors: Steve Reinhardt | 4# All rights reserved. 5# 6# Redistribution and use in source and binary forms, with or without 7# modification, are permitted provided that the following conditions are 8# met: redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer; 10# redistributions in binary form must reproduce the above copyright 11# notice, this list of conditions and the following disclaimer in the --- 10 unchanged lines hidden (view full) --- 22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28# 29# Authors: Steve Reinhardt |
30# Ali Saidi |
|
30 | 31 |
32# Important! 33# This script expects a simple '$ ' prompt. If you are using a shell other than 34# sh (which defaults to this prompt), you'll need to add something like the following 35# to your bashrc/bash_profile script: 36#if [ "$OAR_USER" = "xxxx" ]; then 37# PS1='$ ' 38 39 |
|
31import sys 32import os 33import re 34import time 35import optparse 36 37import pexpect 38 39progname = os.path.basename(sys.argv[0]) 40 41usage = "%prog [options] command [command arguments]" 42optparser = optparse.OptionParser(usage=usage) 43optparser.allow_interspersed_args=False 44optparser.add_option('-e', dest='stderr_file', 45 help='command stderr output file') 46optparser.add_option('-o', dest='stdout_file', 47 help='command stdout output file') 48optparser.add_option('-l', dest='save_log', action='store_true', | 40import sys 41import os 42import re 43import time 44import optparse 45 46import pexpect 47 48progname = os.path.basename(sys.argv[0]) 49 50usage = "%prog [options] command [command arguments]" 51optparser = optparse.OptionParser(usage=usage) 52optparser.allow_interspersed_args=False 53optparser.add_option('-e', dest='stderr_file', 54 help='command stderr output file') 55optparser.add_option('-o', dest='stdout_file', 56 help='command stdout output file') 57optparser.add_option('-l', dest='save_log', action='store_true', |
49 help='save qsub output log file') | 58 help='save oarsub output log file') |
50optparser.add_option('-N', dest='job_name', | 59optparser.add_option('-N', dest='job_name', |
51 help='qsub job name') | 60 help='oarsub job name') |
52optparser.add_option('-q', dest='dest_queue', | 61optparser.add_option('-q', dest='dest_queue', |
53 help='qsub destination queue') 54optparser.add_option('--qwait', dest='qsub_timeout', type='int', 55 help='qsub queue wait timeout', default=30*60) | 62 help='oarsub destination queue') 63optparser.add_option('--qwait', dest='oarsub_timeout', type='int', 64 help='oarsub queue wait timeout', default=30*60) |
56optparser.add_option('-t', dest='cmd_timeout', type='int', 57 help='command execution timeout', default=600*60) 58 59(options, cmd) = optparser.parse_args() 60 61if cmd == []: 62 print >>sys.stderr, "%s: missing command" % progname 63 sys.exit(1) 64 65# If we want to do this, need to add check here to make sure cmd[0] is | 65optparser.add_option('-t', dest='cmd_timeout', type='int', 66 help='command execution timeout', default=600*60) 67 68(options, cmd) = optparser.parse_args() 69 70if cmd == []: 71 print >>sys.stderr, "%s: missing command" % progname 72 sys.exit(1) 73 74# If we want to do this, need to add check here to make sure cmd[0] is |
66# a valid PBS job name, else qsub will die on us. | 75# a valid OAR job name, else oarsub will die on us. |
67# 68#if not options.job_name: 69# options.job_name = cmd[0] 70 71cwd = os.getcwd() 72 73# Deal with systems where /n is a symlink to /.automount 74if cwd.startswith('/.automount/'): --- 18 unchanged lines hidden (view full) --- 93 except pexpect.ExceptionPexpect, exc: 94 print "%s:" % progname, exc 95 sys.exit(1) 96 # full_output accumulates the full output of the session 97 self.full_output = "" 98 self.quick_timeout = 15 99 # wait for a prompt, then change it 100 try: | 76# 77#if not options.job_name: 78# options.job_name = cmd[0] 79 80cwd = os.getcwd() 81 82# Deal with systems where /n is a symlink to /.automount 83if cwd.startswith('/.automount/'): --- 18 unchanged lines hidden (view full) --- 102 except pexpect.ExceptionPexpect, exc: 103 print "%s:" % progname, exc 104 sys.exit(1) 105 # full_output accumulates the full output of the session 106 self.full_output = "" 107 self.quick_timeout = 15 108 # wait for a prompt, then change it 109 try: |
101 self.expect('\$ ', options.qsub_timeout) | 110 self.expect('\$ ', options.oarsub_timeout) |
102 except pexpect.TIMEOUT: | 111 except pexpect.TIMEOUT: |
103 print >>sys.stderr, "%s: qsub timed out." % progname | 112 print >>sys.stderr, "%s: oarsub timed out." % progname |
104 self.kill(9) | 113 self.kill(9) |
105 self.close(wait=True) | 114 self.safe_close() |
106 sys.exit(1) 107 self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "') 108 109 # version of expect that updates full_output too 110 def expect(self, regexp, timeout = -1): 111 pexpect.spawn.expect(self, regexp, timeout) 112 self.full_output += self.before + self.after 113 --- 18 unchanged lines hidden (view full) --- 132 status = int(self.do_bare_command("echo $?", self.quick_timeout)) 133 return (output, status) 134 135 # Check to see if the given directory exists. 136 def dir_exists(self, dirname): 137 (output, status) = shell.do_command('[ -d %s ]' % dirname, 138 self.quick_timeout) 139 return status == 0 | 115 sys.exit(1) 116 self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "') 117 118 # version of expect that updates full_output too 119 def expect(self, regexp, timeout = -1): 120 pexpect.spawn.expect(self, regexp, timeout) 121 self.full_output += self.before + self.after 122 --- 18 unchanged lines hidden (view full) --- 141 status = int(self.do_bare_command("echo $?", self.quick_timeout)) 142 return (output, status) 143 144 # Check to see if the given directory exists. 145 def dir_exists(self, dirname): 146 (output, status) = shell.do_command('[ -d %s ]' % dirname, 147 self.quick_timeout) 148 return status == 0 |
# NOTE(review): this is a method of the pexpect-based shell wrapper (it is
# invoked as self.safe_close() / shell.safe_close()); re-indent it to method
# level when placing it back inside the class body.
#
# Don't actually try to kill the child; just wait until it exits by itself.
# We can't kill the pid (which is what a forced close would attempt), and
# calling wait risks the child printing output right as we wait, so that
# output is never read and the process never ends.
def safe_close(self, max_polls=10, poll_interval=1):
    """Wait briefly for the child to exit on its own, then close it.

    Polls self.isalive() up to max_polls times, sleeping poll_interval
    seconds after each poll that finds the child still running, and then
    calls self.close(force=False).

    The previous version never incremented its poll counter, so the
    intended cap of 10 polls was never applied and a child that did not
    exit kept this loop spinning forever.

    Args:
        max_polls: maximum number of sleep/poll rounds before giving up
            on waiting (default 10, the cap the original code intended).
        poll_interval: seconds to sleep between polls (default 1).
    """
    polls = 0
    while self.isalive() and polls < max_polls:
        time.sleep(poll_interval)
        polls += 1  # the missing increment: bounds the wait
    # Non-forced close: do not escalate to killing the child.
    self.close(force=False)
142# Spawn the interactive pool job. 143 144# Hack to do link on poolfs... disabled for now since 145# compiler/linker/library versioning problems between poolfs and 146# nodes. May never work since poolfs is x86-64 and nodes are 32-bit. 147if False and len(cmd) > 50: 148 shell_cmd = 'ssh -t poolfs /bin/sh -l' 149 print "%s: running %s on poolfs" % (progname, cmd[0]) 150else: | 160# Spawn the interactive pool job. 161 162# Hack to do link on poolfs... disabled for now since 163# compiler/linker/library versioning problems between poolfs and 164# nodes. May never work since poolfs is x86-64 and nodes are 32-bit. 165if False and len(cmd) > 50: 166 shell_cmd = 'ssh -t poolfs /bin/sh -l' 167 print "%s: running %s on poolfs" % (progname, cmd[0]) 168else: |
151 shell_cmd = 'qsub -I -S /bin/sh' | 169 shell_cmd = 'oarsub -I' |
152 if options.job_name: | 170 if options.job_name: |
153 shell_cmd += ' -N "%s"' % options.job_name | 171 shell_cmd += ' -n "%s"' % options.job_name |
154 if options.dest_queue: 155 shell_cmd += ' -q ' + options.dest_queue | 172 if options.dest_queue: 173 shell_cmd += ' -q ' + options.dest_queue |
174 shell_cmd += ' -d %s' % cwd |
|
156 157shell = Shell(shell_cmd) 158 159try: 160 # chdir to cwd 161 (output, status) = shell.do_command('cd ' + cwd) 162 163 if status != 0: --- 28 unchanged lines hidden (view full) --- 192 cmd += ['>', options.stdout_file] 193 if options.stderr_file: 194 cmd += ['2>', options.stderr_file] 195 try: 196 (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout) 197 except pexpect.TIMEOUT: 198 print >>sys.stderr, "%s: command timed out after %d seconds." \ 199 % (progname, options.cmd_timeout) | 175 176shell = Shell(shell_cmd) 177 178try: 179 # chdir to cwd 180 (output, status) = shell.do_command('cd ' + cwd) 181 182 if status != 0: --- 28 unchanged lines hidden (view full) --- 211 cmd += ['>', options.stdout_file] 212 if options.stderr_file: 213 cmd += ['2>', options.stderr_file] 214 try: 215 (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout) 216 except pexpect.TIMEOUT: 217 print >>sys.stderr, "%s: command timed out after %d seconds." \ 218 % (progname, options.cmd_timeout) |
200 shell.sendline('~.') # qsub/ssh termination escape sequence 201 shell.close(wait=True) | 219 shell.sendline('~.') # oarsub/ssh termination escape sequence 220 shell.safe_close() |
202 status = 3 203 if output: 204 print output | 221 status = 3 222 if output: 223 print output |
205 | |
206finally: 207 # end job 208 if shell.isalive(): 209 shell.sendline('exit') | 224finally: 225 # end job 226 if shell.isalive(): 227 shell.sendline('exit') |
210 shell.expect('qsub: job .* completed\r\n') 211 shell.close(wait=True) | 228 shell.expect('Disconnected from OAR job .*') 229 shell.safe_close() |
212 213 # if there was an error, log the output even if not requested 214 if status != 0 or options.save_log: 215 log = file('qdo-log.' + str(os.getpid()), 'w') 216 log.write(shell.full_output) 217 log.close() | 230 231 # if there was an error, log the output even if not requested 232 if status != 0 or options.save_log: 233 log = file('qdo-log.' + str(os.getpid()), 'w') 234 log.write(shell.full_output) 235 log.close() |
218 | |
219del shell 220 221sys.exit(status) | 236del shell 237 238sys.exit(status) |