qdo (2665:a124942bacb8) qdo (5147:a7b91336a3fc)
1#! /usr/bin/env python
2
1#! /usr/bin/env python
2
3# Copyright (c) 2004-2005 The Regents of The University of Michigan
3# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the

--- 10 unchanged lines hidden (view full) ---

22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Steve Reinhardt
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the

--- 10 unchanged lines hidden (view full) ---

22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Steve Reinhardt
30# Ali Saidi
30
31
32# Important!
33# This script expects a simple "$ " prompt. If you are using a shell other
34# than sh (which defaults to this prompt), you'll need to add something like
35# the following to your bashrc/bash_profile script:
36#if [ "$OAR_USER" = "xxxx" ]; then
37# PS1='$ '
38#fi
39
31import sys
32import os
33import re
34import time
35import optparse
36
37import pexpect
38
39progname = os.path.basename(sys.argv[0])
40
41usage = "%prog [options] command [command arguments]"
42optparser = optparse.OptionParser(usage=usage)
43optparser.allow_interspersed_args=False
44optparser.add_option('-e', dest='stderr_file',
45 help='command stderr output file')
46optparser.add_option('-o', dest='stdout_file',
47 help='command stdout output file')
48optparser.add_option('-l', dest='save_log', action='store_true',
40import sys
41import os
42import re
43import time
44import optparse
45
46import pexpect
47
48progname = os.path.basename(sys.argv[0])
49
50usage = "%prog [options] command [command arguments]"
51optparser = optparse.OptionParser(usage=usage)
52optparser.allow_interspersed_args=False
53optparser.add_option('-e', dest='stderr_file',
54 help='command stderr output file')
55optparser.add_option('-o', dest='stdout_file',
56 help='command stdout output file')
57optparser.add_option('-l', dest='save_log', action='store_true',
49 help='save qsub output log file')
58 help='save oarsub output log file')
50optparser.add_option('-N', dest='job_name',
59optparser.add_option('-N', dest='job_name',
51 help='qsub job name')
60 help='oarsub job name')
52optparser.add_option('-q', dest='dest_queue',
61optparser.add_option('-q', dest='dest_queue',
53 help='qsub destination queue')
54optparser.add_option('--qwait', dest='qsub_timeout', type='int',
55 help='qsub queue wait timeout', default=30*60)
62 help='oarsub destination queue')
63optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
64 help='oarsub queue wait timeout', default=30*60)
56optparser.add_option('-t', dest='cmd_timeout', type='int',
57 help='command execution timeout', default=600*60)
58
59(options, cmd) = optparser.parse_args()
60
61if cmd == []:
62 print >>sys.stderr, "%s: missing command" % progname
63 sys.exit(1)
64
65# If we want to do this, need to add check here to make sure cmd[0] is
65optparser.add_option('-t', dest='cmd_timeout', type='int',
66 help='command execution timeout', default=600*60)
67
68(options, cmd) = optparser.parse_args()
69
70if cmd == []:
71 print >>sys.stderr, "%s: missing command" % progname
72 sys.exit(1)
73
74# If we want to do this, need to add check here to make sure cmd[0] is
66# a valid PBS job name, else qsub will die on us.
75# a valid OAR job name, else oarsub will die on us.
67#
68#if not options.job_name:
69# options.job_name = cmd[0]
70
71cwd = os.getcwd()
72
73# Deal with systems where /n is a symlink to /.automount
74if cwd.startswith('/.automount/'):

--- 18 unchanged lines hidden (view full) ---

93 except pexpect.ExceptionPexpect, exc:
94 print "%s:" % progname, exc
95 sys.exit(1)
96 # full_output accumulates the full output of the session
97 self.full_output = ""
98 self.quick_timeout = 15
99 # wait for a prompt, then change it
100 try:
76#
77#if not options.job_name:
78# options.job_name = cmd[0]
79
80cwd = os.getcwd()
81
82# Deal with systems where /n is a symlink to /.automount
83if cwd.startswith('/.automount/'):

--- 18 unchanged lines hidden (view full) ---

102 except pexpect.ExceptionPexpect, exc:
103 print "%s:" % progname, exc
104 sys.exit(1)
105 # full_output accumulates the full output of the session
106 self.full_output = ""
107 self.quick_timeout = 15
108 # wait for a prompt, then change it
109 try:
101 self.expect('\$ ', options.qsub_timeout)
110 self.expect('\$ ', options.oarsub_timeout)
102 except pexpect.TIMEOUT:
111 except pexpect.TIMEOUT:
103 print >>sys.stderr, "%s: qsub timed out." % progname
112 print >>sys.stderr, "%s: oarsub timed out." % progname
104 self.kill(9)
113 self.kill(9)
105 self.close(wait=True)
114 self.safe_close()
106 sys.exit(1)
107 self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
108
# version of expect that updates full_output too
def expect(self, regexp, timeout = -1):
    """Wait for `regexp` in the child's output and record what was read.

    Delegates to pexpect.spawn.expect, then appends the text consumed
    before the match plus the matched text itself to self.full_output.

    regexp:  pattern forwarded unchanged to pexpect.spawn.expect.
    timeout: forwarded unchanged; -1 is pexpect's "use the spawn
             object's own timeout" value.

    Returns the value returned by pexpect.spawn.expect (the index of the
    pattern that matched), so this override honours the base-class
    contract instead of silently returning None.

    Any exception raised by pexpect (e.g. pexpect.TIMEOUT) propagates to
    the caller; full_output is not updated in that case.
    """
    index = pexpect.spawn.expect(self, regexp, timeout)
    self.full_output += self.before + self.after
    return index
113

--- 18 unchanged lines hidden (view full) ---

132 status = int(self.do_bare_command("echo $?", self.quick_timeout))
133 return (output, status)
134
# Check to see if the given directory exists.
def dir_exists(self, dirname):
    """Return True if `dirname` is a directory as seen by the remote shell.

    Runs the shell test `[ -d dirname ]` through self.do_command with the
    short self.quick_timeout and reports whether its exit status was 0.

    The command is issued on *this* instance (self) rather than on the
    module-level `shell` global, so the method works for any instance and
    does not fail with NameError when no global `shell` is bound yet.

    NOTE(review): dirname is interpolated into the command unquoted, so
    names containing spaces or shell metacharacters will not be tested
    correctly.
    """
    (output, status) = self.do_command('[ -d %s ]' % dirname,
                                       self.quick_timeout)
    return status == 0
115 sys.exit(1)
116 self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
117
# version of expect that updates full_output too
def expect(self, regexp, timeout = -1):
    """Wait for `regexp` in the child's output and record what was read.

    Delegates to pexpect.spawn.expect, then appends the text consumed
    before the match plus the matched text itself to self.full_output.

    regexp:  pattern forwarded unchanged to pexpect.spawn.expect.
    timeout: forwarded unchanged; -1 is pexpect's "use the spawn
             object's own timeout" value.

    Returns the value returned by pexpect.spawn.expect (the index of the
    pattern that matched), so this override honours the base-class
    contract instead of silently returning None.

    Any exception raised by pexpect (e.g. pexpect.TIMEOUT) propagates to
    the caller; full_output is not updated in that case.
    """
    index = pexpect.spawn.expect(self, regexp, timeout)
    self.full_output += self.before + self.after
    return index
122

--- 18 unchanged lines hidden (view full) ---

141 status = int(self.do_bare_command("echo $?", self.quick_timeout))
142 return (output, status)
143
# Check to see if the given directory exists.
def dir_exists(self, dirname):
    """Return True if `dirname` is a directory as seen by the remote shell.

    Runs the shell test `[ -d dirname ]` through self.do_command with the
    short self.quick_timeout and reports whether its exit status was 0.

    The command is issued on *this* instance (self) rather than on the
    module-level `shell` global, so the method works for any instance and
    does not fail with NameError when no global `shell` is bound yet.

    NOTE(review): dirname is interpolated into the command unquoted, so
    names containing spaces or shell metacharacters will not be tested
    correctly.
    """
    (output, status) = self.do_command('[ -d %s ]' % dirname,
                                       self.quick_timeout)
    return status == 0
140
141
149
# Don't actually try to close it.. just wait until it closes by itself.
# We can't actually kill the pid, which is what a forced close tries to do,
# and if we call wait we could be in the unfortunate situation of the child
# printing output right as we call wait, so the output is never read and
# the process never ends.
def safe_close(self):
    """Give the child up to ten seconds to exit, then close without force.

    Polls self.isalive() once per second. The original loop never
    incremented its counter, so the ten-iteration bound was dead code and
    a child that never exited would hang this script forever; the bound
    is now enforced by iterating a fixed number of times.

    After the wait (whether the child exited or the bound was reached)
    the connection is closed with close(force=False).
    """
    max_wait_seconds = 10
    for _ in range(max_wait_seconds):
        if not self.isalive():
            break
        time.sleep(1)
    self.close(force=False)
159
142# Spawn the interactive pool job.
143
144# Hack to do link on poolfs... disabled for now since
145# compiler/linker/library versioning problems between poolfs and
146# nodes. May never work since poolfs is x86-64 and nodes are 32-bit.
147if False and len(cmd) > 50:
148 shell_cmd = 'ssh -t poolfs /bin/sh -l'
149 print "%s: running %s on poolfs" % (progname, cmd[0])
150else:
160# Spawn the interactive pool job.
161
162# Hack to do link on poolfs... disabled for now since
163# compiler/linker/library versioning problems between poolfs and
164# nodes. May never work since poolfs is x86-64 and nodes are 32-bit.
165if False and len(cmd) > 50:
166 shell_cmd = 'ssh -t poolfs /bin/sh -l'
167 print "%s: running %s on poolfs" % (progname, cmd[0])
168else:
151 shell_cmd = 'qsub -I -S /bin/sh'
169 shell_cmd = 'oarsub -I'
152 if options.job_name:
170 if options.job_name:
153 shell_cmd += ' -N "%s"' % options.job_name
171 shell_cmd += ' -n "%s"' % options.job_name
154 if options.dest_queue:
155 shell_cmd += ' -q ' + options.dest_queue
172 if options.dest_queue:
173 shell_cmd += ' -q ' + options.dest_queue
174 shell_cmd += ' -d %s' % cwd
156
157shell = Shell(shell_cmd)
158
159try:
160 # chdir to cwd
161 (output, status) = shell.do_command('cd ' + cwd)
162
163 if status != 0:

--- 28 unchanged lines hidden (view full) ---

192 cmd += ['>', options.stdout_file]
193 if options.stderr_file:
194 cmd += ['2>', options.stderr_file]
195 try:
196 (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
197 except pexpect.TIMEOUT:
198 print >>sys.stderr, "%s: command timed out after %d seconds." \
199 % (progname, options.cmd_timeout)
175
176shell = Shell(shell_cmd)
177
178try:
179 # chdir to cwd
180 (output, status) = shell.do_command('cd ' + cwd)
181
182 if status != 0:

--- 28 unchanged lines hidden (view full) ---

211 cmd += ['>', options.stdout_file]
212 if options.stderr_file:
213 cmd += ['2>', options.stderr_file]
214 try:
215 (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
216 except pexpect.TIMEOUT:
217 print >>sys.stderr, "%s: command timed out after %d seconds." \
218 % (progname, options.cmd_timeout)
200 shell.sendline('~.') # qsub/ssh termination escape sequence
201 shell.close(wait=True)
219 shell.sendline('~.') # oarsub/ssh termination escape sequence
220 shell.safe_close()
202 status = 3
203 if output:
204 print output
221 status = 3
222 if output:
223 print output
205
206finally:
207 # end job
208 if shell.isalive():
209 shell.sendline('exit')
224finally:
225 # end job
226 if shell.isalive():
227 shell.sendline('exit')
210 shell.expect('qsub: job .* completed\r\n')
211 shell.close(wait=True)
228 shell.expect('Disconnected from OAR job .*')
229 shell.safe_close()
212
213 # if there was an error, log the output even if not requested
214 if status != 0 or options.save_log:
215 log = file('qdo-log.' + str(os.getpid()), 'w')
216 log.write(shell.full_output)
217 log.close()
230
231 # if there was an error, log the output even if not requested
232 if status != 0 or options.save_log:
233 log = file('qdo-log.' + str(os.getpid()), 'w')
234 log.write(shell.full_output)
235 log.close()
218
219del shell
220
221sys.exit(status)
236del shell
237
238sys.exit(status)