qdo (1964:fca3bb8babaf) qdo (2441:6d9097edd4aa)
1#! /usr/bin/env python
2
3# Copyright (c) 2004-2005 The Regents of The University of Michigan
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29import sys
30import os
31import re
32import time
33import optparse
34
35import pexpect
36
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument, so anything after
# the command name is returned untouched as part of cmd.
optparser.disable_interspersed_args()
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save qsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='qsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='qsub destination queue')
# Both timeouts are in seconds: 30 minutes to get through the queue,
# 600 minutes for the command itself.
optparser.add_option('--qwait', dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

# cmd holds the command to run plus its arguments; it must not be empty.
if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else qsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = '/n/' + cwd[len('/.automount/'):]

# The job does a "cd" to this directory on the remote side, so only
# directories under /n/poolfs are accepted.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)
78
79# The Shell class wraps pexpect.spawn with some handy functions that
80# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """pexpect.spawn wrapper for talking to a Bourne/bash shell.

    The helper methods assume the spawned process ends up at a shell
    prompt.  Everything matched by expect() is accumulated in
    self.full_output so the whole session can be logged afterwards.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn cmd, wait for its first prompt, then install our prompt.

        Exits the process with status 1 if cmd cannot be spawned, or if
        no '$ ' prompt appears within options.qsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect:
            # sys.exc_info() avoids version-specific "except ... as" syntax.
            exc = sys.exc_info()[1]
            # Report on stderr like every other error path in this script.
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: qsub timed out.\n" % progname)
            self.kill(9)
            self.close(wait=True)
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns, so callers that
        rely on the base-class return value keep working.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.  The status query itself uses self.quick_timeout.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the shell's side.

        dirname is interpolated into the test command unquoted.
        """
        # Use this session (self), not whatever the module-level name
        # "shell" happens to be bound to.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0
138
139
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now (the leading "False")
# since compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    sys.stdout.write("%s: running %s on poolfs\n" % (progname, cmd[0]))
else:
    # Interactive qsub job running /bin/sh, plus the optional job name
    # and destination queue given on our own command line.
    qsub_parts = ['qsub -I -S /bin/sh']
    if options.job_name:
        qsub_parts.append('-N "%s"' % options.job_name)
    if options.dest_queue:
        qsub_parts.append('-q ' + options.dest_queue)
    shell_cmd = ' '.join(qsub_parts)

shell = Shell(shell_cmd)
156
157try:
158 # chdir to cwd
159 (output, status) = shell.do_command('cd ' + cwd)
160
161 if status != 0:
162 raise OSError, "Can't chdir to %s" % cwd
163
164 # wacky hack: sometimes scons will create an output directory then
165 # fork a job to generate files in that directory, and the job will
166 # get run before the directory creation propagates through NFS.
167 # This hack looks for a '-o' option indicating an output file and
168 # waits for the corresponding directory to appear if necessary.
169 try:
170 if 'cc' in cmd[0] or 'g++' in cmd[0]:
171 output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
172 elif 'm5' in cmd[0]:
173 output_dir = cmd[cmd.index('-d')+1]
174 else:
175 output_dir = None
176 except (ValueError, IndexError):
177 # no big deal if there's no '-o'/'-d' or if it's the final argument
178 output_dir = None
179
180 if output_dir:
181 secs_waited = 0
1#! /usr/bin/env python
2
3# Copyright (c) 2004-2005 The Regents of The University of Michigan
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29import sys
30import os
31import re
32import time
33import optparse
34
35import pexpect
36
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument, so anything after
# the command name is returned untouched as part of cmd.
optparser.disable_interspersed_args()
optparser.add_option('-e', dest='stderr_file',
                     help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
                     help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
                     help='save qsub output log file')
optparser.add_option('-N', dest='job_name',
                     help='qsub job name')
optparser.add_option('-q', dest='dest_queue',
                     help='qsub destination queue')
# Both timeouts are in seconds: 30 minutes to get through the queue,
# 600 minutes for the command itself.
optparser.add_option('--qwait', dest='qsub_timeout', type='int',
                     help='qsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
                     help='command execution timeout', default=600*60)

(options, cmd) = optparser.parse_args()

# cmd holds the command to run plus its arguments; it must not be empty.
if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)

# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else qsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = '/n/' + cwd[len('/.automount/'):]

# The job does a "cd" to this directory on the remote side, so only
# directories under /n/poolfs are accepted.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)
78
79# The Shell class wraps pexpect.spawn with some handy functions that
80# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """pexpect.spawn wrapper for talking to a Bourne/bash shell.

    The helper methods assume the spawned process ends up at a shell
    prompt.  Everything matched by expect() is accumulated in
    self.full_output so the whole session can be logged afterwards.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn cmd, wait for its first prompt, then install our prompt.

        Exits the process with status 1 if cmd cannot be spawned, or if
        no '$ ' prompt appears within options.qsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect:
            # sys.exc_info() avoids version-specific "except ... as" syntax.
            exc = sys.exc_info()[1]
            # Report on stderr like every other error path in this script.
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.qsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: qsub timed out.\n" % progname)
            self.kill(9)
            self.close(wait=True)
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns, so callers that
        rely on the base-class return value keep working.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.  The status query itself uses self.quick_timeout.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the shell's side.

        dirname is interpolated into the test command unquoted.
        """
        # Use this session (self), not whatever the module-level name
        # "shell" happens to be bound to.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0
138
139
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now (the leading "False")
# since compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    sys.stdout.write("%s: running %s on poolfs\n" % (progname, cmd[0]))
else:
    # Interactive qsub job running /bin/sh, plus the optional job name
    # and destination queue given on our own command line.
    qsub_parts = ['qsub -I -S /bin/sh']
    if options.job_name:
        qsub_parts.append('-N "%s"' % options.job_name)
    if options.dest_queue:
        qsub_parts.append('-q ' + options.dest_queue)
    shell_cmd = ' '.join(qsub_parts)

shell = Shell(shell_cmd)
156
157try:
158 # chdir to cwd
159 (output, status) = shell.do_command('cd ' + cwd)
160
161 if status != 0:
162 raise OSError, "Can't chdir to %s" % cwd
163
164 # wacky hack: sometimes scons will create an output directory then
165 # fork a job to generate files in that directory, and the job will
166 # get run before the directory creation propagates through NFS.
167 # This hack looks for a '-o' option indicating an output file and
168 # waits for the corresponding directory to appear if necessary.
169 try:
170 if 'cc' in cmd[0] or 'g++' in cmd[0]:
171 output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
172 elif 'm5' in cmd[0]:
173 output_dir = cmd[cmd.index('-d')+1]
174 else:
175 output_dir = None
176 except (ValueError, IndexError):
177 # no big deal if there's no '-o'/'-d' or if it's the final argument
178 output_dir = None
179
180 if output_dir:
181 secs_waited = 0
182 while not shell.dir_exists(output_dir) and secs_waited < 45:
182 while not shell.dir_exists(output_dir) and secs_waited < 90:
183 time.sleep(5)
184 secs_waited += 5
183 time.sleep(5)
184 secs_waited += 5
185 if secs_waited > 10:
185 if secs_waited > 30:
186 print "waited", secs_waited, "seconds for", output_dir
187
188 # run command
189 if options.stdout_file:
190 cmd += ['>', options.stdout_file]
191 if options.stderr_file:
192 cmd += ['2>', options.stderr_file]
193 try:
194 (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
195 except pexpect.TIMEOUT:
196 print >>sys.stderr, "%s: command timed out after %d seconds." \
197 % (progname, options.cmd_timeout)
198 shell.sendline('~.') # qsub/ssh termination escape sequence
199 shell.close(wait=True)
200 status = 3
201 if output:
202 print output
203
204finally:
205 # end job
206 if shell.isalive():
207 shell.sendline('exit')
208 shell.expect('qsub: job .* completed\r\n')
209 shell.close(wait=True)
210
211 # if there was an error, log the output even if not requested
212 if status != 0 or options.save_log:
213 log = file('qdo-log.' + str(os.getpid()), 'w')
214 log.write(shell.full_output)
215 log.close()
216
217del shell
218
219sys.exit(status)
186 print "waited", secs_waited, "seconds for", output_dir
187
188 # run command
189 if options.stdout_file:
190 cmd += ['>', options.stdout_file]
191 if options.stderr_file:
192 cmd += ['2>', options.stderr_file]
193 try:
194 (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
195 except pexpect.TIMEOUT:
196 print >>sys.stderr, "%s: command timed out after %d seconds." \
197 % (progname, options.cmd_timeout)
198 shell.sendline('~.') # qsub/ssh termination escape sequence
199 shell.close(wait=True)
200 status = 3
201 if output:
202 print output
203
204finally:
205 # end job
206 if shell.isalive():
207 shell.sendline('exit')
208 shell.expect('qsub: job .* completed\r\n')
209 shell.close(wait=True)
210
211 # if there was an error, log the output even if not requested
212 if status != 0 or options.save_log:
213 log = file('qdo-log.' + str(os.getpid()), 'w')
214 log.write(shell.full_output)
215 log.close()
216
217del shell
218
219sys.exit(status)