send.py (1816:ecb6cb1337e8) send.py (1881:fc205a7edd58)
1#!/usr/bin/env python
2# Copyright (c) 2005 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;

--- 82 unchanged lines hidden (view full) ---

91 -v be verbose (list job parameters)
92
93 %(progname)s -h
94 -h display this help
95""" % locals()
96
97try:
98 import getopt
1#!/usr/bin/env python
2# Copyright (c) 2005 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;

--- 82 unchanged lines hidden (view full) ---

91 -v be verbose (list job parameters)
92
93 %(progname)s -h
94 -h display this help
95""" % locals()
96
97try:
98 import getopt
99 opts, args = getopt.getopt(sys.argv[1:], '-cd:efhj:lq:v')
99 opts, args = getopt.getopt(sys.argv[1:], '-CRcd:efhj:lq:v')
100except getopt.GetoptError:
101 sys.exit(usage)
102
103clean = False
104onlyecho = False
105exprs = []
106force = False
107listonly = False
108queue = ''
109verbose = False
100except getopt.GetoptError:
101 sys.exit(usage)
102
103clean = False
104onlyecho = False
105exprs = []
106force = False
107listonly = False
108queue = ''
109verbose = False
110rootdir = nfspath(os.getcwd())
111jfile = 'test.py'
110jfile = 'Base/test.py'
111docpts = False
112doruns = True
113runflag = False
114
112for opt,arg in opts:
115for opt,arg in opts:
116 if opt == '-C':
117 docpts = True
118 if opt == '-R':
119 runflag = True
113 if opt == '-c':
114 clean = True
120 if opt == '-c':
121 clean = True
115 if opt == '-d':
116 rootdir = arg
117 if opt == '-e':
118 onlyecho = True
119 if opt == '-f':
120 force = True
121 if opt == '-h':
122 print usage
123 sys.exit(0)
124 if opt == '-j':
125 jfile = arg
126 if opt == '-l':
127 listonly = True
128 if opt == '-q':
129 queue = arg
130 if opt == '-v':
131 verbose = True
132
122 if opt == '-e':
123 onlyecho = True
124 if opt == '-f':
125 force = True
126 if opt == '-h':
127 print usage
128 sys.exit(0)
129 if opt == '-j':
130 jfile = arg
131 if opt == '-l':
132 listonly = True
133 if opt == '-q':
134 queue = arg
135 if opt == '-v':
136 verbose = True
137
133basedir = joinpath(rootdir, 'Base')
134linkdir = joinpath(rootdir, 'Link')
138if docpts:
139 doruns = runflag
135
136for arg in args:
137 exprs.append(re.compile(arg))
138
140
141for arg in args:
142 exprs.append(re.compile(arg))
143
139if not listonly and not onlyecho and isdir(linkdir):
144import jobfile, pbs
145from job import JobDir, date
146
147conf = jobfile.JobFile(jfile)
148
149if not listonly and not onlyecho and isdir(conf.linkdir):
140 if verbose:
141 print 'Checking for outdated files in Link directory'
150 if verbose:
151 print 'Checking for outdated files in Link directory'
142 syncdir(linkdir, basedir)
152 syncdir(conf.linkdir, conf.basedir)
143
153
144import job, jobfile, pbs
154jobnames = {}
155joblist = []
145
156
146test = jobfile.JobFile(joinpath(basedir, jfile))
157if docpts and doruns:
158 gen = conf.alljobs()
159elif docpts:
160 gen = conf.checkpoints()
161elif doruns:
162 gen = conf.jobs()
147
163
148joblist = []
149for jobname in test.jobs:
150 if not exprs:
151 joblist.append(jobname)
164for job in gen:
165 if job.name in jobnames:
152 continue
153
166 continue
167
154 for expr in exprs:
155 if expr.match(jobname):
156 joblist.append(jobname)
157 break
168 if exprs:
169 for expr in exprs:
170 if expr.match(job.name):
171 joblist.append(job)
172 break
173 else:
174 joblist.append(job)
158
159if listonly:
160 if verbose:
175
176if listonly:
177 if verbose:
161 for jobname in joblist:
162 test.printinfo(jobname)
178 for job in joblist:
179 job.printinfo()
163 else:
180 else:
164 for jobname in joblist:
165 print jobname
181 for job in joblist:
182 print job.name
166 sys.exit(0)
167
168if not onlyecho:
183 sys.exit(0)
184
185if not onlyecho:
169 jl = []
170 for jobname in joblist:
171 jobdir = joinpath(rootdir, jobname)
172 if os.path.exists(jobname):
186 newlist = []
187 for job in joblist:
188 jobdir = JobDir(joinpath(conf.rootdir, job.name))
189 if jobdir.exists():
173 if not force:
190 if not force:
174 if os.path.isfile(joinpath(jobdir, '.success')):
191 status = jobdir.getstatus()
192 if status == 'queued':
175 continue
176
193 continue
194
177 if os.path.isfile(joinpath(jobdir, '.start')) and \
178 not os.path.isfile(joinpath(jobdir, '.stop')):
195 if status == 'running':
179 continue
180
196 continue
197
198 if status == 'success':
199 continue
200
181 if not clean:
201 if not clean:
182 sys.exit('job directory not clean!')
202 sys.exit('job directory %s not clean!' % jobdir)
183
203
184 job.cleandir(jobdir)
185 else:
186 os.mkdir(jobdir)
187 jl.append(jobname)
188 joblist = jl
204 jobdir.clean()
205 newlist.append(job)
206 joblist = newlist
189
207
190def setname(jobid, jobname):
191 # since pbs can handle jobnames of 15 characters or less, don't
192 # use the raj hack.
193 if len(jobname) <= 15:
194 return
208class NameHack(object):
209 def __init__(self, host='pbs.pool', port=24465):
210 self.host = host
211 self.port = port
212 self.socket = None
195
213
196 import socket
197 s = socket.socket()
198 # Connect to pbs.pool and send the jobid/jobname pair to port
199 # 24465 (Raj didn't realize that there are only 64k ports and
200 # set up inetd to point to port 90001)
201 s.connect(("pbs.pool", 24465))
202 s.send("%s %s\n" % (jobid, jobname))
203 s.close()
214 def setname(self, jobid, jobname):
215 try:
216 jobid = int(jobid)
217 except ValueError:
218 jobid = int(jobid.strip().split('.')[0])
204
219
205for jobname in joblist:
206 jobdir = joinpath(rootdir, jobname)
220 jobname = jobname.strip()
221 # since pbs can handle jobnames of 15 characters or less,
222 # don't use the raj hack.
223 if len(jobname) <= 15:
224 return
207
225
208 if not onlyecho and not os.path.isdir(jobdir):
209 sys.exit('%s is not a directory. Cannot build job' % jobdir)
226 if self.socket is None:
227 import socket
228 self.socket = socket.socket()
229 # Connect to pbs.pool and send the jobid/jobname pair to port
230 # 24465 (Raj didn't realize that there are only 64k ports and
231 # set up inetd to point to port 90001)
232 self.socket.connect((self.host, self.port))
210
233
211 print 'Job name: %s' % jobname
234 self.socket.send("%s %s\n" % (jobid, jobname))
235
236namehack = NameHack()
237
238for job in joblist:
239 jobdir = JobDir(joinpath(conf.rootdir, job.name))
240
241 if not onlyecho:
242 jobdir.create()
243
244 print 'Job name: %s' % job.name
212 print 'Job directory: %s' % jobdir
213
214 qsub = pbs.qsub()
215 qsub.pbshost = 'simpool.eecs.umich.edu'
245 print 'Job directory: %s' % jobdir
246
247 qsub = pbs.qsub()
248 qsub.pbshost = 'simpool.eecs.umich.edu'
216 qsub.stdout = joinpath(jobdir, 'jobout')
217 qsub.name = jobname[:15]
249 qsub.stdout = jobdir.file('jobout')
250 qsub.name = job.name[:15]
218 qsub.join = True
219 qsub.node_type = 'FAST'
251 qsub.join = True
252 qsub.node_type = 'FAST'
220 qsub.env['ROOTDIR'] = rootdir
221 qsub.env['JOBNAME'] = jobname
253 qsub.env['ROOTDIR'] = conf.rootdir
254 qsub.env['JOBNAME'] = job.name
222 if len(queue):
223 qsub.queue = queue
224 qsub.build(joinpath(progpath, 'job.py'))
225
226 if verbose:
227 print 'PBS Command: %s' % qsub.command
228
229 if not onlyecho:
230 ec = qsub.do()
231 if ec == 0:
232 jobid = qsub.result
233 print 'PBS Jobid: %s' % jobid
255 if len(queue):
256 qsub.queue = queue
257 qsub.build(joinpath(progpath, 'job.py'))
258
259 if verbose:
260 print 'PBS Command: %s' % qsub.command
261
262 if not onlyecho:
263 ec = qsub.do()
264 if ec == 0:
265 jobid = qsub.result
266 print 'PBS Jobid: %s' % jobid
234 setname(jobid, jobname)
267 namehack.setname(jobid, job.name)
268 queued = date()
269 jobdir.echofile('.queued', queued)
270 jobdir.setstatus('queued on %s' % queued)
235 else:
236 print 'PBS Failed'
271 else:
272 print 'PBS Failed'