# send.py revision 1948
#!/usr/bin/env python
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi
#          Nathan Binkert

import getopt
import os
import os.path
import re
import socket
import sys
from filecmp import cmp as filecmp
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from shutil import copy

def nfspath(dir):
    """Return the '/n/'-rooted spelling of the path *dir*.

    - A path under '/.automount/' has that prefix replaced by '/n/'.
    - A path already under '/n/' is returned unchanged.
    - Any other path is prefixed with '/n/<host>', where <host> is the
      local host name up to its first dot.
    """
    automount = '/.automount/'
    if dir.startswith(automount):
        return '/n/%s' % dir[len(automount):]
    if dir.startswith('/n/'):
        return dir
    shorthost = socket.gethostname().split('.')[0]
    return '/n/%s%s' % (shorthost, dir)

def syncdir(srcdir, destdir):
    """Copy new or changed files from the tree at srcdir into destdir.

    Walks srcdir top-down.  Directories whose name starts with '.' or is
    'SCCS' are skipped entirely.  Every other directory is created under
    destdir if missing, and every file that is absent from the
    destination, or that filecmp reports as different, is copied over.
    Nothing is ever deleted from destdir.  Each mkdir and copy is
    reported on stdout.

    srcdir  -- root of the tree to copy from
    destdir -- root of the tree to copy into; must already exist,
               otherwise the program exits with an error message
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Reduce root to a path relative to srcdir ('' for srcdir itself).
        root = normpath(root)
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]

        # Filter in place: os.walk only descends into what is left in dirs.
        dirs[:] = [d for d in dirs
                   if not (d.startswith('.') or d == 'SCCS')]

        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % newdir)

        # os.walk does not descend into symlinks to directories.  Renaming
        # such an entry to 'name/.' makes the path it tests resolve to the
        # target directory, so the walk follows it; the extra '/.' is
        # removed again by the normpath call at the top of the loop.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            src = normpath(joinpath(srcdir, root, entry))
            dest = normpath(joinpath(destdir, root, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Report in the same source-then-destination order that
                # is passed to copy().
                print('copy %s %s' % (src, dest))
                copy(src, dest)

# Directory this script was started from, in its '/n/...' form; job.py is
# looked up there when the PBS command is built.
progpath = nfspath(sys.path[0])

# Name the script was invoked as, substituted into the usage text below.
progname = basename(sys.argv[0])

_usage_template = """\
Usage:
    %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
    -c           clean directory if job can be run
    -e           only echo pbs command info, don't actually send the job
    -f           force the job to run regardless of state
    -q <queue>   submit job to the named queue
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -v           be verbose

    %(progname)s [-j <jobfile>] -l [-v] <regexp>
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -l           list job names, don't submit
    -v           be verbose (list job parameters)

    %(progname)s -h
    -h           display this help
"""

# Help text shown for -h and on a command-line parsing error.
usage = _usage_template % {'progname': progname}

# Parse the command line.  getopt is imported with the other modules at
# the top of the file, so the try body holds only the call that can raise
# GetoptError, and an import failure can no longer turn into a NameError
# while the except clause is being evaluated.
try:
    opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
except getopt.GetoptError:
    # Unknown option or missing option argument: sys.exit() with a string
    # writes the usage text to stderr and exits with a failure status.
    sys.exit(usage)

# Defaults for everything the command-line options can change.

# Which jobs to consider.
jfile = 'Test.py'     # -j: job file handed to jobfile.JobFile
exprs = []            # compiled <regexp> arguments; empty selects every job
docpts = False        # -C: work on checkpoint jobs
runflag = False       # -R: with -C, also work on the normal runs
doruns = True         # replaced by runflag once -C is seen

# How to treat the selected jobs.
listonly = False      # -l: list the jobs and exit
onlyecho = False      # -e: show what would be submitted, submit nothing
force = False         # -f: ignore the recorded status of a job directory
clean = False         # -c: clean an existing job directory before reuse
depend = False        # -d: make each job wait on its checkpoint's PBS job
update = True         # -n clears it: skip syncing the Link directory

# Submission parameters and output.
queue = ''            # -q: PBS queue; an empty string leaves it unset
node_type = 'FAST'    # -t: node type given to qsub
verbose = False       # -v

# Apply the parsed options on top of the defaults.  Each pass through the
# loop handles exactly one option letter.
for opt, arg in opts:
    if opt == '-C':
        docpts = True
    elif opt == '-c':
        clean = True
    elif opt == '-d':
        depend = True
    elif opt == '-e':
        onlyecho = True
    elif opt == '-f':
        force = True
    elif opt == '-h':
        print(usage)
        sys.exit(0)
    elif opt == '-j':
        jfile = arg
    elif opt == '-l':
        listonly = True
    elif opt == '-n':
        update = False
    elif opt == '-q':
        queue = arg
    elif opt == '-R':
        runflag = True
    elif opt == '-t':
        node_type = arg
    elif opt == '-v':
        verbose = True

# With -C the normal runs are included only if -R was given as well.
if docpts:
    doruns = runflag

# Every remaining argument is a regular expression selecting job names.
exprs.extend([re.compile(arg) for arg in args])
152
# Project-local modules, imported only after the command line has been
# parsed.
import jobfile
import pbs
from job import JobDir, date

# Job configuration read from the job file named by -j.
conf = jobfile.JobFile(jfile)

# Unless disabled with -n, or when only listing or echoing, refresh the
# base directory from the Link directory before any job is submitted.
if update and not (listonly or onlyecho) and isdir(conf.linkdir):
    if verbose:
        print('Checking for outdated files in Link directory')
    if not isdir(conf.basedir):
        os.mkdir(conf.basedir)
    syncdir(conf.linkdir, conf.basedir)
164
# Jobs already seen, keyed by name, so that a job the generator yields
# more than once is considered only the first time.
jobnames = {}
# Jobs selected for listing or submission, in generator order.
joblist = []

# Pick the job source.  doruns is only ever cleared together with docpts
# being set, so one of these branches always binds gen.
if docpts and doruns:
    gen = conf.alljobs()
elif docpts:
    gen = conf.checkpoints()
elif doruns:
    gen = conf.jobs()

for job in gen:
    if job.name in jobnames:
        continue
    # Record the name; without this the duplicate check above could never
    # trigger because nothing else fills jobnames.
    jobnames[job.name] = job

    if exprs:
        # Keep the job if any expression matches the start of its name.
        for expr in exprs:
            if expr.match(job.name):
                joblist.append(job)
                break
    else:
        # No expressions were given: every job is selected.
        joblist.append(job)
186
# -l: report the selected jobs and stop without submitting anything.
# With -v each job prints its full information instead of just its name.
if listonly:
    for job in joblist:
        if verbose:
            job.printinfo()
        else:
            print(job.name)
    sys.exit(0)
195
# Before submitting for real, decide which of the selected jobs can run.
if not onlyecho:
    newlist = []
    for job in joblist:
        jobdir = JobDir(joinpath(conf.rootdir, job.name))
        if jobdir.exists():
            # Without -f, a job that is queued, running or already
            # finished successfully is silently left alone.
            if not force:
                status = jobdir.getstatus()
                if status in ('queued', 'running', 'success'):
                    continue

            # Any other existing directory must be cleaned first, which
            # is only done when -c was given.
            if not clean:
                sys.exit('job directory %s not clean!' % jobdir)

            jobdir.clean()
        newlist.append(job)
    joblist = newlist
218
class NameHack(object):
    """Report full job names to a helper service for long-named jobs.

    PBS can handle job names of at most 15 characters, so for any longer
    name the (job id, full name) pair is sent as one text line to a
    service listening on host:port.  The connection is opened on first
    use and then reused for later calls.
    """

    def __init__(self, host='pbs.pool', port=24465):
        """Remember where the service lives; no connection is made yet."""
        self.host = host
        self.port = port
        self.socket = None

    def setname(self, jobid, jobname):
        """Send the full name of job *jobid* if it exceeds 15 characters.

        jobid   -- an integer, or a string whose part before the first
                   '.' is an integer (e.g. '1234.server')
        jobname -- the full job name; surrounding whitespace is ignored

        Raises ValueError if no integer can be extracted from jobid, and
        whatever the socket layer raises if connecting or sending fails.
        """
        try:
            jobid = int(jobid)
        except ValueError:
            jobid = int(jobid.strip().split('.')[0])

        jobname = jobname.strip()
        # PBS copes with names of 15 characters or less by itself, so
        # there is nothing to report for those.
        if len(jobname) <= 15:
            return

        if self.socket is None:
            import socket
            # Connect to the name service (pbs.pool, port 24465 unless
            # overridden).  The socket is stored only after connect()
            # succeeds, so a failed attempt leaves self.socket as None and
            # the next call retries instead of reusing a dead socket.
            sock = socket.socket()
            sock.connect((self.host, self.port))
            self.socket = sock

        # sendall() keeps writing until the whole line has been handed to
        # the kernel; plain send() may stop after a partial write.
        self.socket.sendall("%s %s\n" % (jobid, jobname))
246
# Shared connection for reporting job names longer than 15 characters.
namehack = NameHack()

# Build, and unless -e was given submit, one PBS job per selected job.
for job in joblist:
    jobdir = JobDir(joinpath(conf.rootdir, job.name))
    if depend:
        # -d: look up the PBS job id recorded in the checkpoint's job
        # directory so this job can be made to wait for it.
        cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
        cptjob = cptdir.readval('.pbs_jobid')

    if not onlyecho:
        jobdir.create()

    print('Job name:       %s' % job.name)
    print('Job directory:  %s' % jobdir)

    qsub = pbs.qsub()
    qsub.pbshost = 'simpool.eecs.umich.edu'
    qsub.stdout = jobdir.file('jobout')
    # Only the first 15 characters are passed on as the PBS job name.
    qsub.name = job.name[:15]
    qsub.join = True
    qsub.node_type = node_type
    qsub.env['ROOTDIR'] = conf.rootdir
    qsub.env['JOBNAME'] = job.name
    if depend:
        qsub.afterok = cptjob
    if queue:
        qsub.queue = queue
    qsub.build(joinpath(progpath, 'job.py'))

    if verbose:
        print('PBS Command:    %s' % qsub.command)

    # -e: the command has been shown; nothing is actually submitted.
    if onlyecho:
        continue

    if qsub.do() != 0:
        print('PBS Failed')
        continue

    jobid = qsub.result
    print('PBS Jobid:      %s' % jobid)
    namehack.setname(jobid, job.name)

    # Record the submission in the job directory.
    queued = date()
    jobdir.echofile('.pbs_jobid', jobid)
    jobdir.echofile('.pbs_jobname', job.name)
    jobdir.echofile('.queued', queued)
    jobdir.setstatus('queued on %s' % queued)
291