send.py revision 1956
#!/usr/bin/env python
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi
#          Nathan Binkert
302914SN/A
312914SN/Aimport os, os.path, re, socket, sys
322914SN/Afrom os import environ as env, listdir
332914SN/Afrom os.path import basename, isdir, isfile, islink, join as joinpath, normpath
342914SN/Afrom filecmp import cmp as filecmp
352914SN/Afrom shutil import copy
362914SN/A
def nfspath(dir):
    """Return *dir* expressed as a path below the '/n/' prefix.

    - A path starting with '/.automount/' has that prefix replaced by '/n/'.
    - A path already starting with '/n/' is returned unchanged.
    - Any other path is prefixed with '/n/<host>', where <host> is the
      local host name up to (not including) its first dot.
    """
    automount = '/.automount/'
    if dir.startswith(automount):
        return '/n/' + dir[len(automount):]

    if dir.startswith('/n/'):
        return dir

    short_host = socket.gethostname().split('.')[0]
    return '/n/' + short_host + dir
432914SN/A
def syncdir(srcdir, destdir):
    """Bring destdir up to date with the tree rooted at srcdir.

    Walks srcdir top-down.  Every sub-directory that is missing below
    destdir is created, and every file that is missing there, or whose
    contents differ from the source file, is copied over.  Directories
    whose name starts with '.' or equals 'SCCS' are skipped entirely.
    Nothing is ever removed from destdir.

    Each mkdir/copy performed is reported on stdout.

    Exits the program (sys.exit) if destdir is not an existing directory.
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Reduce root to a path relative to srcdir.
        root = normpath(root)
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]

        # Prune in place so that os.walk does not descend into hidden
        # or SCCS directories.
        dirs[:] = [d for d in dirs if not (d.startswith('.') or d == 'SCCS')]

        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % newdir)

        # os.walk does not descend into symlinked directories by default.
        # Rewriting 'link' as 'link/.' hands it a path that is not itself
        # a symlink, so the linked directory gets walked as well.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            dest = normpath(joinpath(destdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Report in the same order as the copy: source, then target.
                print('copy %s %s' % (src, dest))
                copy(src, dest)
758948Sandreas.hansson@arm.com
# Directory this script was started from, as an NFS path; the job.py
# handed to qsub further down is looked up in this directory.
progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0])

# Help text.  Keep this in sync with the option string given to getopt:
# every accepted option (including -R and -t) is listed here.
usage = """\
Usage:
    %(progname)s [-c] [-C] [-d] [-e] [-f] [-j <jobfile>] [-q queue] [-R]
        [-t <type>] [-v] <regexp>
    -c           clean directory if job can be run
    -C           submit the checkpointing runs
    -d           Make jobs be dependent on the completion of the checkpoint runs
    -e           only echo pbs command info, don't actually send the job
    -f           force the job to run regardless of state
    -q <queue>   submit job to the named queue
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -R           with -C, submit the normal runs as well as the checkpoints
    -t <type>    node type to use for the job (default is FAST)
    -v           be verbose

    %(progname)s [-j <jobfile>] -l [-v] <regexp>
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -l           list job names, don't submit
    -v           be verbose (list job parameters)

    %(progname)s -h
    -h           display this help
""" % {'progname': progname}
989342SAndreas.Sandberg@arm.com
# Imported outside the try block: the except clause below refers to
# getopt.GetoptError, so the module has to be bound before it can run.
import getopt

try:
    # NOTE(review): the leading '-' in the option string looks
    # unintentional; it is kept as-is -- confirm before removing.
    opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lq:Rt:v')
except getopt.GetoptError:
    sys.exit(usage)

# Defaults for everything that can be set from the command line.
depend = False
clean = False
onlyecho = False
exprs = []
force = False
listonly = False
queue = ''
verbose = False
jfile = 'Test.py'
docpts = False
doruns = True
runflag = False
node_type = 'FAST'

for opt, arg in opts:
    if opt == '-C':
        docpts = True
    elif opt == '-c':
        clean = True
    elif opt == '-d':
        depend = True
    elif opt == '-e':
        onlyecho = True
    elif opt == '-f':
        force = True
    elif opt == '-h':
        print(usage)
        sys.exit(0)
    elif opt == '-j':
        jfile = arg
    elif opt == '-l':
        listonly = True
    elif opt == '-q':
        queue = arg
    elif opt == '-R':
        runflag = True
    elif opt == '-t':
        node_type = arg
    elif opt == '-v':
        verbose = True

# When checkpoint runs were requested (-C), the normal runs are only
# submitted as well if -R was given.
if docpts:
    doruns = runflag

# The remaining arguments are regular expressions selecting job names.
# A malformed pattern is reported as a usage error, not a traceback.
for arg in args:
    try:
        exprs.append(re.compile(arg))
    except re.error as err:
        sys.exit('invalid regular expression "%s": %s' % (arg, err))
1518711SN/A
import jobfile
import pbs
from job import JobDir, date

# Load the job description file chosen with -j (default 'Test.py').
conf = jobfile.JobFile(jfile)

# Only when jobs are really going to be submitted: bring the base
# directory up to date with the link directory, creating the base
# directory first if it does not exist yet.
if not (listonly or onlyecho) and isdir(conf.linkdir):
    if verbose:
        print('Checking for outdated files in Link directory')
    if not isdir(conf.basedir):
        os.mkdir(conf.basedir)
    syncdir(conf.linkdir, conf.basedir)
1638914Sandreas.hansson@arm.com
# jobnames maps the name of every job seen so far to that job, so that a
# name produced more than once by the job file is only considered once.
jobnames = {}
joblist = []

# Pick the job generator matching the -C / -R selection.  doruns can only
# be False when docpts is True, so the final branch covers "runs only".
if docpts and doruns:
    gen = conf.alljobs()
elif docpts:
    gen = conf.checkpoints()
else:
    gen = conf.jobs()

for job in gen:
    if job.name in jobnames:
        continue
    # Record the name; without this the duplicate check above never fires.
    jobnames[job.name] = job

    # With no expressions on the command line every job is selected;
    # otherwise the name has to match at least one of them.
    if not exprs or any(expr.match(job.name) for expr in exprs):
        joblist.append(job)
1858914Sandreas.hansson@arm.com
# -l: only report the selected jobs, then stop without submitting.
# With -v each job prints its own details, otherwise just its name.
if listonly:
    for job in joblist:
        if verbose:
            job.printinfo()
        else:
            print(job.name)
    sys.exit(0)
1948914Sandreas.hansson@arm.com
# Unless we are only echoing commands, drop the jobs that must not be
# resubmitted and clean out the directories of the ones that will be.
if not onlyecho:
    newlist = []
    for job in joblist:
        jobdir = JobDir(joinpath(conf.rootdir, job.name))

        # No job directory yet: nothing to check or clean.
        if not jobdir.exists():
            newlist.append(job)
            continue

        # Without -f, jobs that are queued, running or already
        # successful are silently skipped.
        if not force:
            status = jobdir.getstatus()
            if status in ('queued', 'running', 'success'):
                continue

        # An existing directory is only reused when -c was given.
        if not clean:
            sys.exit('job directory %s not clean!' % jobdir)

        jobdir.clean()
        newlist.append(job)
    joblist = newlist
2178975Sandreas.hansson@arm.com
class NameHack(object):
    """Send the full name of a job to a helper service.

    Job names of 15 characters or less are handled by pbs itself, so
    nothing is sent for them.  For longer names a "<jobid> <jobname>"
    line is sent over a TCP connection to host:port.  The connection is
    opened on first use and then reused for later calls.
    """

    def __init__(self, host='pbs.pool', port=24465):
        self.host = host
        self.port = port
        # Connected socket, or None until the first long name is sent.
        self.socket = None

    def setname(self, jobid, jobname):
        """Register jobname for jobid if the name is longer than 15 chars.

        jobid may be an integer or a string such as '1234.server'; only
        the numeric part before the first dot is used.

        Raises ValueError if no integer job id can be extracted, and
        socket.error if connecting or sending fails.
        """
        try:
            jobid = int(jobid)
        except ValueError:
            jobid = int(jobid.strip().split('.')[0])

        jobname = jobname.strip()
        # since pbs can handle jobnames of 15 characters or less,
        # don't use the raj hack.
        if len(jobname) <= 15:
            return

        if self.socket is None:
            import socket
            # Connect to pbs.pool and send the jobid/jobname pair to port
            # 24465 (Raj didn't realize that there are only 64k ports and
            # setup inetd to point to port 90001)
            sock = socket.socket()
            try:
                sock.connect((self.host, self.port))
            except socket.error:
                # Do not cache a socket that never connected; otherwise
                # every later call would try to send on a dead socket.
                sock.close()
                raise
            self.socket = sock

        message = "%s %s\n" % (jobid, jobname)
        # Sockets carry bytes; on Python 2 a str already is bytes.
        if not isinstance(message, bytes):
            message = message.encode('utf-8')
        # sendall keeps writing until the whole line has gone out,
        # whereas send may stop after a partial write.
        self.socket.sendall(message)
2458975Sandreas.hansson@arm.com
namehack = NameHack()

# Build a qsub request for every selected job and, unless -e was given,
# submit it and record the outcome in the job directory.
for job in joblist:
    jobdir = JobDir(joinpath(conf.rootdir, job.name))
    if depend:
        # -d: look up the pbs job id stored for this job's checkpoint run
        cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
        cptjob = cptdir.readval('.pbs_jobid')

    if not onlyecho:
        jobdir.create()

    print('Job name:       %s' % job.name)
    print('Job directory:  %s' % jobdir)

    qsub = pbs.qsub()
    qsub.pbshost = 'simpool.eecs.umich.edu'
    qsub.stdout = jobdir.file('jobout')
    # only the first 15 characters are given to pbs as the job name;
    # the full name is passed on through namehack and $JOBNAME
    qsub.name = job.name[:15]
    qsub.join = True
    qsub.node_type = node_type
    qsub.env['ROOTDIR'] = conf.rootdir
    qsub.env['JOBNAME'] = job.name
    if depend:
        qsub.afterok = cptjob
    if queue:
        qsub.queue = queue
    qsub.build(joinpath(progpath, 'job.py'))

    if verbose:
        print('PBS Command:    %s' % qsub.command)

    # -e: the command has been echoed, nothing is submitted
    if onlyecho:
        continue

    ec = qsub.do()
    if ec != 0:
        print('PBS Failed')
        continue

    jobid = qsub.result
    print('PBS Jobid:      %s' % jobid)
    namehack.setname(jobid, job.name)
    queued = date()
    jobdir.echofile('.pbs_jobid', jobid)
    jobdir.echofile('.pbs_jobname', job.name)
    jobdir.echofile('.queued', queued)
    jobdir.setstatus('queued on %s' % queued)
290