send.py revision 13540
#!/usr/bin/env python2.7
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi
#          Nathan Binkert

import getopt
import os
import os.path
import re
import socket
import sys
from filecmp import cmp as filecmp
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from shutil import copy

def nfspath(dir):
    """Return *dir* rewritten as a path under the ``/n/`` namespace.

    * ``/.automount/<rest>`` becomes ``/n/<rest>``.
    * A path that already starts with ``/n/`` is returned unchanged.
    * Any other path is prefixed with ``/n/<host>``, where ``<host>`` is
      the local host name up to (not including) its first ``.``.

    The parameter keeps its historical name ``dir`` (which shadows the
    builtin) so that keyword callers keep working.
    """
    automount = '/.automount/'
    if dir.startswith(automount):
        # Slice by the prefix length instead of a hand-counted constant.
        return '/n/%s' % dir[len(automount):]
    if dir.startswith('/n/'):
        return dir
    host = socket.gethostname().split('.')[0]
    return '/n/%s%s' % (host, dir)

def syncdir(srcdir, destdir):
    """Copy new or changed files from the *srcdir* tree into *destdir*.

    The source tree is walked top-down.  Sub-directories whose name starts
    with ``.`` or equals ``SCCS`` are skipped entirely.  Every other
    sub-directory is created under *destdir* if it is missing, and every
    file is copied when the destination file does not exist or
    ``filecmp.cmp`` reports that it differs from the source.

    A ``mkdir``/``copy`` line is printed for each action taken.  Files that
    exist only in *destdir* are left alone.

    Exits the program via ``sys.exit`` if *destdir* is not an existing
    directory.
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Directory being visited, relative to srcdir ('' for the top).
        rel = os.path.relpath(normpath(root), srcdir)
        if rel == os.curdir:
            rel = ''

        # Prune in place so that os.walk does not descend into them.
        dirs[:] = [d for d in dirs
                   if not (d.startswith('.') or d == 'SCCS')]

        for entry in dirs:
            newdir = joinpath(destdir, rel, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % newdir)

        # os.walk skips symlinked directories by default.  Rewriting the
        # entry as "<name>/." gives it a path that is no longer itself a
        # link, so the walk descends into the link target.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, rel, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            dest = normpath(joinpath(destdir, rel, entry))
            src = normpath(joinpath(srcdir, rel, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Report in the same order as the copy: source, then dest.
                print('copy %s %s' % (src, dest))
                copy(src, dest)

# Directory holding this script, rewritten by nfspath(); used later to
# locate job.py next to it.
progpath = nfspath(sys.path[0])
# Name the script was invoked as, substituted into the usage text.
progname = basename(sys.argv[0])
# Help text.  It lists every option the command-line parser accepts,
# including -n, -R and -t which were previously undocumented.
usage = """\
Usage:
    %(progname)s [-c] [-C] [-d] [-e] [-f] [-j <jobfile>] [-n] [-q queue]
                 [-R] [-t <type>] [-v] <regexp>
    -c           clean directory if job can be run
    -C           submit the checkpointing runs
    -d           Make jobs be dependent on the completion of the checkpoint runs
    -e           only echo pbs command info, don't actually send the job
    -f           force the job to run regardless of state
    -n           don't sync the link directory into the base directory
    -q <queue>   submit job to the named queue
    -R           with -C, also submit the regular runs
    -t <type>    node type to request (default is FAST)
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -v           be verbose

    %(progname)s [-j <jobfile>] -l [-v] <regexp>
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -l           list job names, don't submit
    -v           be verbose (list job parameters)

    %(progname)s -h
    -h           display this help
""" % locals()

# Parse the command line.  An unrecognised option, or a missing option
# argument, prints the usage text and exits with a failure status.
# NOTE(review): the leading '-' in the option string is kept exactly as it
# was; confirm whether it is intentional.
try:
    opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
except getopt.GetoptError:
    sys.exit(usage)

# Option defaults.
depend = False        # -d: make each job wait for its checkpoint job
clean = False         # -c: clean an existing job directory before reuse
onlyecho = False      # -e: show what would be submitted, submit nothing
exprs = []            # compiled <regexp> arguments used to select jobs
force = False         # -f: ignore the recorded status of existing jobs
listonly = False      # -l: list the selected jobs and exit
queue = ''            # -q: PBS queue name ('' leaves it unset)
verbose = False       # -v
jfile = 'Test.py'     # -j: job description file
docpts = False        # -C: submit the checkpointing runs
doruns = True         # submit the regular runs
runflag = False       # -R: together with -C, also submit the regular runs
node_type = 'FAST'    # -t: node type handed to qsub
update = True         # cleared by -n: skip the link directory sync

for opt, arg in opts:
    if opt == '-C':
        docpts = True
    elif opt == '-c':
        clean = True
    elif opt == '-d':
        depend = True
    elif opt == '-e':
        onlyecho = True
    elif opt == '-f':
        force = True
    elif opt == '-h':
        print(usage)
        sys.exit(0)
    elif opt == '-j':
        jfile = arg
    elif opt == '-l':
        listonly = True
    elif opt == '-n':
        update = False
    elif opt == '-q':
        queue = arg
    elif opt == '-R':
        runflag = True
    elif opt == '-t':
        node_type = arg
    elif opt == '-v':
        verbose = True

# With -C the regular runs are submitted only if -R was given as well.
if docpts:
    doruns = runflag

# Every positional argument is a regular expression matched against job
# names.
for arg in args:
    exprs.append(re.compile(arg))

# Project-local modules, imported only after the command line has been
# parsed.
import jobfile
import pbs
from job import JobDir, date

conf = jobfile.JobFile(jfile)

# Bring the base directory up to date with the link directory, unless
# syncing was disabled or this invocation only lists or echoes.
if update and not listonly and not onlyecho and isdir(conf.linkdir):
    if verbose:
        print('Checking for outdated files in Link directory')
    if not isdir(conf.basedir):
        os.mkdir(conf.basedir)
    syncdir(conf.linkdir, conf.basedir)

jobnames = {}    # job name -> first job seen with that name
joblist = []     # jobs selected for listing or submission

if docpts and doruns:
    gen = conf.alljobs()
elif docpts:
    gen = conf.checkpoints()
elif doruns:
    gen = conf.jobs()

for job in gen:
    if job.name in jobnames:
        continue
    # Remember the name so that a later job with the same name is skipped;
    # without this the membership test above could never succeed.
    jobnames[job.name] = job

    # With no expressions every job is selected; otherwise the name has to
    # match at least one of them (anchored at the start, as re.match is).
    if not exprs or any(expr.match(job.name) for expr in exprs):
        joblist.append(job)

if listonly:
    if verbose:
        for job in joblist:
            job.printinfo()
    else:
        for job in joblist:
            print(job.name)
    sys.exit(0)

if not onlyecho:
    # Drop jobs that should not be resubmitted, and prepare the job
    # directory of those that should.
    newlist = []
    for job in joblist:
        jobdir = JobDir(joinpath(conf.rootdir, job.name))
        if jobdir.exists():
            # Unless forced, leave alone any job whose recorded status is
            # queued, running or success.
            if not force:
                status = jobdir.getstatus()
                if status in ('queued', 'running', 'success'):
                    continue

            # An existing directory is reused only when cleaning was
            # requested; otherwise stop rather than overwrite it.
            if not clean:
                sys.exit('job directory %s not clean!' % jobdir)

            jobdir.clean()
        newlist.append(job)
    joblist = newlist

class NameHack(object):
    """Report long job names to a helper service over a TCP connection.

    For job names longer than 15 characters, setname() sends a
    "<jobid> <jobname>" line to *host*:*port*.  The connection is opened
    on first use and reused for later calls.
    """

    def __init__(self, host='pbs.pool', port=24465):
        self.host = host
        self.port = port
        # Opened lazily by setname().
        self.socket = None

    def setname(self, jobid, jobname):
        """Send the jobid/jobname pair if *jobname* exceeds 15 characters.

        *jobid* may be an integer or a string such as "1234.host"; in the
        latter case only the part before the first '.' is used.  Returns
        None.  Raises ValueError if no integer id can be extracted, and
        lets socket errors from connecting or sending propagate.
        """
        try:
            jobid = int(jobid)
        except ValueError:
            jobid = int(jobid.strip().split('.')[0])

        jobname = jobname.strip()
        # since pbs can handle jobnames of 15 characters or less,
        # don't use the raj hack.
        if len(jobname) <= 15:
            return

        if self.socket is None:
            # Connect to pbs.pool and send the jobid/jobname pair to port
            # 24465 (Raj didn't realize that there are only 64k ports and
            # setup inetd to point to port 90001; 90001 - 65536 == 24465).
            # Cache the socket only once connect() has succeeded, so a
            # failed attempt does not leave an unconnected socket behind.
            sock = socket.socket()
            sock.connect((self.host, self.port))
            self.socket = sock

        message = "%s %s\n" % (jobid, jobname)
        if not isinstance(message, bytes):
            message = message.encode('utf-8')
        # sendall() keeps writing until the whole line has gone out;
        # plain send() is allowed to stop after a partial write.
        self.socket.sendall(message)

namehack = NameHack()

# Build a qsub request for every selected job and, unless only echoing,
# submit it and record the outcome in the job directory.
for job in joblist:
    jobdir = JobDir(joinpath(conf.rootdir, job.name))
    if depend:
        # PBS job id recorded in the checkpoint job's directory; the new
        # job is made to run only after that job succeeds.
        cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
        cptjob = cptdir.readval('.pbs_jobid')

    if not onlyecho:
        jobdir.create()

    print('Job name:       %s' % job.name)
    print('Job directory:  %s' % jobdir)

    qsub = pbs.qsub()
    qsub.pbshost = 'simpool.eecs.umich.edu'
    qsub.stdout = jobdir.file('jobout')
    # Only the first 15 characters are given to PBS; the full name is
    # passed to the job through the environment below.
    qsub.name = job.name[:15]
    qsub.join = True
    qsub.node_type = node_type
    qsub.env['ROOTDIR'] = conf.rootdir
    qsub.env['JOBNAME'] = job.name
    if depend:
        qsub.afterok = cptjob
    if queue:
        qsub.queue = queue
    qsub.build(joinpath(progpath, 'job.py'))

    if verbose:
        print('PBS Command:    %s' % qsub.command)

    if not onlyecho:
        ec = qsub.do()
        if ec == 0:
            jobid = qsub.result
            print('PBS Jobid:      %s' % jobid)
            namehack.setname(jobid, job.name)
            queued = date()
            jobdir.echofile('.pbs_jobid', jobid)
            jobdir.echofile('.pbs_jobname', job.name)
            jobdir.echofile('.queued', queued)
            jobdir.setstatus('queued on %s' % queued)
        else:
            print('PBS Failed')
2939444SAndreas.Sandberg@ARM.com