send.py revision 13540
#!/usr/bin/env python2.7
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi
#          Nathan Binkert

import getopt
import os
import os.path
import re
import socket
import sys
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from filecmp import cmp as filecmp
from shutil import copy


def nfspath(dir):
    """Return `dir` rewritten into the '/n/<host>/...' path namespace.

    '/.automount/<rest>' becomes '/n/<rest>'.  Any other path that does not
    already start with '/n/' is prefixed with '/n/<short hostname>', where
    the short hostname is the local host name up to the first '.'.
    Paths already under '/n/' are returned unchanged.
    """
    if dir.startswith('/.automount/'):
        # len('/.automount/') == 12
        dir = '/n/%s' % dir[12:]
    elif not dir.startswith('/n/'):
        dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
    return dir


def syncdir(srcdir, destdir):
    """Bring `destdir` up to date with the tree rooted at `srcdir`.

    Missing sub-directories are created and every file that is absent from
    the destination, or that filecmp.cmp reports as different, is copied.
    Directories whose name starts with '.' or equals 'SCCS' are skipped.
    Nothing is ever removed from `destdir`.

    Exits the program (sys.exit) if `destdir` does not exist.
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Reduce root to the path relative to srcdir.
        root = normpath(root)
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]

        # Prune in place so os.walk does not descend into these.
        for rem in [d for d in dirs if d.startswith('.') or d == 'SCCS']:
            dirs.remove(rem)

        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % newdir)

        # os.walk does not descend into symlinked directories by default;
        # 'name/.' names the same directory but is not itself a link, so
        # rewriting the entry makes the walk continue through it.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            dest = normpath(joinpath(destdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Report in the same order as the copy() call; the message
                # previously listed the destination first.
                print('copy %s %s' % (src, dest))
                copy(src, dest)


progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0])
usage = """\
Usage:
    %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
    -c           clean directory if job can be run
    -C           submit the checkpointing runs
    -d           Make jobs be dependent on the completion of the checkpoint runs
    -e           only echo pbs command info, don't actually send the job
    -f           force the job to run regardless of state
    -n           don't sync the link directory into the base directory
    -q <queue>   submit job to the named queue
    -R           with -C, also submit the regular runs
    -t <type>    node type to request (default is FAST)
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -v           be verbose

    %(progname)s [-j <jobfile>] -l [-v] <regexp>
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -l           list job names, don't submit
    -v           be verbose (list job parameters)

    %(progname)s -h
    -h           display this help
""" % locals()

try:
    opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
except getopt.GetoptError:
    sys.exit(usage)

# Option defaults.
depend = False
clean = False
onlyecho = False
exprs = []
force = False
listonly = False
queue = ''
verbose = False
jfile = 'Test.py'
docpts = False
doruns = True
runflag = False
node_type = 'FAST'
update = True

for opt, arg in opts:
    if opt == '-C':
        docpts = True
    elif opt == '-c':
        clean = True
    elif opt == '-d':
        depend = True
    elif opt == '-e':
        onlyecho = True
    elif opt == '-f':
        force = True
    elif opt == '-h':
        print(usage)
        sys.exit(0)
    elif opt == '-j':
        jfile = arg
    elif opt == '-l':
        listonly = True
    elif opt == '-n':
        update = False
    elif opt == '-q':
        queue = arg
    elif opt == '-R':
        runflag = True
    elif opt == '-t':
        node_type = arg
    elif opt == '-v':
        verbose = True

# With -C the regular runs are only submitted when -R was given as well.
if docpts:
    doruns = runflag

# Every positional argument is a regular expression selecting job names.
for arg in args:
    exprs.append(re.compile(arg))

# Project-local modules are imported only after option handling, so -h and
# usage errors are reported before these imports are attempted.
import jobfile, pbs
from job import JobDir, date

conf = jobfile.JobFile(jfile)

if update and not listonly and not onlyecho and isdir(conf.linkdir):
    if verbose:
        print('Checking for outdated files in Link directory')
    if not isdir(conf.basedir):
        os.mkdir(conf.basedir)
    syncdir(conf.linkdir, conf.basedir)

jobnames = {}
joblist = []

if docpts and doruns:
    gen = conf.alljobs()
elif docpts:
    gen = conf.checkpoints()
elif doruns:
    gen = conf.jobs()

for job in gen:
    if job.name in jobnames:
        continue
    # Record the name so the duplicate check above can actually fire; the
    # dictionary was previously never filled in.
    jobnames[job.name] = job

    if exprs:
        # Keep the job if any expression matches at the start of its name.
        for expr in exprs:
            if expr.match(job.name):
                joblist.append(job)
                break
    else:
        joblist.append(job)

if listonly:
    if verbose:
        for job in joblist:
            job.printinfo()
    else:
        for job in joblist:
            print(job.name)
    sys.exit(0)

if not onlyecho:
    # Drop jobs that are already queued, running or finished (unless -f),
    # and clean existing job directories (which requires -c).
    newlist = []
    for job in joblist:
        jobdir = JobDir(joinpath(conf.rootdir, job.name))
        if jobdir.exists():
            if not force:
                status = jobdir.getstatus()
                if status in ('queued', 'running', 'success'):
                    continue

            if not clean:
                sys.exit('job directory %s not clean!' % jobdir)

            jobdir.clean()
        newlist.append(job)
    joblist = newlist


class NameHack(object):
    """Send the full name of a job to a helper service over TCP.

    Only names longer than 15 characters are sent.  The connection is
    opened on first use and then reused for later calls.
    """

    def __init__(self, host='pbs.pool', port=24465):
        self.host = host
        self.port = port
        self.socket = None  # connected lazily by setname()

    def setname(self, jobid, jobname):
        """Send '<jobid> <jobname>' to the service if the name is too long.

        `jobid` may be an integer or a string such as '1234.host'; only the
        part before the first '.' is used.  Raises ValueError if that part
        is not an integer.
        """
        try:
            jobid = int(jobid)
        except ValueError:
            jobid = int(jobid.strip().split('.')[0])

        jobname = jobname.strip()
        # since pbs can handle jobnames of 15 characters or less,
        # don't use the raj hack.
        if len(jobname) <= 15:
            return

        if self.socket is None:
            self.socket = socket.socket()
            # Connect to pbs.pool and send the jobid/jobname pair to port
            # 24465 (Raj didn't realize that there are only 64k ports and
            # setup inetd to point to port 90001; 90001 - 65536 == 24465)
            self.socket.connect((self.host, self.port))

        # sendall keeps writing until the whole message has gone out;
        # send may stop after a partial write.
        self.socket.sendall("%s %s\n" % (jobid, jobname))


namehack = NameHack()

for job in joblist:
    jobdir = JobDir(joinpath(conf.rootdir, job.name))
    if depend:
        # Job id recorded in the checkpoint job's directory at submit time.
        cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
        cptjob = cptdir.readval('.pbs_jobid')

    if not onlyecho:
        jobdir.create()

    print('Job name: %s' % job.name)
    print('Job directory: %s' % jobdir)

    qsub = pbs.qsub()
    qsub.pbshost = 'simpool.eecs.umich.edu'
    qsub.stdout = jobdir.file('jobout')
    # The submitted name is cut to 15 characters; the full name is passed
    # in the JOBNAME environment variable and through namehack below.
    qsub.name = job.name[:15]
    qsub.join = True
    qsub.node_type = node_type
    qsub.env['ROOTDIR'] = conf.rootdir
    qsub.env['JOBNAME'] = job.name
    if depend:
        qsub.afterok = cptjob
    if queue:
        qsub.queue = queue
    qsub.build(joinpath(progpath, 'job.py'))

    if verbose:
        print('PBS Command: %s' % qsub.command)

    if not onlyecho:
        ec = qsub.do()
        if ec == 0:
            jobid = qsub.result
            print('PBS Jobid: %s' % jobid)
            namehack.setname(jobid, job.name)
            queued = date()
            jobdir.echofile('.pbs_jobid', jobid)
            jobdir.echofile('.pbs_jobname', job.name)
            jobdir.echofile('.queued', queued)
            jobdir.setstatus('queued on %s' % queued)
        else:
            print('PBS Failed')