send.py revision 1956
#!/usr/bin/env python
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi
#          Nathan Binkert

import os, os.path, re, socket, sys
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from filecmp import cmp as filecmp
from shutil import copy

def nfspath(dir):
    """Return `dir` rewritten as a path under the '/n/' namespace.

    A path under '/.automount/' has that prefix replaced by '/n/'.  A path
    that already starts with '/n/' is returned unchanged.  Any other path
    is prefixed with '/n/<short hostname>', where the short hostname is the
    local host name up to the first '.'.
    """
    automount = '/.automount/'
    if dir.startswith(automount):
        dir = '/n/%s' % dir[len(automount):]
    elif not dir.startswith('/n/'):
        dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
    return dir

def syncdir(srcdir, destdir):
    """Copy new or changed files from `srcdir` into `destdir`.

    The source tree is walked recursively.  Directories whose name starts
    with '.' or equals 'SCCS' are skipped.  Missing sub-directories are
    created under `destdir`, and a file is copied whenever it is absent
    from the destination or compares as different from its source.  Every
    mkdir and copy is reported on stdout.  Nothing is ever removed from
    `destdir`.

    Exits the program (sys.exit) if `destdir` is not an existing directory.
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Reduce root to a path relative to srcdir ('' for the top level).
        root = normpath(root)
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]

        # Prune in place so that os.walk does not descend into these.
        for rem in [d for d in dirs if d.startswith('.') or d == 'SCCS']:
            dirs.remove(rem)

        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % newdir)

        # Rename symlinked directories to '<name>/.' in the walk list;
        # presumably this makes os.walk descend into them, since the
        # rewritten path is no longer itself a symbolic link.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            dest = normpath(joinpath(destdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Report source first, matching the copy(src, dest) call.
                print('copy %s %s' % (src, dest))
                copy(src, dest)

# Directory holding this script, in its '/n/' form, and the script's name.
progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0])
usage = """\
Usage:
    %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
    -c           clean directory if job can be run
    -C           submit the checkpointing runs
    -d           Make jobs be dependent on the completion of the checkpoint runs
    -e           only echo pbs command info, don't actually send the job
    -f           force the job to run regardless of state
    -q <queue>   submit job to the named queue
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -R           with -C, also submit the regular runs
    -t <type>    node type to request (default is FAST)
    -v           be verbose

    %(progname)s [-j <jobfile>] -l [-v] <regexp>
    -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
    -l           list job names, don't submit
    -v           be verbose (list job parameters)

    %(progname)s -h
    -h           display this help
""" % locals()

try:
    import getopt
    opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lq:Rt:v')
except getopt.GetoptError:
    sys.exit(usage)

# Defaults for everything the command-line options can change.
depend = False
clean = False
onlyecho = False
exprs = []
force = False
listonly = False
queue = ''
verbose = False
jfile = 'Test.py'
docpts = False
doruns = True
runflag = False
node_type = 'FAST'

# Apply the parsed command-line options to the defaults.
for opt, arg in opts:
    if opt == '-C':
        docpts = True
    elif opt == '-c':
        clean = True
    elif opt == '-d':
        depend = True
    elif opt == '-e':
        onlyecho = True
    elif opt == '-f':
        force = True
    elif opt == '-h':
        print(usage)
        sys.exit(0)
    elif opt == '-j':
        jfile = arg
    elif opt == '-l':
        listonly = True
    elif opt == '-q':
        queue = arg
    elif opt == '-R':
        runflag = True
    elif opt == '-t':
        node_type = arg
    elif opt == '-v':
        verbose = True

# When checkpoint runs are requested, the other runs are only submitted
# if -R was given as well.
if docpts:
    doruns = runflag

# Every positional argument is a regular expression selecting job names.
for arg in args:
    exprs.append(re.compile(arg))

import jobfile, pbs
from job import JobDir, date

conf = jobfile.JobFile(jfile)

if not listonly and not onlyecho and isdir(conf.linkdir):
    if verbose:
        print('Checking for outdated files in Link directory')
    if not isdir(conf.basedir):
        os.mkdir(conf.basedir)
    syncdir(conf.linkdir, conf.basedir)

jobnames = {}   # job name -> job, for every job already selected
joblist = []

if docpts and doruns:
    gen = conf.alljobs()
elif docpts:
    gen = conf.checkpoints()
elif doruns:
    gen = conf.jobs()

for job in gen:
    # Never select two jobs with the same name.
    if job.name in jobnames:
        continue

    # With no expressions every job is selected; otherwise the name must
    # match at least one of them (anchored at the start, via re.match).
    selected = not exprs
    for expr in exprs:
        if expr.match(job.name):
            selected = True
            break

    if selected:
        # Record the name so the duplicate check above can take effect.
        jobnames[job.name] = job
        joblist.append(job)

if listonly:
    if verbose:
        for job in joblist:
            job.printinfo()
    else:
        for job in joblist:
            print(job.name)
    sys.exit(0)

if not onlyecho:
    # Drop jobs that are already queued, running or finished (unless -f),
    # and clean out any existing job directory (only allowed with -c).
    newlist = []
    for job in joblist:
        jobdir = JobDir(joinpath(conf.rootdir, job.name))
        if jobdir.exists():
            if not force:
                status = jobdir.getstatus()
                if status == 'queued':
                    continue

                if status == 'running':
                    continue

                if status == 'success':
                    continue

            if not clean:
                sys.exit('job directory %s not clean!' % jobdir)

            jobdir.clean()
        newlist.append(job)
    joblist = newlist

class NameHack(object):
    """Send the full name of long-named jobs to a helper service.

    qsub is only given the first 15 characters of a job name, so for longer
    names the "jobid jobname" pair is sent over a TCP connection to
    host:port.  The connection is opened on first use and then reused.
    """
    def __init__(self, host='pbs.pool', port=24465):
        self.host = host
        self.port = port
        self.socket = None

    def setname(self, jobid, jobname):
        """Report `jobname` for `jobid` if the name exceeds 15 characters.

        `jobid` may be an integer or a string such as '1234.server'; in the
        latter case only the part before the first '.' is used.
        """
        try:
            jobid = int(jobid)
        except ValueError:
            jobid = int(jobid.strip().split('.')[0])

        jobname = jobname.strip()
        # since pbs can handle jobnames of 15 characters or less,
        # don't use the raj hack.
        if len(jobname) <= 15:
            return

        if self.socket is None:
            import socket
            self.socket = socket.socket()
            # Connect to pbs.pool and send the jobid/jobname pair to port
            # 24465 (Raj didn't realize that there are only 64k ports and
            # setup inetd to point to port 90001)
            self.socket.connect((self.host, self.port))

        # sendall, unlike send, keeps writing until the whole message has
        # been handed to the socket.
        self.socket.sendall("%s %s\n" % (jobid, jobname))

namehack = NameHack()

for job in joblist:
    jobdir = JobDir(joinpath(conf.rootdir, job.name))
    if depend:
        # The job may only start after its checkpoint job has succeeded;
        # look up the PBS id recorded in the checkpoint's job directory.
        cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
        cptjob = cptdir.readval('.pbs_jobid')

    if not onlyecho:
        jobdir.create()

    print('Job name: %s' % job.name)
    print('Job directory: %s' % jobdir)

    qsub = pbs.qsub()
    qsub.pbshost = 'simpool.eecs.umich.edu'
    qsub.stdout = jobdir.file('jobout')
    qsub.name = job.name[:15]
    qsub.join = True
    qsub.node_type = node_type
    qsub.env['ROOTDIR'] = conf.rootdir
    qsub.env['JOBNAME'] = job.name
    if depend:
        qsub.afterok = cptjob
    if queue:
        qsub.queue = queue
    qsub.build(joinpath(progpath, 'job.py'))

    if verbose:
        print('PBS Command: %s' % qsub.command)

    if not onlyecho:
        ec = qsub.do()
        if ec == 0:
            jobid = qsub.result
            print('PBS Jobid: %s' % jobid)
            namehack.setname(jobid, job.name)
            queued = date()
            jobdir.echofile('.pbs_jobid', jobid)
            jobdir.echofile('.pbs_jobname', job.name)
            jobdir.echofile('.queued', queued)
            jobdir.setstatus('queued on %s' % queued)
        else:
            print('PBS Failed')