#!/usr/bin/env python2.7
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Kevin Lim

import errno
import os
import os.path
import shutil
import signal
import socket
import sys
import time
from os import environ as env
from os.path import join as joinpath, expanduser


def date():
    """Return the current local time as a '%a %b %e %H:%M:%S %Z %Y' string."""
    return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())


def cleandir(dir):
    """Remove everything inside `dir`, leaving `dir` itself in place.

    The tree is walked bottom-up so that every directory is already empty
    when it is removed.  Symbolic links are unlinked rather than followed:
    os.walk() reports a link to a directory among the sub-directories, and
    os.rmdir() would fail on it.
    """
    for root, dirs, files in os.walk(dir, topdown=False):
        for name in files:
            os.remove(joinpath(root, name))
        for name in dirs:
            path = joinpath(root, name)
            if os.path.islink(path):
                os.remove(path)
            else:
                os.rmdir(path)


class rsync:
    """Builds and runs an rsync command line.

    Behaviour is configured through attributes set after construction:
      sudo     -- prefix the command with 'sudo'
      rsync    -- name of the rsync executable
      compress -- pass '-z'
      archive  -- pass '-a'
      delete   -- pass '--delete'
      options  -- one extra argument passed through verbatim (skipped if empty)
    """

    def __init__(self):
        self.sudo = False
        self.rsync = 'rsync'
        self.compress = False
        self.archive = True
        self.delete = False
        self.options = ''

    def do(self, src, dst):
        """Run rsync from `src` to `dst`, wait for it, return its exit status."""
        args = ['sudo'] if self.sudo else []
        args.append(self.rsync)
        if self.archive:
            args.append('-a')
        if self.compress:
            args.append('-z')
        if self.delete:
            args.append('--delete')
        if self.options:
            args.append(self.options)
        args.extend([src, dst])

        return os.spawnvp(os.P_WAIT, args[0], args)


class JobDir(object):
    """A job's directory together with the small marker files kept in it."""

    def __init__(self, dir):
        self.dir = dir

    def file(self, filename):
        """Return the path of `filename` inside the job directory."""
        return joinpath(self.dir, filename)

    def create(self):
        """Create the job directory if needed.

        Exits the program if the path exists but is not a directory.
        """
        if not os.path.exists(self.dir):
            os.mkdir(self.dir)
        elif not os.path.isdir(self.dir):
            sys.exit('%s is not a directory. Cannot build job' % self.dir)

    def exists(self):
        """Return True if the job directory exists."""
        return os.path.isdir(self.dir)

    def clean(self):
        """Delete the contents of the job directory."""
        cleandir(self.dir)

    def hasfile(self, filename):
        """Return True if `filename` is a regular file in the job directory."""
        return os.path.isfile(self.file(filename))

    def _write(self, filename, string, mode):
        """Write `string` plus a newline to `filename` using open mode `mode`.

        Exits the program with the error as message if the file cannot be
        opened or written.
        """
        try:
            # 'with' guarantees the handle is closed even if the write fails.
            with open(self.file(filename), mode) as f:
                f.write('%s\n' % (string,))
        except IOError as e:
            sys.exit(e)

    def echofile(self, filename, string):
        """Replace the contents of `filename` with `string` and a newline."""
        self._write(filename, string, 'w')

    def rmfile(self, filename):
        """Delete `filename` from the job directory if it is a regular file."""
        filename = self.file(filename)
        if os.path.isfile(filename):
            os.unlink(filename)

    def readval(self, filename):
        """Return the first line of `filename` with surrounding whitespace
        stripped.  Raises IOError if the file cannot be opened."""
        with open(self.file(filename), 'r') as f:
            return f.readline().strip()

    def setstatus(self, string):
        """Append `string` as a new line of the '.status' file."""
        self._write('.status', string, 'a')

    def getstatus(self):
        """Return the first word of the last line of the '.status' file.

        Returns 'none' when the file cannot be opened, is empty, or ends in
        a blank line.
        """
        try:
            f = open(self.file('.status'), 'r')
        except IOError:
            return 'none'

        # fast forward to the end, remembering the last line seen
        last = None
        with f:
            for line in f:
                last = line

        # the first word on the last line is the status
        words = last.split() if last else []
        return words[0] if words else 'none'

    def __str__(self):
        return self.dir


if __name__ == '__main__':
    import platform
    binaries = { 'i686' : 'm5.i386',
                 'x86_64' : 'm5.amd64' }
    binary = binaries[platform.machine()]

    cwd = os.getcwd()
    rootdir = env.setdefault('ROOTDIR', os.path.dirname(cwd))
    oar_jobid = int(env['OAR_JOBID'])
    oar_jobname = os.path.basename(cwd)
    #pbs_jobname = env['PBS_JOBNAME']
    basedir = joinpath(rootdir, 'Base')
    # setdefault() is used for its side effect: the values end up in the
    # environment inherited by the child process started below.
    env.setdefault('JOBNAME', oar_jobname)
    env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py'))
    outdir = env.setdefault('OUTPUT_DIR', cwd)
    env['POOLJOB'] = 'True'

    if os.path.isdir("/work"):
        workbase = "/work"
    else:
        workbase = "/tmp/"

    workdir = joinpath(workbase, '%s.%s' % (env['USER'], oar_jobid))
    host = socket.gethostname()

    os.umask(0o022)

    jobdir = JobDir(outdir)

    started = date()
    jobdir.echofile('.running', started)
    jobdir.rmfile('.queued')
    jobdir.echofile('.host', host)

    jobdir.setstatus('running on %s on %s' % (host, started))

    # Start from an empty scratch directory.
    if os.path.isdir(workdir):
        cleandir(workdir)
    else:
        os.mkdir(workdir)

    # Deliberately disabled by the leading 'False'.
    if False and os.path.isdir('/z/dist'):
        sync = rsync()
        sync.delete = True
        sync.sudo = True
        sync.do('poolfs::dist/m5/', '/z/dist/m5/')

    try:
        os.chdir(workdir)
    except OSError as e:
        sys.exit(e)

    os.symlink(jobdir.file('output'), 'status.out')

    args = [ joinpath(basedir, binary), joinpath(basedir, 'run.py') ]
    if not args:
        sys.exit("no arguments")

    print('starting job... %s' % started)
    print(' '.join(args))
    print('')
    sys.stdout.flush()

    childpid = os.fork()
    if not childpid:
        # Child: close stdin, point stdout/stderr at the job's 'output'
        # file, then replace this process with the job binary.
        try:
            sys.stdin.close()
            fd = os.open(jobdir.file("output"),
                         os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
            os.dup2(fd, sys.stdout.fileno())
            os.dup2(fd, sys.stderr.fileno())
            os.execvp(args[0], args)
        except Exception as e:
            sys.stderr.write('failed to start %s: %s\n' % (args[0], e))
            sys.stderr.flush()
        finally:
            # Only reached when exec failed.  Leave immediately so the child
            # never falls through into the parent's bookkeeping below.
            os._exit(127)

    def handler(signum, frame):
        # Forward the signal to the job so that signalling this wrapper
        # reaches the child as well.
        if childpid != 0:
            try:
                os.kill(childpid, signum)
            except OSError:
                # The child is already gone; nothing left to signal.
                pass

    signal.signal(signal.SIGHUP, handler)
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGQUIT, handler)
    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGCONT, handler)
    signal.signal(signal.SIGUSR1, handler)
    signal.signal(signal.SIGUSR2, handler)

    status = 'failure'
    while True:
        try:
            thepid, ec = os.waitpid(childpid, 0)
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted by one of the forwarded signals: wait again.
                continue
            # Any other error (e.g. ECHILD) will not go away by retrying;
            # report it and record the job as failed instead of spinning.
            print('waitpid failed: %s' % e)
            break
        if ec:
            print('Exit code  %s' % ec)
        else:
            status = 'success'
        break

    complete = date()
    print('\njob complete... %s' % complete)
    jobdir.echofile('.%s' % status, complete)
    jobdir.rmfile('.running')
    jobdir.setstatus('%s on %s' % (status, complete))