job.py revision 1816:ecb6cb1337e8
#!/usr/bin/env python
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Steve Reinhardt
#          Ali Saidi
31
import errno
import os, os.path, shutil, signal, socket, sys, time
from os import environ as env
from os.path import join as joinpath, expanduser
35
class rsync:
    """Small helper that assembles and runs one rsync command line.

    Callers tweak the attributes below after construction and then call
    do(src, dst):

      rsync    -- name of the program to execute (default 'rsync')
      sudo     -- when true, the command is prefixed with 'sudo'
      archive  -- when true, pass '-a' (on by default)
      compress -- when true, pass '-z'
      delete   -- when true, pass '--delete'
      options  -- extra text passed through as ONE additional argument
                  when non-empty (it is not split on whitespace)
    """

    def __init__(self):
        self.rsync = 'rsync'
        self.options = ''
        self.archive = True
        self.compress = False
        self.delete = False
        self.sudo = False

    def do(self, src, dst):
        """Run the configured command for src -> dst and wait for it.

        Returns the value of os.spawnvp(os.P_WAIT, ...) for the command.
        """
        command = []
        if self.sudo:
            command = ['sudo']
        command += [self.rsync]

        # Switches are emitted in a fixed order: -a, -z, --delete.
        switches = (
            (self.archive, '-a'),
            (self.compress, '-z'),
            (self.delete, '--delete'),
        )
        for enabled, flag in switches:
            if enabled:
                command.append(flag)

        if len(self.options) > 0:
            command.append(self.options)

        command.extend([src, dst])

        return os.spawnvp(os.P_WAIT, command[0], command)
63
def cleandir(dir):
    """Delete everything inside *dir*, leaving *dir* itself in place.

    dir -- path of the directory to empty.

    Raises OSError if an entry cannot be removed.
    """
    # Walk bottom-up (topdown=False) so that every directory has already
    # been emptied by the time it is removed.
    for root, dirs, files in os.walk(dir, False):
        for name in files:
            os.remove(joinpath(root, name))
        for name in dirs:
            path = joinpath(root, name)
            # os.walk reports symlinks that point at directories in `dirs`,
            # but os.rmdir refuses to remove a symlink; unlink it instead
            # (the link target is left untouched).
            if os.path.islink(path):
                os.remove(path)
            else:
                os.rmdir(path)
70
def date():
    """Return the current local time as a human-readable timestamp.

    Fields, in order: abbreviated weekday, abbreviated month, day of month,
    HH:MM:SS, time zone name, four-digit year.  The day uses '%e', which is
    a platform strftime extension rather than a portable directive.
    """
    stamp_format = '%a %b %e %H:%M:%S %Z %Y'
    now = time.localtime()
    return time.strftime(stamp_format, now)
73
def remfile(file):
    """Delete *file* when os.path.isfile() accepts it; otherwise do nothing.

    Missing paths and directories are silently left alone.
    """
    if not os.path.isfile(file):
        return
    os.unlink(file)
77
def readval(filename):
    """Return the first line of *filename* with surrounding whitespace removed.

    filename -- path of the text file to read.

    Returns '' for an empty file.  Raises IOError if the file cannot be
    opened.
    """
    handle = open(filename, 'r')
    # try/finally guarantees the handle is closed even if the read fails.
    try:
        return handle.readline().strip()
    finally:
        handle.close()
83
if __name__ == '__main__':
    # Job driver: reads its configuration from the environment, prepares a
    # scratch work directory, runs m5 in a child process with stdout/stderr
    # redirected to OUTPUT_DIR/output, forwards signals to that child, and
    # drops marker files (.start, .host, .success/.failure, .stop, ...) into
    # OUTPUT_DIR along the way.
    #
    # Written in syntax that parses on both Python 2.6+ and Python 3.

    # PBS_JOBID and PBS_JOBNAME must be set (KeyError otherwise); the other
    # variables fall back to defaults derived from them.  setdefault() on
    # os.environ also exports the value, so the child process inherits it.
    rootdir = env.setdefault('ROOTDIR', os.getcwd())
    pbs_jobid = env['PBS_JOBID']
    pbs_jobname = env['PBS_JOBNAME']
    basedir = joinpath(rootdir, 'Base')
    jobname = env.setdefault('JOBNAME', pbs_jobname)
    # JOBFILE is not used below; it is only exported through the environment
    # (presumably for run.py -- verify against that script).
    jobfile = env.setdefault('JOBFILE', joinpath(basedir, 'test.py'))
    outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname))
    env['POOLJOB'] = 'True'

    # Prefer the /work scratch area when the host has one.
    if os.path.isdir("/work"):
        workbase = "/work"
    else:
        workbase = "/tmp/"

    workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid))

    def echofile(filename, string):
        """Write *string* plus a newline to outdir/filename.

        Exits the script (via sys.exit) if the file cannot be written.
        """
        try:
            f = open(joinpath(outdir, filename), 'w')
            try:
                f.write('%s\n' % string)
                f.flush()
            finally:
                f.close()
        except IOError as e:
            sys.exit(e)

    os.umask(0o022)

    echofile('.start', date())
    echofile('.pbs_jobid', pbs_jobid)
    # Record the job *name* here; the id is already stored in .pbs_jobid.
    echofile('.pbs_jobname', pbs_jobname)
    echofile('.host', socket.gethostname())

    # Start from an empty work directory.
    if os.path.isdir(workdir):
        cleandir(workdir)
    else:
        os.mkdir(workdir)

    # When the host has /z/dist, refresh its m5 tree from the pool server.
    if os.path.isdir('/z/dist'):
        sync = rsync()
        sync.delete = True
        sync.sudo = True
        sync.do('poolfs::dist/m5/', '/z/dist/m5/')

    try:
        os.chdir(workdir)
    except OSError as e:
        sys.exit(e)

    # Make the job's output file reachable from the work directory.
    os.symlink(joinpath(outdir, 'output'), 'status.out')

    args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ]

    sys.stdout.write('starting job... %s\n' % date())
    sys.stdout.write('%s\n\n' % ' '.join(args))
    # Flush before forking so buffered text is not emitted twice.
    sys.stdout.flush()

    childpid = os.fork()
    if not childpid:
        # Child: close stdin, point stdout and stderr at the output file,
        # then replace this process with the m5 command.
        sys.stdin.close()
        fd = os.open(joinpath(outdir, "output"),
                     os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
        os.dup2(fd, sys.stdout.fileno())
        os.dup2(fd, sys.stderr.fileno())
        os.execvp(args[0], args)

    def handler(signum, frame):
        """Forward a received signal to the child while it is still running."""
        if childpid != 0:
            os.kill(childpid, signum)

    # SIGSTOP is deliberately absent: it cannot be caught, and asking
    # signal.signal() to install a handler for it raises an error.
    forwarded = (signal.SIGHUP, signal.SIGINT, signal.SIGQUIT,
                 signal.SIGTERM, signal.SIGCONT, signal.SIGUSR1,
                 signal.SIGUSR2)
    for signum in forwarded:
        signal.signal(signum, handler)

    while True:
        try:
            # ec is the status word reported by waitpid; zero means the
            # child exited cleanly.
            ec = os.waitpid(childpid, 0)[1]
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted by one of the forwarded signals: wait again.
                continue
            # Any other error will not go away by retrying; report it and
            # stop instead of spinning forever.
            sys.stdout.write('waitpid failed: %s\n' % e)
            echofile('.failure', date())
            break

        # The child has been reaped; stop forwarding signals to its pid.
        childpid = 0
        if ec:
            sys.stdout.write('Exit code  %s\n' % ec)
            echofile('.failure', date())
        else:
            echofile('.success', date())
        break

    sys.stdout.write('\njob complete... %s\n' % date())
    echofile('.stop', date())
182