job.py revision 1392:aabce7923b3b
1#!/usr/bin/env python
2# Copyright (c) 2005 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;
9# redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution;
12# neither the name of the copyright holders nor the names of its
13# contributors may be used to endorse or promote products derived from
14# this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#
28# Authors: Nathan Binkert
29#          Steve Reinhardt
30#          Ali Saidi
31
32import os, os.path, shutil, signal, socket, sys, time
33from os import environ as env
34from os.path import join as joinpath, expanduser
35
class rsync:
    """Small wrapper that assembles and runs an rsync command line.

    Callers configure the public attributes and then call do():

      sudo     -- prefix the command with 'sudo' when true
      rsync    -- name (or path) of the rsync executable
      compress -- pass '-z' when true
      archive  -- pass '-a' when true
      delete   -- pass '--delete' when true
      options  -- extra command-line options: either a string, which is
                  split shell-style into separate arguments, or a
                  list/tuple of ready-made arguments
    """

    def __init__(self):
        self.sudo = False
        self.rsync = 'rsync'
        self.compress = False
        self.archive = True
        self.delete = False
        self.options = ''

    def _command(self, src, dst):
        """Return the argv list for copying src to dst."""
        # Imported here so the class does not rely on a file-level import.
        import shlex

        args = []
        if self.sudo:
            args.append('sudo')

        args.append(self.rsync)
        if self.archive:
            args.append('-a')
        if self.compress:
            args.append('-z')
        if self.delete:
            args.append('--delete')

        extra = self.options
        if extra:
            if isinstance(extra, (list, tuple)):
                args.extend(extra)
            else:
                # A string such as '-v -n' has to become two arguments;
                # handing it over as one makes the program reject it.
                args.extend(shlex.split(extra))

        args.append(src)
        args.append(dst)
        return args

    def do(self, src, dst):
        """Run the command for src -> dst and wait for it to finish.

        Returns the value of os.spawnvp(os.P_WAIT, ...): the exit code
        of the process, or the negated signal number if it was killed.
        """
        args = self._command(src, dst)
        return os.spawnvp(os.P_WAIT, args[0], args)
63
def cleandir(dir):
    """Remove everything inside directory dir, leaving dir itself in place.

    The tree is walked bottom-up so that each subdirectory is already
    empty by the time it is removed.  Symbolic links are unlinked, never
    followed, so nothing outside dir is touched.
    """
    for root, dirs, files in os.walk(dir, False):
        for name in files:
            os.remove(joinpath(root, name))
        for name in dirs:
            path = joinpath(root, name)
            if os.path.islink(path):
                # os.walk reports a symlink to a directory among the
                # directories, but os.rmdir refuses to remove a symlink.
                os.remove(path)
            else:
                os.rmdir(path)
70
def date():
    """Return the current local time formatted as a single text line."""
    layout = '%a %b %e %H:%M:%S %Z %Y'
    now = time.localtime()
    return time.strftime(layout, now)
73
def remfile(file):
    """Delete file if it names an existing regular file; otherwise do nothing."""
    if not os.path.isfile(file):
        return
    os.remove(file)
77
def readval(filename):
    """Return the first line of filename with surrounding whitespace removed.

    An empty file yields the empty string.  The file is closed even if
    reading fails.
    """
    with open(filename, 'r') as source:
        return source.readline().strip()
83
if __name__ == '__main__':
    # Batch-job wrapper: prepares a scratch directory, runs the simulator
    # as a child process with its output captured in the job directory,
    # forwards signals to it, and leaves marker files (.start, .jobid,
    # .host, .success/.failure, .stop) recording what happened.

    # Local import: only this script section needs errno.
    import errno

    rootdir = env.setdefault('ROOTDIR', os.getcwd())
    jobid = env['PBS_JOBID']
    jobname = env['PBS_JOBNAME']
    jobdir = joinpath(rootdir, jobname)
    basedir = joinpath(rootdir, 'Base')
    user = env['USER']

    # Set in os.environ, so the child process started below inherits them.
    env['POOLJOB'] = 'True'
    env['OUTPUT_DIR'] = jobdir
    env['JOBFILE'] = joinpath(basedir, 'test.py')
    env['JOBNAME'] = jobname

    def echofile(filename, string):
        """Write string plus a newline to jobdir/filename.

        Exits the script with the error as the message if the file
        cannot be written.
        """
        try:
            with open(joinpath(jobdir, filename), 'w') as f:
                f.write('%s\n' % string)
        except IOError as e:
            sys.exit(e)

    if os.path.isdir("/work"):
        workbase = "/work"
    else:
        workbase = "/tmp/"

    workdir = joinpath(workbase, '%s.%s' % (user, jobid))

    os.umask(0o022)

    echofile('.start', date())
    echofile('.jobid', jobid)
    echofile('.host', socket.gethostname())

    # Start from an empty scratch directory.
    if os.path.isdir(workdir):
        cleandir(workdir)
    else:
        os.mkdir(workdir)

    # Refresh the local copy of the distribution when this host has one.
    # The exit status of rsync is not checked.
    if os.path.isdir('/z/dist'):
        sync = rsync()
        sync.delete = True
        sync.sudo = True
        sync.do('poolfs::dist/m5/', '/z/dist/m5/')

    try:
        os.chdir(workdir)
    except OSError as e:
        sys.exit(e)

    # 'status.out' in the scratch directory points at the job's output file.
    os.symlink(joinpath(jobdir, 'output'), 'status.out')

    args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.mpy') ]

    print('starting job... %s' % date())
    print(' '.join(args))
    print('')
    # Flush before forking so buffered text is not written twice.
    sys.stdout.flush()

    childpid = os.fork()
    if not childpid:
        # Child: send stdout and stderr to the job's output file, then
        # replace this process with the command.
        sys.stdin.close()
        fd = os.open(joinpath(jobdir, "output"),
                     os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
        os.dup2(fd, sys.stdout.fileno())
        os.dup2(fd, sys.stderr.fileno())
        os.execvp(args[0], args)

    def handler(signum, frame):
        """Pass a signal received by this wrapper on to the child."""
        if childpid != 0:
            try:
                os.kill(childpid, signum)
            except OSError as e:
                # The child may already be gone; that is not an error.
                if e.errno != errno.ESRCH:
                    raise

    # SIGSTOP is deliberately absent: it cannot be caught, and asking
    # signal.signal() to handle it raises an exception, which would kill
    # this wrapper right after the child has been started.
    forwarded = (signal.SIGHUP, signal.SIGINT, signal.SIGQUIT,
                 signal.SIGTERM, signal.SIGCONT, signal.SIGUSR1,
                 signal.SIGUSR2)
    for signum in forwarded:
        signal.signal(signum, handler)

    # Wait for the child.  A forwarded signal can interrupt waitpid with
    # EINTR, in which case we simply wait again; any other error is real
    # and retrying it would loop forever.
    while True:
        try:
            thepid, ec = os.waitpid(childpid, 0)
        except OSError as e:
            if e.errno == errno.EINTR:
                continue
            raise
        break

    # ec is the raw status from waitpid; any non-zero value is a failure.
    if ec:
        print('Exit code  %s' % ec)
        echofile('.failure', date())
    else:
        echofile('.success', date())

    print('\njob complete... %s' % date())
    echofile('.stop', date())
184