mkdoc.py revision 12391:ceeca8b41e4b
1#!/usr/bin/env python3
2#
3#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
4#
5#  Extract documentation from C++ header files to use it in Python bindings
6#
7
8import os
9import sys
10import platform
11import re
12import textwrap
13
14from clang import cindex
15from clang.cindex import CursorKind
16from collections import OrderedDict
17from threading import Thread, Semaphore
18from multiprocessing import cpu_count
19
# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    # Types and their enumerators
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Callables
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    # Data members
    CursorKind.FIELD_DECL,
]
42
# Maps C++ operator symbols to identifier-safe names.  The mapping is
# ordered longest symbol first; entries of equal length keep the order in
# which they are listed below (the sort is stable).
CPP_OPERATORS = OrderedDict(sorted(
    [
        ('<=', 'le'),
        ('>=', 'ge'),
        ('==', 'eq'),
        ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'),
        ('-=', 'isub'),
        ('*=', 'imul'),
        ('/=', 'idiv'),
        ('%=', 'imod'),
        ('&=', 'iand'),
        ('|=', 'ior'),
        ('^=', 'ixor'),
        ('<<=', 'ilshift'),
        ('>>=', 'irshift'),
        ('++', 'inc'),
        ('--', 'dec'),
        ('<<', 'lshift'),
        ('>>', 'rshift'),
        ('&&', 'land'),
        ('||', 'lor'),
        ('!', 'lnot'),
        ('~', 'bnot'),
        ('&', 'band'),
        ('|', 'bor'),
        ('+', 'add'),
        ('-', 'sub'),
        ('*', 'mul'),
        ('/', 'div'),
        ('%', 'mod'),
        ('<', 'lt'),
        ('>', 'gt'),
        ('=', 'assign'),
        ('()', 'call'),
    ],
    key=lambda pair: -len(pair[0])))
55
# Number of extraction jobs allowed to run at once (one per CPU).
job_count = cpu_count()
# Counting semaphore holding one permit per allowed job.
job_semaphore = Semaphore(value=job_count)

# Accumulates one (name, filename, comment) tuple per documented entity.
output = list()
60
def d(s):
    """Return ``s`` as text, decoding it from UTF-8 when it is ``bytes``.

    Values that are already ``str`` are returned unchanged, so callers
    can pass either type without triggering an ``AttributeError`` on a
    missing ``.decode`` method.
    """
    return s if isinstance(s, str) else s.decode('utf8')
63
64
def sanitize_name(name):
    """Turn a qualified C++ name into a ``__doc_``-prefixed identifier.

    Operator symbols are spelled out using ``CPP_OPERATORS``, template
    argument lists are dropped, and every remaining non-alphanumeric
    character becomes an underscore.
    """
    # 'type-parameter-0-N' -> 'TN'
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)

    # Spell out operators, in the iteration order of CPP_OPERATORS.
    for symbol, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator' + symbol, 'operator_' + word)

    # Drop everything between the first '<' and the last '>'.
    cleaned = re.sub('<.*>', '', cleaned)

    cleaned = ''.join(ch if ch.isalnum() else '_' for ch in cleaned)

    # Collapse runs of underscores, then remove a trailing one.
    cleaned = re.sub('_+', '_', cleaned)
    cleaned = re.sub('_$', '', cleaned)
    return '__doc_' + cleaned
73
74
# Doxygen section commands and the heading each one is rendered as.
_DOXYGEN_SECTIONS = (
    ('return', 'Returns'),
    ('returns', 'Returns'),
    ('author', 'Author'),
    ('authors', 'Authors'),
    ('copyright', 'Copyright'),
    ('date', 'Date'),
    ('remark', 'Remark'),
    ('remarks', 'Remarks'),
    ('sa', 'See also'),
    ('see', 'See also'),
    ('extends', 'Extends'),
    ('throw', 'Throws'),
    ('throws', 'Throws'),
)


def _strip_comment_syntax(comment):
    """Remove ``/* */``, ``///`` and leading ``*`` decoration from a comment.

    Returns the comment text, one ``'\\n'``-terminated line per input line,
    with the smallest indentation found on any non-empty line removed.
    """
    lines = []
    leading_spaces = None
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
            # A one-line block comment ('/** text */') also carries the
            # terminator on the opening line.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if s:
            indent = len(s) - len(s.lstrip())
            if leading_spaces is None or indent < leading_spaces:
                leading_spaces = indent
        lines.append(s)

    # Only dedent when at least one non-empty line was seen.
    if leading_spaces is not None:
        lines = [s[leading_spaces:] for s in lines]
    return ''.join(s + '\n' for s in lines)


def _convert_markup(s):
    """Rewrite Doxygen commands and simple HTML/TeX tags in ``s``.

    Section headings and parameter entries are emitted as paragraphs that
    start with a ``$`` marker; ``_reflow_text`` consumes that marker.
    """
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    # Inline Doxygen commands
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # The \b keeps a short command (e.g. \author) from matching the prefix
    # of a longer one (e.g. \authors) and leaving a stray letter behind.
    for in_, out_ in _DOXYGEN_SECTIONS:
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow_text(s):
    """Re-wrap the paragraphs of ``s`` to 70 columns.

    Text between ``` fences is copied through (stripped) without wrapping.
    A paragraph starting with ``$`` is emitted as a heading line without
    the marker, and the paragraph that follows it is indented four spaces.
    """
    wrapper = textwrap.TextWrapper(
        width=70, expand_tabs=True, replace_whitespace=True,
        drop_whitespace=True, initial_indent='', subsequent_indent='')

    pieces = []
    in_code_segment = False
    # The capturing group keeps the fences themselves in the split result.
    for x in re.split(r'(```)', s):
        if x == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if wrapped.startswith('$'):
                    pieces.append(wrapped[1:] + '\n')
                    indent = ' ' * 4
                else:
                    if wrapped:
                        pieces.append(wrapped + '\n\n')
                    indent = ''
                wrapper.initial_indent = wrapper.subsequent_indent = indent
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++/Doxygen comment into plain docstring text.

    The comment delimiters are stripped, Doxygen commands and a few
    HTML/TeX tags are rewritten to lightweight markup, and the prose is
    re-wrapped to 70 columns.

    :param comment: the comment text including its ``/* */`` or ``///``
        delimiters; may be empty.
    :return: the processed text, without leading newlines or trailing
        whitespace.
    """
    return _reflow_text(_convert_markup(_strip_comment_syntax(comment)))
183
184
def extract(filename, node, prefix):
    """Collect docstring entries for ``node`` and its descendants.

    Nodes located in a file other than ``filename`` are skipped (the
    function returns 0 for them).  For kinds in ``RECURSE_LIST`` the
    children are visited with the node's spelling appended to ``prefix``;
    for kinds in ``PRINT_LIST`` a ``(name, filename, comment)`` tuple is
    appended to the module-level ``output`` list.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        child_prefix = prefix
        # The translation unit itself contributes no name component.
        if kind != CursorKind.TRANSLATION_UNIT:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        own_prefix = prefix + '_' if prefix else prefix
        # Entities without a spelling get no entry.
        if len(node.spelling) > 0:
            name = sanitize_name(own_prefix + d(node.spelling))
            output.append((name, filename, comment))
207
208
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    Constructing an instance acquires a permit from ``job_semaphore``, so
    the constructor blocks while no permit is available.  The permit is
    released when ``run`` finishes, whether or not it raised.
    """

    def __init__(self, filename, parameters):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        # Taken in the creating thread; given back at the end of run().
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '')
        finally:
            job_semaphore.release()
225
if __name__ == '__main__':
    # Command-line driver: parse every header named on the command line
    # and print a C++ header of docstring constants to stdout.  Progress
    # messages go to stderr.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK sub-directory reported by os.walk.  The
            # defaults cover an unreadable or empty SDK directory, in which
            # case no sysroot is passed instead of crashing.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    # Arguments starting with '-' are forwarded to the parser; everything
    # else is treated as a header file to process.
    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit rather than the site-provided exit() builtin, which is
        # not guaranteed to exist in every interpreter configuration.
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output.clear()
    # Each constructor call blocks until a job permit is free, which caps
    # the number of concurrently running threads at job_count.
    for filename in filenames:
        thr = ExtractionThread(filename, parameters)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Holding every permit means no extraction thread is still running.
    for i in range(job_count):
        job_semaphore.acquire()

    # Entries are sorted by name (then file); repeated names, e.g. from
    # overloads, get a numeric suffix starting at _2.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment))

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')
305