mkdoc.py revision 11986:c12e4625ab56
1#!/usr/bin/env python3
2#
3#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
4#
5#  Extract documentation from C++ header files to use it in Python bindings
6#
7
8import os
9import sys
10import platform
11import re
12import textwrap
13
14from clang import cindex
15from clang.cindex import CursorKind
16from collections import OrderedDict
17from threading import Thread, Semaphore
18from multiprocessing import cpu_count
19
# Cursor kinds that extract() descends into: for a node of one of these kinds
# it visits every child, and (except for the translation unit itself) adds the
# node's spelling to the name prefix passed down to those children.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]
28
# Cursor kinds for which extract() emits a `static const char *__doc_...`
# entry built from the node's raw comment.  Namespaces and the translation
# unit are recursed into (see RECURSE_LIST) but are not listed here, so they
# never get an entry of their own.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]
42
# Maps a C++ operator token to the identifier-safe word that sanitize_name()
# substitutes for it ('operator<=' becomes 'operator_le').  The entries are
# ordered longest token first so that a multi-character operator such as
# '<<=' is replaced before any shorter operator it contains ('<<', '<', '=').
CPP_OPERATORS = OrderedDict(sorted(
    (
        ('<=', 'le'), ('>=', 'ge'), ('==', 'eq'), ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'), ('-=', 'isub'), ('*=', 'imul'), ('/=', 'idiv'),
        ('%=', 'imod'), ('&=', 'iand'), ('|=', 'ior'), ('^=', 'ixor'),
        ('<<=', 'ilshift'), ('>>=', 'irshift'),
        ('++', 'inc'), ('--', 'dec'),
        ('<<', 'lshift'), ('>>', 'rshift'),
        ('&&', 'land'), ('||', 'lor'), ('!', 'lnot'), ('~', 'bnot'),
        ('&', 'band'), ('|', 'bor'),
        ('+', 'add'), ('-', 'sub'), ('*', 'mul'), ('/', 'div'),
        ('%', 'mod'),
        ('<', 'lt'), ('>', 'gt'), ('=', 'assign'), ('()', 'call'),
    ),
    key=lambda pair: -len(pair[0])))
55
# Upper bound on concurrently running extraction jobs: the CPU count reported
# by multiprocessing.cpu_count().
job_count = cpu_count()
# Counting semaphore with job_count slots.  ExtractionThread takes a slot in
# its constructor and gives it back when run() finishes; the main script
# acquires all job_count slots to wait for every job to complete.
job_semaphore = Semaphore(job_count)

# Maps each sanitized name to the number of times sanitize_name() has produced
# it so far; repeated names get a numeric suffix ('_2', '_3', ...).
registered_names = dict()
60
61
def d(s):
    """Return *s* as text.

    ``bytes`` values are decoded as UTF-8.  Values that are already ``str``
    are returned unchanged instead of failing on the missing ``decode``
    method, so the helper works whether the caller holds raw bytes or text
    (presumably this depends on the version of the clang bindings in use --
    confirm against the installed ``clang.cindex``).
    """
    return s if isinstance(s, str) else s.decode('utf8')
64
65
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a unique ``__doc_*`` identifier.

    Template placeholders ``type-parameter-0-N`` become ``TN``, operator
    tokens are spelled out using CPP_OPERATORS, anything between the first
    ``<`` and the last ``>`` is dropped, and every remaining
    non-alphanumeric character becomes an underscore (runs are collapsed and
    a trailing underscore removed).  The second and later occurrences of the
    same resulting name receive a ``_<count>`` suffix, tracked in the
    module-level ``registered_names`` dictionary.
    """
    global registered_names
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator%s' % symbol,
                                  'operator_%s' % word)
    cleaned = re.sub('<.*>', '', cleaned)
    cleaned = ''.join(ch if ch.isalnum() else '_' for ch in cleaned)
    cleaned = re.sub('_+', '_', cleaned)
    cleaned = re.sub('_$', '', cleaned)

    # Count this occurrence; only repeats get a disambiguating suffix.
    occurrences = registered_names.get(cleaned, 0) + 1
    registered_names[cleaned] = occurrences
    if occurrences > 1:
        cleaned += '_' + str(occurrences)
    return '__doc_' + cleaned
80
81
def process_comment(comment):
    """Convert a raw C++ documentation comment into plain docstring text.

    The conversion happens in three stages:

    1. The comment delimiters (``/* ... */``, ``///`` and the leading ``*``
       of block-comment continuation lines) are removed and the common
       indentation is stripped.  A block comment that opens and closes on
       the same line loses both delimiters.
    2. Doxygen commands and a few HTML/TeX tags are rewritten into
       lightweight markup.  Section-like commands (``\\param``,
       ``\\return``, ``\\throws``, ...) become a heading line that is
       internally marked with a leading ``$``.  A command name only matches
       as a whole word, so ``\\throws`` is not mistaken for ``\\throw``
       followed by a stray ``s``.
    3. The text is re-flowed to 70 columns paragraph by paragraph.  A
       ``$`` heading is emitted without the marker and the paragraph that
       follows it is indented by four spaces.  Text between triple
       backticks is copied through without re-flowing.

    Parameters:
        comment: the comment text including its C++ comment delimiters
            (may be empty).

    Returns:
        The processed text, without leading newlines or trailing
        whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:]
            # A one-line block comment carries its terminator on the same
            # line as the opener; strip it as well.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
            s = s.lstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if len(s) > 0:
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    # Strip the indentation shared by all non-empty lines (stays at
    # infinity when there is no non-empty line at all).
    if leading_spaces != float('inf'):
        lines = [line[leading_spaces:] for line in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        # The word boundary keeps a command from matching the prefix of a
        # longer one (e.g. 'throw' inside '\throws').
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    # Splitting on a captured group keeps the ``` fences as separate items.
    for x in re.split(r'(```)', s):
        if x == '```':
            if not in_code_segment:
                parts.append('```\n')
            else:
                parts.append('\n```\n\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            parts.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    # Section heading: drop the marker and indent the
                    # paragraph that follows.
                    parts.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')
190
191
def extract(filename, node, prefix, output):
    """Collect docstring definitions for *node* and its descendants.

    Nodes located in a file other than *filename* are ignored.  For kinds
    in RECURSE_LIST the children are visited first, with the node's
    spelling appended to the name prefix (the translation unit adds
    nothing); if none of them produced an entry, the node is skipped as
    well.  For kinds in PRINT_LIST with a non-empty spelling, a
    ``static const char *__doc_...`` definition holding the processed
    comment is appended to *output*.

    Parameters:
        filename: path of the header being processed.
        node: the clang cursor to inspect.
        prefix: underscore-joined names of the enclosing scopes.
        output: list that receives the generated C++ definitions.

    Returns:
        The number of entries appended for this node and its descendants.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind
    count = 0

    if kind in RECURSE_LIST:
        if kind == CursorKind.TRANSLATION_UNIT:
            child_prefix = prefix
        else:
            joiner = '_' if len(prefix) > 0 else ''
            child_prefix = prefix + joiner + d(node.spelling)
        for child in node.get_children():
            count += extract(filename, child, child_prefix, output)
        if count == 0:
            return 0

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        if len(node.spelling) > 0:
            scope = prefix + '_' if len(prefix) > 0 else prefix
            name = sanitize_name(scope + d(node.spelling))
            # Multi-line comments start on their own line in the output.
            separator = '\n' if '\n' in comment else ' '
            output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                          (name, separator, comment))
            count += 1

    return count
219
220
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    Constructing the thread takes one slot of the module-level
    ``job_semaphore`` (blocking the creator while every slot is in use);
    the slot is handed back when ``run`` ends, whether or not parsing
    succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = \
            filename, parameters, output
        # Reserve a job slot before the thread is even started.
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its entries to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            # NOTE(review): the two flags are presumably libclang's
            # excludeDeclarationsFromPCH / displayDiagnostics -- confirm.
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
238
if __name__ == '__main__':
    # Script entry point: parse every header named on the command line with
    # libclang (one ExtractionThread per file) and print a C++ header with
    # one `__doc_*` string per documented entity to standard output.
    # Progress and usage messages go to standard error so that they never
    # end up in the generated header.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        # Use the libclang shipped with Xcode when it is installed.
        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # The first os.walk() entry lists the immediate subdirectories
            # of sdk_dir; use the first one as sysroot.  Skip the option
            # when there is none instead of crashing.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    # Arguments starting with '-' are passed through to clang; everything
    # else is treated as a header file to process.
    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    for filename in filenames:
        # The constructor blocks while all job slots are taken, which
        # limits the number of concurrently running threads to job_count.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Every running job holds one slot until it finishes, so owning all
    # job_count slots means that no job is still running.
    for i in range(job_count):
        job_semaphore.acquire()

    # Sort so that the generated file does not depend on thread timing.
    output.sort()
    for line in output:
        print(line)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')
310