mkdoc.py revision 12391:ceeca8b41e4b
112855Sgabeblack@google.com#!/usr/bin/env python3
212855Sgabeblack@google.com#
312855Sgabeblack@google.com#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
412855Sgabeblack@google.com#
512855Sgabeblack@google.com#  Extract documentation from C++ header files to use it in Python bindings
612855Sgabeblack@google.com#
712855Sgabeblack@google.com
812855Sgabeblack@google.comimport os
912855Sgabeblack@google.comimport sys
1012855Sgabeblack@google.comimport platform
1112855Sgabeblack@google.comimport re
1212855Sgabeblack@google.comimport textwrap
1312855Sgabeblack@google.com
1412855Sgabeblack@google.comfrom clang import cindex
1512855Sgabeblack@google.comfrom clang.cindex import CursorKind
1612855Sgabeblack@google.comfrom collections import OrderedDict
1712855Sgabeblack@google.comfrom threading import Thread, Semaphore
1812855Sgabeblack@google.comfrom multiprocessing import cpu_count
1912855Sgabeblack@google.com
# Cursor kinds whose children are visited by extract(); the spelling of
# every kind except TRANSLATION_UNIT is appended to the name prefix.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]
2812855Sgabeblack@google.com
# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    # Types
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Callables
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    # Data members
    CursorKind.FIELD_DECL,
]
4212855Sgabeblack@google.com
# C++ operator token -> identifier-safe word used by sanitize_name().
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor',
    '<<=': 'ilshift', '>>=': 'irshift',
    '++': 'inc', '--': 'dec',
    '<<': 'lshift', '>>': 'rshift',
    '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor',
    '+': 'add', '-': 'sub', '*': 'mul', '/': 'div', '%': 'mod',
    '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call',
}

# Longest tokens first, so that e.g. '<<=' is tried before '<<' and '<'.
# The sort is stable, so tokens of equal length keep the order above.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(),
           key=lambda pair: len(pair[0]),
           reverse=True))
5512855Sgabeblack@google.com
# Upper bound on concurrently running extraction threads; each
# ExtractionThread takes one permit from the semaphore.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# (name, filename, comment) tuples appended by extract().
output = []
6012855Sgabeblack@google.com
def d(s):
    """Return *s* as text.

    Values that are already ``str`` are returned unchanged; anything else
    is decoded as UTF-8.  Accepting both avoids an ``AttributeError`` when
    the caller hands in a ``str``, which has no ``decode`` method.

    Args:
        s: A ``str`` or a UTF-8 encoded ``bytes`` object.

    Returns:
        The text as ``str``.
    """
    return s if isinstance(s, str) else s.decode('utf8')
6312855Sgabeblack@google.com
6412855Sgabeblack@google.com
def sanitize_name(name):
    """Turn a prefixed C++ entity name into a ``__doc_*`` identifier.

    ``type-parameter-0-N`` placeholders become ``TN``, ``operatorX``
    spellings are replaced using CPP_OPERATORS, anything between the
    first ``<`` and the last ``>`` is dropped, and every remaining
    non-alphanumeric character is turned into an underscore.  Runs of
    underscores are collapsed and one trailing underscore is removed.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator' + symbol, 'operator_' + word)
    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub('_+', '_', name)
    name = re.sub('_$', '', name)
    return '__doc_' + name
73
74
def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    The comment delimiters are removed, the common indentation is
    stripped, a subset of Doxygen commands and HTML tags is rewritten
    into a lightweight markup, and the prose is re-flowed to 70 columns.
    Text between triple-backtick fences is emitted without re-flowing.

    Args:
        comment: Raw comment text including its delimiters; may be
            empty.

    Returns:
        The converted text, without leading newlines or trailing
        whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:]
            # A one-line block comment ('/** text */') also carries the
            # closing delimiter; strip it here so it does not leak into
            # the generated docstring.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
            s = s.lstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if len(s) > 0:
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    # Strip the indentation shared by all non-empty lines.  When every
    # line is empty there is nothing to strip.
    if leading_spaces != float('inf'):
        lines = [line[leading_spaces:] for line in lines]
    result = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = result
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # Section headings.  A leading '$' marks a heading for the re-flow
    # step below.  The trailing \b keeps a short command from matching
    # the front of a longer one (e.g. '\throw' inside '\throws'), which
    # would otherwise leave a stray 's' at the start of the section.
    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    # The capturing group keeps the fences in the split result, so the
    # pieces alternate between prose and code.
    for x in re.split(r'(```)', s):
        if x == '```':
            if not in_code_segment:
                parts.append('```\n')
            else:
                parts.append('\n```\n\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            parts.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    # Heading: drop the marker and indent the paragraph
                    # that follows it.
                    parts.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')
183
184
def extract(filename, node, prefix):
    """Walk the cursor tree below *node* and record docstring entries.

    Nodes located in a file other than *filename* are skipped.  Kinds in
    RECURSE_LIST have their children visited with an extended prefix;
    kinds in PRINT_LIST with a non-empty spelling get a
    ``(name, filename, comment)`` tuple appended to the module-level
    ``output`` list.

    Returns 0 for a skipped node and None otherwise.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        child_prefix = prefix
        # The translation unit itself contributes no name component.
        if kind != CursorKind.TRANSLATION_UNIT:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        if len(node.spelling) > 0:
            qualified = prefix + '_' if prefix else prefix
            qualified += d(node.spelling)
            output.append((sanitize_name(qualified), filename, comment))
207
208
class ExtractionThread(Thread):
    """Thread that parses one header and extracts its documentation.

    A permit is taken from ``job_semaphore`` in the constructor and given
    back when ``run`` finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        # Taken here rather than in run(), so constructing a thread
        # blocks the caller while no permit is available.
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '')
        finally:
            job_semaphore.release()
225
if __name__ == '__main__':
    # Default compiler flags; every command-line argument starting with
    # '-' is appended, everything else is treated as a header to process.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # os.walk() yields nothing for an unreadable directory and
            # the directory may contain no SDK at all; in both cases
            # simply skip -isysroot instead of crashing.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        # stdout carries the generated header, so diagnostics go to
        # stderr; sys.exit is used because the bare exit() helper is
        # only installed by the site module.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output.clear()
    for filename in filenames:
        # The constructor blocks while all job_count permits are in use.
        thr = ExtractionThread(filename, parameters)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Each thread returns its permit when it finishes, so holding all of
    # them means no extraction is still running.
    for i in range(job_count):
        job_semaphore.acquire()

    # Entries are sorted by (name, filename); repeated names get a
    # numeric suffix starting at _2.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment))

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')
305