#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#
#  Source: mkdoc.py revision 12391:ceeca8b41e4b
#

import os
import sys
import platform
import re
import textwrap

from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
from threading import Thread, Semaphore
from multiprocessing import cpu_count

# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

# Maps the symbol part of a C++ 'operator<sym>' name to an identifier-safe
# suffix.
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
    'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
    'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
    'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
}

# Longest symbols first, so that e.g. 'operator<<=' is replaced before the
# shorter 'operator<<' or 'operator<' could match a prefix of it.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))

# Doxygen commands that open a titled section, as (command, heading) pairs.
# Longer commands precede their prefixes; together with the "no letter
# follows" check in _convert_markup() this keeps '\throws' from being read
# as '\throw' followed by a stray 's'.
_SECTION_TAGS = (
    ('returns', 'Returns'),
    ('return', 'Returns'),
    ('authors', 'Authors'),
    ('author', 'Author'),
    ('copyright', 'Copyright'),
    ('date', 'Date'),
    ('remarks', 'Remarks'),
    ('remark', 'Remark'),
    ('sa', 'See also'),
    ('see', 'See also'),
    ('extends', 'Extends'),
    ('throws', 'Throws'),
    ('throw', 'Throws'),
)

# One semaphore slot per CPU bounds the number of concurrent extraction
# threads; see ExtractionThread and main().
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Collected (sanitized_name, filename, comment) tuples, appended by extract().
output = []


def d(s):
    """Return ``s`` as text, decoding it from UTF-8 when it is not a ``str``.

    The previous implementation called ``s.decode('utf8')`` unconditionally,
    which raises ``AttributeError`` for ``str`` input.
    NOTE(review): presumably newer clang Python bindings hand back ``str``
    while older ones hand back ``bytes`` -- confirm against the bindings in use.
    """
    return s if isinstance(s, str) else s.decode('utf8')


def sanitize_name(name):
    """Turn a prefixed C++ entity name into a ``__doc_``-prefixed identifier.

    ``type-parameter-0-N`` becomes ``TN``, operator symbols are spelled out
    using CPP_OPERATORS, anything between the first ``<`` and the last ``>``
    is dropped, every remaining non-alphanumeric character becomes ``_``,
    runs of ``_`` are collapsed and a trailing ``_`` is removed.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator%s' % symbol, 'operator_%s' % word)
    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub('_$', '', re.sub('_+', '_', name))
    return '__doc_' + name


def _strip_comment_syntax(comment):
    """Remove ``/* */``, ``///`` and leading ``*`` decoration, then dedent."""
    lines = []
    leading_spaces = float('inf')
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()
        opened = line.startswith('/*')
        # A line may both open and close a comment ('/** text */'); require
        # enough characters that opener and closer do not overlap ('/*/').
        closed = line.endswith('*/') and len(line) >= (4 if opened else 2)
        if closed:
            line = line[:-2].rstrip('*')
        if opened:
            line = line[2:].lstrip('*')
        elif not closed and line.startswith('///'):
            line = line[3:]
        if line.startswith('*'):
            line = line[1:]
        if line:
            leading_spaces = min(leading_spaces,
                                 len(line) - len(line.lstrip()))
        lines.append(line)

    # Only dedent when at least one non-empty line set the indentation.
    if leading_spaces != float('inf'):
        lines = [line[leading_spaces:] for line in lines]
    return ''.join(line + '\n' for line in lines)


def _convert_markup(s):
    """Translate Doxygen commands and a few HTML/TeX tags to text markup.

    Section headings are emitted with a leading ``$`` marker that _reflow()
    consumes.
    """
    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for tag, heading in _SECTION_TAGS:
        # The lookahead rejects a command that is merely a prefix of a longer
        # word (e.g. '\throw' inside '\throws').
        s = re.sub(r'\\%s(?![A-Za-z])\s*' % tag,
                   r'\n\n$%s:\n\n' % heading, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Re-wrap paragraphs to 70 columns, leaving fenced code blocks alone.

    A paragraph starting with ``$`` is a section heading: the marker is
    removed and the paragraph that follows it is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    pieces = []
    in_code_segment = False
    # The capturing group keeps the ``` fences in the split result so they
    # can toggle in_code_segment.
    for chunk in re.split(r'(```)', s):
        if chunk == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(chunk.strip())
        else:
            for paragraph in re.split(r'(?: *\n *){2,}', chunk):
                flat = re.sub(r'\s+', ' ', paragraph).strip()
                if flat.startswith('$'):
                    # Test the marker on the unwrapped text and drop any
                    # pending indentation first; otherwise a heading that
                    # directly follows another heading would be indented
                    # and keep its literal '$'.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    pieces.append(wrapper.fill(flat)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(flat)
                    if wrapped:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    r"""Convert a raw C++ documentation comment into plain docstring text.

    The comment decoration is stripped, Doxygen commands (``\param``,
    ``\return``, ``\code`` ...) and a handful of HTML/TeX tags are rewritten,
    and the prose is re-wrapped to 70 columns.

    Parameters:
        comment: the comment text including its ``/* */`` or ``///`` markers;
            may be empty.

    Returns:
        The converted text without leading newlines or trailing whitespace.
    """
    return _reflow(_convert_markup(_strip_comment_syntax(comment)))


def extract(filename, node, prefix):
    """Walk the cursor tree below ``node`` and record documented entities.

    Parameters:
        filename: path of the header being processed; cursors located in any
            other file are skipped.
        node: the cursor to inspect.
        prefix: underscore-joined names of the enclosing scopes ('' at the
            top level).

    Appends ``(name, filename, comment)`` tuples to the module-level
    ``output`` list.
    """
    location_file = node.location.file
    if location_file is not None and \
            not os.path.samefile(d(location_file.name), filename):
        # The return value is not used by any caller in this file; 0 is kept
        # from the original implementation.
        return 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, sub_prefix)
    if node.kind in PRINT_LIST:
        raw_comment = node.raw_comment
        comment = process_comment(
            d(raw_comment) if raw_comment is not None else '')
        sub_prefix = prefix
        if len(sub_prefix) > 0:
            sub_prefix += '_'
        # Entities with an empty spelling are not recorded.
        if len(node.spelling) > 0:
            name = sanitize_name(sub_prefix + d(node.spelling))
            output.append((name, filename, comment))


class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings."""

    def __init__(self, filename, parameters):
        """Store the job and claim a slot of ``job_semaphore``.

        The acquire happens here, in the constructing thread, so creating a
        new ExtractionThread blocks while ``job_count`` jobs hold a slot.
        """
        Thread.__init__(self)
        self.filename = filename
        self.parameters = parameters
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and feed its cursor tree to extract()."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '')
        finally:
            # Always give the slot back, even if parsing failed, so main()
            # cannot wait forever.
            job_semaphore.release()


def main():
    """Command-line entry point: write a C header of docstrings to stdout."""
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK sub-directory that is listed. The default
            # and the emptiness check avoid StopIteration / IndexError when
            # the directory cannot be listed or holds no SDK.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    # Arguments starting with '-' are handed to the parser as compiler
    # flags; everything else is a header file to process.
    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit rather than the site-provided exit() helper.
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output.clear()
    for filename in filenames:
        thr = ExtractionThread(filename, parameters)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Taking every slot succeeds only once each running job has released
    # its own, i.e. once all extraction threads are done.
    for _ in range(job_count):
        job_semaphore.acquire()

    # Entries are sorted so equal names are adjacent; the second and later
    # occurrences of a name get a numeric suffix (_2, _3, ...).
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment))

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')


if __name__ == '__main__':
    main()