#!/usr/bin/env python3
#
#  mkdoc.py revision 14299:2fbea9df56d2
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

import os
import sys
import platform
import re
import textwrap

from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
from glob import glob
from threading import Thread, Semaphore
from multiprocessing import cpu_count

# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() emits a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

# Cursor kinds whose spelling is NOT appended to the name prefix.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT
]

CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
    'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
    'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
    'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
}

# Longest operators first, so that e.g. 'operator<<=' is replaced before
# 'operator<<' or 'operator<' get a chance to match its prefix.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))

# Doxygen commands that open a titled section, as (command, title) pairs.
# They are matched with a trailing word boundary, so 'throw' can never
# consume the prefix of 'throws' (and likewise author/authors,
# return/returns).
_DOXYGEN_SECTIONS = (
    ('returns', 'Returns'),
    ('return', 'Returns'),
    ('authors', 'Authors'),
    ('author', 'Author'),
    ('copyright', 'Copyright'),
    ('date', 'Date'),
    ('remark', 'Remark'),
    ('sa', 'See also'),
    ('see', 'See also'),
    ('extends', 'Extends'),
    ('throws', 'Throws'),
    ('throw', 'Throws'),
)

job_count = cpu_count()
# Each ExtractionThread holds one permit from construction until its run()
# finishes, which caps the number of concurrently live threads at job_count.
job_semaphore = Semaphore(job_count)


class NoFilenamesError(ValueError):
    """Raised by read_args() when the arguments name no input file."""
    pass


def d(s):
    """Return `s` as `str`, decoding it as UTF-8 if it is not one already."""
    return s if isinstance(s, str) else s.decode('utf8')


def sanitize_name(name):
    """Turn a prefixed C++ entity name into a C identifier '__doc_<name>'.

    Template placeholders 'type-parameter-0-N' become 'TN', operator
    symbols are spelled out using CPP_OPERATORS, anything between the first
    '<' and the last '>' is dropped, every non-alphanumeric character
    becomes '_', runs of '_' are collapsed and one trailing '_' is removed.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for k, v in CPP_OPERATORS.items():
        name = name.replace('operator%s' % k, 'operator_%s' % v)
    name = re.sub('<.*>', '', name)
    name = ''.join([ch if ch.isalnum() else '_' for ch in name])
    name = re.sub('_$', '', re.sub('_+', '_', name))
    return '__doc_' + name


def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters and the common indentation.

    Returns the comment text with one '\\n' appended to every line.
    """
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
            # A block comment written on one line ('/** text */') carries
            # its closing delimiter on the same line as the opening one.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if s:
            # Only non-empty lines take part in the indentation minimum.
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    if leading_spaces != float('inf'):
        lines = [s[leading_spaces:] for s in lines]
    return ''.join(s + '\n' for s in lines)


def _translate_markup(s):
    """Rewrite Doxygen commands and a few HTML/TeX tags as lightweight markup.

    Section headings are emitted as their own paragraph prefixed with '$';
    that marker is consumed later by _reflow().
    """
    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in _DOXYGEN_SECTIONS:
        # \b keeps a short command from matching the head of a longer one.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Re-wrap prose paragraphs to 70 columns; keep ``` segments verbatim.

    A paragraph starting with '$' is a section heading: the marker is
    removed and the paragraph that follows it is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    pieces = []
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(x.strip())
        else:
            for y in re.split(r'(?: *\n *){2,}', x):
                text = re.sub(r'\s+', ' ', y).strip()
                if text.startswith('$'):
                    # Headings are never indented themselves, even when
                    # the previous paragraph was a heading as well.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    pieces.append(wrapper.fill(text)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(text)
                    if wrapped:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++/Doxygen comment into plain docstring text.

    The comment delimiters and common indentation are removed, Doxygen
    commands and some HTML/TeX tags are translated, and the prose is
    re-wrapped to 70 columns. Returns the resulting string ('' for an
    empty comment).
    """
    return _reflow(_translate_markup(_strip_comment_syntax(comment)))


def extract(filename, node, prefix, output):
    """Collect docstrings for `node` and its descendants into `output`.

    Nodes located in a file other than `filename` are skipped. For kinds in
    RECURSE_LIST the children are visited with the node's spelling added to
    `prefix`; for kinds in PRINT_LIST a tuple (sanitized name, filename,
    processed comment) is appended to `output`.

    Returns 0 for a skipped node and None otherwise.
    """
    if not (node.location.file is None or
            os.path.samefile(d(node.location.file.name), filename)):
        return 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind not in PREFIX_BLACKLIST:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for i in node.get_children():
            extract(filename, i, sub_prefix, output)
    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
        sub_prefix = prefix
        if len(sub_prefix) > 0:
            sub_prefix += '_'
        # Entities without a spelling produce no entry.
        if len(node.spelling) > 0:
            name = sanitize_name(sub_prefix + d(node.spelling))
            output.append((name, filename, comment))


class ExtractionThread(Thread):
    """Thread that parses one header and appends its docstrings to `output`.

    Constructing an instance blocks until a job_semaphore permit is
    available; the permit is released when run() finishes.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            # Return the permit even if parsing or extraction raised.
            job_semaphore.release()


def read_args(args):
    """Split `args` into compiler parameters and input filenames.

    Items starting with '-' are passed on as compiler parameters, all
    others are treated as filenames. '-x c++' and '-std=c++11' are added
    unless `args` already contains '-x' / a '-std=' option, and
    platform-specific include settings are added on Darwin and Linux.

    Returns a (parameters, filenames) tuple.
    Raises NoFilenamesError if `args` contains no filename.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # First sub-directory reported for the SDK folder; an empty or
            # unreadable folder simply adds no -isysroot.
            sdk_names = next(os.walk(sdk_dir), (None, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)
    elif platform.system() == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(path):
            return [int(ver)
                    for ver in re.findall(r'(?<!lib)(?<!\d)\d+', path)]
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames


def extract_all(args):
    """Extract the docstrings of every header named in `args`.

    One ExtractionThread is started per file. Returns the list of
    (name, filename, comment) tuples they collected, in no particular
    order. Raises NoFilenamesError if `args` names no file.
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join instead of draining job_semaphore: every thread has released its
    # permit by the time it ends, so the semaphore is left full and this
    # function can be called again in the same process.
    for thr in threads:
        thr.join()

    return output


def write_header(comments, out_file=sys.stdout):
    """Write `comments` to `out_file` as a C++ header of docstring constants.

    `comments` is an iterable of (name, filename, comment) tuples. Entries
    are written sorted by (name, filename); the second and later entries
    sharing a name get the suffix '_2', '_3', ...
    """
    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            # Same name as the previous entry (e.g. an overload).
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)


def mkdoc(args):
    """Run the extraction for command-line style `args`.

    The first argument starting with '-o' selects the output file, given
    either attached ('-oFILE') or as the following argument ('-o FILE');
    without it the header is written to standard output. All remaining
    arguments are handled by read_args().

    Exits with status -1 if '-o' lacks its argument; raises
    NoFilenamesError if no input file is given.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            try:
                # After the deletion, index idx holds the next argument.
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument")
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error, don't leave a partially-written
            # output file.
            try:
                os.unlink(out_path)
            except OSError:
                pass
            raise
    else:
        write_header(comments)


if __name__ == '__main__':
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        sys.exit(-1)