# mkdoc.py revision 11986:c12e4625ab56
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

import os
import sys
import platform
import re
import textwrap

from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
from threading import Thread, Semaphore, Lock
from multiprocessing import cpu_count

# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() emits a docstring variable.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

# Maps a C++ operator token to the identifier-safe suffix that replaces it
# in generated variable names ('operator<=' becomes 'operator_le').
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
    'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
    'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
    'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
}

# Longest tokens first, so that '<<=' is tried before '<<' and '<' when
# sanitize_name() performs its sequential replacements.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))

# At most job_count extraction threads hold a semaphore slot at any time.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Number of times each sanitized name has been handed out so far; used to
# append a numeric suffix to repeated names (e.g. overloads). The lock
# guards the read-modify-write done by concurrent extraction threads.
registered_names = dict()
registered_names_lock = Lock()


def d(s):
    """Return ``s`` as text.

    ``bytes`` values are decoded as UTF-8; ``str`` values are returned
    unchanged, so the helper works whichever of the two the clang bindings
    hand back.
    """
    return s if isinstance(s, str) else s.decode('utf8')


def sanitize_name(name):
    """Turn a prefixed C++ entity name into a unique C identifier.

    Operator tokens are spelled out, template argument lists are dropped,
    every non-alphanumeric character becomes ``_`` and runs of underscores
    are collapsed. When the same sanitized name is requested again, a
    counter suffix (``_2``, ``_3``, ...) is appended.

    Returns the identifier prefixed with ``__doc_``.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for k, v in CPP_OPERATORS.items():
        name = name.replace('operator%s' % k, 'operator_%s' % v)
    name = re.sub('<.*>', '', name)
    name = ''.join([ch if ch.isalnum() else '_' for ch in name])
    name = re.sub('_$', '', re.sub('_+', '_', name))
    # Several ExtractionThread instances call this concurrently; without
    # the lock two of them could be handed the same suffix.
    with registered_names_lock:
        if name in registered_names:
            registered_names[name] += 1
            name += '_' + str(registered_names[name])
        else:
            registered_names[name] = 1
    return '__doc_' + name


def process_comment(comment):
    r"""Convert a raw C++/Doxygen comment into re-flowed plain text.

    The comment delimiters (``/*``, ``*/``, ``///`` and leading ``*``) and
    the common indentation are removed, a set of Doxygen commands and
    HTML/TeX tags is rewritten into lightweight markup, and the remaining
    paragraphs are wrapped at 70 columns. Text between code fences is
    passed through without wrapping.

    Returns the processed text without leading newlines or trailing
    whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if s:
            # Track the smallest indentation among the non-empty lines.
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    if leading_spaces != float('inf'):
        lines = [s[leading_spaces:] for s in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    # A leading '$' marks a heading paragraph for the re-flow step below.
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        # The word boundary keeps a short command from matching the start
        # of a longer one (e.g. 'throw' inside '\throws'), which would
        # otherwise leave a stray 's' in the output.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    # The capturing group keeps the fences in the split result, so the
    # pieces alternate between ordinary text and code.
    for x in re.split(r'(```)', s):
        if x == '```':
            if not in_code_segment:
                parts.append('```\n')
            else:
                parts.append('\n```\n\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            parts.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if wrapped and wrapped[0] == '$':
                    # Heading paragraph: drop the marker and indent the
                    # paragraph that follows it by four spaces.
                    parts.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if wrapped:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')


def extract(filename, node, prefix, output):
    """Recursively collect docstring definitions for ``node``.

    Nodes located in a file other than ``filename`` are ignored. For nodes
    whose kind is in RECURSE_LIST the children are visited first, with the
    node's spelling appended to ``prefix``; such a node is skipped
    altogether when none of its children produced output. For nodes whose
    kind is in PRINT_LIST and which have a non-empty spelling, a
    ``static const char *`` definition holding the processed comment is
    appended to ``output``.

    Returns the number of definitions appended for ``node`` and its
    descendants.
    """
    if not (node.location.file is None or
            os.path.samefile(d(node.location.file.name), filename)):
        return 0
    num_extracted = 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if sub_prefix:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for child in node.get_children():
            num_extracted += extract(filename, child, sub_prefix, output)
        if num_extracted == 0:
            return 0
    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
        sub_prefix = prefix
        if sub_prefix:
            sub_prefix += '_'
        if node.spelling:
            name = sanitize_name(sub_prefix + d(node.spelling))
            # Multi-line comments start on their own line after the '='.
            output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                          (name, '\n' if '\n' in comment else ' ', comment))
            num_extracted += 1
    return num_extracted


class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings.

    The constructor acquires a slot of ``job_semaphore`` (blocking the
    creating thread while all slots are taken); ``run`` releases it again
    when it finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters, output):
        Thread.__init__(self)
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its docstrings to the output."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            job_semaphore.release()


if __name__ == '__main__':
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK sub-directory as sysroot; skip the option
            # when the directory is unreadable or contains no SDK.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    # Arguments starting with '-' are forwarded to clang; everything else
    # is treated as a header file to process.
    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    for filename in filenames:
        # Constructing the thread blocks until a job slot is available.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Taking every slot succeeds only once all workers have released theirs.
    for _ in range(job_count):
        job_semaphore.acquire()

    # Threads finish in arbitrary order; sort for a stable listing.
    output.sort()
    for line in output:
        print(line)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')