#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#
#  (mkdoc.py revision 12391:ceeca8b41e4b)
#

import os
import sys
import platform
import re
import textwrap

from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
from threading import Thread, Semaphore
from multiprocessing import cpu_count

# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which a docstring entry is emitted.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

# C++ operator spellings and the identifier fragment each one maps to.
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
    'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
    'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
    'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
}

# Longest operators first, so that e.g. '<<=' is replaced before '<<' or '<'.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))

# At most `job_count` ExtractionThread instances exist at any time.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Collected (name, filename, comment) tuples, appended to by extract().
output = []


def d(s):
    """Return *s* as text.

    Byte strings are decoded as UTF-8; values that are already ``str`` are
    returned unchanged, so the script works whether the clang bindings hand
    back bytes or text.
    """
    return s.decode('utf8') if isinstance(s, bytes) else s


def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_*`` C identifier.

    Template placeholders become ``T<n>``, operator spellings are replaced by
    their CPP_OPERATORS names, template argument lists are dropped, every
    non-alphanumeric character becomes ``_``, and runs of underscores are
    collapsed (a trailing one is removed).
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for k, v in CPP_OPERATORS.items():
        name = name.replace('operator%s' % k, 'operator_%s' % v)
    name = re.sub('<.*>', '', name)
    name = ''.join([ch if ch.isalnum() else '_' for ch in name])
    name = re.sub('_$', '', re.sub('_+', '_', name))
    return '__doc_' + name


def _strip_comment_syntax(comment):
    """Remove C++ comment markers and the common leading indentation."""
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
            # A one-line block comment ("/** text */") also carries the
            # terminator on the same line; strip it as well.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if len(s) > 0:
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    # leading_spaces stays infinite when every line is empty.
    if leading_spaces != float('inf'):
        lines = [s[leading_spaces:] for s in lines]
    return ''.join(s + '\n' for s in lines)


def _translate_markup(s):
    """Rewrite Doxygen commands and HTML/TeX tags into lightweight markup.

    Section-like commands are rewritten to paragraphs starting with ``$``;
    that marker is consumed later by _reflow().
    """
    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'remarks': 'Remarks',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws'
    }.items():
        # The word boundary keeps a short tag (e.g. \author) from matching
        # the front of a longer one (e.g. \authors) and leaving a stray
        # suffix behind.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\b\s*', r'\n\n', s)
    s = re.sub(r'\\brief\b\s*', r'', s)
    s = re.sub(r'\\short\b\s*', r'', s)
    s = re.sub(r'\\ref\b\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Re-wrap prose paragraphs to 70 columns, leaving code segments as-is.

    A paragraph starting with ``$`` is a section heading: the marker is
    dropped and the paragraph that follows is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper(
        width=70,
        expand_tabs=True,
        replace_whitespace=True,
        drop_whitespace=True,
        initial_indent='',
        subsequent_indent='')

    pieces = []
    in_code_segment = False
    # The capturing group keeps the ``` fences as separate list items.
    for x in re.split(r'(```)', s):
        if x == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    pieces.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    The comment syntax is stripped, Doxygen/HTML markup is translated, and
    the remaining prose is re-wrapped.

    :param comment: raw comment text (may be empty).
    :return: the processed text without leading newlines or trailing
        whitespace.
    """
    return _reflow(_translate_markup(_strip_comment_syntax(comment)))


def extract(filename, node, prefix):
    """Recursively collect docstrings for *node* and its children.

    Nodes located in a file other than *filename* are skipped. For every
    named node whose kind is in PRINT_LIST, a ``(name, filename, comment)``
    tuple is appended to the module-level ``output`` list.

    :param filename: path of the header being processed.
    :param node: clang cursor to inspect.
    :param prefix: underscore-joined names of the enclosing scopes.
    :return: 0 when the node is skipped because it belongs to another file,
        otherwise None (callers in this file ignore the value).
    """
    if not (node.location.file is None or
            os.path.samefile(d(node.location.file.name), filename)):
        return 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for i in node.get_children():
            extract(filename, i, sub_prefix)
    if node.kind in PRINT_LIST:
        # Unnamed entities get no entry, so skip the comment processing too.
        if len(node.spelling) > 0:
            comment = d(node.raw_comment) \
                if node.raw_comment is not None else ''
            comment = process_comment(comment)
            sub_prefix = prefix
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            name = sanitize_name(sub_prefix + d(node.spelling))
            output.append((name, filename, comment))


class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings."""

    def __init__(self, filename, parameters):
        """Create the worker; blocks until a job slot is available.

        :param filename: header file to parse.
        :param parameters: list of command-line arguments passed to clang.
        """
        Thread.__init__(self)
        self.filename = filename
        self.parameters = parameters
        # Acquired in the constructing thread, which throttles how many
        # workers can be created; released at the end of run().
        job_semaphore.acquire()

    def run(self):
        """Parse the header and feed its translation unit to extract()."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '')
        finally:
            job_semaphore.release()


def main():
    """Parse the command line, run the extraction, and print the header."""
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK sub-directory, if there is one; an empty or
            # unreadable SDK directory simply adds no -isysroot option.
            sdk_names = next(os.walk(sdk_dir), (None, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    # Arguments starting with '-' go to clang; everything else is a header.
    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output.clear()
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    for thr in threads:
        thr.join()

    # Entries sharing a name (e.g. overloads) get a numeric suffix starting
    # at _2; sorting by (name, filename) keeps the numbering deterministic.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment))

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')


if __name__ == '__main__':
    main()