1#!/usr/bin/env python
2#
3# Copyright 2007 Neal Norwitz
4# Portions Copyright 2007 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Generate an Abstract Syntax Tree (AST) for C++."""
19
20__author__ = 'nnorwitz@google.com (Neal Norwitz)'
21
22
23# TODO:
24#  * Tokens should never be exported, need to convert to Nodes
25#    (return types, parameters, etc.)
26#  * Handle static class data for templatized classes
27#  * Handle casts (both C++ and C-style)
28#  * Handle conditions and loops (if/else, switch, for, while/do)
29#
30# TODO much, much later:
31#  * Handle #define
32#  * exceptions
33
34
35try:
36    # Python 3.x
37    import builtins
38except ImportError:
39    # Python 2.x
40    import __builtin__ as builtins
41
42import sys
43import traceback
44
45from cpp import keywords
46from cpp import tokenize
47from cpp import utils
48
49
# Compatibility shims: each one is defined only when the running
# interpreter's builtins module does not already provide the name.
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        """Yield the items of an indexable sequence from last to first."""
        index = len(seq)
        while index > 0:
            index -= 1
            yield seq[index]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        """Advance obj by calling its Python 2 style next() method."""
        return obj.next()
60
61
# Visibility constants (three consecutive integers starting at zero).
(VISIBILITY_PUBLIC,
 VISIBILITY_PROTECTED,
 VISIBILITY_PRIVATE) = 0, 1, 2

# Function modifier flags; every non-zero value occupies its own bit.
FUNCTION_NONE = 0
FUNCTION_CONST = 1 << 0
FUNCTION_VIRTUAL = 1 << 1
FUNCTION_PURE_VIRTUAL = 1 << 2
FUNCTION_CTOR = 1 << 3
FUNCTION_DTOR = 1 << 4
FUNCTION_ATTRIBUTE = 1 << 5
FUNCTION_UNKNOWN_ANNOTATION = 1 << 6
FUNCTION_THROW = 1 << 7
FUNCTION_OVERRIDE = 1 << 8
74
75"""
76These are currently unused.  Should really handle these properly at some point.
77
78TYPE_MODIFIER_INLINE   = 0x010000
79TYPE_MODIFIER_EXTERN   = 0x020000
80TYPE_MODIFIER_STATIC   = 0x040000
81TYPE_MODIFIER_CONST    = 0x080000
82TYPE_MODIFIER_REGISTER = 0x100000
83TYPE_MODIFIER_VOLATILE = 0x200000
84TYPE_MODIFIER_MUTABLE  = 0x400000
85
86TYPE_MODIFIER_MAP = {
87    'inline': TYPE_MODIFIER_INLINE,
88    'extern': TYPE_MODIFIER_EXTERN,
89    'static': TYPE_MODIFIER_STATIC,
90    'const': TYPE_MODIFIER_CONST,
91    'register': TYPE_MODIFIER_REGISTER,
92    'volatile': TYPE_MODIFIER_VOLATILE,
93    'mutable': TYPE_MODIFIER_MUTABLE,
94    }
95"""
96
# token_type value that AstBuilder.Generate() checks for before dispatching;
# tokens of this type are consumed by Generate() and never reach
# _GenerateOne().
_INTERNAL_TOKEN = 'internal'
# Name of the internal token that makes AstBuilder.Generate() pop one entry
# off its namespace stack.
_NAMESPACE_POP = 'ns-pop'
99
100
101# TODO(nnorwitz): use this as a singleton for templated_types, etc
102# where we don't want to create a new empty dict each time.  It is also const.
103class _NullDict(object):
104    __contains__ = lambda self: False
105    keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
106
107
108# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node.

    Attributes:
      start: value given as start; rendered with %d in debug output.
      end: value given as end; rendered with %d in debug output.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def __str__(self):
        """Return the generic 'ClassName(...)' rendering with no suffix."""
        # __repr__ below delegates to str(self).  Without a real __str__ the
        # inherited object.__str__ falls back to repr(), and the two call
        # each other until the recursion limit is hit.
        return self._StringHelper(self.__class__.__name__, '')

    # Former (disabled) spelling, kept so existing references still resolve.
    XXX__str__ = __str__

    def _StringHelper(self, name, suffix):
        """Format 'name(suffix)', adding start/end when utils.DEBUG is set."""
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)
142
143
class Define(Node):
    """AST node holding a macro name and its definition text."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name, self.definition = name, definition

    def __str__(self):
        return self._StringHelper(
            self.__class__.__name__,
            '%s %s' % (self.name, self.definition))
153
154
class Include(Node):
    """AST node for an included file.

    Attributes:
      filename: name of the included file, without its delimiters.
      system: true when the file should be rendered as <filename> rather
        than "filename".
    """

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename, self.system = filename, system

    def __str__(self):
        # Angle brackets for system includes, double quotes otherwise.
        if self.system:
            rendered = '<%s>' % self.filename
        else:
            rendered = '"%s"' % self.filename
        return self._StringHelper(self.__class__.__name__, rendered)
166
167
class Goto(Node):
    """AST node that records the label of a goto."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        label_text = str(self.label)
        return self._StringHelper(self.__class__.__name__, label_text)
175
176
class Expr(Node):
    """AST node wrapping an expression value stored in self.expr."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        """Always False for now; dependency detection is not implemented."""
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        expr_text = str(self.expr)
        return self._StringHelper(self.__class__.__name__, expr_text)
188
189
class Return(Expr):
    """Expr subclass that adds no state or behavior of its own."""
192
193
class Delete(Expr):
    """Expr subclass that adds no state or behavior of its own."""
196
197
class Friend(Expr):
    """Expr that additionally keeps its own copy of a namespace sequence."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Slice-copy so later changes to the caller's list do not leak in.
        namespace_copy = namespace[:]
        self.namespace = namespace_copy
202
203
class Using(Node):
    """AST node that records the names given to a using statement."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        names_text = str(self.names)
        return self._StringHelper(self.__class__.__name__, names_text)
211
212
class Parameter(Node):
    """A single parameter: its name, its type and any default value.

    Attributes:
      name: the parameter name.
      type: the parameter's type object (its .name is used by Requires).
      default: sequence of objects with a .name attribute forming the
        default value; falsy when there is no default.
    """

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        """Return True when the parameter's type name equals node.name."""
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        type_text = str(self.type)
        pieces = ['%s %s' % (type_text, self.name)]
        if self.default:
            # The default is rendered by concatenating its token names.
            pieces.append(''.join([d.name for d in self.default]))
        return self._StringHelper(self.__class__.__name__, ' = '.join(pieces))
230
231
class _GenericDeclaration(Node):
    """Base for named nodes that remember the namespace they live in."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Keep a private copy of the namespace sequence.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the name, '::'-qualified when the innermost namespace is
        truthy."""
        qualifier = ''
        if self.namespace and self.namespace[-1]:
            qualifier = '%s::' % '::'.join(self.namespace)
        return qualifier + self.name

    def _TypeStringHelper(self, suffix):
        """Format suffix via _StringHelper, appending ' in <namespaces>'."""
        if self.namespace:
            # Falsy namespace entries are shown as <anonymous>.
            scope = '::'.join(
                [part or '<anonymous>' for part in self.namespace])
            suffix = suffix + ' in ' + scope
        return self._StringHelper(self.__class__.__name__, suffix)
249
250
251# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable: its name, type, optional initial value and namespace."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type, self.initial_value = var_type, initial_value

    def Requires(self, node):
        """Return True when the variable's type name equals node.name."""
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        declaration = '%s %s' % (self.type, self.name)
        if not self.initial_value:
            return declaration
        return declaration + ' = ' + self.initial_value

    def __str__(self):
        text = self.ToString()
        return self._StringHelper(self.__class__.__name__, text)
271
272
class Typedef(_GenericDeclaration):
    """A typedef: the new name plus the alias tokens it stands for."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        """A typedef always counts as a definition."""
        return True

    def IsExportable(self):
        """A typedef is always exportable."""
        return True

    def Requires(self, node):
        """Return True when any non-None alias token is named node.name."""
        # TODO(nnorwitz): handle namespaces, etc.
        wanted = node.name
        for token in self.alias:
            if token is None:
                continue
            if wanted == token.name:
                return True
        return False

    def __str__(self):
        return self._TypeStringHelper('%s, %s' % (self.name, self.alias))
295
296
class _NestedType(_GenericDeclaration):
    """Base for named types that carry a fields value (see Union, Enum)."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        """Nested types always count as definitions."""
        return True

    def IsExportable(self):
        """Nested types are always exportable."""
        return True

    def __str__(self):
        return self._TypeStringHelper('%s, {%s}' % (self.name, self.fields))
311
312
class Union(_NestedType):
    """_NestedType subclass that adds no state or behavior of its own."""
315
316
class Enum(_NestedType):
    """_NestedType subclass that adds no state or behavior of its own."""
319
320
class Class(_GenericDeclaration):
    """A class: name, base-class token lists, template types and body.

    A node whose bases and body are both None is a declaration only;
    anything else is treated as a definition.
    """

    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.templated_types = templated_types
        self.body = body

    def IsDeclaration(self):
        """True when neither bases nor a body were supplied."""
        return self.bases is None and self.body is None

    def IsDefinition(self):
        """True for anything that is not a bare declaration."""
        return not self.IsDeclaration()

    def IsExportable(self):
        """Only definitions are exportable."""
        return not self.IsDeclaration()

    def Requires(self, node):
        """Return True when any base-class token is named node.name."""
        # TODO(nnorwitz): handle namespaces, etc.
        # TODO(nnorwitz): bases are tokens, do name comparison.
        for token_list in self.bases or ():
            for token in token_list:
                if token.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        label = self.name
        if self.templated_types:
            label += '<%s>' % self.templated_types
        description = '%s, %s, %s' % (label, self.bases, self.body)
        return self._TypeStringHelper(description)
354
355
class Struct(Class):
    """Class subclass that adds no state or behavior of its own."""
358
359
class Function(_GenericDeclaration):
    """A function declaration or definition.

    The raw return-type and parameter token sequences are converted with a
    TypeConverter at construction time, so return_type holds the result of
    CreateReturnType() and parameters the result of ToParameters().
    """

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        type_converter = TypeConverter(namespace)
        self.return_type = type_converter.CreateReturnType(return_type)
        self.parameters = type_converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.templated_types = templated_types
        self.body = body

    def IsDeclaration(self):
        """True when there is no body."""
        return self.body is None

    def IsDefinition(self):
        """True when a body is present."""
        return self.body is not None

    def IsExportable(self):
        """False for 'static' return types or when a namespace entry is None."""
        is_static = self.return_type and 'static' in self.return_type.modifiers
        if is_static:
            return False
        return None not in self.namespace

    def Requires(self, node):
        """Return True when any parameter's name equals node.name."""
        # TODO(nnorwitz): parameters are tokens, do name comparison.
        # NOTE(review): this compares the parameter *name*, not its type
        # name as Parameter.Requires does -- confirm which is intended.
        for parameter in self.parameters or ():
            if parameter.name == node.name:
                return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        fields = (self.return_type, self.name, self.parameters,
                  self.modifiers, self.body)
        return self._TypeStringHelper('%s %s(%s), 0x%02x, %s' % fields)
397
398
class Method(Function):
    """A Function that also records the value it was given as in_class."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
407
408
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools

        When name is empty and modifiers is not, the last modifier is
        removed from the list (in place) and used as the name.
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        if modifiers and not name:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference, self.pointer, self.array = reference, pointer, array

    def __str__(self):
        text = str(self.name)
        if self.templated_types:
            text += '<%s>' % self.templated_types
        if self.modifiers:
            text = ' '.join(self.modifiers) + ' ' + text
        # Decorations are appended in a fixed order: &, then *, then [].
        decorations = ((self.reference, '&'),
                       (self.pointer, '*'),
                       (self.array, '[]'))
        for enabled, mark in decorations:
            if enabled:
                text += mark
        return self._TypeStringHelper(text)

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        """Always False for a Type."""
        return False

    def IsDefinition(self):
        """Always False for a Type."""
        return False

    def IsExportable(self):
        """Always False for a Type."""
        return False
456
457
class TypeConverter(object):
    """Turns sequences of tokens into Type and Parameter nodes."""

    def __init__(self, namespace_stack):
        # Stored as given (not copied).
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' that closes a template argument list.

        Args:
          tokens: sequence of tokens (objects with a .name attribute).
          start: index of the first token after the opening '<'.

        Returns:
          (tokens between the brackets, index just past the closing '>').

        Raises:
          IndexError: if the tokens run out before the brackets balance.
        """
        # The caller has already consumed the opening '<', so depth is 1.
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        # end is one past the closing '>', so end-1 excludes it.
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Type(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Type(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Builds one Type from the tokens gathered in name_tokens (if
            # any), appends it to result and empties name_tokens.
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Template arguments are converted recursively and attached
                # to the type named by the tokens collected so far.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                # (new_end is already past '>'; the i += 1 below then skips
                # one more token.)
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
               # NOTE(review): sets pointer, not array, unlike ToParameters()
               # -- confirm this is intentional.
               pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split the tokens of a declaration into its components.

        Args:
          parts: list of tokens making up the declaration.
          needs_name_removed: when true, the declared name (and any default
            value following '=') is separated from the type tokens first.

        Returns:
          (name, type_name, templated_types, modifiers, default,
          other_tokens) where name is None unless needs_name_removed,
          type_name is a str, modifiers is a list of keyword strings,
          default is the list of tokens after '=' and other_tokens holds
          the '[', ']', '=', '*', '&' and '>' tokens that were seen.
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # For 'name[] = ...' the name sits before the brackets.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the name is the trailing token, if it is one.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                # Leave i on the closing '>'; the i += 1 below steps past it.
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert the tokens of a parameter list into Parameter nodes.

        Args:
          tokens: sequence of tokens for the whole list; may be empty/None.

        Returns:
          A list of Parameter objects, empty when tokens is falsy.
        """
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            # Builds a Parameter from the state accumulated in the enclosing
            # scope for the current parameter and appends it to result.
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        # Depth of '<' nesting; commas inside template arguments must not
        # split parameters.
        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                # End of one parameter: emit it and reset the per-parameter
                # state.
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        # The last parameter has no trailing comma.
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type from the tokens of a return type.

        Returns:
          None when return_type_seq is falsy, otherwise a Type spanning the
          first token's start to the last token's end.
        """
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
           self.DeclarationToParts(return_type_seq, False)
        # Reference/pointer/array-ness is read off the leftover tokens.
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Locate the template argument span in a list of token names.

        Returns:
          (index of the first '<', index just past the last '>').  The
          search for '>' stops at index 1, so the second value is 1 when no
          '>' is found.

        Raises:
          ValueError: if names contains no '<'.
        """
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
675
676class AstBuilder(object):
677    def __init__(self, token_stream, filename, in_class='', visibility=None,
678                 namespace_stack=[]):
679        self.tokens = token_stream
680        self.filename = filename
681        # TODO(nnorwitz): use a better data structure (deque) for the queue.
682        # Switching directions of the "queue" improved perf by about 25%.
683        # Using a deque should be even better since we access from both sides.
684        self.token_queue = []
685        self.namespace_stack = namespace_stack[:]
686        self.in_class = in_class
687        if in_class is None:
688            self.in_class_name_only = None
689        else:
690            self.in_class_name_only = in_class.split('::')[-1]
691        self.visibility = visibility
692        self.in_function = False
693        self.current_token = None
694        # Keep the state whether we are currently handling a typedef or not.
695        self._handling_typedef = False
696
697        self.converter = TypeConverter(self.namespace_stack)
698
699    def HandleError(self, msg, token):
700        printable_queue = list(reversed(self.token_queue[-20:]))
701        sys.stderr.write('Got %s in %s @ %s %s\n' %
702                         (msg, self.filename, token, printable_queue))
703
704    def Generate(self):
705        while 1:
706            token = self._GetNextToken()
707            if not token:
708                break
709
710            # Get the next token.
711            self.current_token = token
712
713            # Dispatch on the next token type.
714            if token.token_type == _INTERNAL_TOKEN:
715                if token.name == _NAMESPACE_POP:
716                    self.namespace_stack.pop()
717                continue
718
719            try:
720                result = self._GenerateOne(token)
721                if result is not None:
722                    yield result
723            except:
724                self.HandleError('exception', token)
725                raise
726
727    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
728                        ref_pointer_name_seq, templated_types, value=None):
729        reference = '&' in ref_pointer_name_seq
730        pointer = '*' in ref_pointer_name_seq
731        array = '[' in ref_pointer_name_seq
732        var_type = Type(pos_token.start, pos_token.end, type_name,
733                        templated_types, type_modifiers,
734                        reference, pointer, array)
735        return VariableDeclaration(pos_token.start, pos_token.end,
736                                   name, var_type, value, self.namespace_stack)
737
    def _GenerateOne(self, token):
        """Parse one top-level construct that begins with token.

        Further tokens are pulled from the stream as needed.

        Returns:
          The result of a keyword handler or of _GetMethod(), a
          VariableDeclaration, an Include, a Define, or None when nothing
          is produced for the token.
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Non-builtin-type keywords dispatch to a handle_<keyword>
                # method; a missing handler raises AttributeError here.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            # Collect tokens up to the first '(', ';', '{' or '['; which one
            # ended the run decides how the construct is interpreted below.
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    # Pretend the run ended at ';' so the data branch below
                    # handles it.
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Drop the template argument span so its contents do not
                    # influence the reference/pointer/array detection.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Push everything after the first token back so a handler
                # can re-read it.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                # Strip the surrounding <> or "" delimiters.
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split at the first whitespace: macro name, then its value.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                # Skip the whole block when the condition starts with 0 or
                # (0).
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None
848
849    def _GetTokensUpTo(self, expected_token_type, expected_token):
850        return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
851
852    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
853        last_token = self._GetNextToken()
854        tokens = []
855        while (last_token.token_type != expected_token_type or
856               last_token.name not in expected_tokens):
857            tokens.append(last_token)
858            last_token = self._GetNextToken()
859        return tokens, last_token
860
    # TODO(nnorwitz): remove _IgnoreUpTo(); it shouldn't be necessary.
862    def _IgnoreUpTo(self, token_type, token):
863        unused_tokens = self._GetTokensUpTo(token_type, token)
864
865    def _SkipIf0Blocks(self):
866        count = 1
867        while 1:
868            token = self._GetNextToken()
869            if token.token_type != tokenize.PREPROCESSOR:
870                continue
871
872            name = token.name[1:].lstrip()
873            if name.startswith('endif'):
874                count -= 1
875                if count == 0:
876                    break
877            elif name.startswith('if'):
878                count += 1
879
880    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
881        if GetNextToken is None:
882            GetNextToken = self._GetNextToken
883        # Assumes the current token is open_paren and we will consume
884        # and return up to the close_paren.
885        count = 1
886        token = GetNextToken()
887        while 1:
888            if token.token_type == tokenize.SYNTAX:
889                if token.name == open_paren:
890                    count += 1
891                elif token.name == close_paren:
892                    count -= 1
893                    if count == 0:
894                        break
895            yield token
896            token = GetNextToken()
897        yield token
898
899    def _GetParameters(self):
900        return self._GetMatchingChar('(', ')')
901
902    def GetScope(self):
903        return self._GetMatchingChar('{', '}')
904
905    def _GetNextToken(self):
906        if self.token_queue:
907            return self.token_queue.pop()
908        return next(self.tokens)
909
910    def _AddBackToken(self, token):
911        if token.whence == tokenize.WHENCE_STREAM:
912            token.whence = tokenize.WHENCE_QUEUE
913            self.token_queue.insert(0, token)
914        else:
915            assert token.whence == tokenize.WHENCE_QUEUE, token
916            self.token_queue.append(token)
917
918    def _AddBackTokens(self, tokens):
919        if tokens:
920            if tokens[-1].whence == tokenize.WHENCE_STREAM:
921                for token in tokens:
922                    token.whence = tokenize.WHENCE_QUEUE
923                self.token_queue[:0] = reversed(tokens)
924            else:
925                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
926                self.token_queue.extend(reversed(tokens))
927
928    def GetName(self, seq=None):
929        """Returns ([tokens], next_token_info)."""
930        GetNextToken = self._GetNextToken
931        if seq is not None:
932            it = iter(seq)
933            GetNextToken = lambda: next(it)
934        next_token = GetNextToken()
935        tokens = []
936        last_token_was_name = False
937        while (next_token.token_type == tokenize.NAME or
938               (next_token.token_type == tokenize.SYNTAX and
939                next_token.name in ('::', '<'))):
940            # Two NAMEs in a row means the identifier should terminate.
941            # It's probably some sort of variable declaration.
942            if last_token_was_name and next_token.token_type == tokenize.NAME:
943                break
944            last_token_was_name = next_token.token_type == tokenize.NAME
945            tokens.append(next_token)
946            # Handle templated names.
947            if next_token.name == '<':
948                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
949                last_token_was_name = True
950            next_token = GetNextToken()
951        return tokens, next_token
952
953    def GetMethod(self, modifiers, templated_types):
954        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
955        assert len(return_type_and_name) >= 1
956        return self._GetMethod(return_type_and_name, modifiers, templated_types,
957                               False)
958
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parses the remainder of a function or method declaration.

        Args:
          return_type_and_name: list of tokens that precede the parameter
            list.  The last token is taken as the name; the list is
            modified in place and what remains is used as the return type.
          modifiers: bitmask of FUNCTION_* flags known so far.
          templated_types: passed through unchanged to the created node.
          get_paren: if true, the opening '(' (optionally preceded by a
            '<...>' template portion) has not been consumed yet and is
            read here.

        Returns:
          A variable node (from _CreateVariable) when the declaration turns
          out to be a pointer to function, a Method when the remaining
          return type tokens end with '::', otherwise a Function.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            # Scan forward for the '<' that opens the template portion and
            # split it off; the token before it becomes the name.
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # Handle operator[]: fold 'operator', '[' and ']' into one
            # synthetic NAME token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # indices supplies the start/end positions for the created node:
        # the first return type token if there is one, else the name.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            # The '()' just read was part of the name; the real parameter
            # list follows.
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume the NAME tokens that trail the parameter list and record
        # them as modifier flags.
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'override':
                modifiers |= FUNCTION_OVERRIDE
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            # The initializer list is skipped, not stored.
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                # NOTE: modifiers is rebound here from a bitmask to a list
                # of token names.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            # A definition: keep the body tokens.
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            # A declaration: no body; expect to end at ';'.
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
1115
1116    def _GetReturnTypeAndClassName(self, token_seq):
1117        # Splitting the return type from the class name in a method
1118        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
1119        # Where is the return type and where is the class name?
1120        # The heuristic used is to pull the last name as the class name.
1121        # This includes all the templated type info.
1122        # TODO(nnorwitz): if there is only One name like in the
1123        # example above, punt and assume the last bit is the class name.
1124
1125        # Ignore a :: prefix, if exists so we can find the first real name.
1126        i = 0
1127        if token_seq[0].name == '::':
1128            i = 1
1129        # Ignore a :: suffix, if exists.
1130        end = len(token_seq) - 1
1131        if token_seq[end-1].name == '::':
1132            end -= 1
1133
1134        # Make a copy of the sequence so we can append a sentinel
1135        # value. This is required for GetName will has to have some
1136        # terminating condition beyond the last name.
1137        seq_copy = token_seq[i:end]
1138        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
1139        names = []
1140        while i < end:
1141            # Iterate through the sequence parsing out each name.
1142            new_name, next = self.GetName(seq_copy[i:])
1143            assert new_name, 'Got empty new_name, next=%s' % next
1144            # We got a pointer or ref.  Add it to the name.
1145            if next and next.token_type == tokenize.SYNTAX:
1146                new_name.append(next)
1147            names.append(new_name)
1148            i += len(new_name)
1149
1150        # Now that we have the names, it's time to undo what we did.
1151
1152        # Remove the sentinel value.
1153        names[-1].pop()
1154        # Flatten the token sequence for the return type.
1155        return_type = [e for seq in names[:-1] for e in seq]
1156        # The class name is the last name.
1157        class_name = names[-1]
1158        return return_type, class_name
1159
1160    def handle_bool(self):
1161        pass
1162
1163    def handle_char(self):
1164        pass
1165
1166    def handle_int(self):
1167        pass
1168
1169    def handle_long(self):
1170        pass
1171
1172    def handle_short(self):
1173        pass
1174
1175    def handle_double(self):
1176        pass
1177
1178    def handle_float(self):
1179        pass
1180
1181    def handle_void(self):
1182        pass
1183
1184    def handle_wchar_t(self):
1185        pass
1186
1187    def handle_unsigned(self):
1188        pass
1189
1190    def handle_signed(self):
1191        pass
1192
1193    def _GetNestedType(self, ctor):
1194        name = None
1195        name_tokens, token = self.GetName()
1196        if name_tokens:
1197            name = ''.join([t.name for t in name_tokens])
1198
1199        # Handle forward declarations.
1200        if token.token_type == tokenize.SYNTAX and token.name == ';':
1201            return ctor(token.start, token.end, name, None,
1202                        self.namespace_stack)
1203
1204        if token.token_type == tokenize.NAME and self._handling_typedef:
1205            self._AddBackToken(token)
1206            return ctor(token.start, token.end, name, None,
1207                        self.namespace_stack)
1208
1209        # Must be the type declaration.
1210        fields = list(self._GetMatchingChar('{', '}'))
1211        del fields[-1]                  # Remove trailing '}'.
1212        if token.token_type == tokenize.SYNTAX and token.name == '{':
1213            next = self._GetNextToken()
1214            new_type = ctor(token.start, token.end, name, fields,
1215                            self.namespace_stack)
1216            # A name means this is an anonymous type and the name
1217            # is the variable declaration.
1218            if next.token_type != tokenize.NAME:
1219                return new_type
1220            name = new_type
1221            token = next
1222
1223        # Must be variable declaration using the type prefixed with keyword.
1224        assert token.token_type == tokenize.NAME, token
1225        return self._CreateVariable(token, token.name, name, [], '', None)
1226
    def handle_struct(self):
        """Handles a declaration that begins with the 'struct' keyword.

        Besides struct definitions this recognizes variables declared as
        `struct Name var;`, `struct Name* var;` or `struct Name& var;`,
        and functions declared to return `struct Name*` / `struct Name&`.
        Anything else is pushed back and parsed by _GetClass.

        Returns:
          The node produced by _CreateVariable, _GetMethod or _GetClass.
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # True when the token after the name starts with '*' or '&'.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # True for `struct Name var;`.
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                # var_token is the '*' or '&'; the declared name follows it.
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Synthesize a token for the already consumed 'struct'
                    # keyword, positioned just before the first name token.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable or method: put everything back for _GetClass.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            # No name was read; only the lookahead token must be put back.
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1262
1263    def handle_union(self):
1264        return self._GetNestedType(Union)
1265
1266    def handle_enum(self):
1267        return self._GetNestedType(Enum)
1268
1269    def handle_auto(self):
1270        # TODO(nnorwitz): warn about using auto?  Probably not since it
1271        # will be reclaimed and useful for C++0x.
1272        pass
1273
1274    def handle_register(self):
1275        pass
1276
1277    def handle_const(self):
1278        pass
1279
1280    def handle_inline(self):
1281        pass
1282
1283    def handle_extern(self):
1284        pass
1285
1286    def handle_static(self):
1287        pass
1288
1289    def handle_virtual(self):
1290        # What follows must be a method.
1291        token = token2 = self._GetNextToken()
1292        if token.name == 'inline':
1293            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
1294            token2 = self._GetNextToken()
1295        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
1296            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
1297        assert token.token_type == tokenize.NAME or token.name == '::', token
1298        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
1299        return_type_and_name.insert(0, token)
1300        if token2 is not token:
1301            return_type_and_name.insert(1, token2)
1302        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
1303                               None, False)
1304
1305    def handle_volatile(self):
1306        pass
1307
1308    def handle_mutable(self):
1309        pass
1310
    def handle_public(self):
        """Sets the current visibility to public; only valid in a class."""
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC
1314
    def handle_protected(self):
        """Sets the current visibility to protected; only valid in a class."""
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED
1318
    def handle_private(self):
        """Sets the current visibility to private; only valid in a class."""
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
1322
1323    def handle_friend(self):
1324        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1325        assert tokens
1326        t0 = tokens[0]
1327        return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1328
1329    def handle_static_cast(self):
1330        pass
1331
1332    def handle_const_cast(self):
1333        pass
1334
1335    def handle_dynamic_cast(self):
1336        pass
1337
1338    def handle_reinterpret_cast(self):
1339        pass
1340
1341    def handle_new(self):
1342        pass
1343
1344    def handle_delete(self):
1345        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1346        assert tokens
1347        return Delete(tokens[0].start, tokens[0].end, tokens)
1348
1349    def handle_typedef(self):
1350        token = self._GetNextToken()
1351        if (token.token_type == tokenize.NAME and
1352            keywords.IsKeyword(token.name)):
1353            # Token must be struct/enum/union/class.
1354            method = getattr(self, 'handle_' + token.name)
1355            self._handling_typedef = True
1356            tokens = [method()]
1357            self._handling_typedef = False
1358        else:
1359            tokens = [token]
1360
1361        # Get the remainder of the typedef up to the semi-colon.
1362        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))
1363
1364        # TODO(nnorwitz): clean all this up.
1365        assert tokens
1366        name = tokens.pop()
1367        indices = name
1368        if tokens:
1369            indices = tokens[0]
1370        if not indices:
1371            indices = token
1372        if name.name == ')':
1373            # HACK(nnorwitz): Handle pointers to functions "properly".
1374            if (len(tokens) >= 4 and
1375                tokens[1].name == '(' and tokens[2].name == '*'):
1376                tokens.append(name)
1377                name = tokens[3]
1378        elif name.name == ']':
1379            # HACK(nnorwitz): Handle arrays properly.
1380            if len(tokens) >= 2:
1381                tokens.append(name)
1382                name = tokens[1]
1383        new_type = tokens
1384        if tokens and isinstance(tokens[0], tokenize.Token):
1385            new_type = self.converter.ToType(tokens)[0]
1386        return Typedef(indices.start, indices.end, name.name,
1387                       new_type, self.namespace_stack)
1388
1389    def handle_typeid(self):
1390        pass  # Not needed yet.
1391
1392    def handle_typename(self):
1393        pass  # Not needed yet.
1394
1395    def _GetTemplatedTypes(self):
1396        result = {}
1397        tokens = list(self._GetMatchingChar('<', '>'))
1398        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
1399        i = 0
1400        while i < len_tokens:
1401            key = tokens[i].name
1402            i += 1
1403            if keywords.IsKeyword(key) or key == ',':
1404                continue
1405            type_name = default = None
1406            if i < len_tokens:
1407                i += 1
1408                if tokens[i-1].name == '=':
1409                    assert i < len_tokens, '%s %s' % (i, tokens)
1410                    default, unused_next_token = self.GetName(tokens[i:])
1411                    i += len(default)
1412                else:
1413                    if tokens[i-1].name != ',':
1414                        # We got something like: Type variable.
1415                        # Re-adjust the key (variable) and type_name (Type).
1416                        key = tokens[i-1].name
1417                        type_name = tokens[i-2]
1418
1419            result[key] = (type_name, default)
1420        return result
1421
1422    def handle_template(self):
1423        token = self._GetNextToken()
1424        assert token.token_type == tokenize.SYNTAX, token
1425        assert token.name == '<', token
1426        templated_types = self._GetTemplatedTypes()
1427        # TODO(nnorwitz): for now, just ignore the template params.
1428        token = self._GetNextToken()
1429        if token.token_type == tokenize.NAME:
1430            if token.name == 'class':
1431                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
1432            elif token.name == 'struct':
1433                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
1434            elif token.name == 'friend':
1435                return self.handle_friend()
1436        self._AddBackToken(token)
1437        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
1438        tokens.append(last)
1439        self._AddBackTokens(tokens)
1440        if last.name == '(':
1441            return self.GetMethod(FUNCTION_NONE, templated_types)
1442        # Must be a variable definition.
1443        return None
1444
1445    def handle_true(self):
1446        pass  # Nothing to do.
1447
1448    def handle_false(self):
1449        pass  # Nothing to do.
1450
1451    def handle_asm(self):
1452        pass  # Not needed yet.
1453
1454    def handle_class(self):
1455        return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1456
1457    def _GetBases(self):
1458        # Get base classes.
1459        bases = []
1460        while 1:
1461            token = self._GetNextToken()
1462            assert token.token_type == tokenize.NAME, token
1463            # TODO(nnorwitz): store kind of inheritance...maybe.
1464            if token.name not in ('public', 'protected', 'private'):
1465                # If inheritance type is not specified, it is private.
1466                # Just put the token back so we can form a name.
1467                # TODO(nnorwitz): it would be good to warn about this.
1468                self._AddBackToken(token)
1469            else:
1470                # Check for virtual inheritance.
1471                token = self._GetNextToken()
1472                if token.name != 'virtual':
1473                    self._AddBackToken(token)
1474                else:
1475                    # TODO(nnorwitz): store that we got virtual for this base.
1476                    pass
1477            base, next_token = self.GetName()
1478            bases_ast = self.converter.ToType(base)
1479            assert len(bases_ast) == 1, bases_ast
1480            bases.append(bases_ast[0])
1481            assert next_token.token_type == tokenize.SYNTAX, next_token
1482            if next_token.name == '{':
1483                token = next_token
1484                break
1485            # Support multiple inheritance.
1486            assert next_token.name == ',', next_token
1487        return bases, token
1488
    def _GetClass(self, class_type, visibility, templated_types):
        """Parses what follows a 'class' or 'struct' keyword.

        Args:
          class_type: callable used to build the resulting node (callers
            in this file pass Class or Struct).
          visibility: initial member visibility handed to the AstBuilder
            that parses the body.
          templated_types: template parameters forwarded to class_type,
            or None.

        Returns:
          A class_type node for forward declarations and definitions, a
          variable node (from _CreateVariable) for declarations such as
          `class Foo* name;` or `class Foo {...} name`, or the result of
          GetMethod when the tokens look like a method declaration.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # No name follows the keyword.
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                # Two names in a row: class_token is dropped and the
                # second name is parsed as the class name.
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a nested builder that reads only
            # the tokens of this scope.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # A name after the closing brace declares a variable
                    # of the class just defined.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            # No body follows; outside of a typedef this is reported as an
            # error.  The token is pushed back either way.
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, templated_types, body, self.namespace_stack)
1560
1561    def handle_namespace(self):
1562        token = self._GetNextToken()
1563        # Support anonymous namespaces.
1564        name = None
1565        if token.token_type == tokenize.NAME:
1566            name = token.name
1567            token = self._GetNextToken()
1568        self.namespace_stack.append(name)
1569        assert token.token_type == tokenize.SYNTAX, token
1570        # Create an internal token that denotes when the namespace is complete.
1571        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
1572                                        None, None)
1573        internal_token.whence = token.whence
1574        if token.name == '=':
1575            # TODO(nnorwitz): handle aliasing namespaces.
1576            name, next_token = self.GetName()
1577            assert next_token.name == ';', next_token
1578            self._AddBackToken(internal_token)
1579        else:
1580            assert token.name == '{', token
1581            tokens = list(self.GetScope())
1582            # Replace the trailing } with the internal namespace pop token.
1583            tokens[-1] = internal_token
1584            # Handle namespace with nothing in it.
1585            self._AddBackTokens(tokens)
1586        return None
1587
1588    def handle_using(self):
1589        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1590        assert tokens
1591        return Using(tokens[0].start, tokens[0].end, tokens)
1592
1593    def handle_explicit(self):
1594        assert self.in_class
1595        # Nothing much to do.
1596        # TODO(nnorwitz): maybe verify the method name == class name.
1597        # This must be a ctor.
1598        return self.GetMethod(FUNCTION_CTOR, None)
1599
1600    def handle_this(self):
1601        pass  # Nothing to do.
1602
1603    def handle_operator(self):
1604        # Pull off the next token(s?) and make that part of the method name.
1605        pass
1606
1607    def handle_sizeof(self):
1608        pass
1609
1610    def handle_case(self):
1611        pass
1612
1613    def handle_switch(self):
1614        pass
1615
1616    def handle_default(self):
1617        token = self._GetNextToken()
1618        assert token.token_type == tokenize.SYNTAX
1619        assert token.name == ':'
1620
1621    def handle_if(self):
1622        pass
1623
1624    def handle_else(self):
1625        pass
1626
1627    def handle_return(self):
1628        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1629        if not tokens:
1630            return Return(self.current_token.start, self.current_token.end, None)
1631        return Return(tokens[0].start, tokens[0].end, tokens)
1632
1633    def handle_goto(self):
1634        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1635        assert len(tokens) == 1, str(tokens)
1636        return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1637
1638    def handle_try(self):
1639        pass  # Not needed yet.
1640
1641    def handle_catch(self):
1642        pass  # Not needed yet.
1643
1644    def handle_throw(self):
1645        pass  # Not needed yet.
1646
1647    def handle_while(self):
1648        pass
1649
1650    def handle_do(self):
1651        pass
1652
1653    def handle_for(self):
1654        pass
1655
1656    def handle_break(self):
1657        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1658
1659    def handle_continue(self):
1660        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1661
1662
def BuilderFromSource(source, filename):
    """Tokenizes source and wraps the tokens in an AstBuilder.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder constructed from tokenize.GetTokens(source) and filename.
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
1674
1675
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Writes 'Unable to find: <filename>' to stderr and returns if
    utils.ReadFile() yields None.  Otherwise prints node.name for every
    node generated by the builder that should_print accepts.  Parsing is
    best effort: the first error raised while generating or printing
    silently ends the output for this file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(node)
    """
    # NOTE(review): the second ReadFile argument presumably suppresses
    # ReadFile's own error reporting, since a message is written below;
    # confirm against cpp.utils.
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Deliberately best effort: stop at the first parse error.  Only
        # ordinary errors are swallowed; SystemExit and GeneratorExit must
        # still propagate, which a bare except would prevent.
        pass
1698
1699
def PrintAllIndentifiers(filenames, should_print):
    """Runs PrintIndentifiers on every file in filenames, in order.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate forwarded unchanged to PrintIndentifiers
    """
    for source_path in filenames:
        PrintIndentifiers(source_path, should_print)
1709
1710
def main(argv):
    """Parses every file named in argv[1:].

    Files for which utils.ReadFile() returns None are skipped.  For each
    remaining file a 'Processing <filename>' line is printed and the whole
    AST is generated; when utils.DEBUG is set every non-empty node is
    printed.  A parse error prints a traceback and processing moves on to
    the next file; KeyboardInterrupt stops the run.

    Args:
      argv: command-line arguments; argv[0] is ignored.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # Build the list eagerly.  On Python 3 filter() is lazy, so the
            # generator would not run inside this try block: errors would
            # escape from the loop below, and nothing would be parsed at
            # all when DEBUG is off.
            entire_ast = [node for node in builder.Generate() if node]
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1730
1731
# Script entry point: hand the full command line (including argv[0]) to main().
if __name__ == '__main__':
    main(sys.argv)
1734