pytree.py 27.4 KB
Newer Older
Martin v. Löwis's avatar
Martin v. Löwis committed
1 2 3
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

Benjamin Peterson's avatar
Benjamin Peterson committed
4 5
"""
Python parse tree definitions.
Martin v. Löwis's avatar
Martin v. Löwis committed
6 7 8 9 10 11 12 13 14

This is a very concrete parse tree; we need to keep every token and
even the comments and whitespace between tokens.

There's also a pattern matching implementation here.
"""

__author__ = "Guido van Rossum <guido@python.org>"

Benjamin Peterson's avatar
Benjamin Peterson committed
15
import sys
Benjamin Peterson's avatar
Benjamin Peterson committed
16
import warnings
Benjamin Peterson's avatar
Benjamin Peterson committed
17 18
from StringIO import StringIO

Martin v. Löwis's avatar
Martin v. Löwis committed
19 20 21

HUGE = 0x7FFFFFFF  # maximum repeat count, default max

22 23 24 25 26 27 28 29 30 31 32
_type_reprs = {}
def type_repr(type_num):
    global _type_reprs
    if not _type_reprs:
        from .pygram import python_symbols
        # printing tokens is possible but not as useful
        # from .pgen2 import token // token.__dict__.items():
        for name, val in python_symbols.__dict__.items():
            if type(val) == int: _type_reprs[val] = name
    return _type_reprs.setdefault(type_num, type_num)

Martin v. Löwis's avatar
Martin v. Löwis committed
33 34 35

class Base(object):

Benjamin Peterson's avatar
Benjamin Peterson committed
36 37
    """
    Abstract base class for Node and Leaf.
Martin v. Löwis's avatar
Martin v. Löwis committed
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

    This provides some default functionality and boilerplate using the
    template pattern.

    A node may be a subnode of at most one parent.
    """

    # Default values for instance variables
    type = None    # int: token number (< 256) or symbol number (>= 256)
    parent = None  # Parent node pointer, or None
    children = ()  # Tuple of subnodes
    was_changed = False

    def __new__(cls, *args, **kwds):
        """Constructor that prevents Base from being instantiated."""
        assert cls is not Base, "Cannot instantiate Base"
        return object.__new__(cls)

    def __eq__(self, other):
Benjamin Peterson's avatar
Benjamin Peterson committed
57 58
        """
        Compare two nodes for equality.
Martin v. Löwis's avatar
Martin v. Löwis committed
59 60 61 62 63 64 65

        This calls the method _eq().
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self._eq(other)

Benjamin Peterson's avatar
Benjamin Peterson committed
66 67
    __hash__ = None # For Py3 compatibility.

Martin v. Löwis's avatar
Martin v. Löwis committed
68
    def __ne__(self, other):
Benjamin Peterson's avatar
Benjamin Peterson committed
69 70
        """
        Compare two nodes for inequality.
Martin v. Löwis's avatar
Martin v. Löwis committed
71 72 73 74 75 76 77 78

        This calls the method _eq().
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return not self._eq(other)

    def _eq(self, other):
Benjamin Peterson's avatar
Benjamin Peterson committed
79 80
        """
        Compare two nodes for equality.
Martin v. Löwis's avatar
Martin v. Löwis committed
81

Benjamin Peterson's avatar
Benjamin Peterson committed
82 83 84 85
        This is called by __eq__ and __ne__.  It is only called if the two nodes
        have the same type.  This must be implemented by the concrete subclass.
        Nodes should be considered equal if they have the same structure,
        ignoring the prefix string and other context information.
Martin v. Löwis's avatar
Martin v. Löwis committed
86 87 88 89
        """
        raise NotImplementedError

    def clone(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
90 91
        """
        Return a cloned (deep) copy of self.
Martin v. Löwis's avatar
Martin v. Löwis committed
92 93 94 95 96 97

        This must be implemented by the concrete subclass.
        """
        raise NotImplementedError

    def post_order(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
98 99
        """
        Return a post-order iterator for the tree.
Martin v. Löwis's avatar
Martin v. Löwis committed
100 101 102 103 104 105

        This must be implemented by the concrete subclass.
        """
        raise NotImplementedError

    def pre_order(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
106 107
        """
        Return a pre-order iterator for the tree.
Martin v. Löwis's avatar
Martin v. Löwis committed
108 109 110 111 112 113

        This must be implemented by the concrete subclass.
        """
        raise NotImplementedError

    def set_prefix(self, prefix):
Benjamin Peterson's avatar
Benjamin Peterson committed
114 115
        """
        Set the prefix for the node (see Leaf class).
Martin v. Löwis's avatar
Martin v. Löwis committed
116

Benjamin Peterson's avatar
Benjamin Peterson committed
117
        DEPRECATED; use the prefix property directly.
Martin v. Löwis's avatar
Martin v. Löwis committed
118
        """
Benjamin Peterson's avatar
Benjamin Peterson committed
119 120 121
        warnings.warn("set_prefix() is deprecated; use the prefix property",
                      DeprecationWarning, stacklevel=2)
        self.prefix = prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
122 123

    def get_prefix(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
124 125
        """
        Return the prefix for the node (see Leaf class).
Martin v. Löwis's avatar
Martin v. Löwis committed
126

Benjamin Peterson's avatar
Benjamin Peterson committed
127
        DEPRECATED; use the prefix property directly.
Martin v. Löwis's avatar
Martin v. Löwis committed
128
        """
Benjamin Peterson's avatar
Benjamin Peterson committed
129 130 131
        warnings.warn("get_prefix() is deprecated; use the prefix property",
                      DeprecationWarning, stacklevel=2)
        return self.prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
132 133

    def replace(self, new):
Benjamin Peterson's avatar
Benjamin Peterson committed
134
        """Replace this node with a new one in the parent."""
Martin v. Löwis's avatar
Martin v. Löwis committed
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
        assert self.parent is not None, str(self)
        assert new is not None
        if not isinstance(new, list):
            new = [new]
        l_children = []
        found = False
        for ch in self.parent.children:
            if ch is self:
                assert not found, (self.parent.children, self, new)
                if new is not None:
                    l_children.extend(new)
                found = True
            else:
                l_children.append(ch)
        assert found, (self.children, self, new)
        self.parent.changed()
        self.parent.children = l_children
        for x in new:
            x.parent = self.parent
        self.parent = None

    def get_lineno(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
157
        """Return the line number which generated the invocant node."""
Martin v. Löwis's avatar
Martin v. Löwis committed
158 159 160 161 162 163 164 165 166 167 168 169 170
        node = self
        while not isinstance(node, Leaf):
            if not node.children:
                return
            node = node.children[0]
        return node.lineno

    def changed(self):
        if self.parent:
            self.parent.changed()
        self.was_changed = True

    def remove(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
171 172 173 174
        """
        Remove the node from the tree. Returns the position of the node in its
        parent's children before it was removed.
        """
Martin v. Löwis's avatar
Martin v. Löwis committed
175 176 177 178 179 180 181 182
        if self.parent:
            for i, node in enumerate(self.parent.children):
                if node is self:
                    self.parent.changed()
                    del self.parent.children[i]
                    self.parent = None
                    return i

Benjamin Peterson's avatar
Benjamin Peterson committed
183 184 185 186 187 188
    @property
    def next_sibling(self):
        """
        The node immediately following the invocant in their parent's children
        list. If the invocant does not have a next sibling, it is None
        """
Martin v. Löwis's avatar
Martin v. Löwis committed
189 190 191 192
        if self.parent is None:
            return None

        # Can't use index(); we need to test by identity
193 194
        for i, child in enumerate(self.parent.children):
            if child is self:
Martin v. Löwis's avatar
Martin v. Löwis committed
195 196 197 198 199
                try:
                    return self.parent.children[i+1]
                except IndexError:
                    return None

Benjamin Peterson's avatar
Benjamin Peterson committed
200 201 202 203 204 205
    @property
    def prev_sibling(self):
        """
        The node immediately preceding the invocant in their parent's children
        list. If the invocant does not have a previous sibling, it is None.
        """
206 207 208 209 210 211 212 213 214 215
        if self.parent is None:
            return None

        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
            if child is self:
                if i == 0:
                    return None
                return self.parent.children[i-1]

Martin v. Löwis's avatar
Martin v. Löwis committed
216
    def get_suffix(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
217 218
        """
        Return the string immediately following the invocant node. This is
Benjamin Peterson's avatar
Benjamin Peterson committed
219
        effectively equivalent to node.next_sibling.prefix
Benjamin Peterson's avatar
Benjamin Peterson committed
220 221
        """
        next_sib = self.next_sibling
Martin v. Löwis's avatar
Martin v. Löwis committed
222
        if next_sib is None:
223
            return u""
Benjamin Peterson's avatar
Benjamin Peterson committed
224
        return next_sib.prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
225

226 227 228 229
    if sys.version_info < (3, 0):
        def __str__(self):
            return unicode(self).encode("ascii")

Martin v. Löwis's avatar
Martin v. Löwis committed
230 231 232 233 234 235

class Node(Base):

    """Concrete implementation for interior nodes."""

    def __init__(self, type, children, context=None, prefix=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
236 237
        """
        Initializer.
Martin v. Löwis's avatar
Martin v. Löwis committed
238 239 240 241 242 243 244 245 246 247 248 249 250

        Takes a type constant (a symbol number >= 256), a sequence of
        child nodes, and an optional context keyword argument.

        As a side effect, the parent pointers of the children are updated.
        """
        assert type >= 256, type
        self.type = type
        self.children = list(children)
        for ch in self.children:
            assert ch.parent is None, repr(ch)
            ch.parent = self
        if prefix is not None:
Benjamin Peterson's avatar
Benjamin Peterson committed
251
            self.prefix = prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
252 253

    def __repr__(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
254
        """Return a canonical string representation."""
255 256
        return "%s(%s, %r)" % (self.__class__.__name__,
                               type_repr(self.type),
Martin v. Löwis's avatar
Martin v. Löwis committed
257 258
                               self.children)

259
    def __unicode__(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
260 261
        """
        Return a pretty string representation.
Martin v. Löwis's avatar
Martin v. Löwis committed
262 263 264

        This reproduces the input source exactly.
        """
265 266 267 268
        return u"".join(map(unicode, self.children))

    if sys.version_info > (3, 0):
        __str__ = __unicode__
Martin v. Löwis's avatar
Martin v. Löwis committed
269 270

    def _eq(self, other):
Benjamin Peterson's avatar
Benjamin Peterson committed
271
        """Compare two nodes for equality."""
Martin v. Löwis's avatar
Martin v. Löwis committed
272 273 274
        return (self.type, self.children) == (other.type, other.children)

    def clone(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
275
        """Return a cloned (deep) copy of self."""
Martin v. Löwis's avatar
Martin v. Löwis committed
276 277 278
        return Node(self.type, [ch.clone() for ch in self.children])

    def post_order(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
279
        """Return a post-order iterator for the tree."""
Martin v. Löwis's avatar
Martin v. Löwis committed
280 281 282 283 284 285
        for child in self.children:
            for node in child.post_order():
                yield node
        yield self

    def pre_order(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
286
        """Return a pre-order iterator for the tree."""
Martin v. Löwis's avatar
Martin v. Löwis committed
287 288 289 290 291
        yield self
        for child in self.children:
            for node in child.post_order():
                yield node

Benjamin Peterson's avatar
Benjamin Peterson committed
292
    def _prefix_getter(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
293
        """
Benjamin Peterson's avatar
Benjamin Peterson committed
294
        The whitespace and comments preceding this node in the input.
Martin v. Löwis's avatar
Martin v. Löwis committed
295 296 297
        """
        if not self.children:
            return ""
Benjamin Peterson's avatar
Benjamin Peterson committed
298 299
        return self.children[0].prefix

Benjamin Peterson's avatar
Benjamin Peterson committed
300
    def _prefix_setter(self, prefix):
Benjamin Peterson's avatar
Benjamin Peterson committed
301 302
        if self.children:
            self.children[0].prefix = prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
303

Benjamin Peterson's avatar
Benjamin Peterson committed
304 305
    prefix = property(_prefix_getter, _prefix_setter)

Martin v. Löwis's avatar
Martin v. Löwis committed
306
    def set_child(self, i, child):
Benjamin Peterson's avatar
Benjamin Peterson committed
307 308 309 310
        """
        Equivalent to 'node.children[i] = child'. This method also sets the
        child's parent attribute appropriately.
        """
Martin v. Löwis's avatar
Martin v. Löwis committed
311 312 313
        child.parent = self
        self.children[i].parent = None
        self.children[i] = child
Benjamin Peterson's avatar
Benjamin Peterson committed
314
        self.changed()
Martin v. Löwis's avatar
Martin v. Löwis committed
315 316

    def insert_child(self, i, child):
Benjamin Peterson's avatar
Benjamin Peterson committed
317 318 319 320
        """
        Equivalent to 'node.children.insert(i, child)'. This method also sets
        the child's parent attribute appropriately.
        """
Martin v. Löwis's avatar
Martin v. Löwis committed
321 322
        child.parent = self
        self.children.insert(i, child)
Benjamin Peterson's avatar
Benjamin Peterson committed
323
        self.changed()
Martin v. Löwis's avatar
Martin v. Löwis committed
324 325

    def append_child(self, child):
Benjamin Peterson's avatar
Benjamin Peterson committed
326 327 328 329
        """
        Equivalent to 'node.children.append(child)'. This method also sets the
        child's parent attribute appropriately.
        """
Martin v. Löwis's avatar
Martin v. Löwis committed
330 331
        child.parent = self
        self.children.append(child)
Benjamin Peterson's avatar
Benjamin Peterson committed
332
        self.changed()
Martin v. Löwis's avatar
Martin v. Löwis committed
333 334 335 336 337 338 339


class Leaf(Base):

    """Concrete implementation for leaf nodes."""

    # Default values for instance variables
Benjamin Peterson's avatar
Benjamin Peterson committed
340 341 342
    _prefix = ""  # Whitespace and comments preceding this token in the input
    lineno = 0    # Line where this token starts in the input
    column = 0    # Column where this token tarts in the input
Martin v. Löwis's avatar
Martin v. Löwis committed
343 344

    def __init__(self, type, value, context=None, prefix=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
345 346
        """
        Initializer.
Martin v. Löwis's avatar
Martin v. Löwis committed
347

Benjamin Peterson's avatar
Benjamin Peterson committed
348 349
        Takes a type constant (a token number < 256), a string value, and an
        optional context keyword argument.
Martin v. Löwis's avatar
Martin v. Löwis committed
350 351 352
        """
        assert 0 <= type < 256, type
        if context is not None:
Benjamin Peterson's avatar
Benjamin Peterson committed
353
            self._prefix, (self.lineno, self.column) = context
Martin v. Löwis's avatar
Martin v. Löwis committed
354 355 356
        self.type = type
        self.value = value
        if prefix is not None:
Benjamin Peterson's avatar
Benjamin Peterson committed
357
            self._prefix = prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
358 359

    def __repr__(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
360
        """Return a canonical string representation."""
Martin v. Löwis's avatar
Martin v. Löwis committed
361 362 363 364
        return "%s(%r, %r)" % (self.__class__.__name__,
                               self.type,
                               self.value)

365
    def __unicode__(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
366 367
        """
        Return a pretty string representation.
Martin v. Löwis's avatar
Martin v. Löwis committed
368 369 370

        This reproduces the input source exactly.
        """
371 372 373 374
        return self.prefix + unicode(self.value)

    if sys.version_info > (3, 0):
        __str__ = __unicode__
Martin v. Löwis's avatar
Martin v. Löwis committed
375 376

    def _eq(self, other):
Benjamin Peterson's avatar
Benjamin Peterson committed
377
        """Compare two nodes for equality."""
Martin v. Löwis's avatar
Martin v. Löwis committed
378 379 380
        return (self.type, self.value) == (other.type, other.value)

    def clone(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
381
        """Return a cloned (deep) copy of self."""
Martin v. Löwis's avatar
Martin v. Löwis committed
382 383 384 385
        return Leaf(self.type, self.value,
                    (self.prefix, (self.lineno, self.column)))

    def post_order(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
386
        """Return a post-order iterator for the tree."""
Martin v. Löwis's avatar
Martin v. Löwis committed
387 388 389
        yield self

    def pre_order(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
390
        """Return a pre-order iterator for the tree."""
Martin v. Löwis's avatar
Martin v. Löwis committed
391 392
        yield self

Benjamin Peterson's avatar
Benjamin Peterson committed
393
    def _prefix_getter(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
394 395 396 397
        """
        The whitespace and comments preceding this token in the input.
        """
        return self._prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
398

Benjamin Peterson's avatar
Benjamin Peterson committed
399
    def _prefix_setter(self, prefix):
Benjamin Peterson's avatar
Benjamin Peterson committed
400 401
        self.changed()
        self._prefix = prefix
Martin v. Löwis's avatar
Martin v. Löwis committed
402

Benjamin Peterson's avatar
Benjamin Peterson committed
403
    prefix = property(_prefix_getter, _prefix_setter)
Martin v. Löwis's avatar
Martin v. Löwis committed
404 405

def convert(gr, raw_node):
Benjamin Peterson's avatar
Benjamin Peterson committed
406 407
    """
    Convert raw node information to a Node or Leaf instance.
Martin v. Löwis's avatar
Martin v. Löwis committed
408

Benjamin Peterson's avatar
Benjamin Peterson committed
409 410 411
    This is passed to the parser driver which calls it whenever a reduction of a
    grammar rule produces a new complete node, so that the tree is build
    strictly bottom-up.
Martin v. Löwis's avatar
Martin v. Löwis committed
412 413 414 415 416 417 418 419 420 421 422 423 424 425
    """
    type, value, context, children = raw_node
    if children or type in gr.number2symbol:
        # If there's exactly one child, return that child instead of
        # creating a new node.
        if len(children) == 1:
            return children[0]
        return Node(type, children, context=context)
    else:
        return Leaf(type, value, context=context)


class BasePattern(object):

Benjamin Peterson's avatar
Benjamin Peterson committed
426 427
    """
    A pattern is a tree matching pattern.
Martin v. Löwis's avatar
Martin v. Löwis committed
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450

    It looks for a specific node type (token or symbol), and
    optionally for a specific content.

    This is an abstract base class.  There are three concrete
    subclasses:

    - LeafPattern matches a single leaf node;
    - NodePattern matches a single node (usually non-leaf);
    - WildcardPattern matches a sequence of nodes of variable length.
    """

    # Defaults for instance variables
    type = None     # Node type (token if < 256, symbol if >= 256)
    content = None  # Optional content matching pattern
    name = None     # Optional name used to store match in results dict

    def __new__(cls, *args, **kwds):
        """Constructor that prevents BasePattern from being instantiated."""
        assert cls is not BasePattern, "Cannot instantiate BasePattern"
        return object.__new__(cls)

    def __repr__(self):
451
        args = [type_repr(self.type), self.content, self.name]
Martin v. Löwis's avatar
Martin v. Löwis committed
452 453 454 455 456
        while args and args[-1] is None:
            del args[-1]
        return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))

    def optimize(self):
Benjamin Peterson's avatar
Benjamin Peterson committed
457 458
        """
        A subclass can define this as a hook for optimizations.
Martin v. Löwis's avatar
Martin v. Löwis committed
459 460 461 462 463 464

        Returns either self or another node with the same effect.
        """
        return self

    def match(self, node, results=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
465 466
        """
        Does this pattern exactly match a node?
Martin v. Löwis's avatar
Martin v. Löwis committed
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489

        Returns True if it matches, False if not.

        If results is not None, it must be a dict which will be
        updated with the nodes matching named subpatterns.

        Default implementation for non-wildcard patterns.
        """
        if self.type is not None and node.type != self.type:
            return False
        if self.content is not None:
            r = None
            if results is not None:
                r = {}
            if not self._submatch(node, r):
                return False
            if r:
                results.update(r)
        if results is not None and self.name:
            results[self.name] = node
        return True

    def match_seq(self, nodes, results=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
490 491
        """
        Does this pattern exactly match a sequence of nodes?
Martin v. Löwis's avatar
Martin v. Löwis committed
492 493 494 495 496 497 498 499

        Default implementation for non-wildcard patterns.
        """
        if len(nodes) != 1:
            return False
        return self.match(nodes[0], results)

    def generate_matches(self, nodes):
Benjamin Peterson's avatar
Benjamin Peterson committed
500 501
        """
        Generator yielding all matches for this pattern.
Martin v. Löwis's avatar
Martin v. Löwis committed
502 503 504 505 506 507 508 509 510 511 512

        Default implementation for non-wildcard patterns.
        """
        r = {}
        if nodes and self.match(nodes[0], r):
            yield 1, r


class LeafPattern(BasePattern):

    def __init__(self, type=None, content=None, name=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
513 514
        """
        Initializer.  Takes optional type, content, and name.
Martin v. Löwis's avatar
Martin v. Löwis committed
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538

        The type, if given must be a token type (< 256).  If not given,
        this matches any *leaf* node; the content may still be required.

        The content, if given, must be a string.

        If a name is given, the matching node is stored in the results
        dict under that key.
        """
        if type is not None:
            assert 0 <= type < 256, type
        if content is not None:
            assert isinstance(content, basestring), repr(content)
        self.type = type
        self.content = content
        self.name = name

    def match(self, node, results=None):
        """Override match() to insist on a leaf node."""
        if not isinstance(node, Leaf):
            return False
        return BasePattern.match(self, node, results)

    def _submatch(self, node, results=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
539 540
        """
        Match the pattern's content to the node's children.
Martin v. Löwis's avatar
Martin v. Löwis committed
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558

        This assumes the node type matches and self.content is not None.

        Returns True if it matches, False if not.

        If results is not None, it must be a dict which will be
        updated with the nodes matching named subpatterns.

        When returning False, the results dict may still be updated.
        """
        return self.content == node.value


class NodePattern(BasePattern):

    wildcards = False

    def __init__(self, type=None, content=None, name=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
559 560
        """
        Initializer.  Takes optional type, content, and name.
Martin v. Löwis's avatar
Martin v. Löwis committed
561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587

        The type, if given, must be a symbol type (>= 256).  If the
        type is None this matches *any* single node (leaf or not),
        except if content is not None, in which it only matches
        non-leaf nodes that also match the content pattern.

        The content, if not None, must be a sequence of Patterns that
        must match the node's children exactly.  If the content is
        given, the type must not be None.

        If a name is given, the matching node is stored in the results
        dict under that key.
        """
        if type is not None:
            assert type >= 256, type
        if content is not None:
            assert not isinstance(content, basestring), repr(content)
            content = list(content)
            for i, item in enumerate(content):
                assert isinstance(item, BasePattern), (i, item)
                if isinstance(item, WildcardPattern):
                    self.wildcards = True
        self.type = type
        self.content = content
        self.name = name

    def _submatch(self, node, results=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
588 589
        """
        Match the pattern's content to the node's children.
Martin v. Löwis's avatar
Martin v. Löwis committed
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616

        This assumes the node type matches and self.content is not None.

        Returns True if it matches, False if not.

        If results is not None, it must be a dict which will be
        updated with the nodes matching named subpatterns.

        When returning False, the results dict may still be updated.
        """
        if self.wildcards:
            for c, r in generate_matches(self.content, node.children):
                if c == len(node.children):
                    if results is not None:
                        results.update(r)
                    return True
            return False
        if len(self.content) != len(node.children):
            return False
        for subpattern, child in zip(self.content, node.children):
            if not subpattern.match(child, results):
                return False
        return True


class WildcardPattern(BasePattern):

Benjamin Peterson's avatar
Benjamin Peterson committed
617 618
    """
    A wildcard pattern can match zero or more nodes.
Martin v. Löwis's avatar
Martin v. Löwis committed
619 620 621 622 623 624 625 626 627 628 629

    This has all the flexibility needed to implement patterns like:

    .*      .+      .?      .{m,n}
    (a b c | d e | f)
    (...)*  (...)+  (...)?  (...){m,n}

    except it always uses non-greedy matching.
    """

    def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
630 631
        """
        Initializer.
Martin v. Löwis's avatar
Martin v. Löwis committed
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698

        Args:
            content: optional sequence of subsequences of patterns;
                     if absent, matches one node;
                     if present, each subsequence is an alternative [*]
            min: optinal minumum number of times to match, default 0
            max: optional maximum number of times tro match, default HUGE
            name: optional name assigned to this match

        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
            equivalent to (a b c | d e | f g h); if content is None,
            this is equivalent to '.' in regular expression terms.
            The min and max parameters work as follows:
                min=0, max=maxint: .*
                min=1, max=maxint: .+
                min=0, max=1: .?
                min=1, max=1: .
            If content is not None, replace the dot with the parenthesized
            list of alternatives, e.g. (a b c | d e | f g h)*
        """
        assert 0 <= min <= max <= HUGE, (min, max)
        if content is not None:
            content = tuple(map(tuple, content))  # Protect against alterations
            # Check sanity of alternatives
            assert len(content), repr(content)  # Can't have zero alternatives
            for alt in content:
                assert len(alt), repr(alt) # Can have empty alternatives
        self.content = content
        self.min = min
        self.max = max
        self.name = name

    def optimize(self):
        """Optimize certain stacked wildcard patterns."""
        subpattern = None
        if (self.content is not None and
            len(self.content) == 1 and len(self.content[0]) == 1):
            subpattern = self.content[0][0]
        if self.min == 1 and self.max == 1:
            if self.content is None:
                return NodePattern(name=self.name)
            if subpattern is not None and  self.name == subpattern.name:
                return subpattern.optimize()
        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
            subpattern.min <= 1 and self.name == subpattern.name):
            return WildcardPattern(subpattern.content,
                                   self.min*subpattern.min,
                                   self.max*subpattern.max,
                                   subpattern.name)
        return self

    def match(self, node, results=None):
        """Does this pattern exactly match a node?"""
        return self.match_seq([node], results)

    def match_seq(self, nodes, results=None):
        """Does this pattern exactly match a sequence of nodes?"""
        for c, r in self.generate_matches(nodes):
            if c == len(nodes):
                if results is not None:
                    results.update(r)
                    if self.name:
                        results[self.name] = list(nodes)
                return True
        return False

    def generate_matches(self, nodes):
Benjamin Peterson's avatar
Benjamin Peterson committed
699 700
        """
        Generator yielding matches for a sequence of nodes.
Martin v. Löwis's avatar
Martin v. Löwis committed
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716

        Args:
            nodes: sequence of nodes

        Yields:
            (count, results) tuples where:
            count: the match comprises nodes[:count];
            results: dict containing named submatches.
        """
        if self.content is None:
            # Shortcut for special case (see __init__.__doc__)
            for count in xrange(self.min, 1 + min(len(nodes), self.max)):
                r = {}
                if self.name:
                    r[self.name] = nodes[:count]
                yield count, r
717 718
        elif self.name == "bare_name":
            yield self._bare_name_matches(nodes)
Martin v. Löwis's avatar
Martin v. Löwis committed
719
        else:
Benjamin Peterson's avatar
Benjamin Peterson committed
720 721 722 723 724
            # The reason for this is that hitting the recursion limit usually
            # results in some ugly messages about how RuntimeErrors are being
            # ignored.
            save_stderr = sys.stderr
            sys.stderr = StringIO()
725 726 727 728 729 730 731 732 733 734 735 736
            try:
                for count, r in self._recursive_matches(nodes, 0):
                    if self.name:
                        r[self.name] = nodes[:count]
                    yield count, r
            except RuntimeError:
                # We fall back to the iterative pattern matching scheme if the recursive
                # scheme hits the recursion limit.
                for count, r in self._iterative_matches(nodes):
                    if self.name:
                        r[self.name] = nodes[:count]
                    yield count, r
Benjamin Peterson's avatar
Benjamin Peterson committed
737 738
            finally:
                sys.stderr = save_stderr
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767

    def _iterative_matches(self, nodes):
        """Helper to iteratively yield the matches."""
        nodelen = len(nodes)
        if 0 >= self.min:
            yield 0, {}

        results = []
        # generate matches that use just one alt from self.content
        for alt in self.content:
            for c, r in generate_matches(alt, nodes):
                yield c, r
                results.append((c, r))

        # for each match, iterate down the nodes
        while results:
            new_results = []
            for c0, r0 in results:
                # stop if the entire set of nodes has been matched
                if c0 < nodelen and c0 <= self.max:
                    for alt in self.content:
                        for c1, r1 in generate_matches(alt, nodes[c0:]):
                            if c1 > 0:
                                r = {}
                                r.update(r0)
                                r.update(r1)
                                yield c0 + c1, r
                                new_results.append((c0 + c1, r))
            results = new_results
Martin v. Löwis's avatar
Martin v. Löwis committed
768

769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784
    def _bare_name_matches(self, nodes):
        """Special optimized matcher for bare_name."""
        count = 0
        r = {}
        done = False
        max = len(nodes)
        while not done and count < max:
            done = True
            for leaf in self.content:
                if leaf[0].match(nodes[count], r):
                    count += 1
                    done = False
                    break
        r[self.name] = nodes[:count]
        return count, r

Martin v. Löwis's avatar
Martin v. Löwis committed
785 786 787 788
    def _recursive_matches(self, nodes, count):
        """Helper to recursively yield the matches."""
        assert self.content is not None
        if count >= self.min:
789
            yield 0, {}
Martin v. Löwis's avatar
Martin v. Löwis committed
790 791 792 793 794 795 796 797 798 799 800 801 802
        if count < self.max:
            for alt in self.content:
                for c0, r0 in generate_matches(alt, nodes):
                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
                        r = {}
                        r.update(r0)
                        r.update(r1)
                        yield c0 + c1, r


class NegatedPattern(BasePattern):

    def __init__(self, content=None):
Benjamin Peterson's avatar
Benjamin Peterson committed
803 804
        """
        Initializer.
Martin v. Löwis's avatar
Martin v. Löwis committed
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835

        The argument is either a pattern or None.  If it is None, this
        only matches an empty sequence (effectively '$' in regex
        lingo).  If it is not None, this matches whenever the argument
        pattern doesn't have any matches.
        """
        if content is not None:
            assert isinstance(content, BasePattern), repr(content)
        self.content = content

    def match(self, node):
        # We never match a node in its entirety
        return False

    def match_seq(self, nodes):
        # We only match an empty sequence of nodes in its entirety
        return len(nodes) == 0

    def generate_matches(self, nodes):
        if self.content is None:
            # Return a match if there is an empty sequence
            if len(nodes) == 0:
                yield 0, {}
        else:
            # Return a match if the argument pattern has no matches
            for c, r in self.content.generate_matches(nodes):
                return
            yield 0, {}


def generate_matches(patterns, nodes):
Benjamin Peterson's avatar
Benjamin Peterson committed
836 837
    """
    Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwis's avatar
Martin v. Löwis committed
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860

    Args:
        patterns: a sequence of patterns
        nodes: a sequence of nodes

    Yields:
        (count, results) tuples where:
        count: the entire sequence of patterns matches nodes[:count];
        results: dict containing named submatches.
        """
    if not patterns:
        yield 0, {}
    else:
        p, rest = patterns[0], patterns[1:]
        for c0, r0 in p.generate_matches(nodes):
            if not rest:
                yield c0, r0
            else:
                for c1, r1 in generate_matches(rest, nodes[c0:]):
                    r = {}
                    r.update(r0)
                    r.update(r1)
                    yield c0 + c1, r