Kaydet (Commit) c51d586a authored tarafından Berker Peksag's avatar Berker Peksag

Merge pull request #40 from berkerpeksag/pm_develop

pm_develop contains all the work I have been doing over the last couple of weeks
......@@ -15,10 +15,13 @@ astor is designed to allow easy manipulation of Python source via the AST.
There are some other similar libraries, but astor focuses on the following areas:
- Round-trip back to Python via Armin Ronacher's codegen.py module:
- Round-trip an AST back to Python:
- Modified AST doesn't need linenumbers, ctx, etc. or otherwise be directly compileable
- Modified AST doesn't need linenumbers, ctx, etc. or otherwise
be directly compileable for the round-trip to work.
- Easy to read generated code as, well, code
- Can round-trip two different source trees to compare for functional
differences, using the astor.rtrip tool (for example, after PEP8 edits).
- Dump pretty-printing of AST
......
......@@ -9,8 +9,6 @@ Copyright 2013 (c) Berker Peksag
"""
__version__ = '0.6'
from .code_gen import to_source # NOQA
from .node_util import iter_node, strip_tree, dump_tree
from .node_util import ExplicitNodeVisitor
......@@ -19,8 +17,10 @@ from .op_util import get_op_symbol, get_op_precedence # NOQA
from .op_util import symbol_data
from .tree_walk import TreeWalk # NOQA
__version__ = '0.6'
#DEPRECATED!!!
# DEPRECATED!!!
# These aliases support old programs. Please do not use in future.
......@@ -30,7 +30,7 @@ from .tree_walk import TreeWalk # NOQA
# things could be accessed from their submodule.
get_boolop = get_binop = get_cmpop = get_unaryop = get_op_symbol # NOQA
get_boolop = get_binop = get_cmpop = get_unaryop = get_op_symbol # NOQA
get_anyop = get_op_symbol
parsefile = code_to_ast.parse_file
codetoast = code_to_ast
......
This diff is collapsed.
......@@ -4,8 +4,8 @@ Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright 2012-2015 (c) Patrick Maupin
Copyright 2013-2015 (c) Berker Peksag
Copyright (c) 2012-2015 Patrick Maupin
Copyright (c) 2013-2015 Berker Peksag
Functions that interact with the filesystem go here.
......@@ -37,6 +37,8 @@ class CodeToAst(object):
designed to be used in code that uses this class.
"""
if not os.path.isdir(srctree):
yield os.path.split(srctree)
for srcpath, _, fnames in os.walk(srctree):
# Avoid infinite recursion for silly users
if ignore is not None and ignore in srcpath:
......@@ -52,14 +54,19 @@ class CodeToAst(object):
TODO: Handle encodings other than the default (issue #26)
"""
with open(fname, 'r') as f:
fstr = f.read()
try:
with open(fname, 'r') as f:
fstr = f.read()
except IOError:
if fname != 'stdin':
raise
sys.stdout.write('\nReading from stdin:\n\n')
fstr = sys.stdin.read()
fstr = fstr.replace('\r\n', '\n').replace('\r', '\n')
if not fstr.endswith('\n'):
fstr += '\n'
return ast.parse(fstr, filename=fname)
@staticmethod
def get_file_info(codeobj):
"""Returns the file and line number of a code object.
......
......@@ -53,10 +53,10 @@ def iter_node(node, name='', unknown=None,
def dump_tree(node, name=None, initial_indent='', indentation=' ',
maxline=120, maxmerged=80,
#Runtime optimization
iter_node=iter_node, special=ast.AST,
list=list, isinstance=isinstance, type=type, len=len):
maxline=120, maxmerged=80,
# Runtime optimization
iter_node=iter_node, special=ast.AST,
list=list, isinstance=isinstance, type=type, len=len):
"""Dumps an AST or similar structure:
- Pretty-prints with indentation
......@@ -87,9 +87,9 @@ def dump_tree(node, name=None, initial_indent='', indentation=' ',
def strip_tree(node,
#Runtime optimization
iter_node=iter_node, special=ast.AST,
list=list, isinstance=isinstance, type=type, len=len):
# Runtime optimization
iter_node=iter_node, special=ast.AST,
list=list, isinstance=isinstance, type=type, len=len):
"""Strips an AST by removing all attributes not in _fields.
Returns a set of the names of all attributes stripped.
......@@ -97,6 +97,7 @@ def strip_tree(node,
This canonicalizes two trees for comparison purposes.
"""
stripped = set()
def strip(node, indent):
unknown = set()
leaf = True
......@@ -134,3 +135,31 @@ class ExplicitNodeVisitor(ast.NodeVisitor):
method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method, abort)
return visitor(node)
def allow_ast_comparison():
    """This ugly little monkey-patcher adds in a helper class
    to all the AST node types. This helper class allows
    eq/ne comparisons to work, so that entire trees can
    be easily compared by Python's comparison machinery.
    Used by the anti8 functions to compare old and new ASTs.
    Could also be used by the test library.
    """

    class CompareHelper(object):
        # Structural equality: same concrete node type and the same
        # attribute dict.  Lists of child nodes compare element-wise,
        # so whole trees compare recursively once this is mixed in.
        def __eq__(self, other):
            return type(self) == type(other) and vars(self) == vars(other)

        def __ne__(self, other):
            return type(self) != type(other) or vars(self) != vars(other)

    for item in vars(ast).values():
        # Only consider actual classes exported by the ast module.
        if type(item) != type:
            continue
        if issubclass(item, ast.AST):
            try:
                # Splice CompareHelper into each node type's bases.
                item.__bases__ = tuple(list(item.__bases__) + [CompareHelper])
            except TypeError:
                # Some types (e.g. ast.AST itself) refuse __bases__
                # assignment; those are silently skipped.
                pass
......@@ -4,7 +4,7 @@ Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2012-2015 Patrick Maupin
Copyright (c) 2015 Patrick Maupin
This module provides data and functions for mapping
AST nodes to symbols and precedences.
......@@ -14,48 +14,91 @@ AST nodes to symbols and precedences.
import ast
op_data = """
Or or 4
And and 6
Not not 8
Eq == 10
Gt > 10
GtE >= 10
In in 10
Is is 10
NotEq != 10
Lt < 10
LtE <= 10
NotIn not in 10
IsNot is not 10
BitOr | 12
BitXor ^ 14
BitAnd & 16
LShift << 18
RShift >> 18
Add + 20
Sub - 20
Mult * 22
Div / 22
Mod % 22
FloorDiv // 22
MatMult @ 22
UAdd + 24
USub - 24
Invert ~ 24
Pow ** 26
GeneratorExp 1
Assign 1
AugAssign 0
Expr 0
Yield 1
YieldFrom 0
If 1
For 0
While 0
Return 1
Slice 1
Subscript 0
Index 1
ExtSlice 1
comprehension_target 1
Tuple 0
Comma 1
Assert 0
Raise 0
call_one_arg 1
Lambda 1
IfExp 0
comprehension 1
Or or 1
And and 1
Not not 1
Eq == 1
Gt > 0
GtE >= 0
In in 0
Is is 0
NotEq != 0
Lt < 0
LtE <= 0
NotIn not in 0
IsNot is not 0
BitOr | 1
BitXor ^ 1
BitAnd & 1
LShift << 1
RShift >> 0
Add + 1
Sub - 0
Mult * 1
Div / 0
Mod % 0
FloorDiv // 0
MatMult @ 0
PowRHS 1
Invert ~ 1
UAdd + 0
USub - 0
Pow ** 1
Num 1
"""
op_data = [x.split() for x in op_data.splitlines()]
op_data = [(x[0], ' '.join(x[1:-1]), int(x[-1])) for x in op_data if x]
op_data = [[x[0], ' '.join(x[1:-1]), int(x[-1])] for x in op_data if x]
for index in range(1, len(op_data)):
op_data[index][2] *= 2
op_data[index][2] += op_data[index - 1][2]
precedence_data = dict((getattr(ast, x, None), z) for x, y, z in op_data)
symbol_data = dict((getattr(ast, x, None), y) for x, y, z in op_data)
def get_op_symbol(obj, fmt='%s', symbol_data=symbol_data, type=type):
    """Given an AST node object, returns a string containing the symbol.

    `fmt` may be used to wrap/pad the symbol (e.g. ' %s ').
    symbol_data and type are bound as defaults purely for lookup speed.
    Raises KeyError for node types not present in the op_data table.
    """
    return fmt % symbol_data[type(obj)]
def get_op_precedence(obj, precedence_data=precedence_data, type=type):
    """Given an AST node object, returns the precedence.

    precedence_data and type are bound as defaults purely for speed.
    Raises KeyError for node types not present in the op_data table.
    """
    return precedence_data[type(obj)]
class Precedence(object):
    # Expose each op_data entry's precedence as a class attribute
    # (e.g. Precedence.Add), generated from the table above.
    vars().update((x, z) for x, y, z in op_data)
    # A value strictly greater than any real precedence.
    highest = max(z for x, y, z in op_data) + 2
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Usage:
python -m astor.rtrip [readonly] [<source>]
This utility tests round-tripping of Python source to AST
and back to source.
.. versionadded:: 0.6
If readonly is specified, then the source will be tested,
but no files will be written.
if the source is specified to be "stdin" (without quotes)
then any source entered at the command line will be compiled
into an AST, converted back to text, and then compiled to
an AST again, and the results will be displayed to stdout.
If neither readonly nor stdin is specified, then rtrip
will create a mirror directory named tmp_rtrip and will
recursively round-trip all the Python source from the source
into the tmp_rtrip dir, after compiling it and then reconstituting
it through code_gen.to_source.
If the source is not specified, the entire Python library will be used.
The purpose of rtrip is to place Python code into a canonical form.
This is useful both for functional testing of astor, and for
validating code edits.
For example, if you make manual edits for PEP8 compliance,
you can diff the rtrip output of the original code against
the rtrip output of the edited code, to insure that you
didn't make any functional changes.
For testing astor itself, it is useful to point to a big codebase,
e.g::
python -m astor.rtrip
to roundtrip the standard library.
If any round-tripped files fail to be built or to match, the
tmp_rtrip directory will also contain fname.srcdmp and fname.dstdmp,
which are textual representations of the ASTs.
Note 1:
The canonical form is only canonical for a given version of
this module and the astor toolbox. It is not guaranteed to
be stable. The only desired guarantee is that two source modules
that parse to the same AST will be converted back into the same
canonical form.
Note 2:
This tool WILL TRASH the tmp_rtrip directory (unless readonly
is specified) -- as far as it is concerned, it OWNS that directory.
Note 3: Why is it "readonly" and not "-r"? Because python -m slurps
all the thingies starting with the dash.
"""
import sys
import os
import ast
import shutil
import logging
from astor.code_gen import to_source
from astor.file_util import code_to_ast
from astor.node_util import allow_ast_comparison, dump_tree, strip_tree
dsttree = 'tmp_rtrip'
def convert(srctree, dsttree=dsttree, readonly=False, dumpall=False):
    """Walk the srctree, and convert/copy all python files
    into the dsttree

    readonly -- only test round-tripping; write no files.
    dumpall  -- emit AST/source dumps even for files that match.
    """
    # The AST equality comparisons below rely on node __eq__/__ne__
    # being patched in by allow_ast_comparison().
    allow_ast_comparison()
    parse_file = code_to_ast.parse_file
    find_py_files = code_to_ast.find_py_files
    srctree = os.path.normpath(srctree)
    if not readonly:
        dsttree = os.path.normpath(dsttree)
        logging.info('')
        logging.info('Trashing ' + dsttree)
        # This function owns the mirror tree and wipes it first.
        shutil.rmtree(dsttree, True)
    unknown_src_nodes = set()
    unknown_dst_nodes = set()
    badfiles = set()
    broken = []
    # TODO: When issue #26 resolved, remove UnicodeDecodeError
    handled_exceptions = SyntaxError, UnicodeDecodeError
    oldpath = None
    allfiles = find_py_files(srctree, None if readonly else dsttree)
    for srcpath, fname in allfiles:
        # Create destination directory
        if not readonly and srcpath != oldpath:
            oldpath = srcpath
            if srcpath >= srctree:
                dstpath = srcpath.replace(srctree, dsttree, 1)
                if not dstpath.startswith(dsttree):
                    raise ValueError("%s not a subdirectory of %s" %
                                     (dstpath, dsttree))
            else:
                # Single-file source: srcpath is a parent of srctree.
                assert srctree.startswith(srcpath)
                dstpath = dsttree
            os.makedirs(dstpath)
        srcfname = os.path.join(srcpath, fname)
        logging.info('Converting %s' % srcfname)
        try:
            srcast = parse_file(srcfname)
        except handled_exceptions:
            badfiles.add(srcfname)
            continue
        dsttxt = to_source(srcast)
        if not readonly:
            dstfname = os.path.join(dstpath, fname)
            try:
                with open(dstfname, 'w') as f:
                    f.write(dsttxt)
            except UnicodeEncodeError:
                badfiles.add(dstfname)
        # As a sanity check, make sure that ASTs themselves
        # round-trip OK
        try:
            dstast = ast.parse(dsttxt) if readonly else parse_file(dstfname)
        except SyntaxError:
            dstast = []
        unknown_src_nodes.update(strip_tree(srcast))
        unknown_dst_nodes.update(strip_tree(dstast))
        if dumpall or srcast != dstast:
            srcdump = dump_tree(srcast)
            dstdump = dump_tree(dstast)
            bad = srcdump != dstdump
            logging.warning(' calculating dump -- %s' %
                            ('bad' if bad else 'OK'))
            if bad:
                broken.append(srcfname)
            if dumpall or bad:
                if not readonly:
                    # Leave textual AST dumps beside the output file
                    # for manual diffing.
                    try:
                        with open(dstfname[:-3] + '.srcdmp', 'w') as f:
                            f.write(srcdump)
                    except UnicodeEncodeError:
                        badfiles.add(dstfname[:-3] + '.srcdmp')
                    try:
                        with open(dstfname[:-3] + '.dstdmp', 'w') as f:
                            f.write(dstdump)
                    except UnicodeEncodeError:
                        badfiles.add(dstfname[:-3] + '.dstdmp')
                elif dumpall:
                    # stdin/dump mode: show everything on stdout.
                    sys.stdout.write('\n\nAST:\n\n ')
                    sys.stdout.write(srcdump.replace('\n', '\n '))
                    sys.stdout.write('\n\nDecompile:\n\n ')
                    sys.stdout.write(dsttxt.replace('\n', '\n '))
                    sys.stdout.write('\n\nNew AST:\n\n ')
                    sys.stdout.write('(same as old)' if dstdump == srcdump
                                     else dstdump.replace('\n', '\n '))
                    sys.stdout.write('\n')
    # Summary reporting.
    if badfiles:
        logging.warning('\nFiles not processed due to syntax errors:')
        for fname in sorted(badfiles):
            logging.warning(' %s' % fname)
    if broken:
        logging.warning('\nFiles failed to round-trip to AST:')
        for srcfname in broken:
            logging.warning(' %s' % srcfname)
    # Attributes strip_tree is expected to remove; anything else
    # stripped indicates an attribute we do not know about.
    ok_to_strip = 'col_offset _precedence _use_parens lineno _p_op _pp'
    ok_to_strip = set(ok_to_strip.split())
    bad_nodes = (unknown_dst_nodes | unknown_src_nodes) - ok_to_strip
    if bad_nodes:
        logging.error('\nERROR -- UNKNOWN NODES STRIPPED: %s' % bad_nodes)
    logging.info('\n')
def usage(msg):
    """Report a command-line error plus the help text and exit.

    Raises SystemExit carrying the formatted message.
    """
    # Fix: `import textwrap` previously happened only inside the
    # ``if __name__ == '__main__'`` block, so calling usage() from
    # an importing module would raise NameError instead of showing
    # the help text.  Import it locally so the function is
    # self-contained.
    import textwrap
    raise SystemExit(textwrap.dedent("""
        Error: %s

        Usage:

            python -m astor.rtrip [readonly] [<source>]

        This utility tests round-tripping of Python source to AST
        and back to source.

        If readonly is specified, then the source will be tested,
        but no files will be written.

        If the source is specified to be "stdin" (without quotes)
        then any source entered at the command line will be compiled
        into an AST, converted back to text, and then compiled to
        an AST again, and the results will be displayed to stdout.

        If neither readonly nor stdin is specified, then rtrip
        will create a mirror directory named tmp_rtrip and will
        recursively round-trip all the Python source from the source
        into the tmp_rtrip dir, after compiling it and then reconstituting
        it through code_gen.to_source.

        If the source is not specified, the entire Python library will be used.

        """) % msg)
if __name__ == '__main__':
    import textwrap
    args = sys.argv[1:]
    # "readonly" may appear anywhere on the command line.
    readonly = 'readonly' in args
    if readonly:
        args.remove('readonly')
    if not args:
        # Default source: the stdlib directory that contains textwrap.
        args = [os.path.dirname(textwrap.__file__)]
    if len(args) > 1:
        usage("Too many arguments")
    fname, = args
    dumpall = False
    if not os.path.exists(fname):
        # "stdin" is a pseudo-filename handled downstream; any other
        # nonexistent path is an error (usage() raises SystemExit).
        dumpall = fname == 'stdin' or usage("Cannot find directory %s" % fname)
    logging.basicConfig(format='%(msg)s', level=logging.INFO)
    convert(fname, readonly=readonly or dumpall, dumpall=dumpall)
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Pretty-print source -- post-process for the decompiler
The goals of the initial cut of this engine are:
1) Do a passable, if not PEP8, job of line-wrapping.
2) Serve as an example of an interface to the decompiler
for anybody who wants to do a better job. :)
"""
def pretty_source(source):
    """Return the token stream *source* as one prettified string.

    Lines are split/wrapped by split_lines(), nested token lists are
    flattened, and the result is joined into a single string.
    """
    wrapped = split_lines(source)
    return ''.join(flatten(wrapped))
def flatten(source, list=list, isinstance=isinstance):
    """Lazily yield the leaves of arbitrarily nested lists, in order.

    list and isinstance are bound as defaults for lookup speed.
    """
    def walk(items):
        # Explicit stack of iterators instead of recursion.
        pending = [iter(items)]
        while pending:
            try:
                element = next(pending[-1])
            except StopIteration:
                pending.pop()
                continue
            if isinstance(element, list):
                pending.append(iter(element))
            else:
                yield element
    return walk(source)
def split_lines(source, maxline=79):
    """Group the token stream into lines at '\n' tokens.

    Short (or multiline-string) lines are yielded as-is; overlong
    single-line groups are handed to wrap_line() for wrapping.
    """
    buffered = []
    has_multiline = False
    width = 0
    for token in source:
        if not token.startswith('\n'):
            # Accumulate the current line.
            buffered.append(token)
            has_multiline = '\n' in token
            width += len(token)
            continue
        # Hit a line break: flush what we have, then the break itself.
        if buffered:
            if has_multiline or width <= maxline:
                yield buffered
            else:
                for piece in wrap_line(buffered, maxline):
                    yield piece
            width = 0
            has_multiline = False
            buffered = []
        yield token
def count(group):
    """Return the total character length of all strings in *group*."""
    return sum(map(len, group))
def wrap_line(line, maxline=79, count=count):
    """ We have a line that is too long,
    so we're going to try to wrap it.

    Yields a mix of strings and token lists; the caller flattens.
    count is bound as a default for speed.
    """
    # Extract the indentation
    indentation = line[0]
    lenfirst = len(indentation)
    indent = lenfirst - len(indentation.strip())
    assert indent in (0, lenfirst)
    indentation = line.pop(0) if indent else ''
    # Get splittable/non-splittable groups
    dgroups = list(delimiter_groups(line))
    unsplittable = dgroups[::2]
    splittable = dgroups[1::2]
    # If the largest non-splittable group won't fit
    # on a line, try to add parentheses to the line.
    if max(count(x) for x in unsplittable) > maxline - indent:
        line = add_parens(line, maxline, indent)
        dgroups = list(delimiter_groups(line))
        unsplittable = dgroups[::2]
        splittable = dgroups[1::2]
    # Deal with the first (always unsplittable) group, and
    # then set up to deal with the remainder in pairs.
    first = unsplittable[0]
    yield indentation
    yield first
    if not splittable:
        return
    pos = indent + count(first)
    # NOTE(review): pads the indentation by one space here while
    # `indent` grows by 4 below -- looks like a four-space pad whose
    # whitespace was collapsed somewhere; confirm against upstream.
    indentation += ' '
    indent += 4
    if indent >= maxline/2:
        maxline = maxline/2 + indent
    for sg, nsg in zip(splittable, unsplittable[1:]):
        if sg:
            # If we already have stuff on the line and even
            # the very first item won't fit, start a new line
            if pos > indent and pos + len(sg[0]) > maxline:
                yield '\n'
                yield indentation
                pos = indent
            # Dump lines out of the splittable group
            # until the entire thing fits
            csg = count(sg)
            while pos + csg > maxline:
                ready, sg = split_group(sg, pos, maxline)
                # Strip a trailing space before the line break.
                if ready[-1].endswith(' '):
                    ready[-1] = ready[-1][:-1]
                yield ready
                yield '\n'
                yield indentation
                pos = indent
                csg = count(sg)
            # Dump the remainder of the splittable group
            if sg:
                yield sg
                pos += csg
        # Dump the unsplittable group, optionally
        # preceded by a linefeed.
        cnsg = count(nsg)
        if pos > indent and pos + cnsg > maxline:
            yield '\n'
            yield indentation
            pos = indent
        yield nsg
        pos += cnsg
def split_group(source, pos, maxline):
    """ Split a group into two subgroups. The
    first is appended to the current line, the
    second starts the new line.  The first group
    always contains at least one item.
    """
    taken = []
    idx = 0
    total = len(source)
    while idx < total:
        tok = source[idx]
        taken.append(tok)
        pos += len(tok)
        idx += 1
        if idx < total:
            # Peek at the next token; break before it if it would
            # push us past the allowance.
            nxt = source[idx]
            allowed = (maxline + 1) if nxt.endswith(' ') else (maxline - 4)
            if pos + len(nxt) > allowed:
                break
    return taken, source[idx:]
begin_delim = set('([{')
end_delim = set(')]}')
# '):' is also treated as a closing token -- presumably emitted as a
# single token by the code generator for def lines; confirm.
end_delim.add('):')


def delimiter_groups(line, begin_delim=begin_delim,
                     end_delim=end_delim):
    """Split a line into alternating groups.
    The first group cannot have a line feed inserted,
    the next one can, etc.
    """
    text = []
    line = iter(line)
    while True:
        # First build and yield an unsplittable group
        for item in line:
            text.append(item)
            if item in begin_delim:
                break
        if not text:
            break
        yield text
        # Now build and yield a splittable group
        level = 0
        text = []
        for item in line:
            if item in begin_delim:
                level += 1
            elif item in end_delim:
                level -= 1
                if level < 0:
                    # The delimiter that opened this group just
                    # closed: the splittable part ends here and the
                    # closer seeds the next unsplittable group.
                    yield text
                    text = [item]
                    break
            text.append(item)
        else:
            # Token stream exhausted inside a splittable group;
            # only legal when the group is empty.
            assert not text, text
            break
# Statement keywords after which a wrapping paren may be inserted.
# NOTE(review): 'return' has no trailing space unlike the others --
# handled by the endswith(' ') special case below; confirm intent.
statements = set(['del ', 'return', 'yield ', 'if ', 'while '])


def add_parens(line, maxline, indent, statements=statements, count=count):
    """Attempt to add parentheses around the line
    in order to make it splittable.

    Mutates and/or rebuilds *line*; returns the new token list.
    """
    if line[0] in statements:
        index = 1
        if not line[0].endswith(' '):
            # Keyword and the following space are separate tokens.
            index = 2
            assert line[1] == ' '
        line.insert(index, '(')
        if line[-1] == ':':
            line.insert(-1, ')')
        else:
            line.append(')')
    # That was the easy stuff. Now for assignments.
    groups = list(get_assign_groups(line))
    if len(groups) == 1:
        # So sad, too bad -- no assignment operators to split on.
        return line
    counts = list(count(x) for x in groups)
    didwrap = False
    # If the LHS is large, wrap it first
    if sum(counts[:-1]) >= maxline - indent - 4:
        for group in groups[:-1]:
            didwrap = False  # Only want to know about last group
            if len(group) > 1:
                group.insert(0, '(')
                group.insert(-1, ')')
                didwrap = True
    # Might not need to wrap the RHS if wrapped the LHS
    if not didwrap or counts[-1] > maxline - indent - 10:
        groups[-1].insert(0, '(')
        groups[-1].append(')')
    return [item for group in groups for item in group]
# Assignment operators
ops = list('|^&+-*/%@~') + '<< >> // **'.split() + ['']
ops = set(' %s= ' % x for x in ops)


def get_assign_groups(line, ops=ops):
    """Split *line* into groups at assignment (and augmented
    assignment) operator tokens; each operator ends its group.
    Always yields at least one (possibly empty) trailing group.
    """
    start = 0
    for position, token in enumerate(line):
        if token in ops:
            yield line[start:position + 1]
            start = position + 1
    yield line[start:]
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Pretty-print strings for the decompiler
We either return the repr() of the string,
or try to format it as a triple-quoted string.
This is a lot harder than you would think.
This has lots of Python 2 / Python 3 ugliness.
"""
import re
import logging
try:
    # Python 2: a real unicode type exists; such strings are punted
    # on by pretty_string below.
    special_unicode = unicode
except NameError:
    # Python 3: no separate unicode type, so use an empty dummy class
    # that nothing is an instance of.
    class special_unicode(object):
        pass

try:
    basestring = basestring  # Python 2
except NameError:
    basestring = str  # Python 3
def _get_line(current_output):
""" Back up in the output buffer to
find the start of the current line,
and return the entire line.
"""
myline = []
index = len(current_output)
while index:
index -= 1
try:
s = str(current_output[index])
except:
raise
myline.append(s)
if '\n' in s:
break
myline = ''.join(reversed(myline))
return myline.rsplit('\n', 1)[-1]
def _properly_indented(s, current_line):
line_indent = len(current_line) - len(current_line.lstrip())
mylist = s.split('\n')[1:]
mylist = [x.rstrip() for x in mylist]
mylist = [x for x in mylist if x]
if not s:
return False
counts = [(len(x) - len(x.lstrip())) for x in mylist]
return counts and min(counts) >= line_indent
mysplit = re.compile(r'(\\|\"\"\"|\"$)').split
replacements = {'\\': '\\\\', '"""': '""\\"', '"': '\\"'}
def _prep_triple_quotes(s, mysplit=mysplit, replacements=replacements):
""" Split the string up and force-feed some replacements
to make sure it will round-trip OK
"""
s = mysplit(s)
s[1::2] = (replacements[x] for x in s[1::2])
return ''.join(s)
def pretty_string(s, current_output, min_trip_str=20, max_line=100):
    """There are a lot of reasons why we might not want to or
    be able to return a triple-quoted string. We can always
    punt back to the default normal string.

    Fixes: removed a dead triple-quoted block of commented-out debug
    code that sat as a no-op expression before the final return, and
    narrowed the bare ``except:`` to ``except Exception:`` so things
    like KeyboardInterrupt are no longer swallowed.
    """
    default = repr(s)
    # Punt on abnormal strings (Py2 unicode, non-strings).
    if (isinstance(s, special_unicode) or not isinstance(s, basestring)):
        return default
    len_s = len(default)
    current_line = _get_line(current_output)
    if current_line.strip():
        # Mid-line: short strings aren't worth triple-quoting.
        if len_s < min_trip_str:
            return default
        total_len = len(current_line) + len_s
        if total_len < max_line and not _properly_indented(s, current_line):
            return default
    fancy = '"""%s"""' % _prep_triple_quotes(s)
    # Sometimes this doesn't work. One reason is that
    # the AST has no understanding of whether \r\n was
    # entered that way in the string or was a cr/lf in the
    # file. So we punt just so we can round-trip properly.
    try:
        # eval() here only evaluates a string literal we built
        # ourselves -- not untrusted input.
        if eval(fancy) == s and '\r' not in fancy:
            return fancy
    except Exception:
        pass
    return default
......@@ -7,6 +7,9 @@ License: 3-clause BSD
Copyright 2012 (c) Patrick Maupin
Copyright 2013 (c) Berker Peksag
This file contains a TreeWalk class that views a node tree
as a unified whole and allows several modes of traversal.
"""
from .node_util import iter_node
......@@ -76,9 +79,9 @@ class TreeWalk(MetaFlatten):
methods can be written. They will be called in alphabetical order.
"""
nodestack = None
def __init__(self, node=None):
self.nodestack = []
self.setup()
if node is not None:
self.walk(node)
......@@ -106,11 +109,11 @@ class TreeWalk(MetaFlatten):
"""
pre_handlers = self.pre_handlers.get
post_handlers = self.post_handlers.get
oldstack = self.nodestack
self.nodestack = nodestack = []
nodestack = self.nodestack
emptystack = len(nodestack)
append, pop = nodestack.append, nodestack.pop
append([node, name, list(iter_node(node, name + '_item')), -1])
while nodestack:
while len(nodestack) > emptystack:
node, name, subnodes, index = nodestack[-1]
if index >= len(subnodes):
handler = (post_handlers(type(node).__name__) or
......@@ -138,7 +141,6 @@ class TreeWalk(MetaFlatten):
else:
node, name = subnodes[index]
append([node, name, list(iter_node(node, name + '_item')), -1])
self.nodestack = oldstack
@property
def parent(self):
......
......@@ -208,4 +208,69 @@ Functions
get_unaryop, and get_anyop.
Command line utilities
--------------------------
rtrip
''''''
There is currently one command-line utility::
python -m astor.rtrip [readonly] [<source>]
This utility tests round-tripping of Python source to AST
and back to source.
.. versionadded:: 0.6
If readonly is specified, then the source will be tested,
but no files will be written.
If the source is specified to be "stdin" (without quotes)
then any source entered at the command line will be compiled
into an AST, converted back to text, and then compiled to
an AST again, and the results will be displayed to stdout.
If neither readonly nor stdin is specified, then rtrip
will create a mirror directory named tmp_rtrip and will
recursively round-trip all the Python source from the source
into the tmp_rtrip dir, after compiling it and then reconstituting
it through code_gen.to_source.
If the source is not specified, the entire Python library will be used.
The purpose of rtrip is to place Python code into a canonical form.
This is useful both for functional testing of astor, and for
validating code edits.
For example, if you make manual edits for PEP8 compliance,
you can diff the rtrip output of the original code against
the rtrip output of the edited code, to ensure that you
didn't make any functional changes.
For testing astor itself, it is useful to point to a big codebase,
e.g::
python -m astor.rtrip
to round-trip the standard library.
If any round-tripped files fail to be built or to match, the
tmp_rtrip directory will also contain fname.srcdmp and fname.dstdmp,
which are textual representations of the ASTs.
Note 1:
The canonical form is only canonical for a given version of
this module and the astor toolbox. It is not guaranteed to
be stable. The only desired guarantee is that two source modules
that parse to the same AST will be converted back into the same
canonical form.
Note 2:
This tool WILL TRASH the tmp_rtrip directory (unless readonly
is specified) -- as far as it is concerned, it OWNS that directory.
.. _GitHub: https://github.com/berkerpeksag/astor/
......@@ -36,5 +36,5 @@ setup(
'Topic :: Software Development :: Code Generators',
'Topic :: Software Development :: Compilers',
],
keywords='ast, codegen',
keywords='ast, codegen, PEP8',
)
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
This module generates a lot of permutations of Python
expressions, and dumps them into a python module
all_expr_x_y.py (where x and y are the python version tuple)
as a string.
This string is later used by check_expressions.
This module takes a loooooooooong time to execute.
"""
import sys
import collections
import itertools
import textwrap
import ast
import astor
# Operator/operand tables consumed by get_primitives().  The tuple
# index is the operator arity; '$' marks an operand slot and '_' is
# replaced by a space (see get_primitives).
all_operators = (
    # Selected special operands
    '3 -3 () yield',
    # operators with one parameter
    'yield lambda_: not + - ~ $, yield_from',
    # operators with two parameters
    'or and == != > >= < <= in not_in is is_not '
    '| ^ & << >> + - * / % // @ ** for$in$ $($) $[$] . '
    '$,$ ',
    # operators with 3 parameters
    '$if$else$ $for$in$'
)
# Reduced table used for the 3-operator pass in makelib().
select_operators = (
    # Selected special operands -- remove
    # some at redundant precedence levels
    '-3',
    # operators with one parameter
    'yield lambda_: not - ~ $,',
    # operators with two parameters
    'or and == in is '
    '| ^ & >> - % ** for$in$ $($) . ',
    # operators with 3 parameters
    '$if$else$ $for$in$'
)
def get_primitives(base):
    """Attempt to return formatting strings for all operators,
    and selected operands.
    Here, I use the term operator loosely to describe anything
    that accepts an expression and can be used in an additional
    expression.

    Returns (operators, operands): operators is a list of
    (format_string, nparams) pairs, operands a list of strings.
    """
    operands = []
    operators = []
    # The index of each string in `base` is the operator's arity.
    for nparams, s in enumerate(base):
        # Escape literal % before turning $ slots into %s.
        s = s.replace('%', '%%').split()
        for s in (x.replace('_', ' ') for x in s):
            if nparams and '$' not in s:
                # Bare operator token: synthesize its operand slot(s).
                assert nparams in (1, 2)
                s = '%s%s$' % ('$' if nparams == 2 else '', s)
            assert nparams == s.count('$'), (nparams, s)
            # Each $ becomes a %s operand slot.
            s = s.replace('$', ' %s ').strip()
            # Normalize the spacing
            s = s.replace(' ,', ',')
            s = s.replace(' . ', '.')
            s = s.replace(' [ ', '[').replace(' ]', ']')
            s = s.replace(' ( ', '(').replace(' )', ')')
            if nparams == 1:
                # No space after unary +, -, ~.
                s = s.replace('+ ', '+')
                s = s.replace('- ', '-')
                s = s.replace('~ ', '~')
            if nparams:
                operators.append((s, nparams))
            else:
                operands.append(s)
    return operators, operands
def get_sub_combinations(maxop):
    """Return a dictionary of lists of combinations suitable
    for recursively building expressions.

    Keys are (numops, numterms) tuples; each value lists every way
    to distribute the remaining numops - 1 operators across
    numterms operand positions.
    """
    combo = collections.defaultdict(list)
    for total in range(1, maxop + 1):
        combo[total, 1].append((total - 1,))
        for first in range(total):
            rest = total - first - 1
            combo[total, 2].append((first, rest))
            for second in range(rest + 1):
                combo[total, 3].append((first, second, rest - second))
    return combo
def get_paren_combos():
    """This function returns a list of lists.

    Index the outer list by operand count (1-3); each sublist holds
    every permutation of leaving operands bare or parenthesizing
    them.  Index 0 is unused and stays None.
    """
    choices = ('%s', '(%s)')
    results = [None]
    for arity in range(1, 4):
        results.append(list(itertools.product(choices, repeat=arity)))
    return results
def operand_combo(expressions, operands, max_operand=13):
    """Yield every expression template with each operand substituted
    into each '%s' slot in turn (one substitution per yield; the
    remaining slots stay as '%s').
    """
    # slot_tables[w] lists, for a template with w slots, every tuple
    # that places one operand choice in one slot.
    choices = list(operands) + ['%s']
    slot_tables = []
    for width in range(max_operand):
        table = []
        slot_tables.append(table)
        for position in range(width):
            for choice in choices:
                row = ['%s'] * width
                row[position] = choice
                table.append(tuple(row))
    for template in expressions:
        # Protect literal %% through the extra formatting pass.
        template = template.replace('%%', '%%%%')
        for row in slot_tables[template.count('%s')]:
            yield template % row
def build(numops=2, all_operators=all_operators, use_operands=False,
          # Runtime optimization
          tuple=tuple):
    """Generate all expression templates that use exactly *numops*
    operators from the *all_operators* table, with every combination
    of parenthesized/bare operands.

    If use_operands is true, the templates are additionally run
    through operand_combo(); otherwise bare '%s' templates are
    returned.
    """
    operators, operands = get_primitives(all_operators)
    combo = get_sub_combinations(numops)
    paren_combos = get_paren_combos()
    product = itertools.product
    try:
        izip = itertools.izip  # Python 2
    except AttributeError:
        izip = zip  # Python 3

    def recurse_build(numops):
        # No operators left: a bare operand slot.
        if not numops:
            yield '%s'
        for myop, nparams in operators:
            # Protect literal %% one more formatting level down.
            myop = myop.replace('%%', '%%%%')
            myparens = paren_combos[nparams]
            # print combo[numops, nparams]
            for mycombo in combo[numops, nparams]:
                # print mycombo
                # Distribute the remaining operators over operands.
                call_again = (recurse_build(x) for x in mycombo)
                for subexpr in product(*call_again):
                    for parens in myparens:
                        wrapped = tuple(x % y for (x, y)
                                        in izip(parens, subexpr))
                        yield myop % wrapped
    result = recurse_build(numops)
    return operand_combo(result, operands) if use_operands else result
def makelib():
    """Generate all_expr_<major>_<minor>.py: for every distinct small
    expression AST, the shortest source text that produces it.
    """
    parse = ast.parse
    dump_tree = astor.dump_tree

    def default_value(): return 1000000, ''
    # Maps AST dump -> (length, shortest source text); the default is
    # worse than any real candidate.
    mydict = collections.defaultdict(default_value)
    # Parameter tuples ('a',), ('a', 'b'), ... used to fill %s slots.
    allparams = [tuple('abcdefghijklmnop'[:x]) for x in range(13)]
    alltxt = itertools.chain(build(1, use_operands=True),
                             build(2, use_operands=True),
                             build(3, select_operators))
    # Textual cleanups applied to yield expressions before storing.
    yieldrepl = list(('yield %s %s' % (operator, operand),
                      'yield %s%s' % (operator, operand))
                     for operator in '+-' for operand in '(ab')
    yieldrepl.append(('yield[', 'yield ['))
    # alltxt = itertools.chain(build(1), build(2))
    badexpr = 0
    goodexpr = 0
    # Parseable-but-silly call/subscript of integer literals; skip.
    silly = '3( 3.( 3[ 3.['.split()
    for expr in alltxt:
        params = allparams[expr.count('%s')]
        expr %= params
        try:
            myast = parse(expr)
        except:
            # NOTE(review): bare except deliberately swallows every
            # parse failure -- most generated permutations are not
            # valid Python.
            badexpr += 1
            continue
        goodexpr += 1
        # Key on the canonical AST dump so only the shortest source
        # for each distinct AST survives.
        key = dump_tree(myast)
        expr = expr.replace(', - ', ', -')
        ignore = [x for x in silly if x in expr]
        if ignore:
            continue
        if 'yield' in expr:
            for x in yieldrepl:
                expr = expr.replace(*x)
        mydict[key] = min(mydict[key], (len(expr), expr))
    print(badexpr, goodexpr)
    stuff = [x[1] for x in mydict.values()]
    stuff.sort()
    # Write bytes explicitly so Py2/Py3 produce identical files.
    lineend = '\n'.encode('utf-8')
    with open('all_expr_%s_%s.py' % sys.version_info[:2], 'wb') as f:
        f.write(textwrap.dedent('''
            # AUTOMAGICALLY GENERATED!!! DO NOT MODIFY!!
            #
            all_expr = """
            ''').encode('utf-8'))
        for item in stuff:
            f.write(item.encode('utf-8'))
            f.write(lineend)
        f.write('"""\n'.encode('utf-8'))
if __name__ == '__main__':
makelib()
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
This module reads the strings generated by build_expressions,
and runs them through the Python interpreter.
For strings that are suboptimal (too many spaces, etc.),
it simply dumps them to a miscompare file.
For strings that seem broken (do not parse after roundtrip)
or are maybe too compressed, it dumps information to the console.
This module does not take too long to execute; however, the
underlying build_expressions module takes forever, so this
should not be part of the automated regressions.
"""
import sys
import collections
import itertools
import textwrap
import hashlib
import ast
import astor
try:
    import importlib
except ImportError:
    # Very old Python without importlib: fall back to the fixed
    # 2.6 module name.
    try:
        import all_expr_2_6 as mymod
    except ImportError:
        # Expression list missing: generate it (very slow).
        print("Expression list does not exist -- building")
        import build_expressions
        build_expressions.makelib()
        print("Expression list built")
        import all_expr_2_6 as mymod
else:
    # Normal path: import the list generated for this interpreter
    # version, building it first if necessary.
    mymodname = 'all_expr_%s_%s' % sys.version_info[:2]
    try:
        mymod = importlib.import_module(mymodname)
    except ImportError:
        print("Expression list does not exist -- building")
        import build_expressions
        build_expressions.makelib()
        print("Expression list built")
        mymod = importlib.import_module(mymodname)
def checklib():
    """Round-trip every expression in the generated list through
    to_source() and record any textual mismatch; report breakage
    (different AST) or suspicious shortening on the console.
    """
    print("Checking expressions")
    parse = ast.parse
    dump_tree = astor.dump_tree
    to_source = astor.to_source
    with open('mismatch_%s_%s.txt' % sys.version_info[:2], 'wb') as f:
        for srctxt in mymod.all_expr.strip().splitlines():
            srcast = parse(srctxt)
            dsttxt = to_source(srcast)
            if dsttxt != srctxt:
                # Text differs -- does it still parse to the same AST?
                srcdmp = dump_tree(srcast)
                try:
                    dstast = parse(dsttxt)
                except SyntaxError:
                    bad = True
                    dstdmp = 'aborted'
                else:
                    dstdmp = dump_tree(dstast)
                    bad = srcdmp != dstdmp
                if bad or len(dsttxt) < len(srctxt):
                    print(srctxt, dsttxt)
                    if bad:
                        print('****************** Original')
                        print(srcdmp)
                        print('****************** Extra Crispy')
                        print(dstdmp)
                        print('******************')
                        print()
                        print()
                # Every textual mismatch goes to the file, bad or not.
                f.write(('%s %s\n' % (repr(srctxt),
                                      repr(dsttxt))).encode('utf-8'))
if __name__ == '__main__':
checklib()
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment