Refactor the code generator

- Make it prettier
- Make it faster
- Make the interface to the pretty printer better

Refactor the code generator
- Make it prettier
- Make it faster
- Make the interface to the pretty printer better
5d15f079 · Patrick Maupin · a957d588 · 5d15f079 · 5d15f079 · 5d15f079
Commit 5d15f079 authored Apr 26, 2017 by Patrick Maupin
Showing with 123 additions and 139 deletions

code_gen.py astor/code_gen.py +0 -0

node_util.py astor/node_util.py +1 -0

prettifier.py astor/prettifier.py +122 -33

string_repr.py astor/string_repr.py +0 -106

No files found.
--- a/astor/code_gen.py
+++ b/astor/code_gen.py
--- a/astor/node_util.py
+++ b/astor/node_util.py
@@ -132,6 +132,7 @@ class ExplicitNodeVisitor(ast.NodeVisitor):

    """

+    @staticmethod
    def abort_visit(node):  # XXX: self?
        msg = 'No defined handler for node of type %s'
        raise AttributeError(msg % node.__class__.__name__)

--- a/astor/source_repr.py
+++ b/astor/source_repr.py
@@ -4,7 +4,7 @@ Part of the astor library for Python AST manipulation.

 License: 3-clause BSD

-Copyright (c) 2015 Patrick Maupin
+Copyright (c) 2015-2017 Patrick Maupin

 Pretty-print source -- post-process for the decompiler

@@ -16,48 +16,61 @@ The goals of the initial cut of this engine are:
   for anybody who wants to do a better job. :)
 """

+import re

-def pretty_source(source):
-    """ Prettify the source.
-    """
+try:
+    special_unicode = unicode
+except NameError:
+    class special_unicode(object):
+        pass
+
+try:
+    basestring = basestring
+except NameError:
+    basestring = str

-    return ''.join(split_lines(source))

+class StringLiteral(str):
+    """ This is a class for literal strings generated by
+        the source generator.

-def split_lines(source, maxline=79):
-    """Split inputs according to lines.
-       If a line is short enough, just yield it.
-       Otherwise, fix it.
+        When retrieved from the source generator, the string
+        itself will be in the base representation of the
+        string (repr, not triple-quoted), and the following
+        attributes will be filled in:
+        
+            src -- original string
+            prefix -- e.g. 'f' or 'b'
    """
-    result = []
-    extend = result.extend
-    append = result.append
-    line = []
-    multiline = False
-    count = 0
-    find = str.find
-    for item in source:
-        index = find(item, '\n')
-        if index:
-            line.append(item)
-            multiline = index > 0
-            count += len(item)
-        else:
-            if line:
-                if count <= maxline or multiline:
-                    extend(line)
-                else:
-                    wrap_line(line, maxline, result)
-                count = 0
-                multiline = False
-                line = []
-            append(item)
-    return result
+    prefix = ''


 def count(group, slen=str.__len__):
    return sum([slen(x) for x in group])

+def beautify(source, maxlen=79,
+             # Constants
+             count=count, StringLiteral=StringLiteral, len=len,
+             sum=sum, isinstance=isinstance):
+    """ beautify is passed a list of lists of strings.
+        The lists alternate between linefeeds and lines.
+        The lists with the line info are the interesting ones.
+
+        The goal is to get them close to PEP 8.  This doesn't do
+        it yet, but shows how the interface to to_string works.
+    """
+
+    for index, line in enumerate(source):
+        size = count(line)
+        too_big = size > maxlen
+        if isinstance(line[-1], StringLiteral) and (
+            len(line) == 2 or too_big and '=' in line[-2]):
+            if not line[-1].prefix:
+                line[-1] = pretty_string(line[-1], False, ''.join(line[:-1]))
+        elif too_big:
+            source[index] = new_line = []
+            wrap_line(line, maxlen, new_line)
+

 def wrap_line(line, maxline=79, result=[], count=count):
    """ We have a line that is too long,
@@ -271,3 +284,79 @@ def get_assign_groups(line, ops=ops):
            yield group
            group = []
    yield group
+
+
+
+def _properly_indented(s, line_indent):
+    mylist = s.split('\n')[1:]
+    mylist = [x.rstrip() for x in mylist]
+    mylist = [x for x in mylist if x]
+    if not s:
+        return False
+    counts = [(len(x) - len(x.lstrip())) for x in mylist]
+    return counts and min(counts) >= line_indent
+
+
+mysplit = re.compile(r'(\\|\"\"\"|\"$)').split
+replacements = {'\\': '\\\\', '"""': '""\\"', '"': '\\"'}
+
+
+def _prep_triple_quotes(s, mysplit=mysplit, replacements=replacements):
+    """ Split the string up and force-feed some replacements
+        to make sure it will round-trip OK
+    """
+
+    s = mysplit(s)
+    s[1::2] = (replacements[x] for x in s[1::2])
+    return ''.join(s)
+
+
+def pretty_string(default, embedded, current_line, uni_lit=False,
+                  min_trip_str=20, max_line=79):
+    """There are a lot of reasons why we might not want to or
+       be able to return a triple-quoted string.  We can always
+       punt back to the default normal string.
+    """
+
+    s = default.src
+
+    # Punt on abnormal strings
+    if (isinstance(s, special_unicode) or not isinstance(s, basestring)):
+        return default
+    if uni_lit and isinstance(s, bytes):
+        return 'b' + default
+
+    len_s = len(default)
+
+    if current_line.strip():
+        len_current = len(current_line)
+        second_line_start = s.find('\n') + 1
+        if embedded > 1 and not second_line_start:
+            return default
+
+        if len_s < min_trip_str:
+            return default
+
+        line_indent = len_current - len(current_line.lstrip())
+
+        # Could be on a line by itself...
+        if embedded and not second_line_start:
+            return default
+
+        total_len = len_current + len_s
+        if total_len < max_line and not _properly_indented(s, line_indent):
+            return default
+
+    fancy = '"""%s"""' % _prep_triple_quotes(s)
+
+    # Sometimes this doesn't work.  One reason is that
+    # the AST has no understanding of whether \r\n was
+    # entered that way in the string or was a cr/lf in the
+    # file.  So we punt just so we can round-trip properly.
+
+    try:
+        if eval(fancy) == s and '\r' not in fancy:
+            return fancy
+    except:
+        pass
+    return default
--- a/astor/string_repr.py
+++ b/astor/string_repr.py
-# -*- coding: utf-8 -*-
-"""
-Part of the astor library for Python AST manipulation.
-
-License: 3-clause BSD
-
-Copyright (c) 2015 Patrick Maupin
-
-Pretty-print strings for the decompiler
-
-We either return the repr() of the string,
-or try to format it as a triple-quoted string.
-
-This is a lot harder than you would think.
-
-This has lots of Python 2 / Python 3 ugliness.
-
-"""
-
-import re
-
-try:
-    special_unicode = unicode
-except NameError:
-    class special_unicode(object):
-        pass
-
-try:
-    basestring = basestring
-except NameError:
-    basestring = str
-
-
-def _properly_indented(s, line_indent):
-    mylist = s.split('\n')[1:]
-    mylist = [x.rstrip() for x in mylist]
-    mylist = [x for x in mylist if x]
-    if not s:
-        return False
-    counts = [(len(x) - len(x.lstrip())) for x in mylist]
-    return counts and min(counts) >= line_indent
-
-
-mysplit = re.compile(r'(\\|\"\"\"|\"$)').split
-replacements = {'\\': '\\\\', '"""': '""\\"', '"': '\\"'}
-
-
-def _prep_triple_quotes(s, mysplit=mysplit, replacements=replacements):
-    """ Split the string up and force-feed some replacements
-        to make sure it will round-trip OK
-    """
-
-    s = mysplit(s)
-    s[1::2] = (replacements[x] for x in s[1::2])
-    return ''.join(s)
-
-
-def pretty_string(s, embedded, current_line, uni_lit=False,
-                  min_trip_str=20, max_line=100):
-    """There are a lot of reasons why we might not want to or
-       be able to return a triple-quoted string.  We can always
-       punt back to the default normal string.
-    """
-
-    default = repr(s)
-
-    # Punt on abnormal strings
-    if (isinstance(s, special_unicode) or not isinstance(s, basestring)):
-        return default
-    if uni_lit and isinstance(s, bytes):
-        return 'b' + default
-
-    len_s = len(default)
-
-    if current_line.strip():
-        len_current = len(current_line)
-        second_line_start = s.find('\n') + 1
-        if embedded > 1 and not second_line_start:
-            return default
-
-        if len_s < min_trip_str:
-            return default
-
-        line_indent = len_current - len(current_line.lstrip())
-
-        # Could be on a line by itself...
-        if embedded and not second_line_start:
-            return default
-
-        total_len = len_current + len_s
-        if total_len < max_line and not _properly_indented(s, line_indent):
-            return default
-
-    fancy = '"""%s"""' % _prep_triple_quotes(s)
-
-    # Sometimes this doesn't work.  One reason is that
-    # the AST has no understanding of whether \r\n was
-    # entered that way in the string or was a cr/lf in the
-    # file.  So we punt just so we can round-trip properly.
-
-    try:
-        if eval(fancy) == s and '\r' not in fancy:
-            return fancy
-    except:
-        pass
-    return default