Commit 5d15f079, authored by Patrick Maupin

Refactor the code generator

- Make it prettier
- Make it faster
- Make the interface to the pretty printer better
Parent commit: a957d588
This diff is collapsed.
......@@ -132,6 +132,7 @@ class ExplicitNodeVisitor(ast.NodeVisitor):
"""
@staticmethod
def abort_visit(node):  # XXX: self?
    """Refuse to handle *node*.

    Always raises AttributeError; the message names the class of the
    node that had no handler.
    """
    node_type = node.__class__.__name__
    raise AttributeError('No defined handler for node of type %s' % node_type)
......
......@@ -4,7 +4,7 @@ Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Copyright (c) 2015-2017 Patrick Maupin
Pretty-print source -- post-process for the decompiler
......@@ -16,48 +16,61 @@ The goals of the initial cut of this engine are:
for anybody who wants to do a better job. :)
"""
import re
def pretty_source(source):
""" Prettify the source.
"""
try:
special_unicode = unicode
except NameError:
class special_unicode(object):
pass
try:
basestring = basestring
except NameError:
basestring = str
return ''.join(split_lines(source))
class StringLiteral(str):
""" This is a class for literal strings generated by
the source generator.
def split_lines(source, maxline=79):
"""Split inputs according to lines.
If a line is short enough, just yield it.
Otherwise, fix it.
When retrieved from the source generator, the string
itself will be in the base representation of the
string (repr, not triple-quoted), and the following
attributes will be filled in:
src -- original string
prefix -- e.g. 'f' or 'b'
"""
result = []
extend = result.extend
append = result.append
line = []
multiline = False
count = 0
find = str.find
for item in source:
index = find(item, '\n')
if index:
line.append(item)
multiline = index > 0
count += len(item)
else:
if line:
if count <= maxline or multiline:
extend(line)
else:
wrap_line(line, maxline, result)
count = 0
multiline = False
line = []
append(item)
return result
prefix = ''
def count(group, slen=str.__len__):
    """Return the combined length of all the strings in *group*.

    ``slen`` is the per-item length function; it is bound as a default
    argument (presumably as a speed tweak) and is normally left alone.
    """
    return sum(map(slen, group))
def beautify(source, maxlen=79,
             # Constants
             count=count, StringLiteral=StringLiteral, len=len,
             sum=sum, isinstance=isinstance):
    """ Tidy the generated source in place; nothing is returned.

        ``source`` is a list of lists of strings.  The lists
        alternate between linefeeds and lines, and only the lists
        holding line info are interesting.

        The goal is to get them close to PEP 8.  This doesn't do
        it yet, but shows how the interface to to_string works.

        For each list:

        - if its last item is a StringLiteral and the list either
          has exactly two items, or is longer than ``maxlen`` with
          an '=' in the item before the literal, the literal is
          replaced by the result of pretty_string (unless the
          literal carries a prefix, in which case it is left alone);
        - otherwise, if the list is longer than ``maxlen``, it is
          replaced in ``source`` by a new list filled in by wrap_line.
    """
    for position, pieces in enumerate(source):
        overlong = count(pieces) > maxlen
        tail = pieces[-1]
        literal_case = isinstance(tail, StringLiteral) and (
            len(pieces) == 2 or overlong and '=' in pieces[-2])

        if literal_case:
            # Prefixed literals (e.g. 'f' or 'b') are never re-quoted.
            if not tail.prefix:
                leading_text = ''.join(pieces[:-1])
                pieces[-1] = pretty_string(tail, False, leading_text)
            continue

        if overlong:
            rewrapped = []
            source[position] = rewrapped
            wrap_line(pieces, maxlen, rewrapped)
def wrap_line(line, maxline=79, result=[], count=count):
""" We have a line that is too long,
......@@ -271,3 +284,79 @@ def get_assign_groups(line, ops=ops):
yield group
group = []
yield group
def _properly_indented(s, line_indent):
mylist = s.split('\n')[1:]
mylist = [x.rstrip() for x in mylist]
mylist = [x for x in mylist if x]
if not s:
return False
counts = [(len(x) - len(x.lstrip())) for x in mylist]
return counts and min(counts) >= line_indent
mysplit = re.compile(r'(\\|\"\"\"|\"$)').split
replacements = {'\\': '\\\\', '"""': '""\\"', '"': '\\"'}
def _prep_triple_quotes(s, mysplit=mysplit, replacements=replacements):
""" Split the string up and force-feed some replacements
to make sure it will round-trip OK
"""
s = mysplit(s)
s[1::2] = (replacements[x] for x in s[1::2])
return ''.join(s)
def pretty_string(default, embedded, current_line, uni_lit=False,
                  min_trip_str=20, max_line=79):
    """There are a lot of reasons why we might not want to or
       be able to return a triple-quoted string.  We can always
       punt back to the default normal string.

       default      -- the normal representation of the string; it
                       must also carry the original string in its
                       ``src`` attribute (presumably a StringLiteral
                       -- confirm against the caller).
       embedded     -- tested both for truth and for being > 1; in
                       either case a string without a newline is
                       left in its default form when there is text
                       on the current line.
       current_line -- the text already on the line the string
                       will be placed on.
       uni_lit      -- when true and the original string is bytes,
                       the default form is returned with a 'b'
                       in front of it.
       min_trip_str -- default forms shorter than this are left alone
                       when there is text on the current line.
       max_line     -- line length used when deciding whether the
                       default form fits.

       Returns either ``default`` or the triple-quoted source text.
    """
    s = default.src

    # Punt on abnormal strings
    if (isinstance(s, special_unicode) or not isinstance(s, basestring)):
        return default
    if uni_lit and isinstance(s, bytes):
        return 'b' + default

    len_s = len(default)

    if current_line.strip():
        len_current = len(current_line)
        second_line_start = s.find('\n') + 1
        if embedded > 1 and not second_line_start:
            return default

        if len_s < min_trip_str:
            return default

        line_indent = len_current - len(current_line.lstrip())

        # Could be on a line by itself...
        if embedded and not second_line_start:
            return default

        total_len = len_current + len_s
        if total_len < max_line and not _properly_indented(s, line_indent):
            return default

    fancy = '"""%s"""' % _prep_triple_quotes(s)

    # Sometimes this doesn't work.  One reason is that
    # the AST has no understanding of whether \r\n was
    # entered that way in the string or was a cr/lf in the
    # file.  So we punt just so we can round-trip properly.
    #
    # NOTE(review): eval() is applied to text derived from the string
    # being printed.  If that string can come from an untrusted
    # source, consider ast.literal_eval here instead.
    try:
        if eval(fancy) == s and '\r' not in fancy:
            return fancy
    except Exception:
        # Any failure to evaluate the candidate just means we keep the
        # default form.  KeyboardInterrupt and SystemExit are not
        # Exception subclasses, so they still propagate.
        pass
    return default
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Pretty-print strings for the decompiler
We either return the repr() of the string,
or try to format it as a triple-quoted string.
This is a lot harder than you would think.
This has lots of Python 2 / Python 3 ugliness.
"""
import re
# Python 2 / Python 3 compatibility names used by the isinstance
# checks in pretty_string.
try:
    # Use the real ``unicode`` type when the interpreter has one.
    special_unicode = unicode
except NameError:
    # No ``unicode`` builtin: define a placeholder class so that
    # ``isinstance(x, special_unicode)`` can still be evaluated.
    class special_unicode(object):
        pass

try:
    basestring = basestring
except NameError:
    # No ``basestring`` builtin: fall back to ``str``.
    basestring = str
def _properly_indented(s, line_indent):
mylist = s.split('\n')[1:]
mylist = [x.rstrip() for x in mylist]
mylist = [x for x in mylist if x]
if not s:
return False
counts = [(len(x) - len(x.lstrip())) for x in mylist]
return counts and min(counts) >= line_indent
mysplit = re.compile(r'(\\|\"\"\"|\"$)').split
replacements = {'\\': '\\\\', '"""': '""\\"', '"': '\\"'}
def _prep_triple_quotes(s, mysplit=mysplit, replacements=replacements):
""" Split the string up and force-feed some replacements
to make sure it will round-trip OK
"""
s = mysplit(s)
s[1::2] = (replacements[x] for x in s[1::2])
return ''.join(s)
def pretty_string(s, embedded, current_line, uni_lit=False,
                  min_trip_str=20, max_line=100):
    """There are a lot of reasons why we might not want to or
       be able to return a triple-quoted string.  We can always
       punt back to the default normal string.

       s            -- the string to represent.
       embedded     -- tested both for truth and for being > 1; in
                       either case a string without a newline is
                       left in its repr() form when there is text
                       on the current line.
       current_line -- the text already on the line the string
                       will be placed on.
       uni_lit      -- when true and ``s`` is bytes, the repr() is
                       returned with a 'b' in front of it.
       min_trip_str -- repr() forms shorter than this are left alone
                       when there is text on the current line.
       max_line     -- line length used when deciding whether the
                       repr() form fits.

       Returns either repr(s) (possibly 'b'-prefixed) or the
       triple-quoted source text.
    """
    default = repr(s)

    # Punt on abnormal strings
    if (isinstance(s, special_unicode) or not isinstance(s, basestring)):
        return default
    if uni_lit and isinstance(s, bytes):
        return 'b' + default

    len_s = len(default)

    if current_line.strip():
        len_current = len(current_line)
        second_line_start = s.find('\n') + 1
        if embedded > 1 and not second_line_start:
            return default

        if len_s < min_trip_str:
            return default

        line_indent = len_current - len(current_line.lstrip())

        # Could be on a line by itself...
        if embedded and not second_line_start:
            return default

        total_len = len_current + len_s
        if total_len < max_line and not _properly_indented(s, line_indent):
            return default

    fancy = '"""%s"""' % _prep_triple_quotes(s)

    # Sometimes this doesn't work.  One reason is that
    # the AST has no understanding of whether \r\n was
    # entered that way in the string or was a cr/lf in the
    # file.  So we punt just so we can round-trip properly.
    #
    # NOTE(review): eval() is applied to text derived from the string
    # being printed.  If that string can come from an untrusted
    # source, consider ast.literal_eval here instead.
    try:
        if eval(fancy) == s and '\r' not in fancy:
            return fancy
    except Exception:
        # Any failure to evaluate the candidate just means we keep the
        # repr() form.  KeyboardInterrupt and SystemExit are not
        # Exception subclasses, so they still propagate.
        pass
    return default
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment