Kaydet (Commit) c51d586a authored tarafından Berker Peksag's avatar Berker Peksag

Merge pull request #40 from berkerpeksag/pm_develop

pm_develop contains all the work I have been doing over the last couple of weeks
...@@ -15,10 +15,13 @@ astor is designed to allow easy manipulation of Python source via the AST. ...@@ -15,10 +15,13 @@ astor is designed to allow easy manipulation of Python source via the AST.
There are some other similar libraries, but astor focuses on the following areas: There are some other similar libraries, but astor focuses on the following areas:
- Round-trip back to Python via Armin Ronacher's codegen.py module: - Round-trip an AST back to Python:
- Modified AST doesn't need linenumbers, ctx, etc. or otherwise be directly compileable - Modified AST doesn't need linenumbers, ctx, etc. or otherwise
be directly compileable for the round-trip to work.
- Easy to read generated code as, well, code - Easy to read generated code as, well, code
- Can round-trip two different source trees to compare for functional
differences, using the astor.rtrip tool (for example, after PEP8 edits).
- Dump pretty-printing of AST - Dump pretty-printing of AST
......
...@@ -9,8 +9,6 @@ Copyright 2013 (c) Berker Peksag ...@@ -9,8 +9,6 @@ Copyright 2013 (c) Berker Peksag
""" """
__version__ = '0.6'
from .code_gen import to_source # NOQA from .code_gen import to_source # NOQA
from .node_util import iter_node, strip_tree, dump_tree from .node_util import iter_node, strip_tree, dump_tree
from .node_util import ExplicitNodeVisitor from .node_util import ExplicitNodeVisitor
...@@ -19,8 +17,10 @@ from .op_util import get_op_symbol, get_op_precedence # NOQA ...@@ -19,8 +17,10 @@ from .op_util import get_op_symbol, get_op_precedence # NOQA
from .op_util import symbol_data from .op_util import symbol_data
from .tree_walk import TreeWalk # NOQA from .tree_walk import TreeWalk # NOQA
__version__ = '0.6'
#DEPRECATED!!! # DEPRECATED!!!
# These aliases support old programs. Please do not use in future. # These aliases support old programs. Please do not use in future.
...@@ -30,7 +30,7 @@ from .tree_walk import TreeWalk # NOQA ...@@ -30,7 +30,7 @@ from .tree_walk import TreeWalk # NOQA
# things could be accessed from their submodule. # things could be accessed from their submodule.
get_boolop = get_binop = get_cmpop = get_unaryop = get_op_symbol # NOQA get_boolop = get_binop = get_cmpop = get_unaryop = get_op_symbol # NOQA
get_anyop = get_op_symbol get_anyop = get_op_symbol
parsefile = code_to_ast.parse_file parsefile = code_to_ast.parse_file
codetoast = code_to_ast codetoast = code_to_ast
......
This diff is collapsed.
...@@ -4,8 +4,8 @@ Part of the astor library for Python AST manipulation. ...@@ -4,8 +4,8 @@ Part of the astor library for Python AST manipulation.
License: 3-clause BSD License: 3-clause BSD
Copyright 2012-2015 (c) Patrick Maupin Copyright (c) 2012-2015 Patrick Maupin
Copyright 2013-2015 (c) Berker Peksag Copyright (c) 2013-2015 Berker Peksag
Functions that interact with the filesystem go here. Functions that interact with the filesystem go here.
...@@ -37,6 +37,8 @@ class CodeToAst(object): ...@@ -37,6 +37,8 @@ class CodeToAst(object):
designed to be used in code that uses this class. designed to be used in code that uses this class.
""" """
if not os.path.isdir(srctree):
yield os.path.split(srctree)
for srcpath, _, fnames in os.walk(srctree): for srcpath, _, fnames in os.walk(srctree):
# Avoid infinite recursion for silly users # Avoid infinite recursion for silly users
if ignore is not None and ignore in srcpath: if ignore is not None and ignore in srcpath:
...@@ -52,14 +54,19 @@ class CodeToAst(object): ...@@ -52,14 +54,19 @@ class CodeToAst(object):
TODO: Handle encodings other than the default (issue #26) TODO: Handle encodings other than the default (issue #26)
""" """
with open(fname, 'r') as f: try:
fstr = f.read() with open(fname, 'r') as f:
fstr = f.read()
except IOError:
if fname != 'stdin':
raise
sys.stdout.write('\nReading from stdin:\n\n')
fstr = sys.stdin.read()
fstr = fstr.replace('\r\n', '\n').replace('\r', '\n') fstr = fstr.replace('\r\n', '\n').replace('\r', '\n')
if not fstr.endswith('\n'): if not fstr.endswith('\n'):
fstr += '\n' fstr += '\n'
return ast.parse(fstr, filename=fname) return ast.parse(fstr, filename=fname)
@staticmethod @staticmethod
def get_file_info(codeobj): def get_file_info(codeobj):
"""Returns the file and line number of a code object. """Returns the file and line number of a code object.
......
...@@ -53,10 +53,10 @@ def iter_node(node, name='', unknown=None, ...@@ -53,10 +53,10 @@ def iter_node(node, name='', unknown=None,
def dump_tree(node, name=None, initial_indent='', indentation=' ', def dump_tree(node, name=None, initial_indent='', indentation=' ',
maxline=120, maxmerged=80, maxline=120, maxmerged=80,
#Runtime optimization # Runtime optimization
iter_node=iter_node, special=ast.AST, iter_node=iter_node, special=ast.AST,
list=list, isinstance=isinstance, type=type, len=len): list=list, isinstance=isinstance, type=type, len=len):
"""Dumps an AST or similar structure: """Dumps an AST or similar structure:
- Pretty-prints with indentation - Pretty-prints with indentation
...@@ -87,9 +87,9 @@ def dump_tree(node, name=None, initial_indent='', indentation=' ', ...@@ -87,9 +87,9 @@ def dump_tree(node, name=None, initial_indent='', indentation=' ',
def strip_tree(node, def strip_tree(node,
#Runtime optimization # Runtime optimization
iter_node=iter_node, special=ast.AST, iter_node=iter_node, special=ast.AST,
list=list, isinstance=isinstance, type=type, len=len): list=list, isinstance=isinstance, type=type, len=len):
"""Strips an AST by removing all attributes not in _fields. """Strips an AST by removing all attributes not in _fields.
Returns a set of the names of all attributes stripped. Returns a set of the names of all attributes stripped.
...@@ -97,6 +97,7 @@ def strip_tree(node, ...@@ -97,6 +97,7 @@ def strip_tree(node,
This canonicalizes two trees for comparison purposes. This canonicalizes two trees for comparison purposes.
""" """
stripped = set() stripped = set()
def strip(node, indent): def strip(node, indent):
unknown = set() unknown = set()
leaf = True leaf = True
...@@ -134,3 +135,31 @@ class ExplicitNodeVisitor(ast.NodeVisitor): ...@@ -134,3 +135,31 @@ class ExplicitNodeVisitor(ast.NodeVisitor):
method = 'visit_' + node.__class__.__name__ method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method, abort) visitor = getattr(self, method, abort)
return visitor(node) return visitor(node)
def allow_ast_comparison():
    """Monkey-patch every AST node class so nodes compare by value.

    A small helper class providing ``__eq__``/``__ne__`` is appended to
    the bases of each class in the ``ast`` module that derives from
    ``ast.AST``.  Two nodes then compare equal when they have the same
    type and the same instance attributes, which lets whole trees be
    compared with Python's ordinary comparison machinery.

    Used by the anti8 functions to compare old and new ASTs.
    Could also be used by the test library.

    The function is safe to call repeatedly: a class that already carries
    the helper is left alone instead of growing another base per call.
    Classes whose bases cannot be reassigned are silently skipped.
    """

    class CompareHelper(object):
        def __eq__(self, other):
            return type(self) == type(other) and vars(self) == vars(other)

        def __ne__(self, other):
            return type(self) != type(other) or vars(self) != vars(other)

    for item in vars(ast).values():
        if not isinstance(item, type) or not issubclass(item, ast.AST):
            continue
        # A previous call already patched this class; adding a second
        # helper would only lengthen the MRO.
        if any(base.__name__ == 'CompareHelper' for base in item.__bases__):
            continue
        try:
            item.__bases__ = item.__bases__ + (CompareHelper,)
        except TypeError:
            # Some classes reject __bases__ assignment (e.g. ast.AST
            # itself, where the new base order is inconsistent).
            pass
...@@ -4,7 +4,7 @@ Part of the astor library for Python AST manipulation. ...@@ -4,7 +4,7 @@ Part of the astor library for Python AST manipulation.
License: 3-clause BSD License: 3-clause BSD
Copyright (c) 2012-2015 Patrick Maupin Copyright (c) 2015 Patrick Maupin
This module provides data and functions for mapping This module provides data and functions for mapping
AST nodes to symbols and precedences. AST nodes to symbols and precedences.
...@@ -14,48 +14,91 @@ AST nodes to symbols and precedences. ...@@ -14,48 +14,91 @@ AST nodes to symbols and precedences.
import ast import ast
op_data = """ op_data = """
Or or 4 GeneratorExp 1
And and 6
Not not 8 Assign 1
Eq == 10 AugAssign 0
Gt > 10 Expr 0
GtE >= 10 Yield 1
In in 10 YieldFrom 0
Is is 10 If 1
NotEq != 10 For 0
Lt < 10 While 0
LtE <= 10 Return 1
NotIn not in 10
IsNot is not 10 Slice 1
BitOr | 12 Subscript 0
BitXor ^ 14 Index 1
BitAnd & 16 ExtSlice 1
LShift << 18 comprehension_target 1
RShift >> 18 Tuple 0
Add + 20
Sub - 20 Comma 1
Mult * 22 Assert 0
Div / 22 Raise 0
Mod % 22 call_one_arg 1
FloorDiv // 22
MatMult @ 22 Lambda 1
UAdd + 24 IfExp 0
USub - 24
Invert ~ 24 comprehension 1
Pow ** 26 Or or 1
And and 1
Not not 1
Eq == 1
Gt > 0
GtE >= 0
In in 0
Is is 0
NotEq != 0
Lt < 0
LtE <= 0
NotIn not in 0
IsNot is not 0
BitOr | 1
BitXor ^ 1
BitAnd & 1
LShift << 1
RShift >> 0
Add + 1
Sub - 0
Mult * 1
Div / 0
Mod % 0
FloorDiv // 0
MatMult @ 0
PowRHS 1
Invert ~ 1
UAdd + 0
USub - 0
Pow ** 1
Num 1
""" """
op_data = [x.split() for x in op_data.splitlines()] op_data = [x.split() for x in op_data.splitlines()]
op_data = [(x[0], ' '.join(x[1:-1]), int(x[-1])) for x in op_data if x] op_data = [[x[0], ' '.join(x[1:-1]), int(x[-1])] for x in op_data if x]
for index in range(1, len(op_data)):
op_data[index][2] *= 2
op_data[index][2] += op_data[index - 1][2]
precedence_data = dict((getattr(ast, x, None), z) for x, y, z in op_data) precedence_data = dict((getattr(ast, x, None), z) for x, y, z in op_data)
symbol_data = dict((getattr(ast, x, None), y) for x, y, z in op_data) symbol_data = dict((getattr(ast, x, None), y) for x, y, z in op_data)
def get_op_symbol(obj, fmt='%s', symbol_data=symbol_data, type=type): def get_op_symbol(obj, fmt='%s', symbol_data=symbol_data, type=type):
"""Given an AST node object, returns a string containing the symbol. """Given an AST node object, returns a string containing the symbol.
""" """
return fmt % symbol_data[type(obj)] return fmt % symbol_data[type(obj)]
def get_op_precedence(obj, precedence_data=precedence_data, type=type): def get_op_precedence(obj, precedence_data=precedence_data, type=type):
"""Given an AST node object, returns the precedence. """Given an AST node object, returns the precedence.
""" """
return precedence_data[type(obj)] return precedence_data[type(obj)]
class Precedence(object):
    # Namespace of precedence values: each (name, symbol, value) row of
    # op_data becomes a class attribute ``name`` holding ``value``.
    # NOTE(review): writing through vars() in a class body relies on the
    # class namespace being a real, writable dict -- confirm this holds on
    # every interpreter the package targets.
    vars().update((x, z) for x, y, z in op_data)
    # A value strictly above every entry in the table.
    highest = max(z for x, y, z in op_data) + 2
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Usage:
python -m astor.rtrip [readonly] [<source>]
This utility tests round-tripping of Python source to AST
and back to source.
.. versionadded:: 0.6
If readonly is specified, then the source will be tested,
but no files will be written.
If the source is specified to be "stdin" (without quotes)
then any source entered at the command line will be compiled
into an AST, converted back to text, and then compiled to
an AST again, and the results will be displayed to stdout.
If neither readonly nor stdin is specified, then rtrip
will create a mirror directory named tmp_rtrip and will
recursively round-trip all the Python source from the source
into the tmp_rtrip dir, after compiling it and then reconstituting
it through code_gen.to_source.
If the source is not specified, the entire Python library will be used.
The purpose of rtrip is to place Python code into a canonical form.
This is useful both for functional testing of astor, and for
validating code edits.
For example, if you make manual edits for PEP8 compliance,
you can diff the rtrip output of the original code against
the rtrip output of the edited code, to ensure that you
didn't make any functional changes.
For testing astor itself, it is useful to point to a big codebase,
e.g::
python -m astor.rtrip
to roundtrip the standard library.
If any round-tripped files fail to be built or to match, the
tmp_rtrip directory will also contain fname.srcdmp and fname.dstdmp,
which are textual representations of the ASTs.
Note 1:
The canonical form is only canonical for a given version of
this module and the astor toolbox. It is not guaranteed to
be stable. The only desired guarantee is that two source modules
that parse to the same AST will be converted back into the same
canonical form.
Note 2:
This tool WILL TRASH the tmp_rtrip directory (unless readonly
is specified) -- as far as it is concerned, it OWNS that directory.
Note 3: Why is it "readonly" and not "-r"? Because python -m slurps
all the thingies starting with the dash.
"""
import sys
import os
import ast
import shutil
import logging
from astor.code_gen import to_source
from astor.file_util import code_to_ast
from astor.node_util import allow_ast_comparison, dump_tree, strip_tree
dsttree = 'tmp_rtrip'
def convert(srctree, dsttree=dsttree, readonly=False, dumpall=False):
    """Walk the srctree, and convert/copy all python files
    into the dsttree

    Each file found under ``srctree`` is parsed to an AST, regenerated
    with ``to_source``, and parsed again; the two ASTs are compared.

    srctree  -- file or directory to round-trip.
    dsttree  -- mirror directory for the output.  Unless ``readonly`` is
                set it is removed first and then recreated.
    readonly -- when true nothing is written; the regenerated text is
                re-parsed directly from memory.
    dumpall  -- when true, AST dumps are produced for every file, not
                only for the ones that fail to round-trip.

    Results are reported through ``logging``; nothing is returned.
    """
    # Make AST nodes comparable with == / != for the check further down.
    allow_ast_comparison()

    parse_file = code_to_ast.parse_file
    find_py_files = code_to_ast.find_py_files
    srctree = os.path.normpath(srctree)

    if not readonly:
        dsttree = os.path.normpath(dsttree)
        logging.info('')
        logging.info('Trashing ' + dsttree)
        # Second argument is ignore_errors: a missing directory is fine.
        shutil.rmtree(dsttree, True)

    unknown_src_nodes = set()
    unknown_dst_nodes = set()
    badfiles = set()
    broken = []
    # TODO: When issue #26 resolved, remove UnicodeDecodeError
    handled_exceptions = SyntaxError, UnicodeDecodeError

    oldpath = None

    # The destination tree is passed as the "ignore" argument so that it
    # is not walked as a source when it lives under srctree.
    allfiles = find_py_files(srctree, None if readonly else dsttree)
    for srcpath, fname in allfiles:
        # Create destination directory
        if not readonly and srcpath != oldpath:
            oldpath = srcpath
            if srcpath >= srctree:
                dstpath = srcpath.replace(srctree, dsttree, 1)
                if not dstpath.startswith(dsttree):
                    raise ValueError("%s not a subdirectory of %s" %
                                     (dstpath, dsttree))
            else:
                # srctree named a single file, so srcpath is its parent.
                assert srctree.startswith(srcpath)
                dstpath = dsttree
            os.makedirs(dstpath)

        srcfname = os.path.join(srcpath, fname)
        logging.info('Converting %s' % srcfname)
        try:
            srcast = parse_file(srcfname)
        except handled_exceptions:
            badfiles.add(srcfname)
            continue

        dsttxt = to_source(srcast)

        if not readonly:
            dstfname = os.path.join(dstpath, fname)
            try:
                with open(dstfname, 'w') as f:
                    f.write(dsttxt)
            except UnicodeEncodeError:
                # NOTE(review): processing continues and the possibly
                # incomplete file is re-parsed below -- confirm intended.
                badfiles.add(dstfname)

        # As a sanity check, make sure that ASTs themselves
        # round-trip OK
        try:
            dstast = ast.parse(dsttxt) if readonly else parse_file(dstfname)
        except SyntaxError:
            # An empty list stands in for the tree so that the comparison
            # and dump below still run and report a mismatch.
            dstast = []
        unknown_src_nodes.update(strip_tree(srcast))
        unknown_dst_nodes.update(strip_tree(dstast))
        if dumpall or srcast != dstast:
            # The node comparison differed (or dumps were requested);
            # the textual dumps give the final verdict.
            srcdump = dump_tree(srcast)
            dstdump = dump_tree(dstast)
            bad = srcdump != dstdump
            logging.warning(' calculating dump -- %s' %
                            ('bad' if bad else 'OK'))
            if bad:
                broken.append(srcfname)
            if dumpall or bad:
                if not readonly:
                    # dstfname[:-3] drops the last three characters
                    # (presumably the '.py' suffix -- verify against
                    # find_py_files).
                    try:
                        with open(dstfname[:-3] + '.srcdmp', 'w') as f:
                            f.write(srcdump)
                    except UnicodeEncodeError:
                        badfiles.add(dstfname[:-3] + '.srcdmp')
                    try:
                        with open(dstfname[:-3] + '.dstdmp', 'w') as f:
                            f.write(dstdump)
                    except UnicodeEncodeError:
                        badfiles.add(dstfname[:-3] + '.dstdmp')
                elif dumpall:
                    # Read-only dump mode: show everything on stdout.
                    sys.stdout.write('\n\nAST:\n\n ')
                    sys.stdout.write(srcdump.replace('\n', '\n '))
                    sys.stdout.write('\n\nDecompile:\n\n ')
                    sys.stdout.write(dsttxt.replace('\n', '\n '))
                    sys.stdout.write('\n\nNew AST:\n\n ')
                    sys.stdout.write('(same as old)' if dstdump == srcdump
                                     else dstdump.replace('\n', '\n '))
                    sys.stdout.write('\n')

    if badfiles:
        logging.warning('\nFiles not processed due to syntax errors:')
        for fname in sorted(badfiles):
            logging.warning(' %s' % fname)
    if broken:
        logging.warning('\nFiles failed to round-trip to AST:')
        for srcfname in broken:
            logging.warning(' %s' % srcfname)

    # Attributes that strip_tree is expected to remove; anything else it
    # stripped is reported as an error.
    ok_to_strip = 'col_offset _precedence _use_parens lineno _p_op _pp'
    ok_to_strip = set(ok_to_strip.split())
    bad_nodes = (unknown_dst_nodes | unknown_src_nodes) - ok_to_strip
    if bad_nodes:
        logging.error('\nERROR -- UNKNOWN NODES STRIPPED: %s' % bad_nodes)
    logging.info('\n')
def usage(msg):
    """Abort with an error message followed by the command-line help.

    msg -- text inserted after ``Error:`` at the top of the help.

    Always raises SystemExit carrying the formatted help text.
    """
    # Imported locally: at module level textwrap is only imported under
    # the ``__main__`` guard, so relying on the global name would raise
    # NameError whenever this module is imported rather than executed.
    import textwrap

    raise SystemExit(textwrap.dedent("""
        Error: %s
        Usage:
        python -m astor.rtrip [readonly] [<source>]
        This utility tests round-tripping of Python source to AST
        and back to source.
        If readonly is specified, then the source will be tested,
        but no files will be written.
        if the source is specified to be "stdin" (without quotes)
        then any source entered at the command line will be compiled
        into an AST, converted back to text, and then compiled to
        an AST again, and the results will be displayed to stdout.
        If neither readonly nor stdin is specified, then rtrip
        will create a mirror directory named tmp_rtrip and will
        recursively round-trip all the Python source from the source
        into the tmp_rtrip dir, after compiling it and then reconstituting
        it through code_gen.to_source.
        If the source is not specified, the entire Python library will be used.
        """) % msg)
# Command-line entry point: python -m astor.rtrip [readonly] [<source>]
if __name__ == '__main__':
    import textwrap

    args = sys.argv[1:]
    # "readonly" is a bare word rather than a dash option (see Note 3 in
    # the module docstring).
    readonly = 'readonly' in args
    if readonly:
        args.remove('readonly')

    if not args:
        # No source given: use the directory containing the textwrap
        # module, i.e. the standard library.
        args = [os.path.dirname(textwrap.__file__)]

    if len(args) > 1:
        usage("Too many arguments")

    fname, = args
    dumpall = False
    if not os.path.exists(fname):
        # A non-existent path is only acceptable when it is the literal
        # word "stdin"; otherwise usage() raises SystemExit.
        dumpall = fname == 'stdin' or usage("Cannot find directory %s" % fname)

    logging.basicConfig(format='%(msg)s', level=logging.INFO)
    # stdin mode never writes files, so it forces readonly.
    convert(fname, readonly=readonly or dumpall, dumpall=dumpall)
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Pretty-print source -- post-process for the decompiler
The goals of the initial cut of this engine are:
1) Do a passable, if not PEP8, job of line-wrapping.
2) Serve as an example of an interface to the decompiler
for anybody who wants to do a better job. :)
"""
def pretty_source(source):
    """Return the prettified source as one string.

    ``source`` is the sequence of string fragments produced by the
    decompiler; over-long lines are wrapped and the (possibly nested)
    result is flattened and joined.
    """
    wrapped = split_lines(source)
    fragments = flatten(wrapped)
    return ''.join(fragments)
def flatten(source, list=list, isinstance=isinstance):
    """Lazily flatten arbitrarily nested lists.

    Returns a generator that yields every non-list item of ``source`` in
    order, descending into each list it meets.  Only ``list`` instances
    are expanded; other containers are yielded unchanged.
    """

    def walk(items):
        for entry in items:
            if not isinstance(entry, list):
                yield entry
                continue
            for leaf in walk(entry):
                yield leaf

    return walk(source)
def split_lines(source, maxline=79):
    """Group source fragments into lines, wrapping the long ones.

    Fragments are collected until one starting with a newline arrives.
    The collected line is yielded as a list when it fits in ``maxline``
    characters or when its last fragment contains an embedded newline;
    otherwise the pieces produced by ``wrap_line`` are yielded.  The
    newline fragment itself is always yielded afterwards.
    """
    pending = []
    width = 0
    has_newline = False
    for fragment in source:
        if not fragment.startswith('\n'):
            pending.append(fragment)
            has_newline = '\n' in fragment
            width += len(fragment)
            continue
        if pending:
            if has_newline or width <= maxline:
                yield pending
            else:
                for piece in wrap_line(pending, maxline):
                    yield piece
            pending = []
            width = 0
            has_newline = False
        yield fragment
def count(group):
    """Return the combined length of all items in ``group``."""
    return sum(map(len, group))
def wrap_line(line, maxline=79, count=count):
    """Wrap a line that is too long.

    line    -- list of string fragments making up one source line; a
               leading all-whitespace fragment is taken as indentation.
               The list is modified in place.
    maxline -- maximum line width to aim for.
    count   -- function returning the total length of a group.

    This is a generator.  It yields strings and lists of strings which,
    once flattened and joined, form the wrapped text; continuation lines
    are indented four columns deeper than the original line.
    """
    # Extract the indentation
    indentation = line[0]
    lenfirst = len(indentation)
    indent = lenfirst - len(indentation.strip())
    assert indent in (0, lenfirst)
    indentation = line.pop(0) if indent else ''

    # Get splittable/non-splittable groups
    dgroups = list(delimiter_groups(line))
    unsplittable = dgroups[::2]
    splittable = dgroups[1::2]

    # If the largest non-splittable group won't fit
    # on a line, try to add parentheses to the line.
    if max(count(x) for x in unsplittable) > maxline - indent:
        line = add_parens(line, maxline, indent)
        dgroups = list(delimiter_groups(line))
        unsplittable = dgroups[::2]
        splittable = dgroups[1::2]

    # Deal with the first (always unsplittable) group, and
    # then set up to deal with the remainder in pairs.
    first = unsplittable[0]
    yield indentation
    yield first
    if not splittable:
        return
    pos = indent + count(first)
    # The emitted indentation must grow by exactly as many characters as
    # the tracked column, otherwise ``pos`` no longer matches the text.
    indentation += '    '
    indent += 4
    # Deeply indented code gets a wider limit so that at least half a
    # line remains available for content.
    if indent >= maxline/2:
        maxline = maxline/2 + indent

    for sg, nsg in zip(splittable, unsplittable[1:]):

        if sg:
            # If we already have stuff on the line and even
            # the very first item won't fit, start a new line
            if pos > indent and pos + len(sg[0]) > maxline:
                yield '\n'
                yield indentation
                pos = indent

            # Dump lines out of the splittable group
            # until the entire thing fits
            csg = count(sg)
            while pos + csg > maxline:
                ready, sg = split_group(sg, pos, maxline)
                # Avoid trailing whitespace at the end of a wrapped line
                if ready[-1].endswith(' '):
                    ready[-1] = ready[-1][:-1]
                yield ready
                yield '\n'
                yield indentation
                pos = indent
                csg = count(sg)

            # Dump the remainder of the splittable group
            if sg:
                yield sg
                pos += csg

        # Dump the unsplittable group, optionally
        # preceded by a linefeed.
        cnsg = count(nsg)
        if pos > indent and pos + cnsg > maxline:
            yield '\n'
            yield indentation
            pos = indent
        yield nsg
        pos += cnsg
def split_group(source, pos, maxline):
    """Split a group of fragments at the point where the line is full.

    source  -- list of string fragments; it is modified in place.
    pos     -- current column on the output line.
    maxline -- maximum line width.

    Returns ``(first, rest)``: ``first`` is a new list holding the
    fragments that stay on the current line (at least one whenever
    ``source`` is non-empty), ``rest`` is ``source`` itself with those
    fragments removed.  A following fragment ending in a space may run
    one column past ``maxline``; any other must stay four columns short.
    """
    total = len(source)
    taken = 0
    while taken < total:
        pos += len(source[taken])
        taken += 1
        if taken < total:
            upcoming = source[taken]
            if upcoming.endswith(' '):
                limit = maxline + 1
            else:
                limit = maxline - 4
            if pos + len(upcoming) > limit:
                break
    first = source[:taken]
    del source[:taken]
    return first, source
# Fragments that open and close a bracketed region.  '):' is included
# so that a fragment made of exactly those two characters also closes one.
begin_delim = set('([{')
end_delim = set(')]}')
end_delim.add('):')


def delimiter_groups(line, begin_delim=begin_delim,
                     end_delim=end_delim):
    """Split a line into alternating groups.
    The first group cannot have a line feed inserted,
    the next one can, etc.

    line -- iterable of string fragments.

    This is a generator of lists of fragments.  Groups at even positions
    run up to and including an opening delimiter; groups at odd positions
    hold everything up to (not including) the matching closing delimiter,
    with nested brackets kept inside the group.
    """
    text = []
    # A single shared iterator lets both inner loops continue where the
    # other one stopped.
    line = iter(line)
    while True:
        # First build and yield an unsplittable group
        for item in line:
            text.append(item)
            if item in begin_delim:
                break
        if not text:
            break
        yield text

        # Now build and yield a splittable group
        level = 0
        text = []
        for item in line:
            if item in begin_delim:
                level += 1
            elif item in end_delim:
                level -= 1
                if level < 0:
                    # This closer matches the opener that ended the
                    # previous group; it starts the next unsplittable one.
                    yield text
                    text = [item]
                    break
            text.append(item)
        else:
            # Input exhausted without finding the matching closer.
            assert not text, text
            break
# Leading fragments after which an opening parenthesis may be inserted.
statements = set(['del ', 'return', 'yield ', 'if ', 'while '])


def add_parens(line, maxline, indent, statements=statements, count=count):
    """Attempt to add parentheses around the line
    in order to make it splittable.

    line    -- list of string fragments (indentation already removed);
               it may be modified in place.
    maxline -- maximum line width.
    indent  -- width of the indentation of this line.

    Returns a list of fragments: ``line`` itself when it contains no
    assignment operator, otherwise a new list.
    """
    if line[0] in statements:
        index = 1
        if not line[0].endswith(' '):
            # Keyword without a trailing space ('return'): the space is a
            # separate fragment, so the parenthesis goes after it.
            index = 2
            assert line[1] == ' '
        line.insert(index, '(')
        if line[-1] == ':':
            # Keep the trailing colon outside the parentheses.
            line.insert(-1, ')')
        else:
            line.append(')')

    # That was the easy stuff.  Now for assignments.
    groups = list(get_assign_groups(line))
    if len(groups) == 1:
        # So sad, too bad
        return line

    counts = list(count(x) for x in groups)
    didwrap = False

    # If the LHS is large, wrap it first
    if sum(counts[:-1]) >= maxline - indent - 4:
        for group in groups[:-1]:
            didwrap = False  # Only want to know about last group
            if len(group) > 1:
                group.insert(0, '(')
                # insert(-1) places ')' just before the group's last
                # fragment, which is the assignment operator.
                group.insert(-1, ')')
                didwrap = True

    # Might not need to wrap the RHS if wrapped the LHS
    if not didwrap or counts[-1] > maxline - indent - 10:
        groups[-1].insert(0, '(')
        groups[-1].append(')')

    return [item for group in groups for item in group]
# Assignment operators, formatted as the fragments ' = ', ' += ', ' <<= ' ...
ops = set(' %s= ' % x
          for x in list('|^&+-*/%@~') + ['<<', '>>', '//', '**', ''])


def get_assign_groups(line, ops=ops):
    """Split a line into groups at every assignment operator.

    Plain and augmented assignments both count.  Each yielded list ends
    with the operator that terminated it; the final list holds whatever
    follows the last operator (possibly nothing).
    """
    current = []
    for fragment in line:
        current.append(fragment)
        if fragment not in ops:
            continue
        yield current
        current = []
    yield current
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
Pretty-print strings for the decompiler
We either return the repr() of the string,
or try to format it as a triple-quoted string.
This is a lot harder than you would think.
This has lots of Python 2 / Python 3 ugliness.
"""
import re
import logging
# Python 2 / Python 3 compatibility shims.

# Where a ``unicode`` builtin exists, use it; otherwise define a
# placeholder class that no string is an instance of, so isinstance()
# checks against special_unicode are simply False.
try:
    special_unicode = unicode
except NameError:
    class special_unicode(object):
        pass

# Where ``basestring`` is not a builtin, fall back to ``str``.
try:
    basestring = basestring
except NameError:
    basestring = str
def _get_line(current_output):
""" Back up in the output buffer to
find the start of the current line,
and return the entire line.
"""
myline = []
index = len(current_output)
while index:
index -= 1
try:
s = str(current_output[index])
except:
raise
myline.append(s)
if '\n' in s:
break
myline = ''.join(reversed(myline))
return myline.rsplit('\n', 1)[-1]
def _properly_indented(s, current_line):
line_indent = len(current_line) - len(current_line.lstrip())
mylist = s.split('\n')[1:]
mylist = [x.rstrip() for x in mylist]
mylist = [x for x in mylist if x]
if not s:
return False
counts = [(len(x) - len(x.lstrip())) for x in mylist]
return counts and min(counts) >= line_indent
mysplit = re.compile(r'(\\|\"\"\"|\"$)').split
replacements = {'\\': '\\\\', '"""': '""\\"', '"': '\\"'}
def _prep_triple_quotes(s, mysplit=mysplit, replacements=replacements):
""" Split the string up and force-feed some replacements
to make sure it will round-trip OK
"""
s = mysplit(s)
s[1::2] = (replacements[x] for x in s[1::2])
return ''.join(s)
def pretty_string(s, current_output, min_trip_str=20, max_line=100):
    """Return source text for the string ``s``, triple-quoted if sensible.

    s              -- the string value to render.
    current_output -- output fragments written so far, used to look at
                      the line the string will be appended to.
    min_trip_str   -- strings whose repr is shorter than this are never
                      triple-quoted when the current line has content.
    max_line       -- if the current line plus the repr stays below this
                      width, the repr is used unless the string's own
                      continuation lines are suitably indented.

    There are a lot of reasons why we might not want to or be able to
    return a triple-quoted string.  We can always punt back to the
    default normal string, i.e. ``repr(s)``.
    """
    default = repr(s)

    # Punt on abnormal strings
    if isinstance(s, special_unicode) or not isinstance(s, basestring):
        return default

    len_s = len(default)
    current_line = _get_line(current_output)
    if current_line.strip():
        if len_s < min_trip_str:
            return default

        total_len = len(current_line) + len_s
        if total_len < max_line and not _properly_indented(s, current_line):
            return default

    fancy = '"""%s"""' % _prep_triple_quotes(s)

    # Sometimes this doesn't work.  One reason is that
    # the AST has no understanding of whether \r\n was
    # entered that way in the string or was a cr/lf in the
    # file.  So we punt just so we can round-trip properly.
    #
    # NOTE(review): eval() runs on text derived from the source being
    # decompiled.  The escaping above is meant to keep it a plain string
    # literal, but consider ast.literal_eval if the input is untrusted.
    try:
        if eval(fancy) == s and '\r' not in fancy:
            return fancy
    except Exception:
        # Any failure to evaluate just means the fancy form is unusable.
        pass
    return default
...@@ -7,6 +7,9 @@ License: 3-clause BSD ...@@ -7,6 +7,9 @@ License: 3-clause BSD
Copyright 2012 (c) Patrick Maupin Copyright 2012 (c) Patrick Maupin
Copyright 2013 (c) Berker Peksag Copyright 2013 (c) Berker Peksag
This file contains a TreeWalk class that views a node tree
as a unified whole and allows several modes of traversal.
""" """
from .node_util import iter_node from .node_util import iter_node
...@@ -76,9 +79,9 @@ class TreeWalk(MetaFlatten): ...@@ -76,9 +79,9 @@ class TreeWalk(MetaFlatten):
methods can be written. They will be called in alphabetical order. methods can be written. They will be called in alphabetical order.
""" """
nodestack = None
def __init__(self, node=None): def __init__(self, node=None):
self.nodestack = []
self.setup() self.setup()
if node is not None: if node is not None:
self.walk(node) self.walk(node)
...@@ -106,11 +109,11 @@ class TreeWalk(MetaFlatten): ...@@ -106,11 +109,11 @@ class TreeWalk(MetaFlatten):
""" """
pre_handlers = self.pre_handlers.get pre_handlers = self.pre_handlers.get
post_handlers = self.post_handlers.get post_handlers = self.post_handlers.get
oldstack = self.nodestack nodestack = self.nodestack
self.nodestack = nodestack = [] emptystack = len(nodestack)
append, pop = nodestack.append, nodestack.pop append, pop = nodestack.append, nodestack.pop
append([node, name, list(iter_node(node, name + '_item')), -1]) append([node, name, list(iter_node(node, name + '_item')), -1])
while nodestack: while len(nodestack) > emptystack:
node, name, subnodes, index = nodestack[-1] node, name, subnodes, index = nodestack[-1]
if index >= len(subnodes): if index >= len(subnodes):
handler = (post_handlers(type(node).__name__) or handler = (post_handlers(type(node).__name__) or
...@@ -138,7 +141,6 @@ class TreeWalk(MetaFlatten): ...@@ -138,7 +141,6 @@ class TreeWalk(MetaFlatten):
else: else:
node, name = subnodes[index] node, name = subnodes[index]
append([node, name, list(iter_node(node, name + '_item')), -1]) append([node, name, list(iter_node(node, name + '_item')), -1])
self.nodestack = oldstack
@property @property
def parent(self): def parent(self):
......
...@@ -208,4 +208,69 @@ Functions ...@@ -208,4 +208,69 @@ Functions
get_unaryop, and get_anyop. get_unaryop, and get_anyop.
Command line utilities
--------------------------
rtrip
''''''
There is currently one command-line utility::
python -m astor.rtrip [readonly] [<source>]
This utility tests round-tripping of Python source to AST
and back to source.
.. versionadded:: 0.6
If readonly is specified, then the source will be tested,
but no files will be written.
If the source is specified to be "stdin" (without quotes)
then any source entered at the command line will be compiled
into an AST, converted back to text, and then compiled to
an AST again, and the results will be displayed to stdout.
If neither readonly nor stdin is specified, then rtrip
will create a mirror directory named tmp_rtrip and will
recursively round-trip all the Python source from the source
into the tmp_rtrip dir, after compiling it and then reconstituting
it through code_gen.to_source.
If the source is not specified, the entire Python library will be used.
The purpose of rtrip is to place Python code into a canonical form.
This is useful both for functional testing of astor, and for
validating code edits.
For example, if you make manual edits for PEP8 compliance,
you can diff the rtrip output of the original code against
the rtrip output of the edited code, to ensure that you
didn't make any functional changes.
For testing astor itself, it is useful to point to a big codebase,
e.g::
python -m astor.rtrip
to round-trip the standard library.
If any round-tripped files fail to be built or to match, the
tmp_rtrip directory will also contain fname.srcdmp and fname.dstdmp,
which are textual representations of the ASTs.
Note 1:
The canonical form is only canonical for a given version of
this module and the astor toolbox. It is not guaranteed to
be stable. The only desired guarantee is that two source modules
that parse to the same AST will be converted back into the same
canonical form.
Note 2:
This tool WILL TRASH the tmp_rtrip directory (unless readonly
is specified) -- as far as it is concerned, it OWNS that directory.
.. _GitHub: https://github.com/berkerpeksag/astor/ .. _GitHub: https://github.com/berkerpeksag/astor/
...@@ -36,5 +36,5 @@ setup( ...@@ -36,5 +36,5 @@ setup(
'Topic :: Software Development :: Code Generators', 'Topic :: Software Development :: Code Generators',
'Topic :: Software Development :: Compilers', 'Topic :: Software Development :: Compilers',
], ],
keywords='ast, codegen', keywords='ast, codegen, PEP8',
) )
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
This module generates a lot of permutations of Python
expressions, and dumps them into a python module
all_expr_x_y.py (where x and y are the python version tuple)
as a string.
This string is later used by check_expressions.
This module takes a loooooooooong time to execute.
"""
import sys
import collections
import itertools
import textwrap
import ast
import astor
# Operator tables consumed by get_primitives().  The position of each
# string in the tuple is the number of parameters its entries take.
# Entries are whitespace-separated; '_' stands for a space inside an
# entry and '$' marks where an operand goes.  Entries without any '$'
# get their operands placed automatically.
all_operators = (
    # Selected special operands
    '3 -3 () yield',
    # operators with one parameter
    'yield lambda_: not + - ~ $, yield_from',
    # operators with two parameters
    'or and == != > >= < <= in not_in is is_not '
    '| ^ & << >> + - * / % // @ ** for$in$ $($) $[$] . '
    '$,$ ',
    # operators with 3 parameters
    '$if$else$ $for$in$'
)

# Reduced table, same layout as all_operators.
select_operators = (
    # Selected special operands -- remove
    # some at redundant precedence levels
    '-3',
    # operators with one parameter
    'yield lambda_: not - ~ $,',
    # operators with two parameters
    'or and == in is '
    '| ^ & >> - % ** for$in$ $($) . ',
    # operators with 3 parameters
    '$if$else$ $for$in$'
)
def get_primitives(base):
    """Build formatting strings for all operators and selected operands.

    "Operator" is used loosely here: anything that accepts one or more
    expressions and can itself be used inside another expression.

    base -- sequence of strings; the index of each string is the number
            of parameters its whitespace-separated entries take.  In an
            entry '_' stands for a space and '$' marks an operand slot;
            entries without '$' get slots added (after the entry for one
            parameter, on both sides for two).

    Returns ``(operators, operands)``: ``operators`` is a list of
    ``(format_string, nparams)`` tuples with one ``%s`` per parameter and
    literal percent signs doubled; ``operands`` is a list of strings.
    """
    spacing_fixes = (
        (' ,', ','),
        (' . ', '.'),
        (' [ ', '['), (' ]', ']'),
        (' ( ', '('), (' )', ')'),
    )
    operands, operators = [], []
    for nparams, spec in enumerate(base):
        for raw in spec.replace('%', '%%').split():
            entry = raw.replace('_', ' ')
            if nparams and '$' not in entry:
                assert nparams in (1, 2)
                prefix = '$' if nparams == 2 else ''
                entry = '%s%s$' % (prefix, entry)
            assert nparams == entry.count('$'), (nparams, entry)
            entry = entry.replace('$', ' %s ').strip()

            # Normalize the spacing
            for old, new in spacing_fixes:
                entry = entry.replace(old, new)
            if nparams == 1:
                # Unary sign operators hug their operand
                for sign in '+-~':
                    entry = entry.replace(sign + ' ', sign)

            if nparams:
                operators.append((entry, nparams))
            else:
                operands.append(entry)
    return operators, operands
def get_sub_combinations(maxop):
    """Return the ways to split an operator budget among sub-expressions.

    The result is a ``defaultdict(list)`` keyed by
    ``(numops, numterms)``, where:

        numops is the number of operators the expression being built
        should contain (1 through ``maxop``), and

        numterms is the number of operands (1, 2 or 3) required by the
        current operator.

    Each value lists every tuple of length ``numterms`` giving how many
    operators the recursively built expression for each operand should
    use; each tuple sums to ``numops - 1`` because the current operator
    uses one itself.
    """
    combo = collections.defaultdict(list)
    for total in range(1, maxop + 1):
        remaining = total - 1
        combo[total, 1].append((remaining,))
        for first in range(total):
            combo[total, 2].append((first, remaining - first))
            combo[total, 3].extend(
                (first, second, remaining - first - second)
                for second in range(total - first))
    return combo
def get_paren_combos():
    """Return the parenthesization choices per operand count.

    The returned list is indexed by the number of operands of the
    current operator (1 to 3; index 0 is ``None``).  Each entry is a
    list of tuples, one format string per operand, covering every way
    of leaving an operand bare (``'%s'``) or wrapping it in
    parentheses (``'(%s)'``).
    """
    choices = ('%s', '(%s)')
    table = [None]
    for count in (1, 2, 3):
        table.append(list(itertools.product(choices, repeat=count)))
    return table
def operand_combo(expressions, operands, max_operand=13):
    """Yield each expression with one placeholder at a time filled in.

    For every template in ``expressions`` and every ``'%s'`` slot in
    it, yield one string per entry of ``operands`` with that slot
    replaced by the entry, plus one with the slot left as ``'%s'``.
    All other slots stay as ``'%s'`` so they can be filled in later.

    ``max_operand`` bounds the number of slots supported: a template
    must contain fewer than ``max_operand`` placeholders.  Templates
    without any placeholder yield nothing.
    """
    choices = list(operands) + ['%s']
    op_combos = []
    for width in range(max_operand):
        blank = ('%s',) * width
        op_combos.append([blank[:slot] + (choice,) + blank[slot + 1:]
                          for slot in range(width)
                          for choice in choices])
    for expr in expressions:
        # Double the escaped percents again so they survive this extra
        # round of %-formatting.
        template = expr.replace('%%', '%%%%')
        for fill in op_combos[template.count('%s')]:
            yield template % fill
def build(numops=2, all_operators=all_operators, use_operands=False,
          # Runtime optimization
          tuple=tuple):
    """Return an iterator of expression templates using ``numops`` operators.

    ``all_operators`` is an operator table in the layout accepted by
    get_primitives().  Every operator is combined recursively with
    every way of distributing the remaining operators among its
    operands, and with every choice of wrapping each operand in
    parentheses or not.  Leaves are left as ``'%s'`` placeholders.

    When ``use_operands`` is true the templates are additionally run
    through operand_combo() with the operands from the table.
    """
    operators, operands = get_primitives(all_operators)
    combo = get_sub_combinations(numops)
    paren_combos = get_paren_combos()
    product = itertools.product
    # Use itertools.izip where it exists, the builtin zip otherwise.
    izip = getattr(itertools, 'izip', zip)

    def recurse_build(budget):
        # With no operators left the expression is a bare placeholder.
        if not budget:
            yield '%s'
        for template, nparams in operators:
            # Keep escaped percents escaped through the formatting below.
            template = template.replace('%%', '%%%%')
            wrappers = paren_combos[nparams]
            for split in combo[budget, nparams]:
                children = [recurse_build(part) for part in split]
                for subexpr in product(*children):
                    for parens in wrappers:
                        yield template % tuple(
                            wrap % sub
                            for (wrap, sub) in izip(parens, subexpr))

    result = recurse_build(numops)
    if use_operands:
        return operand_combo(result, operands)
    return result
def makelib():
    """Generate the expression list module for the running Python.

    Builds expression templates with one and two operators from the
    full table (with operands substituted) and with three operators
    from ``select_operators``, fills the remaining placeholders with
    the letters ``a``, ``b``, ..., and tries to parse each result.

    Expressions that parse are grouped by the dump of their AST; for
    each distinct AST the shortest source text is kept (ties broken by
    string ordering).  The survivors are written, sorted, one per line,
    into ``all_expr_<major>_<minor>.py`` in the current directory as
    the triple-quoted string ``all_expr``.

    Prints the number of rejected and accepted expressions.
    """
    parse = ast.parse
    dump_tree = astor.dump_tree

    def default_value():
        # Starting entry for an AST not seen yet; the length is meant to
        # exceed that of any generated expression so min() replaces it.
        return 1000000, ''
    mydict = collections.defaultdict(default_value)

    allparams = [tuple('abcdefghijklmnop'[:x]) for x in range(13)]
    alltxt = itertools.chain(build(1, use_operands=True),
                             build(2, use_operands=True),
                             build(3, select_operators))

    # Spacing clean-ups applied to expressions containing 'yield'.
    yieldrepl = [('yield %s %s' % (operator, operand),
                  'yield %s%s' % (operator, operand))
                 for operator in '+-' for operand in '(ab']
    yieldrepl.append(('yield[', 'yield ['))

    badexpr = 0
    goodexpr = 0
    # Expressions that call, subscript or take attributes of the
    # literal 3 parse fine but are not kept.
    silly = '3( 3.( 3[ 3.['.split()
    for expr in alltxt:
        params = allparams[expr.count('%s')]
        expr %= params
        try:
            myast = parse(expr)
        except Exception:
            # Most candidates are not valid Python.  Catch Exception
            # rather than everything so that Ctrl-C (KeyboardInterrupt)
            # and SystemExit can still stop this very long loop.
            badexpr += 1
            continue
        goodexpr += 1
        key = dump_tree(myast)
        expr = expr.replace(', - ', ', -')
        if any(x in expr for x in silly):
            continue
        if 'yield' in expr:
            for old, new in yieldrepl:
                expr = expr.replace(old, new)
        mydict[key] = min(mydict[key], (len(expr), expr))
    print(badexpr, goodexpr)

    stuff = sorted(x[1] for x in mydict.values())

    lineend = '\n'.encode('utf-8')
    with open('all_expr_%s_%s.py' % sys.version_info[:2], 'wb') as f:
        f.write(textwrap.dedent('''
            # AUTOMAGICALLY GENERATED!!! DO NOT MODIFY!!
            #
            all_expr = """
            ''').encode('utf-8'))
        for item in stuff:
            f.write(item.encode('utf-8'))
            f.write(lineend)
        f.write('"""\n'.encode('utf-8'))
# Running this module as a script regenerates the expression list.
if __name__ == '__main__':
    makelib()
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Part of the astor library for Python AST manipulation.
License: 3-clause BSD
Copyright (c) 2015 Patrick Maupin
This module reads the strings generated by build_expressions,
and runs them through the Python interpreter.
For strings that are suboptimal (too many spaces, etc.),
it simply dumps them to a miscompare file.
For strings that seem broken (do not parse after roundtrip)
or are maybe too compressed, it dumps information to the console.
This module does not take too long to execute; however, the
underlying build_expressions module takes forever, so this
should not be part of the automated regressions.
"""
import sys
import collections
import itertools
import textwrap
import hashlib
import ast
import astor
# Load the generated expression list as ``mymod``.  If the module does not
# exist yet, build it with build_expressions.makelib() and import it again.
try:
    import importlib
except ImportError:
    # Without importlib the module name cannot be computed at runtime, so
    # the Python 2.6 expression list is imported by its literal name.
    try:
        import all_expr_2_6 as mymod
    except ImportError:
        print("Expression list does not exist -- building")
        import build_expressions
        build_expressions.makelib()
        print("Expression list built")
        import all_expr_2_6 as mymod
else:
    # The module name carries the (major, minor) version of the running
    # interpreter, matching the file name written by makelib().
    mymodname = 'all_expr_%s_%s' % sys.version_info[:2]
    try:
        mymod = importlib.import_module(mymodname)
    except ImportError:
        print("Expression list does not exist -- building")
        import build_expressions
        build_expressions.makelib()
        print("Expression list built")
        mymod = importlib.import_module(mymodname)
def checklib():
    """Round-trip every stored expression through astor and report diffs.

    Each line of ``mymod.all_expr`` is parsed and regenerated with
    ``astor.to_source``.  Whenever the regenerated text differs from
    the original, the pair is appended to
    ``mismatch_<major>_<minor>.txt`` in the current directory.

    If the regenerated text is also shorter than the original, or is
    "bad" (it fails to parse, or parses to a different AST dump), the
    pair is printed to the console; for bad results both AST dumps are
    printed as well.
    """
    print("Checking expressions")
    parse = ast.parse
    dump_tree = astor.dump_tree
    to_source = astor.to_source
    outname = 'mismatch_%s_%s.txt' % sys.version_info[:2]
    with open(outname, 'wb') as f:
        for srctxt in mymod.all_expr.strip().splitlines():
            srcast = parse(srctxt)
            dsttxt = to_source(srcast)
            if dsttxt == srctxt:
                # Perfect round trip; nothing to record.
                continue

            srcdmp = dump_tree(srcast)
            try:
                dstast = parse(dsttxt)
            except SyntaxError:
                bad, dstdmp = True, 'aborted'
            else:
                dstdmp = dump_tree(dstast)
                bad = srcdmp != dstdmp

            if bad or len(dsttxt) < len(srctxt):
                print(srctxt, dsttxt)
                if bad:
                    print('****************** Original')
                    print(srcdmp)
                    print('****************** Extra Crispy')
                    print(dstdmp)
                    print('******************')
                    print()
                    print()

            record = '%s %s\n' % (repr(srctxt), repr(dsttxt))
            f.write(record.encode('utf-8'))
# Running this module as a script performs the round-trip check.
if __name__ == '__main__':
    checklib()
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment