Initial version

a662c5d4 · Anthony Sottile · 7980630d · a662c5d4 · a662c5d4 · a662c5d4
Commit a662c5d4 authored Jul 10, 2017 by Anthony Sottile
Hide whitespace changes
Inline Side-by-side

Showing with 381 additions and 17 deletions

README.md README.md +118 -3

add_trailing_comma.py add_trailing_comma.py +167 -0

add_trailing_comma_test.py tests/add_trailing_comma_test.py +96 -14

No files found.
--- a/README.md
+++ b/README.md
@@ -25,7 +25,122 @@ Sample `.pre-commit-config.yaml`:
    -   id: add-trailing-comma
 ```

-## TODO
+## multi-line method invocation style -- why?

-`--py35-plus` will append a trailing comma even after `*args` or `**kwargs`
-(this is a syntax error in older versions).
+```python
+# Sample of *ideal* syntax
+function_call(
+    argument,
+    5 ** 5,
+    kwarg=foo,
+)
+```
+
+- the initial paren is at the end of the line
+- each argument is indented one level further than the function name
+- the last parameter (unless the call contains an unpacking
+  (`*args` / `**kwargs`)) has a trailing comma
+
+This has the following benefits:
+
+- arbitrary indentation is avoided:
+
+    ```python
+    # I hear you like 15 space indents
+    # oh your function name changed? guess you get to reindent :)
+    very_long_call(arg,
+                   arg,
+                   arg)
+    ```
+- adding / removing a parameter preserves `git blame` and is a minimal diff:
+
+    ```diff
+     # with no trailing commas
+     x(
+    -    arg
+    +    arg,
+    +    arg2
+     )
+    ```
+
+    ```diff
+     # with trailing commas
+     x(
+         arg,
+    +    arg2,
+     )
+    ```
+
+
+## Implemented features
+
+### trailing commas for function calls
+
+```diff
+ x(
+     arg,
+-    arg
++    arg,
+ )
+```
+
+### trailing commas for function calls with unpackings
+
+If `--py35-plus` is passed (or python3.5+ syntax is automatically detected),
+`add-trailing-comma` will also perform the following change:
+
+```diff
+ x(
+-    *args
++    *args,
+ )
+ y(
+-    **kwargs
++    **kwargs,
+ )
+```
+
+Note that this would cause a **`SyntaxError`** in earlier python versions.
+
+
+## Planned features
+
+### trailing commas for tuple / list / dict / set literals
+
+```diff
+ x = [
+-    1, 2, 3
+    1, 2, 3,
+ ]
+```
+
+### trailing commas for function definitions
+
+```diff
+ def func(
+         arg1,
+-        arg2
++        arg2,
+ ):
+```
+
+### unhug trailing paren
+
+```diff
+ x(
+     arg1,
+-    arg2)
++    arg2,
++)
+```
+
+### unhug leading paren
+
+```diff
+-function_name(arg1,
+-              arg2)
++function_name(
++    arg1,
++    arg2,
++)
+```
--- a/add_trailing_comma.py
+++ b/add_trailing_comma.py
@@ -2,7 +2,172 @@ from __future__ import absolute_import
 from __future__ import unicode_literals

 import argparse
+import ast
+import collections
 import io
+import sys
+
+from tokenize_rt import src_to_tokens
+from tokenize_rt import Token
+from tokenize_rt import tokens_to_src
+from tokenize_rt import UNIMPORTANT_WS
+
+
+# A source position: a line number paired with a utf8 byte offset.
+Offset = collections.namedtuple('Offset', ('line', 'utf8_byte_offset'))
+# A call recorded by `FindCalls`: the ast node, whether it uses `*` / `**`
+# unpacking, and the set of `Offset`s of its arguments.
+Node = collections.namedtuple('Node', ('node', 'star_args', 'arg_offsets'))

+# Token names skipped when walking backwards from a call's closing paren.
+NON_CODING_TOKENS = frozenset(('COMMENT', 'NL', UNIMPORTANT_WS))
+
+
+def ast_parse(contents_text):
+    """Parse `contents_text` into an ast via its UTF-8 encoded bytes."""
+    encoded = contents_text.encode('UTF-8')
+    return ast.parse(encoded)
+
+
+def _to_offset(node):
+    """Return the `Offset` of `node`, or of the first positioned descendant.

+    Nodes without a `lineno` are expanded into their fields (in field
+    order) until something carrying a position is found.  Raises
+    `AssertionError` if nothing reachable has a position.
+    """
+    pending = [node]
+    while pending:
+        current = pending.pop()
+        if hasattr(current, 'lineno'):
+            return Offset(current.lineno, current.col_offset)
+        if hasattr(current, '_fields'):  # pragma: no cover (PY35+)
+            # pushed in reverse so that the first field is popped first
+            pending.extend(
+                getattr(current, name) for name in reversed(current._fields)
+            )
+    raise AssertionError(node)
+
+
+if sys.version_info >= (3, 5):  # pragma: no cover (PY35+)
+    def _is_star_arg(node):
+        """Return whether `node` is an `ast.Starred` (`*args`) node."""
+        return isinstance(node, ast.Starred)
+else:  # pragma: no cover (<PY35)
+    def _is_star_arg(node):
+        """Always `False` on interpreters older than python3.5."""
+        return False
+
+
+def _is_star_star_kwarg(node):
+    """Return whether `node` is an `ast.keyword` whose `arg` is `None`.

+    That is how the ast represents a `**kwargs` unpacking in a call.
+    """
+    if not isinstance(node, ast.keyword):
+        return False
+    return node.arg is None
+
+
+class FindCalls(ast.NodeVisitor):
+    """Collect the multi-line calls of a module.

+    After visiting a tree, `calls` maps the `Offset` of each recorded call
+    to a `Node`, and `has_new_syntax` tells whether any call contained more
+    than one `*` unpacking or more than one `**` unpacking.
+    """

+    def __init__(self):
+        self.calls = {}
+        self.has_new_syntax = False

+    def visit_Call(self, node):
+        """Record `node` when one of its arguments starts on a later line."""
+        call_args = list(node.args)
+        call_args.extend(node.keywords)
+        # some asts expose `*args` / `**kwargs` as separate attributes
+        legacy_star = getattr(node, 'starargs', None)
+        legacy_kwargs = getattr(node, 'kwargs', None)
+        if legacy_star:  # pragma: no cover (<PY35)
+            call_args.append(legacy_star)
+        if legacy_kwargs:  # pragma: no cover (<PY35)
+            call_args.append(legacy_kwargs)

+        uses_unpacking = bool(legacy_star or legacy_kwargs)
+        positions = set()
+        spans_lines = False
+        for arg in call_args:
+            if (
+                    _is_star_arg(arg) or
+                    _is_star_star_kwarg(arg)
+            ):  # pragma: no cover (PY35+)
+                uses_unpacking = True

+            position = _to_offset(arg)
+            # multiline strings have invalid position, ignore them
+            if position.utf8_byte_offset == -1:
+                continue
+            positions.add(position)
+            spans_lines = spans_lines or position.line > node.lineno

+        # If the sole argument is a generator, don't add a trailing comma as
+        # this breaks lib2to3 based tools
+        sole_generator = (
+            len(call_args) == 1 and
+            isinstance(call_args[0], ast.GeneratorExp)
+        )
+        if spans_lines and not sole_generator:
+            self.calls[Offset(node.lineno, node.col_offset)] = Node(
+                node, uses_unpacking, positions,
+            )

+        # more than one `*` or `**` in a single call is python3.5+ syntax
+        star_count = sum(1 for arg in node.args if _is_star_arg(arg))
+        star_star_count = sum(
+            1 for kw in node.keywords if _is_star_star_kwarg(kw)
+        )
+        if star_count > 1 or star_star_count > 1:  # pragma: no cover (PY35+)
+            self.has_new_syntax = True

+        self.generic_visit(node)
+
+
+def _fix_call(call, i, tokens):
+    """Insert a trailing comma into the call starting at `tokens[i]`.

+    `call` is the `Node` recorded for this call; `tokens` is modified in
+    place.  Nothing is inserted when the call's parens are on one line,
+    when a comma is already present, or when the closing paren is
+    followed directly by another `)`.
+    """
+    # When we get a `call` object, the ast refers to it as this:
+    #
+    #     func_name(arg, arg, arg)
+    #     ^ where ast points
+    #
+    # We care about the closing paren, in order to find it, we first walk
+    # until we find an argument.  When we find an argument, we know the outer
+    # paren we find is the function call paren
+    #
+    #     func_name(arg, arg, arg)
+    #              ^ outer paren
+    #
+    # Once that is identified, walk until the paren stack is empty -- this will
+    # put us at the last paren
+    #
+    #     func_name(arg, arg, arg)
+    #                            ^ paren stack is empty
+    call_open = None
+    open_parens = []
+    for idx in range(i, len(tokens)):
+        tok = tokens[idx]
+        if tok.src == '(':
+            open_parens.append(idx)
+        elif tok.src == ')':
+            open_parens.pop()

+        if (tok.line, tok.utf8_byte_offset) in call.arg_offsets:
+            call_open = open_parens[0]

+        if call_open is not None and not open_parens:
+            break
+    else:
+        raise AssertionError('Past end?')

+    # This was not actually a multi-line call, despite the ast telling us that
+    if tokens[call_open].line == tokens[idx].line:
+        return

+    # From there, we can walk backwards and decide whether a comma is needed
+    last_code = idx - 1
+    while tokens[last_code].name in NON_CODING_TOKENS:
+        last_code -= 1

+    # Already has a comma, nothing to do
+    if tokens[last_code].src == ',':
+        return
+    # A hugging paren: leave it alone
+    if tokens[last_code + 1].src == ')':
+        return
+    tokens.insert(last_code + 1, Token('OP', ','))
+
+
+def _fix_calls(contents_text, py35_plus):
+    """Return `contents_text` with trailing commas added to multi-line calls.

+    Source that fails to parse is returned unchanged.  Calls using `*` /
+    `**` unpacking are only rewritten when `py35_plus` is set or when
+    python3.5+ call syntax was detected in the source.
+    """
+    try:
+        tree = ast_parse(contents_text)
+    except SyntaxError:
+        return contents_text

+    finder = FindCalls()
+    finder.visit(tree)
+    allow_star_args = py35_plus or finder.has_new_syntax

+    tokens = src_to_tokens(contents_text)
+    # Snapshot the positions up front: `_fix_call` inserts into `tokens`.
+    positions = [
+        (idx, Offset(tok.line, tok.utf8_byte_offset))
+        for idx, tok in enumerate(tokens)
+    ]
+    # Walk backwards so insertions never shift indices still to be visited
+    for idx, key in reversed(positions):
+        call = finder.calls.get(key)
+        if call is None:
+            continue
+        # Only fix stararg calls if asked to
+        if allow_star_args or not call.star_args:
+            _fix_call(call, idx, tokens)

+    return tokens_to_src(tokens)


 def fix_file(filename, args):
@@ -15,6 +180,8 @@ def fix_file(filename, args):
        print('{} is non-utf-8 (not supported)'.format(filename))
        return 1

+    contents_text = _fix_calls(contents_text, args.py35_plus)
+
    if contents_text != contents_text_orig:
        print('Rewriting {}'.format(filename))
        with io.open(filename, 'w', encoding='UTF-8') as f:

--- a/tests/add_trailing_comma_test.py
+++ b/tests/add_trailing_comma_test.py
@@ -2,9 +2,82 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals

+import sys
+
+import pytest
+
+from add_trailing_comma import _fix_calls
 from add_trailing_comma import main


+@pytest.mark.parametrize(
+    'src',
+    (
+        # No relevant multiline calls
+        'x = 5',
+        'x(1)',
+        # Don't rewrite functions that have a single generator argument as
+        # this breaks lib2to3 based tools.
+        'tuple(\n'
+        '    a for a in b\n'
+        ')',
+        # Don't rewrite *args or **kwargs unless --py35-plus
+        'x(\n'
+        '    *args\n'
+        ')',
+        'x(\n'
+        '    **kwargs\n'
+        ')',
+        # The ast tells us that the inner call starts on line 2, but the first
+        # paren (and last paren) are actually both on line 3.
+        'x(\n'
+        '    "foo"\n'
+        '    "bar".format(1),\n'
+        ')',
+        # Don't add a comma when it's not at the end of a line
+        'x((\n'
+        '    1,\n'
+        '))',
+        # Can't handle multi line strings
+        'x(\n'
+        '    """\n'
+        '    """\n'
+        ')',
+    ),
+)
+def test_fix_calls_noops(src):
+    """Each of these sources must come back from `_fix_calls` unchanged."""
+    assert _fix_calls(src, py35_plus=False) == src
+
+
+def test_py35_plus_rewrite():
+    """A `*args` call gains a trailing comma when `py35_plus` is set."""
+    before = 'x(\n    *args\n)'
+    after = 'x(\n    *args,\n)'
+    assert _fix_calls(before, py35_plus=True) == after
+
+
+@pytest.mark.xfail(sys.version_info < (3, 5), reason='py35+ only feature')
+@pytest.mark.parametrize(
+    'syntax',
+    (
+        'y(*args1, *args2)\n',
+        'y(**kwargs1, **kwargs2)\n',
+    ),
+)
+def test_auto_detected_py35_plus_rewrite(syntax):
+    """py35+ call syntax elsewhere in the source enables stararg rewrites."""
+    ret = _fix_calls(syntax + 'x(\n    *args\n)', py35_plus=False)
+    assert ret == syntax + 'x(\n    *args,\n)'
+
+
 def test_main_trivial():
    assert main(()) == 0

@@ -16,13 +89,13 @@ def test_main_noop(tmpdir):
    assert f.read() == 'x = 5\n'


-# def test_main_changes_a_file(tmpdir, capsys):
-#     f = tmpdir.join('f.py')
-#     f.write('x(\n    1\n)\n')
-#     assert main((f.strpath,)) == 1
-#     out, _ = capsys.readouterr()
-#     assert out == 'Rewriting {}\n'.format(f.strpath)
-#     assert f.read() == 'x(\n    1,\n)\n'
+def test_main_changes_a_file(tmpdir, capsys):
+    """`main` rewrites the file, reports it on stdout and returns 1."""
+    path = tmpdir.join('f.py')
+    path.write('x(\n    1\n)\n')
+    retcode = main((path.strpath,))
+    stdout, _ = capsys.readouterr()
+    assert retcode == 1
+    assert stdout == 'Rewriting {}\n'.format(path.strpath)
+    assert path.read() == 'x(\n    1,\n)\n'


 def test_main_syntax_error(tmpdir):
@@ -39,10 +112,19 @@ def test_main_non_utf8_bytes(tmpdir, capsys):
    assert out == '{} is non-utf-8 (not supported)\n'.format(f.strpath)


-# def test_py35_plus_argument_star_args(tmpdir):
-#     f = tmpdir.join('f.py')
-#     f.write('x(\n    *args\n)\n')
-#     assert main((f.strpath,)) == 0
-#     assert f.read() == 'x(\n    *args\n)\n')
-#     assert main((f.strpath, '--py35-plus')) == 1
-#     assert f.read() == 'x(\n    *args,\n)\n'
+def test_main_py35_plus_argument_star_args(tmpdir):
+    """`*args` calls are only rewritten when `--py35-plus` is passed."""
+    target = tmpdir.join('f.py')
+    target.write('x(\n    *args\n)\n')
+    argv = (target.strpath,)
+    # without the flag the file is left alone
+    assert main(argv) == 0
+    assert target.read() == 'x(\n    *args\n)\n'
+    # with the flag the comma is added
+    assert main(argv + ('--py35-plus',)) == 1
+    assert target.read() == 'x(\n    *args,\n)\n'
+
+
+def test_main_py35_plus_argument_star_star_kwargs(tmpdir):
+    """`**kwargs` calls are only rewritten when `--py35-plus` is passed."""
+    target = tmpdir.join('f.py')
+    target.write('x(\n    **args\n)\n')
+    argv = (target.strpath,)
+    # without the flag the file is left alone
+    assert main(argv) == 0
+    assert target.read() == 'x(\n    **args\n)\n'
+    # with the flag the comma is added
+    assert main(argv + ('--py35-plus',)) == 1
+    assert target.read() == 'x(\n    **args,\n)\n'