-
-
Save groboclown/a548643d649cb06c04ed7e4b4121f339 to your computer and use it in GitHub Desktop.
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |
# Under the this license, this extension applies the GPL to the extension itself, which is | |
# only used when run as part of PyLint. Therefore, it can be included in your project | |
# without triggering the viral clause. | |
# For details: https://github.com/groboclown/pylint-trailing-commas/blob/main/LICENSE | |
# Copyright (c) https://github.com/groboclown/pylint-trailing-commas/blob/main/CONTRIBUTORS.txt | |
# type: ignore | |
""" | |
trailing_commas v3.0 | |
(compatible with PyLint 3) | |
An extension to PyLint. It enforces a coding style similar to Golang in | |
regard to multi-line list expressions. | |
To add it to your run of PyLint, add the file's directory to your | |
`PYTHONPATH` environment variable, and run: | |
```bash | |
python3 -m pylint \ | |
--load-plugins trailing_commas \ | |
my_packages to_lint | |
``` | |
If you can't easily add it to the `PYTHONPATH`, then use (replace `build-src` | |
with the directory containing this file): | |
```bash | |
python3 -m pylint \ | |
'--init-hook=sys.path.append("build-src")' \ | |
--load-plugins trailing_commas \ | |
my_packages to_lint | |
``` | |
It encourages a coding style like: | |
```python | |
def my_func( | |
arg_1, | |
arg_2, | |
): | |
# Lists, tuples, dictionaries, sets, and function arguments, if multi-line, | |
# need to include a comma after the last item. | |
x = [ | |
"a", | |
"b-c", | |
"d-e-f", | |
] | |
# list constructor expressions are recognized and don't need a comma. | |
y = [ | |
value.upper() | |
for value in x | |
] | |
if ( | |
# if, elif, and other multi-line statements that aren't lists are | |
# identified as such, and don't need a trailing comma | |
len(arg_1) > 1 and | |
len(arg_2) < 10 | |
): | |
return x | |
# Returning items on the same line is fine, and you can use the option | |
# 'comma-always-after-last-tuple' to require tuples to have a final | |
# comma. | |
return (len(x), len(y), "string-value",) | |
``` | |
This has two options that can be set: | |
_ 'comma-always-after-last-tuple' : set to "y" or "n". If set, then | |
a comma is always required after the last item in a tuple, regardless | |
of whether it is multi-line or not. The linter makes a best effort | |
to discover whether a parenthetical expression is a tuple or not. | |
_ 'one-item-per-line' : set to "y" or "n". If set, then the extension | |
reports a problem if a multi-line list has more than one item on a | |
line. | |
""" | |
from typing import List, Tuple, Sequence, Optional | |
import tokenize | |
import pylint.lint | |
from pylint.checkers import BaseTokenChecker | |
def register(linter: pylint.lint.PyLinter) -> None: | |
"""Register the trailing commas checker.""" | |
linter.register_checker(TrailingCommaChecker(linter)) | |
Message = Tuple[str, Sequence[str], int] | |
OPEN_LIST = ('[', '{', '(',) | |
CLOSE_LIST = (']', '}', ')',) | |
SEPARATOR_LIST = (',',) | |
IGNORABLE_TOKEN_LIST = ( | |
tokenize.NL, tokenize.NEWLINE, tokenize.INDENT, | |
tokenize.DEDENT, tokenize.COMMENT, tokenize.ENCODING, | |
tokenize.ENDMARKER, | |
) | |
class Token: | |
"""Wrapper for a single token structure.""" | |
def __init__(self, token_wrapper: 'TokenListWrapper', index: int) -> None: | |
self.__token_wrapper = token_wrapper | |
self.__index = index | |
@property | |
def index(self) -> int: | |
"""token number""" | |
return self.__index | |
@property | |
def token_text(self) -> str: | |
"""text of this token""" | |
return self.__token_wrapper.token_text(self.__index) | |
@property | |
def token_type(self) -> int: | |
"""token category""" | |
return self.__token_wrapper.token_type(self.__index) | |
@property | |
def start_line_no(self) -> int: | |
"""line number for this token""" | |
return self.__token_wrapper.start_line_no(self.__index) | |
@property | |
def start_col_no(self) -> int: | |
"""Starting column number for this token""" | |
return self.__token_wrapper.start_col_no(self.__index) | |
@property | |
def end_line_no(self) -> int: | |
"""line number for this token""" | |
return self.__token_wrapper.end_line_no(self.__index) | |
@property | |
def end_col_no(self) -> int: | |
"""Ending column number for this token""" | |
return self.__token_wrapper.end_col_no(self.__index) | |
@property | |
def source_line(self) -> str: | |
"""Source code line for this token""" | |
return self.__token_wrapper.source_line(self.__index) | |
@property | |
def next(self) -> Optional['Token']: | |
"""Next token in the list""" | |
next_idx = self.__index + 1 | |
if next_idx >= self.__token_wrapper.count(): | |
return None | |
return Token(self.__token_wrapper, next_idx) | |
@property | |
def prev(self) -> Optional['Token']: | |
"""Previous token in the list""" | |
prev_idx = self.__index - 1 | |
if prev_idx < 0: | |
return None | |
return Token(self.__token_wrapper, prev_idx) | |
def __repr__(self) -> str: | |
return self.__token_wrapper.repr_item(self.__index) | |
class TokenListWrapper: | |
"""Wrapper around the whole list of tokens""" | |
def __init__(self, token_list) -> None: | |
self.__token_list = token_list | |
def count(self) -> int: | |
"""number of tokens""" | |
return len(self.__token_list) | |
def token_text(self, idx: int) -> str: | |
"""token text""" | |
return self.__token_list[idx][1] | |
def token_type(self, idx: int) -> int: | |
"""tokenize type""" | |
return self.__token_list[idx][0] | |
def start_line_no(self, idx: int) -> int: | |
"""line number of the first line for the token""" | |
return self.__token_list[idx][2][0] | |
def start_col_no(self, idx: int) -> int: | |
"""column number for the start of the token""" | |
return self.__token_list[idx][2][1] | |
def end_line_no(self, idx: int) -> int: | |
"""line number of the last line for the token""" | |
return self.__token_list[idx][3][0] | |
def end_col_no(self, idx: int) -> int: | |
"""column number for the last of the token""" | |
return self.__token_list[idx][3][1] | |
def source_line(self, idx: int): | |
"""source text that contains the whole token""" | |
return self.__token_list[idx][4] | |
def get(self, idx: int) -> Token: | |
"""get a single wrapped token by index""" | |
return self.__token_list[idx] | |
def items(self) -> List[Token]: | |
"""all the items in the list (memory inefficient)""" | |
ret: List[Token] = [] | |
for i in range(0, self.count()): | |
ret.append(Token(self, i)) | |
return ret | |
def repr_item(self, idx: int) -> str: | |
"""representation of a single index""" | |
return repr(self.__token_list[idx]) | |
class BracketContext: # pylint: disable=R0902 | |
"""Context within a bracket""" | |
__slots__ = ( | |
'_starting_token_line_no', '_current_line_no', '_current_token', '_current_line_items', | |
'_is_paren', '_previous_was_comma', '_previous_was_string', '_item_count', '_config', | |
'_messages', '_expression_type', '_is_index', | |
) | |
def __init__(self, starting_token: Token, config) -> None: | |
self._starting_token_line_no = starting_token.start_line_no | |
self._current_line_no = starting_token.start_line_no | |
# all the bits of text that make up the current item between commas | |
self._current_token: List[Token] = [] | |
self._current_line_items: List[Sequence[Token]] = [] | |
self._previous_was_comma = False | |
self._previous_was_string = False | |
self._item_count = 0 | |
self._config = config | |
self._messages: List[Message] = [] | |
# bandit recognizes this as a possible password. | |
self._is_paren = starting_token.token_text == '(' # nosec | |
# figure out if this is a tuple, if, index, or a function call. | |
self._is_index = False | |
self._expression_type = None | |
prev = starting_token.prev | |
if ( | |
prev and | |
prev.token_text in ('if', 'elif',) and | |
self._is_paren | |
): | |
# if statements might begin with a tuple. If it only contains a parenthetical | |
# expression, then we can ignore it. | |
self._expression_type = 'if' | |
elif ( | |
prev and | |
prev.token_type == tokenize.NAME and | |
starting_token.token_text == '[' # nosec | |
): | |
# index, or a type expression, like List[int] | |
self._is_index = True | |
elif ( | |
prev and | |
prev.token_type == tokenize.NAME and | |
self._is_paren | |
): | |
# This is a function call, so it's parsed as though it's a tuple. | |
# It can't be a list constructor. | |
self._expression_type = 'tuple' | |
else: | |
BracketContext._debug( | |
"Found list start, not if or elif or tuple, {prev} / {starting_token}", | |
prev=prev, starting_token=starting_token, | |
) | |
def started_sub_bracket(self, token: Token) -> List[Message]: | |
"""Start of a sub-bracket within this bracket.""" | |
# This should be treated like any other token, but it can spread across | |
# multiple lines. In this case, the start of the sub-bracket is just | |
# the first part of a long token. | |
self._current_token.append(token) | |
return [] | |
def ended_sub_bracket(self, token: Token) -> List[Message]: | |
"""End of a sub-bracket within the this bracket.""" | |
# act like whatever was in the bracket is part of the same line. | |
self._current_token.append(token) | |
if self._starting_token_line_no == self._current_line_no: | |
# the start of the bracket was on the starting line, so consider this the | |
# new start of the expression | |
self._starting_token_line_no = token.start_line_no | |
self._current_line_no = token.start_line_no | |
self._previous_was_comma = False | |
self._previous_was_string = False | |
return [] | |
def encountered_separator(self, token: Token) -> List[Message]: | |
"""a list separator in the bracket.""" | |
# if the separator is on a different line than the last token, then that's | |
# a problem. | |
if token.start_line_no != self._current_line_no: | |
if self._current_token: | |
# This indicates that the previous token was most likely a | |
# multi-line string. | |
if self._current_token[-1].end_line_no == token.start_line_no: | |
# Turns out that the separator and multi-line string are on the same line. | |
self._previous_was_comma = True | |
self._previous_was_string = False | |
self._current_line_items.append(self._current_token) | |
self._item_count += 1 | |
BracketContext._debug( | |
"(A0) string item (now {count}) with {token}", | |
count=self._item_count, token=token, | |
) | |
return [] | |
# This shouldn't happen. | |
raise RuntimeError( | |
f'failure: unexpected token stack {0} on token {1}'.format( | |
self._current_token, token, | |
) | |
) | |
self._messages.append(("leading-comma", [], token.start_line_no,)) | |
if self._current_token: | |
self._current_line_items.append(self._current_token) | |
self._item_count += 1 | |
BracketContext._debug( | |
"(A1) new item (now {count}) with {token}", count=self._item_count, token=token, | |
) | |
else: | |
BracketContext._debug( | |
"(A2) separator without token (now {count}) with {token}", | |
count=self._item_count, token=token, | |
) | |
self._current_token = [] | |
self._previous_was_comma = True | |
self._previous_was_string = False | |
return [] | |
def encountered_token(self, token: Token) -> List[Message]: | |
"""An item token in the bracket""" | |
# bandit recognizes this as a possible hard-coded password. | |
if token.token_text == 'for' and self._expression_type is None: # nosec | |
# this is most likely a [x for x in y] style list... | |
self._expression_type = 'for' | |
self._current_token.append(token) | |
# if token.token_type == tokenize.STRING and self._previous_was_string: | |
# # else this is a "a " " b" style string continuation, in which the two tokens act | |
# # like a single string. | |
# pass | |
# else: | |
# self._item_count += 1 | |
# BracketContext._debug( | |
# "(B) new item (now {count}) with {token}", count=self._item_count, token=token, | |
# ) | |
self._previous_was_comma = False | |
self._previous_was_string = token.token_type == tokenize.STRING | |
return [] | |
def encountered_newline(self, token: Token) -> List[Message]: | |
"""Newline in the bracket""" | |
# Don't look at the comma presence or adjust it. | |
if self._current_token: | |
self._current_line_items.append(self._current_token) | |
# do not increment the number of discovered items until a separator | |
# or end-of-bracket is found. | |
if self._config.one_item_per_line and len(self._current_line_items) > 1: | |
# if we are on an if or for statement, then we shouldn't have more than 1 | |
# item in the bracket expression. So we don't need to check for that | |
# case here. | |
self._messages.append(("multiple-items-per-line", [], token.start_line_no)) | |
elif ( | |
token.start_line_no == self._starting_token_line_no and | |
len(self._current_line_items) >= 1 | |
): | |
# items on the same line as the open bracket, but no closing bracket on | |
# that line. | |
# This is still valid for the "for" and "if" scenarios. | |
self._messages.append(("multi-line-list-first-line-item", [], token.start_line_no)) | |
next_item = token.next | |
if next_item: | |
self._current_line_no = next_item.start_line_no | |
# self._current_token = [] | |
self._current_line_items = [] | |
return [] | |
def end(self, token: Token) -> List[Message]: | |
"""End of the bracket""" | |
if self._expression_type == 'for': | |
# For expressions don't need the comma check stuff. | |
return [] | |
if self._current_token: | |
self._item_count += 1 | |
BracketContext._debug( | |
"(C) new item (now {count}) with {token}", count=self._item_count, token=token, | |
) | |
if self._expression_type == 'if': | |
# If the next token after the end is a ':', then this is a parenthetical | |
# expression to wrap a multi-line if condition. These can ignore the | |
# trailing comma logic below. There shouldn't be a newline after the ) before the :. | |
next_token = token.next | |
# bandit sees this as a possible hard-coded password. | |
if next_token and next_token.token_text == ':': # nosec | |
return self._messages | |
is_tuple = ( | |
self._expression_type == 'tuple' or | |
(self._expression_type is None and self._is_paren) | |
) | |
if ( | |
is_tuple and | |
self._config.comma_always_after_last_tuple and | |
not self._previous_was_comma and | |
# Tuple of length 0 does not need a comma. | |
# An expression like ('a') can potentially be just wrapping something | |
# in parenthesis for formatting, and is not a tuple. | |
len(self._current_line_items) > 1 | |
): | |
self._messages.append(("tuple-final-comma", [], token.start_line_no)) | |
# If the end token is on the same line number as the start brace, then | |
# the syntax checking rules don't apply, except for the tuple stuff. | |
# If the token is multi-line (like a concatenated string), then | |
# it will be considered for this logic, too. | |
if token.end_line_no != self._starting_token_line_no: | |
if len(self._current_line_items) > 0: | |
self._messages.append(("multi-line-list-eol-close", [], token.start_line_no)) | |
prev = token.prev | |
if not prev: | |
prev = token | |
if not self._previous_was_comma: | |
# Exceptional situation check. | |
# If the item count is 1, then this is a potential situation of | |
# parenthesis to have a single line wrap. For now, this does not | |
# enforce the need for a closing comma. | |
if self._item_count <= 1 and (self._is_paren or self._is_index): | |
pass | |
elif self._item_count > 0: | |
self._messages.append(("closing-comma", [], prev.start_line_no)) | |
BracketContext._debug( | |
"{0} items before ending {token}", self._item_count, token=token, | |
) | |
return self._messages | |
@staticmethod | |
def _debug(_msg, *_args, **_kvargs) -> None: | |
# print("-- " + (_msg.format(*_args, **_kvargs))) | |
pass | |
class TrailingCommaChecker(BaseTokenChecker): | |
""" | |
Trailing commas looks at the raw data and syntax tree. | |
This will enforce the multi-line brace / parenthesis format of: | |
( | |
this, | |
is, | |
correctly, | |
formatted, | |
) | |
As opposed to this: | |
(this, | |
is, | |
not, | |
right) | |
Scenarios that require commas: | |
_ after last item on a line in a list, tuple, or dictionary (includes argument list) | |
Scenarios that don't require commas: | |
_ list on the same line: | |
(x, y, z) | |
_ wrapper around a string concatenation: | |
( | |
"a " | |
"b " | |
"c d e" | |
) | |
The one weird situation that is incorrectly trapped is the parenthesis for | |
wrapping multi-lines: | |
( | |
A + | |
B - | |
C * | |
(x + y) | |
) | |
But here, those parenthesis aren't needed. | |
""" | |
name = 'trailing-comma' | |
priority = -1 | |
msgs = { | |
'C9201': ( | |
"No trailing comma", | |
"trailing-comma", | |
"Used when a list, tuple, or dict does not have a trailing comma at the end of a line.", | |
), | |
'C9202': ( | |
"Leading comma", | |
"leading-comma", | |
"A line started with a comma.", | |
), | |
'C9203': ( | |
"No closing comma", | |
"closing-comma", | |
"A list, tuple, or dict did not have a comma before the closing bracket.", | |
), | |
'C9204': ( | |
"More than 1 item per line for multi-line list", | |
"multiple-items-per-line", | |
"A list, tuple, or dict had more than 1 item on a line, for lists spread across lines.", | |
), | |
'C9205': ( | |
"No final tuple comma", | |
"tuple-final-comma", | |
"Tuples must have a comma after hte last item.", | |
), | |
'C9206': ( | |
"Multi-line list but items on first line", | |
"multi-line-list-first-line-item", | |
"A list, tuple, or dict that spread across multiple lines had an item on the " | |
"first line.", | |
), | |
'C9207': ( | |
"Multi-line list does not close on its own line", | |
"multi-line-list-eol-close", | |
"A multi-line list, tuple, or dict must not have an item on the same line as " | |
"the closing bracket.", | |
), | |
} | |
options = ( | |
( | |
'comma-always-after-last-tuple', | |
{ | |
'default': False, | |
"type": "yn", | |
"metavar": "<y_or_n>", | |
'help': | |
"Require a comma after the last item in a tuple, " | |
"regardless of the location of the closing token", | |
}, | |
), | |
( | |
'one-item-per-line', | |
{ | |
'default': False, | |
"type": "yn", | |
"metavar": "<y_or_n>", | |
'help': | |
"Require that at most 1 item in the list can be on each line.", | |
}, | |
), | |
) | |
def __init__(self, linter: pylint.lint.PyLinter): | |
# This is weird, yes | |
super(TrailingCommaChecker, self).__init__(linter) # pylint:disable=super-with-arguments | |
self._open_brace_lines: List[BracketContext] = [] | |
def process_module(self, module): | |
"""The start of a new module file's processing.""" | |
# Do nothing. | |
def process_tokens(self, tokens): | |
"""process tokens and search for: | |
_ a line in a list, that isn't on the same line as the open character, | |
or end with the closing character, that doesn't end with a comma. | |
The exception is a list generator. | |
""" | |
line_num = 0 | |
self._open_brace_lines = [] | |
token_list = TokenListWrapper(tokens) | |
for token in token_list.items(): | |
if token.start_line_no != line_num: | |
# Moved to another line of tokens. | |
line_num = token.start_line_no | |
# tokenize.NEWLINE == moved to the next logical line | |
if token.token_type == tokenize.NL: | |
# A physical newline... | |
self._ended_line(token) | |
elif token.token_text in OPEN_LIST: | |
self._enter_bracket(token) | |
elif token.token_text in CLOSE_LIST: | |
self._exit_bracket(token) | |
elif token.token_text in SEPARATOR_LIST: | |
self._separator(token) | |
elif token.token_type not in IGNORABLE_TOKEN_LIST: | |
# This is a real program token. | |
self._found_token(token) | |
def _enter_bracket(self, token: Token) -> None: | |
# print(f" - entered bracket on {token}") | |
if self._open_brace_lines: | |
self._add_messages(self._open_brace_lines[-1].started_sub_bracket(token)) | |
self._open_brace_lines.append(BracketContext(token, self.linter.config)) | |
def _exit_bracket(self, token: Token) -> None: | |
# print(f" - exited bracket on {token}") | |
if self._open_brace_lines: | |
prev = self._open_brace_lines.pop() | |
self._add_messages(prev.end(token)) | |
if self._open_brace_lines: | |
self._add_messages(self._open_brace_lines[-1].ended_sub_bracket(token)) | |
def _separator(self, token: Token) -> None: | |
# print(f" - encountered separator on {token}") | |
if self._open_brace_lines: | |
self._add_messages(self._open_brace_lines[-1].encountered_separator(token)) | |
def _found_token(self, token: Token) -> None: | |
# print(f" - found other token {token}") | |
if self._open_brace_lines: | |
self._add_messages(self._open_brace_lines[-1].encountered_token(token)) | |
def _ended_line(self, token: Token) -> None: | |
# print(f" - found EOF {token}") | |
if self._open_brace_lines: | |
self._add_messages(self._open_brace_lines[-1].encountered_newline(token)) | |
def _add_messages(self, messages: List[Message]) -> None: | |
for msg_id, args, line in messages: | |
self.add_message( | |
msgid=msg_id, | |
args=args, | |
line=line, | |
) |
The intention behind that block is to not require a comma for the case where a parenthetical expression has just a single item, such as:
print(
"This is a long statement"
)
However, it looks like there's bug in this logic. Based on this, and how complex the testing logic is, I'll move this out of a gist and into its own repo, to allow for builds and better documentation.
The underlying bug is a missing self._item_count += 1
at this line. I've also discovered a few other issues with this code that I've since fixed. The new repo is at pylint-trailing-commas. I'll update this Gist with the fixes.
Hey man, great idea but your code seems not compatible with pylint 3. See also pylint-dev/pylint-django#405
Did you ever consider publishing this as an pip installable plugin?
Thanks, @ThomDietrich . I've updated the source repository to work with PyLint 3, and I've also updated this Gist.
Amazing! I can confirm that the script works on command line. I didn't know there is a source repository :D
Btw for your readme, this is how I've integrated your script via setup.cfg
[pylint]
max-line-length = 120
load-plugins=pylint_plugin_trailing_commas
init-hook="sys.path.append('tools'); print('Custom pylint plugin \'trailing_commas\' loaded')"
Hi,
thank you so much for sharing this, it is precious!
I have one question though: while testing it I noticed that because of these lines no error is raised for the following cases.
Could you please expand that comment to explain why that block is present, so that I can test more?
Thank you again!