Update tokenize-rt-4.1.0

JonnyWong16 committed 2021-10-15 00:11:45 -07:00
parent 41ef595edc
commit 912912dbf9


@@ -1,50 +1,44 @@
-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import unicode_literals
-
 import argparse
-import collections
 import io
 import keyword
 import re
+import sys
 import tokenize
 from typing import Generator
 from typing import Iterable
 from typing import List
+from typing import NamedTuple
 from typing import Optional
 from typing import Pattern
 from typing import Sequence
 from typing import Tuple
 
+# this is a performance hack.  see https://bugs.python.org/issue43014
+if (
+        sys.version_info < (3, 10) and
+        callable(getattr(tokenize, '_compile', None))
+):  # pragma: no cover (<py310)
+    from functools import lru_cache
+    tokenize._compile = lru_cache()(tokenize._compile)  # type: ignore
+
 ESCAPED_NL = 'ESCAPED_NL'
 UNIMPORTANT_WS = 'UNIMPORTANT_WS'
 NON_CODING_TOKENS = frozenset(('COMMENT', ESCAPED_NL, 'NL', UNIMPORTANT_WS))
 
 
-class Offset(collections.namedtuple('Offset', ('line', 'utf8_byte_offset'))):
-    __slots__ = ()
-
-    def __new__(cls, line=None, utf8_byte_offset=None):
-        # type: (Optional[int], Optional[int]) -> None
-        return super(Offset, cls).__new__(cls, line, utf8_byte_offset)
+class Offset(NamedTuple):
+    line: Optional[int] = None
+    utf8_byte_offset: Optional[int] = None
 
 
-class Token(
-        collections.namedtuple(
-            'Token', ('name', 'src', 'line', 'utf8_byte_offset'),
-        ),
-):
-    __slots__ = ()
-
-    def __new__(cls, name, src, line=None, utf8_byte_offset=None):
-        # type: (str, str, Optional[int], Optional[int]) -> None
-        return super(Token, cls).__new__(
-            cls, name, src, line, utf8_byte_offset,
-        )
+class Token(NamedTuple):
+    name: str
+    src: str
+    line: Optional[int] = None
+    utf8_byte_offset: Optional[int] = None
 
     @property
-    def offset(self):  # type: () -> Offset
+    def offset(self) -> Offset:
        return Offset(self.line, self.utf8_byte_offset)
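
For reference, a minimal sketch (not part of the commit) of how the rewritten NamedTuple classes behave; it assumes the vendored module is importable as tokenize_rt:

# Illustrative only: typing.NamedTuple keeps the old collections.namedtuple
# behaviour -- positional/keyword construction, field defaults, _replace() --
# so callers of Offset and Token should not need changes.
from tokenize_rt import Offset, Token

tok = Token('NAME', 'print', line=1, utf8_byte_offset=0)
assert tok.offset == Offset(1, 0)               # .offset property still works
assert Offset() == Offset(None, None)           # defaults match the old __new__
assert tok._replace(src='repr').src == 'repr'   # namedtuple API preserved
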
@@ -53,8 +47,7 @@ _string_prefixes = frozenset('bfru')
 _escaped_nl_re = re.compile(r'\\(\n|\r\n|\r)')
 
 
-def _re_partition(regex, s):
-    # type: (Pattern[str], str) -> Tuple[str, str, str]
+def _re_partition(regex: Pattern[str], s: str) -> Tuple[str, str, str]:
     match = regex.search(s)
     if match:
         return s[:match.start()], s[slice(*match.span())], s[match.end():]
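
A small usage sketch (not part of the commit) of the internal helper whose signature changed above, again assuming the module imports as tokenize_rt:

# Illustrative only: _re_partition() splits a string around the first match of
# a compiled regex, returning (head, matched_text, tail); with no match the
# whole string comes back as the head.
import re
from tokenize_rt import _re_partition  # private helper, used here for illustration

ws_re = re.compile(r'\s+')
assert _re_partition(ws_re, 'a  b') == ('a', '  ', 'b')
assert _re_partition(ws_re, 'abc') == ('abc', '', '')
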
@@ -62,7 +55,7 @@ def _re_partition(regex, s):
     return (s, '', '')
 
 
-def src_to_tokens(src):  # type: (str) -> List[Token]
+def src_to_tokens(src: str) -> List[Token]:
     tokenize_target = io.StringIO(src)
     lines = ('',) + tuple(tokenize_target)
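
For orientation, a sketch (not part of the commit) of the library's core guarantee, assuming the module imports as tokenize_rt:

# Illustrative only: src_to_tokens() materialises whitespace the stdlib
# tokenizer normally drops (as UNIMPORTANT_WS tokens), so the token stream
# can be rendered back to the exact original source.
from tokenize_rt import UNIMPORTANT_WS, src_to_tokens, tokens_to_src

src = 'x = 1  # comment\n'
tokens = src_to_tokens(src)

assert tokens_to_src(tokens) == src                        # lossless round trip
assert any(tok.name == UNIMPORTANT_WS for tok in tokens)   # spacing kept as tokens
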
@@ -111,7 +104,7 @@ def src_to_tokens(src):  # type: (str) -> List[Token]
                 tok_name == 'NUMBER' and
                 tokens and
                 tokens[-1].name == 'NUMBER'
-        ):  # pragma: no cover (PY3)
+        ):
             tokens[-1] = tokens[-1]._replace(src=tokens[-1].src + tok_text)
         # produce long literals as a single token in python 3 as well
         elif (
@@ -119,7 +112,7 @@ def src_to_tokens(src):  # type: (str) -> List[Token]
                 tok_text.lower() == 'l' and
                 tokens and
                 tokens[-1].name == 'NUMBER'
-        ):  # pragma: no cover (PY3)
+        ):
             tokens[-1] = tokens[-1]._replace(src=tokens[-1].src + tok_text)
         else:
             tokens.append(Token(tok_name, tok_text, sline, utf8_byte_offset))
@@ -128,25 +121,25 @@ def src_to_tokens(src):  # type: (str) -> List[Token]
     return tokens
 
 
-def tokens_to_src(tokens):  # type: (Iterable[Token]) -> str
+def tokens_to_src(tokens: Iterable[Token]) -> str:
     return ''.join(tok.src for tok in tokens)
 
 
-def reversed_enumerate(tokens):
-    # type: (Sequence[Token]) -> Generator[Tuple[int, Token], None, None]
+def reversed_enumerate(
+        tokens: Sequence[Token],
+) -> Generator[Tuple[int, Token], None, None]:
     for i in reversed(range(len(tokens))):
         yield i, tokens[i]
 
 
-def parse_string_literal(src):  # type: (str) -> Tuple[str, str]
+def parse_string_literal(src: str) -> Tuple[str, str]:
     """parse a string literal's source into (prefix, string)"""
     match = _string_re.match(src)
     assert match is not None
     return match.group(1), match.group(2)
 
 
-def rfind_string_parts(tokens, i):
-    # type: (Sequence[Token], int) -> Tuple[int, ...]
+def rfind_string_parts(tokens: Sequence[Token], i: int) -> Tuple[int, ...]:
     """find the indicies of the string parts of a (joined) string literal
 
     - `i` should start at the end of the string literal
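
A combined sketch (not part of the commit) of the helpers whose signatures changed in this hunk, assuming the module imports as tokenize_rt:

# Illustrative only: walk a token stream backwards with reversed_enumerate(),
# recover all parts of an implicitly-joined string literal with
# rfind_string_parts(), and split a prefix with parse_string_literal().
from tokenize_rt import (
    parse_string_literal,
    reversed_enumerate,
    rfind_string_parts,
    src_to_tokens,
)

tokens = src_to_tokens("x = 'foo' 'bar'\n")

for i, tok in reversed_enumerate(tokens):   # rewriters usually scan backwards
    if tok.name == 'STRING':
        break

parts = rfind_string_parts(tokens, i)       # indices of both string parts
assert [tokens[p].src for p in parts] == ["'foo'", "'bar'"]
assert parse_string_literal("b'baz'") == ('b', "'baz'")
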
@@ -189,26 +182,19 @@ def rfind_string_parts(tokens, i):
     return tuple(reversed(ret))
 
 
-def main(argv=None):  # type: (Optional[Sequence[str]]) -> int
+def main(argv: Optional[Sequence[str]] = None) -> int:
     parser = argparse.ArgumentParser()
     parser.add_argument('filename')
     args = parser.parse_args(argv)
-    with io.open(args.filename) as f:
+    with open(args.filename) as f:
         tokens = src_to_tokens(f.read())
 
-    def no_u_repr(s):  # type: (str) -> str
-        return repr(s).lstrip('u')
-
     for token in tokens:
         if token.name == UNIMPORTANT_WS:
             line, col = '?', '?'
         else:
-            line, col = token.line, token.utf8_byte_offset
-        print(
-            '{}:{} {} {}'.format(
-                line, col, token.name, no_u_repr(token.src),
-            ),
-        )
+            line, col = str(token.line), str(token.utf8_byte_offset)
+        print(f'{line}:{col} {token.name} {token.src!r}')
     return 0
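
Finally, a usage sketch (not part of the commit) of the debugging entry point after the change; the filename is hypothetical and the module is assumed to import as tokenize_rt:

# Illustrative only: main() now prints each token with a plain f-string
# (the removed no_u_repr() helper existed only to strip Python 2's u'' prefix).
from tokenize_rt import main

raise SystemExit(main(['some_file.py']))   # hypothetical path
# each output line looks like:  1:0 NAME 'x'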