Update tokenize-rt==4.2.1

JonnyWong16 committed 2021-11-28 13:50:45 -08:00
commit 03012cd2b2 (parent 966a6696d1)


@@ -64,10 +64,10 @@ def src_to_tokens(src: str) -> List[Token]:
     tokens = []
     last_line = 1
     last_col = 0
+    end_offset = 0
 
-    for (
-            tok_type, tok_text, (sline, scol), (eline, ecol), line,
-    ) in tokenize.generate_tokens(tokenize_target.readline):
+    gen = tokenize.generate_tokens(tokenize_target.readline)
+    for tok_type, tok_text, (sline, scol), (eline, ecol), line in gen:
         if sline > last_line:
             newtok = lines[last_line][last_col:]
             for lineno in range(last_line + 1, sline):
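Note: end_offset holds the UTF-8 byte offset at which the next synthesized token starts. For reference, a rough sketch of the Token type these hunks construct; the field names match the diff, but the defaults here are an assumption, not the library's exact definition:

    # a minimal sketch of tokenize-rt's Token, assuming Optional defaults
    from typing import NamedTuple, Optional

    class Token(NamedTuple):
        name: str
        src: str
        line: Optional[int] = None              # 1-based source line
        utf8_byte_offset: Optional[int] = None  # byte (not character) column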
@@ -79,16 +79,25 @@ def src_to_tokens(src: str) -> List[Token]:
             while _escaped_nl_re.search(newtok):
                 ws, nl, newtok = _re_partition(_escaped_nl_re, newtok)
                 if ws:
-                    tokens.append(Token(UNIMPORTANT_WS, ws))
-                tokens.append(Token(ESCAPED_NL, nl))
+                    tokens.append(
+                        Token(UNIMPORTANT_WS, ws, last_line, end_offset),
+                    )
+                    end_offset += len(ws.encode())
+                tokens.append(Token(ESCAPED_NL, nl, last_line, end_offset))
+                end_offset = 0
+                last_line += 1
             if newtok:
-                tokens.append(Token(UNIMPORTANT_WS, newtok))
+                tokens.append(Token(UNIMPORTANT_WS, newtok, sline, 0))
+                end_offset = len(newtok.encode())
+            else:
+                end_offset = 0
         elif scol > last_col:
-            tokens.append(Token(UNIMPORTANT_WS, line[last_col:scol]))
+            newtok = line[last_col:scol]
+            tokens.append(Token(UNIMPORTANT_WS, newtok, sline, end_offset))
+            end_offset += len(newtok.encode())
 
         tok_name = tokenize.tok_name[tok_type]
-        utf8_byte_offset = len(line[:scol].encode('UTF-8'))
         # when a string prefix is not recognized, the tokenizer produces a
         # NAME token followed by a STRING token
         if (
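Note on the len(...encode()) arithmetic above: offsets in tokenize-rt are UTF-8 byte offsets, not character columns, so widths are measured by encoding the slice. A small illustration (plain Python facts, values not taken from the diff):

    s = 'é = 1'
    print(len(s))           # 5 characters
    print(len(s.encode()))  # 6 bytes: 'é' encodes to 2 bytes in UTF-8
    # so a token after 'é ' starts at byte offset 3, not character column 2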
@@ -115,8 +124,12 @@ def src_to_tokens(src: str) -> List[Token]:
         ):
             tokens[-1] = tokens[-1]._replace(src=tokens[-1].src + tok_text)
         else:
-            tokens.append(Token(tok_name, tok_text, sline, utf8_byte_offset))
+            tokens.append(Token(tok_name, tok_text, sline, end_offset))
         last_line, last_col = eline, ecol
+        if sline != eline:
+            end_offset = len(lines[last_line][:last_col].encode())
+        else:
+            end_offset += len(tok_text.encode())
 
     return tokens
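The new sline != eline branch handles tokens that span lines (e.g. a triple-quoted string): the running byte offset is rebuilt from the prefix of the line the token ends on. A hypothetical trace of that computation (values invented for illustration; `lines` is the 1-indexed line table used in src_to_tokens):

    lines = ('', 's = """ab\n', 'cd""" + x\n')
    eline, ecol = 2, 5  # the STRING token ends on line 2, column 5
    end_offset = len(lines[eline][:ecol].encode())
    print(end_offset)  # 5 -> the bytes of 'cd"""'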
@@ -190,9 +203,6 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
         tokens = src_to_tokens(f.read())
     for token in tokens:
-        if token.name == UNIMPORTANT_WS:
-            line, col = '?', '?'
-        else:
-            line, col = str(token.line), str(token.utf8_byte_offset)
+        line, col = str(token.line), str(token.utf8_byte_offset)
         print(f'{line}:{col} {token.name} {token.src!r}')
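With the special case gone, every token, including UNIMPORTANT_WS and ESCAPED_NL, prints a real position instead of '?:?'. A quick way to exercise the new behavior (the exact positions shown are illustrative, not output captured from this commit):

    from tokenize_rt import src_to_tokens

    for tok in src_to_tokens('x = 1\n'):
        print(f'{tok.line}:{tok.utf8_byte_offset} {tok.name} {tok.src!r}')
    # the ' ' between 'x' and '=' is UNIMPORTANT_WS and now reports 1:1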