author     Terry Jan Reedy <tjreedy@udel.edu>    2014-02-18 04:12:16 (GMT)
committer  Terry Jan Reedy <tjreedy@udel.edu>    2014-02-18 04:12:16 (GMT)
commit     5b8d2c3af76e704926cf5915ad0e6af59a232e61 (patch)
tree       fe41f651e48dad3c4b79e363ebe8b3f51a5d48fc
parent     58edfd9ff13813908e322dc0829469c3f82984b2 (diff)
Issue #8478: Untokenizer.compat now processes first token from iterator input.
Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees.
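
In short: Untokenizer.untokenize() pulls the first token off its input to decide between full mode (5-tuples) and compat mode (2-tuples). Before this patch, compat() unpacked that first token but never emitted it, so a one-shot iterator lost its first token; a list happened to work only because compat()'s loop restarted iteration from the beginning. A minimal sketch of the fixed behavior, with illustrative 2-tuple tokens:

    from tokenize import untokenize
    from token import NAME, NUMBER, OP

    # 2-tuples select compat mode; NAME/NUMBER tokens gain a trailing space.
    tokens = [(NAME, 'x'), (OP, '='), (NUMBER, '1')]

    # After this patch, a one-shot iterator round-trips the same as a list:
    assert untokenize(tokens) == untokenize(iter(tokens)) == 'x =1 '
    # Before it, the iterator form returned '=1 ', silently dropping 'x'.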
-rw-r--r--  Lib/test/test_tokenize.py | 13
-rw-r--r--  Lib/tokenize.py           | 24
-rw-r--r--  Misc/NEWS                 |  3
3 files changed, 27 insertions, 13 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 476ed76..7008d0e 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1165,6 +1165,19 @@ class UntokenizeTest(TestCase):
                          'start (1,3) precedes previous end (2,2)')
         self.assertRaises(ValueError, u.add_whitespace, (2,1))
 
+    def test_iter_compat(self):
+        u = Untokenizer()
+        token = (NAME, 'Hello')
+        tokens = [(ENCODING, 'utf-8'), token]
+        u.compat(token, iter([]))
+        self.assertEqual(u.tokens, ["Hello "])
+        u = Untokenizer()
+        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
+        u = Untokenizer()
+        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
+        self.assertEqual(u.encoding, 'utf-8')
+        self.assertEqual(untokenize(iter(tokens)), b'Hello ')
+
 
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c156450..7356a88 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -25,12 +25,14 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
                'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
                'Michael Foord')
 import builtins
-import re
-import sys
-from token import *
 from codecs import lookup, BOM_UTF8
 import collections
 from io import TextIOWrapper
+from itertools import chain
+import re
+import sys
+from token import *
+
 cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -237,9 +239,10 @@ class Untokenizer:
             self.tokens.append(" " * col_offset)
 
     def untokenize(self, iterable):
-        for t in iterable:
+        it = iter(iterable)
+        for t in it:
             if len(t) == 2:
-                self.compat(t, iterable)
+                self.compat(t, it)
                 break
             tok_type, token, start, end, line = t
             if tok_type == ENCODING:
@@ -254,17 +257,12 @@ class Untokenizer:
         return "".join(self.tokens)
 
     def compat(self, token, iterable):
-        startline = False
         indents = []
         toks_append = self.tokens.append
-        toknum, tokval = token
-
-        if toknum in (NAME, NUMBER):
-            tokval += ' '
-        if toknum in (NEWLINE, NL):
-            startline = True
+        startline = token[0] in (NEWLINE, NL)
         prevstring = False
-        for tok in iterable:
+
+        for tok in chain([token], iterable):
             toknum, tokval = tok[:2]
             if toknum == ENCODING:
                 self.encoding = tokval
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,9 @@ Library
 - Issue #17671: Fixed a crash when use non-initialized io.BufferedRWPair.
   Based on patch by Stephen Tu.
 
+- Issue #8478: Untokenizer.compat processes first token from iterator input.
+  Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees.
+
 - Issue #20594: Avoid name clash with the libc function posix_close.
 
 - Issue #19856: shutil.move() failed to move a directory to other directory
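
The heart of the fix is chain([token], iterable): untokenize() has already consumed one token to sniff its length, so compat() pushes it back and handles every token in a single uniform loop rather than special-casing the first (the old special case is exactly where the token was lost). The same push-back pattern applies whenever code must peek at a one-shot iterator; a small illustrative sketch (the helper name is hypothetical, not part of the patch):

    from itertools import chain

    def peek(iterable):
        # Consume one item to inspect it, then rebuild the full stream
        # so downstream code sees every item, including the first.
        it = iter(iterable)
        first = next(it)
        return first, chain([first], it)

    first, stream = peek(iter('abc'))
    assert first == 'a'
    assert list(stream) == ['a', 'b', 'c']   # nothing dropped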