From 5b8d2c3af76e704926cf5915ad0e6af59a232e61 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Mon, 17 Feb 2014 23:12:16 -0500 Subject: Issue #8478: Untokenizer.compat now processes first token from iterator input. Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees. --- Lib/test/test_tokenize.py | 13 +++++++++++++ Lib/tokenize.py | 24 +++++++++++------------- Misc/NEWS | 3 +++ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 476ed76..7008d0e 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1165,6 +1165,19 @@ class UntokenizeTest(TestCase): 'start (1,3) precedes previous end (2,2)') self.assertRaises(ValueError, u.add_whitespace, (2,1)) + def test_iter_compat(self): + u = Untokenizer() + token = (NAME, 'Hello') + tokens = [(ENCODING, 'utf-8'), token] + u.compat(token, iter([])) + self.assertEqual(u.tokens, ["Hello "]) + u = Untokenizer() + self.assertEqual(u.untokenize(iter([token])), 'Hello ') + u = Untokenizer() + self.assertEqual(u.untokenize(iter(tokens)), 'Hello ') + self.assertEqual(u.encoding, 'utf-8') + self.assertEqual(untokenize(iter(tokens)), b'Hello ') + __test__ = {"doctests" : doctests, 'decistmt': decistmt} diff --git a/Lib/tokenize.py b/Lib/tokenize.py index c156450..7356a88 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -25,12 +25,14 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' 'Michael Foord') import builtins -import re -import sys -from token import * from codecs import lookup, BOM_UTF8 import collections from io import TextIOWrapper +from itertools import chain +import re +import sys +from token import * + cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) @@ -237,9 +239,10 @@ class Untokenizer: self.tokens.append(" " * col_offset) def untokenize(self, iterable): - for t in iterable: + it = iter(iterable) + for t in it: if len(t) == 2: - self.compat(t, iterable) + self.compat(t, it) break tok_type, token, start, end, line = t if tok_type == ENCODING: @@ -254,17 +257,12 @@ class Untokenizer: return "".join(self.tokens) def compat(self, token, iterable): - startline = False indents = [] toks_append = self.tokens.append - toknum, tokval = token - - if toknum in (NAME, NUMBER): - tokval += ' ' - if toknum in (NEWLINE, NL): - startline = True + startline = token[0] in (NEWLINE, NL) prevstring = False - for tok in iterable: + + for tok in chain([token], iterable): toknum, tokval = tok[:2] if toknum == ENCODING: self.encoding = tokval diff --git a/Misc/NEWS b/Misc/NEWS index b226072..fd3541b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -26,6 +26,9 @@ Library - Issue #17671: Fixed a crash when use non-initialized io.BufferedRWPair. Based on patch by Stephen Tu. +- Issue #8478: Untokenizer.compat processes first token from iterator input. + Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees. + - Issue #20594: Avoid name clash with the libc function posix_close. - Issue #19856: shutil.move() failed to move a directory to other directory -- cgit v0.12