author    | Raymond Hettinger <python@rcn.com> | 2005-06-10 11:05:19 (GMT)
committer | Raymond Hettinger <python@rcn.com> | 2005-06-10 11:05:19 (GMT)
commit    | 68c04534182f2c09783b6506701a8bc25c98b4a9 (patch)
tree      | 4e5f2b764eff65a3201dd2e666355c487e88a9b7 /Lib/tokenize.py
parent    | bf7255fffb5dda1b9541892cc40412bb6bbd4409 (diff)
Add untokenize() function to allow full round-trip tokenization.
Should significantly enhance the utility of the module by supporting
the creation of tools that modify the token stream and write back the
modified result.
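
As a quick illustration of the round trip described above (not part of this commit; a minimal sketch assuming Python 2.x, as in this revision, and a hypothetical input file example.py):

    from tokenize import generate_tokens, untokenize

    f = open('example.py')
    # Keep only (token number, token value); untokenize() needs at least these two.
    t1 = [tok[:2] for tok in generate_tokens(f.readline)]
    f.close()

    newcode = untokenize(t1)                      # rebuild source text from the tokens

    # Re-tokenizing the rebuilt text yields the same (type, value) pairs.
    readline = iter(newcode.splitlines(1)).next   # splitlines(1) keeps the line endings
    t2 = [tok[:2] for tok in generate_tokens(readline)]
    assert t1 == t2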
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 52
1 file changed, 49 insertions, 3 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9087e84..b29da6b 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -31,7 +31,7 @@ from token import *
 
 import token
 __all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize",
-           "generate_tokens", "NL"]
+           "generate_tokens", "NL", "untokenize"]
 del x
 del token
 
@@ -159,12 +159,55 @@ def tokenize_loop(readline, tokeneater):
     for token_info in generate_tokens(readline):
         tokeneater(*token_info)
 
+
+def untokenize(iterable):
+    """Transform tokens back into Python source code.
+
+    Each element returned by the iterable must be a token sequence
+    with at least two elements, a token number and token value.
+
+    Round-trip invariant:
+        # Output text will tokenize back to the input
+        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
+        newcode = untokenize(t1)
+        readline = iter(newcode.splitlines(1)).next
+        t2 = [tok[:2] for tok in generate_tokens(readline)]
+        assert t1 == t2
+    """
+
+    startline = False
+    indents = []
+    toks = []
+    toks_append = toks.append
+    for tok in iterable:
+        toknum, tokval = tok[:2]
+
+        if toknum == NAME:
+            tokval += ' '
+
+        if toknum == INDENT:
+            indents.append(tokval)
+            continue
+        elif toknum == DEDENT:
+            indents.pop()
+            continue
+        elif toknum in (NEWLINE, COMMENT, NL):
+            startline = True
+        elif startline and indents:
+            toks_append(indents[-1])
+            startline = False
+        toks_append(tokval)
+    return ''.join(toks)
+
+
 def generate_tokens(readline):
     """
     The generate_tokens() generator requires one argument, readline, which
     must be a callable object which provides the same interface as the
     readline() method of built-in file objects.  Each call to the function
-    should return one line of input as a string.
+    should return one line of input as a string.  Alternately, readline
+    can be a callable function terminating with StopIteration:
+        readline = open(myfile).next    # Example of alternate readline
 
     The generator produces 5-tuples with these members: the token type; the
     token string; a 2-tuple (srow, scol) of ints specifying the row and
@@ -180,7 +223,10 @@ def generate_tokens(readline):
     indents = [0]
 
     while 1:                                   # loop over lines in stream
-        line = readline()
+        try:
+            line = readline()
+        except StopIteration:
+            line = ''
         lnum = lnum + 1
         pos, max = 0, len(line)
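
For the "modify the token stream and write back" use case from the commit message, here is a small hypothetical sketch (not part of the commit) that combines the new untokenize() with the StopIteration-terminated readline to strip comments from a piece of source; the name strip_comments is invented for the example:

    import tokenize

    def strip_comments(source):
        """Return source with COMMENT tokens dropped, everything else intact."""
        # Alternate readline: a callable that ends by raising StopIteration.
        readline = iter(source.splitlines(1)).next
        kept = []
        for toknum, tokval, _, _, _ in tokenize.generate_tokens(readline):
            if toknum != tokenize.COMMENT:
                kept.append((toknum, tokval))
        return tokenize.untokenize(kept)

    print strip_comments("x = 1  # set x\nprint x  # show it\n")

The result re-tokenizes to the same code with the comments gone; exact spacing is not preserved, since only (token number, token value) pairs are fed back in.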