-rw-r--r--   Doc/library/tokenize.rst | 57
-rw-r--r--   Lib/tokenize.py          | 79
-rw-r--r--   Misc/NEWS                |  2
3 files changed, 115 insertions, 23 deletions
diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst
index 577d7cc..050d74c 100644
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@@ -15,6 +15,9 @@ implemented in Python.  The scanner in this module returns comments as
 tokens as well, making it useful for implementing "pretty-printers,"
 including colorizers for on-screen displays.
 
+Tokenizing Input
+----------------
+
 The primary entry point is a :term:`generator`:
 
 .. function:: tokenize(readline)
@@ -116,6 +119,26 @@ function it uses to do this is available:
 
    .. versionadded:: 3.2
 
+.. _tokenize-cli:
+
+Command-Line Usage
+------------------
+
+.. versionadded:: 3.3
+
+The :mod:`tokenize` module can be executed as a script from the command line.
+It is as simple as:
+
+.. code-block:: sh
+
+   python -m tokenize [filename.py]
+
+If :file:`filename.py` is specified, its contents are tokenized to stdout.
+Otherwise, tokenization is performed on stdin.
+
+Examples
+--------
+
 Example of a script rewriter that transforms float literals into Decimal
 objects::
 
@@ -158,3 +181,37 @@ objects::
             result.append((toknum, tokval))
     return untokenize(result).decode('utf-8')
 
+Example of tokenizing from the command line.  The script::
+
+    def say_hello():
+        print("Hello, World!")
+
+    say_hello()
+
+will be tokenized to the following output, where the first column is the range
+of the line/column coordinates where the token is found, the second column is
+the name of the token, and the final column is the value of the token (if any):
+
+.. code-block:: sh
+
+    $ python -m tokenize hello.py
+    0,0-0,0:            ENCODING       'utf-8'
+    1,0-1,3:            NAME           'def'
+    1,4-1,13:           NAME           'say_hello'
+    1,13-1,14:          OP             '('
+    1,14-1,15:          OP             ')'
+    1,15-1,16:          OP             ':'
+    1,16-1,17:          NEWLINE        '\n'
+    2,0-2,4:            INDENT         '    '
+    2,4-2,9:            NAME           'print'
+    2,9-2,10:           OP             '('
+    2,10-2,25:          STRING         '"Hello, World!"'
+    2,25-2,26:          OP             ')'
+    2,26-2,27:          NEWLINE        '\n'
+    3,0-3,1:            NL             '\n'
+    4,0-4,0:            DEDENT         ''
+    4,0-4,9:            NAME           'say_hello'
+    4,9-4,10:           OP             '('
+    4,10-4,11:          OP             ')'
+    4,11-4,12:          NEWLINE        '\n'
+    5,0-5,0:            ENDMARKER      ''
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9b..a0dc035 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -530,27 +530,60 @@ def _tokenize(readline, encoding):
 def generate_tokens(readline):
     return _tokenize(readline, None)
 
+def main():
+    import argparse
+
+    # Helper error handling routines
+    def perror(message):
+        print(message, file=sys.stderr)
+
+    def error(message, filename=None, location=None):
+        if location:
+            args = (filename,) + location + (message,)
+            perror("%s:%d:%d: error: %s" % args)
+        elif filename:
+            perror("%s: error: %s" % (filename, message))
+        else:
+            perror("error: %s" % message)
+        sys.exit(1)
+
+    # Parse the arguments and options
+    parser = argparse.ArgumentParser(prog='python -m tokenize')
+    parser.add_argument(dest='filename', nargs='?',
+                        metavar='filename.py',
+                        help='the file to tokenize; defaults to stdin')
+    args = parser.parse_args()
+
+    try:
+        # Tokenize the input
+        if args.filename:
+            filename = args.filename
+            with builtins.open(filename, 'rb') as f:
+                tokens = list(tokenize(f.readline))
+        else:
+            filename = "<stdin>"
+            tokens = _tokenize(sys.stdin.readline, None)
+
+        # Output the tokenization
+        for token in tokens:
+            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
+            print("%-20s%-15s%-15r" %
+                  (token_range, tok_name[token.type], token.string))
+    except IndentationError as err:
+        line, column = err.args[1][1:3]
+        error(err.args[0], filename, (line, column))
+    except TokenError as err:
+        line, column = err.args[1]
+        error(err.args[0], filename, (line, column))
+    except SyntaxError as err:
+        error(err, filename)
+    except IOError as err:
+        error(err)
+    except KeyboardInterrupt:
+        print("interrupted\n")
+    except Exception as err:
+        perror("unexpected error: %s" % err)
+        raise
+
 if __name__ == "__main__":
-    # Quick sanity check
-    s = b'''def parseline(self, line):
-        """Parse the line into a command name and a string containing
-        the arguments.  Returns a tuple containing (command, args, line).
-        'command' and 'args' may be None if the line couldn't be parsed.
-        """
-        line = line.strip()
-        if not line:
-            return None, None, line
-        elif line[0] == '?':
-            line = 'help ' + line[1:]
-        elif line[0] == '!':
-            if hasattr(self, 'do_shell'):
-                line = 'shell ' + line[1:]
-            else:
-                return None, None, line
-        i, n = 0, len(line)
-        while i < n and line[i] in self.identchars: i = i+1
-        cmd, arg = line[:i], line[i:].strip()
-        return cmd, arg, line
-    '''
-    for tok in tokenize(iter(s.splitlines()).__next__):
-        print(tok)
+    main()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -2520,6 +2520,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #12943: python -m tokenize support has been added to tokenize.
+
 - Issue #10465: fix broken delegating of attributes by gzip._PaddedFile.
 
 - Issue #10356: Decimal.__hash__(-1) should return -2.
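
For reference, the listing printed by the new ``main()`` can be reproduced
through the public API this patch documents. A minimal sketch, assuming
Python 3.3 or later; the filename hello.py and the omission of ``main()``'s
error handling are illustrative choices, not part of the patch::

    # Same formatting loop as main() above, minus argument parsing and
    # error handling; hello.py is a hypothetical input file.
    from tokenize import tokenize, tok_name

    with open("hello.py", "rb") as f:   # tokenize() expects a bytes readline
        for token in tokenize(f.readline):
            # token.start and token.end are (row, column) pairs
            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
            print("%-20s%-15s%-15r" %
                  (token_range, tok_name[token.type], token.string))

Running ``python -m tokenize hello.py`` should produce the same listing; the
command-line wrapper adds only argument parsing and the error reporting shown
in the diff.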