summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/tokenize.rst57
-rw-r--r--Lib/tokenize.py79
-rw-r--r--Misc/NEWS2
3 files changed, 115 insertions, 23 deletions
diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst
index 577d7cc..050d74c 100644
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@@ -15,6 +15,9 @@ implemented in Python. The scanner in this module returns comments as tokens
as well, making it useful for implementing "pretty-printers," including
colorizers for on-screen displays.
+Tokenizing Input
+----------------
+
The primary entry point is a :term:`generator`:
.. function:: tokenize(readline)
@@ -116,6 +119,26 @@ function it uses to do this is available:
.. versionadded:: 3.2
+.. _tokenize-cli:
+
+Command-Line Usage
+------------------
+
+.. versionadded:: 3.3
+
+The :mod:`tokenize` module can be executed as a script from the command line.
+It is as simple as:
+
+.. code-block:: sh
+
+ python -m tokenize [filename.py]
+
+If :file:`filename.py` is specified its contents are tokenized to stdout.
+Otherwise, tokenization is performed on stdin.
+
+Examples
+------------------
+
Example of a script rewriter that transforms float literals into Decimal
objects::
@@ -158,3 +181,37 @@ objects::
result.append((toknum, tokval))
return untokenize(result).decode('utf-8')
+Example of tokenizing from the command line. The script::
+
+ def say_hello():
+ print("Hello, World!")
+
+ say_hello()
+
+will be tokenized to the following output where the first column is the range
+of the line/column coordinates where the token is found, the second column is
+the name of the token, and the final column is the value of the token (if any)
+
+.. code-block:: sh
+
+ $ python -m tokenize hello.py
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,3: NAME 'def'
+ 1,4-1,13: NAME 'say_hello'
+ 1,13-1,14: OP '('
+ 1,14-1,15: OP ')'
+ 1,15-1,16: OP ':'
+ 1,16-1,17: NEWLINE '\n'
+ 2,0-2,4: INDENT ' '
+ 2,4-2,9: NAME 'print'
+ 2,9-2,10: OP '('
+ 2,10-2,25: STRING '"Hello, World!"'
+ 2,25-2,26: OP ')'
+ 2,26-2,27: NEWLINE '\n'
+ 3,0-3,1: NL '\n'
+ 4,0-4,0: DEDENT ''
+ 4,0-4,9: NAME 'say_hello'
+ 4,9-4,10: OP '('
+ 4,10-4,11: OP ')'
+ 4,11-4,12: NEWLINE '\n'
+ 5,0-5,0: ENDMARKER ''
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9b..a0dc035 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -530,27 +530,60 @@ def _tokenize(readline, encoding):
def generate_tokens(readline):
return _tokenize(readline, None)
+def main():
+ import argparse
+
+ # Helper error handling routines
+ def perror(message):
+ print(message, file=sys.stderr)
+
+ def error(message, filename=None, location=None):
+ if location:
+ args = (filename,) + location + (message,)
+ perror("%s:%d:%d: error: %s" % args)
+ elif filename:
+ perror("%s: error: %s" % (filename, message))
+ else:
+ perror("error: %s" % message)
+ sys.exit(1)
+
+ # Parse the arguments and options
+ parser = argparse.ArgumentParser(prog='python -m tokenize')
+ parser.add_argument(dest='filename', nargs='?',
+ metavar='filename.py',
+ help='the file to tokenize; defaults to stdin')
+ args = parser.parse_args()
+
+ try:
+ # Tokenize the input
+ if args.filename:
+ filename = args.filename
+ with builtins.open(filename, 'rb') as f:
+ tokens = list(tokenize(f.readline))
+ else:
+ filename = "<stdin>"
+ tokens = _tokenize(sys.stdin.readline, None)
+
+ # Output the tokenization
+ for token in tokens:
+ token_range = "%d,%d-%d,%d:" % (token.start + token.end)
+ print("%-20s%-15s%-15r" %
+ (token_range, tok_name[token.type], token.string))
+ except IndentationError as err:
+ line, column = err.args[1][1:3]
+ error(err.args[0], filename, (line, column))
+ except TokenError as err:
+ line, column = err.args[1]
+ error(err.args[0], filename, (line, column))
+ except SyntaxError as err:
+ error(err, filename)
+ except IOError as err:
+ error(err)
+ except KeyboardInterrupt:
+ print("interrupted\n")
+ except Exception as err:
+ perror("unexpected error: %s" % err)
+ raise
+
if __name__ == "__main__":
- # Quick sanity check
- s = b'''def parseline(self, line):
- """Parse the line into a command name and a string containing
- the arguments. Returns a tuple containing (command, args, line).
- 'command' and 'args' may be None if the line couldn't be parsed.
- """
- line = line.strip()
- if not line:
- return None, None, line
- elif line[0] == '?':
- line = 'help ' + line[1:]
- elif line[0] == '!':
- if hasattr(self, 'do_shell'):
- line = 'shell ' + line[1:]
- else:
- return None, None, line
- i, n = 0, len(line)
- while i < n and line[i] in self.identchars: i = i+1
- cmd, arg = line[:i], line[i:].strip()
- return cmd, arg, line
- '''
- for tok in tokenize(iter(s.splitlines()).__next__):
- print(tok)
+ main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 7432f59..fdbb5a1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -2520,6 +2520,8 @@ Core and Builtins
Library
-------
+- Issue #12943: python -m tokenize support has been added to tokenize.
+
- Issue #10465: fix broken delegating of attributes by gzip._PaddedFile.
- Issue #10356: Decimal.__hash__(-1) should return -2.