summaryrefslogtreecommitdiffstats
path: root/Demo/parser
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1996-08-21 16:28:53 (GMT)
committerGuido van Rossum <guido@python.org>1996-08-21 16:28:53 (GMT)
commit16d27e3b141d26853effc6c70214412cebebbe9f (patch)
treec98f065362f2a1dfd552eff86934cb638dfaf055 /Demo/parser
parent6dbd190f5ec3127ad7c5ef6fc67d2f02c4cc1492 (diff)
downloadcpython-16d27e3b141d26853effc6c70214412cebebbe9f.zip
cpython-16d27e3b141d26853effc6c70214412cebebbe9f.tar.gz
cpython-16d27e3b141d26853effc6c70214412cebebbe9f.tar.bz2
Demos for Fred's parser module
Diffstat (limited to 'Demo/parser')
-rw-r--r--Demo/parser/FILES6
-rw-r--r--Demo/parser/Makefile8
-rw-r--r--Demo/parser/README15
-rw-r--r--Demo/parser/docstring.py2
-rw-r--r--Demo/parser/example.py163
-rw-r--r--Demo/parser/parser.tex77
-rw-r--r--Demo/parser/pprint.py143
-rw-r--r--Demo/parser/source.py27
-rwxr-xr-xDemo/parser/test_parser.py50
9 files changed, 491 insertions, 0 deletions
diff --git a/Demo/parser/FILES b/Demo/parser/FILES
new file mode 100644
index 0000000..4505d3a
--- /dev/null
+++ b/Demo/parser/FILES
@@ -0,0 +1,6 @@
+Demo/parser/
+Doc/libparser.tex
+Lib/AST.py
+Lib/symbol.py
+Lib/token.py
+Modules/parsermodule.c
diff --git a/Demo/parser/Makefile b/Demo/parser/Makefile
new file mode 100644
index 0000000..648bf6e
--- /dev/null
+++ b/Demo/parser/Makefile
@@ -0,0 +1,8 @@
+parser.dvi: parser.tex ../../Doc/libparser.tex
+ TEXINPUTS=../../Doc:: $(LATEX) parser
+
+# Use a new name for this; the included file uses 'clean' already....
+clean-parser:
+ rm -f *.log *.aux *.dvi *.pyc
+
+include ../../Doc/Makefile
diff --git a/Demo/parser/README b/Demo/parser/README
new file mode 100644
index 0000000..03696c3
--- /dev/null
+++ b/Demo/parser/README
@@ -0,0 +1,15 @@
+These files are from the large example of using the `parser' module. Refer
+to the Python Library Reference for more information.
+
+Files:
+------
+
+ example.py -- module that uses the `parser' module to extract
+ information from the parse tree of Python source
+ code.
+
+ source.py -- sample source code used to demonstrate ability to
+ handle nested constructs easily using the functions
+ and classes in example.py.
+
+Enjoy!
diff --git a/Demo/parser/docstring.py b/Demo/parser/docstring.py
new file mode 100644
index 0000000..45a261b
--- /dev/null
+++ b/Demo/parser/docstring.py
@@ -0,0 +1,2 @@
+"""Some documentation.
+"""
diff --git a/Demo/parser/example.py b/Demo/parser/example.py
new file mode 100644
index 0000000..c428aff
--- /dev/null
+++ b/Demo/parser/example.py
@@ -0,0 +1,163 @@
+"""Simple code to extract class & function docstrings from a module.
+
+
+"""
+
+import symbol
+import token
+import types
+
+
+def get_docs(fileName):
+ """Retrieve information from the parse tree of a source file.
+
+ fileName
+ Name of the file to read Python source code from.
+ """
+ source = open(fileName).read()
+ import os
+ basename = os.path.basename(os.path.splitext(fileName)[0])
+ import parser
+ ast = parser.suite(source)
+ tup = parser.ast2tuple(ast)
+ return ModuleInfo(tup, basename)
+
+
+class DefnInfo:
+ # Common base of the *Info classes: stores the name and docstring of a
+ # single definition. The class attributes are the defaults seen when
+ # an instance has not assigned its own values.
+ _docstring = ''
+ _name = ''
+
+ def __init__(self, tree):
+ # tree[2] is the second child of the definition node; element 1 of
+ # that child is used as the name (presumably the NAME token that
+ # follows 'def' or 'class' -- confirm against the grammar).
+ self._name = tree[2][1]
+
+ def get_docstring(self):
+ # Return the docstring recorded for this definition ('' if none).
+ return self._docstring
+
+ def get_name(self):
+ # Return the name recorded for this definition.
+ return self._name
+
+class SuiteInfoBase(DefnInfo):
+ # Adds bookkeeping for the classes and functions defined directly
+ # inside a suite; both are kept in dictionaries keyed by name.
+ def __init__(self):
+ # Note: DefnInfo.__init__() is not called here; subclasses call it
+ # themselves when they have a definition node to take a name from.
+ self._class_info = {}
+ self._function_info = {}
+
+ def get_class_names(self):
+ # Names of the classes recorded by _extract_info().
+ return self._class_info.keys()
+
+ def get_class_info(self, name):
+ # ClassInfo for the named class; an unknown name raises KeyError.
+ return self._class_info[name]
+
+ def _extract_info(self, tree):
+ # Record the docstring and the class/function definitions found
+ # among the children of `tree'.
+ #
+ # The fourth element of the tree is tested against the docstring
+ # pattern (presumably the first statement of an indented suite,
+ # after its NEWLINE and INDENT tokens -- confirm against the grammar).
+ if len(tree) >= 4:
+ found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
+ if found:
+ # The match yields the string literal's source text; eval() turns
+ # it into the string's value.
+ self._docstring = eval(vars['docstring'])
+ # Each child that is a 'stmt' wrapping a 'compound_stmt' is checked
+ # for a function or class definition; the name is read from the
+ # same position that DefnInfo.__init__() uses.
+ for node in tree[1:]:
+ if (node[0] == symbol.stmt
+ and node[1][0] == symbol.compound_stmt):
+ if node[1][1][0] == symbol.funcdef:
+ name = node[1][1][2][1]
+ self._function_info[name] = \
+ FunctionInfo(node[1][1])
+ elif node[1][1][0] == symbol.classdef:
+ name = node[1][1][2][1]
+ self._class_info[name] = ClassInfo(node[1][1])
+
+
+class SuiteInfo(SuiteInfoBase):
+ # A suite whose nested functions are exposed under the name
+ # "function" (ClassInfo exposes the same dictionary as "methods").
+ def __init__(self, tree):
+ # Create the empty dictionaries, then fill them from `tree'.
+ SuiteInfoBase.__init__(self)
+ self._extract_info(tree)
+
+ def get_function_names(self):
+ # Names of the functions recorded by _extract_info().
+ return self._function_info.keys()
+
+ def get_function_info(self, name):
+ # FunctionInfo for the named function; an unknown name raises
+ # KeyError.
+ return self._function_info[name]
+
+
+class FunctionInfo(SuiteInfo):
+ # Information about one function definition, including any classes
+ # and functions defined inside its body.
+ def __init__(self, tree):
+ # Take the function name from the definition node.
+ DefnInfo.__init__(self, tree)
+ # The last child of the definition node is used as the body suite.
+ suite = tree[-1]
+ # This is the same docstring test that _extract_info() applies to
+ # the suite further down.
+ if len(suite) >= 4:
+ found, vars = match(DOCSTRING_STMT_PATTERN, suite[3])
+ if found:
+ self._docstring = eval(vars['docstring'])
+ # SuiteInfo.__init__() is bypassed: the base initializer and
+ # _extract_info() are invoked directly, on the body suite.
+ SuiteInfoBase.__init__(self)
+ self._extract_info(suite)
+
+
+class ClassInfo(SuiteInfoBase):
+ # Information about one class definition; functions found in the
+ # class body are reported as methods.
+ def __init__(self, tree):
+ SuiteInfoBase.__init__(self)
+ # Take the class name from the definition node.
+ DefnInfo.__init__(self, tree)
+ # The last child of the definition node is used as the body suite.
+ self._extract_info(tree[-1])
+
+ def get_method_names(self):
+ # Names of the functions defined directly in the class body.
+ return self._function_info.keys()
+
+ def get_method_info(self, name):
+ # FunctionInfo for the named method; an unknown name raises
+ # KeyError.
+ return self._function_info[name]
+
+
+class ModuleInfo(SuiteInfo):
+ def __init__(self, tree, name="<string>"):
+ self._name = name
+ SuiteInfo.__init__(self, tree)
+ found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
+ if found:
+ self._docstring = vars["docstring"]
+
+
+from types import ListType, TupleType
+
+def match(pattern, data, vars=None):
+ """
+ """
+ if vars is None:
+ vars = {}
+ if type(pattern) is ListType: # 'variables' are ['varname']
+ vars[pattern[0]] = data
+ return 1, vars
+ if type(pattern) is not TupleType:
+ return (pattern == data), vars
+ if len(data) != len(pattern):
+ return 0, vars
+ for pattern, data in map(None, pattern, data):
+ same, vars = match(pattern, data, vars)
+ if not same:
+ break
+ return same, vars
+
+
+# This pattern will match a 'stmt' node which *might* represent a docstring;
+# docstrings require that the statement which provides the docstring be the
+# first statement in the class or function, which this pattern does not check.
+#
+# The pattern is in the form understood by match(): nested tuples must be
+# matched element by element, and the list ['docstring'] is a variable that
+# receives the text of the STRING token. The statement must consist of that
+# single string followed by a NEWLINE token whose text is ''.
+#
+DOCSTRING_STMT_PATTERN = (
+ symbol.stmt,
+ (symbol.simple_stmt,
+ (symbol.small_stmt,
+ (symbol.expr_stmt,
+ (symbol.testlist,
+ (symbol.test,
+ (symbol.and_test,
+ (symbol.not_test,
+ (symbol.comparison,
+ (symbol.expr,
+ (symbol.xor_expr,
+ (symbol.and_expr,
+ (symbol.shift_expr,
+ (symbol.arith_expr,
+ (symbol.term,
+ (symbol.factor,
+ (symbol.power,
+ (symbol.atom,
+ (token.STRING, ['docstring'])
+ )))))))))))))))),
+ (token.NEWLINE, '')
+ ))
+
+#
+# end of file
diff --git a/Demo/parser/parser.tex b/Demo/parser/parser.tex
new file mode 100644
index 0000000..170d9d7
--- /dev/null
+++ b/Demo/parser/parser.tex
@@ -0,0 +1,77 @@
+\documentstyle[twoside,10pt,myformat]{report}
+
+%% This manual does not supplement the chapter from the Python
+%% Library Reference, but only allows formatting of the parser module
+%% component of that document as a separate document, and was created
+%% primarily to ease review of the formatted document during authoring.
+
+\title{Python Parser Module Reference}
+\author{
+ Fred L. Drake, Jr. \\
+ Corporation for National Research Initiatives (CNRI) \\
+ 1895 Preston White Drive, Reston, Va 20191, USA \\
+ E-mail: {\tt fdrake@cnri.reston.va.us}, {\tt fdrake@intr.net}
+}
+
+\date{August 20th, 1996 \\ Release 1.4}
+
+\begin{document}
+
+\pagenumbering{roman}
+
+\maketitle
+
+Copyright \copyright{} 1995-1996 by Fred L. Drake, Jr. and Virginia
+Polytechnic Institute and State University, Blacksburg, Virginia, USA.
+Portions of the software copyright 1991-1995 by Stichting Mathematisch
+Centrum, Amsterdam, The Netherlands. Copying is permitted under the
+terms associated with the main Python distribution, with the
+additional restriction that this additional notice be included and
+maintained on all distributed copies.
+
+\begin{center}
+All Rights Reserved
+\end{center}
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the names of Fred L. Drake, Jr. and
+Virginia Polytechnic Institute and State University not be used in
+advertising or publicity pertaining to distribution of the software
+without specific, written prior permission.
+
+FRED L. DRAKE, JR. AND VIRGINIA POLYTECHNIC INSTITUTE AND STATE
+UNIVERSITY DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+EVENT SHALL FRED L. DRAKE, JR. OR VIRGINIA POLYTECHNIC INSTITUTE AND
+STATE UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+\begin{abstract}
+
+\noindent
+The \emph{Python Parser Module Reference} describes the interfaces
+published by the optional \code{parser} module and gives examples of
+how they may be used. It contains the same text as the chapter on the
+\code{parser} module in the \emph{Python Library Reference}, but is
+presented as a separate document.
+
+This manual assumes basic knowledge about the Python language. For an
+informal introduction to Python, see the {\em Python Tutorial}; the
+Python Reference Manual remains the highest authority on syntactic and
+semantic questions.
+
+\end{abstract}
+
+\pagebreak
+\pagenumbering{arabic}
+
+\chapter{Parser Module Reference}
+\input{libparser}
+
+\end{document}
diff --git a/Demo/parser/pprint.py b/Demo/parser/pprint.py
new file mode 100644
index 0000000..c4b8158
--- /dev/null
+++ b/Demo/parser/pprint.py
@@ -0,0 +1,143 @@
+# pprint.py
+#
+# Author: Fred L. Drake, Jr.
+# fdrake@vt.edu
+#
+# This is a simple little module I wrote to make life easier. I didn't
+# see anything quite like it in the library, though I may have overlooked
+# something. I wrote this when I was trying to read some heavily nested
+# tuples with fairly non-descriptive content. This is modelled very much
+# after Lisp/Scheme - style pretty-printing of lists. If you find it
+# useful, thank small children who sleep at night.
+#
+
+"""Support to pretty-print lists, tuples, & dictionaries recursively.
+Very simple, but at least somewhat useful, especially in debugging
+data structures.
+
+INDENT_PER_LEVEL -- Amount of indentation to use for each new
+ recursive level. The default is 1. This
+ must be a non-negative integer, and may be
+ set by the caller before calling pprint().
+
+MAX_WIDTH -- Maximum width of the display. This is only
+ used if the representation *can* be kept
+ less than MAX_WIDTH characters wide. May
+ be set by the user before calling pprint().
+
+TAB_WIDTH -- The width represented by a single tab. This
+ value is typically 8, but 4 is the default
+ under MacOS. Can be changed by the user if
+ desired, but doing so is probably not a good idea.
+
+pprint(seq [, stream]) -- The pretty-printer. This takes a Python
+ object (presumably a sequence, but that
+ doesn't matter) and an optional output
+ stream. See the function documentation
+ for details.
+"""
+
+
+# Number of columns added for each level of nesting.
+INDENT_PER_LEVEL = 1
+
+# Width the output is kept within where possible.
+MAX_WIDTH = 80
+
+# 'X and 4 or 8' selects 4 when running under MacOS and 8 otherwise; the
+# os module is only needed for this test, so the name is removed again.
+import os
+TAB_WIDTH = (os.name == 'mac' and 4) or 8
+del os
+
+
+
+def _indentation(cols):
+ "Create tabbed indentation string COLS columns wide."
+
+ # This is used to reduce the byte-count for the output, allowing
+ # files created using this module to use as little external storage
+ # as possible. This is primarily intended to minimize impact on
+ # a user's quota when storing resource files, or for creating output
+ # intended for transmission.
+
+ # As many whole tabs as fit in COLS, then spaces for the remainder.
+ # NOTE(review): this relies on '/' performing integer division on
+ # integer operands.
+ return ((cols / TAB_WIDTH) * '\t') + ((cols % TAB_WIDTH) * ' ')
+
+
+
+def pprint(seq, stream = None, indent = 0, allowance = 0):
+ """Pretty-print a list, tuple, or dictionary.
+
+ pprint(seq [, stream]) ==> None
+
+ If STREAM is provided, output is written to that stream, otherwise
+ sys.stdout is used. Indentation is done according to
+ INDENT_PER_LEVEL, which may be set to any non-negative integer
+ before calling this function. The output written on the stream is
+ a perfectly valid representation of the Python object passed in,
+ with indentation to suite human-readable interpretation. The
+ output can be used as input without error, given readable
+ representations of all sequence elements are available via repr().
+ Output is restricted to MAX_WIDTH columns where possible. The
+ STREAM parameter must support the write() method with a single
+ parameter, which will always be a string. The output stream may be
+ a StringIO.StringIO object if the result is needed as a string.
+ """
+
+ if stream is None:
+ import sys
+ stream = sys.stdout
+
+ from types import DictType, ListType, TupleType
+
+ rep = `seq`
+ typ = type(seq)
+ sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)
+
+ if sepLines and (typ is ListType or typ is TupleType):
+ # Pretty-print the sequence.
+ stream.write(((typ is ListType) and '[') or '(')
+
+ length = len(seq)
+ if length:
+ indent = indent + INDENT_PER_LEVEL
+ pprint(seq[0], stream, indent, allowance + 1)
+
+ if len(seq) > 1:
+ for ent in seq[1:]:
+ stream.write(',\n' + _indentation(indent))
+ pprint(ent, stream, indent, allowance + 1)
+
+ indent = indent - INDENT_PER_LEVEL
+
+ stream.write(((typ is ListType) and ']') or ')')
+
+ elif typ is DictType and sepLines:
+ stream.write('{')
+
+ length = len(seq)
+ if length:
+ indent = indent + INDENT_PER_LEVEL
+ items = seq.items()
+ items.sort()
+ key, ent = items[0]
+ rep = `key` + ': '
+ stream.write(rep)
+ pprint(ent, stream, indent + len(rep), allowance + 1)
+
+ if len(items) > 1:
+ for key, ent in items[1:]:
+ rep = `key` + ': '
+ stream.write(',\n' + _indentation(indent) + rep)
+ pprint(ent, stream, indent + len(rep), allowance + 1)
+
+ indent = indent - INDENT_PER_LEVEL
+
+ stream.write('}')
+
+ else:
+ stream.write(rep)
+
+ # Terminate the 'print' if we're not a recursive invocation.
+ if not indent:
+ stream.write('\n')
+
+
+#
+# end of pprint.py
diff --git a/Demo/parser/source.py b/Demo/parser/source.py
new file mode 100644
index 0000000..b1690a5
--- /dev/null
+++ b/Demo/parser/source.py
@@ -0,0 +1,27 @@
+"""Example file to be parsed for the parsermodule example.
+
+The classes and functions in this module exist only to exhibit the ability
+to handle information extraction from nested definitions using parse
+trees. They shouldn't interest you otherwise!
+"""
+
+# Sample input for example.py: a class with a method and a nested class
+# whose method in turn defines a nested function, each with a docstring.
+class Simple:
+ "This class does very little."
+
+ def method(self):
+ "This method does almost nothing."
+ return 1
+
+ class Nested:
+ "This is a nested class."
+
+ def nested_method(self):
+ "Method of Nested class."
+ def nested_function():
+ "Function in method of Nested class."
+ pass
+ return nested_function
+
+# Sample input for example.py: a module-level function with a docstring.
+def function():
+ "This function lives at the module level."
+ return 0
diff --git a/Demo/parser/test_parser.py b/Demo/parser/test_parser.py
new file mode 100755
index 0000000..e114d76
--- /dev/null
+++ b/Demo/parser/test_parser.py
@@ -0,0 +1,50 @@
+#! /projects/python/Python-1.4b2/python
+# (Force the script to use the latest build.)
+#
+# test_parser.py
+
+import parser, traceback
+
+# Number of inputs that failed so far; updated by testChunk() and used by
+# test() to choose the exit status.
+_numFailed = 0
+
+def testChunk(t, fileName):
+ # Round-trip test for one piece of source text: parse `t', convert the
+ # AST to a tuple, rebuild an AST from that tuple and check that it
+ # converts back to an equal tuple. The outcome is printed, labelled
+ # with `fileName', and failures are counted in _numFailed.
+ global _numFailed
+ # The trailing comma keeps the result on the same line as the name.
+ print '----', fileName,
+ try:
+ ast = parser.suite(t)
+ tup = parser.ast2tuple(ast)
+ # this discards the first AST; a huge memory savings when running
+ # against a large source file like Tkinter.py.
+ ast = None
+ new = parser.tuple2ast(tup)
+ except parser.ParserError, err:
+ # Parsing or rebuilding failed outright: report it with a traceback.
+ print
+ print 'parser module raised exception on input file', fileName + ':'
+ traceback.print_exc()
+ _numFailed = _numFailed + 1
+ else:
+ # Both conversions worked; the two tuples must be equal.
+ if tup != parser.ast2tuple(new):
+ print
+ print 'parser module failed on input file', fileName
+ _numFailed = _numFailed + 1
+ else:
+ print 'o.k.'
+
+def testFile(fileName):
+ t = open(fileName).read()
+ testChunk(t, fileName)
+
+def test():
+ # Test every file named on the command line, or every *.py file in
+ # the current directory when no arguments are given.
+ import sys
+ args = sys.argv[1:]
+ if not args:
+ import glob
+ args = glob.glob("*.py")
+ map(testFile, args)
+ # Exit status is 1 if any input failed and 0 otherwise.
+ sys.exit(_numFailed != 0)
+
+# Run the tests only when this file is executed as a script.
+if __name__ == '__main__':
+ test()
+
+#
+# end of file