diff options
69 files changed, 73 insertions, 7120 deletions
diff --git a/Demo/pdist/makechangelog.py b/Demo/pdist/makechangelog.py index b26f30b..1ffa588 100755 --- a/Demo/pdist/makechangelog.py +++ b/Demo/pdist/makechangelog.py @@ -6,7 +6,7 @@ import sys import string -import regex +import re import getopt import time @@ -35,9 +35,9 @@ def main(): for rev in allrevs: formatrev(rev, prefix) -parsedateprog = regex.compile( - '^date: \([0-9]+\)/\([0-9]+\)/\([0-9]+\) ' + - '\([0-9]+\):\([0-9]+\):\([0-9]+\); author: \([^ ;]+\)') +parsedateprog = re.compile( + '^date: ([0-9]+)/([0-9]+)/([0-9]+) ' + + '([0-9]+):([0-9]+):([0-9]+); author: ([^ ;]+)') authormap = { 'guido': 'Guido van Rossum <guido@cnri.reston.va.us>', @@ -70,7 +70,7 @@ def formatrev(rev, prefix): print print -startprog = regex.compile("^Working file: \(.*\)$") +startprog = re.compile("^Working file: (.*)$") def getnextfile(f): while 1: diff --git a/Demo/pdist/rcsbump b/Demo/pdist/rcsbump index e4e9ed5..4fa078e 100755 --- a/Demo/pdist/rcsbump +++ b/Demo/pdist/rcsbump @@ -6,12 +6,12 @@ # Python script for bumping up an RCS major revision number. import sys -import regex +import re import rcslib import string WITHLOCK = 1 -majorrev_re = regex.compile('^[0-9]+') +majorrev_re = re.compile('^[0-9]+') dir = rcslib.RCS() diff --git a/Demo/pdist/rcslib.py b/Demo/pdist/rcslib.py index d5f7b65..3e63869 100755 --- a/Demo/pdist/rcslib.py +++ b/Demo/pdist/rcslib.py @@ -8,7 +8,7 @@ files and (possibly) corresponding work files. import fnmatch import os -import regsub +import re import string import tempfile @@ -150,7 +150,7 @@ class RCS: cmd = 'ci %s%s -t%s %s %s' % \ (lockflag, rev, f.name, otherflags, name) else: - message = regsub.gsub('\([\\"$`]\)', '\\\\\\1', message) + message = re.sub(r'([\"$`])', r'\\\1', message) cmd = 'ci %s%s -m"%s" %s %s' % \ (lockflag, rev, message, otherflags, name) return self._system(cmd) diff --git a/Demo/scripts/eqfix.py b/Demo/scripts/eqfix.py index 165ca49..35c43aa 100755 --- a/Demo/scripts/eqfix.py +++ b/Demo/scripts/eqfix.py @@ -29,7 +29,7 @@ # into a program for a different change to Python programs... import sys -import regex +import re import os from stat import * import string @@ -53,7 +53,7 @@ def main(): if fix(arg): bad = 1 sys.exit(bad) -ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$') +ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$') def ispython(name): return ispythonprog.match(name) >= 0 @@ -104,7 +104,7 @@ def fix(filename): if lineno == 1 and g is None and line[:2] == '#!': # Check for non-Python scripts words = string.split(line[2:]) - if words and regex.search('[pP]ython', words[0]) < 0: + if words and re.search('[pP]ython', words[0]) < 0: msg = filename + ': ' + words[0] msg = msg + ' script; not fixed\n' err(msg) diff --git a/Demo/scripts/ftpstats.py b/Demo/scripts/ftpstats.py index b37a58d..5c1599e 100755 --- a/Demo/scripts/ftpstats.py +++ b/Demo/scripts/ftpstats.py @@ -13,12 +13,12 @@ import os import sys -import regex +import re import string import getopt -pat = '^\([a-zA-Z0-9 :]*\)!\(.*\)!\(.*\)!\([<>].*\)!\([0-9]+\)!\([0-9]+\)$' -prog = regex.compile(pat) +pat = '^([a-zA-Z0-9 :]*)!(.*)!(.*)!([<>].*)!([0-9]+)!([0-9]+)$' +prog = re.compile(pat) def main(): maxitems = 25 diff --git a/Demo/scripts/mboxconvert.py b/Demo/scripts/mboxconvert.py index 502d774..8c462f3 100755 --- a/Demo/scripts/mboxconvert.py +++ b/Demo/scripts/mboxconvert.py @@ -10,7 +10,7 @@ import time import os import stat import getopt -import regex +import re def main(): dofile = mmdf @@ -45,7 +45,7 @@ def main(): if sts: sys.exit(sts) -numeric = regex.compile('[1-9][0-9]*') +numeric = re.compile('[1-9][0-9]*') def mh(dir): sts = 0 diff --git a/Demo/scripts/update.py b/Demo/scripts/update.py index 32ad6c8..c936026 100755 --- a/Demo/scripts/update.py +++ b/Demo/scripts/update.py @@ -8,10 +8,10 @@ import os import sys -import regex +import re -pat = '^\([^: \t\n]+\):\([1-9][0-9]*\):' -prog = regex.compile(pat) +pat = '^([^: \t\n]+):([1-9][0-9]*):' +prog = re.compile(pat) class FileObj: def __init__(self, filename): diff --git a/Demo/sockets/mcast.py b/Demo/sockets/mcast.py index 122dad7..1abd305 100755 --- a/Demo/sockets/mcast.py +++ b/Demo/sockets/mcast.py @@ -13,7 +13,6 @@ MYGROUP = '225.0.0.250' import sys import time import struct -import regsub from socket import * diff --git a/Demo/tkinter/guido/ManPage.py b/Demo/tkinter/guido/ManPage.py index 7d6fe00..de3117b 100755 --- a/Demo/tkinter/guido/ManPage.py +++ b/Demo/tkinter/guido/ManPage.py @@ -1,6 +1,6 @@ # Widget to display a man page -import regex +import re from Tkinter import * from Tkinter import _tkinter from ScrolledText import ScrolledText @@ -11,10 +11,10 @@ ITALICFONT = '*-Courier-Medium-O-Normal-*-120-*' # XXX Recognizing footers is system dependent # (This one works for IRIX 5.2 and Solaris 2.2) -footerprog = regex.compile( +footerprog = re.compile( '^ Page [1-9][0-9]*[ \t]+\|^.*Last change:.*[1-9][0-9]*\n') -emptyprog = regex.compile('^[ \t]*\n') -ulprog = regex.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n') +emptyprog = re.compile('^[ \t]*\n') +ulprog = re.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n') # Basic Man Page class -- does not disable editing class EditableManPage(ScrolledText): diff --git a/Demo/tkinter/guido/mbox.py b/Demo/tkinter/guido/mbox.py index 9b16f6b..3c36d88 100755 --- a/Demo/tkinter/guido/mbox.py +++ b/Demo/tkinter/guido/mbox.py @@ -4,7 +4,7 @@ import os import sys -import regex +import re import getopt import string import mhlib @@ -157,7 +157,7 @@ def scan_unpost(e): scanmenu.unpost() scanmenu.invoke('active') -scanparser = regex.compile('^ *\([0-9]+\)') +scanparser = re.compile('^ *([0-9]+)') def open_folder(e=None): global folder, mhf diff --git a/Demo/tkinter/guido/tkman.py b/Demo/tkinter/guido/tkman.py index 11d9690..6b0b641 100755 --- a/Demo/tkinter/guido/tkman.py +++ b/Demo/tkinter/guido/tkman.py @@ -5,7 +5,7 @@ import sys import os import string -import regex +import re from Tkinter import * from ManPage import ManPage @@ -208,15 +208,15 @@ class SelectionBox: print 'Empty search string' return if not self.casevar.get(): - map = regex.casefold + map = re.IGNORECASE else: map = None try: if map: - prog = regex.compile(search, map) + prog = re.compile(search, map) else: - prog = regex.compile(search) - except regex.error, msg: + prog = re.compile(search) + except re.error, msg: self.frame.bell() print 'Regex error:', msg return diff --git a/Doc/howto/regex.tex b/Doc/howto/regex.tex index 87fdad2..f9867ae 100644 --- a/Doc/howto/regex.tex +++ b/Doc/howto/regex.tex @@ -33,11 +33,8 @@ This document is available from The \module{re} module was added in Python 1.5, and provides Perl-style regular expression patterns. Earlier versions of Python -came with the \module{regex} module, which provides Emacs-style -patterns. Emacs-style patterns are slightly less readable and -don't provide as many features, so there's not much reason to use -the \module{regex} module when writing new code, though you might -encounter old code that uses it. +came with the \module{regex} module, which provided Emacs-style +patterns. \module{regex} module was removed in Python 2.5. Regular expressions (or REs) are essentially a tiny, highly specialized programming language embedded inside Python and made @@ -1458,7 +1455,7 @@ Jeffrey Friedl's \citetitle{Mastering Regular Expressions}, published by O'Reilly. Unfortunately, it exclusively concentrates on Perl and Java's flavours of regular expressions, and doesn't contain any Python material at all, so it won't be useful as a reference for programming -in Python. (The first edition covered Python's now-obsolete +in Python. (The first edition covered Python's now-removed \module{regex} module, which won't help you much.) Consider checking it out from your library. diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index fad8fe7..ff9c391 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -87,7 +87,6 @@ and how to embed it in other applications. \input{libstrings} % String Services \input{libstring} \input{libre} -\input{libreconvert} \input{libstruct} % XXX also/better in File Formats? \input{libdifflib} \input{libstringio} @@ -454,8 +453,6 @@ and how to embed it in other applications. %\input{libcmpcache} %\input{libcmp} %\input{libni} -%\input{libregex} -%\input{libregsub} \chapter{Reporting Bugs} \input{reportingbugs} diff --git a/Doc/lib/libre.tex b/Doc/lib/libre.tex index 8e6513a..0fd5796 100644 --- a/Doc/lib/libre.tex +++ b/Doc/lib/libre.tex @@ -566,9 +566,6 @@ ignored. >>> re.split('\W+', 'Words, words, words.', 1) ['Words', 'words, words.'] \end{verbatim} - - This function combines and extends the functionality of - the old \function{regsub.split()} and \function{regsub.splitx()}. \end{funcdesc} \begin{funcdesc}{findall}{pattern, string\optional{, flags}} @@ -943,7 +940,7 @@ the message \code{maximum recursion limit} exceeded. For example, >>> re.match('Begin (\w| )*? end', s).end() Traceback (most recent call last): File "<stdin>", line 1, in ? - File "/usr/local/lib/python2.3/sre.py", line 132, in match + File "/usr/local/lib/python2.5/re.py", line 132, in match return _compile(pattern, flags).match(string) RuntimeError: maximum recursion limit exceeded \end{verbatim} diff --git a/Doc/lib/libreconvert.tex b/Doc/lib/libreconvert.tex deleted file mode 100644 index 29c6e52..0000000 --- a/Doc/lib/libreconvert.tex +++ /dev/null @@ -1,80 +0,0 @@ -\section{\module{reconvert} --- - Convert regular expressions from regex to re form} -\declaremodule{standard}{reconvert} -\moduleauthor{Andrew M. Kuchling}{amk@amk.ca} -\sectionauthor{Skip Montanaro}{skip@pobox.com} - - -\modulesynopsis{Convert regex-, emacs- or sed-style regular expressions -to re-style syntax.} - - -This module provides a facility to convert regular expressions from the -syntax used by the deprecated \module{regex} module to those used by the -newer \module{re} module. Because of similarity between the regular -expression syntax of \code{sed(1)} and \code{emacs(1)} and the -\module{regex} module, it is also helpful to convert patterns written for -those tools to \module{re} patterns. - -When used as a script, a Python string literal (or any other expression -evaluating to a string) is read from stdin, and the translated expression is -written to stdout as a string literal. Unless stdout is a tty, no trailing -newline is written to stdout. This is done so that it can be used with -Emacs \code{C-U M-|} (shell-command-on-region) which filters the region -through the shell command. - -\begin{seealso} - \seetitle{Mastering Regular Expressions}{Book on regular expressions - by Jeffrey Friedl, published by O'Reilly. The second - edition of the book no longer covers Python at all, - but the first edition covered writing good regular expression - patterns in great detail.} -\end{seealso} - -\subsection{Module Contents} -\nodename{Contents of Module reconvert} - -The module defines two functions and a handful of constants. - -\begin{funcdesc}{convert}{pattern\optional{, syntax=None}} - Convert a \var{pattern} representing a \module{regex}-stype regular - expression into a \module{re}-style regular expression. The optional - \var{syntax} parameter is a bitwise-or'd set of flags that control what - constructs are converted. See below for a description of the various - constants. -\end{funcdesc} - -\begin{funcdesc}{quote}{s\optional{, quote=None}} - Convert a string object to a quoted string literal. - - This is similar to \function{repr} but will return a "raw" string (r'...' - or r"...") when the string contains backslashes, instead of doubling all - backslashes. The resulting string does not always evaluate to the same - string as the original; however it will do just the right thing when passed - into re.compile(). - - The optional second argument forces the string quote; it must be a single - character which is a valid Python string quote. Note that prior to Python - 2.5 this would not accept triple-quoted string delimiters. -\end{funcdesc} - -\begin{datadesc}{RE_NO_BK_PARENS} - Suppress paren conversion. This should be omitted when converting - \code{sed}-style or \code{emacs}-style regular expressions. -\end{datadesc} - -\begin{datadesc}{RE_NO_BK_VBAR} - Suppress vertical bar conversion. This should be omitted when converting - \code{sed}-style or \code{emacs}-style regular expressions. -\end{datadesc} - -\begin{datadesc}{RE_BK_PLUS_QM} - Enable conversion of \code{+} and \code{?} characters. This should be - added to the \var{syntax} arg of \function{convert} when converting - \code{sed}-style regular expressions and omitted when converting - \code{emacs}-style regular expressions. -\end{datadesc} - -\begin{datadesc}{RE_NEWLINE_OR} - When set, newline characters are replaced by \code{|}. -\end{datadesc} diff --git a/Doc/lib/libregex.tex b/Doc/lib/libregex.tex deleted file mode 100644 index 0982f81..0000000 --- a/Doc/lib/libregex.tex +++ /dev/null @@ -1,370 +0,0 @@ -\section{\module{regex} --- - Regular expression operations} -\declaremodule{builtin}{regex} - -\modulesynopsis{Regular expression search and match operations. - \strong{Obsolete!}} - - -This module provides regular expression matching operations similar to -those found in Emacs. - -\strong{Obsolescence note:} -This module is obsolete as of Python version 1.5; it is still being -maintained because much existing code still uses it. All new code in -need of regular expressions should use the new -\code{re}\refstmodindex{re} module, which supports the more powerful -and regular Perl-style regular expressions. Existing code should be -converted. The standard library module -\code{reconvert}\refstmodindex{reconvert} helps in converting -\code{regex} style regular expressions to \code{re}\refstmodindex{re} -style regular expressions. (For more conversion help, see Andrew -Kuchling's\index{Kuchling, Andrew} ``\module{regex-to-re} HOWTO'' at -\url{http://www.python.org/doc/howto/regex-to-re/}.) - -By default the patterns are Emacs-style regular expressions -(with one exception). There is -a way to change the syntax to match that of several well-known -\UNIX{} utilities. The exception is that Emacs' \samp{\e s} -pattern is not supported, since the original implementation references -the Emacs syntax tables. - -This module is 8-bit clean: both patterns and strings may contain null -bytes and characters whose high bit is set. - -\strong{Please note:} There is a little-known fact about Python string -literals which means that you don't usually have to worry about -doubling backslashes, even though they are used to escape special -characters in string literals as well as in regular expressions. This -is because Python doesn't remove backslashes from string literals if -they are followed by an unrecognized escape character. -\emph{However}, if you want to include a literal \dfn{backslash} in a -regular expression represented as a string literal, you have to -\emph{quadruple} it or enclose it in a singleton character class. -E.g.\ to extract \LaTeX\ \samp{\e section\{\textrm{\ldots}\}} headers -from a document, you can use this pattern: -\code{'[\e ]section\{\e (.*\e )\}'}. \emph{Another exception:} -the escape sequence \samp{\e b} is significant in string literals -(where it means the ASCII bell character) as well as in Emacs regular -expressions (where it stands for a word boundary), so in order to -search for a word boundary, you should use the pattern \code{'\e \e b'}. -Similarly, a backslash followed by a digit 0-7 should be doubled to -avoid interpretation as an octal escape. - -\subsection{Regular Expressions} - -A regular expression (or RE) specifies a set of strings that matches -it; the functions in this module let you check if a particular string -matches a given regular expression (or if a given regular expression -matches a particular string, which comes down to the same thing). - -Regular expressions can be concatenated to form new regular -expressions; if \emph{A} and \emph{B} are both regular expressions, -then \emph{AB} is also an regular expression. If a string \emph{p} -matches A and another string \emph{q} matches B, the string \emph{pq} -will match AB. Thus, complex expressions can easily be constructed -from simpler ones like the primitives described here. For details of -the theory and implementation of regular expressions, consult almost -any textbook about compiler construction. - -% XXX The reference could be made more specific, say to -% "Compilers: Principles, Techniques and Tools", by Alfred V. Aho, -% Ravi Sethi, and Jeffrey D. Ullman, or some FA text. - -A brief explanation of the format of regular expressions follows. - -Regular expressions can contain both special and ordinary characters. -Ordinary characters, like '\code{A}', '\code{a}', or '\code{0}', are -the simplest regular expressions; they simply match themselves. You -can concatenate ordinary characters, so '\code{last}' matches the -characters 'last'. (In the rest of this section, we'll write RE's in -\code{this special font}, usually without quotes, and strings to be -matched 'in single quotes'.) - -Special characters either stand for classes of ordinary characters, or -affect how the regular expressions around them are interpreted. - -The special characters are: -\begin{itemize} -\item[\code{.}] (Dot.) Matches any character except a newline. -\item[\code{\^}] (Caret.) Matches the start of the string. -\item[\code{\$}] Matches the end of the string. -\code{foo} matches both 'foo' and 'foobar', while the regular -expression '\code{foo\$}' matches only 'foo'. -\item[\code{*}] Causes the resulting RE to -match 0 or more repetitions of the preceding RE. \code{ab*} will -match 'a', 'ab', or 'a' followed by any number of 'b's. -\item[\code{+}] Causes the -resulting RE to match 1 or more repetitions of the preceding RE. -\code{ab+} will match 'a' followed by any non-zero number of 'b's; it -will not match just 'a'. -\item[\code{?}] Causes the resulting RE to -match 0 or 1 repetitions of the preceding RE. \code{ab?} will -match either 'a' or 'ab'. - -\item[\code{\e}] Either escapes special characters (permitting you to match -characters like '*?+\&\$'), or signals a special sequence; special -sequences are discussed below. Remember that Python also uses the -backslash as an escape sequence in string literals; if the escape -sequence isn't recognized by Python's parser, the backslash and -subsequent character are included in the resulting string. However, -if Python would recognize the resulting sequence, the backslash should -be repeated twice. - -\item[\code{[]}] Used to indicate a set of characters. Characters can -be listed individually, or a range is indicated by giving two -characters and separating them by a '-'. Special characters are -not active inside sets. For example, \code{[akm\$]} -will match any of the characters 'a', 'k', 'm', or '\$'; \code{[a-z]} will -match any lowercase letter. - -If you want to include a \code{]} inside a -set, it must be the first character of the set; to include a \code{-}, -place it as the first or last character. - -Characters \emph{not} within a range can be matched by including a -\code{\^} as the first character of the set; \code{\^} elsewhere will -simply match the '\code{\^}' character. -\end{itemize} - -The special sequences consist of '\code{\e}' and a character -from the list below. If the ordinary character is not on the list, -then the resulting RE will match the second character. For example, -\code{\e\$} matches the character '\$'. Ones where the backslash -should be doubled in string literals are indicated. - -\begin{itemize} -\item[\code{\e|}]\code{A\e|B}, where A and B can be arbitrary REs, -creates a regular expression that will match either A or B. This can -be used inside groups (see below) as well. -% -\item[\code{\e( \e)}] Indicates the start and end of a group; the -contents of a group can be matched later in the string with the -\code{\e [1-9]} special sequence, described next. -\end{itemize} - -\begin{fulllineitems} -\item[\code{\e \e 1, ... \e \e 7, \e 8, \e 9}] -Matches the contents of the group of the same -number. For example, \code{\e (.+\e ) \e \e 1} matches 'the the' or -'55 55', but not 'the end' (note the space after the group). This -special sequence can only be used to match one of the first 9 groups; -groups with higher numbers can be matched using the \code{\e v} -sequence. (\code{\e 8} and \code{\e 9} don't need a double backslash -because they are not octal digits.) -\end{fulllineitems} - -\begin{itemize} -\item[\code{\e \e b}] Matches the empty string, but only at the -beginning or end of a word. A word is defined as a sequence of -alphanumeric characters, so the end of a word is indicated by -whitespace or a non-alphanumeric character. -% -\item[\code{\e B}] Matches the empty string, but when it is \emph{not} at the -beginning or end of a word. -% -\item[\code{\e v}] Must be followed by a two digit decimal number, and -matches the contents of the group of the same number. The group -number must be between 1 and 99, inclusive. -% -\item[\code{\e w}]Matches any alphanumeric character; this is -equivalent to the set \code{[a-zA-Z0-9]}. -% -\item[\code{\e W}] Matches any non-alphanumeric character; this is -equivalent to the set \code{[\^a-zA-Z0-9]}. -\item[\code{\e <}] Matches the empty string, but only at the beginning of a -word. A word is defined as a sequence of alphanumeric characters, so -the end of a word is indicated by whitespace or a non-alphanumeric -character. -\item[\code{\e >}] Matches the empty string, but only at the end of a -word. - -\item[\code{\e \e \e \e}] Matches a literal backslash. - -% In Emacs, the following two are start of buffer/end of buffer. In -% Python they seem to be synonyms for ^$. -\item[\code{\e `}] Like \code{\^}, this only matches at the start of the -string. -\item[\code{\e \e '}] Like \code{\$}, this only matches at the end of -the string. -% end of buffer -\end{itemize} - -\subsection{Module Contents} -\nodename{Contents of Module regex} - -The module defines these functions, and an exception: - - -\begin{funcdesc}{match}{pattern, string} - Return how many characters at the beginning of \var{string} match - the regular expression \var{pattern}. Return \code{-1} if the - string does not match the pattern (this is different from a - zero-length match!). -\end{funcdesc} - -\begin{funcdesc}{search}{pattern, string} - Return the first position in \var{string} that matches the regular - expression \var{pattern}. Return \code{-1} if no position in the string - matches the pattern (this is different from a zero-length match - anywhere!). -\end{funcdesc} - -\begin{funcdesc}{compile}{pattern\optional{, translate}} - Compile a regular expression pattern into a regular expression - object, which can be used for matching using its \code{match()} and - \code{search()} methods, described below. The optional argument - \var{translate}, if present, must be a 256-character string - indicating how characters (both of the pattern and of the strings to - be matched) are translated before comparing them; the \var{i}-th - element of the string gives the translation for the character with - \ASCII{} code \var{i}. This can be used to implement - case-insensitive matching; see the \code{casefold} data item below. - - The sequence - -\begin{verbatim} -prog = regex.compile(pat) -result = prog.match(str) -\end{verbatim} -% -is equivalent to - -\begin{verbatim} -result = regex.match(pat, str) -\end{verbatim} - -but the version using \code{compile()} is more efficient when multiple -regular expressions are used concurrently in a single program. (The -compiled version of the last pattern passed to \code{regex.match()} or -\code{regex.search()} is cached, so programs that use only a single -regular expression at a time needn't worry about compiling regular -expressions.) -\end{funcdesc} - -\begin{funcdesc}{set_syntax}{flags} - Set the syntax to be used by future calls to \code{compile()}, - \code{match()} and \code{search()}. (Already compiled expression - objects are not affected.) The argument is an integer which is the - OR of several flag bits. The return value is the previous value of - the syntax flags. Names for the flags are defined in the standard - module \code{regex_syntax}\refstmodindex{regex_syntax}; read the - file \file{regex_syntax.py} for more information. -\end{funcdesc} - -\begin{funcdesc}{get_syntax}{} - Returns the current value of the syntax flags as an integer. -\end{funcdesc} - -\begin{funcdesc}{symcomp}{pattern\optional{, translate}} -This is like \code{compile()}, but supports symbolic group names: if a -parenthesis-enclosed group begins with a group name in angular -brackets, e.g. \code{'\e(<id>[a-z][a-z0-9]*\e)'}, the group can -be referenced by its name in arguments to the \code{group()} method of -the resulting compiled regular expression object, like this: -\code{p.group('id')}. Group names may contain alphanumeric characters -and \code{'_'} only. -\end{funcdesc} - -\begin{excdesc}{error} - Exception raised when a string passed to one of the functions here - is not a valid regular expression (e.g., unmatched parentheses) or - when some other error occurs during compilation or matching. (It is - never an error if a string contains no match for a pattern.) -\end{excdesc} - -\begin{datadesc}{casefold} -A string suitable to pass as the \var{translate} argument to -\code{compile()} to map all upper case characters to their lowercase -equivalents. -\end{datadesc} - -\noindent -Compiled regular expression objects support these methods: - -\setindexsubitem{(regex method)} -\begin{funcdesc}{match}{string\optional{, pos}} - Return how many characters at the beginning of \var{string} match - the compiled regular expression. Return \code{-1} if the string - does not match the pattern (this is different from a zero-length - match!). - - The optional second parameter, \var{pos}, gives an index in the string - where the search is to start; it defaults to \code{0}. This is not - completely equivalent to slicing the string; the \code{'\^'} pattern - character matches at the real beginning of the string and at positions - just after a newline, not necessarily at the index where the search - is to start. -\end{funcdesc} - -\begin{funcdesc}{search}{string\optional{, pos}} - Return the first position in \var{string} that matches the regular - expression \code{pattern}. Return \code{-1} if no position in the - string matches the pattern (this is different from a zero-length - match anywhere!). - - The optional second parameter has the same meaning as for the - \code{match()} method. -\end{funcdesc} - -\begin{funcdesc}{group}{index, index, ...} -This method is only valid when the last call to the \code{match()} -or \code{search()} method found a match. It returns one or more -groups of the match. If there is a single \var{index} argument, -the result is a single string; if there are multiple arguments, the -result is a tuple with one item per argument. If the \var{index} is -zero, the corresponding return value is the entire matching string; if -it is in the inclusive range [1..99], it is the string matching the -corresponding parenthesized group (using the default syntax, -groups are parenthesized using \code{{\e}(} and \code{{\e})}). If no -such group exists, the corresponding result is \code{None}. - -If the regular expression was compiled by \code{symcomp()} instead of -\code{compile()}, the \var{index} arguments may also be strings -identifying groups by their group name. -\end{funcdesc} - -\noindent -Compiled regular expressions support these data attributes: - -\setindexsubitem{(regex attribute)} - -\begin{datadesc}{regs} -When the last call to the \code{match()} or \code{search()} method found a -match, this is a tuple of pairs of indexes corresponding to the -beginning and end of all parenthesized groups in the pattern. Indices -are relative to the string argument passed to \code{match()} or -\code{search()}. The 0-th tuple gives the beginning and end or the -whole pattern. When the last match or search failed, this is -\code{None}. -\end{datadesc} - -\begin{datadesc}{last} -When the last call to the \code{match()} or \code{search()} method found a -match, this is the string argument passed to that method. When the -last match or search failed, this is \code{None}. -\end{datadesc} - -\begin{datadesc}{translate} -This is the value of the \var{translate} argument to -\code{regex.compile()} that created this regular expression object. If -the \var{translate} argument was omitted in the \code{regex.compile()} -call, this is \code{None}. -\end{datadesc} - -\begin{datadesc}{givenpat} -The regular expression pattern as passed to \code{compile()} or -\code{symcomp()}. -\end{datadesc} - -\begin{datadesc}{realpat} -The regular expression after stripping the group names for regular -expressions compiled with \code{symcomp()}. Same as \code{givenpat} -otherwise. -\end{datadesc} - -\begin{datadesc}{groupindex} -A dictionary giving the mapping from symbolic group names to numerical -group indexes for regular expressions compiled with \code{symcomp()}. -\code{None} otherwise. -\end{datadesc} diff --git a/Doc/lib/libregsub.tex b/Doc/lib/libregsub.tex deleted file mode 100644 index b41b700..0000000 --- a/Doc/lib/libregsub.tex +++ /dev/null @@ -1,74 +0,0 @@ -\section{\module{regsub} --- - String operations using regular expressions} - -\declaremodule{standard}{regsub} -\modulesynopsis{Substitution and splitting operations that use - regular expressions. \strong{Obsolete!}} - - -This module defines a number of functions useful for working with -regular expressions (see built-in module \refmodule{regex}). - -Warning: these functions are not thread-safe. - -\strong{Obsolescence note:} -This module is obsolete as of Python version 1.5; it is still being -maintained because much existing code still uses it. All new code in -need of regular expressions should use the new \refmodule{re} module, which -supports the more powerful and regular Perl-style regular expressions. -Existing code should be converted. The standard library module -\module{reconvert} helps in converting \refmodule{regex} style regular -expressions to \refmodule{re} style regular expressions. (For more -conversion help, see Andrew Kuchling's\index{Kuchling, Andrew} -``regex-to-re HOWTO'' at -\url{http://www.python.org/doc/howto/regex-to-re/}.) - - -\begin{funcdesc}{sub}{pat, repl, str} -Replace the first occurrence of pattern \var{pat} in string -\var{str} by replacement \var{repl}. If the pattern isn't found, -the string is returned unchanged. The pattern may be a string or an -already compiled pattern. The replacement may contain references -\samp{\e \var{digit}} to subpatterns and escaped backslashes. -\end{funcdesc} - -\begin{funcdesc}{gsub}{pat, repl, str} -Replace all (non-overlapping) occurrences of pattern \var{pat} in -string \var{str} by replacement \var{repl}. The same rules as for -\code{sub()} apply. Empty matches for the pattern are replaced only -when not adjacent to a previous match, so e.g. -\code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}. -\end{funcdesc} - -\begin{funcdesc}{split}{str, pat\optional{, maxsplit}} -Split the string \var{str} in fields separated by delimiters matching -the pattern \var{pat}, and return a list containing the fields. Only -non-empty matches for the pattern are considered, so e.g. -\code{split('a:b', ':*')} returns \code{['a', 'b']} and -\code{split('abc', '')} returns \code{['abc']}. The \var{maxsplit} -defaults to 0. If it is nonzero, only \var{maxsplit} number of splits -occur, and the remainder of the string is returned as the final -element of the list. -\end{funcdesc} - -\begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}} -Split the string \var{str} in fields separated by delimiters matching -the pattern \var{pat}, and return a list containing the fields as well -as the separators. For example, \code{splitx('a:::b', ':*')} returns -\code{['a', ':::', 'b']}. Otherwise, this function behaves the same -as \code{split}. -\end{funcdesc} - -\begin{funcdesc}{capwords}{s\optional{, pat}} -Capitalize words separated by optional pattern \var{pat}. The default -pattern uses any characters except letters, digits and underscores as -word delimiters. Capitalization is done by changing the first -character of each word to upper case. -\end{funcdesc} - -\begin{funcdesc}{clear_cache}{} -The regsub module maintains a cache of compiled regular expressions, -keyed on the regular expression string and the syntax of the regex -module at the time the expression was compiled. This function clears -that cache. -\end{funcdesc} diff --git a/Doc/lib/libundoc.tex b/Doc/lib/libundoc.tex index 6cef183..95aa262 100644 --- a/Doc/lib/libundoc.tex +++ b/Doc/lib/libundoc.tex @@ -137,18 +137,6 @@ now just as good). \item[\module{rand}] --- Old interface to the random number generator. -\item[\module{regex}] ---- Emacs-style regular expression support; may still be used in some -old code (extension module). Refer to the -\citetitle[http://www.python.org/doc/1.6/lib/module-regex.html]{Python -1.6 Documentation} for documentation. - -\item[\module{regsub}] ---- Regular expression based string replacement utilities, for use -with \module{regex} (extension module). Refer to the -\citetitle[http://www.python.org/doc/1.6/lib/module-regsub.html]{Python -1.6 Documentation} for documentation. - \item[\module{statcache}] --- Caches the results of os.stat(). Using the cache can be fragile and error-prone, just use \code{os.stat()} directly. diff --git a/Lib/lib-old/Para.py b/Lib/lib-old/Para.py deleted file mode 100644 index 2fd8dc6..0000000 --- a/Lib/lib-old/Para.py +++ /dev/null @@ -1,343 +0,0 @@ -# Text formatting abstractions -# Note -- this module is obsolete, it's too slow anyway - - -# Oft-used type object -Int = type(0) - - -# Represent a paragraph. This is a list of words with associated -# font and size information, plus indents and justification for the -# entire paragraph. -# Once the words have been added to a paragraph, it can be laid out -# for different line widths. Once laid out, it can be rendered at -# different screen locations. Once rendered, it can be queried -# for mouse hits, and parts of the text can be highlighted -class Para: - # - def __init__(self): - self.words = [] # The words - self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c' - self.indent_left = self.indent_right = self.indent_hang = 0 - # Final lay-out parameters, may change - self.left = self.top = self.right = self.bottom = \ - self.width = self.height = self.lines = None - # - # Add a word, computing size information for it. - # Words may also be added manually by appending to self.words - # Each word should be a 7-tuple: - # (font, text, width, space, stretch, ascent, descent) - def addword(self, d, font, text, space, stretch): - if font is not None: - d.setfont(font) - width = d.textwidth(text) - ascent = d.baseline() - descent = d.lineheight() - ascent - spw = d.textwidth(' ') - space = space * spw - stretch = stretch * spw - tuple = (font, text, width, space, stretch, ascent, descent) - self.words.append(tuple) - # - # Hooks to begin and end anchors -- insert numbers in the word list! - def bgn_anchor(self, id): - self.words.append(id) - # - def end_anchor(self, id): - self.words.append(0) - # - # Return the total length (width) of the text added so far, in pixels - def getlength(self): - total = 0 - for word in self.words: - if type(word) is not Int: - total = total + word[2] + word[3] - return total - # - # Tab to a given position (relative to the current left indent): - # remove all stretch, add fixed space up to the new indent. - # If the current position is already at the tab stop, - # don't add any new space (but still remove the stretch) - def tabto(self, tab): - total = 0 - as, de = 1, 0 - for i in range(len(self.words)): - word = self.words[i] - if type(word) is Int: continue - (fo, te, wi, sp, st, as, de) = word - self.words[i] = (fo, te, wi, sp, 0, as, de) - total = total + wi + sp - if total < tab: - self.words.append((None, '', 0, tab-total, 0, as, de)) - # - # Make a hanging tag: tab to hang, increment indent_left by hang, - # and reset indent_hang to -hang - def makehangingtag(self, hang): - self.tabto(hang) - self.indent_left = self.indent_left + hang - self.indent_hang = -hang - # - # Decide where the line breaks will be given some screen width - def layout(self, linewidth): - self.width = linewidth - height = 0 - self.lines = lines = [] - avail1 = self.width - self.indent_left - self.indent_right - avail = avail1 - self.indent_hang - words = self.words - i = 0 - n = len(words) - lastfont = None - while i < n: - firstfont = lastfont - charcount = 0 - width = 0 - stretch = 0 - ascent = 0 - descent = 0 - lsp = 0 - j = i - while i < n: - word = words[i] - if type(word) is Int: - if word > 0 and width >= avail: - break - i = i+1 - continue - fo, te, wi, sp, st, as, de = word - if width + wi > avail and width > 0 and wi > 0: - break - if fo is not None: - lastfont = fo - if width == 0: - firstfont = fo - charcount = charcount + len(te) + (sp > 0) - width = width + wi + sp - lsp = sp - stretch = stretch + st - lst = st - ascent = max(ascent, as) - descent = max(descent, de) - i = i+1 - while i > j and type(words[i-1]) is Int and \ - words[i-1] > 0: i = i-1 - width = width - lsp - if i < n: - stretch = stretch - lst - else: - stretch = 0 - tuple = i-j, firstfont, charcount, width, stretch, \ - ascent, descent - lines.append(tuple) - height = height + ascent + descent - avail = avail1 - self.height = height - # - # Call a function for all words in a line - def visit(self, wordfunc, anchorfunc): - avail1 = self.width - self.indent_left - self.indent_right - avail = avail1 - self.indent_hang - v = self.top - i = 0 - for tuple in self.lines: - wordcount, firstfont, charcount, width, stretch, \ - ascent, descent = tuple - h = self.left + self.indent_left - if i == 0: h = h + self.indent_hang - extra = 0 - if self.just == 'r': h = h + avail - width - elif self.just == 'c': h = h + (avail - width) / 2 - elif self.just == 'lr' and stretch > 0: - extra = avail - width - v2 = v + ascent + descent - for j in range(i, i+wordcount): - word = self.words[j] - if type(word) is Int: - ok = anchorfunc(self, tuple, word, \ - h, v) - if ok is not None: return ok - continue - fo, te, wi, sp, st, as, de = word - if extra > 0 and stretch > 0: - ex = extra * st / stretch - extra = extra - ex - stretch = stretch - st - else: - ex = 0 - h2 = h + wi + sp + ex - ok = wordfunc(self, tuple, word, h, v, \ - h2, v2, (j==i), (j==i+wordcount-1)) - if ok is not None: return ok - h = h2 - v = v2 - i = i + wordcount - avail = avail1 - # - # Render a paragraph in "drawing object" d, using the rectangle - # given by (left, top, right) with an unspecified bottom. - # Return the computed bottom of the text. - def render(self, d, left, top, right): - if self.width != right-left: - self.layout(right-left) - self.left = left - self.top = top - self.right = right - self.bottom = self.top + self.height - self.anchorid = 0 - try: - self.d = d - self.visit(self.__class__._renderword, \ - self.__class__._renderanchor) - finally: - self.d = None - return self.bottom - # - def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast): - if word[0] is not None: self.d.setfont(word[0]) - baseline = v + tuple[5] - self.d.text((h, baseline - word[5]), word[1]) - if self.anchorid > 0: - self.d.line((h, baseline+2), (h2, baseline+2)) - # - def _renderanchor(self, tuple, word, h, v): - self.anchorid = word - # - # Return which anchor(s) was hit by the mouse - def hitcheck(self, mouseh, mousev): - self.mouseh = mouseh - self.mousev = mousev - self.anchorid = 0 - self.hits = [] - self.visit(self.__class__._hitcheckword, \ - self.__class__._hitcheckanchor) - return self.hits - # - def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast): - if self.anchorid > 0 and h <= self.mouseh <= h2 and \ - v <= self.mousev <= v2: - self.hits.append(self.anchorid) - # - def _hitcheckanchor(self, tuple, word, h, v): - self.anchorid = word - # - # Return whether the given anchor id is present - def hasanchor(self, id): - return id in self.words or -id in self.words - # - # Extract the raw text from the word list, substituting one space - # for non-empty inter-word space, and terminating with '\n' - def extract(self): - text = '' - for w in self.words: - if type(w) is not Int: - word = w[1] - if w[3]: word = word + ' ' - text = text + word - return text + '\n' - # - # Return which character position was hit by the mouse, as - # an offset in the entire text as returned by extract(). - # Return None if the mouse was not in this paragraph - def whereis(self, d, mouseh, mousev): - if mousev < self.top or mousev > self.bottom: - return None - self.mouseh = mouseh - self.mousev = mousev - self.lastfont = None - self.charcount = 0 - try: - self.d = d - return self.visit(self.__class__._whereisword, \ - self.__class__._whereisanchor) - finally: - self.d = None - # - def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast): - fo, te, wi, sp, st, as, de = word - if fo is not None: self.lastfont = fo - h = h1 - if isfirst: h1 = 0 - if islast: h2 = 999999 - if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2): - self.charcount = self.charcount + len(te) + (sp > 0) - return - if self.lastfont is not None: - self.d.setfont(self.lastfont) - cc = 0 - for c in te: - cw = self.d.textwidth(c) - if self.mouseh <= h + cw/2: - return self.charcount + cc - cc = cc+1 - h = h+cw - self.charcount = self.charcount + cc - if self.mouseh <= (h+h2) / 2: - return self.charcount - else: - return self.charcount + 1 - # - def _whereisanchor(self, tuple, word, h, v): - pass - # - # Return screen position corresponding to position in paragraph. - # Return tuple (h, vtop, vbaseline, vbottom). - # This is more or less the inverse of whereis() - def screenpos(self, d, pos): - if pos < 0: - ascent, descent = self.lines[0][5:7] - return self.left, self.top, self.top + ascent, \ - self.top + ascent + descent - self.pos = pos - self.lastfont = None - try: - self.d = d - ok = self.visit(self.__class__._screenposword, \ - self.__class__._screenposanchor) - finally: - self.d = None - if ok is None: - ascent, descent = self.lines[-1][5:7] - ok = self.right, self.bottom - ascent - descent, \ - self.bottom - descent, self.bottom - return ok - # - def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast): - fo, te, wi, sp, st, as, de = word - if fo is not None: self.lastfont = fo - cc = len(te) + (sp > 0) - if self.pos > cc: - self.pos = self.pos - cc - return - if self.pos < cc: - self.d.setfont(self.lastfont) - h = h1 + self.d.textwidth(te[:self.pos]) - else: - h = h2 - ascent, descent = tuple[5:7] - return h, v1, v1+ascent, v2 - # - def _screenposanchor(self, tuple, word, h, v): - pass - # - # Invert the stretch of text between pos1 and pos2. - # If pos1 is None, the beginning is implied; - # if pos2 is None, the end is implied. - # Undoes its own effect when called again with the same arguments - def invert(self, d, pos1, pos2): - if pos1 is None: - pos1 = self.left, self.top, self.top, self.top - else: - pos1 = self.screenpos(d, pos1) - if pos2 is None: - pos2 = self.right, self.bottom,self.bottom,self.bottom - else: - pos2 = self.screenpos(d, pos2) - h1, top1, baseline1, bottom1 = pos1 - h2, top2, baseline2, bottom2 = pos2 - if bottom1 <= top2: - d.invert((h1, top1), (self.right, bottom1)) - h1 = self.left - if bottom1 < top2: - d.invert((h1, bottom1), (self.right, top2)) - top1, bottom1 = top2, bottom2 - d.invert((h1, top1), (h2, bottom2)) diff --git a/Lib/lib-old/addpack.py b/Lib/lib-old/addpack.py deleted file mode 100644 index 2fb2601..0000000 --- a/Lib/lib-old/addpack.py +++ /dev/null @@ -1,67 +0,0 @@ -# This module provides standard support for "packages". -# -# The idea is that large groups of related modules can be placed in -# their own subdirectory, which can be added to the Python search path -# in a relatively easy way. -# -# The current version takes a package name and searches the Python -# search path for a directory by that name, and if found adds it to -# the module search path (sys.path). It maintains a list of packages -# that have already been added so adding the same package many times -# is OK. -# -# It is intended to be used in a fairly stylized manner: each module -# that wants to use a particular package, say 'Foo', is supposed to -# contain the following code: -# -# from addpack import addpack -# addpack('Foo') -# <import modules from package Foo> -# -# Additional arguments, when present, provide additional places where -# to look for the package before trying sys.path (these may be either -# strings or lists/tuples of strings). Also, if the package name is a -# full pathname, first the last component is tried in the usual way, -# then the full pathname is tried last. If the package name is a -# *relative* pathname (UNIX: contains a slash but doesn't start with -# one), then nothing special is done. The packages "/foo/bar/bletch" -# and "bletch" are considered the same, but unrelated to "bar/bletch". -# -# If the algorithm finds more than one suitable subdirectory, all are -# added to the search path -- this makes it possible to override part -# of a package. The same path will not be added more than once. -# -# If no directory is found, ImportError is raised. - -_packs = {} # {pack: [pathname, ...], ...} - -def addpack(pack, *locations): - import os - if os.path.isabs(pack): - base = os.path.basename(pack) - else: - base = pack - if _packs.has_key(base): - return - import sys - path = [] - for loc in _flatten(locations) + sys.path: - fn = os.path.join(loc, base) - if fn not in path and os.path.isdir(fn): - path.append(fn) - if pack != base and pack not in path and os.path.isdir(pack): - path.append(pack) - if not path: raise ImportError, 'package ' + pack + ' not found' - _packs[base] = path - for fn in path: - if fn not in sys.path: - sys.path.append(fn) - -def _flatten(locations): - locs = [] - for loc in locations: - if type(loc) == type(''): - locs.append(loc) - else: - locs = locs + _flatten(loc) - return locs diff --git a/Lib/lib-old/cmp.py b/Lib/lib-old/cmp.py deleted file mode 100644 index 1146a25..0000000 --- a/Lib/lib-old/cmp.py +++ /dev/null @@ -1,63 +0,0 @@ -"""Efficiently compare files, boolean outcome only (equal / not equal). - -Tricks (used in this order): - - Files with identical type, size & mtime are assumed to be clones - - Files with different type or size cannot be identical - - We keep a cache of outcomes of earlier comparisons - - We don't fork a process to run 'cmp' but read the files ourselves -""" - -import os - -cache = {} - -def cmp(f1, f2, shallow=1): - """Compare two files, use the cache if possible. - Return 1 for identical files, 0 for different. - Raise exceptions if either file could not be statted, read, etc.""" - s1, s2 = sig(os.stat(f1)), sig(os.stat(f2)) - if s1[0] != 8 or s2[0] != 8: - # Either is a not a plain file -- always report as different - return 0 - if shallow and s1 == s2: - # type, size & mtime match -- report same - return 1 - if s1[:2] != s2[:2]: # Types or sizes differ, don't bother - # types or sizes differ -- report different - return 0 - # same type and size -- look in the cache - key = (f1, f2) - try: - cs1, cs2, outcome = cache[key] - # cache hit - if s1 == cs1 and s2 == cs2: - # cached signatures match - return outcome - # stale cached signature(s) - except KeyError: - # cache miss - pass - # really compare - outcome = do_cmp(f1, f2) - cache[key] = s1, s2, outcome - return outcome - -def sig(st): - """Return signature (i.e., type, size, mtime) from raw stat data - 0-5: st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid - 6-9: st_size, st_atime, st_mtime, st_ctime""" - type = st[0] / 4096 - size = st[6] - mtime = st[8] - return type, size, mtime - -def do_cmp(f1, f2): - """Compare two files, really.""" - bufsize = 8*1024 # Could be tuned - fp1 = open(f1, 'rb') - fp2 = open(f2, 'rb') - while 1: - b1 = fp1.read(bufsize) - b2 = fp2.read(bufsize) - if b1 != b2: return 0 - if not b1: return 1 diff --git a/Lib/lib-old/cmpcache.py b/Lib/lib-old/cmpcache.py deleted file mode 100644 index 11540f8..0000000 --- a/Lib/lib-old/cmpcache.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Efficiently compare files, boolean outcome only (equal / not equal). - -Tricks (used in this order): - - Use the statcache module to avoid statting files more than once - - Files with identical type, size & mtime are assumed to be clones - - Files with different type or size cannot be identical - - We keep a cache of outcomes of earlier comparisons - - We don't fork a process to run 'cmp' but read the files ourselves -""" - -import os -from stat import * -import statcache - - -# The cache. -# -cache = {} - - -def cmp(f1, f2, shallow=1): - """Compare two files, use the cache if possible. - May raise os.error if a stat or open of either fails. - Return 1 for identical files, 0 for different. - Raise exceptions if either file could not be statted, read, etc.""" - s1, s2 = sig(statcache.stat(f1)), sig(statcache.stat(f2)) - if not S_ISREG(s1[0]) or not S_ISREG(s2[0]): - # Either is a not a plain file -- always report as different - return 0 - if shallow and s1 == s2: - # type, size & mtime match -- report same - return 1 - if s1[:2] != s2[:2]: # Types or sizes differ, don't bother - # types or sizes differ -- report different - return 0 - # same type and size -- look in the cache - key = f1 + ' ' + f2 - if cache.has_key(key): - cs1, cs2, outcome = cache[key] - # cache hit - if s1 == cs1 and s2 == cs2: - # cached signatures match - return outcome - # stale cached signature(s) - # really compare - outcome = do_cmp(f1, f2) - cache[key] = s1, s2, outcome - return outcome - -def sig(st): - """Return signature (i.e., type, size, mtime) from raw stat data.""" - return S_IFMT(st[ST_MODE]), st[ST_SIZE], st[ST_MTIME] - -def do_cmp(f1, f2): - """Compare two files, really.""" - #print ' cmp', f1, f2 # XXX remove when debugged - bufsize = 8*1024 # Could be tuned - fp1 = open(f1, 'rb') - fp2 = open(f2, 'rb') - while 1: - b1 = fp1.read(bufsize) - b2 = fp2.read(bufsize) - if b1 != b2: return 0 - if not b1: return 1 diff --git a/Lib/lib-old/codehack.py b/Lib/lib-old/codehack.py deleted file mode 100644 index 0b5e3a1..0000000 --- a/Lib/lib-old/codehack.py +++ /dev/null @@ -1,81 +0,0 @@ -# A subroutine for extracting a function name from a code object -# (with cache) - -import sys -from stat import * -import string -import os -import linecache - -# XXX The functions getcodename() and getfuncname() are now obsolete -# XXX as code and function objects now have a name attribute -- -# XXX co.co_name and f.func_name. -# XXX getlineno() is now also obsolete because of the new attribute -# XXX of code objects, co.co_firstlineno. - -# Extract the function or class name from a code object. -# This is a bit of a hack, since a code object doesn't contain -# the name directly. So what do we do: -# - get the filename (which *is* in the code object) -# - look in the code string to find the first SET_LINENO instruction -# (this must be the first instruction) -# - get the line from the file -# - if the line starts with 'class' or 'def' (after possible whitespace), -# extract the following identifier -# -# This breaks apart when the function was read from <stdin> -# or constructed by exec(), when the file is not accessible, -# and also when the file has been modified or when a line is -# continued with a backslash before the function or class name. -# -# Because this is a pretty expensive hack, a cache is kept. - -SET_LINENO = 127 # The opcode (see "opcode.h" in the Python source) -identchars = string.ascii_letters + string.digits + '_' # Identifier characters - -_namecache = {} # The cache - -def getcodename(co): - try: - return co.co_name - except AttributeError: - pass - key = `co` # arbitrary but uniquely identifying string - if _namecache.has_key(key): return _namecache[key] - filename = co.co_filename - code = co.co_code - name = '' - if ord(code[0]) == SET_LINENO: - lineno = ord(code[1]) | ord(code[2]) << 8 - line = linecache.getline(filename, lineno) - words = line.split() - if len(words) >= 2 and words[0] in ('def', 'class'): - name = words[1] - for i in range(len(name)): - if name[i] not in identchars: - name = name[:i] - break - _namecache[key] = name - return name - -# Use the above routine to find a function's name. - -def getfuncname(func): - try: - return func.func_name - except AttributeError: - pass - return getcodename(func.func_code) - -# A part of the above code to extract just the line number from a code object. - -def getlineno(co): - try: - return co.co_firstlineno - except AttributeError: - pass - code = co.co_code - if ord(code[0]) == SET_LINENO: - return ord(code[1]) | ord(code[2]) << 8 - else: - return -1 diff --git a/Lib/lib-old/dircmp.py b/Lib/lib-old/dircmp.py deleted file mode 100644 index 1e7bf2a..0000000 --- a/Lib/lib-old/dircmp.py +++ /dev/null @@ -1,202 +0,0 @@ -"""A class to build directory diff tools on.""" - -import os - -import dircache -import cmpcache -import statcache -from stat import * - -class dircmp: - """Directory comparison class.""" - - def new(self, a, b): - """Initialize.""" - self.a = a - self.b = b - # Properties that caller may change before calling self.run(): - self.hide = [os.curdir, os.pardir] # Names never to be shown - self.ignore = ['RCS', 'tags'] # Names ignored in comparison - - return self - - def run(self): - """Compare everything except common subdirectories.""" - self.a_list = filter(dircache.listdir(self.a), self.hide) - self.b_list = filter(dircache.listdir(self.b), self.hide) - self.a_list.sort() - self.b_list.sort() - self.phase1() - self.phase2() - self.phase3() - - def phase1(self): - """Compute common names.""" - self.a_only = [] - self.common = [] - for x in self.a_list: - if x in self.b_list: - self.common.append(x) - else: - self.a_only.append(x) - - self.b_only = [] - for x in self.b_list: - if x not in self.common: - self.b_only.append(x) - - def phase2(self): - """Distinguish files, directories, funnies.""" - self.common_dirs = [] - self.common_files = [] - self.common_funny = [] - - for x in self.common: - a_path = os.path.join(self.a, x) - b_path = os.path.join(self.b, x) - - ok = 1 - try: - a_stat = statcache.stat(a_path) - except os.error, why: - # print 'Can\'t stat', a_path, ':', why[1] - ok = 0 - try: - b_stat = statcache.stat(b_path) - except os.error, why: - # print 'Can\'t stat', b_path, ':', why[1] - ok = 0 - - if ok: - a_type = S_IFMT(a_stat[ST_MODE]) - b_type = S_IFMT(b_stat[ST_MODE]) - if a_type != b_type: - self.common_funny.append(x) - elif S_ISDIR(a_type): - self.common_dirs.append(x) - elif S_ISREG(a_type): - self.common_files.append(x) - else: - self.common_funny.append(x) - else: - self.common_funny.append(x) - - def phase3(self): - """Find out differences between common files.""" - xx = cmpfiles(self.a, self.b, self.common_files) - self.same_files, self.diff_files, self.funny_files = xx - - def phase4(self): - """Find out differences between common subdirectories. - A new dircmp object is created for each common subdirectory, - these are stored in a dictionary indexed by filename. - The hide and ignore properties are inherited from the parent.""" - self.subdirs = {} - for x in self.common_dirs: - a_x = os.path.join(self.a, x) - b_x = os.path.join(self.b, x) - self.subdirs[x] = newdd = dircmp().new(a_x, b_x) - newdd.hide = self.hide - newdd.ignore = self.ignore - newdd.run() - - def phase4_closure(self): - """Recursively call phase4() on subdirectories.""" - self.phase4() - for x in self.subdirs.keys(): - self.subdirs[x].phase4_closure() - - def report(self): - """Print a report on the differences between a and b.""" - # Assume that phases 1 to 3 have been executed - # Output format is purposely lousy - print 'diff', self.a, self.b - if self.a_only: - print 'Only in', self.a, ':', self.a_only - if self.b_only: - print 'Only in', self.b, ':', self.b_only - if self.same_files: - print 'Identical files :', self.same_files - if self.diff_files: - print 'Differing files :', self.diff_files - if self.funny_files: - print 'Trouble with common files :', self.funny_files - if self.common_dirs: - print 'Common subdirectories :', self.common_dirs - if self.common_funny: - print 'Common funny cases :', self.common_funny - - def report_closure(self): - """Print reports on self and on subdirs. - If phase 4 hasn't been done, no subdir reports are printed.""" - self.report() - try: - x = self.subdirs - except AttributeError: - return # No subdirectories computed - for x in self.subdirs.keys(): - print - self.subdirs[x].report_closure() - - def report_phase4_closure(self): - """Report and do phase 4 recursively.""" - self.report() - self.phase4() - for x in self.subdirs.keys(): - print - self.subdirs[x].report_phase4_closure() - - -def cmpfiles(a, b, common): - """Compare common files in two directories. - Return: - - files that compare equal - - files that compare different - - funny cases (can't stat etc.)""" - - res = ([], [], []) - for x in common: - res[cmp(os.path.join(a, x), os.path.join(b, x))].append(x) - return res - - -def cmp(a, b): - """Compare two files. - Return: - 0 for equal - 1 for different - 2 for funny cases (can't stat, etc.)""" - - try: - if cmpcache.cmp(a, b): return 0 - return 1 - except os.error: - return 2 - - -def filter(list, skip): - """Return a copy with items that occur in skip removed.""" - - result = [] - for item in list: - if item not in skip: result.append(item) - return result - - -def demo(): - """Demonstration and testing.""" - - import sys - import getopt - options, args = getopt.getopt(sys.argv[1:], 'r') - if len(args) != 2: - raise getopt.error, 'need exactly two args' - dd = dircmp().new(args[0], args[1]) - dd.run() - if ('-r', '') in options: - dd.report_phase4_closure() - else: - dd.report() - -if __name__ == "__main__": - demo() diff --git a/Lib/lib-old/dump.py b/Lib/lib-old/dump.py deleted file mode 100644 index 60bdba8..0000000 --- a/Lib/lib-old/dump.py +++ /dev/null @@ -1,63 +0,0 @@ -# Module 'dump' -# -# Print python code that reconstructs a variable. -# This only works in certain cases. -# -# It works fine for: -# - ints and floats (except NaNs and other weird things) -# - strings -# - compounds and lists, provided it works for all their elements -# - imported modules, provided their name is the module name -# -# It works for top-level dictionaries but not for dictionaries -# contained in other objects (could be made to work with some hassle -# though). -# -# It does not work for functions (all sorts), classes, class objects, -# windows, files etc. -# -# Finally, objects referenced by more than one name or contained in more -# than one other object lose their sharing property (this is bad for -# strings used as exception identifiers, for instance). - -# Dump a whole symbol table -# -def dumpsymtab(dict): - for key in dict.keys(): - dumpvar(key, dict[key]) - -# Dump a single variable -# -def dumpvar(name, x): - import sys - t = type(x) - if t == type({}): - print name, '= {}' - for key in x.keys(): - item = x[key] - if not printable(item): - print '#', - print name, '[', `key`, '] =', `item` - elif t in (type(''), type(0), type(0.0), type([]), type(())): - if not printable(x): - print '#', - print name, '=', `x` - elif t == type(sys): - print 'import', name, '#', x - else: - print '#', name, '=', x - -# check if a value is printable in a way that can be read back with input() -# -def printable(x): - t = type(x) - if t in (type(''), type(0), type(0.0)): - return 1 - if t in (type([]), type(())): - for item in x: - if not printable(item): - return 0 - return 1 - if x == {}: - return 1 - return 0 diff --git a/Lib/lib-old/find.py b/Lib/lib-old/find.py deleted file mode 100644 index 39ad771..0000000 --- a/Lib/lib-old/find.py +++ /dev/null @@ -1,26 +0,0 @@ -import fnmatch -import os - -_debug = 0 - -_prune = ['(*)'] - -def find(pattern, dir = os.curdir): - list = [] - names = os.listdir(dir) - names.sort() - for name in names: - if name in (os.curdir, os.pardir): - continue - fullname = os.path.join(dir, name) - if fnmatch.fnmatch(name, pattern): - list.append(fullname) - if os.path.isdir(fullname) and not os.path.islink(fullname): - for p in _prune: - if fnmatch.fnmatch(name, p): - if _debug: print "skip", `fullname` - break - else: - if _debug: print "descend into", `fullname` - list = list + find(pattern, fullname) - return list diff --git a/Lib/lib-old/fmt.py b/Lib/lib-old/fmt.py deleted file mode 100644 index 997d37a..0000000 --- a/Lib/lib-old/fmt.py +++ /dev/null @@ -1,623 +0,0 @@ -# Text formatting abstractions -# Note -- this module is obsolete, it's too slow anyway - - -import string -import Para - - -# A formatter back-end object has one method that is called by the formatter: -# addpara(p), where p is a paragraph object. For example: - - -# Formatter back-end to do nothing at all with the paragraphs -class NullBackEnd: - # - def __init__(self): - pass - # - def addpara(self, p): - pass - # - def bgn_anchor(self, id): - pass - # - def end_anchor(self, id): - pass - - -# Formatter back-end to collect the paragraphs in a list -class SavingBackEnd(NullBackEnd): - # - def __init__(self): - self.paralist = [] - # - def addpara(self, p): - self.paralist.append(p) - # - def hitcheck(self, h, v): - hits = [] - for p in self.paralist: - if p.top <= v <= p.bottom: - for id in p.hitcheck(h, v): - if id not in hits: - hits.append(id) - return hits - # - def extract(self): - text = '' - for p in self.paralist: - text = text + (p.extract()) - return text - # - def extractpart(self, long1, long2): - if long1 > long2: long1, long2 = long2, long1 - para1, pos1 = long1 - para2, pos2 = long2 - text = '' - while para1 < para2: - ptext = self.paralist[para1].extract() - text = text + ptext[pos1:] - pos1 = 0 - para1 = para1 + 1 - ptext = self.paralist[para2].extract() - return text + ptext[pos1:pos2] - # - def whereis(self, d, h, v): - total = 0 - for i in range(len(self.paralist)): - p = self.paralist[i] - result = p.whereis(d, h, v) - if result is not None: - return i, result - return None - # - def roundtowords(self, long1, long2): - i, offset = long1 - text = self.paralist[i].extract() - while offset > 0 and text[offset-1] != ' ': offset = offset-1 - long1 = i, offset - # - i, offset = long2 - text = self.paralist[i].extract() - n = len(text) - while offset < n-1 and text[offset] != ' ': offset = offset+1 - long2 = i, offset - # - return long1, long2 - # - def roundtoparagraphs(self, long1, long2): - long1 = long1[0], 0 - long2 = long2[0], len(self.paralist[long2[0]].extract()) - return long1, long2 - - -# Formatter back-end to send the text directly to the drawing object -class WritingBackEnd(NullBackEnd): - # - def __init__(self, d, width): - self.d = d - self.width = width - self.lineno = 0 - # - def addpara(self, p): - self.lineno = p.render(self.d, 0, self.lineno, self.width) - - -# A formatter receives a stream of formatting instructions and assembles -# these into a stream of paragraphs on to a back-end. The assembly is -# parametrized by a text measurement object, which must match the output -# operations of the back-end. The back-end is responsible for splitting -# paragraphs up in lines of a given maximum width. (This is done because -# in a windowing environment, when the window size changes, there is no -# need to redo the assembly into paragraphs, but the splitting into lines -# must be done taking the new window size into account.) - - -# Formatter base class. Initialize it with a text measurement object, -# which is used for text measurements, and a back-end object, -# which receives the completed paragraphs. The formatting methods are: -# setfont(font) -# setleftindent(nspaces) -# setjust(type) where type is 'l', 'c', 'r', or 'lr' -# flush() -# vspace(nlines) -# needvspace(nlines) -# addword(word, nspaces) -class BaseFormatter: - # - def __init__(self, d, b): - # Drawing object used for text measurements - self.d = d - # - # BackEnd object receiving completed paragraphs - self.b = b - # - # Parameters of the formatting model - self.leftindent = 0 - self.just = 'l' - self.font = None - self.blanklines = 0 - # - # Parameters derived from the current font - self.space = d.textwidth(' ') - self.line = d.lineheight() - self.ascent = d.baseline() - self.descent = self.line - self.ascent - # - # Parameter derived from the default font - self.n_space = self.space - # - # Current paragraph being built - self.para = None - self.nospace = 1 - # - # Font to set on the next word - self.nextfont = None - # - def newpara(self): - return Para.Para() - # - def setfont(self, font): - if font is None: return - self.font = self.nextfont = font - d = self.d - d.setfont(font) - self.space = d.textwidth(' ') - self.line = d.lineheight() - self.ascent = d.baseline() - self.descent = self.line - self.ascent - # - def setleftindent(self, nspaces): - self.leftindent = int(self.n_space * nspaces) - if self.para: - hang = self.leftindent - self.para.indent_left - if hang > 0 and self.para.getlength() <= hang: - self.para.makehangingtag(hang) - self.nospace = 1 - else: - self.flush() - # - def setrightindent(self, nspaces): - self.rightindent = int(self.n_space * nspaces) - if self.para: - self.para.indent_right = self.rightindent - self.flush() - # - def setjust(self, just): - self.just = just - if self.para: - self.para.just = self.just - # - def flush(self): - if self.para: - self.b.addpara(self.para) - self.para = None - if self.font is not None: - self.d.setfont(self.font) - self.nospace = 1 - # - def vspace(self, nlines): - self.flush() - if nlines > 0: - self.para = self.newpara() - tuple = None, '', 0, 0, 0, int(nlines*self.line), 0 - self.para.words.append(tuple) - self.flush() - self.blanklines = self.blanklines + nlines - # - def needvspace(self, nlines): - self.flush() # Just to be sure - if nlines > self.blanklines: - self.vspace(nlines - self.blanklines) - # - def addword(self, text, space): - if self.nospace and not text: - return - self.nospace = 0 - self.blanklines = 0 - if not self.para: - self.para = self.newpara() - self.para.indent_left = self.leftindent - self.para.just = self.just - self.nextfont = self.font - space = int(space * self.space) - self.para.words.append((self.nextfont, text, - self.d.textwidth(text), space, space, - self.ascent, self.descent)) - self.nextfont = None - # - def bgn_anchor(self, id): - if not self.para: - self.nospace = 0 - self.addword('', 0) - self.para.bgn_anchor(id) - # - def end_anchor(self, id): - if not self.para: - self.nospace = 0 - self.addword('', 0) - self.para.end_anchor(id) - - -# Measuring object for measuring text as viewed on a tty -class NullMeasurer: - # - def __init__(self): - pass - # - def setfont(self, font): - pass - # - def textwidth(self, text): - return len(text) - # - def lineheight(self): - return 1 - # - def baseline(self): - return 0 - - -# Drawing object for writing plain ASCII text to a file -class FileWriter: - # - def __init__(self, fp): - self.fp = fp - self.lineno, self.colno = 0, 0 - # - def setfont(self, font): - pass - # - def text(self, (h, v), str): - if not str: return - if '\n' in str: - raise ValueError, 'can\'t write \\n' - while self.lineno < v: - self.fp.write('\n') - self.colno, self.lineno = 0, self.lineno + 1 - while self.lineno > v: - # XXX This should never happen... - self.fp.write('\033[A') # ANSI up arrow - self.lineno = self.lineno - 1 - if self.colno < h: - self.fp.write(' ' * (h - self.colno)) - elif self.colno > h: - self.fp.write('\b' * (self.colno - h)) - self.colno = h - self.fp.write(str) - self.colno = h + len(str) - - -# Formatting class to do nothing at all with the data -class NullFormatter(BaseFormatter): - # - def __init__(self): - d = NullMeasurer() - b = NullBackEnd() - BaseFormatter.__init__(self, d, b) - - -# Formatting class to write directly to a file -class WritingFormatter(BaseFormatter): - # - def __init__(self, fp, width): - dm = NullMeasurer() - dw = FileWriter(fp) - b = WritingBackEnd(dw, width) - BaseFormatter.__init__(self, dm, b) - self.blanklines = 1 - # - # Suppress multiple blank lines - def needvspace(self, nlines): - BaseFormatter.needvspace(self, min(1, nlines)) - - -# A "FunnyFormatter" writes ASCII text with a twist: *bold words*, -# _italic text_ and _underlined words_, and `quoted text'. -# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman, -# italic, bold, underline, quote). -# Moreover, if the font is in upper case, the text is converted to -# UPPER CASE. -class FunnyFormatter(WritingFormatter): - # - def flush(self): - if self.para: finalize(self.para) - WritingFormatter.flush(self) - - -# Surrounds *bold words* and _italic text_ in a paragraph with -# appropriate markers, fixing the size (assuming these characters' -# width is 1). -openchar = \ - {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'} -closechar = \ - {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''} -def finalize(para): - oldfont = curfont = 'r' - para.words.append(('r', '', 0, 0, 0, 0)) # temporary, deleted at end - for i in range(len(para.words)): - fo, te, wi = para.words[i][:3] - if fo is not None: curfont = fo - if curfont != oldfont: - if closechar.has_key(oldfont): - c = closechar[oldfont] - j = i-1 - while j > 0 and para.words[j][1] == '': j = j-1 - fo1, te1, wi1 = para.words[j][:3] - te1 = te1 + c - wi1 = wi1 + len(c) - para.words[j] = (fo1, te1, wi1) + \ - para.words[j][3:] - if openchar.has_key(curfont) and te: - c = openchar[curfont] - te = c + te - wi = len(c) + wi - para.words[i] = (fo, te, wi) + \ - para.words[i][3:] - if te: oldfont = curfont - else: oldfont = 'r' - if curfont in string.uppercase: - te = string.upper(te) - para.words[i] = (fo, te, wi) + para.words[i][3:] - del para.words[-1] - - -# Formatter back-end to draw the text in a window. -# This has an option to draw while the paragraphs are being added, -# to minimize the delay before the user sees anything. -# This manages the entire "document" of the window. -class StdwinBackEnd(SavingBackEnd): - # - def __init__(self, window, drawnow): - self.window = window - self.drawnow = drawnow - self.width = window.getwinsize()[0] - self.selection = None - self.height = 0 - window.setorigin(0, 0) - window.setdocsize(0, 0) - self.d = window.begindrawing() - SavingBackEnd.__init__(self) - # - def finish(self): - self.d.close() - self.d = None - self.window.setdocsize(0, self.height) - # - def addpara(self, p): - self.paralist.append(p) - if self.drawnow: - self.height = \ - p.render(self.d, 0, self.height, self.width) - else: - p.layout(self.width) - p.left = 0 - p.top = self.height - p.right = self.width - p.bottom = self.height + p.height - self.height = p.bottom - # - def resize(self): - self.window.change((0, 0), (self.width, self.height)) - self.width = self.window.getwinsize()[0] - self.height = 0 - for p in self.paralist: - p.layout(self.width) - p.left = 0 - p.top = self.height - p.right = self.width - p.bottom = self.height + p.height - self.height = p.bottom - self.window.change((0, 0), (self.width, self.height)) - self.window.setdocsize(0, self.height) - # - def redraw(self, area): - d = self.window.begindrawing() - (left, top), (right, bottom) = area - d.erase(area) - d.cliprect(area) - for p in self.paralist: - if top < p.bottom and p.top < bottom: - v = p.render(d, p.left, p.top, p.right) - if self.selection: - self.invert(d, self.selection) - d.close() - # - def setselection(self, new): - if new: - long1, long2 = new - pos1 = long1[:3] - pos2 = long2[:3] - new = pos1, pos2 - if new != self.selection: - d = self.window.begindrawing() - if self.selection: - self.invert(d, self.selection) - if new: - self.invert(d, new) - d.close() - self.selection = new - # - def getselection(self): - return self.selection - # - def extractselection(self): - if self.selection: - a, b = self.selection - return self.extractpart(a, b) - else: - return None - # - def invert(self, d, region): - long1, long2 = region - if long1 > long2: long1, long2 = long2, long1 - para1, pos1 = long1 - para2, pos2 = long2 - while para1 < para2: - self.paralist[para1].invert(d, pos1, None) - pos1 = None - para1 = para1 + 1 - self.paralist[para2].invert(d, pos1, pos2) - # - def search(self, prog): - import re, string - if type(prog) is type(''): - prog = re.compile(string.lower(prog)) - if self.selection: - iold = self.selection[0][0] - else: - iold = -1 - hit = None - for i in range(len(self.paralist)): - if i == iold or i < iold and hit: - continue - p = self.paralist[i] - text = string.lower(p.extract()) - match = prog.search(text) - if match: - a, b = match.group(0) - long1 = i, a - long2 = i, b - hit = long1, long2 - if i > iold: - break - if hit: - self.setselection(hit) - i = hit[0][0] - p = self.paralist[i] - self.window.show((p.left, p.top), (p.right, p.bottom)) - return 1 - else: - return 0 - # - def showanchor(self, id): - for i in range(len(self.paralist)): - p = self.paralist[i] - if p.hasanchor(id): - long1 = i, 0 - long2 = i, len(p.extract()) - hit = long1, long2 - self.setselection(hit) - self.window.show( - (p.left, p.top), (p.right, p.bottom)) - break - - -# GL extensions - -class GLFontCache: - # - def __init__(self): - self.reset() - self.setfont('') - # - def reset(self): - self.fontkey = None - self.fonthandle = None - self.fontinfo = None - self.fontcache = {} - # - def close(self): - self.reset() - # - def setfont(self, fontkey): - if fontkey == '': - fontkey = 'Times-Roman 12' - elif ' ' not in fontkey: - fontkey = fontkey + ' 12' - if fontkey == self.fontkey: - return - if self.fontcache.has_key(fontkey): - handle = self.fontcache[fontkey] - else: - import string - i = string.index(fontkey, ' ') - name, sizestr = fontkey[:i], fontkey[i:] - size = eval(sizestr) - key1 = name + ' 1' - key = name + ' ' + `size` - # NB key may differ from fontkey! - if self.fontcache.has_key(key): - handle = self.fontcache[key] - else: - if self.fontcache.has_key(key1): - handle = self.fontcache[key1] - else: - import fm - handle = fm.findfont(name) - self.fontcache[key1] = handle - handle = handle.scalefont(size) - self.fontcache[fontkey] = \ - self.fontcache[key] = handle - self.fontkey = fontkey - if self.fonthandle != handle: - self.fonthandle = handle - self.fontinfo = handle.getfontinfo() - handle.setfont() - - -class GLMeasurer(GLFontCache): - # - def textwidth(self, text): - return self.fonthandle.getstrwidth(text) - # - def baseline(self): - return self.fontinfo[6] - self.fontinfo[3] - # - def lineheight(self): - return self.fontinfo[6] - - -class GLWriter(GLFontCache): - # - # NOTES: - # (1) Use gl.ortho2 to use X pixel coordinates! - # - def text(self, (h, v), text): - import gl, fm - gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3]) - fm.prstr(text) - # - def setfont(self, fontkey): - oldhandle = self.fonthandle - GLFontCache.setfont(fontkey) - if self.fonthandle != oldhandle: - handle.setfont() - - -class GLMeasurerWriter(GLMeasurer, GLWriter): - pass - - -class GLBackEnd(SavingBackEnd): - # - def __init__(self, wid): - import gl - gl.winset(wid) - self.wid = wid - self.width = gl.getsize()[1] - self.height = 0 - self.d = GLMeasurerWriter() - SavingBackEnd.__init__(self) - # - def finish(self): - pass - # - def addpara(self, p): - self.paralist.append(p) - self.height = p.render(self.d, 0, self.height, self.width) - # - def redraw(self): - import gl - gl.winset(self.wid) - width = gl.getsize()[1] - if width != self.width: - setdocsize = 1 - self.width = width - for p in self.paralist: - p.top = p.bottom = None - d = self.d - v = 0 - for p in self.paralist: - v = p.render(d, 0, v, width) diff --git a/Lib/lib-old/grep.py b/Lib/lib-old/grep.py deleted file mode 100644 index 2926746..0000000 --- a/Lib/lib-old/grep.py +++ /dev/null @@ -1,79 +0,0 @@ -# 'grep' - -import regex -from regex_syntax import * - -opt_show_where = 0 -opt_show_filename = 0 -opt_show_lineno = 1 - -def grep(pat, *files): - return ggrep(RE_SYNTAX_GREP, pat, files) - -def egrep(pat, *files): - return ggrep(RE_SYNTAX_EGREP, pat, files) - -def emgrep(pat, *files): - return ggrep(RE_SYNTAX_EMACS, pat, files) - -def ggrep(syntax, pat, files): - if len(files) == 1 and type(files[0]) == type([]): - files = files[0] - global opt_show_filename - opt_show_filename = (len(files) != 1) - syntax = regex.set_syntax(syntax) - try: - prog = regex.compile(pat) - finally: - syntax = regex.set_syntax(syntax) - for filename in files: - fp = open(filename, 'r') - lineno = 0 - while 1: - line = fp.readline() - if not line: break - lineno = lineno + 1 - if prog.search(line) >= 0: - showline(filename, lineno, line, prog) - fp.close() - -def pgrep(pat, *files): - if len(files) == 1 and type(files[0]) == type([]): - files = files[0] - global opt_show_filename - opt_show_filename = (len(files) != 1) - import re - prog = re.compile(pat) - for filename in files: - fp = open(filename, 'r') - lineno = 0 - while 1: - line = fp.readline() - if not line: break - lineno = lineno + 1 - if prog.search(line): - showline(filename, lineno, line, prog) - fp.close() - -def showline(filename, lineno, line, prog): - if line[-1:] == '\n': line = line[:-1] - if opt_show_lineno: - prefix = `lineno`.rjust(3) + ': ' - else: - prefix = '' - if opt_show_filename: - prefix = filename + ': ' + prefix - print prefix + line - if opt_show_where: - start, end = prog.regs()[0] - line = line[:start] - if '\t' not in line: - prefix = ' ' * (len(prefix) + start) - else: - prefix = ' ' * len(prefix) - for c in line: - if c != '\t': c = ' ' - prefix = prefix + c - if start == end: prefix = prefix + '\\' - else: prefix = prefix + '^'*(end-start) - print prefix diff --git a/Lib/lib-old/lockfile.py b/Lib/lib-old/lockfile.py deleted file mode 100644 index cde9b48..0000000 --- a/Lib/lib-old/lockfile.py +++ /dev/null @@ -1,15 +0,0 @@ -import struct, fcntl - -def writelock(f): - _lock(f, fcntl.F_WRLCK) - -def readlock(f): - _lock(f, fcntl.F_RDLCK) - -def unlock(f): - _lock(f, fcntl.F_UNLCK) - -def _lock(f, op): - dummy = fcntl.fcntl(f.fileno(), fcntl.F_SETLKW, - struct.pack('2h8l', op, - 0, 0, 0, 0, 0, 0, 0, 0, 0)) diff --git a/Lib/lib-old/newdir.py b/Lib/lib-old/newdir.py deleted file mode 100644 index 356becc..0000000 --- a/Lib/lib-old/newdir.py +++ /dev/null @@ -1,73 +0,0 @@ -# New dir() function - - -# This should be the new dir(), except that it should still list -# the current local name space by default - -def listattrs(x): - try: - dictkeys = x.__dict__.keys() - except (AttributeError, TypeError): - dictkeys = [] - # - try: - methods = x.__methods__ - except (AttributeError, TypeError): - methods = [] - # - try: - members = x.__members__ - except (AttributeError, TypeError): - members = [] - # - try: - the_class = x.__class__ - except (AttributeError, TypeError): - the_class = None - # - try: - bases = x.__bases__ - except (AttributeError, TypeError): - bases = () - # - total = dictkeys + methods + members - if the_class: - # It's a class instace; add the class's attributes - # that are functions (methods)... - class_attrs = listattrs(the_class) - class_methods = [] - for name in class_attrs: - if is_function(getattr(the_class, name)): - class_methods.append(name) - total = total + class_methods - elif bases: - # It's a derived class; add the base class attributes - for base in bases: - base_attrs = listattrs(base) - total = total + base_attrs - total.sort() - return total - i = 0 - while i+1 < len(total): - if total[i] == total[i+1]: - del total[i+1] - else: - i = i+1 - return total - - -# Helper to recognize functions - -def is_function(x): - return type(x) == type(is_function) - - -# Approximation of builtin dir(); but note that this lists the user's -# variables by default, not the current local name space. - -def dir(x = None): - if x is not None: - return listattrs(x) - else: - import __main__ - return listattrs(__main__) diff --git a/Lib/lib-old/ni.py b/Lib/lib-old/ni.py deleted file mode 100644 index 074f989..0000000 --- a/Lib/lib-old/ni.py +++ /dev/null @@ -1,433 +0,0 @@ -"""New import scheme with package support. - -Quick Reference ---------------- - -- To enable package support, execute "import ni" before importing any - packages. Importing this module automatically installs the relevant - import hooks. - -- To create a package named spam containing sub-modules ham, bacon and - eggs, create a directory spam somewhere on Python's module search - path (i.e. spam's parent directory must be one of the directories in - sys.path or $PYTHONPATH); then create files ham.py, bacon.py and - eggs.py inside spam. - -- To import module ham from package spam and use function hamneggs() - from that module, you can either do - - import spam.ham # *not* "import spam" !!! - spam.ham.hamneggs() - - or - - from spam import ham - ham.hamneggs() - - or - - from spam.ham import hamneggs - hamneggs() - -- Importing just "spam" does not do what you expect: it creates an - empty package named spam if one does not already exist, but it does - not import spam's submodules. The only submodule that is guaranteed - to be imported is spam.__init__, if it exists. Note that - spam.__init__ is a submodule of package spam. It can reference to - spam's namespace via the '__.' prefix, for instance - - __.spam_inited = 1 # Set a package-level variable - - - -Theory of Operation -------------------- - -A Package is a module that can contain other modules. Packages can be -nested. Package introduce dotted names for modules, like P.Q.M, which -could correspond to a file P/Q/M.py found somewhere on sys.path. It -is possible to import a package itself, though this makes little sense -unless the package contains a module called __init__. - -A package has two variables that control the namespace used for -packages and modules, both initialized to sensible defaults the first -time the package is referenced. - -(1) A package's *module search path*, contained in the per-package -variable __path__, defines a list of *directories* where submodules or -subpackages of the package are searched. It is initialized to the -directory containing the package. Setting this variable to None makes -the module search path default to sys.path (this is not quite the same -as setting it to sys.path, since the latter won't track later -assignments to sys.path). - -(2) A package's *import domain*, contained in the per-package variable -__domain__, defines a list of *packages* that are searched (using -their respective module search paths) to satisfy imports. It is -initialized to the list consisting of the package itself, its parent -package, its parent's parent, and so on, ending with the root package -(the nameless package containing all top-level packages and modules, -whose module search path is None, implying sys.path). - -The default domain implements a search algorithm called "expanding -search". An alternative search algorithm called "explicit search" -fixes the import search path to contain only the root package, -requiring the modules in the package to name all imported modules by -their full name. The convention of using '__' to refer to the current -package (both as a per-module variable and in module names) can be -used by packages using explicit search to refer to modules in the same -package; this combination is known as "explicit-relative search". - -The PackageImporter and PackageLoader classes together implement the -following policies: - -- There is a root package, whose name is ''. It cannot be imported - directly but may be referenced, e.g. by using '__' from a top-level - module. - -- In each module or package, the variable '__' contains a reference to - the parent package; in the root package, '__' points to itself. - -- In the name for imported modules (e.g. M in "import M" or "from M - import ..."), a leading '__' refers to the current package (i.e. - the package containing the current module); leading '__.__' and so - on refer to the current package's parent, and so on. The use of - '__' elsewhere in the module name is not supported. - -- Modules are searched using the "expanding search" algorithm by - virtue of the default value for __domain__. - -- If A.B.C is imported, A is searched using __domain__; then - subpackage B is searched in A using its __path__, and so on. - -- Built-in modules have priority: even if a file sys.py exists in a - package, "import sys" imports the built-in sys module. - -- The same holds for frozen modules, for better or for worse. - -- Submodules and subpackages are not automatically loaded when their - parent packages is loaded. - -- The construct "from package import *" is illegal. (It can still be - used to import names from a module.) - -- When "from package import module1, module2, ..." is used, those - modules are explicitly loaded. - -- When a package is loaded, if it has a submodule __init__, that - module is loaded. This is the place where required submodules can - be loaded, the __path__ variable extended, etc. The __init__ module - is loaded even if the package was loaded only in order to create a - stub for a sub-package: if "import P.Q.R" is the first reference to - P, and P has a submodule __init__, P.__init__ is loaded before P.Q - is even searched. - -Caveats: - -- It is possible to import a package that has no __init__ submodule; - this is not particularly useful but there may be useful applications - for it (e.g. to manipulate its search paths from the outside!). - -- There are no special provisions for os.chdir(). If you plan to use - os.chdir() before you have imported all your modules, it is better - not to have relative pathnames in sys.path. (This could actually be - fixed by changing the implementation of path_join() in the hook to - absolutize paths.) - -- Packages and modules are introduced in sys.modules as soon as their - loading is started. When the loading is terminated by an exception, - the sys.modules entries remain around. - -- There are no special measures to support mutually recursive modules, - but it will work under the same conditions where it works in the - flat module space system. - -- Sometimes dummy entries (whose value is None) are entered in - sys.modules, to indicate that a particular module does not exist -- - this is done to speed up the expanding search algorithm when a - module residing at a higher level is repeatedly imported (Python - promises that importing a previously imported module is cheap!) - -- Although dynamically loaded extensions are allowed inside packages, - the current implementation (hardcoded in the interpreter) of their - initialization may cause problems if an extension invokes the - interpreter during its initialization. - -- reload() may find another version of the module only if it occurs on - the package search path. Thus, it keeps the connection to the - package to which the module belongs, but may find a different file. - -XXX Need to have an explicit name for '', e.g. '__root__'. - -""" - - -import imp -import sys -import __builtin__ - -import ihooks -from ihooks import ModuleLoader, ModuleImporter - - -class PackageLoader(ModuleLoader): - - """A subclass of ModuleLoader with package support. - - find_module_in_dir() will succeed if there's a subdirectory with - the given name; load_module() will create a stub for a package and - load its __init__ module if it exists. - - """ - - def find_module_in_dir(self, name, dir): - if dir is not None: - dirname = self.hooks.path_join(dir, name) - if self.hooks.path_isdir(dirname): - return None, dirname, ('', '', 'PACKAGE') - return ModuleLoader.find_module_in_dir(self, name, dir) - - def load_module(self, name, stuff): - file, filename, info = stuff - suff, mode, type = info - if type == 'PACKAGE': - return self.load_package(name, stuff) - if sys.modules.has_key(name): - m = sys.modules[name] - else: - sys.modules[name] = m = imp.new_module(name) - self.set_parent(m) - if type == imp.C_EXTENSION and '.' in name: - return self.load_dynamic(name, stuff) - else: - return ModuleLoader.load_module(self, name, stuff) - - def load_dynamic(self, name, stuff): - file, filename, (suff, mode, type) = stuff - # Hack around restriction in imp.load_dynamic() - i = name.rfind('.') - tail = name[i+1:] - if sys.modules.has_key(tail): - save = sys.modules[tail] - else: - save = None - sys.modules[tail] = imp.new_module(name) - try: - m = imp.load_dynamic(tail, filename, file) - finally: - if save: - sys.modules[tail] = save - else: - del sys.modules[tail] - sys.modules[name] = m - return m - - def load_package(self, name, stuff): - file, filename, info = stuff - if sys.modules.has_key(name): - package = sys.modules[name] - else: - sys.modules[name] = package = imp.new_module(name) - package.__path__ = [filename] - self.init_package(package) - return package - - def init_package(self, package): - self.set_parent(package) - self.set_domain(package) - self.call_init_module(package) - - def set_parent(self, m): - name = m.__name__ - if '.' in name: - name = name[:name.rfind('.')] - else: - name = '' - m.__ = sys.modules[name] - - def set_domain(self, package): - name = package.__name__ - package.__domain__ = domain = [name] - while '.' in name: - name = name[:name.rfind('.')] - domain.append(name) - if name: - domain.append('') - - def call_init_module(self, package): - stuff = self.find_module('__init__', package.__path__) - if stuff: - m = self.load_module(package.__name__ + '.__init__', stuff) - package.__init__ = m - - -class PackageImporter(ModuleImporter): - - """Importer that understands packages and '__'.""" - - def __init__(self, loader = None, verbose = 0): - ModuleImporter.__init__(self, - loader or PackageLoader(None, verbose), verbose) - - def import_module(self, name, globals={}, locals={}, fromlist=[]): - if globals.has_key('__'): - package = globals['__'] - else: - # No calling context, assume in root package - package = sys.modules[''] - if name[:3] in ('__.', '__'): - p = package - name = name[3:] - while name[:3] in ('__.', '__'): - p = p.__ - name = name[3:] - if not name: - return self.finish(package, p, '', fromlist) - if '.' in name: - i = name.find('.') - name, tail = name[:i], name[i:] - else: - tail = '' - mname = p.__name__ and p.__name__+'.'+name or name - m = self.get1(mname) - return self.finish(package, m, tail, fromlist) - if '.' in name: - i = name.find('.') - name, tail = name[:i], name[i:] - else: - tail = '' - for pname in package.__domain__: - mname = pname and pname+'.'+name or name - m = self.get0(mname) - if m: break - else: - raise ImportError, "No such module %s" % name - return self.finish(m, m, tail, fromlist) - - def finish(self, module, m, tail, fromlist): - # Got ....A; now get ....A.B.C.D - yname = m.__name__ - if tail and sys.modules.has_key(yname + tail): # Fast path - yname, tail = yname + tail, '' - m = self.get1(yname) - while tail: - i = tail.find('.', 1) - if i > 0: - head, tail = tail[:i], tail[i:] - else: - head, tail = tail, '' - yname = yname + head - m = self.get1(yname) - - # Got ....A.B.C.D; now finalize things depending on fromlist - if not fromlist: - return module - if '__' in fromlist: - raise ImportError, "Can't import __ from anywhere" - if not hasattr(m, '__path__'): return m - if '*' in fromlist: - raise ImportError, "Can't import * from a package" - for f in fromlist: - if hasattr(m, f): continue - fname = yname + '.' + f - self.get1(fname) - return m - - def get1(self, name): - m = self.get(name) - if not m: - raise ImportError, "No module named %s" % name - return m - - def get0(self, name): - m = self.get(name) - if not m: - sys.modules[name] = None - return m - - def get(self, name): - # Internal routine to get or load a module when its parent exists - if sys.modules.has_key(name): - return sys.modules[name] - if '.' in name: - i = name.rfind('.') - head, tail = name[:i], name[i+1:] - else: - head, tail = '', name - path = sys.modules[head].__path__ - stuff = self.loader.find_module(tail, path) - if not stuff: - return None - sys.modules[name] = m = self.loader.load_module(name, stuff) - if head: - setattr(sys.modules[head], tail, m) - return m - - def reload(self, module): - name = module.__name__ - if '.' in name: - i = name.rfind('.') - head, tail = name[:i], name[i+1:] - path = sys.modules[head].__path__ - else: - tail = name - path = sys.modules[''].__path__ - stuff = self.loader.find_module(tail, path) - if not stuff: - raise ImportError, "No module named %s" % name - return self.loader.load_module(name, stuff) - - def unload(self, module): - if hasattr(module, '__path__'): - raise ImportError, "don't know how to unload packages yet" - PackageImporter.unload(self, module) - - def install(self): - if not sys.modules.has_key(''): - sys.modules[''] = package = imp.new_module('') - package.__path__ = None - self.loader.init_package(package) - for m in sys.modules.values(): - if not m: continue - if not hasattr(m, '__'): - self.loader.set_parent(m) - ModuleImporter.install(self) - - -def install(v = 0): - ihooks.install(PackageImporter(None, v)) - -def uninstall(): - ihooks.uninstall() - -def ni(v = 0): - install(v) - -def no(): - uninstall() - -def test(): - import pdb - try: - testproper() - except: - sys.last_type, sys.last_value, sys.last_traceback = sys.exc_info() - print - print sys.last_type, ':', sys.last_value - print - pdb.pm() - -def testproper(): - install(1) - try: - import mactest - print dir(mactest) - raw_input('OK?') - finally: - uninstall() - - -if __name__ == '__main__': - test() -else: - install() diff --git a/Lib/lib-old/packmail.py b/Lib/lib-old/packmail.py deleted file mode 100644 index e569108..0000000 --- a/Lib/lib-old/packmail.py +++ /dev/null @@ -1,111 +0,0 @@ -# Module 'packmail' -- create a self-unpacking shell archive. - -# This module works on UNIX and on the Mac; the archives can unpack -# themselves only on UNIX. - -import os -from stat import ST_MTIME - -# Print help -def help(): - print 'All fns have a file open for writing as first parameter' - print 'pack(f, fullname, name): pack fullname as name' - print 'packsome(f, directory, namelist): selected files from directory' - print 'packall(f, directory): pack all files from directory' - print 'packnotolder(f, directory, name): pack all files from directory' - print ' that are not older than a file there' - print 'packtree(f, directory): pack entire directory tree' - -# Pack one file -def pack(outfp, file, name): - fp = open(file, 'r') - outfp.write('echo ' + name + '\n') - outfp.write('sed "s/^X//" >"' + name + '" <<"!"\n') - while 1: - line = fp.readline() - if not line: break - if line[-1:] != '\n': - line = line + '\n' - outfp.write('X' + line) - outfp.write('!\n') - fp.close() - -# Pack some files from a directory -def packsome(outfp, dirname, names): - for name in names: - print name - file = os.path.join(dirname, name) - pack(outfp, file, name) - -# Pack all files from a directory -def packall(outfp, dirname): - names = os.listdir(dirname) - try: - names.remove('.') - except: - pass - try: - names.remove('..') - except: - pass - names.sort() - packsome(outfp, dirname, names) - -# Pack all files from a directory that are not older than a give one -def packnotolder(outfp, dirname, oldest): - names = os.listdir(dirname) - try: - names.remove('.') - except: - pass - try: - names.remove('..') - except: - pass - oldest = os.path.join(dirname, oldest) - st = os.stat(oldest) - mtime = st[ST_MTIME] - todo = [] - for name in names: - print name, '...', - st = os.stat(os.path.join(dirname, name)) - if st[ST_MTIME] >= mtime: - print 'Yes.' - todo.append(name) - else: - print 'No.' - todo.sort() - packsome(outfp, dirname, todo) - -# Pack a whole tree (no exceptions) -def packtree(outfp, dirname): - print 'packtree', dirname - outfp.write('mkdir ' + unixfix(dirname) + '\n') - names = os.listdir(dirname) - try: - names.remove('.') - except: - pass - try: - names.remove('..') - except: - pass - subdirs = [] - for name in names: - fullname = os.path.join(dirname, name) - if os.path.isdir(fullname): - subdirs.append(fullname) - else: - print 'pack', fullname - pack(outfp, fullname, unixfix(fullname)) - for subdirname in subdirs: - packtree(outfp, subdirname) - -def unixfix(name): - comps = name.split(os.sep) - res = '' - for comp in comps: - if comp: - if res: res = res + '/' - res = res + comp - return res diff --git a/Lib/lib-old/poly.py b/Lib/lib-old/poly.py deleted file mode 100644 index fe6a1dc..0000000 --- a/Lib/lib-old/poly.py +++ /dev/null @@ -1,52 +0,0 @@ -# module 'poly' -- Polynomials - -# A polynomial is represented by a list of coefficients, e.g., -# [1, 10, 5] represents 1*x**0 + 10*x**1 + 5*x**2 (or 1 + 10x + 5x**2). -# There is no way to suppress internal zeros; trailing zeros are -# taken out by normalize(). - -def normalize(p): # Strip unnecessary zero coefficients - n = len(p) - while n: - if p[n-1]: return p[:n] - n = n-1 - return [] - -def plus(a, b): - if len(a) < len(b): a, b = b, a # make sure a is the longest - res = a[:] # make a copy - for i in range(len(b)): - res[i] = res[i] + b[i] - return normalize(res) - -def minus(a, b): - neg_b = map(lambda x: -x, b[:]) - return plus(a, neg_b) - -def one(power, coeff): # Representation of coeff * x**power - res = [] - for i in range(power): res.append(0) - return res + [coeff] - -def times(a, b): - res = [] - for i in range(len(a)): - for j in range(len(b)): - res = plus(res, one(i+j, a[i]*b[j])) - return res - -def power(a, n): # Raise polynomial a to the positive integral power n - if n == 0: return [1] - if n == 1: return a - if n/2*2 == n: - b = power(a, n/2) - return times(b, b) - return times(power(a, n-1), a) - -def der(a): # First derivative - res = a[1:] - for i in range(len(res)): - res[i] = res[i] * (i+1) - return res - -# Computing a primitive function would require rational arithmetic... diff --git a/Lib/lib-old/rand.py b/Lib/lib-old/rand.py deleted file mode 100644 index a557b69..0000000 --- a/Lib/lib-old/rand.py +++ /dev/null @@ -1,13 +0,0 @@ -# Module 'rand' -# Don't use unless you want compatibility with C's rand()! - -import whrandom - -def srand(seed): - whrandom.seed(seed%256, seed/256%256, seed/65536%256) - -def rand(): - return int(whrandom.random() * 32768.0) % 32768 - -def choice(seq): - return seq[rand() % len(seq)] diff --git a/Lib/lib-old/statcache.py b/Lib/lib-old/statcache.py deleted file mode 100644 index d478393..0000000 --- a/Lib/lib-old/statcache.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Maintain a cache of stat() information on files. - -There are functions to reset the cache or to selectively remove items. -""" - -import warnings -warnings.warn("The statcache module is obsolete. Use os.stat() instead.", - DeprecationWarning) -del warnings - -import os as _os -from stat import * - -__all__ = ["stat","reset","forget","forget_prefix","forget_dir", - "forget_except_prefix","isdir"] - -# The cache. Keys are pathnames, values are os.stat outcomes. -# Remember that multiple threads may be calling this! So, e.g., that -# path in cache returns 1 doesn't mean the cache will still contain -# path on the next line. Code defensively. - -cache = {} - -def stat(path): - """Stat a file, possibly out of the cache.""" - ret = cache.get(path, None) - if ret is None: - cache[path] = ret = _os.stat(path) - return ret - -def reset(): - """Clear the cache.""" - cache.clear() - -# For thread saftey, always use forget() internally too. -def forget(path): - """Remove a given item from the cache, if it exists.""" - try: - del cache[path] - except KeyError: - pass - -def forget_prefix(prefix): - """Remove all pathnames with a given prefix.""" - for path in cache.keys(): - if path.startswith(prefix): - forget(path) - -def forget_dir(prefix): - """Forget a directory and all entries except for entries in subdirs.""" - - # Remove trailing separator, if any. This is tricky to do in a - # x-platform way. For example, Windows accepts both / and \ as - # separators, and if there's nothing *but* a separator we want to - # preserve that this is the root. Only os.path has the platform - # knowledge we need. - from os.path import split, join - prefix = split(join(prefix, "xxx"))[0] - forget(prefix) - for path in cache.keys(): - # First check that the path at least starts with the prefix, so - # that when it doesn't we can avoid paying for split(). - if path.startswith(prefix) and split(path)[0] == prefix: - forget(path) - -def forget_except_prefix(prefix): - """Remove all pathnames except with a given prefix. - - Normally used with prefix = '/' after a chdir(). - """ - - for path in cache.keys(): - if not path.startswith(prefix): - forget(path) - -def isdir(path): - """Return True if directory, else False.""" - try: - st = stat(path) - except _os.error: - return False - return S_ISDIR(st.st_mode) diff --git a/Lib/lib-old/tb.py b/Lib/lib-old/tb.py deleted file mode 100644 index 9063559..0000000 --- a/Lib/lib-old/tb.py +++ /dev/null @@ -1,177 +0,0 @@ -# Print tracebacks, with a dump of local variables. -# Also an interactive stack trace browser. -# Note -- this module is obsolete -- use pdb.pm() instead. - -import sys -import os -from stat import * -import linecache - -def br(): browser(sys.last_traceback) - -def tb(): printtb(sys.last_traceback) - -def browser(tb): - if not tb: - print 'No traceback.' - return - tblist = [] - while tb: - tblist.append(tb) - tb = tb.tb_next - ptr = len(tblist)-1 - tb = tblist[ptr] - while 1: - if tb != tblist[ptr]: - tb = tblist[ptr] - print `ptr` + ':', - printtbheader(tb) - try: - line = raw_input('TB: ') - except KeyboardInterrupt: - print '\n[Interrupted]' - break - except EOFError: - print '\n[EOF]' - break - cmd = line.strip() - if cmd: - if cmd == 'quit': - break - elif cmd == 'list': - browserlist(tb) - elif cmd == 'up': - if ptr-1 >= 0: ptr = ptr-1 - else: print 'Bottom of stack.' - elif cmd == 'down': - if ptr+1 < len(tblist): ptr = ptr+1 - else: print 'Top of stack.' - elif cmd == 'locals': - printsymbols(tb.tb_frame.f_locals) - elif cmd == 'globals': - printsymbols(tb.tb_frame.f_globals) - elif cmd in ('?', 'help'): - browserhelp() - else: - browserexec(tb, cmd) - -def browserlist(tb): - filename = tb.tb_frame.f_code.co_filename - lineno = tb.tb_lineno - last = lineno - first = max(1, last-10) - for i in range(first, last+1): - if i == lineno: prefix = '***' + `i`.rjust(4) + ':' - else: prefix = `i`.rjust(7) + ':' - line = linecache.getline(filename, i) - if line[-1:] == '\n': line = line[:-1] - print prefix + line - -def browserexec(tb, cmd): - locals = tb.tb_frame.f_locals - globals = tb.tb_frame.f_globals - try: - exec cmd+'\n' in globals, locals - except: - t, v = sys.exc_info()[:2] - print '*** Exception:', - if type(t) is type(''): - print t, - else: - print t.__name__, - if v is not None: - print ':', v, - print - print 'Type help to get help.' - -def browserhelp(): - print - print ' This is the traceback browser. Commands are:' - print ' up : move one level up in the call stack' - print ' down : move one level down in the call stack' - print ' locals : print all local variables at this level' - print ' globals : print all global variables at this level' - print ' list : list source code around the failure' - print ' help : print help (what you are reading now)' - print ' quit : back to command interpreter' - print ' Typing any other 1-line statement will execute it' - print ' using the current level\'s symbol tables' - print - -def printtb(tb): - while tb: - print1tb(tb) - tb = tb.tb_next - -def print1tb(tb): - printtbheader(tb) - if tb.tb_frame.f_locals is not tb.tb_frame.f_globals: - printsymbols(tb.tb_frame.f_locals) - -def printtbheader(tb): - filename = tb.tb_frame.f_code.co_filename - lineno = tb.tb_lineno - info = '"' + filename + '"(' + `lineno` + ')' - line = linecache.getline(filename, lineno) - if line: - info = info + ': ' + line.strip() - print info - -def printsymbols(d): - keys = d.keys() - keys.sort() - for name in keys: - print ' ' + name.ljust(12) + ':', - printobject(d[name], 4) - print - -def printobject(v, maxlevel): - if v is None: - print 'None', - elif type(v) in (type(0), type(0.0)): - print v, - elif type(v) is type(''): - if len(v) > 20: - print `v[:17] + '...'`, - else: - print `v`, - elif type(v) is type(()): - print '(', - printlist(v, maxlevel) - print ')', - elif type(v) is type([]): - print '[', - printlist(v, maxlevel) - print ']', - elif type(v) is type({}): - print '{', - printdict(v, maxlevel) - print '}', - else: - print v, - -def printlist(v, maxlevel): - n = len(v) - if n == 0: return - if maxlevel <= 0: - print '...', - return - for i in range(min(6, n)): - printobject(v[i], maxlevel-1) - if i+1 < n: print ',', - if n > 6: print '...', - -def printdict(v, maxlevel): - keys = v.keys() - n = len(keys) - if n == 0: return - if maxlevel <= 0: - print '...', - return - keys.sort() - for i in range(min(6, n)): - key = keys[i] - print `key` + ':', - printobject(v[key], maxlevel-1) - if i+1 < n: print ',', - if n > 6: print '...', diff --git a/Lib/lib-old/tzparse.py b/Lib/lib-old/tzparse.py deleted file mode 100644 index 12468b5..0000000 --- a/Lib/lib-old/tzparse.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Parse a timezone specification.""" - -# XXX Unfinished. -# XXX Only the typical form "XXXhhYYY;ddd/hh,ddd/hh" is currently supported. - -import warnings -warnings.warn( - "The tzparse module is obsolete and will disappear in the future", - DeprecationWarning) - -tzpat = ('^([A-Z][A-Z][A-Z])([-+]?[0-9]+)([A-Z][A-Z][A-Z]);' - '([0-9]+)/([0-9]+),([0-9]+)/([0-9]+)$') - -tzprog = None - -def tzparse(tzstr): - """Given a timezone spec, return a tuple of information - (tzname, delta, dstname, daystart, hourstart, dayend, hourend), - where 'tzname' is the name of the timezone, 'delta' is the offset - in hours from GMT, 'dstname' is the name of the daylight-saving - timezone, and 'daystart'/'hourstart' and 'dayend'/'hourend' - specify the starting and ending points for daylight saving time.""" - global tzprog - if tzprog is None: - import re - tzprog = re.compile(tzpat) - match = tzprog.match(tzstr) - if not match: - raise ValueError, 'not the TZ syntax I understand' - subs = [] - for i in range(1, 8): - subs.append(match.group(i)) - for i in (1, 3, 4, 5, 6): - subs[i] = eval(subs[i]) - [tzname, delta, dstname, daystart, hourstart, dayend, hourend] = subs - return (tzname, delta, dstname, daystart, hourstart, dayend, hourend) - -def tzlocaltime(secs, params): - """Given a Unix time in seconds and a tuple of information about - a timezone as returned by tzparse(), return the local time in the - form (year, month, day, hour, min, sec, yday, wday, tzname).""" - import time - (tzname, delta, dstname, daystart, hourstart, dayend, hourend) = params - year, month, days, hours, mins, secs, yday, wday, isdst = \ - time.gmtime(secs - delta*3600) - if (daystart, hourstart) <= (yday+1, hours) < (dayend, hourend): - tzname = dstname - hours = hours + 1 - return year, month, days, hours, mins, secs, yday, wday, tzname - -def tzset(): - """Determine the current timezone from the "TZ" environment variable.""" - global tzparams, timezone, altzone, daylight, tzname - import os - tzstr = os.environ['TZ'] - tzparams = tzparse(tzstr) - timezone = tzparams[1] * 3600 - altzone = timezone - 3600 - daylight = 1 - tzname = tzparams[0], tzparams[2] - -def isdst(secs): - """Return true if daylight-saving time is in effect for the given - Unix time in the current timezone.""" - import time - (tzname, delta, dstname, daystart, hourstart, dayend, hourend) = \ - tzparams - year, month, days, hours, mins, secs, yday, wday, isdst = \ - time.gmtime(secs - delta*3600) - return (daystart, hourstart) <= (yday+1, hours) < (dayend, hourend) - -tzset() - -def localtime(secs): - """Get the local time in the current timezone.""" - return tzlocaltime(secs, tzparams) - -def test(): - from time import asctime, gmtime - import time, sys - now = time.time() - x = localtime(now) - tm = x[:-1] + (0,) - print 'now =', now, '=', asctime(tm), x[-1] - now = now - now % (24*3600) - if sys.argv[1:]: now = now + eval(sys.argv[1]) - x = gmtime(now) - tm = x[:-1] + (0,) - print 'gmtime =', now, '=', asctime(tm), 'yday =', x[-2] - jan1 = now - x[-2]*24*3600 - x = localtime(jan1) - tm = x[:-1] + (0,) - print 'jan1 =', jan1, '=', asctime(tm), x[-1] - for d in range(85, 95) + range(265, 275): - t = jan1 + d*24*3600 - x = localtime(t) - tm = x[:-1] + (0,) - print 'd =', d, 't =', t, '=', asctime(tm), x[-1] diff --git a/Lib/lib-old/util.py b/Lib/lib-old/util.py deleted file mode 100644 index 104af1e..0000000 --- a/Lib/lib-old/util.py +++ /dev/null @@ -1,25 +0,0 @@ -# Module 'util' -- some useful functions that don't fit elsewhere - -# NB: These are now built-in functions, but this module is provided -# for compatibility. Don't use in new programs unless you need backward -# compatibility (i.e. need to run with old interpreters). - - -# Remove an item from a list. -# No complaints if it isn't in the list at all. -# If it occurs more than once, remove the first occurrence. -# -def remove(item, list): - if item in list: list.remove(item) - - -# Return a string containing a file's contents. -# -def readfile(fn): - return readopenfile(open(fn, 'r')) - - -# Read an open file until EOF. -# -def readopenfile(fp): - return fp.read() diff --git a/Lib/lib-old/whatsound.py b/Lib/lib-old/whatsound.py deleted file mode 100644 index 1b1df23..0000000 --- a/Lib/lib-old/whatsound.py +++ /dev/null @@ -1 +0,0 @@ -from sndhdr import * diff --git a/Lib/lib-old/whrandom.py b/Lib/lib-old/whrandom.py deleted file mode 100644 index bc0d1a4..0000000 --- a/Lib/lib-old/whrandom.py +++ /dev/null @@ -1,144 +0,0 @@ -"""Wichman-Hill random number generator. - -Wichmann, B. A. & Hill, I. D. (1982) -Algorithm AS 183: -An efficient and portable pseudo-random number generator -Applied Statistics 31 (1982) 188-190 - -see also: - Correction to Algorithm AS 183 - Applied Statistics 33 (1984) 123 - - McLeod, A. I. (1985) - A remark on Algorithm AS 183 - Applied Statistics 34 (1985),198-200 - - -USE: -whrandom.random() yields double precision random numbers - uniformly distributed between 0 and 1. - -whrandom.seed(x, y, z) must be called before whrandom.random() - to seed the generator - -There is also an interface to create multiple independent -random generators, and to choose from other ranges. - - - -Multi-threading note: the random number generator used here is not -thread-safe; it is possible that nearly simultaneous calls in -different theads return the same random value. To avoid this, you -have to use a lock around all calls. (I didn't want to slow this -down in the serial case by using a lock here.) -""" - -import warnings -warnings.warn("the whrandom module is deprecated; please use the random module", - DeprecationWarning) - -# Translated by Guido van Rossum from C source provided by -# Adrian Baddeley. - - -class whrandom: - def __init__(self, x = 0, y = 0, z = 0): - """Initialize an instance. - Without arguments, initialize from current time. - With arguments (x, y, z), initialize from them.""" - self.seed(x, y, z) - - def seed(self, x = 0, y = 0, z = 0): - """Set the seed from (x, y, z). - These must be integers in the range [0, 256).""" - if not type(x) == type(y) == type(z) == type(0): - raise TypeError, 'seeds must be integers' - if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): - raise ValueError, 'seeds must be in range(0, 256)' - if 0 == x == y == z: - # Initialize from current time - import time - t = long(time.time() * 256) - t = int((t&0xffffff) ^ (t>>24)) - t, x = divmod(t, 256) - t, y = divmod(t, 256) - t, z = divmod(t, 256) - # Zero is a poor seed, so substitute 1 - self._seed = (x or 1, y or 1, z or 1) - - def random(self): - """Get the next random number in the range [0.0, 1.0).""" - # This part is thread-unsafe: - # BEGIN CRITICAL SECTION - x, y, z = self._seed - # - x = (171 * x) % 30269 - y = (172 * y) % 30307 - z = (170 * z) % 30323 - # - self._seed = x, y, z - # END CRITICAL SECTION - # - return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 - - def uniform(self, a, b): - """Get a random number in the range [a, b).""" - return a + (b-a) * self.random() - - def randint(self, a, b): - """Get a random integer in the range [a, b] including - both end points. - - (Deprecated; use randrange below.)""" - return self.randrange(a, b+1) - - def choice(self, seq): - """Choose a random element from a non-empty sequence.""" - return seq[int(self.random() * len(seq))] - - def randrange(self, start, stop=None, step=1, int=int, default=None): - """Choose a random item from range(start, stop[, step]). - - This fixes the problem with randint() which includes the - endpoint; in Python this is usually not what you want. - Do not supply the 'int' and 'default' arguments.""" - # This code is a bit messy to make it fast for the - # common case while still doing adequate error checking - istart = int(start) - if istart != start: - raise ValueError, "non-integer arg 1 for randrange()" - if stop is default: - if istart > 0: - return int(self.random() * istart) - raise ValueError, "empty range for randrange()" - istop = int(stop) - if istop != stop: - raise ValueError, "non-integer stop for randrange()" - if step == 1: - if istart < istop: - return istart + int(self.random() * - (istop - istart)) - raise ValueError, "empty range for randrange()" - istep = int(step) - if istep != step: - raise ValueError, "non-integer step for randrange()" - if istep > 0: - n = (istop - istart + istep - 1) / istep - elif istep < 0: - n = (istop - istart + istep + 1) / istep - else: - raise ValueError, "zero step for randrange()" - - if n <= 0: - raise ValueError, "empty range for randrange()" - return istart + istep*int(self.random() * n) - - -# Initialize from the current time -_inst = whrandom() -seed = _inst.seed -random = _inst.random -uniform = _inst.uniform -randint = _inst.randint -choice = _inst.choice -randrange = _inst.randrange diff --git a/Lib/lib-old/zmod.py b/Lib/lib-old/zmod.py deleted file mode 100644 index 55f49df..0000000 --- a/Lib/lib-old/zmod.py +++ /dev/null @@ -1,94 +0,0 @@ -# module 'zmod' - -# Compute properties of mathematical "fields" formed by taking -# Z/n (the whole numbers modulo some whole number n) and an -# irreducible polynomial (i.e., a polynomial with only complex zeros), -# e.g., Z/5 and X**2 + 2. -# -# The field is formed by taking all possible linear combinations of -# a set of d base vectors (where d is the degree of the polynomial). -# -# Note that this procedure doesn't yield a field for all combinations -# of n and p: it may well be that some numbers have more than one -# inverse and others have none. This is what we check. -# -# Remember that a field is a ring where each element has an inverse. -# A ring has commutative addition and multiplication, a zero and a one: -# 0*x = x*0 = 0, 0+x = x+0 = x, 1*x = x*1 = x. Also, the distributive -# property holds: a*(b+c) = a*b + b*c. -# (XXX I forget if this is an axiom or follows from the rules.) - -import poly - - -# Example N and polynomial - -N = 5 -P = poly.plus(poly.one(0, 2), poly.one(2, 1)) # 2 + x**2 - - -# Return x modulo y. Returns >= 0 even if x < 0. - -def mod(x, y): - return divmod(x, y)[1] - - -# Normalize a polynomial modulo n and modulo p. - -def norm(a, n, p): - a = poly.modulo(a, p) - a = a[:] - for i in range(len(a)): a[i] = mod(a[i], n) - a = poly.normalize(a) - return a - - -# Make a list of all n^d elements of the proposed field. - -def make_all(mat): - all = [] - for row in mat: - for a in row: - all.append(a) - return all - -def make_elements(n, d): - if d == 0: return [poly.one(0, 0)] - sub = make_elements(n, d-1) - all = [] - for a in sub: - for i in range(n): - all.append(poly.plus(a, poly.one(d-1, i))) - return all - -def make_inv(all, n, p): - x = poly.one(1, 1) - inv = [] - for a in all: - inv.append(norm(poly.times(a, x), n, p)) - return inv - -def checkfield(n, p): - all = make_elements(n, len(p)-1) - inv = make_inv(all, n, p) - all1 = all[:] - inv1 = inv[:] - all1.sort() - inv1.sort() - if all1 == inv1: print 'BINGO!' - else: - print 'Sorry:', n, p - print all - print inv - -def rj(s, width): - if type(s) is not type(''): s = `s` - n = len(s) - if n >= width: return s - return ' '*(width - n) + s - -def lj(s, width): - if type(s) is not type(''): s = `s` - n = len(s) - if n >= width: return s - return s + ' '*(width - n) diff --git a/Lib/reconvert.py b/Lib/reconvert.py deleted file mode 100755 index 64bab5b..0000000 --- a/Lib/reconvert.py +++ /dev/null @@ -1,192 +0,0 @@ -#! /usr/bin/env python - -r"""Convert old ("regex") regular expressions to new syntax ("re"). - -When imported as a module, there are two functions, with their own -strings: - - convert(s, syntax=None) -- convert a regex regular expression to re syntax - - quote(s) -- return a quoted string literal - -When used as a script, read a Python string literal (or any other -expression evaluating to a string) from stdin, and write the -translated expression to stdout as a string literal. Unless stdout is -a tty, no trailing \n is written to stdout. This is done so that it -can be used with Emacs C-U M-| (shell-command-on-region with argument -which filters the region through the shell command). - -No attempt has been made at coding for performance. - -Translation table... - - \( ( (unless RE_NO_BK_PARENS set) - \) ) (unless RE_NO_BK_PARENS set) - \| | (unless RE_NO_BK_VBAR set) - \< \b (not quite the same, but alla...) - \> \b (not quite the same, but alla...) - \` \A - \' \Z - -Not translated... - - . - ^ - $ - * - + (unless RE_BK_PLUS_QM set, then to \+) - ? (unless RE_BK_PLUS_QM set, then to \?) - \ - \b - \B - \w - \W - \1 ... \9 - -Special cases... - - Non-printable characters are always replaced by their 3-digit - escape code (except \t, \n, \r, which use mnemonic escapes) - - Newline is turned into | when RE_NEWLINE_OR is set - -XXX To be done... - - [...] (different treatment of backslashed items?) - [^...] (different treatment of backslashed items?) - ^ $ * + ? (in some error contexts these are probably treated differently) - \vDD \DD (in the regex docs but only works when RE_ANSI_HEX set) - -""" - - -import warnings -warnings.filterwarnings("ignore", ".* regex .*", DeprecationWarning, __name__, - append=1) - -import regex -from regex_syntax import * # RE_* - -__all__ = ["convert","quote"] - -# Default translation table -mastertable = { - r'\<': r'\b', - r'\>': r'\b', - r'\`': r'\A', - r'\'': r'\Z', - r'\(': '(', - r'\)': ')', - r'\|': '|', - '(': r'\(', - ')': r'\)', - '|': r'\|', - '\t': r'\t', - '\n': r'\n', - '\r': r'\r', -} - - -def convert(s, syntax=None): - """Convert a regex regular expression to re syntax. - - The first argument is the regular expression, as a string object, - just like it would be passed to regex.compile(). (I.e., pass the - actual string object -- string quotes must already have been - removed and the standard escape processing has already been done, - e.g. by eval().) - - The optional second argument is the regex syntax variant to be - used. This is an integer mask as passed to regex.set_syntax(); - the flag bits are defined in regex_syntax. When not specified, or - when None is given, the current regex syntax mask (as retrieved by - regex.get_syntax()) is used -- which is 0 by default. - - The return value is a regular expression, as a string object that - could be passed to re.compile(). (I.e., no string quotes have - been added -- use quote() below, or repr().) - - The conversion is not always guaranteed to be correct. More - syntactical analysis should be performed to detect borderline - cases and decide what to do with them. For example, 'x*?' is not - translated correctly. - - """ - table = mastertable.copy() - if syntax is None: - syntax = regex.get_syntax() - if syntax & RE_NO_BK_PARENS: - del table[r'\('], table[r'\)'] - del table['('], table[')'] - if syntax & RE_NO_BK_VBAR: - del table[r'\|'] - del table['|'] - if syntax & RE_BK_PLUS_QM: - table['+'] = r'\+' - table['?'] = r'\?' - table[r'\+'] = '+' - table[r'\?'] = '?' - if syntax & RE_NEWLINE_OR: - table['\n'] = '|' - res = "" - - i = 0 - end = len(s) - while i < end: - c = s[i] - i = i+1 - if c == '\\': - c = s[i] - i = i+1 - key = '\\' + c - key = table.get(key, key) - res = res + key - else: - c = table.get(c, c) - res = res + c - return res - - -def quote(s, quote=None): - """Convert a string object to a quoted string literal. - - This is similar to repr() but will return a "raw" string (r'...' - or r"...") when the string contains backslashes, instead of - doubling all backslashes. The resulting string does *not* always - evaluate to the same string as the original; however it will do - just the right thing when passed into re.compile(). - - The optional second argument forces the string quote; it must be - a single character which is a valid Python string quote. - - """ - if quote is None: - q = "'" - altq = "'" - if q in s and altq not in s: - q = altq - else: - assert quote in ('"', "'", '"""', "'''") - q = quote - res = q - for c in s: - if c == q: c = '\\' + c - elif c < ' ' or c > '~': c = "\\%03o" % ord(c) - res = res + c - res = res + q - if '\\' in res: - res = 'r' + res - return res - - -def main(): - """Main program -- called when run as a script.""" - import sys - s = eval(sys.stdin.read()) - sys.stdout.write(quote(convert(s))) - if sys.stdout.isatty(): - sys.stdout.write("\n") - - -if __name__ == '__main__': - main() diff --git a/Lib/regex_syntax.py b/Lib/regex_syntax.py deleted file mode 100644 index b0a0dbf..0000000 --- a/Lib/regex_syntax.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Constants for selecting regexp syntaxes for the obsolete regex module. - -This module is only for backward compatibility. "regex" has now -been replaced by the new regular expression module, "re". - -These bits are passed to regex.set_syntax() to choose among -alternative regexp syntaxes. -""" - -# 1 means plain parentheses serve as grouping, and backslash -# parentheses are needed for literal searching. -# 0 means backslash-parentheses are grouping, and plain parentheses -# are for literal searching. -RE_NO_BK_PARENS = 1 - -# 1 means plain | serves as the "or"-operator, and \| is a literal. -# 0 means \| serves as the "or"-operator, and | is a literal. -RE_NO_BK_VBAR = 2 - -# 0 means plain + or ? serves as an operator, and \+, \? are literals. -# 1 means \+, \? are operators and plain +, ? are literals. -RE_BK_PLUS_QM = 4 - -# 1 means | binds tighter than ^ or $. -# 0 means the contrary. -RE_TIGHT_VBAR = 8 - -# 1 means treat \n as an _OR operator -# 0 means treat it as a normal character -RE_NEWLINE_OR = 16 - -# 0 means that a special characters (such as *, ^, and $) always have -# their special meaning regardless of the surrounding context. -# 1 means that special characters may act as normal characters in some -# contexts. Specifically, this applies to: -# ^ - only special at the beginning, or after ( or | -# $ - only special at the end, or before ) or | -# *, +, ? - only special when not after the beginning, (, or | -RE_CONTEXT_INDEP_OPS = 32 - -# ANSI sequences (\n etc) and \xhh -RE_ANSI_HEX = 64 - -# No GNU extensions -RE_NO_GNU_EXTENSIONS = 128 - -# Now define combinations of bits for the standard possibilities. -RE_SYNTAX_AWK = (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS) -RE_SYNTAX_EGREP = (RE_SYNTAX_AWK | RE_NEWLINE_OR) -RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_NEWLINE_OR) -RE_SYNTAX_EMACS = 0 - -# (Python's obsolete "regexp" module used a syntax similar to awk.) diff --git a/Lib/regsub.py b/Lib/regsub.py deleted file mode 100644 index 0fc10a5..0000000 --- a/Lib/regsub.py +++ /dev/null @@ -1,198 +0,0 @@ -"""Regexp-based split and replace using the obsolete regex module. - -This module is only for backward compatibility. These operations -are now provided by the new regular expression module, "re". - -sub(pat, repl, str): replace first occurrence of pattern in string -gsub(pat, repl, str): replace all occurrences of pattern in string -split(str, pat, maxsplit): split string using pattern as delimiter -splitx(str, pat, maxsplit): split string using pattern as delimiter plus - return delimiters -""" - -import warnings -warnings.warn("the regsub module is deprecated; please use re.sub()", - DeprecationWarning) - -# Ignore further deprecation warnings about this module -warnings.filterwarnings("ignore", "", DeprecationWarning, __name__) - -import regex - -__all__ = ["sub","gsub","split","splitx","capwords"] - -# Replace first occurrence of pattern pat in string str by replacement -# repl. If the pattern isn't found, the string is returned unchanged. -# The replacement may contain references \digit to subpatterns and -# escaped backslashes. The pattern may be a string or an already -# compiled pattern. - -def sub(pat, repl, str): - prog = compile(pat) - if prog.search(str) >= 0: - regs = prog.regs - a, b = regs[0] - str = str[:a] + expand(repl, regs, str) + str[b:] - return str - - -# Replace all (non-overlapping) occurrences of pattern pat in string -# str by replacement repl. The same rules as for sub() apply. -# Empty matches for the pattern are replaced only when not adjacent to -# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'. - -def gsub(pat, repl, str): - prog = compile(pat) - new = '' - start = 0 - first = 1 - while prog.search(str, start) >= 0: - regs = prog.regs - a, b = regs[0] - if a == b == start and not first: - if start >= len(str) or prog.search(str, start+1) < 0: - break - regs = prog.regs - a, b = regs[0] - new = new + str[start:a] + expand(repl, regs, str) - start = b - first = 0 - new = new + str[start:] - return new - - -# Split string str in fields separated by delimiters matching pattern -# pat. Only non-empty matches for the pattern are considered, so e.g. -# split('abc', '') returns ['abc']. -# The optional 3rd argument sets the number of splits that are performed. - -def split(str, pat, maxsplit = 0): - return intsplit(str, pat, maxsplit, 0) - -# Split string str in fields separated by delimiters matching pattern -# pat. Only non-empty matches for the pattern are considered, so e.g. -# split('abc', '') returns ['abc']. The delimiters are also included -# in the list. -# The optional 3rd argument sets the number of splits that are performed. - - -def splitx(str, pat, maxsplit = 0): - return intsplit(str, pat, maxsplit, 1) - -# Internal function used to implement split() and splitx(). - -def intsplit(str, pat, maxsplit, retain): - prog = compile(pat) - res = [] - start = next = 0 - splitcount = 0 - while prog.search(str, next) >= 0: - regs = prog.regs - a, b = regs[0] - if a == b: - next = next + 1 - if next >= len(str): - break - else: - res.append(str[start:a]) - if retain: - res.append(str[a:b]) - start = next = b - splitcount = splitcount + 1 - if (maxsplit and (splitcount >= maxsplit)): - break - res.append(str[start:]) - return res - - -# Capitalize words split using a pattern - -def capwords(str, pat='[^a-zA-Z0-9_]+'): - words = splitx(str, pat) - for i in range(0, len(words), 2): - words[i] = words[i].capitalize() - return "".join(words) - - -# Internal subroutines: -# compile(pat): compile a pattern, caching already compiled patterns -# expand(repl, regs, str): expand \digit escapes in replacement string - - -# Manage a cache of compiled regular expressions. -# -# If the pattern is a string a compiled version of it is returned. If -# the pattern has been used before we return an already compiled -# version from the cache; otherwise we compile it now and save the -# compiled version in the cache, along with the syntax it was compiled -# with. Instead of a string, a compiled regular expression can also -# be passed. - -cache = {} - -def compile(pat): - if type(pat) != type(''): - return pat # Assume it is a compiled regex - key = (pat, regex.get_syntax()) - if key in cache: - prog = cache[key] # Get it from the cache - else: - prog = cache[key] = regex.compile(pat) - return prog - - -def clear_cache(): - global cache - cache = {} - - -# Expand \digit in the replacement. -# Each occurrence of \digit is replaced by the substring of str -# indicated by regs[digit]. To include a literal \ in the -# replacement, double it; other \ escapes are left unchanged (i.e. -# the \ and the following character are both copied). - -def expand(repl, regs, str): - if '\\' not in repl: - return repl - new = '' - i = 0 - ord0 = ord('0') - while i < len(repl): - c = repl[i]; i = i+1 - if c != '\\' or i >= len(repl): - new = new + c - else: - c = repl[i]; i = i+1 - if '0' <= c <= '9': - a, b = regs[ord(c)-ord0] - new = new + str[a:b] - elif c == '\\': - new = new + c - else: - new = new + '\\' + c - return new - - -# Test program, reads sequences "pat repl str" from stdin. -# Optional argument specifies pattern used to split lines. - -def test(): - import sys - if sys.argv[1:]: - delpat = sys.argv[1] - else: - delpat = '[ \t\n]+' - while 1: - if sys.stdin.isatty(): sys.stderr.write('--> ') - line = sys.stdin.readline() - if not line: break - if line[-1] == '\n': line = line[:-1] - fields = split(line, delpat) - if len(fields) != 3: - print 'Sorry, not three fields' - print 'split:', repr(fields) - continue - [pat, repl, str] = split(line, delpat) - print 'sub :', repr(sub(pat, repl, str)) - print 'gsub:', repr(gsub(pat, repl, str)) diff --git a/Lib/rexec.py b/Lib/rexec.py index 89ff509..d289d6a 100644 --- a/Lib/rexec.py +++ b/Lib/rexec.py @@ -136,7 +136,7 @@ class RExec(ihooks._Verbose): ok_builtin_modules = ('audioop', 'array', 'binascii', 'cmath', 'errno', 'imageop', 'marshal', 'math', 'md5', 'operator', - 'parser', 'regex', 'select', + 'parser', 'select', 'sha', '_sre', 'strop', 'struct', 'time', '_weakref') diff --git a/Lib/test/test___all__.py b/Lib/test/test___all__.py index 0b2e7da..0e17830 100644 --- a/Lib/test/test___all__.py +++ b/Lib/test/test___all__.py @@ -128,8 +128,6 @@ class AllTest(unittest.TestCase): self.check_all("quopri") self.check_all("random") self.check_all("re") - self.check_all("reconvert") - self.check_all("regsub") self.check_all("repr") self.check_all("rexec") self.check_all("rfc822") diff --git a/Lib/test/test_regex.py b/Lib/test/test_regex.py deleted file mode 100644 index 2e2c8f65..0000000 --- a/Lib/test/test_regex.py +++ /dev/null @@ -1,113 +0,0 @@ -from test.test_support import verbose, sortdict -import warnings -warnings.filterwarnings("ignore", "the regex module is deprecated", - DeprecationWarning, __name__) -import regex -from regex_syntax import * - -re = 'a+b+c+' -print 'no match:', regex.match(re, 'hello aaaabcccc world') -print 'successful search:', regex.search(re, 'hello aaaabcccc world') -try: - cre = regex.compile('\(' + re) -except regex.error: - print 'caught expected exception' -else: - print 'expected regex.error not raised' - -print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb') -prev = regex.set_syntax(RE_SYNTAX_AWK) -print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb') -regex.set_syntax(prev) -print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb') - -re = '\(<one>[0-9]+\) *\(<two>[0-9]+\)' -print 'matching with group names and compile()' -cre = regex.compile(re) -print cre.match('801 999') -try: - print cre.group('one') -except regex.error: - print 'caught expected exception' -else: - print 'expected regex.error not raised' - -print 'matching with group names and symcomp()' -cre = regex.symcomp(re) -print cre.match('801 999') -print cre.group(0) -print cre.group('one') -print cre.group(1, 2) -print cre.group('one', 'two') -print 'realpat:', cre.realpat -print 'groupindex:', sortdict(cre.groupindex) - -re = 'world' -cre = regex.compile(re) -print 'not case folded search:', cre.search('HELLO WORLD') -cre = regex.compile(re, regex.casefold) -print 'case folded search:', cre.search('HELLO WORLD') - -print '__members__:', cre.__members__ -print 'regs:', cre.regs -print 'last:', cre.last -print 'translate:', len(cre.translate) -print 'givenpat:', cre.givenpat - -print 'match with pos:', cre.match('hello world', 7) -print 'search with pos:', cre.search('hello world there world', 7) -print 'bogus group:', cre.group(0, 1, 3) -try: - print 'no name:', cre.group('one') -except regex.error: - print 'caught expected exception' -else: - print 'expected regex.error not raised' - -from regex_tests import * -if verbose: print 'Running regex_tests test suite' - -for t in tests: - pattern=s=outcome=repl=expected=None - if len(t)==5: - pattern, s, outcome, repl, expected = t - elif len(t)==3: - pattern, s, outcome = t - else: - raise ValueError, ('Test tuples should have 3 or 5 fields',t) - - try: - obj=regex.compile(pattern) - except regex.error: - if outcome==SYNTAX_ERROR: pass # Expected a syntax error - else: - # Regex syntax errors aren't yet reported, so for - # the official test suite they'll be quietly ignored. - pass - #print '=== Syntax error:', t - else: - try: - result=obj.search(s) - except regex.error, msg: - print '=== Unexpected exception', t, repr(msg) - if outcome==SYNTAX_ERROR: - # This should have been a syntax error; forget it. - pass - elif outcome==FAIL: - if result==-1: pass # No match, as expected - else: print '=== Succeeded incorrectly', t - elif outcome==SUCCEED: - if result!=-1: - # Matched, as expected, so now we compute the - # result string and compare it to our expected result. - start, end = obj.regs[0] - found=s[start:end] - groups=obj.group(1,2,3,4,5,6,7,8,9,10) - vardict=vars() - for i in range(len(groups)): - vardict['g'+str(i+1)]=str(groups[i]) - repl=eval(repl) - if repl!=expected: - print '=== grouping error', t, repr(repl)+' should be '+repr(expected) - else: - print '=== Failed incorrectly', t diff --git a/Lib/test/test_sundry.py b/Lib/test/test_sundry.py index fd10b68..90610e0 100644 --- a/Lib/test/test_sundry.py +++ b/Lib/test/test_sundry.py @@ -68,7 +68,6 @@ import posixfile import profile import pstats import py_compile -#import reconvert import repr try: import rlcompleter # not available on Windows diff --git a/Misc/BeOS-setup.py b/Misc/BeOS-setup.py index 07dbe15..991e608 100644 --- a/Misc/BeOS-setup.py +++ b/Misc/BeOS-setup.py @@ -176,8 +176,6 @@ class PyBuildExt(build_ext): # # Some modules that are normally always on: - exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) ) - exts.append( Extension('_weakref', ['_weakref.c']) ) exts.append( Extension('_symtable', ['symtablemodule.c']) ) @@ -291,7 +291,14 @@ Core and builtins Extension Modules ----------------- -- Swapped re and sre, so help(re) provides full help. importing sre +- Everything under lib-old was removed. This includes the following modules: + Para, addpack, cmp, cmpcache, codehack, dircmp, dump, find, fmt, grep, + lockfile, newdir, ni, packmail, poly, rand, statcache, tb, tzparse, + util, whatsound, whrandom, zmod + +- The following modules were removed: regsub, reconvert, regex, regex_syntax. + +- re and sre were swapped, so help(re) provides full help. importing sre is deprecated. The undocumented re.engine variable no longer exists. - Bug #1448490: Fixed a bug that ISO-2022 codecs could not handle diff --git a/Misc/cheatsheet b/Misc/cheatsheet index d50ed2e..ce02a53 100644 --- a/Misc/cheatsheet +++ b/Misc/cheatsheet @@ -1956,8 +1956,6 @@ quopri Conversions to/from quoted-printable transport encoding. rand Don't use unless you want compatibility with C's rand(). random Random variable generators re Regular Expressions. -reconvert Convert old ("regex") regular expressions to new syntax - ("re"). repr Redo repr() but with limits on most sizes. rexec Restricted execution facilities ("safe" exec, eval, etc). rfc822 RFC-822 message manipulation class. @@ -2035,7 +2033,6 @@ zipfile Read & write PK zipped files. array Obj efficiently representing arrays of basic values math Math functions of C standard time Time-related functions (also the newer datetime module) - regex Regular expression matching operations marshal Read and write some python values in binary format struct Convert between python values and C structs diff --git a/Modules/regexmodule.c b/Modules/regexmodule.c deleted file mode 100644 index 2fb4198..0000000 --- a/Modules/regexmodule.c +++ /dev/null @@ -1,690 +0,0 @@ -/* -XXX support range parameter on search -XXX support mstop parameter on search -*/ - - -/* Regular expression objects */ -/* This uses Tatu Ylonen's copyleft-free reimplementation of - GNU regular expressions */ - -#include "Python.h" - -#include <ctype.h> - -#include "regexpr.h" - -static PyObject *RegexError; /* Exception */ - -typedef struct { - PyObject_HEAD - struct re_pattern_buffer re_patbuf; /* The compiled expression */ - struct re_registers re_regs; /* The registers from the last match */ - char re_fastmap[256]; /* Storage for fastmap */ - PyObject *re_translate; /* String object for translate table */ - PyObject *re_lastok; /* String object last matched/searched */ - PyObject *re_groupindex; /* Group name to index dictionary */ - PyObject *re_givenpat; /* Pattern with symbolic groups */ - PyObject *re_realpat; /* Pattern without symbolic groups */ -} regexobject; - -/* Regex object methods */ - -static void -reg_dealloc(regexobject *re) -{ - if (re->re_patbuf.buffer) - free(re->re_patbuf.buffer); - Py_XDECREF(re->re_translate); - Py_XDECREF(re->re_lastok); - Py_XDECREF(re->re_groupindex); - Py_XDECREF(re->re_givenpat); - Py_XDECREF(re->re_realpat); - PyObject_Del(re); -} - -static PyObject * -makeresult(struct re_registers *regs) -{ - PyObject *v; - int i; - static PyObject *filler = NULL; - - if (filler == NULL) { - filler = Py_BuildValue("(ii)", -1, -1); - if (filler == NULL) - return NULL; - } - v = PyTuple_New(RE_NREGS); - if (v == NULL) - return NULL; - - for (i = 0; i < RE_NREGS; i++) { - int lo = regs->start[i]; - int hi = regs->end[i]; - PyObject *w; - if (lo == -1 && hi == -1) { - w = filler; - Py_INCREF(w); - } - else - w = Py_BuildValue("(ii)", lo, hi); - if (w == NULL || PyTuple_SetItem(v, i, w) < 0) { - Py_DECREF(v); - return NULL; - } - } - return v; -} - -static PyObject * -regobj_match(regexobject *re, PyObject *args) -{ - PyObject *argstring; - char *buffer; - int size; - int offset = 0; - int result; - - if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset)) - return NULL; - if (!PyArg_Parse(argstring, "t#", &buffer, &size)) - return NULL; - - if (offset < 0 || offset > size) { - PyErr_SetString(RegexError, "match offset out of range"); - return NULL; - } - Py_XDECREF(re->re_lastok); - re->re_lastok = NULL; - result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset, - &re->re_regs); - if (result < -1) { - /* Serious failure of some sort; if re_match didn't - set an exception, raise a generic error */ - if (!PyErr_Occurred()) - PyErr_SetString(RegexError, "match failure"); - return NULL; - } - if (result >= 0) { - Py_INCREF(argstring); - re->re_lastok = argstring; - } - return PyInt_FromLong((long)result); /* Length of the match or -1 */ -} - -static PyObject * -regobj_search(regexobject *re, PyObject *args) -{ - PyObject *argstring; - char *buffer; - int size; - int offset = 0; - int range; - int result; - - if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset)) - return NULL; - if (!PyArg_Parse(argstring, "t#:search", &buffer, &size)) - return NULL; - - if (offset < 0 || offset > size) { - PyErr_SetString(RegexError, "search offset out of range"); - return NULL; - } - /* NB: In Emacs 18.57, the documentation for re_search[_2] and - the implementation don't match: the documentation states that - |range| positions are tried, while the code tries |range|+1 - positions. It seems more productive to believe the code! */ - range = size - offset; - Py_XDECREF(re->re_lastok); - re->re_lastok = NULL; - result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range, - &re->re_regs); - if (result < -1) { - /* Serious failure of some sort; if re_match didn't - set an exception, raise a generic error */ - if (!PyErr_Occurred()) - PyErr_SetString(RegexError, "match failure"); - return NULL; - } - if (result >= 0) { - Py_INCREF(argstring); - re->re_lastok = argstring; - } - return PyInt_FromLong((long)result); /* Position of the match or -1 */ -} - -/* get the group from the regex where index can be a string (group name) or - an integer index [0 .. 99] - */ -static PyObject* -group_from_index(regexobject *re, PyObject *index) -{ - int i, a, b; - char *v; - - if (PyString_Check(index)) - if (re->re_groupindex == NULL || - !(index = PyDict_GetItem(re->re_groupindex, index))) - { - PyErr_SetString(RegexError, - "group() group name doesn't exist"); - return NULL; - } - - i = PyInt_AsLong(index); - if (i == -1 && PyErr_Occurred()) - return NULL; - - if (i < 0 || i >= RE_NREGS) { - PyErr_SetString(RegexError, "group() index out of range"); - return NULL; - } - if (re->re_lastok == NULL) { - PyErr_SetString(RegexError, - "group() only valid after successful match/search"); - return NULL; - } - a = re->re_regs.start[i]; - b = re->re_regs.end[i]; - if (a < 0 || b < 0) { - Py_INCREF(Py_None); - return Py_None; - } - - if (!(v = PyString_AsString(re->re_lastok))) - return NULL; - - return PyString_FromStringAndSize(v+a, b-a); -} - - -static PyObject * -regobj_group(regexobject *re, PyObject *args) -{ - int n = PyTuple_Size(args); - int i; - PyObject *res = NULL; - - if (n < 0) - return NULL; - if (n == 0) { - PyErr_SetString(PyExc_TypeError, "not enough arguments"); - return NULL; - } - if (n == 1) { - /* return value is a single string */ - PyObject *index = PyTuple_GetItem(args, 0); - if (!index) - return NULL; - - return group_from_index(re, index); - } - - /* return value is a tuple */ - if (!(res = PyTuple_New(n))) - return NULL; - - for (i = 0; i < n; i++) { - PyObject *index = PyTuple_GetItem(args, i); - PyObject *group = NULL; - - if (!index) - goto finally; - if (!(group = group_from_index(re, index))) - goto finally; - if (PyTuple_SetItem(res, i, group) < 0) - goto finally; - } - return res; - - finally: - Py_DECREF(res); - return NULL; -} - - -static struct PyMethodDef reg_methods[] = { - {"match", (PyCFunction)regobj_match, METH_VARARGS}, - {"search", (PyCFunction)regobj_search, METH_VARARGS}, - {"group", (PyCFunction)regobj_group, METH_VARARGS}, - {NULL, NULL} /* sentinel */ -}; - - - -static char* members[] = { - "last", "regs", "translate", - "groupindex", "realpat", "givenpat", - NULL -}; - - -static PyObject * -regobj_getattr(regexobject *re, char *name) -{ - if (strcmp(name, "regs") == 0) { - if (re->re_lastok == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - return makeresult(&re->re_regs); - } - if (strcmp(name, "last") == 0) { - if (re->re_lastok == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - Py_INCREF(re->re_lastok); - return re->re_lastok; - } - if (strcmp(name, "translate") == 0) { - if (re->re_translate == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - Py_INCREF(re->re_translate); - return re->re_translate; - } - if (strcmp(name, "groupindex") == 0) { - if (re->re_groupindex == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - Py_INCREF(re->re_groupindex); - return re->re_groupindex; - } - if (strcmp(name, "realpat") == 0) { - if (re->re_realpat == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - Py_INCREF(re->re_realpat); - return re->re_realpat; - } - if (strcmp(name, "givenpat") == 0) { - if (re->re_givenpat == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - Py_INCREF(re->re_givenpat); - return re->re_givenpat; - } - if (strcmp(name, "__members__") == 0) { - int i = 0; - PyObject *list = NULL; - - /* okay, so it's unlikely this list will change that often. - still, it's easier to change it in just one place. - */ - while (members[i]) - i++; - if (!(list = PyList_New(i))) - return NULL; - - i = 0; - while (members[i]) { - PyObject* v = PyString_FromString(members[i]); - if (!v || PyList_SetItem(list, i, v) < 0) { - Py_DECREF(list); - return NULL; - } - i++; - } - return list; - } - return Py_FindMethod(reg_methods, (PyObject *)re, name); -} - -static PyTypeObject Regextype = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "regex.regex", /*tp_name*/ - sizeof(regexobject), /*tp_size*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)reg_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - (getattrfunc)regobj_getattr, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ -}; - -/* reference counting invariants: - pattern: borrowed - translate: borrowed - givenpat: borrowed - groupindex: transferred -*/ -static PyObject * -newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex) -{ - regexobject *re; - char *pat; - int size; - - if (!PyArg_Parse(pattern, "t#", &pat, &size)) - return NULL; - - if (translate != NULL && PyString_Size(translate) != 256) { - PyErr_SetString(RegexError, - "translation table must be 256 bytes"); - return NULL; - } - re = PyObject_New(regexobject, &Regextype); - if (re != NULL) { - char *error; - re->re_patbuf.buffer = NULL; - re->re_patbuf.allocated = 0; - re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap; - if (translate) { - re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate); - if (!re->re_patbuf.translate) - goto finally; - Py_INCREF(translate); - } - else - re->re_patbuf.translate = NULL; - re->re_translate = translate; - re->re_lastok = NULL; - re->re_groupindex = groupindex; - Py_INCREF(pattern); - re->re_realpat = pattern; - Py_INCREF(givenpat); - re->re_givenpat = givenpat; - error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf); - if (error != NULL) { - PyErr_SetString(RegexError, error); - goto finally; - } - } - return (PyObject *)re; - finally: - Py_DECREF(re); - return NULL; -} - -static PyObject * -regex_compile(PyObject *self, PyObject *args) -{ - PyObject *pat = NULL; - PyObject *tran = NULL; - - if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran)) - return NULL; - return newregexobject(pat, tran, pat, NULL); -} - -static PyObject * -symcomp(PyObject *pattern, PyObject *gdict) -{ - char *opat, *oend, *o, *n, *g, *v; - int group_count = 0; - int sz; - int escaped = 0; - char name_buf[128]; - PyObject *npattern; - int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1; - - if (!(opat = PyString_AsString(pattern))) - return NULL; - - if ((sz = PyString_Size(pattern)) < 0) - return NULL; - - oend = opat + sz; - o = opat; - - if (oend == opat) { - Py_INCREF(pattern); - return pattern; - } - - if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) || - !(n = PyString_AsString(npattern))) - return NULL; - - while (o < oend) { - if (*o == '(' && escaped == require_escape) { - char *backtrack; - escaped = 0; - ++group_count; - *n++ = *o; - if (++o >= oend || *o != '<') - continue; - /* *o == '<' */ - if (o+1 < oend && *(o+1) == '>') - continue; - backtrack = o; - g = name_buf; - for (++o; o < oend;) { - if (*o == '>') { - PyObject *group_name = NULL; - PyObject *group_index = NULL; - *g++ = '\0'; - group_name = PyString_FromString(name_buf); - group_index = PyInt_FromLong(group_count); - if (group_name == NULL || - group_index == NULL || - PyDict_SetItem(gdict, group_name, - group_index) != 0) - { - Py_XDECREF(group_name); - Py_XDECREF(group_index); - Py_XDECREF(npattern); - return NULL; - } - Py_DECREF(group_name); - Py_DECREF(group_index); - ++o; /* eat the '>' */ - break; - } - if (!isalnum(Py_CHARMASK(*o)) && *o != '_') { - o = backtrack; - break; - } - *g++ = *o++; - } - } - else if (*o == '[' && !escaped) { - *n++ = *o; - ++o; /* eat the char following '[' */ - *n++ = *o; - while (o < oend && *o != ']') { - ++o; - *n++ = *o; - } - if (o < oend) - ++o; - } - else if (*o == '\\') { - escaped = 1; - *n++ = *o; - ++o; - } - else { - escaped = 0; - *n++ = *o; - ++o; - } - } - - if (!(v = PyString_AsString(npattern))) { - Py_DECREF(npattern); - return NULL; - } - /* _PyString_Resize() decrements npattern on failure */ - _PyString_Resize(&npattern, n - v); - return npattern; - -} - -static PyObject * -regex_symcomp(PyObject *self, PyObject *args) -{ - PyObject *pattern; - PyObject *tran = NULL; - PyObject *gdict = NULL; - PyObject *npattern; - PyObject *retval = NULL; - - if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran)) - return NULL; - - gdict = PyDict_New(); - if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) { - Py_XDECREF(gdict); - return NULL; - } - retval = newregexobject(npattern, tran, pattern, gdict); - Py_DECREF(npattern); - return retval; -} - - -static PyObject *cache_pat; -static PyObject *cache_prog; - -static int -update_cache(PyObject *pat) -{ - PyObject *tuple = PyTuple_Pack(1, pat); - int status = 0; - - if (!tuple) - return -1; - - if (pat != cache_pat) { - Py_XDECREF(cache_pat); - cache_pat = NULL; - Py_XDECREF(cache_prog); - cache_prog = regex_compile((PyObject *)NULL, tuple); - if (cache_prog == NULL) { - status = -1; - goto finally; - } - cache_pat = pat; - Py_INCREF(cache_pat); - } - finally: - Py_DECREF(tuple); - return status; -} - -static PyObject * -regex_match(PyObject *self, PyObject *args) -{ - PyObject *pat, *string; - PyObject *tuple, *v; - - if (!PyArg_ParseTuple(args, "SS:match", &pat, &string)) - return NULL; - if (update_cache(pat) < 0) - return NULL; - - if (!(tuple = Py_BuildValue("(S)", string))) - return NULL; - v = regobj_match((regexobject *)cache_prog, tuple); - Py_DECREF(tuple); - return v; -} - -static PyObject * -regex_search(PyObject *self, PyObject *args) -{ - PyObject *pat, *string; - PyObject *tuple, *v; - - if (!PyArg_ParseTuple(args, "SS:search", &pat, &string)) - return NULL; - if (update_cache(pat) < 0) - return NULL; - - if (!(tuple = Py_BuildValue("(S)", string))) - return NULL; - v = regobj_search((regexobject *)cache_prog, tuple); - Py_DECREF(tuple); - return v; -} - -static PyObject * -regex_set_syntax(PyObject *self, PyObject *args) -{ - int syntax; - if (!PyArg_ParseTuple(args, "i:set_syntax", &syntax)) - return NULL; - syntax = re_set_syntax(syntax); - /* wipe the global pattern cache */ - Py_XDECREF(cache_pat); - cache_pat = NULL; - Py_XDECREF(cache_prog); - cache_prog = NULL; - return PyInt_FromLong((long)syntax); -} - -static PyObject * -regex_get_syntax(PyObject *self) -{ - return PyInt_FromLong((long)re_syntax); -} - - -static struct PyMethodDef regex_global_methods[] = { - {"compile", regex_compile, METH_VARARGS}, - {"symcomp", regex_symcomp, METH_VARARGS}, - {"match", regex_match, METH_VARARGS}, - {"search", regex_search, METH_VARARGS}, - {"set_syntax", regex_set_syntax, METH_VARARGS}, - {"get_syntax", (PyCFunction)regex_get_syntax, METH_NOARGS}, - {NULL, NULL} /* sentinel */ -}; - -PyMODINIT_FUNC -initregex(void) -{ - PyObject *m, *d, *v; - int i; - char *s; - - /* Initialize object type */ - Regextype.ob_type = &PyType_Type; - - m = Py_InitModule("regex", regex_global_methods); - if (m == NULL) - return; - d = PyModule_GetDict(m); - - if (PyErr_Warn(PyExc_DeprecationWarning, - "the regex module is deprecated; " - "please use the re module") < 0) - return; - - /* Initialize regex.error exception */ - v = RegexError = PyErr_NewException("regex.error", NULL, NULL); - if (v == NULL || PyDict_SetItemString(d, "error", v) != 0) - goto finally; - - /* Initialize regex.casefold constant */ - if (!(v = PyString_FromStringAndSize((char *)NULL, 256))) - goto finally; - - if (!(s = PyString_AsString(v))) - goto finally; - - for (i = 0; i < 256; i++) { - if (isupper(i)) - s[i] = tolower(i); - else - s[i] = i; - } - if (PyDict_SetItemString(d, "casefold", v) < 0) - goto finally; - Py_DECREF(v); - - if (!PyErr_Occurred()) - return; - finally: - /* Nothing */ ; -} diff --git a/Modules/regexpr.c b/Modules/regexpr.c deleted file mode 100644 index e6a5417..0000000 --- a/Modules/regexpr.c +++ /dev/null @@ -1,2094 +0,0 @@ -/* regexpr.c - * - * Author: Tatu Ylonen <ylo@ngs.fi> - * - * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without - * fee, provided that the above copyright notice appear in all copies. - * This software is provided "as is" without express or implied - * warranty. - * - * Created: Thu Sep 26 17:14:05 1991 ylo - * Last modified: Mon Nov 4 17:06:48 1991 ylo - * Ported to Think C: 19 Jan 1992 guido@cwi.nl - * - * This code draws many ideas from the regular expression packages by - * Henry Spencer of the University of Toronto and Richard Stallman of - * the Free Software Foundation. - * - * Emacs-specific code and syntax table code is almost directly borrowed - * from GNU regexp. - * - * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April - * 1997 Thanks for bug reports and ideas from Andrew Kuchling, Tim - * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and - * probably one or two others that I'm forgetting. - * - * $Id$ */ - -#include "Python.h" -#include "regexpr.h" - -/* The original code blithely assumed that sizeof(short) == 2. Not - * always true. Original instances of "(short)x" were replaced by - * SHORT(x), where SHORT is #defined below. */ - -#define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x)) - -/* The stack implementation is taken from an idea by Andrew Kuchling. - * It's a doubly linked list of arrays. The advantages of this over a - * simple linked list are that the number of mallocs required are - * reduced. It also makes it possible to statically allocate enough - * space so that small patterns don't ever need to call malloc. - * - * The advantages over a single array is that is periodically - * realloced when more space is needed is that we avoid ever copying - * the stack. */ - -/* item_t is the basic stack element. Defined as a union of - * structures so that both registers, failure points, and counters can - * be pushed/popped from the stack. There's nothing built into the - * item to keep track of whether a certain stack item is a register, a - * failure point, or a counter. */ - -typedef union item_t -{ - struct - { - int num; - int level; - unsigned char *start; - unsigned char *end; - } reg; - struct - { - int count; - int level; - int phantom; - unsigned char *code; - unsigned char *text; - } fail; - struct - { - int num; - int level; - int count; - } cntr; -} item_t; - -#define STACK_PAGE_SIZE 256 -#define NUM_REGISTERS 256 - -/* A 'page' of stack items. */ - -typedef struct item_page_t -{ - item_t items[STACK_PAGE_SIZE]; - struct item_page_t *prev; - struct item_page_t *next; -} item_page_t; - - -typedef struct match_state -{ - /* The number of registers that have been pushed onto the stack - * since the last failure point. */ - - int count; - - /* Used to control when registers need to be pushed onto the - * stack. */ - - int level; - - /* The number of failure points on the stack. */ - - int point; - - /* Storage for the registers. Each register consists of two - * pointers to characters. So register N is represented as - * start[N] and end[N]. The pointers must be converted to - * offsets from the beginning of the string before returning the - * registers to the calling program. */ - - unsigned char *start[NUM_REGISTERS]; - unsigned char *end[NUM_REGISTERS]; - - /* Keeps track of whether a register has changed recently. */ - - int changed[NUM_REGISTERS]; - - /* Structure to encapsulate the stack. */ - struct - { - /* index into the current page. If index == 0 and you need - * to pop an item, move to the previous page and set index - * = STACK_PAGE_SIZE - 1. Otherwise decrement index to - * push a page. If index == STACK_PAGE_SIZE and you need - * to push a page move to the next page and set index = - * 0. If there is no new next page, allocate a new page - * and link it in. Otherwise, increment index to push a - * page. */ - - int index; - item_page_t *current; /* Pointer to the current page. */ - item_page_t first; /* First page is statically allocated. */ - } stack; -} match_state; - -/* Initialize a state object */ - -/* #define NEW_STATE(state) \ */ -/* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */ -/* state.stack.current = &state.stack.first; \ */ -/* state.stack.first.prev = NULL; \ */ -/* state.stack.first.next = NULL; \ */ -/* state.stack.index = 0; \ */ -/* state.level = 1 */ - -#define NEW_STATE(state, nregs) \ -{ \ - int i; \ - for (i = 0; i < nregs; i++) \ - { \ - state.start[i] = NULL; \ - state.end[i] = NULL; \ - state.changed[i] = 0; \ - } \ - state.stack.current = &state.stack.first; \ - state.stack.first.prev = NULL; \ - state.stack.first.next = NULL; \ - state.stack.index = 0; \ - state.level = 1; \ - state.count = 0; \ - state.level = 0; \ - state.point = 0; \ -} - -/* Free any memory that might have been malloc'd */ - -#define FREE_STATE(state) \ -while(state.stack.first.next != NULL) \ -{ \ - state.stack.current = state.stack.first.next; \ - state.stack.first.next = state.stack.current->next; \ - free(state.stack.current); \ -} - -/* Discard the top 'count' stack items. */ - -#define STACK_DISCARD(stack, count, on_error) \ -stack.index -= count; \ -while (stack.index < 0) \ -{ \ - if (stack.current->prev == NULL) \ - on_error; \ - stack.current = stack.current->prev; \ - stack.index += STACK_PAGE_SIZE; \ -} - -/* Store a pointer to the previous item on the stack. Used to pop an - * item off of the stack. */ - -#define STACK_PREV(stack, top, on_error) \ -if (stack.index == 0) \ -{ \ - if (stack.current->prev == NULL) \ - on_error; \ - stack.current = stack.current->prev; \ - stack.index = STACK_PAGE_SIZE - 1; \ -} \ -else \ -{ \ - stack.index--; \ -} \ -top = &(stack.current->items[stack.index]) - -/* Store a pointer to the next item on the stack. Used to push an item - * on to the stack. */ - -#define STACK_NEXT(stack, top, on_error) \ -if (stack.index == STACK_PAGE_SIZE) \ -{ \ - if (stack.current->next == NULL) \ - { \ - stack.current->next = (item_page_t *)malloc(sizeof(item_page_t)); \ - if (stack.current->next == NULL) \ - on_error; \ - stack.current->next->prev = stack.current; \ - stack.current->next->next = NULL; \ - } \ - stack.current = stack.current->next; \ - stack.index = 0; \ -} \ -top = &(stack.current->items[stack.index++]) - -/* Store a pointer to the item that is 'count' items back in the - * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to - * STACK_TOP(stack, top, on_error). */ - -#define STACK_BACK(stack, top, count, on_error) \ -{ \ - int index; \ - item_page_t *current; \ - current = stack.current; \ - index = stack.index - (count); \ - while (index < 0) \ - { \ - if (current->prev == NULL) \ - on_error; \ - current = current->prev; \ - index += STACK_PAGE_SIZE; \ - } \ - top = &(current->items[index]); \ -} - -/* Store a pointer to the top item on the stack. Execute the - * 'on_error' code if there are no items on the stack. */ - -#define STACK_TOP(stack, top, on_error) \ -if (stack.index == 0) \ -{ \ - if (stack.current->prev == NULL) \ - on_error; \ - top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \ -} \ -else \ -{ \ - top = &(stack.current->items[stack.index - 1]); \ -} - -/* Test to see if the stack is empty */ - -#define STACK_EMPTY(stack) ((stack.index == 0) && \ - (stack.current->prev == NULL)) - -/* Return the start of register 'reg' */ - -#define GET_REG_START(state, reg) (state.start[reg]) - -/* Return the end of register 'reg' */ - -#define GET_REG_END(state, reg) (state.end[reg]) - -/* Set the start of register 'reg'. If the state of the register needs - * saving, push it on the stack. */ - -#define SET_REG_START(state, reg, text, on_error) \ -if(state.changed[reg] < state.level) \ -{ \ - item_t *item; \ - STACK_NEXT(state.stack, item, on_error); \ - item->reg.num = reg; \ - item->reg.start = state.start[reg]; \ - item->reg.end = state.end[reg]; \ - item->reg.level = state.changed[reg]; \ - state.changed[reg] = state.level; \ - state.count++; \ -} \ -state.start[reg] = text - -/* Set the end of register 'reg'. If the state of the register needs - * saving, push it on the stack. */ - -#define SET_REG_END(state, reg, text, on_error) \ -if(state.changed[reg] < state.level) \ -{ \ - item_t *item; \ - STACK_NEXT(state.stack, item, on_error); \ - item->reg.num = reg; \ - item->reg.start = state.start[reg]; \ - item->reg.end = state.end[reg]; \ - item->reg.level = state.changed[reg]; \ - state.changed[reg] = state.level; \ - state.count++; \ -} \ -state.end[reg] = text - -#define PUSH_FAILURE(state, xcode, xtext, on_error) \ -{ \ - item_t *item; \ - STACK_NEXT(state.stack, item, on_error); \ - item->fail.code = xcode; \ - item->fail.text = xtext; \ - item->fail.count = state.count; \ - item->fail.level = state.level; \ - item->fail.phantom = 0; \ - state.count = 0; \ - state.level++; \ - state.point++; \ -} - -/* Update the last failure point with a new position in the text. */ - -#define UPDATE_FAILURE(state, xtext, on_error) \ -{ \ - item_t *item; \ - STACK_BACK(state.stack, item, state.count + 1, on_error); \ - if (!item->fail.phantom) \ - { \ - item_t *item2; \ - STACK_NEXT(state.stack, item2, on_error); \ - item2->fail.code = item->fail.code; \ - item2->fail.text = xtext; \ - item2->fail.count = state.count; \ - item2->fail.level = state.level; \ - item2->fail.phantom = 1; \ - state.count = 0; \ - state.level++; \ - state.point++; \ - } \ - else \ - { \ - STACK_DISCARD(state.stack, state.count, on_error); \ - STACK_TOP(state.stack, item, on_error); \ - item->fail.text = xtext; \ - state.count = 0; \ - state.level++; \ - } \ -} - -#define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \ -{ \ - item_t *item; \ - do \ - { \ - while(state.count > 0) \ - { \ - STACK_PREV(state.stack, item, on_error); \ - state.start[item->reg.num] = item->reg.start; \ - state.end[item->reg.num] = item->reg.end; \ - state.changed[item->reg.num] = item->reg.level; \ - state.count--; \ - } \ - STACK_PREV(state.stack, item, on_empty); \ - xcode = item->fail.code; \ - xtext = item->fail.text; \ - state.count = item->fail.count; \ - state.level = item->fail.level; \ - state.point--; \ - } \ - while (item->fail.text == NULL); \ -} - -enum regexp_compiled_ops /* opcodes for compiled regexp */ -{ - Cend, /* end of pattern reached */ - Cbol, /* beginning of line */ - Ceol, /* end of line */ - Cset, /* character set. Followed by 32 bytes of set. */ - Cexact, /* followed by a byte to match */ - Canychar, /* matches any character except newline */ - Cstart_memory, /* set register start addr (followed by reg number) */ - Cend_memory, /* set register end addr (followed by reg number) */ - Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/ - Cjump, /* followed by two bytes (lsb,msb) of displacement. */ - Cstar_jump, /* will change to jump/update_failure_jump at runtime */ - Cfailure_jump, /* jump to addr on failure */ - Cupdate_failure_jump, /* update topmost failure point and jump */ - Cdummy_failure_jump, /* push a dummy failure point and jump */ - Cbegbuf, /* match at beginning of buffer */ - Cendbuf, /* match at end of buffer */ - Cwordbeg, /* match at beginning of word */ - Cwordend, /* match at end of word */ - Cwordbound, /* match if at word boundary */ - Cnotwordbound, /* match if not at word boundary */ - Csyntaxspec, /* matches syntax code (1 byte follows) */ - Cnotsyntaxspec, /* matches if syntax code does not match (1 byte follows) */ - Crepeat1 -}; - -enum regexp_syntax_op /* syntax codes for plain and quoted characters */ -{ - Rend, /* special code for end of regexp */ - Rnormal, /* normal character */ - Ranychar, /* any character except newline */ - Rquote, /* the quote character */ - Rbol, /* match beginning of line */ - Reol, /* match end of line */ - Roptional, /* match preceding expression optionally */ - Rstar, /* match preceding expr zero or more times */ - Rplus, /* match preceding expr one or more times */ - Ror, /* match either of alternatives */ - Ropenpar, /* opening parenthesis */ - Rclosepar, /* closing parenthesis */ - Rmemory, /* match memory register */ - Rextended_memory, /* \vnn to match registers 10-99 */ - Ropenset, /* open set. Internal syntax hard-coded below. */ - /* the following are gnu extensions to "normal" regexp syntax */ - Rbegbuf, /* beginning of buffer */ - Rendbuf, /* end of buffer */ - Rwordchar, /* word character */ - Rnotwordchar, /* not word character */ - Rwordbeg, /* beginning of word */ - Rwordend, /* end of word */ - Rwordbound, /* word bound */ - Rnotwordbound, /* not word bound */ - Rnum_ops -}; - -static int re_compile_initialized = 0; -static int regexp_syntax = 0; -int re_syntax = 0; /* Exported copy of regexp_syntax */ -static unsigned char regexp_plain_ops[256]; -static unsigned char regexp_quoted_ops[256]; -static unsigned char regexp_precedences[Rnum_ops]; -static int regexp_context_indep_ops; -static int regexp_ansi_sequences; - -#define NUM_LEVELS 5 /* number of precedence levels in use */ -#define MAX_NESTING 100 /* max nesting level of operators */ - -#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] - -unsigned char re_syntax_table[256]; - -void re_compile_initialize(void) -{ - int a; - - static int syntax_table_inited = 0; - - if (!syntax_table_inited) - { - syntax_table_inited = 1; - memset(re_syntax_table, 0, 256); - for (a = 'a'; a <= 'z'; a++) - re_syntax_table[a] = Sword; - for (a = 'A'; a <= 'Z'; a++) - re_syntax_table[a] = Sword; - for (a = '0'; a <= '9'; a++) - re_syntax_table[a] = Sword | Sdigit | Shexdigit; - for (a = '0'; a <= '7'; a++) - re_syntax_table[a] |= Soctaldigit; - for (a = 'A'; a <= 'F'; a++) - re_syntax_table[a] |= Shexdigit; - for (a = 'a'; a <= 'f'; a++) - re_syntax_table[a] |= Shexdigit; - re_syntax_table['_'] = Sword; - for (a = 9; a <= 13; a++) - re_syntax_table[a] = Swhitespace; - re_syntax_table[' '] = Swhitespace; - } - re_compile_initialized = 1; - for (a = 0; a < 256; a++) - { - regexp_plain_ops[a] = Rnormal; - regexp_quoted_ops[a] = Rnormal; - } - for (a = '0'; a <= '9'; a++) - regexp_quoted_ops[a] = Rmemory; - regexp_plain_ops['\134'] = Rquote; - if (regexp_syntax & RE_NO_BK_PARENS) - { - regexp_plain_ops['('] = Ropenpar; - regexp_plain_ops[')'] = Rclosepar; - } - else - { - regexp_quoted_ops['('] = Ropenpar; - regexp_quoted_ops[')'] = Rclosepar; - } - if (regexp_syntax & RE_NO_BK_VBAR) - regexp_plain_ops['\174'] = Ror; - else - regexp_quoted_ops['\174'] = Ror; - regexp_plain_ops['*'] = Rstar; - if (regexp_syntax & RE_BK_PLUS_QM) - { - regexp_quoted_ops['+'] = Rplus; - regexp_quoted_ops['?'] = Roptional; - } - else - { - regexp_plain_ops['+'] = Rplus; - regexp_plain_ops['?'] = Roptional; - } - if (regexp_syntax & RE_NEWLINE_OR) - regexp_plain_ops['\n'] = Ror; - regexp_plain_ops['\133'] = Ropenset; - regexp_plain_ops['\136'] = Rbol; - regexp_plain_ops['$'] = Reol; - regexp_plain_ops['.'] = Ranychar; - if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS)) - { - regexp_quoted_ops['w'] = Rwordchar; - regexp_quoted_ops['W'] = Rnotwordchar; - regexp_quoted_ops['<'] = Rwordbeg; - regexp_quoted_ops['>'] = Rwordend; - regexp_quoted_ops['b'] = Rwordbound; - regexp_quoted_ops['B'] = Rnotwordbound; - regexp_quoted_ops['`'] = Rbegbuf; - regexp_quoted_ops['\''] = Rendbuf; - } - if (regexp_syntax & RE_ANSI_HEX) - regexp_quoted_ops['v'] = Rextended_memory; - for (a = 0; a < Rnum_ops; a++) - regexp_precedences[a] = 4; - if (regexp_syntax & RE_TIGHT_VBAR) - { - regexp_precedences[Ror] = 3; - regexp_precedences[Rbol] = 2; - regexp_precedences[Reol] = 2; - } - else - { - regexp_precedences[Ror] = 2; - regexp_precedences[Rbol] = 3; - regexp_precedences[Reol] = 3; - } - regexp_precedences[Rclosepar] = 1; - regexp_precedences[Rend] = 0; - regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; - regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0; -} - -int re_set_syntax(int syntax) -{ - int ret; - - ret = regexp_syntax; - regexp_syntax = syntax; - re_syntax = syntax; /* Exported copy */ - re_compile_initialize(); - return ret; -} - -static int hex_char_to_decimal(int ch) -{ - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'a' && ch <= 'f') - return ch - 'a' + 10; - if (ch >= 'A' && ch <= 'F') - return ch - 'A' + 10; - return 16; -} - -static void re_compile_fastmap_aux(unsigned char *code, int pos, - unsigned char *visited, - unsigned char *can_be_null, - unsigned char *fastmap) -{ - int a; - int b; - int syntaxcode; - - if (visited[pos]) - return; /* we have already been here */ - visited[pos] = 1; - for (;;) - switch (code[pos++]) { - case Cend: - { - *can_be_null = 1; - return; - } - case Cbol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - for (a = 0; a < 256; a++) - fastmap[a] = 1; - break; - } - case Csyntaxspec: - { - syntaxcode = code[pos++]; - for (a = 0; a < 256; a++) - if (SYNTAX(a) & syntaxcode) - fastmap[a] = 1; - return; - } - case Cnotsyntaxspec: - { - syntaxcode = code[pos++]; - for (a = 0; a < 256; a++) - if (!(SYNTAX(a) & syntaxcode) ) - fastmap[a] = 1; - return; - } - case Ceol: - { - fastmap['\n'] = 1; - if (*can_be_null == 0) - *can_be_null = 2; /* can match null, but only at end of buffer*/ - return; - } - case Cset: - { - for (a = 0; a < 256/8; a++) - if (code[pos + a] != 0) - for (b = 0; b < 8; b++) - if (code[pos + a] & (1 << b)) - fastmap[(a << 3) + b] = 1; - pos += 256/8; - return; - } - case Cexact: - { - fastmap[(unsigned char)code[pos]] = 1; - return; - } - case Canychar: - { - for (a = 0; a < 256; a++) - if (a != '\n') - fastmap[a] = 1; - return; - } - case Cstart_memory: - case Cend_memory: - { - pos++; - break; - } - case Cmatch_memory: - { - for (a = 0; a < 256; a++) - fastmap[a] = 1; - *can_be_null = 1; - return; - } - case Cjump: - case Cdummy_failure_jump: - case Cupdate_failure_jump: - case Cstar_jump: - { - a = (unsigned char)code[pos++]; - a |= (unsigned char)code[pos++] << 8; - pos += (int)SHORT(a); - if (visited[pos]) - { - /* argh... the regexp contains empty loops. This is not - good, as this may cause a failure stack overflow when - matching. Oh well. */ - /* this path leads nowhere; pursue other paths. */ - return; - } - visited[pos] = 1; - break; - } - case Cfailure_jump: - { - a = (unsigned char)code[pos++]; - a |= (unsigned char)code[pos++] << 8; - a = pos + (int)SHORT(a); - re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap); - break; - } - case Crepeat1: - { - pos += 2; - break; - } - default: - { - PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?"); - return; - /*NOTREACHED*/ - } - } -} - -static int re_do_compile_fastmap(unsigned char *buffer, int used, int pos, - unsigned char *can_be_null, - unsigned char *fastmap) -{ - unsigned char small_visited[512], *visited; - - if (used <= sizeof(small_visited)) - visited = small_visited; - else - { - visited = malloc(used); - if (!visited) - return 0; - } - *can_be_null = 0; - memset(fastmap, 0, 256); - memset(visited, 0, used); - re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); - if (visited != small_visited) - free(visited); - return 1; -} - -void re_compile_fastmap(regexp_t bufp) -{ - if (!bufp->fastmap || bufp->fastmap_accurate) - return; - assert(bufp->used > 0); - if (!re_do_compile_fastmap(bufp->buffer, - bufp->used, - 0, - &bufp->can_be_null, - bufp->fastmap)) - return; - if (PyErr_Occurred()) return; - if (bufp->buffer[0] == Cbol) - bufp->anchor = 1; /* begline */ - else - if (bufp->buffer[0] == Cbegbuf) - bufp->anchor = 2; /* begbuf */ - else - bufp->anchor = 0; /* none */ - bufp->fastmap_accurate = 1; -} - -/* - * star is coded as: - * 1: failure_jump 2 - * ... code for operand of star - * star_jump 1 - * 2: ... code after star - * - * We change the star_jump to update_failure_jump if we can determine - * that it is safe to do so; otherwise we change it to an ordinary - * jump. - * - * plus is coded as - * - * jump 2 - * 1: failure_jump 3 - * 2: ... code for operand of plus - * star_jump 1 - * 3: ... code after plus - * - * For star_jump considerations this is processed identically to star. - * - */ - -static int re_optimize_star_jump(regexp_t bufp, unsigned char *code) -{ - unsigned char map[256]; - unsigned char can_be_null; - unsigned char *p1; - unsigned char *p2; - unsigned char ch; - int a; - int b; - int num_instructions = 0; - - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - - p1 = code + a + 3; /* skip the failure_jump */ - /* Check that the jump is within the pattern */ - if (p1<bufp->buffer || bufp->buffer+bufp->used<p1) - { - PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (failure_jump opt)"); - return 0; - } - - assert(p1[-3] == Cfailure_jump); - p2 = code; - /* p1 points inside loop, p2 points to after loop */ - if (!re_do_compile_fastmap(bufp->buffer, bufp->used, - (int)(p2 - bufp->buffer), - &can_be_null, map)) - goto make_normal_jump; - - /* If we might introduce a new update point inside the - * loop, we can't optimize because then update_jump would - * update a wrong failure point. Thus we have to be - * quite careful here. - */ - - /* loop until we find something that consumes a character */ - loop_p1: - num_instructions++; - switch (*p1++) - { - case Cbol: - case Ceol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - goto loop_p1; - } - case Cstart_memory: - case Cend_memory: - { - p1++; - goto loop_p1; - } - case Cexact: - { - ch = (unsigned char)*p1++; - if (map[(int)ch]) - goto make_normal_jump; - break; - } - case Canychar: - { - for (b = 0; b < 256; b++) - if (b != '\n' && map[b]) - goto make_normal_jump; - break; - } - case Cset: - { - for (b = 0; b < 256; b++) - if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) - goto make_normal_jump; - p1 += 256/8; - break; - } - default: - { - goto make_normal_jump; - } - } - /* now we know that we can't backtrack. */ - while (p1 != p2 - 3) - { - num_instructions++; - switch (*p1++) - { - case Cend: - { - return 0; - } - case Cbol: - case Ceol: - case Canychar: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - break; - } - case Cset: - { - p1 += 256/8; - break; - } - case Cexact: - case Cstart_memory: - case Cend_memory: - case Cmatch_memory: - case Csyntaxspec: - case Cnotsyntaxspec: - { - p1++; - break; - } - case Cjump: - case Cstar_jump: - case Cfailure_jump: - case Cupdate_failure_jump: - case Cdummy_failure_jump: - { - goto make_normal_jump; - } - default: - { - return 0; - } - } - } - - /* make_update_jump: */ - code -= 3; - a += 3; /* jump to after the Cfailure_jump */ - code[0] = Cupdate_failure_jump; - code[1] = a & 0xff; - code[2] = a >> 8; - if (num_instructions > 1) - return 1; - assert(num_instructions == 1); - /* if the only instruction matches a single character, we can do - * better */ - p1 = code + 3 + a; /* start of sole instruction */ - if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar || - *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec) - code[0] = Crepeat1; - return 1; - - make_normal_jump: - code -= 3; - *code = Cjump; - return 1; -} - -static int re_optimize(regexp_t bufp) -{ - unsigned char *code; - - code = bufp->buffer; - - while(1) - { - switch (*code++) - { - case Cend: - { - return 1; - } - case Canychar: - case Cbol: - case Ceol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - break; - } - case Cset: - { - code += 256/8; - break; - } - case Cexact: - case Cstart_memory: - case Cend_memory: - case Cmatch_memory: - case Csyntaxspec: - case Cnotsyntaxspec: - { - code++; - break; - } - case Cstar_jump: - { - if (!re_optimize_star_jump(bufp, code)) - { - return 0; - } - /* fall through */ - } - case Cupdate_failure_jump: - case Cjump: - case Cdummy_failure_jump: - case Cfailure_jump: - case Crepeat1: - { - code += 2; - break; - } - default: - { - return 0; - } - } - } -} - -#define NEXTCHAR(var) \ -{ \ - if (pos >= size) \ - goto ends_prematurely; \ - (var) = regex[pos]; \ - pos++; \ -} - -#define ALLOC(amount) \ -{ \ - if (pattern_offset+(amount) > alloc) \ - { \ - alloc += 256 + (amount); \ - pattern = realloc(pattern, alloc); \ - if (!pattern) \ - goto out_of_memory; \ - } \ -} - -#define STORE(ch) pattern[pattern_offset++] = (ch) - -#define CURRENT_LEVEL_START (starts[starts_base + current_level]) - -#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset - -#define PUSH_LEVEL_STARTS \ -if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \ - starts_base += NUM_LEVELS; \ -else \ - goto too_complex \ - -#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS - -#define PUT_ADDR(offset,addr) \ -{ \ - int disp = (addr) - (offset) - 2; \ - pattern[(offset)] = disp & 0xff; \ - pattern[(offset)+1] = (disp>>8) & 0xff; \ -} - -#define INSERT_JUMP(pos,type,addr) \ -{ \ - int a, p = (pos), t = (type), ad = (addr); \ - for (a = pattern_offset - 1; a >= p; a--) \ - pattern[a + 3] = pattern[a]; \ - pattern[p] = t; \ - PUT_ADDR(p+1,ad); \ - pattern_offset += 3; \ -} - -#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7)) - -#define SET_FIELDS \ -{ \ - bufp->allocated = alloc; \ - bufp->buffer = pattern; \ - bufp->used = pattern_offset; \ -} - -#define GETHEX(var) \ -{ \ - unsigned char gethex_ch, gethex_value; \ - NEXTCHAR(gethex_ch); \ - gethex_value = hex_char_to_decimal(gethex_ch); \ - if (gethex_value == 16) \ - goto hex_error; \ - NEXTCHAR(gethex_ch); \ - gethex_ch = hex_char_to_decimal(gethex_ch); \ - if (gethex_ch == 16) \ - goto hex_error; \ - (var) = gethex_value * 16 + gethex_ch; \ -} - -#define ANSI_TRANSLATE(ch) \ -{ \ - switch (ch) \ - { \ - case 'a': \ - case 'A': \ - { \ - ch = 7; /* audible bell */ \ - break; \ - } \ - case 'b': \ - case 'B': \ - { \ - ch = 8; /* backspace */ \ - break; \ - } \ - case 'f': \ - case 'F': \ - { \ - ch = 12; /* form feed */ \ - break; \ - } \ - case 'n': \ - case 'N': \ - { \ - ch = 10; /* line feed */ \ - break; \ - } \ - case 'r': \ - case 'R': \ - { \ - ch = 13; /* carriage return */ \ - break; \ - } \ - case 't': \ - case 'T': \ - { \ - ch = 9; /* tab */ \ - break; \ - } \ - case 'v': \ - case 'V': \ - { \ - ch = 11; /* vertical tab */ \ - break; \ - } \ - case 'x': /* hex code */ \ - case 'X': \ - { \ - GETHEX(ch); \ - break; \ - } \ - default: \ - { \ - /* other characters passed through */ \ - if (translate) \ - ch = translate[(unsigned char)ch]; \ - break; \ - } \ - } \ -} - -char *re_compile_pattern(unsigned char *regex, int size, regexp_t bufp) -{ - int a; - int pos; - int op; - int current_level; - int level; - int opcode; - int pattern_offset = 0, alloc; - int starts[NUM_LEVELS * MAX_NESTING]; - int starts_base; - int future_jumps[MAX_NESTING]; - int num_jumps; - unsigned char ch = '\0'; - unsigned char *pattern; - unsigned char *translate; - int next_register; - int paren_depth; - int num_open_registers; - int open_registers[RE_NREGS]; - int beginning_context; - - if (!re_compile_initialized) - re_compile_initialize(); - bufp->used = 0; - bufp->fastmap_accurate = 0; - bufp->uses_registers = 1; - bufp->num_registers = 1; - translate = bufp->translate; - pattern = bufp->buffer; - alloc = bufp->allocated; - if (alloc == 0 || pattern == NULL) - { - alloc = 256; - pattern = malloc(alloc); - if (!pattern) - goto out_of_memory; - } - pattern_offset = 0; - starts_base = 0; - num_jumps = 0; - current_level = 0; - SET_LEVEL_START; - num_open_registers = 0; - next_register = 1; - paren_depth = 0; - beginning_context = 1; - op = -1; - /* we use Rend dummy to ensure that pending jumps are updated - (due to low priority of Rend) before exiting the loop. */ - pos = 0; - while (op != Rend) - { - if (pos >= size) - op = Rend; - else - { - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - op = regexp_plain_ops[(unsigned char)ch]; - if (op == Rquote) - { - NEXTCHAR(ch); - op = regexp_quoted_ops[(unsigned char)ch]; - if (op == Rnormal && regexp_ansi_sequences) - ANSI_TRANSLATE(ch); - } - } - level = regexp_precedences[op]; - /* printf("ch='%c' op=%d level=%d current_level=%d - curlevstart=%d\n", ch, op, level, current_level, - CURRENT_LEVEL_START); */ - if (level > current_level) - { - for (current_level++; current_level < level; current_level++) - SET_LEVEL_START; - SET_LEVEL_START; - } - else - if (level < current_level) - { - current_level = level; - for (;num_jumps > 0 && - future_jumps[num_jumps-1] >= CURRENT_LEVEL_START; - num_jumps--) - PUT_ADDR(future_jumps[num_jumps-1], pattern_offset); - } - switch (op) - { - case Rend: - { - break; - } - case Rnormal: - { - normal_char: - opcode = Cexact; - store_opcode_and_arg: /* opcode & ch must be set */ - SET_LEVEL_START; - ALLOC(2); - STORE(opcode); - STORE(ch); - break; - } - case Ranychar: - { - opcode = Canychar; - store_opcode: - SET_LEVEL_START; - ALLOC(1); - STORE(opcode); - break; - } - case Rquote: - { - Py_FatalError("Rquote"); - /*NOTREACHED*/ - } - case Rbol: - { - if (!beginning_context) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - } - opcode = Cbol; - goto store_opcode; - } - case Reol: - { - if (!((pos >= size) || - ((regexp_syntax & RE_NO_BK_VBAR) ? - (regex[pos] == '\174') : - (pos+1 < size && regex[pos] == '\134' && - regex[pos+1] == '\174')) || - ((regexp_syntax & RE_NO_BK_PARENS)? - (regex[pos] == ')'): - (pos+1 < size && regex[pos] == '\134' && - regex[pos+1] == ')')))) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - } - opcode = Ceol; - goto store_opcode; - /* NOTREACHED */ - break; - } - case Roptional: - { - if (beginning_context) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - } - if (CURRENT_LEVEL_START == pattern_offset) - break; /* ignore empty patterns for ? */ - ALLOC(3); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 3); - break; - } - case Rstar: - case Rplus: - { - if (beginning_context) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - } - if (CURRENT_LEVEL_START == pattern_offset) - break; /* ignore empty patterns for + and * */ - ALLOC(9); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 6); - INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START); - if (op == Rplus) /* jump over initial failure_jump */ - INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump, - CURRENT_LEVEL_START + 6); - break; - } - case Ror: - { - ALLOC(6); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 6); - if (num_jumps >= MAX_NESTING) - goto too_complex; - STORE(Cjump); - future_jumps[num_jumps++] = pattern_offset; - STORE(0); - STORE(0); - SET_LEVEL_START; - break; - } - case Ropenpar: - { - SET_LEVEL_START; - if (next_register < RE_NREGS) - { - bufp->uses_registers = 1; - ALLOC(2); - STORE(Cstart_memory); - STORE(next_register); - open_registers[num_open_registers++] = next_register; - bufp->num_registers++; - next_register++; - } - paren_depth++; - PUSH_LEVEL_STARTS; - current_level = 0; - SET_LEVEL_START; - break; - } - case Rclosepar: - { - if (paren_depth <= 0) - goto parenthesis_error; - POP_LEVEL_STARTS; - current_level = regexp_precedences[Ropenpar]; - paren_depth--; - if (paren_depth < num_open_registers) - { - bufp->uses_registers = 1; - ALLOC(2); - STORE(Cend_memory); - num_open_registers--; - STORE(open_registers[num_open_registers]); - } - break; - } - case Rmemory: - { - if (ch == '0') - goto bad_match_register; - assert(ch >= '0' && ch <= '9'); - bufp->uses_registers = 1; - opcode = Cmatch_memory; - ch -= '0'; - goto store_opcode_and_arg; - } - case Rextended_memory: - { - NEXTCHAR(ch); - if (ch < '0' || ch > '9') - goto bad_match_register; - NEXTCHAR(a); - if (a < '0' || a > '9') - goto bad_match_register; - ch = 10 * (a - '0') + ch - '0'; - if (ch == 0 || ch >= RE_NREGS) - goto bad_match_register; - bufp->uses_registers = 1; - opcode = Cmatch_memory; - goto store_opcode_and_arg; - } - case Ropenset: - { - int complement; - int prev; - int offset; - int range; - int firstchar; - - SET_LEVEL_START; - ALLOC(1+256/8); - STORE(Cset); - offset = pattern_offset; - for (a = 0; a < 256/8; a++) - STORE(0); - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - if (ch == '\136') - { - complement = 1; - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - } - else - complement = 0; - prev = -1; - range = 0; - firstchar = 1; - while (ch != '\135' || firstchar) - { - firstchar = 0; - if (regexp_ansi_sequences && ch == '\134') - { - NEXTCHAR(ch); - ANSI_TRANSLATE(ch); - } - if (range) - { - for (a = prev; a <= (int)ch; a++) - SETBIT(pattern, offset, a); - prev = -1; - range = 0; - } - else - if (prev != -1 && ch == '-') - range = 1; - else - { - SETBIT(pattern, offset, ch); - prev = ch; - } - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - } - if (range) - SETBIT(pattern, offset, '-'); - if (complement) - { - for (a = 0; a < 256/8; a++) - pattern[offset+a] ^= 0xff; - } - break; - } - case Rbegbuf: - { - opcode = Cbegbuf; - goto store_opcode; - } - case Rendbuf: - { - opcode = Cendbuf; - goto store_opcode; - } - case Rwordchar: - { - opcode = Csyntaxspec; - ch = Sword; - goto store_opcode_and_arg; - } - case Rnotwordchar: - { - opcode = Cnotsyntaxspec; - ch = Sword; - goto store_opcode_and_arg; - } - case Rwordbeg: - { - opcode = Cwordbeg; - goto store_opcode; - } - case Rwordend: - { - opcode = Cwordend; - goto store_opcode; - } - case Rwordbound: - { - opcode = Cwordbound; - goto store_opcode; - } - case Rnotwordbound: - { - opcode = Cnotwordbound; - goto store_opcode; - } - default: - { - abort(); - } - } - beginning_context = (op == Ropenpar || op == Ror); - } - if (starts_base != 0) - goto parenthesis_error; - assert(num_jumps == 0); - ALLOC(1); - STORE(Cend); - SET_FIELDS; - if(!re_optimize(bufp)) - return "Optimization error"; - return NULL; - - op_error: - SET_FIELDS; - return "Badly placed special character"; - - bad_match_register: - SET_FIELDS; - return "Bad match register number"; - - hex_error: - SET_FIELDS; - return "Bad hexadecimal number"; - - parenthesis_error: - SET_FIELDS; - return "Badly placed parenthesis"; - - out_of_memory: - SET_FIELDS; - return "Out of memory"; - - ends_prematurely: - SET_FIELDS; - return "Regular expression ends prematurely"; - - too_complex: - SET_FIELDS; - return "Regular expression too complex"; -} - -#undef CHARAT -#undef NEXTCHAR -#undef GETHEX -#undef ALLOC -#undef STORE -#undef CURRENT_LEVEL_START -#undef SET_LEVEL_START -#undef PUSH_LEVEL_STARTS -#undef POP_LEVEL_STARTS -#undef PUT_ADDR -#undef INSERT_JUMP -#undef SETBIT -#undef SET_FIELDS - -#define PREFETCH if (text == textend) goto fail - -#define NEXTCHAR(var) \ -PREFETCH; \ -var = (unsigned char)*text++; \ -if (translate) \ - var = translate[var] - -int re_match(regexp_t bufp, unsigned char *string, int size, int pos, - regexp_registers_t old_regs) -{ - unsigned char *code; - unsigned char *translate; - unsigned char *text; - unsigned char *textstart; - unsigned char *textend; - int a; - int b; - int ch; - int reg; - int match_end; - unsigned char *regstart; - unsigned char *regend; - int regsize; - match_state state; - - assert(pos >= 0 && size >= 0); - assert(pos <= size); - - text = string + pos; - textstart = string; - textend = string + size; - - code = bufp->buffer; - - translate = bufp->translate; - - NEW_STATE(state, bufp->num_registers); - - continue_matching: - switch (*code++) - { - case Cend: - { - match_end = text - textstart; - if (old_regs) - { - old_regs->start[0] = pos; - old_regs->end[0] = match_end; - if (!bufp->uses_registers) - { - for (a = 1; a < RE_NREGS; a++) - { - old_regs->start[a] = -1; - old_regs->end[a] = -1; - } - } - else - { - for (a = 1; a < bufp->num_registers; a++) - { - if ((GET_REG_START(state, a) == NULL) || - (GET_REG_END(state, a) == NULL)) - { - old_regs->start[a] = -1; - old_regs->end[a] = -1; - continue; - } - old_regs->start[a] = GET_REG_START(state, a) - textstart; - old_regs->end[a] = GET_REG_END(state, a) - textstart; - } - for (; a < RE_NREGS; a++) - { - old_regs->start[a] = -1; - old_regs->end[a] = -1; - } - } - } - FREE_STATE(state); - return match_end - pos; - } - case Cbol: - { - if (text == textstart || text[-1] == '\n') - goto continue_matching; - goto fail; - } - case Ceol: - { - if (text == textend || *text == '\n') - goto continue_matching; - goto fail; - } - case Cset: - { - NEXTCHAR(ch); - if (code[ch/8] & (1<<(ch & 7))) - { - code += 256/8; - goto continue_matching; - } - goto fail; - } - case Cexact: - { - NEXTCHAR(ch); - if (ch != (unsigned char)*code++) - goto fail; - goto continue_matching; - } - case Canychar: - { - NEXTCHAR(ch); - if (ch == '\n') - goto fail; - goto continue_matching; - } - case Cstart_memory: - { - reg = *code++; - SET_REG_START(state, reg, text, goto error); - goto continue_matching; - } - case Cend_memory: - { - reg = *code++; - SET_REG_END(state, reg, text, goto error); - goto continue_matching; - } - case Cmatch_memory: - { - reg = *code++; - regstart = GET_REG_START(state, reg); - regend = GET_REG_END(state, reg); - if ((regstart == NULL) || (regend == NULL)) - goto fail; /* or should we just match nothing? */ - regsize = regend - regstart; - - if (regsize > (textend - text)) - goto fail; - if(translate) - { - for (; regstart < regend; regstart++, text++) - if (translate[*regstart] != translate[*text]) - goto fail; - } - else - for (; regstart < regend; regstart++, text++) - if (*regstart != *text) - goto fail; - goto continue_matching; - } - case Cupdate_failure_jump: - { - UPDATE_FAILURE(state, text, goto error); - /* fall to next case */ - } - /* treat Cstar_jump just like Cjump if it hasn't been optimized */ - case Cstar_jump: - case Cjump: - { - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - code += (int)SHORT(a); - if (code<bufp->buffer || bufp->buffer+bufp->used<code) { - PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cjump)"); - FREE_STATE(state); - return -2; - } - goto continue_matching; - } - case Cdummy_failure_jump: - { - unsigned char *failuredest; - - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - assert(*code == Cfailure_jump); - b = (unsigned char)code[1]; - b |= (unsigned char)code[2] << 8; - failuredest = code + (int)SHORT(b) + 3; - if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) { - PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump failuredest)"); - FREE_STATE(state); - return -2; - } - PUSH_FAILURE(state, failuredest, NULL, goto error); - code += a; - if (code<bufp->buffer || bufp->buffer+bufp->used < code) { - PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump code)"); - FREE_STATE(state); - return -2; - } - goto continue_matching; - } - case Cfailure_jump: - { - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) { - PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cfailure_jump)"); - FREE_STATE(state); - return -2; - } - PUSH_FAILURE(state, code + a, text, goto error); - goto continue_matching; - } - case Crepeat1: - { - unsigned char *pinst; - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - pinst = code + a; - if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) { - PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Crepeat1)"); - FREE_STATE(state); - return -2; - } - /* pinst is sole instruction in loop, and it matches a - * single character. Since Crepeat1 was originally a - * Cupdate_failure_jump, we also know that backtracking - * is useless: so long as the single-character - * expression matches, it must be used. Also, in the - * case of +, we've already matched one character, so + - * can't fail: nothing here can cause a failure. */ - switch (*pinst++) - { - case Cset: - { - if (translate) - { - while (text < textend) - { - ch = translate[(unsigned char)*text]; - if (pinst[ch/8] & (1<<(ch & 7))) - text++; - else - break; - } - } - else - { - while (text < textend) - { - ch = (unsigned char)*text; - if (pinst[ch/8] & (1<<(ch & 7))) - text++; - else - break; - } - } - break; - } - case Cexact: - { - ch = (unsigned char)*pinst; - if (translate) - { - while (text < textend && - translate[(unsigned char)*text] == ch) - text++; - } - else - { - while (text < textend && (unsigned char)*text == ch) - text++; - } - break; - } - case Canychar: - { - while (text < textend && (unsigned char)*text != '\n') - text++; - break; - } - case Csyntaxspec: - { - a = (unsigned char)*pinst; - if (translate) - { - while (text < textend && - (SYNTAX(translate[*text]) & a) ) - text++; - } - else - { - while (text < textend && (SYNTAX(*text) & a) ) - text++; - } - break; - } - case Cnotsyntaxspec: - { - a = (unsigned char)*pinst; - if (translate) - { - while (text < textend && - !(SYNTAX(translate[*text]) & a) ) - text++; - } - else - { - while (text < textend && !(SYNTAX(*text) & a) ) - text++; - } - break; - } - default: - { - FREE_STATE(state); - PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?"); - return -2; - /*NOTREACHED*/ - } - } - /* due to the funky way + and * are compiled, the top - * failure- stack entry at this point is actually a - * success entry -- update it & pop it */ - UPDATE_FAILURE(state, text, goto error); - goto fail; /* i.e., succeed <wink/sigh> */ - } - case Cbegbuf: - { - if (text == textstart) - goto continue_matching; - goto fail; - } - case Cendbuf: - { - if (text == textend) - goto continue_matching; - goto fail; - } - case Cwordbeg: - { - if (text == textend) - goto fail; - if (!(SYNTAX(*text) & Sword)) - goto fail; - if (text == textstart) - goto continue_matching; - if (!(SYNTAX(text[-1]) & Sword)) - goto continue_matching; - goto fail; - } - case Cwordend: - { - if (text == textstart) - goto fail; - if (!(SYNTAX(text[-1]) & Sword)) - goto fail; - if (text == textend) - goto continue_matching; - if (!(SYNTAX(*text) & Sword)) - goto continue_matching; - goto fail; - } - case Cwordbound: - { - /* Note: as in gnu regexp, this also matches at the - * beginning and end of buffer. */ - - if (text == textstart || text == textend) - goto continue_matching; - if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)) - goto continue_matching; - goto fail; - } - case Cnotwordbound: - { - /* Note: as in gnu regexp, this never matches at the - * beginning and end of buffer. */ - if (text == textstart || text == textend) - goto fail; - if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))) - goto continue_matching; - goto fail; - } - case Csyntaxspec: - { - NEXTCHAR(ch); - if (!(SYNTAX(ch) & (unsigned char)*code++)) - goto fail; - goto continue_matching; - } - case Cnotsyntaxspec: - { - NEXTCHAR(ch); - if (SYNTAX(ch) & (unsigned char)*code++) - goto fail; - goto continue_matching; - } - default: - { - FREE_STATE(state); - PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?"); - return -2; - /*NOTREACHED*/ - } - } - - - -#if 0 /* This line is never reached --Guido */ - abort(); -#endif - /* - *NOTREACHED - */ - - /* Using "break;" in the above switch statement is equivalent to "goto fail;" */ - fail: - POP_FAILURE(state, code, text, goto done_matching, goto error); - goto continue_matching; - - done_matching: -/* if(translated != NULL) */ -/* free(translated); */ - FREE_STATE(state); - return -1; - - error: -/* if (translated != NULL) */ -/* free(translated); */ - FREE_STATE(state); - return -2; -} - - -#undef PREFETCH -#undef NEXTCHAR - -int re_search(regexp_t bufp, unsigned char *string, int size, int pos, - int range, regexp_registers_t regs) -{ - unsigned char *fastmap; - unsigned char *translate; - unsigned char *text; - unsigned char *partstart; - unsigned char *partend; - int dir; - int ret; - unsigned char anchor; - - assert(size >= 0 && pos >= 0); - assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */ - - fastmap = bufp->fastmap; - translate = bufp->translate; - if (fastmap && !bufp->fastmap_accurate) { - re_compile_fastmap(bufp); - if (PyErr_Occurred()) return -2; - } - - anchor = bufp->anchor; - if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */ - fastmap = NULL; - - if (range < 0) - { - dir = -1; - range = -range; - } - else - dir = 1; - - if (anchor == 2) { - if (pos != 0) - return -1; - else - range = 0; - } - - for (; range >= 0; range--, pos += dir) - { - if (fastmap) - { - if (dir == 1) - { /* searching forwards */ - - text = string + pos; - partend = string + size; - partstart = text; - if (translate) - while (text != partend && - !fastmap[(unsigned char) translate[(unsigned char)*text]]) - text++; - else - while (text != partend && !fastmap[(unsigned char)*text]) - text++; - pos += text - partstart; - range -= text - partstart; - if (pos == size && bufp->can_be_null == 0) - return -1; - } - else - { /* searching backwards */ - text = string + pos; - partstart = string + pos - range; - partend = text; - if (translate) - while (text != partstart && - !fastmap[(unsigned char) - translate[(unsigned char)*text]]) - text--; - else - while (text != partstart && - !fastmap[(unsigned char)*text]) - text--; - pos -= partend - text; - range -= partend - text; - } - } - if (anchor == 1) - { /* anchored to begline */ - if (pos > 0 && (string[pos - 1] != '\n')) - continue; - } - assert(pos >= 0 && pos <= size); - ret = re_match(bufp, string, size, pos, regs); - if (ret >= 0) - return pos; - if (ret == -2) - return -2; - } - return -1; -} - -/* -** Local Variables: -** mode: c -** c-file-style: "python" -** End: -*/ diff --git a/Modules/regexpr.h b/Modules/regexpr.h deleted file mode 100644 index 2aee62d..0000000 --- a/Modules/regexpr.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * -*- mode: c-mode; c-file-style: python -*- - */ - -#ifndef Py_REGEXPR_H -#define Py_REGEXPR_H -#ifdef __cplusplus -extern "C" { -#endif - -/* - * regexpr.h - * - * Author: Tatu Ylonen <ylo@ngs.fi> - * - * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appear in all copies. This - * software is provided "as is" without express or implied warranty. - * - * Created: Thu Sep 26 17:15:36 1991 ylo - * Last modified: Mon Nov 4 15:49:46 1991 ylo - */ - -/* $Id$ */ - -#ifndef REGEXPR_H -#define REGEXPR_H - -#define RE_NREGS 100 /* number of registers available */ - -typedef struct re_pattern_buffer -{ - unsigned char *buffer; /* compiled pattern */ - int allocated; /* allocated size of compiled pattern */ - int used; /* actual length of compiled pattern */ - unsigned char *fastmap; /* fastmap[ch] is true if ch can start pattern */ - unsigned char *translate; /* translation to apply during compilation/matching */ - unsigned char fastmap_accurate; /* true if fastmap is valid */ - unsigned char can_be_null; /* true if can match empty string */ - unsigned char uses_registers; /* registers are used and need to be initialized */ - int num_registers; /* number of registers used */ - unsigned char anchor; /* anchor: 0=none 1=begline 2=begbuf */ -} *regexp_t; - -typedef struct re_registers -{ - int start[RE_NREGS]; /* start offset of region */ - int end[RE_NREGS]; /* end offset of region */ -} *regexp_registers_t; - -/* bit definitions for syntax */ -#define RE_NO_BK_PARENS 1 /* no quoting for parentheses */ -#define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */ -#define RE_BK_PLUS_QM 4 /* quoting needed for + and ? */ -#define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */ -#define RE_NEWLINE_OR 16 /* treat newline as or */ -#define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */ -#define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */ -#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */ - -/* definitions for some common regexp styles */ -#define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) -#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR) -#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) -#define RE_SYNTAX_EMACS 0 - -#define Sword 1 -#define Swhitespace 2 -#define Sdigit 4 -#define Soctaldigit 8 -#define Shexdigit 16 - -/* Rename all exported symbols to avoid conflicts with similarly named - symbols in some systems' standard C libraries... */ - -#define re_syntax _Py_re_syntax -#define re_syntax_table _Py_re_syntax_table -#define re_compile_initialize _Py_re_compile_initialize -#define re_set_syntax _Py_re_set_syntax -#define re_compile_pattern _Py_re_compile_pattern -#define re_match _Py_re_match -#define re_search _Py_re_search -#define re_compile_fastmap _Py_re_compile_fastmap -#define re_comp _Py_re_comp -#define re_exec _Py_re_exec - -#ifdef HAVE_PROTOTYPES - -extern int re_syntax; -/* This is the actual syntax mask. It was added so that Python could do - * syntax-dependent munging of patterns before compilation. */ - -extern unsigned char re_syntax_table[256]; - -void re_compile_initialize(void); - -int re_set_syntax(int syntax); -/* This sets the syntax to use and returns the previous syntax. The - * syntax is specified by a bit mask of the above defined bits. */ - -char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled); -/* This compiles the regexp (given in regex and length in regex_size). - * This returns NULL if the regexp compiled successfully, and an error - * message if an error was encountered. The buffer field must be - * initialized to a memory area allocated by malloc (or to NULL) before - * use, and the allocated field must be set to its length (or 0 if - * buffer is NULL). Also, the translate field must be set to point to a - * valid translation table, or NULL if it is not used. */ - -int re_match(regexp_t compiled, unsigned char *string, int size, int pos, - regexp_registers_t old_regs); -/* This tries to match the regexp against the string. This returns the - * length of the matched portion, or -1 if the pattern could not be - * matched and -2 if an error (such as failure stack overflow) is - * encountered. */ - -int re_search(regexp_t compiled, unsigned char *string, int size, int startpos, - int range, regexp_registers_t regs); -/* This searches for a substring matching the regexp. This returns the - * first index at which a match is found. range specifies at how many - * positions to try matching; positive values indicate searching - * forwards, and negative values indicate searching backwards. mstop - * specifies the offset beyond which a match must not go. This returns - * -1 if no match is found, and -2 if an error (such as failure stack - * overflow) is encountered. */ - -void re_compile_fastmap(regexp_t compiled); -/* This computes the fastmap for the regexp. For this to have any effect, - * the calling program must have initialized the fastmap field to point - * to an array of 256 characters. */ - -#else /* HAVE_PROTOTYPES */ - -extern int re_syntax; -extern unsigned char re_syntax_table[256]; -void re_compile_initialize(); -int re_set_syntax(); -char *re_compile_pattern(); -int re_match(); -int re_search(); -void re_compile_fastmap(); - -#endif /* HAVE_PROTOTYPES */ - -#endif /* REGEXPR_H */ - - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_REGEXPR_H */ diff --git a/PC/VC6/pythoncore.dsp b/PC/VC6/pythoncore.dsp index 04a1224..cf3200c 100644 --- a/PC/VC6/pythoncore.dsp +++ b/PC/VC6/pythoncore.dsp @@ -535,14 +535,6 @@ SOURCE=..\..\Objects\rangeobject.c # End Source File
# Begin Source File
-SOURCE=..\..\Modules\regexmodule.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\Modules\regexpr.c
-# End Source File
-# Begin Source File
-
SOURCE=..\..\Modules\rgbimgmodule.c
# End Source File
# Begin Source File
diff --git a/PC/os2emx/Makefile b/PC/os2emx/Makefile index 847fa67..762bfdb 100644 --- a/PC/os2emx/Makefile +++ b/PC/os2emx/Makefile @@ -304,8 +304,6 @@ SRC.MODULES= $(addprefix $(TOP), \ Modules/md5module.c \ Modules/operator.c \ Modules/_randommodule.c \ - Modules/regexmodule.c \ - Modules/regexpr.c \ Modules/rgbimgmodule.c \ Modules/shamodule.c \ Modules/_sre.c \ diff --git a/PC/os2vacpp/makefile b/PC/os2vacpp/makefile index 994ac49..f34047f 100644 --- a/PC/os2vacpp/makefile +++ b/PC/os2vacpp/makefile @@ -948,34 +948,6 @@ readline.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h $(PY_INCLUDE)\class $(PY_INCLUDE)\sliceobject.h $(PY_INCLUDE)\stringobject.h \ $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h $(PY_INCLUDE)\tupleobject.h -regexmodule.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h \ - $(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \ - pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \ - $(PY_INCLUDE)\floatobject.h $(PY_INCLUDE)\funcobject.h $(PY_INCLUDE)\import.h \ - $(PY_INCLUDE)\intobject.h $(PY_INCLUDE)\intrcheck.h $(PY_INCLUDE)\listobject.h \ - $(PY_INCLUDE)\longobject.h $(PY_INCLUDE)\methodobject.h \ - $(PY_INCLUDE)\modsupport.h $(PY_INCLUDE)\moduleobject.h $(PY_INCLUDE)\mymalloc.h \ - $(PY_INCLUDE)\myproto.h $(PY_INCLUDE)\object.h $(PY_INCLUDE)\objimpl.h \ - $(PY_INCLUDE)\pydebug.h $(PY_INCLUDE)\pyerrors.h $(PY_INCLUDE)\pyfpe.h \ - $(PY_INCLUDE)\pystate.h $(PY_INCLUDE)\python.h $(PY_INCLUDE)\pythonrun.h \ - $(PY_INCLUDE)\rangeobject.h $(PY_MODULES)\regexpr.h $(PY_INCLUDE)\sliceobject.h \ - $(PY_INCLUDE)\stringobject.h $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h \ - $(PY_INCLUDE)\tupleobject.h - -regexpr.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h \ - $(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \ - pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \ - $(PY_INCLUDE)\floatobject.h $(PY_INCLUDE)\funcobject.h $(PY_INCLUDE)\import.h \ - $(PY_INCLUDE)\intobject.h $(PY_INCLUDE)\intrcheck.h $(PY_INCLUDE)\listobject.h \ - $(PY_INCLUDE)\longobject.h $(PY_INCLUDE)\methodobject.h \ - $(PY_INCLUDE)\modsupport.h $(PY_INCLUDE)\moduleobject.h $(PY_INCLUDE)\mymalloc.h \ - $(PY_INCLUDE)\myproto.h $(PY_INCLUDE)\object.h $(PY_INCLUDE)\objimpl.h \ - $(PY_INCLUDE)\pydebug.h $(PY_INCLUDE)\pyerrors.h $(PY_INCLUDE)\pyfpe.h \ - $(PY_INCLUDE)\pystate.h $(PY_INCLUDE)\python.h $(PY_INCLUDE)\pythonrun.h \ - $(PY_INCLUDE)\rangeobject.h $(PY_MODULES)\regexpr.h $(PY_INCLUDE)\sliceobject.h \ - $(PY_INCLUDE)\stringobject.h $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h \ - $(PY_INCLUDE)\tupleobject.h - resource.obj: $(PY_INCLUDE)\abstract.h $(OS2TCPIP)\Include\sys\time.h $(PY_INCLUDE)\ceval.h \ $(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \ pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \ diff --git a/PC/os2vacpp/makefile.omk b/PC/os2vacpp/makefile.omk index 0d11b6a..9582338 100644 --- a/PC/os2vacpp/makefile.omk +++ b/PC/os2vacpp/makefile.omk @@ -699,30 +699,6 @@ readline.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \ pythonrun.h rangeobject.h sliceobject.h stringobject.h sysmodule.h \ traceback.h tupleobject.h -regexmodule.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \ - pyconfig.h dictobject.h fileobject.h floatobject.h funcobject.h \ - import.h intobject.h intrcheck.h listobject.h longobject.h \ - methodobject.h modsupport.h moduleobject.h mymalloc.h myproto.h \ - object.h objimpl.h pydebug.h pyerrors.h pyfpe.h pystate.h python.h \ - pythonrun.h rangeobject.h regexpr.h sliceobject.h stringobject.h \ - sysmodule.h traceback.h tupleobject.h - -regexpr.obj: abstract.h ceval.h classobject.h cobject.h \ - complexobject.h pyconfig.h dictobject.h fileobject.h floatobject.h \ - funcobject.h import.h intobject.h intrcheck.h listobject.h \ - longobject.h methodobject.h modsupport.h moduleobject.h mymalloc.h \ - myproto.h object.h objimpl.h pydebug.h pyerrors.h pyfpe.h \ - pystate.h python.h pythonrun.h rangeobject.h regexpr.h \ - sliceobject.h stringobject.h sysmodule.h traceback.h tupleobject.h - -reopmodule.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \ - pyconfig.h dictobject.h fileobject.h floatobject.h funcobject.h \ - import.h intobject.h intrcheck.h listobject.h longobject.h \ - methodobject.h modsupport.h moduleobject.h mymalloc.h myproto.h \ - object.h objimpl.h pydebug.h pyerrors.h pyfpe.h pystate.h python.h \ - pythonrun.h rangeobject.h regexpr.h sliceobject.h stringobject.h \ - sysmodule.h traceback.h tupleobject.h - resource.obj: abstract.h c:\mptn\include\sys\time.h ceval.h classobject.h \ cobject.h complexobject.h pyconfig.h dictobject.h fileobject.h \ floatobject.h funcobject.h import.h intobject.h intrcheck.h \ diff --git a/PC/testpy.py b/PC/testpy.py index f8746a3..78ad63c 100644 --- a/PC/testpy.py +++ b/PC/testpy.py @@ -5,23 +5,23 @@ import sys # change this module too. try: - import string + import os except: - print """Could not import the standard "string" module. + print """Could not import the standard "os" module. Please check your PYTHONPATH environment variable.""" sys.exit(1) try: - import regex_syntax + import symbol except: - print """Could not import the standard "regex_syntax" module. If this is + print """Could not import the standard "symbol" module. If this is a PC, you should add the dos_8x3 directory to your PYTHONPATH.""" sys.exit(1) import os for dir in sys.path: - file = os.path.join(dir, "string.py") + file = os.path.join(dir, "os.py") if os.path.isfile(file): test = os.path.join(dir, "test") if os.path.isdir(test): diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj index a1bb0ed..c4efe2c 100644 --- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -707,12 +707,6 @@ RelativePath="..\Objects\rangeobject.c"> </File> <File - RelativePath="..\Modules\regexmodule.c"> - </File> - <File - RelativePath="..\Modules\regexpr.c"> - </File> - <File RelativePath="..\Modules\rgbimgmodule.c"> </File> <File diff --git a/RISCOS/Makefile b/RISCOS/Makefile index 92f5272..1788b5c 100644 --- a/RISCOS/Makefile +++ b/RISCOS/Makefile @@ -74,7 +74,6 @@ MODULES_DYNAMIC =\ @.^.Lib.md5/pyd\ @.^.Lib.operator/pyd\ @.^.Lib.parser/pyd\ - @.^.Lib.regex/pyd\ @.^.Lib.rgbimg/pyd\ @.^.Lib.sha/pyd\ @.^.Lib.signal/pyd\ @@ -284,10 +283,6 @@ $(LIB_PYTHON): $(OBJECTS) @.^.Lib.parser/pyd: @.^.Modules.o.parsermodule s.linktab $(MAKEDLK) -d @.^.Lib.parser/pyd -s s.linktab -o @.^.Modules.o.parsermodule -e initparser -@.^.Lib.regex/pyd: @.^.Modules.o.regexmodule @.^.Modules.o.regexpr s.linktab - $(LINK) -aof -o @.^.Modules.o.regexlink @.^.Modules.o.regexmodule @.^.Modules.o.regexpr - $(MAKEDLK) -d @.^.Lib.regex/pyd -s s.linktab -o @.^.Modules.o.regexlink -e initregex - @.^.Lib.rgbimg/pyd: @.^.Modules.o.rgbimgmodule s.linktab $(MAKEDLK) -d @.^.Lib.rgbimg/pyd -s s.linktab -o @.^.Modules.o.rgbimgmodule -e initrgbimg diff --git a/Tools/scripts/classfix.py b/Tools/scripts/classfix.py index cdf006a..d30700f 100755 --- a/Tools/scripts/classfix.py +++ b/Tools/scripts/classfix.py @@ -30,7 +30,7 @@ # into a program for a different change to Python programs... import sys -import regex +import re import os from stat import * @@ -53,7 +53,7 @@ def main(): if fix(arg): bad = 1 sys.exit(bad) -ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$') +ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$') def ispython(name): return ispythonprog.match(name) >= 0 @@ -148,12 +148,12 @@ def fix(filename): # This expression doesn't catch *all* class definition headers, # but it's pretty darn close. -classexpr = '^\([ \t]*class +[a-zA-Z0-9_]+\) *( *) *\(\(=.*\)?\):' -classprog = regex.compile(classexpr) +classexpr = '^([ \t]*class +[a-zA-Z0-9_]+) *( *) *((=.*)?):' +classprog = re.compile(classexpr) # Expressions for finding base class expressions. -baseexpr = '^ *\(.*\) *( *) *$' -baseprog = regex.compile(baseexpr) +baseexpr = '^ *(.*) *( *) *$' +baseprog = re.compile(baseexpr) def fixline(line): if classprog.match(line) < 0: # No 'class' keyword -- no change diff --git a/Tools/scripts/fixcid.py b/Tools/scripts/fixcid.py index 42aa835..433a425 100755 --- a/Tools/scripts/fixcid.py +++ b/Tools/scripts/fixcid.py @@ -35,7 +35,7 @@ # files. import sys -import regex +import re import os from stat import * import getopt @@ -90,7 +90,7 @@ def main(): # Change this regular expression to select a different set of files Wanted = '^[a-zA-Z0-9_]+\.[ch]$' def wanted(name): - return regex.match(Wanted, name) >= 0 + return re.match(Wanted, name) >= 0 def recursedown(dirname): dbg('recursedown(%r)\n' % (dirname,)) @@ -212,12 +212,12 @@ Number = Floatnumber + '\|' + Intnumber # Anything else is an operator -- don't list this explicitly because of '/*' OutsideComment = (Identifier, Number, String, Char, CommentStart) -OutsideCommentPattern = '\(' + '\|'.join(OutsideComment) + '\)' -OutsideCommentProgram = regex.compile(OutsideCommentPattern) +OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')' +OutsideCommentProgram = re.compile(OutsideCommentPattern) InsideComment = (Identifier, Number, CommentEnd) -InsideCommentPattern = '\(' + '\|'.join(InsideComment) + '\)' -InsideCommentProgram = regex.compile(InsideCommentPattern) +InsideCommentPattern = '(' + '|'.join(InsideComment) + ')' +InsideCommentProgram = re.compile(InsideCommentPattern) def initfixline(): global Program diff --git a/Tools/scripts/ifdef.py b/Tools/scripts/ifdef.py index 7e7b5cc..2ed7a66 100755 --- a/Tools/scripts/ifdef.py +++ b/Tools/scripts/ifdef.py @@ -27,7 +27,6 @@ # preprocessor commands. import sys -import regex import getopt defs = [] diff --git a/Tools/scripts/methfix.py b/Tools/scripts/methfix.py index a872ab7..b81871f 100755 --- a/Tools/scripts/methfix.py +++ b/Tools/scripts/methfix.py @@ -27,7 +27,7 @@ # into a program for a different change to Python programs... import sys -import regex +import re import os from stat import * @@ -50,7 +50,7 @@ def main(): if fix(arg): bad = 1 sys.exit(bad) -ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$') +ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$') def ispython(name): return ispythonprog.match(name) >= 0 @@ -101,7 +101,7 @@ def fix(filename): if lineno == 1 and g is None and line[:2] == '#!': # Check for non-Python scripts words = line[2:].split() - if words and regex.search('[pP]ython', words[0]) < 0: + if words and re.search('[pP]ython', words[0]) < 0: msg = filename + ': ' + words[0] msg = msg + ' script; not fixed\n' err(msg) @@ -158,8 +158,8 @@ def fix(filename): return 0 -fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *\(( *\(.*\) *)\) *) *:' -fixprog = regex.compile(fixpat) +fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *(( *(.*) *)) *) *:' +fixprog = re.compile(fixpat) def fixline(line): if fixprog.match(line) >= 0: diff --git a/Tools/scripts/objgraph.py b/Tools/scripts/objgraph.py index 01060f9..f74c2b6 100755 --- a/Tools/scripts/objgraph.py +++ b/Tools/scripts/objgraph.py @@ -22,7 +22,7 @@ import sys import os import getopt -import regex +import re # Types of symbols. # @@ -32,7 +32,7 @@ ignore = 'Nntrgdsbavuc' # Regular expression to parse "nm -o" output. # -matcher = regex.compile('\(.*\):\t?........ \(.\) \(.*\)$') +matcher = re.compile('(.*):\t?........ (.) (.*)$') # Store "item" in "dict" under "key". # The dictionary maps keys to lists of items. diff --git a/Tools/scripts/pathfix.py b/Tools/scripts/pathfix.py index 5cb5add..7f6f191 100755 --- a/Tools/scripts/pathfix.py +++ b/Tools/scripts/pathfix.py @@ -20,7 +20,7 @@ # into a program for a different change to Python programs... import sys -import regex +import re import os from stat import * import getopt @@ -59,7 +59,7 @@ def main(): if fix(arg): bad = 1 sys.exit(bad) -ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$') +ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$') def ispython(name): return ispythonprog.match(name) >= 0 diff --git a/Tools/scripts/pdeps.py b/Tools/scripts/pdeps.py index e835f84..da63e35 100755 --- a/Tools/scripts/pdeps.py +++ b/Tools/scripts/pdeps.py @@ -21,7 +21,7 @@ import sys -import regex +import re import os @@ -57,8 +57,8 @@ def main(): # Compiled regular expressions to search for import statements # -m_import = regex.compile('^[ \t]*from[ \t]+\([^ \t]+\)[ \t]+') -m_from = regex.compile('^[ \t]*import[ \t]+\([^#]+\)') +m_import = re.compile('^[ \t]*from[ \t]+([^ \t]+)[ \t]+') +m_from = re.compile('^[ \t]*import[ \t]+([^#]+)') # Collect data from one file @@ -326,8 +326,6 @@ class PyBuildExt(build_ext): # # Some modules that are normally always on: - exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) ) - exts.append( Extension('_weakref', ['_weakref.c']) ) # array objects |