summary refs log tree commit diff stats
path: root/Doc/libre.tex
diff options
context:
space:
mode:
author Fred Drake <fdrake@acm.org> 1998-01-12 19:16:24 (GMT)
committer Fred Drake <fdrake@acm.org> 1998-01-12 19:16:24 (GMT)
commit 023f87fbf80a25cac104b6019a37af388ab7a523 (patch)
tree d302b602f98c5f359fdf74311ef2de2d48d30350 /Doc/libre.tex
parent 97546399c72d8d144f07b408b4b6fa5b6edb4c1d (diff)
download cpython-023f87fbf80a25cac104b6019a37af388ab7a523.zip
cpython-023f87fbf80a25cac104b6019a37af388ab7a523.tar.gz
cpython-023f87fbf80a25cac104b6019a37af388ab7a523.tar.bz2
Lots of minor markup nits, all consistency related.
Consistently use trailing "()" on function / method names. Wrapped some long lines.
Diffstat (limited to 'Doc/libre.tex')
-rw-r--r-- Doc/libre.tex 163
1 files changed, 88 insertions, 75 deletions
diff --git a/Doc/libre.tex b/Doc/libre.tex
index e692e7e..7644c7e 100644
--- a/Doc/libre.tex
+++ b/Doc/libre.tex
@@ -113,8 +113,9 @@ backslash as an escape sequence in string literals; if the escape
sequence isn't recognized by Python's parser, the backslash and
subsequent character are included in the resulting string. However,
if Python would recognize the resulting sequence, the backslash should
-be repeated twice. This is complicated and hard to understand, so
-it's highly recommended that you use raw strings for all but the simplest expressions.
+be repeated twice. This is complicated and hard to understand, so
+it's highly recommended that you use raw strings for all but the
+simplest expressions.
%
\item[\code{[]}] Used to indicate a set of characters. Characters can
be listed individually, or a range of characters can be indicated by
@@ -149,12 +150,13 @@ class: \code{[(] [)]}.
determines what the meaning and further syntax of the construct is.
Following are the currently supported extensions.
%
-\item[\code{(?iLmsx)}] (One or more letters from the set 'i', 'L', 'm', 's',
-'x'.) The group matches the empty string; the letters set the
-corresponding flags (re.I, re.L, re.M, re.S, re.X) for the entire regular
-expression. This is useful if you wish include the flags as part of
-the regular expression, instead of passing a \var{flag} argument to
-the \code{compile} function.
+\item[\code{(?iLmsx)}] (One or more letters from the set '\code{i}',
+'\code{L}', '\code{m}', '\code{s}', '\code{x}'.) The group matches
+the empty string; the letters set the corresponding flags
+(\code{re.I}, \code{re.L}, \code{re.M}, \code{re.S}, \code{re.X}) for
+the entire regular expression. This is useful if you wish to include the
+flags as part of the regular expression, instead of passing a
+\var{flag} argument to the \code{compile()} function.
%
\item[\code{(?:...)}] A non-grouping version of regular parentheses.
Matches whatever's inside the parentheses, but the text matched by the
@@ -171,19 +173,24 @@ referenced as the numbered group 1.
For example, if the pattern is
\code{(?P<id>[a-zA-Z_]\e w*)}, the group can be referenced by its
name in arguments to methods of match objects, such as \code{m.group('id')}
-or \code{m.end('id')}, and also by name in pattern text (e.g. \code{(?P=id)}) and
-replacement text (e.g. \code{\e g<id>}).
+or \code{m.end('id')}, and also by name in pattern text
+(e.g. \code{(?P=id)}) and replacement text (e.g. \code{\e g<id>}).
%
-\item[\code{(?P=\var{name})}] Matches whatever text was matched by the earlier group named \var{name}.
+\item[\code{(?P=\var{name})}] Matches whatever text was matched by the
+earlier group named \var{name}.
%
-\item[\code{(?\#...)}] A comment; the contents of the parentheses are simply ignored.
+\item[\code{(?\#...)}] A comment; the contents of the parentheses are
+simply ignored.
%
-\item[\code{(?=...)}] Matches if \code{...} matches next, but doesn't consume any of the string. This is called a lookahead assertion. For example,
-\code{Isaac (?=Asimov)} will match 'Isaac~' only if it's followed by 'Asimov'.
+\item[\code{(?=...)}] Matches if \code{...} matches next, but doesn't
+consume any of the string. This is called a lookahead assertion. For
+example, \code{Isaac (?=Asimov)} will match 'Isaac~' only if it's
+followed by 'Asimov'.
%
-\item[\code{(?!...)}] Matches if \code{...} doesn't match next. This is a negative lookahead assertion. For example,
-For example,
-\code{Isaac (?!Asimov)} will match 'Isaac~' only if it's \emph{not} followed by 'Asimov'.
+\item[\code{(?!...)}] Matches if \code{...} doesn't match next. This
+is a negative lookahead assertion. For example,
+\code{Isaac (?!Asimov)} will match 'Isaac~' only if it's \emph{not}
+followed by 'Asimov'.
\end{itemize}
@@ -227,15 +234,16 @@ equivalent to the set \code{[ \e t\e n\e r\e f\e v]}.
\item[\code{\e S}]Matches any non-whitespace character; this is
equivalent to the set \code{[\^ \e t\e n\e r\e f\e v]}.
%
-\item[\code{\e w}]When the LOCALE flag is not specified, matches any alphanumeric character; this is
-equivalent to the set \code{[a-zA-Z0-9_]}. With LOCALE, it will match
-the set \code{[0-9_]} plus whatever characters are defined as letters
-for the current locale.
+\item[\code{\e w}]When the \code{LOCALE} flag is not specified,
+matches any alphanumeric character; this is equivalent to the set
+\code{[a-zA-Z0-9_]}. With \code{LOCALE}, it will match the set
+\code{[0-9_]} plus whatever characters are defined as letters for the
+current locale.
%
-\item[\code{\e W}]When the LOCALE flag is not specified, matches any
-non-alphanumeric character; this is equivalent to the set
-\code{[{\^}a-zA-Z0-9_]}. With LOCALE, it will match any character
-not in the set \code{[0-9_]}, and not defined as a letter
+\item[\code{\e W}]When the \code{LOCALE} flag is not specified,
+matches any non-alphanumeric character; this is equivalent to the set
+\code{[{\^}a-zA-Z0-9_]}. With \code{LOCALE}, it will match any
+character not in the set \code{[0-9_]}, and not defined as a letter
for the current locale.
\item[\code{\e Z}]Matches only at the end of the string.
@@ -254,8 +262,8 @@ The module defines the following functions and constants, and an exception:
\begin{funcdesc}{compile}{pattern\optional{\, flags}}
Compile a regular expression pattern into a regular expression
- object, which can be used for matching using its \code{match} and
- \code{search} methods, described below.
+ object, which can be used for matching using its \code{match()} and
+ \code{search()} methods, described below.
The expression's behaviour can be modified by specifying a
\var{flags} value. Values can be any of the following variables,
@@ -266,34 +274,34 @@ The module defines the following functions and constants, and an exception:
% The use of \quad in the item labels is ugly but adds enough space
% to the label that it doesn't get visually run-in with the text.
-\item[I or IGNORECASE or \code{(?i)}\quad]
+\item[\code{I} or \code{IGNORECASE} or \code{(?i)}\quad]
Perform case-insensitive matching; expressions like \code{[A-Z]} will match
lowercase letters, too. This is not affected by the current locale.
-\item[L or LOCALE or \code{(?L)}\quad]
+\item[\code{L} or \code{LOCALE} or \code{(?L)}\quad]
Make \code{\e w}, \code{\e W}, \code{\e b}, and
\code{\e B} dependent on the current locale.
-\item[M or MULTILINE or \code{(?m)}\quad]
+\item[\code{M} or \code{MULTILINE} or \code{(?m)}\quad]
When specified, the pattern character \code{\^} matches at the
- beginning of the string and at the beginning of each line
- (immediately following each newline); and the pattern character
+beginning of the string and at the beginning of each line
+(immediately following each newline); and the pattern character
\code{\$} matches at the end of the string and at the end of each line
(immediately preceding each newline).
By default, \code{\^} matches only at the beginning of the string, and
\code{\$} only at the end of the string and immediately before the
newline (if any) at the end of the string.
-\item[S or DOTALL or \code{(?s)}\quad]
+\item[\code{S} or \code{DOTALL} or \code{(?s)}\quad]
Make the \code{.} special character match any character at all, including a
newline; without this flag, \code{.} will match anything \emph{except}
a newline.
-\item[X or VERBOSE or \code{(?x)}\quad]
+\item[\code{X} or \code{VERBOSE} or \code{(?x)}\quad]
Ignore whitespace within the pattern
except when in a character class or preceded by an unescaped
@@ -311,11 +319,11 @@ result = prog.match(str)
\end{verbatim}\ecode
%
is equivalent to
-%
-\bcode\begin{verbatim}
+
+\begin{verbatim}
result = re.match(pat, str)
-\end{verbatim}\ecode
-%
+\end{verbatim}
+
but the version using \code{compile()} is more efficient when the
expression will be used several times in a single program.
%(The compiled version of the last pattern passed to \code{regex.match()} or
@@ -340,7 +348,8 @@ expression will be used several times in a single program.
\begin{funcdesc}{search}{pattern\, string\optional{\, flags}}
Scan through \var{string} looking for a location where the regular
- expression \var{pattern} produces a match, and return a corresponding \code{MatchObject} instance.
+ expression \var{pattern} produces a match, and return a
+ corresponding \code{MatchObject} instance.
Return \code{None} if no
position in the string matches the pattern; note that this is
different from finding a zero-length match at some point in the string.
@@ -390,11 +399,11 @@ The pattern may be a string or a
regex object; if you need to specify
regular expression flags, you must use a regex object, or use
embedded modifiers in a pattern; e.g.
-%
-\bcode\begin{verbatim}
+
+\begin{verbatim}
sub("(?i)b+", "x", "bbbb BBBB") returns 'x x'.
-\end{verbatim}\ecode
-%
+\end{verbatim}
+
The optional argument \var{count} is the maximum number of pattern
occurrences to be replaced; \var{count} must be a non-negative integer, and
the default value of 0 means to replace all occurrences.
@@ -405,7 +414,7 @@ previous match, so \code{sub('x*', '-', 'abc')} returns '-a-b-c-'.
\begin{funcdesc}{subn}{pattern\, repl\, string\optional{, count=0}}
Perform the same operation as \code{sub()}, but return a tuple
-\code{(new_string, number_of_subs_made)}.
+\code{(\var{new_string}, \var{number_of_subs_made})}.
\end{funcdesc}
\begin{excdesc}{error}
@@ -445,19 +454,19 @@ attributes:
different from finding a zero-length match at some point in the string.
The optional \var{pos} and \var{endpos} parameters have the same
- meaning as for the \code{match} method.
+ meaning as for the \code{match()} method.
\end{funcdesc}
\begin{funcdesc}{split}{string\optional{, maxsplit=0}}
-Identical to the \code{split} function, using the compiled pattern.
+Identical to the \code{split()} function, using the compiled pattern.
\end{funcdesc}
\begin{funcdesc}{sub}{repl\, string\optional{, count=0}}
-Identical to the \code{sub} function, using the compiled pattern.
+Identical to the \code{sub()} function, using the compiled pattern.
\end{funcdesc}
\begin{funcdesc}{subn}{repl\, string\optional{, count=0}}
-Identical to the \code{subn} function, using the compiled pattern.
+Identical to the \code{subn()} function, using the compiled pattern.
\end{funcdesc}
\renewcommand{\indexsubitem}{(regex attribute)}
@@ -477,8 +486,9 @@ symbolic groups were used in the pattern.
The pattern string from which the regex object was compiled.
\end{datadesc}
-\subsection{MatchObjects}
-\code{Matchobject} instances support the following methods and attributes:
+\subsection{Match Objects}
+
+\code{MatchObject} instances support the following methods and attributes:
\begin{funcdesc}{group}{\optional{g1, g2, ...}}
Returns one or more groups of the match. If there is a single
@@ -495,12 +505,13 @@ the \var{index} arguments may also be strings identifying groups by
their group name.
A moderately complicated example:
-\bcode\begin{verbatim}
+
+\begin{verbatim}
m = re.match(r"(?P<int>\d+)\.(\d*)", '3.14')
-\end{verbatim}\ecode
-%
-After performing this match, \code{m.group(1)} is \code{'3'}, as is \code{m.group('int')}.
-\code{m.group(2)} is \code{'14'}.
+\end{verbatim}
+
+After performing this match, \code{m.group(1)} is \code{'3'}, as is
+\code{m.group('int')}. \code{m.group(2)} is \code{'14'}.
\end{funcdesc}
\begin{funcdesc}{groups}{}
@@ -519,37 +530,41 @@ singleton tuple is returned in such cases.)
Return the indices of the start and end of the substring
matched by \var{group}. Return \code{None} if \var{group} exists but
did not contribute to the match. For a match object
-\code{m}, and a group \code{g} that did contribute to the match, the
-substring matched by group \code{g} (equivalent to \code{m.group(g)}) is
-\bcode\begin{verbatim}
- m.string[m.start(g):m.end(g)]
-\end{verbatim}\ecode
-%
+\var{m}, and a group \var{g} that did contribute to the match, the
+substring matched by group \var{g} (equivalent to
+\code{\var{m}.group(\var{g})}) is
+
+\begin{verbatim}
+m.string[m.start(g):m.end(g)]
+\end{verbatim}
+
Note that
\code{m.start(\var{group})} will equal \code{m.end(\var{group})} if
-\var{group} matched a null string. For example, after \code{m =
-re.search('b(c?)', 'cba')}, \code{m.start(0)} is 1, \code{m.end(0)} is
-2, \code{m.start(1)} and \code{m.end(1)} are both 2, and
-\code{m.start(2)} raises an \code{IndexError} exception.
+\var{group} matched a null string. For example, after \code{\var{m} =
+re.search('b(c?)', 'cba')}, \code{\var{m}.start(0)} is 1,
+\code{\var{m}.end(0)} is 2, \code{\var{m}.start(1)} and
+\code{\var{m}.end(1)} are both 2, and \code{\var{m}.start(2)} raises
+an \code{IndexError} exception.
\end{funcdesc}
\begin{funcdesc}{span}{group}
-Return the 2-tuple \code{(start(\var{group}), end(\var{group}))}.
+For \code{MatchObject} \var{m}, return the 2-tuple
+\code{(\var{m}.start(\var{group}), \var{m}.end(\var{group}))}.
Note that if \var{group} did not contribute to the match, this is
\code{(None, None)}.
\end{funcdesc}
\begin{datadesc}{pos}
The value of \var{pos} which was passed to the
-\code{search} or \code{match} function. This is the index into the
-string at which the regex engine started looking for a match.
+\code{search()} or \code{match()} function. This is the index into
+the string at which the regex engine started looking for a match.
\end{datadesc}
\begin{datadesc}{endpos}
The value of \var{endpos} which was passed to the
-\code{search} or \code{match} function. This is the index into the
-string beyond which the regex engine will not go.
+\code{search()} or \code{match()} function. This is the index into
+the string beyond which the regex engine will not go.
\end{datadesc}
\begin{datadesc}{re}
@@ -563,9 +578,7 @@ The string passed to \code{match()} or \code{search()}.
\begin{seealso}
\seetext{Jeffrey Friedl, \emph{Mastering Regular Expressions},
-O'Reilly. The Python material in this book dates from before the re
-module, but it covers writing good regular expression patterns in
-great detail.}
+O'Reilly. The Python material in this book dates from before the
+\code{re} module, but it covers writing good regular expression
+patterns in great detail.}
\end{seealso}
-
-