summaryrefslogtreecommitdiffstats
path: root/tcl8.6/doc/RegExp.3
diff options
context:
space:
mode:
authorWilliam Joye <wjoye@cfa.harvard.edu>2016-12-21 22:46:09 (GMT)
committerWilliam Joye <wjoye@cfa.harvard.edu>2016-12-21 22:46:09 (GMT)
commit768f87f613cc9789fcf8073018fa02178c8c91df (patch)
treeec633f5608ef498bee52a5f42c12c49493ec8bf8 /tcl8.6/doc/RegExp.3
parent07e464099b99459d0a37757771791598ef3395d9 (diff)
parent05fa4c89f20e9769db0e6c0b429cef2590771ace (diff)
downloadblt-768f87f613cc9789fcf8073018fa02178c8c91df.zip
blt-768f87f613cc9789fcf8073018fa02178c8c91df.tar.gz
blt-768f87f613cc9789fcf8073018fa02178c8c91df.tar.bz2
Merge commit '05fa4c89f20e9769db0e6c0b429cef2590771ace' as 'tcl8.6'
Diffstat (limited to 'tcl8.6/doc/RegExp.3')
-rw-r--r--tcl8.6/doc/RegExp.3383
1 files changed, 383 insertions, 0 deletions
diff --git a/tcl8.6/doc/RegExp.3 b/tcl8.6/doc/RegExp.3
new file mode 100644
index 0000000..aa757bc
--- /dev/null
+++ b/tcl8.6/doc/RegExp.3
@@ -0,0 +1,383 @@
+'\"
+'\" Copyright (c) 1994 The Regents of the University of California.
+'\" Copyright (c) 1994-1996 Sun Microsystems, Inc.
+'\" Copyright (c) 1998-1999 Scriptics Corporation
+'\"
+'\" See the file "license.terms" for information on usage and redistribution
+'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+'\"
+.TH Tcl_RegExpMatch 3 8.1 Tcl "Tcl Library Procedures"
+.so man.macros
+.BS
+.SH NAME
+Tcl_RegExpMatch, Tcl_RegExpCompile, Tcl_RegExpExec, Tcl_RegExpRange, Tcl_GetRegExpFromObj, Tcl_RegExpMatchObj, Tcl_RegExpExecObj, Tcl_RegExpGetInfo \- Pattern matching with regular expressions
+.SH SYNOPSIS
+.nf
+\fB#include <tcl.h>\fR
+.sp
+int
+\fBTcl_RegExpMatchObj\fR(\fIinterp\fR, \fItextObj\fR, \fIpatObj\fR)
+.sp
+int
+\fBTcl_RegExpMatch\fR(\fIinterp\fR, \fItext\fR, \fIpattern\fR)
+.sp
+Tcl_RegExp
+\fBTcl_RegExpCompile\fR(\fIinterp\fR, \fIpattern\fR)
+.sp
+int
+\fBTcl_RegExpExec\fR(\fIinterp\fR, \fIregexp\fR, \fItext\fR, \fIstart\fR)
+.sp
+void
+\fBTcl_RegExpRange\fR(\fIregexp\fR, \fIindex\fR, \fIstartPtr\fR, \fIendPtr\fR)
+.sp
+Tcl_RegExp
+\fBTcl_GetRegExpFromObj\fR(\fIinterp\fR, \fIpatObj\fR, \fIcflags\fR)
+.sp
+int
+\fBTcl_RegExpExecObj\fR(\fIinterp\fR, \fIregexp\fR, \fItextObj\fR, \fIoffset\fR, \fInmatches\fR, \fIeflags\fR)
+.sp
+void
+\fBTcl_RegExpGetInfo\fR(\fIregexp\fR, \fIinfoPtr\fR)
+.fi
+.SH ARGUMENTS
+.AS Tcl_RegExpInfo *interp in/out
+.AP Tcl_Interp *interp in
+Tcl interpreter to use for error reporting. The interpreter may be
+NULL if no error reporting is desired.
+.AP Tcl_Obj *textObj in/out
+Refers to the value from which to get the text to search. The
+internal representation of the value may be converted to a form that
+can be efficiently searched.
+.AP Tcl_Obj *patObj in/out
+Refers to the value from which to get a regular expression. The
+compiled regular expression is cached in the value.
+.AP char *text in
+Text to search for a match with a regular expression.
+.AP "const char" *pattern in
+String in the form of a regular expression pattern.
+.AP Tcl_RegExp regexp in
+Compiled regular expression. Must have been returned previously
+by \fBTcl_GetRegExpFromObj\fR or \fBTcl_RegExpCompile\fR.
+.AP char *start in
+If \fItext\fR is just a portion of some other string, this argument
+identifies the beginning of the larger string.
+If it is not the same as \fItext\fR, then no
+.QW \fB^\fR
+matches will be allowed.
+.AP int index in
+Specifies which range is desired: 0 means the range of the entire
+match, 1 or greater means the range that matched a parenthesized
+sub-expression.
+.AP "const char" **startPtr out
+The address of the first character in the range is stored here, or
+NULL if there is no such range.
+.AP "const char" **endPtr out
+The address of the character just after the last one in the range
+is stored here, or NULL if there is no such range.
+.AP int cflags in
+OR-ed combination of the compilation flags \fBTCL_REG_ADVANCED\fR,
+\fBTCL_REG_EXTENDED\fR, \fBTCL_REG_BASIC\fR, \fBTCL_REG_EXPANDED\fR,
+\fBTCL_REG_QUOTE\fR, \fBTCL_REG_NOCASE\fR, \fBTCL_REG_NEWLINE\fR,
+\fBTCL_REG_NLSTOP\fR, \fBTCL_REG_NLANCH\fR, \fBTCL_REG_NOSUB\fR, and
+\fBTCL_REG_CANMATCH\fR. See below for more information.
+.AP int offset in
+The character offset into the text where matching should begin.
+The value of the offset has no impact on \fB^\fR matches. This
+behavior is controlled by \fIeflags\fR.
+.AP int nmatches in
+The number of matching subexpressions that should be remembered for
+later use. If this value is 0, then no subexpression match
+information will be computed. If the value is \-1, then
+all of the matching subexpressions will be remembered. Any other
+value will be taken as the maximum number of subexpressions to
+remember.
+.AP int eflags in
+OR-ed combination of the execution flags \fBTCL_REG_NOTBOL\fR and
+\fBTCL_REG_NOTEOL\fR. See below for more information.
+.AP Tcl_RegExpInfo *infoPtr out
+The address of the location where information about a previous match
+should be stored by \fBTcl_RegExpGetInfo\fR.
+.BE
+.SH DESCRIPTION
+.PP
+\fBTcl_RegExpMatch\fR determines whether its \fIpattern\fR argument
+matches \fIregexp\fR, where \fIregexp\fR is interpreted
+as a regular expression using the rules in the \fBre_syntax\fR
+reference page.
+If there is a match then \fBTcl_RegExpMatch\fR returns 1.
+If there is no match then \fBTcl_RegExpMatch\fR returns 0.
+If an error occurs in the matching process (e.g. \fIpattern\fR
+is not a valid regular expression) then \fBTcl_RegExpMatch\fR
+returns \-1 and leaves an error message in the interpreter result.
+\fBTcl_RegExpMatchObj\fR is similar to \fBTcl_RegExpMatch\fR except it
+operates on the Tcl values \fItextObj\fR and \fIpatObj\fR instead of
+UTF strings.
+\fBTcl_RegExpMatchObj\fR is generally more efficient than
+\fBTcl_RegExpMatch\fR, so it is the preferred interface.
+.PP
+\fBTcl_RegExpCompile\fR, \fBTcl_RegExpExec\fR, and \fBTcl_RegExpRange\fR
+provide lower-level access to the regular expression pattern matcher.
+\fBTcl_RegExpCompile\fR compiles a regular expression string into
+the internal form used for efficient pattern matching.
+The return value is a token for this compiled form, which can be
+used in subsequent calls to \fBTcl_RegExpExec\fR or \fBTcl_RegExpRange\fR.
+If an error occurs while compiling the regular expression then
+\fBTcl_RegExpCompile\fR returns NULL and leaves an error message
+in the interpreter result.
+Note: the return value from \fBTcl_RegExpCompile\fR is only valid
+up to the next call to \fBTcl_RegExpCompile\fR; it is not safe to
+retain these values for long periods of time.
+.PP
+\fBTcl_RegExpExec\fR executes the regular expression pattern matcher.
+It returns 1 if \fItext\fR contains a range of characters that
+match \fIregexp\fR, 0 if no match is found, and
+\-1 if an error occurs.
+In the case of an error, \fBTcl_RegExpExec\fR leaves an error
+message in the interpreter result.
+When searching a string for multiple matches of a pattern,
+it is important to distinguish between the start of the original
+string and the start of the current search.
+For example, when searching for the second occurrence of a
+match, the \fItext\fR argument might point to the character
+just after the first match; however, it is important for the
+pattern matcher to know that this is not the start of the entire string,
+so that it does not allow
+.QW \fB^\fR
+atoms in the pattern to match.
+The \fIstart\fR argument provides this information by pointing
+to the start of the overall string containing \fItext\fR.
+\fIStart\fR will be less than or equal to \fItext\fR; if it
+is less than \fItext\fR then no \fB^\fR matches will be allowed.
+.PP
+\fBTcl_RegExpRange\fR may be invoked after \fBTcl_RegExpExec\fR
+returns; it provides detailed information about what ranges of
+the string matched what parts of the pattern.
+\fBTcl_RegExpRange\fR returns a pair of pointers in \fI*startPtr\fR
+and \fI*endPtr\fR that identify a range of characters in
+the source string for the most recent call to \fBTcl_RegExpExec\fR.
+\fIIndex\fR indicates which of several ranges is desired:
+if \fIindex\fR is 0, information is returned about the overall range
+of characters that matched the entire pattern; otherwise,
+information is returned about the range of characters that matched the
+\fIindex\fR'th parenthesized subexpression within the pattern.
+If there is no range corresponding to \fIindex\fR then NULL
+is stored in \fI*startPtr\fR and \fI*endPtr\fR.
+.PP
+\fBTcl_GetRegExpFromObj\fR, \fBTcl_RegExpExecObj\fR, and
+\fBTcl_RegExpGetInfo\fR are value interfaces that provide the most
+direct control of Henry Spencer's regular expression library. For
+users that need to modify compilation and execution options directly,
+it is recommended that you use these interfaces instead of calling the
+internal regexp functions. These interfaces handle the details of UTF
+to Unicode translations as well as providing improved performance
+through caching in the pattern and string values.
+.PP
+\fBTcl_GetRegExpFromObj\fR attempts to return a compiled regular
+expression from the \fIpatObj\fR. If the value does not already
+contain a compiled regular expression it will attempt to create one
+from the string in the value and assign it to the internal
+representation of the \fIpatObj\fR. The return value of this function
+is of type \fBTcl_RegExp\fR. The return value is a token for this
+compiled form, which can be used in subsequent calls to
+\fBTcl_RegExpExecObj\fR or \fBTcl_RegExpGetInfo\fR. If an error
+occurs while compiling the regular expression then
+\fBTcl_GetRegExpFromObj\fR returns NULL and leaves an error message in
+the interpreter result. The regular expression token can be used as
+long as the internal representation of \fIpatObj\fR refers to the
+compiled form. The \fIcflags\fR argument is a bit-wise OR of
+zero or more of the following flags that control the compilation of
+\fIpatObj\fR:
+.RS 2
+.TP
+\fBTCL_REG_ADVANCED\fR
+Compile advanced regular expressions
+.PQ ARE s .
+This mode corresponds to
+the normal regular expression syntax accepted by the Tcl \fBregexp\fR and
+\fBregsub\fR commands.
+.TP
+\fBTCL_REG_EXTENDED\fR
+Compile extended regular expressions
+.PQ ERE s .
+This mode corresponds
+to the regular expression syntax recognized by Tcl 8.0 and earlier
+versions.
+.TP
+\fBTCL_REG_BASIC\fR
+Compile basic regular expressions
+.PQ BRE s .
+This mode corresponds
+to the regular expression syntax recognized by common Unix utilities
+like \fBsed\fR and \fBgrep\fR. This is the default if no flags are
+specified.
+.TP
+\fBTCL_REG_EXPANDED\fR
+Compile the regular expression (basic, extended, or advanced) using an
+expanded syntax that allows comments and whitespace. This mode causes
+non-backslashed non-bracket-expression white
+space and #-to-end-of-line comments to be ignored.
+.TP
+\fBTCL_REG_QUOTE\fR
+Compile a literal string, with all characters treated as ordinary characters.
+.TP
+\fBTCL_REG_NOCASE\fR
+Compile for matching that ignores upper/lower case distinctions.
+.TP
+\fBTCL_REG_NEWLINE\fR
+Compile for newline-sensitive matching. By default, newline is a
+completely ordinary character with no special meaning in either
+regular expressions or strings. With this flag,
+.QW [^
+bracket expressions and
+.QW .
+never match newline,
+.QW ^
+matches an empty string
+after any newline in addition to its normal function, and
+.QW $
+matches
+an empty string before any newline in addition to its normal function.
+\fBREG_NEWLINE\fR is the bit-wise OR of \fBREG_NLSTOP\fR and
+\fBREG_NLANCH\fR.
+.TP
+\fBTCL_REG_NLSTOP\fR
+Compile for partial newline-sensitive matching,
+with the behavior of
+.QW [^
+bracket expressions and
+.QW .
+affected, but not the behavior of
+.QW ^
+and
+.QW $ .
+In this mode,
+.QW [^
+bracket expressions and
+.QW .
+never match newline.
+.TP
+\fBTCL_REG_NLANCH\fR
+Compile for inverse partial newline-sensitive matching,
+with the behavior of
+.QW ^
+and
+.QW $
+(the
+.QW anchors )
+affected, but not the behavior of
+.QW [^
+bracket expressions and
+.QW . .
+In this mode
+.QW ^
+matches an empty string
+after any newline in addition to its normal function, and
+.QW $
+matches
+an empty string before any newline in addition to its normal function.
+.TP
+\fBTCL_REG_NOSUB\fR
+Compile for matching that reports only success or failure,
+not what was matched. This reduces compile overhead and may improve
+performance. Subsequent calls to \fBTcl_RegExpGetInfo\fR or
+\fBTcl_RegExpRange\fR will not report any match information.
+.TP
+\fBTCL_REG_CANMATCH\fR
+Compile for matching that reports the potential to complete a partial
+match given more text (see below).
+.RE
+.PP
+Only one of
+\fBTCL_REG_EXTENDED\fR,
+\fBTCL_REG_ADVANCED\fR,
+\fBTCL_REG_BASIC\fR, and
+\fBTCL_REG_QUOTE\fR may be specified.
+.PP
+\fBTcl_RegExpExecObj\fR executes the regular expression pattern
+matcher. It returns 1 if \fIobjPtr\fR contains a range of characters
+that match \fIregexp\fR, 0 if no match is found, and \-1 if an error
+occurs. In the case of an error, \fBTcl_RegExpExecObj\fR leaves an
+error message in the interpreter result. The \fInmatches\fR value
+indicates to the matcher how many subexpressions are of interest. If
+\fInmatches\fR is 0, then no subexpression match information is
+recorded, which may allow the matcher to make various optimizations.
+If the value is \-1, then all of the subexpressions in the pattern are
+remembered. If the value is a positive integer, then only that number
+of subexpressions will be remembered. Matching begins at the
+specified Unicode character index given by \fIoffset\fR. Unlike
+\fBTcl_RegExpExec\fR, the behavior of anchors is not affected by the
+offset value. Instead the behavior of the anchors is explicitly
+controlled by the \fIeflags\fR argument, which is a bit-wise OR of
+zero or more of the following flags:
+.RS 2
+.TP
+\fBTCL_REG_NOTBOL\fR
+The starting character will not be treated as the beginning of a
+line or the beginning of the string, so
+.QW ^
+will not match there.
+Note that this flag has no effect on how
+.QW \fB\eA\fR
+matches.
+.TP
+\fBTCL_REG_NOTEOL\fR
+The last character in the string will not be treated as the end of a
+line or the end of the string, so
+.QW $
+will not match there.
+Note that this flag has no effect on how
+.QW \fB\eZ\fR
+matches.
+.RE
+.PP
+\fBTcl_RegExpGetInfo\fR retrieves information about the last match
+performed with a given regular expression \fIregexp\fR. The
+\fIinfoPtr\fR argument contains a pointer to a structure that is
+defined as follows:
+.PP
+.CS
+typedef struct Tcl_RegExpInfo {
+ int \fInsubs\fR;
+ Tcl_RegExpIndices *\fImatches\fR;
+ long \fIextendStart\fR;
+} \fBTcl_RegExpInfo\fR;
+.CE
+.PP
+The \fInsubs\fR field contains a count of the number of parenthesized
+subexpressions within the regular expression. If the \fBTCL_REG_NOSUB\fR
+was used, then this value will be zero. The \fImatches\fR field
+points to an array of \fInsubs\fR+1 values that indicate the bounds of each
+subexpression matched. The first element in the array refers to the
+range matched by the entire regular expression, and subsequent elements
+refer to the parenthesized subexpressions in the order that they
+appear in the pattern. Each element is a structure that is defined as
+follows:
+.PP
+.CS
+typedef struct Tcl_RegExpIndices {
+ long \fIstart\fR;
+ long \fIend\fR;
+} \fBTcl_RegExpIndices\fR;
+.CE
+.PP
+The \fIstart\fR and \fIend\fR values are Unicode character indices
+relative to the offset location within \fIobjPtr\fR where matching began.
+The \fIstart\fR index identifies the first character of the matched
+subexpression. The \fIend\fR index identifies the first character
+after the matched subexpression. If the subexpression matched the
+empty string, then \fIstart\fR and \fIend\fR will be equal. If the
+subexpression did not participate in the match, then \fIstart\fR and
+\fIend\fR will be set to \-1.
+.PP
+The \fIextendStart\fR field in \fBTcl_RegExpInfo\fR is only set if the
+\fBTCL_REG_CANMATCH\fR flag was used. It indicates the first
+character in the string where a match could occur. If a match was
+found, this will be the same as the beginning of the current match.
+If no match was found, then it indicates the earliest point at which a
+match might occur if additional text is appended to the string. If it
+is no match is possible even with further text, this field will be set
+to \-1.
+.SH "SEE ALSO"
+re_syntax(n)
+.SH KEYWORDS
+match, pattern, regular expression, string, subexpression, Tcl_RegExpIndices, Tcl_RegExpInfo