summaryrefslogtreecommitdiffstats
path: root/generic/tclRegexp.c
diff options
context:
space:
mode:
authorstanton <stanton>1999-06-17 19:32:14 (GMT)
committerstanton <stanton>1999-06-17 19:32:14 (GMT)
commit132b0b161f32aebd943a596184fdda97aa960c7d (patch)
tree356c44709bbda344154f1a4fe7811fabda634581 /generic/tclRegexp.c
parent0db76eb23cf35b0d912eb915711eecbe51c65ac1 (diff)
downloadtcl-132b0b161f32aebd943a596184fdda97aa960c7d.zip
tcl-132b0b161f32aebd943a596184fdda97aa960c7d.tar.gz
tcl-132b0b161f32aebd943a596184fdda97aa960c7d.tar.bz2
* generic/tclTest.c:
* generic/tclRegexp.h: * generic/tclRegexp.c: * generic/tcl.h: * generic/tcl.decls: Renamed Tcl_RegExpMatchObj to Tcl_RegExpExecObj and added a new Tcl_RegExpMatchObj that is equivalent to Tcl_RegExpMatch. Added public macros for the regexp compile/execute flags. Changed to store either an object pointer or a string pointer in the TclRegexp structure. Changed to avoid adding a reference to the object or copying the string.
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r--generic/tclRegexp.c134
1 files changed, 77 insertions, 57 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 3e28224..6736465 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.8 1999/06/15 01:16:24 hershey Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.9 1999/06/17 19:32:15 stanton Exp $
*/
#include "tclInt.h"
@@ -94,6 +94,9 @@ static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
+static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
+ Tcl_RegExp re, CONST Tcl_UniChar *uniString,
+ int numChars, int nmatches, int flags));
static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
Tcl_Obj *objPtr));
@@ -177,7 +180,10 @@ Tcl_RegExpExec(interp, re, string, start)
* this identifies beginning of larger
* string, so that "^" won't match. */
{
- int flags;
+ int flags, result, numChars;
+ TclRegexp *regexp = (TclRegexp *)re;
+ Tcl_DString ds;
+ Tcl_UniChar *ustr;
/*
* If the starting point is offset from the beginning of the buffer,
@@ -190,8 +196,25 @@ Tcl_RegExpExec(interp, re, string, start)
flags = 0;
}
- return Tcl_RegExpMatchObj(interp, re, Tcl_NewStringObj(string, -1),
- 0 /* offset */, -1 /* nmatches */, flags);
+ /*
+ * Remember the string for use by Tcl_RegExpRange().
+ */
+
+ regexp->string = string;
+ regexp->objPtr = NULL;
+
+ /*
+ * Convert the string to Unicode and perform the match.
+ */
+
+ Tcl_DStringInit(&ds);
+ ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
+ numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
+ result = RegExpExecUniChar(interp, re, ustr, numChars,
+ -1 /* nmatches */, flags);
+ Tcl_DStringFree(&ds);
+
+ return result;
}
/*
@@ -226,14 +249,18 @@ Tcl_RegExpRange(re, index, startPtr, endPtr)
* in (sub-) range here. */
{
TclRegexp *regexpPtr = (TclRegexp *) re;
- char *string;
+ CONST char *string;
if ((size_t) index > regexpPtr->re.re_nsub) {
*startPtr = *endPtr = NULL;
} else if (regexpPtr->matches[index].rm_so < 0) {
*startPtr = *endPtr = NULL;
} else {
- string = Tcl_GetString(regexpPtr->objPtr);
+ if (regexpPtr->objPtr) {
+ string = Tcl_GetString(regexpPtr->objPtr);
+ } else {
+ string = regexpPtr->string;
+ }
*startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
*endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
}
@@ -242,7 +269,7 @@ Tcl_RegExpRange(re, index, startPtr, endPtr)
/*
*---------------------------------------------------------------------------
*
- * TclRegExpExecUniChar --
+ * RegExpExecUniChar --
*
* Execute the regular expression matcher using a compiled form of a
* regular expression and save information about any match that is
@@ -260,8 +287,8 @@ Tcl_RegExpRange(re, index, startPtr, endPtr)
*----------------------------------------------------------------------
*/
-int
-TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
+static int
+RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
Tcl_Interp *interp; /* Interpreter to use for error reporting. */
Tcl_RegExp re; /* Compiled regular expression; returned by
* a previous call to Tcl_GetRegExpFromObj */
@@ -370,7 +397,7 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr)
int
Tcl_RegExpMatch(interp, string, pattern)
- Tcl_Interp *interp; /* Used for error reporting. */
+ Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
char *string; /* String. */
char *pattern; /* Regular expression to match against
* string. */
@@ -387,9 +414,9 @@ Tcl_RegExpMatch(interp, string, pattern)
/*
*----------------------------------------------------------------------
*
- * Tcl_RegExpMatchObj --
+ * Tcl_RegExpExecObj --
*
- * Match a precompiled regexp against the given object.
+ * Execute a precompiled regexp against the given object.
*
* Results:
* If an error occurs during the matching operation then -1
@@ -404,7 +431,7 @@ Tcl_RegExpMatch(interp, string, pattern)
*/
int
-Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags)
+Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
Tcl_Interp *interp; /* Interpreter to use for error reporting. */
Tcl_RegExp re; /* Compiled regular expression; must have
* been returned by previous call to
@@ -418,78 +445,74 @@ Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags)
int flags; /* Regular expression execution flags. */
{
TclRegexp *regexpPtr = (TclRegexp *) re;
- Tcl_Obj *oldPtr = regexpPtr->objPtr;
Tcl_UniChar *udata;
int length;
/*
- * Bump the refcount before we do anything in case the object
- * was newly created.
+ * Save the target object so we can extract strings from it later.
*/
- Tcl_IncrRefCount(objPtr);
+ regexpPtr->string = NULL;
+ regexpPtr->objPtr = objPtr;
udata = Tcl_GetUnicode(objPtr);
length = Tcl_GetCharLength(objPtr);
- /*
- * Save the target object so we can extract strings from it later.
- */
-
- regexpPtr->objPtr = objPtr;
- if (oldPtr) {
- Tcl_DecrRefCount(oldPtr);
- }
-
if (offset > length) {
offset = length;
}
udata += offset;
length -= offset;
- return TclRegExpExecUniChar(interp, re, udata, length, nmatches, flags);
+ return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
}
/*
*----------------------------------------------------------------------
*
- * Tcl_RegExpGetInfo --
+ * Tcl_RegExpMatchObj --
*
- * Retrieve information about the current match.
+ * See if an object matches a regular expression.
*
* Results:
- * None.
+ * If an error occurs during the matching operation then -1
+ * is returned and the interp's result contains an error message.
+ * Otherwise the return value is 1 if "string" matches "pattern"
+ * and 0 otherwise.
*
* Side effects:
- * None.
+ * Changes the internal rep of the pattern and string objects.
*
*----------------------------------------------------------------------
*/
-void
-Tcl_RegExpGetInfo(regexp, infoPtr)
- Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
- Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
+int
+Tcl_RegExpMatchObj(interp, stringObj, patternObj)
+ Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
+ Tcl_Obj *stringObj; /* Object containing the String to search. */
+ Tcl_Obj *patternObj; /* Regular expression to match against
+ * string. */
{
- TclRegexp *regexpPtr = (TclRegexp *) regexp;
+ Tcl_RegExp re;
- infoPtr->nsubs = regexpPtr->re.re_nsub;
- infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
- infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
+ re = Tcl_GetRegExpFromObj(interp, patternObj,
+ TCL_REG_ADVANCED | TCL_REG_NOSUB);
+ if (re == NULL) {
+ return -1;
+ }
+ return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
+ 0 /* nmatches */, 0 /* flags */);
}
/*
*----------------------------------------------------------------------
*
- * TclRegExpMatchObj --
+ * Tcl_RegExpGetInfo --
*
- * See if a string matches a regular expression pattern object.
+ * Retrieve information about the current match.
*
* Results:
- * If an error occurs during the matching operation then -1
- * is returned and the interp's result contains an error message.
- * Otherwise the return value is 1 if "string" matches "pattern"
- * and 0 otherwise.
+ * None.
*
* Side effects:
* None.
@@ -497,20 +520,16 @@ Tcl_RegExpGetInfo(regexp, infoPtr)
*----------------------------------------------------------------------
*/
-int
-TclRegExpMatchObj(interp, string, patObj)
- Tcl_Interp *interp; /* Used for error reporting. */
- char *string; /* String. */
- Tcl_Obj *patObj; /* Regular expression to match against
- * string. */
+void
+Tcl_RegExpGetInfo(regexp, infoPtr)
+ Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
+ Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
{
- Tcl_RegExp re;
+ TclRegexp *regexpPtr = (TclRegexp *) regexp;
- re = Tcl_GetRegExpFromObj(interp, patObj, REG_ADVANCED);
- if (re == NULL) {
- return -1;
- }
- return Tcl_RegExpExec(interp, re, string, string);
+ infoPtr->nsubs = regexpPtr->re.re_nsub;
+ infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
+ infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
}
/*
@@ -874,6 +893,7 @@ CompileRegexp(interp, string, length, flags)
regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
regexpPtr->objPtr = NULL;
+ regexpPtr->string = NULL;
regexpPtr->details.rm_extend.rm_so = -1;
regexpPtr->details.rm_extend.rm_eo = -1;