diff options
author | stanton <stanton> | 1999-06-10 04:28:49 (GMT) |
---|---|---|
committer | stanton <stanton> | 1999-06-10 04:28:49 (GMT) |
commit | 81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc (patch) | |
tree | d6b1bb3e195e2ca1b7f57a6a2987c276d5013ffa /generic/tclRegexp.c | |
parent | d94b3c0f6b26564b83b3767980271dd332314d06 (diff) | |
download | tcl-81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc.zip tcl-81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc.tar.gz tcl-81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc.tar.bz2 |
* generic/tclUnicodeObj.c: Lots of cleanup and simplification.
Fixed several memory bugs. Added TclAppendUnicodeToObj.
* generic/tclInt.h: Added declarations for various Unicode string
functions.
* generic/tclRegexp.c:
* generic/tclCmdMZ.c: Changed to use new Unicode string interfaces
for better performance.
* generic/tclRegexp.h:
* generic/tclRegexp.c:
* generic/tcl.h:
* generic/tcl.decls: Added Tcl_RegExpMatchObj and
Tcl_RegExpGetInfo calls to access lower level regexp API. These
features are needed by Expect. This is a preliminary
implementation pending final review and cleanup.
* generic/tclCmdMZ.c:
* tests/string.test: Fixed bug where string map failed on null
strings.
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r-- | generic/tclRegexp.c | 142 |
1 files changed, 114 insertions, 28 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 2318780..7590c8d 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.6 1999/06/02 01:53:31 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.7 1999/06/10 04:28:51 stanton Exp $ */ #include "tclInt.h" @@ -177,33 +177,21 @@ Tcl_RegExpExec(interp, re, string, start) * this identifies beginning of larger * string, so that "^" won't match. */ { - int result, numChars; - Tcl_DString stringBuffer; - Tcl_UniChar *uniString; - - TclRegexp *regexpPtr = (TclRegexp *) re; + int flags; /* - * Remember the UTF-8 string so Tcl_RegExpRange() can convert the - * matches from character to byte offsets. + * If the starting point is offset from the beginning of the buffer, + * then we need to tell the regexp engine not to match "^". */ - regexpPtr->string = string; - - Tcl_DStringInit(&stringBuffer); - uniString = Tcl_UtfToUniCharDString(string, -1, &stringBuffer); - numChars = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar); - - /* - * Perform the regexp match. - */ - - result = TclRegExpExecUniChar(interp, re, uniString, numChars, -1, - ((string > start) ? REG_NOTBOL : 0)); - - Tcl_DStringFree(&stringBuffer); + if (string > start) { + flags = REG_NOTBOL; + } else { + flags = 0; + } - return result; + return Tcl_RegExpMatchObj(interp, re, Tcl_NewStringObj(string, -1), + 0 /* offset */, -1 /* nmatches */, flags); } /* @@ -238,16 +226,16 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) * in (sub-) range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; + char *string; if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; } else if (regexpPtr->matches[index].rm_so < 0) { *startPtr = *endPtr = NULL; } else { - *startPtr = Tcl_UtfAtIndex(regexpPtr->string, - regexpPtr->matches[index].rm_so); - *endPtr = Tcl_UtfAtIndex(regexpPtr->string, - regexpPtr->matches[index].rm_eo); + string = Tcl_GetString(regexpPtr->objPtr); + *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); + *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); } } @@ -290,8 +278,9 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) size_t last = regexpPtr->re.re_nsub + 1; size_t nm = last; - if (nmatches >= 0 && (size_t) nmatches < nm) + if (nmatches >= 0 && (size_t) nmatches < nm) { nm = (size_t) nmatches; + } status = TclReExec(®expPtr->re, wString, (size_t) numChars, ®expPtr->details, nm, regexpPtr->matches, flags); @@ -398,6 +387,100 @@ Tcl_RegExpMatch(interp, string, pattern) /* *---------------------------------------------------------------------- * + * Tcl_RegExpMatchObj -- + * + * Match a precompiled regexp against the given object. + * + * Results: + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. + * + * Side effects: + * Converts the object to a Unicode object. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; must have + * been returned by previous call to + * Tcl_GetRegExpFromObj. */ + Tcl_Obj *objPtr; /* String against which to match re. */ + int offset; /* Character index that marks where matching + * should begin. */ + int nmatches; /* How many subexpression matches (counting + * the whole match as subexpression 0) are + * of interest. -1 means all of them. */ + int flags; /* Regular expression execution flags. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) re; + Tcl_Obj *oldPtr = regexpPtr->objPtr; + Tcl_UniChar *udata; + int length; + + /* + * Bump the refcount before we do anything in case the object + * was newly created. + */ + + Tcl_IncrRefCount(objPtr); + + udata = TclGetUnicodeFromObj(objPtr); + length = TclGetUnicodeLengthFromObj(objPtr); + + /* + * Save the target object so we can extract strings from it later. + */ + + regexpPtr->objPtr = objPtr; + if (oldPtr) { + Tcl_DecrRefCount(oldPtr); + } + + if (offset > length) { + offset = length; + } + udata += offset; + length -= offset; + + return TclRegExpExecUniChar(interp, re, udata, length, nmatches, flags); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpGetInfo -- + * + * Retrieve information about the current match. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +void +Tcl_RegExpGetInfo(regexp, infoPtr) + Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */ + Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) regexp; + + infoPtr->nsubs = regexpPtr->re.re_nsub; + infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches; + infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so; +} + +/* + *---------------------------------------------------------------------- + * * TclRegExpMatchObj -- * * See if a string matches a regular expression pattern object. @@ -790,6 +873,9 @@ CompileRegexp(interp, string, length, flags) */ regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); + regexpPtr->objPtr = NULL; + regexpPtr->details.rm_extend.rm_so = -1; + regexpPtr->details.rm_extend.rm_eo = -1; /* * Get the up-to-date string representation and map to unicode. |