diff options
author | stanton <stanton> | 1999-06-17 19:32:14 (GMT) |
---|---|---|
committer | stanton <stanton> | 1999-06-17 19:32:14 (GMT) |
commit | 132b0b161f32aebd943a596184fdda97aa960c7d (patch) | |
tree | 356c44709bbda344154f1a4fe7811fabda634581 | |
parent | 0db76eb23cf35b0d912eb915711eecbe51c65ac1 (diff) | |
download | tcl-132b0b161f32aebd943a596184fdda97aa960c7d.zip tcl-132b0b161f32aebd943a596184fdda97aa960c7d.tar.gz tcl-132b0b161f32aebd943a596184fdda97aa960c7d.tar.bz2 |
* generic/tclTest.c:
* generic/tclRegexp.h:
* generic/tclRegexp.c:
* generic/tcl.h:
* generic/tcl.decls: Renamed Tcl_RegExpMatchObj to
Tcl_RegExpExecObj and added a new Tcl_RegExpMatchObj that is
equivalent to Tcl_RegExpMatch. Added public macros for the regexp
compile/execute flags. Changed to store either an object pointer
or a string pointer in the TclRegexp structure. Changed to avoid
adding a reference to the object or copying the string.
-rw-r--r-- | generic/tcl.decls | 8 | ||||
-rw-r--r-- | generic/tcl.h | 35 | ||||
-rw-r--r-- | generic/tclDecls.h | 20 | ||||
-rw-r--r-- | generic/tclRegexp.c | 134 | ||||
-rw-r--r-- | generic/tclRegexp.h | 14 | ||||
-rw-r--r-- | generic/tclStubInit.c | 5 | ||||
-rw-r--r-- | generic/tclTest.c | 100 |
7 files changed, 188 insertions, 128 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index 8b9d46d..ebe7f34 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: tcl.decls,v 1.16 1999/06/15 01:16:21 hershey Exp $ +# RCS: @(#) $Id: tcl.decls,v 1.17 1999/06/17 19:32:14 stanton Exp $ library tcl @@ -1278,7 +1278,7 @@ declare 375 generic { int Tcl_UniCharIsPunct(int ch) } declare 376 generic { - int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_RegExp regexp, \ + int Tcl_RegExpExecObj(Tcl_Interp *interp, Tcl_RegExp regexp, \ Tcl_Obj *objPtr, int offset, int nmatches, int flags) } declare 377 generic { @@ -1307,6 +1307,10 @@ declare 384 generic { void Tcl_AppendUnicodeToObj (register Tcl_Obj *objPtr, \ Tcl_UniChar *unicode, int length) } +declare 385 generic { + int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *stringObj, \ + Tcl_Obj *patternObj) +} ############################################################################## diff --git a/generic/tcl.h b/generic/tcl.h index 2c7ea05..7d9fe91 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tcl.h,v 1.45 1999/06/10 04:28:49 stanton Exp $ + * RCS: @(#) $Id: tcl.h,v 1.46 1999/06/17 19:32:14 stanton Exp $ */ #ifndef _TCL @@ -392,24 +392,41 @@ typedef struct Tcl_Var_ *Tcl_Var; * matches */ /* + * The following flag is experimental and only intended for use by Expect. It + * will probably go away in a later release. + */ + +#define TCL_REG_BOSONLY 002000 /* prepend \A to pattern so it only + * matches at the beginning of the + * string. */ + +/* + * Flags values passed to Tcl_RegExpExecObj. + */ + +#define TCL_REG_NOTBOL 0001 /* Beginning of string does not match ^. */ +#define TCL_REG_NOTEOL 0002 /* End of string does not match $. */ + +/* * Structures filled in by Tcl_RegExpInfo. Note that all offset values are * relative to the start of the match string, not the beginning of the * entire string. */ typedef struct Tcl_RegExpIndices { - long start; /* character offset of first character in match */ - long end; /* character offset of first character after the + long start; /* character offset of first character in match */ + long end; /* character offset of first character after the * match. */ } Tcl_RegExpIndices; typedef struct Tcl_RegExpInfo { - int nsubs; /* number of subexpressions in the - * compiled expression*/ - Tcl_RegExpIndices *matches; /* array of nsubs match offset - * pairs */ - long extendStart; /* The offset at which a subsequent - * match might begin. */ + int nsubs; /* number of subexpressions in the + * compiled expression */ + Tcl_RegExpIndices *matches; /* array of nsubs match offset + * pairs */ + long extendStart; /* The offset at which a subsequent + * match might begin. */ + long reserved; /* Reserved for later use. */ } Tcl_RegExpInfo; /* diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 8cec11a..0b755fe 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclDecls.h,v 1.16 1999/06/15 01:16:22 hershey Exp $ + * RCS: @(#) $Id: tclDecls.h,v 1.17 1999/06/17 19:32:15 stanton Exp $ */ #ifndef _TCLDECLS @@ -1169,7 +1169,7 @@ EXTERN int Tcl_UniCharIsPrint _ANSI_ARGS_((int ch)); /* 375 */ EXTERN int Tcl_UniCharIsPunct _ANSI_ARGS_((int ch)); /* 376 */ -EXTERN int Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp, +EXTERN int Tcl_RegExpExecObj _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 377 */ @@ -1195,6 +1195,9 @@ EXTERN Tcl_Obj * Tcl_GetRange _ANSI_ARGS_((Tcl_Obj * objPtr, EXTERN void Tcl_AppendUnicodeToObj _ANSI_ARGS_(( register Tcl_Obj * objPtr, Tcl_UniChar * unicode, int length)); +/* 385 */ +EXTERN int Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp, + Tcl_Obj * stringObj, Tcl_Obj * patternObj)); typedef struct TclStubHooks { struct TclPlatStubs *tclPlatStubs; @@ -1638,7 +1641,7 @@ typedef struct TclStubs { int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */ int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */ int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */ - int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */ + int (*tcl_RegExpExecObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */ void (*tcl_RegExpGetInfo) _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); /* 377 */ Tcl_Obj * (*tcl_NewUnicodeObj) _ANSI_ARGS_((Tcl_UniChar * unicode, int numChars)); /* 378 */ void (*tcl_SetUnicodeObj) _ANSI_ARGS_((Tcl_Obj * objPtr, Tcl_UniChar * unicode, int numChars)); /* 379 */ @@ -1647,6 +1650,7 @@ typedef struct TclStubs { Tcl_UniChar * (*tcl_GetUnicode) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 382 */ Tcl_Obj * (*tcl_GetRange) _ANSI_ARGS_((Tcl_Obj * objPtr, int first, int last)); /* 383 */ void (*tcl_AppendUnicodeToObj) _ANSI_ARGS_((register Tcl_Obj * objPtr, Tcl_UniChar * unicode, int length)); /* 384 */ + int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_Obj * stringObj, Tcl_Obj * patternObj)); /* 385 */ } TclStubs; #ifdef __cplusplus @@ -3187,9 +3191,9 @@ extern TclStubs *tclStubsPtr; #define Tcl_UniCharIsPunct \ (tclStubsPtr->tcl_UniCharIsPunct) /* 375 */ #endif -#ifndef Tcl_RegExpMatchObj -#define Tcl_RegExpMatchObj \ - (tclStubsPtr->tcl_RegExpMatchObj) /* 376 */ +#ifndef Tcl_RegExpExecObj +#define Tcl_RegExpExecObj \ + (tclStubsPtr->tcl_RegExpExecObj) /* 376 */ #endif #ifndef Tcl_RegExpGetInfo #define Tcl_RegExpGetInfo \ @@ -3223,6 +3227,10 @@ extern TclStubs *tclStubsPtr; #define Tcl_AppendUnicodeToObj \ (tclStubsPtr->tcl_AppendUnicodeToObj) /* 384 */ #endif +#ifndef Tcl_RegExpMatchObj +#define Tcl_RegExpMatchObj \ + (tclStubsPtr->tcl_RegExpMatchObj) /* 385 */ +#endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 3e28224..6736465 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.8 1999/06/15 01:16:24 hershey Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.9 1999/06/17 19:32:15 stanton Exp $ */ #include "tclInt.h" @@ -94,6 +94,9 @@ static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData)); static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr)); static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); +static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_RegExp re, CONST Tcl_UniChar *uniString, + int numChars, int nmatches, int flags)); static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Obj *objPtr)); @@ -177,7 +180,10 @@ Tcl_RegExpExec(interp, re, string, start) * this identifies beginning of larger * string, so that "^" won't match. */ { - int flags; + int flags, result, numChars; + TclRegexp *regexp = (TclRegexp *)re; + Tcl_DString ds; + Tcl_UniChar *ustr; /* * If the starting point is offset from the beginning of the buffer, @@ -190,8 +196,25 @@ Tcl_RegExpExec(interp, re, string, start) flags = 0; } - return Tcl_RegExpMatchObj(interp, re, Tcl_NewStringObj(string, -1), - 0 /* offset */, -1 /* nmatches */, flags); + /* + * Remember the string for use by Tcl_RegExpRange(). + */ + + regexp->string = string; + regexp->objPtr = NULL; + + /* + * Convert the string to Unicode and perform the match. + */ + + Tcl_DStringInit(&ds); + ustr = Tcl_UtfToUniCharDString(string, -1, &ds); + numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); + result = RegExpExecUniChar(interp, re, ustr, numChars, + -1 /* nmatches */, flags); + Tcl_DStringFree(&ds); + + return result; } /* @@ -226,14 +249,18 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) * in (sub-) range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; - char *string; + CONST char *string; if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; } else if (regexpPtr->matches[index].rm_so < 0) { *startPtr = *endPtr = NULL; } else { - string = Tcl_GetString(regexpPtr->objPtr); + if (regexpPtr->objPtr) { + string = Tcl_GetString(regexpPtr->objPtr); + } else { + string = regexpPtr->string; + } *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); } @@ -242,7 +269,7 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) /* *--------------------------------------------------------------------------- * - * TclRegExpExecUniChar -- + * RegExpExecUniChar -- * * Execute the regular expression matcher using a compiled form of a * regular expression and save information about any match that is @@ -260,8 +287,8 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) *---------------------------------------------------------------------- */ -int -TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) +static int +RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) Tcl_Interp *interp; /* Interpreter to use for error reporting. */ Tcl_RegExp re; /* Compiled regular expression; returned by * a previous call to Tcl_GetRegExpFromObj */ @@ -370,7 +397,7 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr) int Tcl_RegExpMatch(interp, string, pattern) - Tcl_Interp *interp; /* Used for error reporting. */ + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ char *string; /* String. */ char *pattern; /* Regular expression to match against * string. */ @@ -387,9 +414,9 @@ Tcl_RegExpMatch(interp, string, pattern) /* *---------------------------------------------------------------------- * - * Tcl_RegExpMatchObj -- + * Tcl_RegExpExecObj -- * - * Match a precompiled regexp against the given object. + * Execute a precompiled regexp against the given object. * * Results: * If an error occurs during the matching operation then -1 @@ -404,7 +431,7 @@ Tcl_RegExpMatch(interp, string, pattern) */ int -Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags) +Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) Tcl_Interp *interp; /* Interpreter to use for error reporting. */ Tcl_RegExp re; /* Compiled regular expression; must have * been returned by previous call to @@ -418,78 +445,74 @@ Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags) int flags; /* Regular expression execution flags. */ { TclRegexp *regexpPtr = (TclRegexp *) re; - Tcl_Obj *oldPtr = regexpPtr->objPtr; Tcl_UniChar *udata; int length; /* - * Bump the refcount before we do anything in case the object - * was newly created. + * Save the target object so we can extract strings from it later. */ - Tcl_IncrRefCount(objPtr); + regexpPtr->string = NULL; + regexpPtr->objPtr = objPtr; udata = Tcl_GetUnicode(objPtr); length = Tcl_GetCharLength(objPtr); - /* - * Save the target object so we can extract strings from it later. - */ - - regexpPtr->objPtr = objPtr; - if (oldPtr) { - Tcl_DecrRefCount(oldPtr); - } - if (offset > length) { offset = length; } udata += offset; length -= offset; - return TclRegExpExecUniChar(interp, re, udata, length, nmatches, flags); + return RegExpExecUniChar(interp, re, udata, length, nmatches, flags); } /* *---------------------------------------------------------------------- * - * Tcl_RegExpGetInfo -- + * Tcl_RegExpMatchObj -- * - * Retrieve information about the current match. + * See if an object matches a regular expression. * * Results: - * None. + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. * * Side effects: - * None. + * Changes the internal rep of the pattern and string objects. * *---------------------------------------------------------------------- */ -void -Tcl_RegExpGetInfo(regexp, infoPtr) - Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */ - Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */ +int +Tcl_RegExpMatchObj(interp, stringObj, patternObj) + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ + Tcl_Obj *stringObj; /* Object containing the String to search. */ + Tcl_Obj *patternObj; /* Regular expression to match against + * string. */ { - TclRegexp *regexpPtr = (TclRegexp *) regexp; + Tcl_RegExp re; - infoPtr->nsubs = regexpPtr->re.re_nsub; - infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches; - infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so; + re = Tcl_GetRegExpFromObj(interp, patternObj, + TCL_REG_ADVANCED | TCL_REG_NOSUB); + if (re == NULL) { + return -1; + } + return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */, + 0 /* nmatches */, 0 /* flags */); } /* *---------------------------------------------------------------------- * - * TclRegExpMatchObj -- + * Tcl_RegExpGetInfo -- * - * See if a string matches a regular expression pattern object. + * Retrieve information about the current match. * * Results: - * If an error occurs during the matching operation then -1 - * is returned and the interp's result contains an error message. - * Otherwise the return value is 1 if "string" matches "pattern" - * and 0 otherwise. + * None. * * Side effects: * None. @@ -497,20 +520,16 @@ Tcl_RegExpGetInfo(regexp, infoPtr) *---------------------------------------------------------------------- */ -int -TclRegExpMatchObj(interp, string, patObj) - Tcl_Interp *interp; /* Used for error reporting. */ - char *string; /* String. */ - Tcl_Obj *patObj; /* Regular expression to match against - * string. */ +void +Tcl_RegExpGetInfo(regexp, infoPtr) + Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */ + Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */ { - Tcl_RegExp re; + TclRegexp *regexpPtr = (TclRegexp *) regexp; - re = Tcl_GetRegExpFromObj(interp, patObj, REG_ADVANCED); - if (re == NULL) { - return -1; - } - return Tcl_RegExpExec(interp, re, string, string); + infoPtr->nsubs = regexpPtr->re.re_nsub; + infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches; + infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so; } /* @@ -874,6 +893,7 @@ CompileRegexp(interp, string, length, flags) regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); regexpPtr->objPtr = NULL; + regexpPtr->string = NULL; regexpPtr->details.rm_extend.rm_so = -1; regexpPtr->details.rm_extend.rm_eo = -1; diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h index 5cee78e..04d381d 100644 --- a/generic/tclRegexp.h +++ b/generic/tclRegexp.h @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.h,v 1.9 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.h,v 1.10 1999/06/17 19:32:15 stanton Exp $ */ #ifndef _TCLREGEXP @@ -27,16 +27,17 @@ * The TclRegexp structure encapsulates a compiled regex_t, * the flags that were used to compile it, and an array of pointers * that are used to indicate subexpressions after a call to Tcl_RegExpExec. + * Note that the string and objPtr are mutually exclusive. These values + * are needed by Tcl_RegExpRange in order to return pointers into the + * original string. */ typedef struct TclRegexp { int flags; /* Regexp compile flags. */ regex_t re; /* Compiled re, includes number of * subexpressions. */ - Tcl_Obj *objPtr; /* Last object match with this regexp, so - * Tcl_RegExpRange() can convert the matches - * from character indices to UTF-8 byte - * offsets. */ + CONST char *string; /* Last string passed to Tcl_RegExpExec. */ + Tcl_Obj *objPtr; /* Last object passed to Tcl_RegExpExecObj. */ regmatch_t *matches; /* Array of indices into the Tcl_UniChar * representation of the last string matched * with this regexp to indicate the location @@ -53,9 +54,6 @@ typedef struct TclRegexp { EXTERN int TclRegAbout _ANSI_ARGS_((Tcl_Interp *interp, Tcl_RegExp re)); -EXTERN int TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_RegExp re, CONST Tcl_UniChar *uniString, - int numChars, int nmatches, int flags)); EXTERN int TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp, char *string, Tcl_Obj *patObj)); EXTERN void TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re, diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index a6f7e45..8b824cc 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStubInit.c,v 1.18 1999/06/15 01:16:25 hershey Exp $ + * RCS: @(#) $Id: tclStubInit.c,v 1.19 1999/06/17 19:32:15 stanton Exp $ */ #include "tclInt.h" @@ -756,7 +756,7 @@ TclStubs tclStubs = { Tcl_UniCharIsGraph, /* 373 */ Tcl_UniCharIsPrint, /* 374 */ Tcl_UniCharIsPunct, /* 375 */ - Tcl_RegExpMatchObj, /* 376 */ + Tcl_RegExpExecObj, /* 376 */ Tcl_RegExpGetInfo, /* 377 */ Tcl_NewUnicodeObj, /* 378 */ Tcl_SetUnicodeObj, /* 379 */ @@ -765,6 +765,7 @@ TclStubs tclStubs = { Tcl_GetUnicode, /* 382 */ Tcl_GetRange, /* 383 */ Tcl_AppendUnicodeToObj, /* 384 */ + Tcl_RegExpMatchObj, /* 385 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclTest.c b/generic/tclTest.c index efc2520..8226ed1 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclTest.c,v 1.13 1999/06/02 01:53:31 stanton Exp $ + * RCS: @(#) $Id: tclTest.c,v 1.14 1999/06/17 19:32:16 stanton Exp $ */ #define TCL_TEST @@ -2528,12 +2528,12 @@ TestregexpObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { - int i, ii, result, indices, stringLength, wLen, match, about; + int i, ii, indices, stringLength, match, about; int hasxflags, cflags, eflags; Tcl_RegExp regExpr; char *string; - Tcl_DString stringBuffer, valueBuffer; - Tcl_UniChar *wStart; + Tcl_Obj *objPtr; + Tcl_RegExpInfo info; static char *options[] = { "-indices", "-nocase", "-about", "-expanded", "-line", "-linestop", "-lineanchor", @@ -2625,6 +2625,7 @@ TestregexpObjCmd(dummy, interp, objc, objv) if (regExpr == NULL) { return TCL_ERROR; } + objPtr = objv[1]; if (about) { if (TclRegAbout(interp, regExpr) < 0) { @@ -2633,23 +2634,16 @@ TestregexpObjCmd(dummy, interp, objc, objv) return TCL_OK; } - result = TCL_OK; - string = Tcl_GetStringFromObj(objv[1], &stringLength); - - Tcl_DStringInit(&valueBuffer); - - Tcl_DStringInit(&stringBuffer); - wStart = Tcl_UtfToUniCharDString(string, stringLength, &stringBuffer); - wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar); + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, 0 /* offset */, + objc-2 /* nmatches */, eflags); - match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, objc-2, eflags); if (match < 0) { - result = TCL_ERROR; - goto done; + return TCL_ERROR; } if (match == 0) { /* - * Set the interpreter's object result to an integer object w/ value 0. + * Set the interpreter's object result to an integer object w/ + * value 0. */ Tcl_SetIntObj(Tcl_GetObjResult(interp), 0); @@ -2665,10 +2659,10 @@ TestregexpObjCmd(dummy, interp, objc, objv) if (value == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", varName, "\"", (char *) NULL); - result = TCL_ERROR; + return TCL_ERROR; } } - goto done; + return TCL_OK; } /* @@ -2679,38 +2673,56 @@ TestregexpObjCmd(dummy, interp, objc, objv) objc -= 2; objv += 2; + Tcl_RegExpGetInfo(regExpr, &info); for (i = 0; i < objc; i++) { - char *varName, *value; int start, end; + Tcl_Obj *newPtr, *varPtr, *valuePtr; - varName = Tcl_GetString(objv[i]); - + varPtr = objv[i]; ii = ((cflags®_EXPECT) && i == objc-1) ? -1 : i; - TclRegExpRangeUniChar(regExpr, ii, &start, &end); - if (start < 0) { - if (indices) { - value = Tcl_SetVar(interp, varName, "-1 -1", 0); + if (indices) { + Tcl_Obj *objs[2]; + + if (ii == -1) { + TclRegExpRangeUniChar(regExpr, ii, &start, &end); + } else if (ii > info.nsubs) { + start = -1; + end = -1; } else { - value = Tcl_SetVar(interp, varName, "", 0); + start = info.matches[ii].start; + end = info.matches[ii].end; } - } else { - if (indices) { - char info[TCL_INTEGER_SPACE * 2]; - sprintf(info, "%d %d", start, end - 1); - value = Tcl_SetVar(interp, varName, info, 0); + /* + * Adjust index so it refers to the last character in the + * match instead of the first character after the match. + */ + + if (end >= 0) { + end--; + } + + objs[0] = Tcl_NewLongObj(start); + objs[1] = Tcl_NewLongObj(end); + + newPtr = Tcl_NewListObj(2, objs); + } else { + if (ii == -1) { + TclRegExpRangeUniChar(regExpr, ii, &start, &end); + newPtr = Tcl_GetRange(objPtr, start, end); + } else if (ii > info.nsubs) { + newPtr = Tcl_NewObj(); } else { - value = Tcl_UniCharToUtfDString(wStart + start, end - start, - &valueBuffer); - value = Tcl_SetVar(interp, varName, value, 0); - Tcl_DStringSetLength(&valueBuffer, 0); + newPtr = Tcl_GetRange(objPtr, info.matches[ii].start, + info.matches[ii].end - 1); } } - if (value == NULL) { + valuePtr = Tcl_ObjSetVar2(interp, varPtr, NULL, newPtr, 0); + if (valuePtr == NULL) { + Tcl_DecrRefCount(newPtr); Tcl_AppendResult(interp, "couldn't set variable \"", - varName, "\"", (char *) NULL); - result = TCL_ERROR; - goto done; + Tcl_GetString(varPtr), "\"", (char *) NULL); + return TCL_ERROR; } } @@ -2719,11 +2731,7 @@ TestregexpObjCmd(dummy, interp, objc, objv) */ Tcl_SetIntObj(Tcl_GetObjResult(interp), 1); - - done: - Tcl_DStringFree(&stringBuffer); - Tcl_DStringFree(&valueBuffer); - return result; + return TCL_OK; } /* @@ -2780,6 +2788,10 @@ TestregexpXflags(string, length, cflagsPtr, eflagsPtr) cflags |= REG_NOSUB; break; } + case 's': { /* s for start */ + cflags |= REG_BOSONLY; + break; + } case '+': { cflags |= REG_FAKEEC; break; |