diff options
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r-- | generic/tclRegexp.c | 571 |
1 files changed, 304 insertions, 267 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 7e2a875..23a1c97 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -1,26 +1,25 @@ -/* +/* * tclRegexp.c -- * - * This file contains the public interfaces to the Tcl regular - * expression mechanism. + * This file contains the public interfaces to the Tcl regular expression + * mechanism. * * Copyright (c) 1998 by Sun Microsystems, Inc. * Copyright (c) 1998-1999 by Scriptics Corporation. * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.28 2007/12/13 15:23:20 dgp Exp $ */ #include "tclInt.h" -#include "tclPort.h" #include "tclRegexp.h" /* *---------------------------------------------------------------------- - * The routines in this file use Henry Spencer's regular expression - * package contained in the following additional source files: + * The routines in this file use Henry Spencer's regular expression package + * contained in the following additional source files: * * regc_color.c regc_cvec.c regc_lex.c * regc_nfa.c regcomp.c regcustom.h @@ -29,23 +28,23 @@ * regfronts.c regguts.h * * Copyright (c) 1998 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; @@ -69,15 +68,14 @@ typedef struct ThreadSpecificData { int initialized; /* Set to 1 when the module is initialized. */ - char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled - * regular expression patterns. NULL - * means that this slot isn't used. - * Malloc-ed. */ + char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled regular + * expression patterns. NULL means that this + * slot isn't used. Malloc-ed. */ int patLengths[NUM_REGEXPS];/* Number of non-null characters in - * corresponding entry in patterns. - * -1 means entry isn't used. */ + * corresponding entry in patterns. -1 means + * entry isn't used. */ struct TclRegexp *regexps[NUM_REGEXPS]; - /* Compiled forms of above strings. Also + /* Compiled forms of above strings. Also * malloc-ed, or NULL if not in use yet. */ } ThreadSpecificData; @@ -87,49 +85,46 @@ static Tcl_ThreadDataKey dataKey; * Declarations for functions used only in this file. */ -static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp, - CONST char *pattern, int length, int flags)); -static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, - Tcl_Obj *copyPtr)); -static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData)); -static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr)); -static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); -static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_RegExp re, CONST Tcl_UniChar *uniString, - int numChars, int nmatches, int flags)); -static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_Obj *objPtr)); +static TclRegexp * CompileRegexp(Tcl_Interp *interp, const char *pattern, + int length, int flags); +static void DupRegexpInternalRep(Tcl_Obj *srcPtr, + Tcl_Obj *copyPtr); +static void FinalizeRegexp(ClientData clientData); +static void FreeRegexp(TclRegexp *regexpPtr); +static void FreeRegexpInternalRep(Tcl_Obj *objPtr); +static int RegExpExecUniChar(Tcl_Interp *interp, Tcl_RegExp re, + const Tcl_UniChar *uniString, int numChars, + int nmatches, int flags); +static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); /* - * The regular expression Tcl object type. This serves as a cache - * of the compiled form of the regular expression. + * The regular expression Tcl object type. This serves as a cache of the + * compiled form of the regular expression. */ -static Tcl_ObjType tclRegexpType = { +Tcl_ObjType tclRegexpType = { "regexp", /* name */ FreeRegexpInternalRep, /* freeIntRepProc */ DupRegexpInternalRep, /* dupIntRepProc */ NULL, /* updateStringProc */ SetRegexpFromAny /* setFromAnyProc */ }; - /* *---------------------------------------------------------------------- * * Tcl_RegExpCompile -- * - * Compile a regular expression into a form suitable for fast - * matching. This procedure is DEPRECATED in favor of the - * object version of the command. + * Compile a regular expression into a form suitable for fast matching. + * This function is DEPRECATED in favor of the object version of the + * command. * * Results: - * The return value is a pointer to the compiled form of string, - * suitable for passing to Tcl_RegExpExec. This compiled form - * is only valid up until the next call to this procedure, so - * don't keep these around for a long time! If an error occurred - * while compiling the pattern, then NULL is returned and an error - * message is left in the interp's result. + * The return value is a pointer to the compiled form of string, suitable + * for passing to Tcl_RegExpExec. This compiled form is only valid up + * until the next call to this function, so don't keep these around for a + * long time! If an error occurred while compiling the pattern, then NULL + * is returned and an error message is left in the interp's result. * * Side effects: * Updates the cache of compiled regexps. @@ -138,13 +133,13 @@ static Tcl_ObjType tclRegexpType = { */ Tcl_RegExp -Tcl_RegExpCompile(interp, string) - Tcl_Interp *interp; /* For use in error reporting and - * to access the interp regexp cache. */ - CONST char *string; /* String for which to produce - * compiled regular expression. */ +Tcl_RegExpCompile( + Tcl_Interp *interp, /* For use in error reporting and to access + * the interp regexp cache. */ + const char *pattern) /* String for which to produce compiled + * regular expression. */ { - return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string), + return (Tcl_RegExp) CompileRegexp(interp, pattern, (int) strlen(pattern), REG_ADVANCED); } @@ -153,15 +148,14 @@ Tcl_RegExpCompile(interp, string) * * Tcl_RegExpExec -- * - * Execute the regular expression matcher using a compiled form - * of a regular expression and save information about any match - * that is found. + * Execute the regular expression matcher using a compiled form of a + * regular expression and save information about any match that is found. * * Results: - * If an error occurs during the matching operation then -1 - * is returned and the interp's result contains an error message. - * Otherwise the return value is 1 if a matching range is - * found and 0 if there is no matching range. + * If an error occurs during the matching operation then -1 is returned + * and the interp's result contains an error message. Otherwise the + * return value is 1 if a matching range is found and 0 if there is no + * matching range. * * Side effects: * None. @@ -170,27 +164,27 @@ Tcl_RegExpCompile(interp, string) */ int -Tcl_RegExpExec(interp, re, string, start) - Tcl_Interp *interp; /* Interpreter to use for error reporting. */ - Tcl_RegExp re; /* Compiled regular expression; must have - * been returned by previous call to +Tcl_RegExpExec( + Tcl_Interp *interp, /* Interpreter to use for error reporting. */ + Tcl_RegExp re, /* Compiled regular expression; must have been + * returned by previous call to * Tcl_GetRegExpFromObj. */ - CONST char *string; /* String against which to match re. */ - CONST char *start; /* If string is part of a larger string, - * this identifies beginning of larger - * string, so that "^" won't match. */ + const char *text, /* Text against which to match re. */ + const char *start) /* If text is part of a larger string, this + * identifies beginning of larger string, so + * that "^" won't match. */ { int flags, result, numChars; TclRegexp *regexp = (TclRegexp *)re; Tcl_DString ds; - CONST Tcl_UniChar *ustr; + const Tcl_UniChar *ustr; /* - * If the starting point is offset from the beginning of the buffer, - * then we need to tell the regexp engine not to match "^". + * If the starting point is offset from the beginning of the buffer, then + * we need to tell the regexp engine not to match "^". */ - if (string > start) { + if (text > start) { flags = REG_NOTBOL; } else { flags = 0; @@ -200,7 +194,7 @@ Tcl_RegExpExec(interp, re, string, start) * Remember the string for use by Tcl_RegExpRange(). */ - regexp->string = string; + regexp->string = text; regexp->objPtr = NULL; /* @@ -208,10 +202,10 @@ Tcl_RegExpExec(interp, re, string, start) */ Tcl_DStringInit(&ds); - ustr = Tcl_UtfToUniCharDString(string, -1, &ds); + ustr = Tcl_UtfToUniCharDString(text, -1, &ds); numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); - result = RegExpExecUniChar(interp, re, ustr, numChars, - -1 /* nmatches */, flags); + result = RegExpExecUniChar(interp, re, ustr, numChars, -1 /* nmatches */, + flags); Tcl_DStringFree(&ds); return result; @@ -227,7 +221,7 @@ Tcl_RegExpExec(interp, re, string, start) * * Results: * The variables at *startPtr and *endPtr are modified to hold the - * addresses of the endpoints of the range given by index. If the + * addresses of the endpoints of the range given by index. If the * specified range doesn't exist then NULLs are returned. * * Side effects: @@ -237,19 +231,19 @@ Tcl_RegExpExec(interp, re, string, start) */ void -Tcl_RegExpRange(re, index, startPtr, endPtr) - Tcl_RegExp re; /* Compiled regular expression that has - * been passed to Tcl_RegExpExec. */ - int index; /* 0 means give the range of the entire - * match, > 0 means give the range of - * a matching subrange. */ - CONST char **startPtr; /* Store address of first character in - * (sub-) range here. */ - CONST char **endPtr; /* Store address of character just after last - * in (sub-) range here. */ +Tcl_RegExpRange( + Tcl_RegExp re, /* Compiled regular expression that has been + * passed to Tcl_RegExpExec. */ + int index, /* 0 means give the range of the entire match, + * > 0 means give the range of a matching + * subrange. */ + const char **startPtr, /* Store address of first character in + * (sub-)range here. */ + const char **endPtr) /* Store address of character just after last + * in (sub-)range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; - CONST char *string; + const char *string; if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; @@ -257,7 +251,7 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { - string = Tcl_GetString(regexpPtr->objPtr); + string = TclGetString(regexpPtr->objPtr); } else { string = regexpPtr->string; } @@ -272,14 +266,13 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) * RegExpExecUniChar -- * * Execute the regular expression matcher using a compiled form of a - * regular expression and save information about any match that is - * found. + * regular expression and save information about any match that is found. * * Results: - * If an error occurs during the matching operation then -1 is - * returned and an error message is left in interp's result. - * Otherwise the return value is 1 if a matching range was found or - * 0 if there was no matching range. + * If an error occurs during the matching operation then -1 is returned + * and an error message is left in interp's result. Otherwise the return + * value is 1 if a matching range was found or 0 if there was no matching + * range. * * Side effects: * None. @@ -288,17 +281,17 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) */ static int -RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) - Tcl_Interp *interp; /* Interpreter to use for error reporting. */ - Tcl_RegExp re; /* Compiled regular expression; returned by - * a previous call to Tcl_GetRegExpFromObj */ - CONST Tcl_UniChar *wString; /* String against which to match re. */ - int numChars; /* Length of Tcl_UniChar string (must - * be >= 0). */ - int nmatches; /* How many subexpression matches (counting - * the whole match as subexpression 0) are - * of interest. -1 means "don't know". */ - int flags; /* Regular expression flags. */ +RegExpExecUniChar( + Tcl_Interp *interp, /* Interpreter to use for error reporting. */ + Tcl_RegExp re, /* Compiled regular expression; returned by a + * previous call to Tcl_GetRegExpFromObj */ + const Tcl_UniChar *wString, /* String against which to match re. */ + int numChars, /* Length of Tcl_UniChar string (must be + * >=0). */ + int nmatches, /* How many subexpression matches (counting + * the whole match as subexpression 0) are of + * interest. -1 means "don't know". */ + int flags) /* Regular expression flags. */ { int status; TclRegexp *regexpPtr = (TclRegexp *) re; @@ -340,8 +333,8 @@ RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) * * Results: * The variables at *startPtr and *endPtr are modified to hold the - * offsets of the endpoints of the range given by index. If the - * specified range doesn't exist then -1s are supplied. + * offsets of the endpoints of the range given by index. If the specified + * range doesn't exist then -1s are supplied. * * Side effects: * None. @@ -350,17 +343,17 @@ RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) */ void -TclRegExpRangeUniChar(re, index, startPtr, endPtr) - Tcl_RegExp re; /* Compiled regular expression that has - * been passed to Tcl_RegExpExec. */ - int index; /* 0 means give the range of the entire - * match, > 0 means give the range of - * a matching subrange, -1 means the - * range of the rm_extend field. */ - int *startPtr; /* Store address of first character in - * (sub-) range here. */ - int *endPtr; /* Store address of character just after last - * in (sub-) range here. */ +TclRegExpRangeUniChar( + Tcl_RegExp re, /* Compiled regular expression that has been + * passed to Tcl_RegExpExec. */ + int index, /* 0 means give the range of the entire match, + * > 0 means give the range of a matching + * subrange, -1 means the range of the + * rm_extend field. */ + int *startPtr, /* Store address of first character in + * (sub-)range here. */ + int *endPtr) /* Store address of character just after last + * in (sub-)range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; @@ -384,10 +377,9 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr) * See if a string matches a regular expression. * * Results: - * If an error occurs during the matching operation then -1 - * is returned and the interp's result contains an error message. - * Otherwise the return value is 1 if "string" matches "pattern" - * and 0 otherwise. + * If an error occurs during the matching operation then -1 is returned + * and the interp's result contains an error message. Otherwise the + * return value is 1 if "text" matches "pattern" and 0 otherwise. * * Side effects: * None. @@ -396,11 +388,10 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr) */ int -Tcl_RegExpMatch(interp, string, pattern) - Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ - CONST char *string; /* String. */ - CONST char *pattern; /* Regular expression to match against - * string. */ +Tcl_RegExpMatch( + Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ + const char *text, /* Text to search for pattern matches. */ + const char *pattern) /* Regular expression to match against text. */ { Tcl_RegExp re; @@ -408,7 +399,7 @@ Tcl_RegExpMatch(interp, string, pattern) if (re == NULL) { return -1; } - return Tcl_RegExpExec(interp, re, string, string); + return Tcl_RegExpExec(interp, re, text, text); } /* @@ -419,10 +410,9 @@ Tcl_RegExpMatch(interp, string, pattern) * Execute a precompiled regexp against the given object. * * Results: - * If an error occurs during the matching operation then -1 - * is returned and the interp's result contains an error message. - * Otherwise the return value is 1 if "string" matches "pattern" - * and 0 otherwise. + * If an error occurs during the matching operation then -1 is returned + * and the interp's result contains an error message. Otherwise the + * return value is 1 if "string" matches "pattern" and 0 otherwise. * * Side effects: * Converts the object to a Unicode object. @@ -431,38 +421,59 @@ Tcl_RegExpMatch(interp, string, pattern) */ int -Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) - Tcl_Interp *interp; /* Interpreter to use for error reporting. */ - Tcl_RegExp re; /* Compiled regular expression; must have - * been returned by previous call to +Tcl_RegExpExecObj( + Tcl_Interp *interp, /* Interpreter to use for error reporting. */ + Tcl_RegExp re, /* Compiled regular expression; must have been + * returned by previous call to * Tcl_GetRegExpFromObj. */ - Tcl_Obj *objPtr; /* String against which to match re. */ - int offset; /* Character index that marks where matching + Tcl_Obj *textObj, /* Text against which to match re. */ + int offset, /* Character index that marks where matching * should begin. */ - int nmatches; /* How many subexpression matches (counting - * the whole match as subexpression 0) are - * of interest. -1 means all of them. */ - int flags; /* Regular expression execution flags. */ + int nmatches, /* How many subexpression matches (counting + * the whole match as subexpression 0) are of + * interest. -1 means all of them. */ + int flags) /* Regular expression execution flags. */ { TclRegexp *regexpPtr = (TclRegexp *) re; Tcl_UniChar *udata; int length; + int reflags = regexpPtr->flags; +#define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) + + /* + * Take advantage of the equivalent glob pattern, if one exists. + * This is possible based only on the right mix of incoming flags (0) + * and regexp compile flags. + */ + if ((offset == 0) && (nmatches == 0) && (flags == 0) + && !(reflags & ~TCL_REG_GLOBOK_FLAGS) + && (regexpPtr->globObjPtr != NULL)) { + int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0; + + /* + * Pass to TclStringMatchObj for obj-specific handling. + * XXX: Currently doesn't take advantage of exact-ness that + * XXX: TclReToGlob tells us about + */ + + return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase); + } /* * Save the target object so we can extract strings from it later. */ regexpPtr->string = NULL; - regexpPtr->objPtr = objPtr; + regexpPtr->objPtr = textObj; - udata = Tcl_GetUnicodeFromObj(objPtr, &length); + udata = Tcl_GetUnicodeFromObj(textObj, &length); if (offset > length) { offset = length; } udata += offset; length -= offset; - + return RegExpExecUniChar(interp, re, udata, length, nmatches, flags); } @@ -474,10 +485,9 @@ Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) * See if an object matches a regular expression. * * Results: - * If an error occurs during the matching operation then -1 - * is returned and the interp's result contains an error message. - * Otherwise the return value is 1 if "string" matches "pattern" - * and 0 otherwise. + * If an error occurs during the matching operation then -1 is returned + * and the interp's result contains an error message. Otherwise the + * return value is 1 if "text" matches "pattern" and 0 otherwise. * * Side effects: * Changes the internal rep of the pattern and string objects. @@ -486,10 +496,10 @@ Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) */ int -Tcl_RegExpMatchObj(interp, stringObj, patternObj) - Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ - Tcl_Obj *stringObj; /* Object containing the String to search. */ - Tcl_Obj *patternObj; /* Regular expression to match against +Tcl_RegExpMatchObj( + Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ + Tcl_Obj *textObj, /* Object containing the String to search. */ + Tcl_Obj *patternObj) /* Regular expression to match against * string. */ { Tcl_RegExp re; @@ -499,7 +509,7 @@ Tcl_RegExpMatchObj(interp, stringObj, patternObj) if (re == NULL) { return -1; } - return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */, + return Tcl_RegExpExecObj(interp, re, textObj, 0 /* offset */, 0 /* nmatches */, 0 /* flags */); } @@ -520,9 +530,9 @@ Tcl_RegExpMatchObj(interp, stringObj, patternObj) */ void -Tcl_RegExpGetInfo(regexp, infoPtr) - Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */ - Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */ +Tcl_RegExpGetInfo( + Tcl_RegExp regexp, /* Pattern from which to get subexpressions. */ + Tcl_RegExpInfo *infoPtr) /* Match information is stored here. */ { TclRegexp *regexpPtr = (TclRegexp *) regexp; @@ -536,14 +546,14 @@ Tcl_RegExpGetInfo(regexp, infoPtr) * * Tcl_GetRegExpFromObj -- * - * Compile a regular expression into a form suitable for fast - * matching. This procedure caches the result in a Tcl_Obj. + * Compile a regular expression into a form suitable for fast matching. + * This function caches the result in a Tcl_Obj. * * Results: - * The return value is a pointer to the compiled form of string, - * suitable for passing to Tcl_RegExpExec. If an error occurred - * while compiling the pattern, then NULL is returned and an error - * message is left in the interp's result. + * The return value is a pointer to the compiled form of string, suitable + * for passing to Tcl_RegExpExec. If an error occurred while compiling + * the pattern, then NULL is returned and an error message is left in the + * interp's result. * * Side effects: * Updates the native rep of the Tcl_Obj. @@ -552,25 +562,28 @@ Tcl_RegExpGetInfo(regexp, infoPtr) */ Tcl_RegExp -Tcl_GetRegExpFromObj(interp, objPtr, flags) - Tcl_Interp *interp; /* For use in error reporting, and to access +Tcl_GetRegExpFromObj( + Tcl_Interp *interp, /* For use in error reporting, and to access * the interp regexp cache. */ - Tcl_Obj *objPtr; /* Object whose string rep contains regular - * expression pattern. Internal rep will be + Tcl_Obj *objPtr, /* Object whose string rep contains regular + * expression pattern. Internal rep will be * changed to compiled form of this regular * expression. */ - int flags; /* Regular expression compilation flags. */ + int flags) /* Regular expression compilation flags. */ { int length; - Tcl_ObjType *typePtr; TclRegexp *regexpPtr; char *pattern; - typePtr = objPtr->typePtr; + /* + * This is OK because we only actually interpret this value properly as a + * TclRegexp* when the type is tclRegexpType. + */ + regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; - if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { - pattern = Tcl_GetStringFromObj(objPtr, &length); + if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { + pattern = TclGetStringFromObj(objPtr, &length); regexpPtr = CompileRegexp(interp, pattern, length, flags); if (regexpPtr == NULL) { @@ -579,7 +592,7 @@ Tcl_GetRegExpFromObj(interp, objPtr, flags) /* * Add a reference to the regexp so it will persist even if it is - * pushed out of the current thread's regexp cache. This reference + * pushed out of the current thread's regexp cache. This reference * will be removed when the object's internal rep is freed. */ @@ -589,10 +602,8 @@ Tcl_GetRegExpFromObj(interp, objPtr, flags) * Free the old representation and set our type. */ - if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr; + TclFreeIntRep(objPtr); + objPtr->internalRep.otherValuePtr = (void *) regexpPtr; objPtr->typePtr = &tclRegexpType; } return (Tcl_RegExp) regexpPtr; @@ -606,10 +617,10 @@ Tcl_GetRegExpFromObj(interp, objPtr, flags) * Return information about a compiled regular expression. * * Results: - * The return value is -1 for failure, 0 for success, although at - * the moment there's nothing that could fail. On success, a list - * is left in the interp's result: first element is the subexpression - * count, second is a list of re_info bit names. + * The return value is -1 for failure, 0 for success, although at the + * moment there's nothing that could fail. On success, a list is left in + * the interp's result: first element is the subexpression count, second + * is a list of re_info bit names. * * Side effects: * None. @@ -618,16 +629,16 @@ Tcl_GetRegExpFromObj(interp, objPtr, flags) */ int -TclRegAbout(interp, re) - Tcl_Interp *interp; /* For use in variable assignment. */ - Tcl_RegExp re; /* The compiled regular expression. */ +TclRegAbout( + Tcl_Interp *interp, /* For use in variable assignment. */ + Tcl_RegExp re) /* The compiled regular expression. */ { - TclRegexp *regexpPtr = (TclRegexp *)re; - char buf[TCL_INTEGER_SPACE]; - static struct infoname { + TclRegexp *regexpPtr = (TclRegexp *) re; + struct infoname { int bit; - char *text; - } infonames[] = { + const char *text; + }; + static const struct infoname infonames[] = { {REG_UBACKREF, "REG_UBACKREF"}, {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"}, {REG_UBOUNDS, "REG_UBOUNDS"}, @@ -642,37 +653,40 @@ TclRegAbout(interp, re) {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, {REG_USHORTEST, "REG_USHORTEST"}, - {0, ""} + {0, NULL} }; - struct infoname *inf; - int n; + const struct infoname *inf; + Tcl_Obj *infoObj; + + /* + * The reset here guarantees that the interpreter result is empty and + * unshared. This means that we can use Tcl_ListObjAppendElement on the + * result object quite safely. + */ Tcl_ResetResult(interp); - sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub)); - Tcl_AppendElement(interp, buf); + /* + * Assume that there will never be more than INT_MAX subexpressions. This + * is a pretty reasonable assumption; the RE engine doesn't scale _that_ + * well and Tcl has other limits that constrain things as well... + */ + + Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), + Tcl_NewIntObj((int) regexpPtr->re.re_nsub)); /* - * Must count bits before generating list, because we must know - * whether {} are needed before we start appending names. + * Now append a list of all the bit-flags set for the RE. */ - n = 0; - for (inf = infonames; inf->bit != 0; inf++) { - if (regexpPtr->re.re_info&inf->bit) { - n++; - } - } - if (n != 1) { - Tcl_AppendResult(interp, " {", NULL); - } - for (inf = infonames; inf->bit != 0; inf++) { - if (regexpPtr->re.re_info&inf->bit) { - Tcl_AppendElement(interp, inf->text); + + TclNewObj(infoObj); + for (inf=infonames ; inf->bit != 0 ; inf++) { + if (regexpPtr->re.re_info & inf->bit) { + Tcl_ListObjAppendElement(NULL, infoObj, + Tcl_NewStringObj(inf->text, -1)); } } - if (n != 1) { - Tcl_AppendResult(interp, "}", NULL); - } + Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj); return 0; } @@ -694,26 +708,25 @@ TclRegAbout(interp, re) */ void -TclRegError(interp, msg, status) - Tcl_Interp *interp; /* Interpreter for error reporting. */ - CONST char *msg; /* Message to prepend to error. */ - int status; /* Status code to report. */ +TclRegError( + Tcl_Interp *interp, /* Interpreter for error reporting. */ + const char *msg, /* Message to prepend to error. */ + int status) /* Status code to report. */ { char buf[100]; /* ample in practice */ char cbuf[100]; /* lots in practice */ size_t n; - char *p; + const char *p; Tcl_ResetResult(interp); - n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf)); + n = TclReError(status, NULL, buf, sizeof(buf)); p = (n > sizeof(buf)) ? "..." : ""; Tcl_AppendResult(interp, msg, buf, p, NULL); sprintf(cbuf, "%d", status); - (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf)); + (void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf)); Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); } - /* *---------------------------------------------------------------------- @@ -733,8 +746,8 @@ TclRegError(interp, msg, status) */ static void -FreeRegexpInternalRep(objPtr) - Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */ +FreeRegexpInternalRep( + Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */ { TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; @@ -752,8 +765,8 @@ FreeRegexpInternalRep(objPtr) * * DupRegexpInternalRep -- * - * We copy the reference to the compiled regexp and bump its - * reference count. + * We copy the reference to the compiled regexp and bump its reference + * count. * * Results: * None. @@ -765,11 +778,12 @@ FreeRegexpInternalRep(objPtr) */ static void -DupRegexpInternalRep(srcPtr, copyPtr) - Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ - Tcl_Obj *copyPtr; /* Object with internal rep to set. */ +DupRegexpInternalRep( + Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ + Tcl_Obj *copyPtr) /* Object with internal rep to set. */ { TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr; + regexpPtr->refCount++; copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr; copyPtr->typePtr = &tclRegexpType; @@ -796,9 +810,9 @@ DupRegexpInternalRep(srcPtr, copyPtr) */ static int -SetRegexpFromAny(interp, objPtr) - Tcl_Interp *interp; /* Used for error reporting if not NULL. */ - Tcl_Obj *objPtr; /* The object to convert. */ +SetRegexpFromAny( + Tcl_Interp *interp, /* Used for error reporting if not NULL. */ + Tcl_Obj *objPtr) /* The object to convert. */ { if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) { return TCL_ERROR; @@ -811,37 +825,36 @@ SetRegexpFromAny(interp, objPtr) * * CompileRegexp -- * - * Attempt to compile the given regexp pattern. If the compiled - * regular expression can be found in the per-thread cache, it - * will be used instead of compiling a new copy. + * Attempt to compile the given regexp pattern. If the compiled regular + * expression can be found in the per-thread cache, it will be used + * instead of compiling a new copy. * * Results: - * The return value is a pointer to a newly allocated TclRegexp - * that represents the compiled pattern, or NULL if the pattern - * could not be compiled. If NULL is returned, an error message is - * left in the interp's result. + * The return value is a pointer to a newly allocated TclRegexp that + * represents the compiled pattern, or NULL if the pattern could not be + * compiled. If NULL is returned, an error message is left in the + * interp's result. * * Side effects: - * The thread-local regexp cache is updated and a new TclRegexp may - * be allocated. + * The thread-local regexp cache is updated and a new TclRegexp may be + * allocated. * *---------------------------------------------------------------------- */ static TclRegexp * -CompileRegexp(interp, string, length, flags) - Tcl_Interp *interp; /* Used for error reporting if not NULL. */ - CONST char *string; /* The regexp to compile (UTF-8). */ - int length; /* The length of the string in bytes. */ - int flags; /* Compilation flags. */ +CompileRegexp( + Tcl_Interp *interp, /* Used for error reporting if not NULL. */ + const char *string, /* The regexp to compile (UTF-8). */ + int length, /* The length of the string in bytes. */ + int flags) /* Compilation flags. */ { TclRegexp *regexpPtr; - CONST Tcl_UniChar *uniString; - int numChars; + const Tcl_UniChar *uniString; + int numChars, status, i, exact; Tcl_DString stringBuf; - int status, i; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - + if (!tsdPtr->initialized) { tsdPtr->initialized = 1; Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL); @@ -849,14 +862,14 @@ CompileRegexp(interp, string, length, flags) /* * This routine maintains a second-level regular expression cache in - * addition to the per-object regexp cache. The per-thread cache is needed + * addition to the per-object regexp cache. The per-thread cache is needed * to handle the case where for various reasons the object is lost between * invocations of the regexp command, but the literal pattern is the same. */ /* - * Check the per-thread compiled regexp cache. We can only reuse - * a regexp if it has the same pattern and the same flags. + * Check the per-thread compiled regexp cache. We can only reuse a regexp + * if it has the same pattern and the same flags. */ for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { @@ -864,8 +877,8 @@ CompileRegexp(interp, string, length, flags) && (tsdPtr->regexps[i]->flags == flags) && (strcmp(string, tsdPtr->patterns[i]) == 0)) { /* - * Move the matched pattern to the first slot in the - * cache and shift the other patterns down one position. + * Move the matched pattern to the first slot in the cache and + * shift the other patterns down one position. */ if (i != 0) { @@ -890,7 +903,7 @@ CompileRegexp(interp, string, length, flags) /* * This is a new expression, so compile it and add it to the cache. */ - + regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); regexpPtr->objPtr = NULL; regexpPtr->string = NULL; @@ -921,15 +934,29 @@ CompileRegexp(interp, string, length, flags) ckfree((char *)regexpPtr); if (interp) { TclRegError(interp, - "couldn't compile regular expression pattern: ", - status); + "couldn't compile regular expression pattern: ", status); } return NULL; } /* - * Allocate enough space for all of the subexpressions, plus one - * extra for the entire pattern. + * Convert RE to a glob pattern equivalent, if any, and cache it. If this + * is not possible, then globObjPtr will be NULL. This is used by + * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). + */ + + if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) { + regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf), + Tcl_DStringLength(&stringBuf)); + Tcl_IncrRefCount(regexpPtr->globObjPtr); + Tcl_DStringFree(&stringBuf); + } else { + regexpPtr->globObjPtr = NULL; + } + + /* + * Allocate enough space for all of the subexpressions, plus one extra for + * the entire pattern. */ regexpPtr->matches = (regmatch_t *) ckalloc( @@ -983,10 +1010,13 @@ CompileRegexp(interp, string, length, flags) */ static void -FreeRegexp(regexpPtr) - TclRegexp *regexpPtr; /* Compiled regular expression to free. */ +FreeRegexp( + TclRegexp *regexpPtr) /* Compiled regular expression to free. */ { TclReFree(®expPtr->re); + if (regexpPtr->globObjPtr) { + TclDecrRefCount(regexpPtr->globObjPtr); + } if (regexpPtr->matches) { ckfree((char *) regexpPtr->matches); } @@ -998,8 +1028,7 @@ FreeRegexp(regexpPtr) * * FinalizeRegexp -- * - * Release the storage associated with the per-thread regexp - * cache. + * Release the storage associated with the per-thread regexp cache. * * Results: * None. @@ -1011,8 +1040,8 @@ FreeRegexp(regexpPtr) */ static void -FinalizeRegexp(clientData) - ClientData clientData; /* Not used. */ +FinalizeRegexp( + ClientData clientData) /* Not used. */ { int i; TclRegexp *regexpPtr; @@ -1032,3 +1061,11 @@ FinalizeRegexp(clientData) */ tsdPtr->initialized = 0; } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |