diff options
Diffstat (limited to 'generic/tclRegexp.c')
| -rw-r--r-- | generic/tclRegexp.c | 570 |
1 files changed, 266 insertions, 304 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index d340f4c..c161d69 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -1,23 +1,24 @@ -/* +/* * tclRegexp.c -- * - * This file contains the public interfaces to the Tcl regular expression - * mechanism. + * This file contains the public interfaces to the Tcl regular + * expression mechanism. * * Copyright (c) 1998 by Sun Microsystems, Inc. * Copyright (c) 1998-1999 by Scriptics Corporation. * - * See the file "license.terms" for information on usage and redistribution of - * this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" +#include "tclPort.h" #include "tclRegexp.h" /* *---------------------------------------------------------------------- - * The routines in this file use Henry Spencer's regular expression package - * contained in the following additional source files: + * The routines in this file use Henry Spencer's regular expression + * package contained in the following additional source files: * * regc_color.c regc_cvec.c regc_lex.c * regc_nfa.c regcomp.c regcustom.h @@ -26,23 +27,23 @@ * regfronts.c regguts.h * * Copyright (c) 1998 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation of - * software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; @@ -66,14 +67,15 @@ typedef struct ThreadSpecificData { int initialized; /* Set to 1 when the module is initialized. */ - char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled regular - * expression patterns. NULL means that this - * slot isn't used. Malloc-ed. */ + char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled + * regular expression patterns. NULL + * means that this slot isn't used. + * Malloc-ed. */ int patLengths[NUM_REGEXPS];/* Number of non-null characters in - * corresponding entry in patterns. -1 means - * entry isn't used. */ + * corresponding entry in patterns. + * -1 means entry isn't used. */ struct TclRegexp *regexps[NUM_REGEXPS]; - /* Compiled forms of above strings. Also + /* Compiled forms of above strings. Also * malloc-ed, or NULL if not in use yet. */ } ThreadSpecificData; @@ -83,46 +85,49 @@ static Tcl_ThreadDataKey dataKey; * Declarations for functions used only in this file. */ -static TclRegexp * CompileRegexp(Tcl_Interp *interp, const char *pattern, - int length, int flags); -static void DupRegexpInternalRep(Tcl_Obj *srcPtr, - Tcl_Obj *copyPtr); -static void FinalizeRegexp(ClientData clientData); -static void FreeRegexp(TclRegexp *regexpPtr); -static void FreeRegexpInternalRep(Tcl_Obj *objPtr); -static int RegExpExecUniChar(Tcl_Interp *interp, Tcl_RegExp re, - const Tcl_UniChar *uniString, int numChars, - int nmatches, int flags); -static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); +static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp, + CONST char *pattern, int length, int flags)); +static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, + Tcl_Obj *copyPtr)); +static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData)); +static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr)); +static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); +static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_RegExp re, CONST Tcl_UniChar *uniString, + int numChars, int nmatches, int flags)); +static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_Obj *objPtr)); /* - * The regular expression Tcl object type. This serves as a cache of the - * compiled form of the regular expression. + * The regular expression Tcl object type. This serves as a cache + * of the compiled form of the regular expression. */ -Tcl_ObjType tclRegexpType = { +static Tcl_ObjType tclRegexpType = { "regexp", /* name */ FreeRegexpInternalRep, /* freeIntRepProc */ DupRegexpInternalRep, /* dupIntRepProc */ NULL, /* updateStringProc */ SetRegexpFromAny /* setFromAnyProc */ }; + /* *---------------------------------------------------------------------- * * Tcl_RegExpCompile -- * - * Compile a regular expression into a form suitable for fast matching. - * This function is DEPRECATED in favor of the object version of the - * command. + * Compile a regular expression into a form suitable for fast + * matching. This procedure is DEPRECATED in favor of the + * object version of the command. * * Results: - * The return value is a pointer to the compiled form of string, suitable - * for passing to Tcl_RegExpExec. This compiled form is only valid up - * until the next call to this function, so don't keep these around for a - * long time! If an error occurred while compiling the pattern, then NULL - * is returned and an error message is left in the interp's result. + * The return value is a pointer to the compiled form of string, + * suitable for passing to Tcl_RegExpExec. This compiled form + * is only valid up until the next call to this procedure, so + * don't keep these around for a long time! If an error occurred + * while compiling the pattern, then NULL is returned and an error + * message is left in the interp's result. * * Side effects: * Updates the cache of compiled regexps. @@ -131,13 +136,13 @@ Tcl_ObjType tclRegexpType = { */ Tcl_RegExp -Tcl_RegExpCompile( - Tcl_Interp *interp, /* For use in error reporting and to access - * the interp regexp cache. */ - const char *pattern) /* String for which to produce compiled - * regular expression. */ +Tcl_RegExpCompile(interp, string) + Tcl_Interp *interp; /* For use in error reporting and + * to access the interp regexp cache. */ + CONST char *string; /* String for which to produce + * compiled regular expression. */ { - return (Tcl_RegExp) CompileRegexp(interp, pattern, (int) strlen(pattern), + return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string), REG_ADVANCED); } @@ -146,14 +151,15 @@ Tcl_RegExpCompile( * * Tcl_RegExpExec -- * - * Execute the regular expression matcher using a compiled form of a - * regular expression and save information about any match that is found. + * Execute the regular expression matcher using a compiled form + * of a regular expression and save information about any match + * that is found. * * Results: - * If an error occurs during the matching operation then -1 is returned - * and the interp's result contains an error message. Otherwise the - * return value is 1 if a matching range is found and 0 if there is no - * matching range. + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if a matching range is + * found and 0 if there is no matching range. * * Side effects: * None. @@ -162,27 +168,27 @@ Tcl_RegExpCompile( */ int -Tcl_RegExpExec( - Tcl_Interp *interp, /* Interpreter to use for error reporting. */ - Tcl_RegExp re, /* Compiled regular expression; must have been - * returned by previous call to +Tcl_RegExpExec(interp, re, string, start) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; must have + * been returned by previous call to * Tcl_GetRegExpFromObj. */ - const char *text, /* Text against which to match re. */ - const char *start) /* If text is part of a larger string, this - * identifies beginning of larger string, so - * that "^" won't match. */ + CONST char *string; /* String against which to match re. */ + CONST char *start; /* If string is part of a larger string, + * this identifies beginning of larger + * string, so that "^" won't match. */ { int flags, result, numChars; TclRegexp *regexp = (TclRegexp *)re; Tcl_DString ds; - const Tcl_UniChar *ustr; + CONST Tcl_UniChar *ustr; /* - * If the starting point is offset from the beginning of the buffer, then - * we need to tell the regexp engine not to match "^". + * If the starting point is offset from the beginning of the buffer, + * then we need to tell the regexp engine not to match "^". */ - if (text > start) { + if (string > start) { flags = REG_NOTBOL; } else { flags = 0; @@ -192,7 +198,7 @@ Tcl_RegExpExec( * Remember the string for use by Tcl_RegExpRange(). */ - regexp->string = text; + regexp->string = string; regexp->objPtr = NULL; /* @@ -200,10 +206,10 @@ Tcl_RegExpExec( */ Tcl_DStringInit(&ds); - ustr = Tcl_UtfToUniCharDString(text, -1, &ds); + ustr = Tcl_UtfToUniCharDString(string, -1, &ds); numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); - result = RegExpExecUniChar(interp, re, ustr, numChars, -1 /* nmatches */, - flags); + result = RegExpExecUniChar(interp, re, ustr, numChars, + -1 /* nmatches */, flags); Tcl_DStringFree(&ds); return result; @@ -219,7 +225,7 @@ Tcl_RegExpExec( * * Results: * The variables at *startPtr and *endPtr are modified to hold the - * addresses of the endpoints of the range given by index. If the + * addresses of the endpoints of the range given by index. If the * specified range doesn't exist then NULLs are returned. * * Side effects: @@ -229,19 +235,19 @@ Tcl_RegExpExec( */ void -Tcl_RegExpRange( - Tcl_RegExp re, /* Compiled regular expression that has been - * passed to Tcl_RegExpExec. */ - int index, /* 0 means give the range of the entire match, - * > 0 means give the range of a matching - * subrange. */ - const char **startPtr, /* Store address of first character in - * (sub-)range here. */ - const char **endPtr) /* Store address of character just after last - * in (sub-)range here. */ +Tcl_RegExpRange(re, index, startPtr, endPtr) + Tcl_RegExp re; /* Compiled regular expression that has + * been passed to Tcl_RegExpExec. */ + int index; /* 0 means give the range of the entire + * match, > 0 means give the range of + * a matching subrange. */ + CONST char **startPtr; /* Store address of first character in + * (sub-) range here. */ + CONST char **endPtr; /* Store address of character just after last + * in (sub-) range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; - const char *string; + CONST char *string; if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; @@ -249,7 +255,7 @@ Tcl_RegExpRange( *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { - string = TclGetString(regexpPtr->objPtr); + string = Tcl_GetString(regexpPtr->objPtr); } else { string = regexpPtr->string; } @@ -264,13 +270,14 @@ Tcl_RegExpRange( * RegExpExecUniChar -- * * Execute the regular expression matcher using a compiled form of a - * regular expression and save information about any match that is found. + * regular expression and save information about any match that is + * found. * * Results: - * If an error occurs during the matching operation then -1 is returned - * and an error message is left in interp's result. Otherwise the return - * value is 1 if a matching range was found or 0 if there was no matching - * range. + * If an error occurs during the matching operation then -1 is + * returned and an error message is left in interp's result. + * Otherwise the return value is 1 if a matching range was found or + * 0 if there was no matching range. * * Side effects: * None. @@ -279,17 +286,17 @@ Tcl_RegExpRange( */ static int -RegExpExecUniChar( - Tcl_Interp *interp, /* Interpreter to use for error reporting. */ - Tcl_RegExp re, /* Compiled regular expression; returned by a - * previous call to Tcl_GetRegExpFromObj */ - const Tcl_UniChar *wString, /* String against which to match re. */ - int numChars, /* Length of Tcl_UniChar string (must be - * >=0). */ - int nmatches, /* How many subexpression matches (counting - * the whole match as subexpression 0) are of - * interest. -1 means "don't know". */ - int flags) /* Regular expression flags. */ +RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; returned by + * a previous call to Tcl_GetRegExpFromObj */ + CONST Tcl_UniChar *wString; /* String against which to match re. */ + int numChars; /* Length of Tcl_UniChar string (must + * be >= 0). */ + int nmatches; /* How many subexpression matches (counting + * the whole match as subexpression 0) are + * of interest. -1 means "don't know". */ + int flags; /* Regular expression flags. */ { int status; TclRegexp *regexpPtr = (TclRegexp *) re; @@ -331,8 +338,8 @@ RegExpExecUniChar( * * Results: * The variables at *startPtr and *endPtr are modified to hold the - * offsets of the endpoints of the range given by index. If the specified - * range doesn't exist then -1s are supplied. + * offsets of the endpoints of the range given by index. If the + * specified range doesn't exist then -1s are supplied. * * Side effects: * None. @@ -341,17 +348,17 @@ RegExpExecUniChar( */ void -TclRegExpRangeUniChar( - Tcl_RegExp re, /* Compiled regular expression that has been - * passed to Tcl_RegExpExec. */ - int index, /* 0 means give the range of the entire match, - * > 0 means give the range of a matching - * subrange, -1 means the range of the - * rm_extend field. */ - int *startPtr, /* Store address of first character in - * (sub-)range here. */ - int *endPtr) /* Store address of character just after last - * in (sub-)range here. */ +TclRegExpRangeUniChar(re, index, startPtr, endPtr) + Tcl_RegExp re; /* Compiled regular expression that has + * been passed to Tcl_RegExpExec. */ + int index; /* 0 means give the range of the entire + * match, > 0 means give the range of + * a matching subrange, -1 means the + * range of the rm_extend field. */ + int *startPtr; /* Store address of first character in + * (sub-) range here. */ + int *endPtr; /* Store address of character just after last + * in (sub-) range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; @@ -375,9 +382,10 @@ TclRegExpRangeUniChar( * See if a string matches a regular expression. * * Results: - * If an error occurs during the matching operation then -1 is returned - * and the interp's result contains an error message. Otherwise the - * return value is 1 if "text" matches "pattern" and 0 otherwise. + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. * * Side effects: * None. @@ -386,10 +394,11 @@ TclRegExpRangeUniChar( */ int -Tcl_RegExpMatch( - Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ - const char *text, /* Text to search for pattern matches. */ - const char *pattern) /* Regular expression to match against text. */ +Tcl_RegExpMatch(interp, string, pattern) + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ + CONST char *string; /* String. */ + CONST char *pattern; /* Regular expression to match against + * string. */ { Tcl_RegExp re; @@ -397,7 +406,7 @@ Tcl_RegExpMatch( if (re == NULL) { return -1; } - return Tcl_RegExpExec(interp, re, text, text); + return Tcl_RegExpExec(interp, re, string, string); } /* @@ -408,9 +417,10 @@ Tcl_RegExpMatch( * Execute a precompiled regexp against the given object. * * Results: - * If an error occurs during the matching operation then -1 is returned - * and the interp's result contains an error message. Otherwise the - * return value is 1 if "string" matches "pattern" and 0 otherwise. + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. * * Side effects: * Converts the object to a Unicode object. @@ -419,59 +429,38 @@ Tcl_RegExpMatch( */ int -Tcl_RegExpExecObj( - Tcl_Interp *interp, /* Interpreter to use for error reporting. */ - Tcl_RegExp re, /* Compiled regular expression; must have been - * returned by previous call to +Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; must have + * been returned by previous call to * Tcl_GetRegExpFromObj. */ - Tcl_Obj *textObj, /* Text against which to match re. */ - int offset, /* Character index that marks where matching + Tcl_Obj *objPtr; /* String against which to match re. */ + int offset; /* Character index that marks where matching * should begin. */ - int nmatches, /* How many subexpression matches (counting - * the whole match as subexpression 0) are of - * interest. -1 means all of them. */ - int flags) /* Regular expression execution flags. */ + int nmatches; /* How many subexpression matches (counting + * the whole match as subexpression 0) are + * of interest. -1 means all of them. */ + int flags; /* Regular expression execution flags. */ { TclRegexp *regexpPtr = (TclRegexp *) re; Tcl_UniChar *udata; int length; - int reflags = regexpPtr->flags; -#define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) - - /* - * Take advantage of the equivalent glob pattern, if one exists. - * This is possible based only on the right mix of incoming flags (0) - * and regexp compile flags. - */ - if ((offset == 0) && (nmatches == 0) && (flags == 0) - && !(reflags & ~TCL_REG_GLOBOK_FLAGS) - && (regexpPtr->globObjPtr != NULL)) { - int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0; - - /* - * Pass to TclStringMatchObj for obj-specific handling. - * XXX: Currently doesn't take advantage of exact-ness that - * XXX: TclReToGlob tells us about - */ - - return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase); - } /* * Save the target object so we can extract strings from it later. */ regexpPtr->string = NULL; - regexpPtr->objPtr = textObj; + regexpPtr->objPtr = objPtr; - udata = Tcl_GetUnicodeFromObj(textObj, &length); + udata = Tcl_GetUnicodeFromObj(objPtr, &length); if (offset > length) { offset = length; } udata += offset; length -= offset; - + return RegExpExecUniChar(interp, re, udata, length, nmatches, flags); } @@ -483,9 +472,10 @@ Tcl_RegExpExecObj( * See if an object matches a regular expression. * * Results: - * If an error occurs during the matching operation then -1 is returned - * and the interp's result contains an error message. Otherwise the - * return value is 1 if "text" matches "pattern" and 0 otherwise. + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. * * Side effects: * Changes the internal rep of the pattern and string objects. @@ -494,10 +484,10 @@ Tcl_RegExpExecObj( */ int -Tcl_RegExpMatchObj( - Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ - Tcl_Obj *textObj, /* Object containing the String to search. */ - Tcl_Obj *patternObj) /* Regular expression to match against +Tcl_RegExpMatchObj(interp, stringObj, patternObj) + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ + Tcl_Obj *stringObj; /* Object containing the String to search. */ + Tcl_Obj *patternObj; /* Regular expression to match against * string. */ { Tcl_RegExp re; @@ -507,7 +497,7 @@ Tcl_RegExpMatchObj( if (re == NULL) { return -1; } - return Tcl_RegExpExecObj(interp, re, textObj, 0 /* offset */, + return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */, 0 /* nmatches */, 0 /* flags */); } @@ -528,9 +518,9 @@ Tcl_RegExpMatchObj( */ void -Tcl_RegExpGetInfo( - Tcl_RegExp regexp, /* Pattern from which to get subexpressions. */ - Tcl_RegExpInfo *infoPtr) /* Match information is stored here. */ +Tcl_RegExpGetInfo(regexp, infoPtr) + Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */ + Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */ { TclRegexp *regexpPtr = (TclRegexp *) regexp; @@ -544,14 +534,14 @@ Tcl_RegExpGetInfo( * * Tcl_GetRegExpFromObj -- * - * Compile a regular expression into a form suitable for fast matching. - * This function caches the result in a Tcl_Obj. + * Compile a regular expression into a form suitable for fast + * matching. This procedure caches the result in a Tcl_Obj. * * Results: - * The return value is a pointer to the compiled form of string, suitable - * for passing to Tcl_RegExpExec. If an error occurred while compiling - * the pattern, then NULL is returned and an error message is left in the - * interp's result. + * The return value is a pointer to the compiled form of string, + * suitable for passing to Tcl_RegExpExec. If an error occurred + * while compiling the pattern, then NULL is returned and an error + * message is left in the interp's result. * * Side effects: * Updates the native rep of the Tcl_Obj. @@ -560,28 +550,25 @@ Tcl_RegExpGetInfo( */ Tcl_RegExp -Tcl_GetRegExpFromObj( - Tcl_Interp *interp, /* For use in error reporting, and to access +Tcl_GetRegExpFromObj(interp, objPtr, flags) + Tcl_Interp *interp; /* For use in error reporting, and to access * the interp regexp cache. */ - Tcl_Obj *objPtr, /* Object whose string rep contains regular - * expression pattern. Internal rep will be + Tcl_Obj *objPtr; /* Object whose string rep contains regular + * expression pattern. Internal rep will be * changed to compiled form of this regular * expression. */ - int flags) /* Regular expression compilation flags. */ + int flags; /* Regular expression compilation flags. */ { int length; + Tcl_ObjType *typePtr; TclRegexp *regexpPtr; char *pattern; - /* - * This is OK because we only actually interpret this value properly as a - * TclRegexp* when the type is tclRegexpType. - */ - + typePtr = objPtr->typePtr; regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; - if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { - pattern = TclGetStringFromObj(objPtr, &length); + if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { + pattern = Tcl_GetStringFromObj(objPtr, &length); regexpPtr = CompileRegexp(interp, pattern, length, flags); if (regexpPtr == NULL) { @@ -590,7 +577,7 @@ Tcl_GetRegExpFromObj( /* * Add a reference to the regexp so it will persist even if it is - * pushed out of the current thread's regexp cache. This reference + * pushed out of the current thread's regexp cache. This reference * will be removed when the object's internal rep is freed. */ @@ -600,8 +587,10 @@ Tcl_GetRegExpFromObj( * Free the old representation and set our type. */ - TclFreeIntRep(objPtr); - objPtr->internalRep.otherValuePtr = (void *) regexpPtr; + if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr; objPtr->typePtr = &tclRegexpType; } return (Tcl_RegExp) regexpPtr; @@ -615,10 +604,10 @@ Tcl_GetRegExpFromObj( * Return information about a compiled regular expression. * * Results: - * The return value is -1 for failure, 0 for success, although at the - * moment there's nothing that could fail. On success, a list is left in - * the interp's result: first element is the subexpression count, second - * is a list of re_info bit names. + * The return value is -1 for failure, 0 for success, although at + * the moment there's nothing that could fail. On success, a list + * is left in the interp's result: first element is the subexpression + * count, second is a list of re_info bit names. * * Side effects: * None. @@ -627,16 +616,16 @@ Tcl_GetRegExpFromObj( */ int -TclRegAbout( - Tcl_Interp *interp, /* For use in variable assignment. */ - Tcl_RegExp re) /* The compiled regular expression. */ +TclRegAbout(interp, re) + Tcl_Interp *interp; /* For use in variable assignment. */ + Tcl_RegExp re; /* The compiled regular expression. */ { - TclRegexp *regexpPtr = (TclRegexp *) re; - struct infoname { + TclRegexp *regexpPtr = (TclRegexp *)re; + char buf[TCL_INTEGER_SPACE]; + static struct infoname { int bit; - const char *text; - }; - static const struct infoname infonames[] = { + char *text; + } infonames[] = { {REG_UBACKREF, "REG_UBACKREF"}, {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"}, {REG_UBOUNDS, "REG_UBOUNDS"}, @@ -651,40 +640,37 @@ TclRegAbout( {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, {REG_USHORTEST, "REG_USHORTEST"}, - {0, NULL} + {0, ""} }; - const struct infoname *inf; - Tcl_Obj *infoObj; - - /* - * The reset here guarantees that the interpreter result is empty and - * unshared. This means that we can use Tcl_ListObjAppendElement on the - * result object quite safely. - */ + struct infoname *inf; + int n; Tcl_ResetResult(interp); - /* - * Assume that there will never be more than INT_MAX subexpressions. This - * is a pretty reasonable assumption; the RE engine doesn't scale _that_ - * well and Tcl has other limits that constrain things as well... - */ - - Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), - Tcl_NewIntObj((int) regexpPtr->re.re_nsub)); + sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub)); + Tcl_AppendElement(interp, buf); /* - * Now append a list of all the bit-flags set for the RE. + * Must count bits before generating list, because we must know + * whether {} are needed before we start appending names. */ - - TclNewObj(infoObj); - for (inf=infonames ; inf->bit != 0 ; inf++) { - if (regexpPtr->re.re_info & inf->bit) { - Tcl_ListObjAppendElement(NULL, infoObj, - Tcl_NewStringObj(inf->text, -1)); + n = 0; + for (inf = infonames; inf->bit != 0; inf++) { + if (regexpPtr->re.re_info&inf->bit) { + n++; } } - Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj); + if (n != 1) { + Tcl_AppendResult(interp, " {", NULL); + } + for (inf = infonames; inf->bit != 0; inf++) { + if (regexpPtr->re.re_info&inf->bit) { + Tcl_AppendElement(interp, inf->text); + } + } + if (n != 1) { + Tcl_AppendResult(interp, "}", NULL); + } return 0; } @@ -706,25 +692,26 @@ TclRegAbout( */ void -TclRegError( - Tcl_Interp *interp, /* Interpreter for error reporting. */ - const char *msg, /* Message to prepend to error. */ - int status) /* Status code to report. */ +TclRegError(interp, msg, status) + Tcl_Interp *interp; /* Interpreter for error reporting. */ + CONST char *msg; /* Message to prepend to error. */ + int status; /* Status code to report. */ { char buf[100]; /* ample in practice */ char cbuf[100]; /* lots in practice */ size_t n; - const char *p; + char *p; Tcl_ResetResult(interp); - n = TclReError(status, NULL, buf, sizeof(buf)); + n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf)); p = (n > sizeof(buf)) ? "..." : ""; Tcl_AppendResult(interp, msg, buf, p, NULL); sprintf(cbuf, "%d", status); - (void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf)); + (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf)); Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); } + /* *---------------------------------------------------------------------- @@ -744,8 +731,8 @@ TclRegError( */ static void -FreeRegexpInternalRep( - Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */ +FreeRegexpInternalRep(objPtr) + Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */ { TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; @@ -756,7 +743,6 @@ FreeRegexpInternalRep( if (--(regexpRepPtr->refCount) <= 0) { FreeRegexp(regexpRepPtr); } - objPtr->typePtr = NULL; } /* @@ -764,8 +750,8 @@ FreeRegexpInternalRep( * * DupRegexpInternalRep -- * - * We copy the reference to the compiled regexp and bump its reference - * count. + * We copy the reference to the compiled regexp and bump its + * reference count. * * Results: * None. @@ -777,12 +763,11 @@ FreeRegexpInternalRep( */ static void -DupRegexpInternalRep( - Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ - Tcl_Obj *copyPtr) /* Object with internal rep to set. */ +DupRegexpInternalRep(srcPtr, copyPtr) + Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ + Tcl_Obj *copyPtr; /* Object with internal rep to set. */ { TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr; - regexpPtr->refCount++; copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr; copyPtr->typePtr = &tclRegexpType; @@ -809,9 +794,9 @@ DupRegexpInternalRep( */ static int -SetRegexpFromAny( - Tcl_Interp *interp, /* Used for error reporting if not NULL. */ - Tcl_Obj *objPtr) /* The object to convert. */ +SetRegexpFromAny(interp, objPtr) + Tcl_Interp *interp; /* Used for error reporting if not NULL. */ + Tcl_Obj *objPtr; /* The object to convert. */ { if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) { return TCL_ERROR; @@ -824,36 +809,37 @@ SetRegexpFromAny( * * CompileRegexp -- * - * Attempt to compile the given regexp pattern. If the compiled regular - * expression can be found in the per-thread cache, it will be used - * instead of compiling a new copy. + * Attempt to compile the given regexp pattern. If the compiled + * regular expression can be found in the per-thread cache, it + * will be used instead of compiling a new copy. * * Results: - * The return value is a pointer to a newly allocated TclRegexp that - * represents the compiled pattern, or NULL if the pattern could not be - * compiled. If NULL is returned, an error message is left in the - * interp's result. + * The return value is a pointer to a newly allocated TclRegexp + * that represents the compiled pattern, or NULL if the pattern + * could not be compiled. If NULL is returned, an error message is + * left in the interp's result. * * Side effects: - * The thread-local regexp cache is updated and a new TclRegexp may be - * allocated. + * The thread-local regexp cache is updated and a new TclRegexp may + * be allocated. * *---------------------------------------------------------------------- */ static TclRegexp * -CompileRegexp( - Tcl_Interp *interp, /* Used for error reporting if not NULL. */ - const char *string, /* The regexp to compile (UTF-8). */ - int length, /* The length of the string in bytes. */ - int flags) /* Compilation flags. */ +CompileRegexp(interp, string, length, flags) + Tcl_Interp *interp; /* Used for error reporting if not NULL. */ + CONST char *string; /* The regexp to compile (UTF-8). */ + int length; /* The length of the string in bytes. */ + int flags; /* Compilation flags. */ { TclRegexp *regexpPtr; - const Tcl_UniChar *uniString; - int numChars, status, i, exact; + CONST Tcl_UniChar *uniString; + int numChars; Tcl_DString stringBuf; + int status, i; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - + if (!tsdPtr->initialized) { tsdPtr->initialized = 1; Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL); @@ -861,14 +847,14 @@ CompileRegexp( /* * This routine maintains a second-level regular expression cache in - * addition to the per-object regexp cache. The per-thread cache is needed + * addition to the per-object regexp cache. The per-thread cache is needed * to handle the case where for various reasons the object is lost between * invocations of the regexp command, but the literal pattern is the same. */ /* - * Check the per-thread compiled regexp cache. We can only reuse a regexp - * if it has the same pattern and the same flags. + * Check the per-thread compiled regexp cache. We can only reuse + * a regexp if it has the same pattern and the same flags. */ for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { @@ -876,8 +862,8 @@ CompileRegexp( && (tsdPtr->regexps[i]->flags == flags) && (strcmp(string, tsdPtr->patterns[i]) == 0)) { /* - * Move the matched pattern to the first slot in the cache and - * shift the other patterns down one position. + * Move the matched pattern to the first slot in the + * cache and shift the other patterns down one position. */ if (i != 0) { @@ -902,7 +888,7 @@ CompileRegexp( /* * This is a new expression, so compile it and add it to the cache. */ - + regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); regexpPtr->objPtr = NULL; regexpPtr->string = NULL; @@ -933,29 +919,15 @@ CompileRegexp( ckfree((char *)regexpPtr); if (interp) { TclRegError(interp, - "couldn't compile regular expression pattern: ", status); + "couldn't compile regular expression pattern: ", + status); } return NULL; } /* - * Convert RE to a glob pattern equivalent, if any, and cache it. If this - * is not possible, then globObjPtr will be NULL. This is used by - * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). - */ - - if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) { - regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf), - Tcl_DStringLength(&stringBuf)); - Tcl_IncrRefCount(regexpPtr->globObjPtr); - Tcl_DStringFree(&stringBuf); - } else { - regexpPtr->globObjPtr = NULL; - } - - /* - * Allocate enough space for all of the subexpressions, plus one extra for - * the entire pattern. + * Allocate enough space for all of the subexpressions, plus one + * extra for the entire pattern. */ regexpPtr->matches = (regmatch_t *) ckalloc( @@ -1009,13 +981,10 @@ CompileRegexp( */ static void -FreeRegexp( - TclRegexp *regexpPtr) /* Compiled regular expression to free. */ +FreeRegexp(regexpPtr) + TclRegexp *regexpPtr; /* Compiled regular expression to free. */ { TclReFree(®expPtr->re); - if (regexpPtr->globObjPtr) { - TclDecrRefCount(regexpPtr->globObjPtr); - } if (regexpPtr->matches) { ckfree((char *) regexpPtr->matches); } @@ -1027,7 +996,8 @@ FreeRegexp( * * FinalizeRegexp -- * - * Release the storage associated with the per-thread regexp cache. + * Release the storage associated with the per-thread regexp + * cache. * * Results: * None. @@ -1039,8 +1009,8 @@ FreeRegexp( */ static void -FinalizeRegexp( - ClientData clientData) /* Not used. */ +FinalizeRegexp(clientData) + ClientData clientData; /* Not used. */ { int i; TclRegexp *regexpPtr; @@ -1060,11 +1030,3 @@ FinalizeRegexp( */ tsdPtr->initialized = 0; } - -/* - * Local Variables: - * mode: c - * c-basic-offset: 4 - * fill-column: 78 - * End: - */ |
