diff options
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r-- | generic/tclRegexp.c | 206 |
1 files changed, 49 insertions, 157 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index d65b19a..a4c9c37 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.4 1998/11/11 01:44:53 stanton Exp $ */ #include "tclInt.h" @@ -22,60 +22,42 @@ * The routines in this file use Henry Spencer's regular expression * package contained in the following additional source files: * - * chr.h tclRegexp.h lex.c - * guts.h color.c locale.c - * wchar.h compile.c nfa.c - * wctype.h exec.c - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. + * regc_color.c regc_cvec.c regc_lex.c + * regc_nfa.c regcomp.c regcustom.h + * rege_dfa.c regerror.c regerrs.h + * regex.h regexec.c regfree.c + * regfronts.c regguts.h + * + * Copyright (c) 1998 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * *** NOTE: this code has been altered slightly for use in Tcl: *** - * *** 1. Use ckalloc, ckfree, and ckrealloc instead of malloc, *** - * *** free, and realloc. *** - * *** 2. Add extra argument to regexp to specify the real *** - * *** start of the string separately from the start of the *** - * *** current search. This is needed to search for multiple *** - * *** matches within a string. *** - * *** 3. Names have been changed, e.g. from re_comp to *** + * *** 1. Names have been changed, e.g. from re_comp to *** * *** TclRegComp, to avoid clashes with other *** * *** regexp implementations used by applications. *** - * *** 4. Various lint-like things, such as casting arguments *** - * *** in procedure calls, removing debugging code and *** - * *** unused vars and procs. *** - * *** 5. Removed "backward-compatibility kludges" such as *** - * *** REG_PEND and REG_STARTEND flags, the re_endp field in *** - * *** the regex_t struct, and the fronts.c layer. *** - * *** 6. Changed char to Tcl_UniChar. *** - * *** 7. Removed -DPOSIX_MISTAKE compile-time flag. *** - * *** This is now the default. *** - * *** 8. For performance considerations, created new Unicode *** - * *** interfaces to avoid having to convert between UTF and *** - * *** Unicode when we already had the Unicode string. For *** - * *** example, in a "regsub -all" we were converting the *** - * *** match string to Unicode N times, where N is the number *** - * *** of bytes in the source string. *** - * *** 9. Changed/widened some of the interfaces to allow *** - * *** explicit passing of flags so that tclTest.c could *** - * *** test the full range of acceptable flags. *** */ /* @@ -181,7 +163,7 @@ Tcl_RegExpCompile(interp, string) if (iPtr->patterns[NUM_REGEXPS-1] != NULL) { ckfree(iPtr->patterns[NUM_REGEXPS-1]); - regfree(&(iPtr->regexps[NUM_REGEXPS-1]->re)); + re_ufree(&(iPtr->regexps[NUM_REGEXPS-1]->re)); ckfree((char *) iPtr->regexps[NUM_REGEXPS-1]); } for (i = NUM_REGEXPS - 2; i >= 0; i--) { @@ -249,7 +231,7 @@ Tcl_RegExpExec(interp, re, string, start) * Perform the regexp match. */ - result = TclRegExpExecUniChar(interp, re, uniString, numChars, + result = TclRegExpExecUniChar(interp, re, uniString, numChars, -1, ((string > start) ? REG_NOTBOL : 0)); Tcl_DStringFree(&stringBuffer); @@ -324,7 +306,7 @@ Tcl_RegExpRange(re, index, startPtr, endPtr) */ int -TclRegExpExecUniChar(interp, re, wString, numChars, flags) +TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) Tcl_Interp *interp; /* Interpreter to use for error reporting. */ Tcl_RegExp re; /* Compiled regular expression; returned by * a previous call to Tcl_RegExpCompile() or @@ -332,14 +314,20 @@ TclRegExpExecUniChar(interp, re, wString, numChars, flags) CONST Tcl_UniChar *wString; /* String against which to match re. */ int numChars; /* Length of string in Tcl_UniChars (must * be >= 0). */ + int nmatches; /* How many subexpression matches (counting + * the whole match as subexpression 0) are + * of interest. -1 means "don't know". */ int flags; /* Regular expression flags. */ { int status; TclRegexp *regexpPtr = (TclRegexp *) re; + size_t nm = regexpPtr->re.re_nsub + 1; + + if (nmatches >= 0 && (size_t) nmatches < nm) + nm = (size_t) nmatches; status = re_uexec(®expPtr->re, wString, (size_t) numChars, - (rm_detail_t *)NULL, - regexpPtr->re.re_nsub + 1, regexpPtr->matches, flags); + (rm_detail_t *)NULL, nm, regexpPtr->matches, flags); /* * Check for errors. @@ -569,6 +557,7 @@ TclRegAbout(interp, re) REG_UUNPORT, "REG_UUNPORT", REG_ULOCALE, "REG_ULOCALE", REG_UEMPTYMATCH, "REG_UEMPTYMATCH", + REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE", 0, "", }; struct infoname *inf; @@ -632,12 +621,12 @@ TclRegError(interp, msg, status) char *p; Tcl_ResetResult(interp); - n = regerror(status, (regex_t *)NULL, buf, sizeof(buf)); + n = re_uerror(status, (regex_t *)NULL, buf, sizeof(buf)); p = (n > sizeof(buf)) ? "..." : ""; Tcl_AppendResult(interp, msg, buf, p, NULL); sprintf(cbuf, "%d", status); - (VOID) regerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf)); + (VOID) re_uerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf)); Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); } @@ -665,7 +654,7 @@ FreeRegexpInternalRep(objPtr) { TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; - regfree(®expRepPtr->re); + re_ufree(®expRepPtr->re); if (regexpRepPtr->matches) { ckfree((char *) regexpRepPtr->matches); } @@ -677,7 +666,7 @@ FreeRegexpInternalRep(objPtr) * * DupRegexpInternalRep -- * - * It is way to hairy to copy a regular expression, so we punt + * It is way too hairy to copy a regular expression, so we punt * and revert the object back to a vanilla string. * * Results: @@ -802,100 +791,3 @@ CompileRegexp(interp, string, length, flags) return regexpPtr; } - -/* - *--------------------------------------------------------------------------- - * - * TclRegXflags -- - * - * Parse a string of extended regexp flag letters, for testing. - * - * Results: - * No return value (you're on your own for errors here). - * - * Side effects: - * Modifies *cflagsPtr, a regcomp flags word, and *eflagsPtr, a - * regexec flags word, as appropriate. - * - *---------------------------------------------------------------------- - */ - -VOID -TclRegXflags(string, length, cflagsPtr, eflagsPtr) - char *string; /* The string of flags. */ - int length; /* The length of the string in bytes. */ - int *cflagsPtr; /* compile flags word */ - int *eflagsPtr; /* exec flags word */ -{ - int i; - int cflags; - int eflags; - - cflags = *cflagsPtr; - eflags = *eflagsPtr; - for (i = 0; i < length; i++) { - switch (string[i]) { - case 'a': { - cflags |= REG_ADVF; - break; - } - case 'b': { - cflags &= ~REG_ADVANCED; - break; - } - case 'e': { - cflags &= ~REG_ADVANCED; - cflags |= REG_EXTENDED; - break; - } - case 'q': { - cflags &= ~REG_ADVANCED; - cflags |= REG_QUOTE; - break; - } - case 'i': { - cflags |= REG_ICASE; - break; - } - case 'o': { /* o for opaque */ - cflags |= REG_NOSUB; - break; - } - case 'x': { - cflags |= REG_EXPANDED; - break; - } - case 'p': { - cflags |= REG_NLSTOP; - break; - } - case 'w': { - cflags |= REG_NLANCH; - break; - } - case 'n': { - cflags |= REG_NEWLINE; - break; - } - case '+': { - cflags |= REG_FAKEEC; - break; - } - case '^': { - eflags |= REG_NOTBOL; - break; - } - case '$': { - eflags |= REG_NOTEOL; - break; - } - case '%': { - eflags |= REG_SMALL; - break; - } - } - } - - *cflagsPtr = cflags; - *eflagsPtr = eflags; -} |