diff options
author | hobbs <hobbs> | 2007-11-12 02:07:18 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2007-11-12 02:07:18 (GMT) |
commit | cf8a7199f105edc95e59373e098af6eb47d22a16 (patch) | |
tree | c7f005156cbe08f8e11d6845d4a71f991dc5b488 /generic/tclUtil.c | |
parent | 094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff) | |
download | tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2 |
* generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:
* generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h:
* generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully
* generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the
* tests/regexpComp.test: [Bug 1830166] simple cases. Also
added TclReToGlob function to convert RE to glob patterns and use
these in the possible cases.
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r-- | generic/tclUtil.c | 186 |
1 files changed, 185 insertions, 1 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 1bcd957..d6bf7f1 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.87 2007/11/11 19:32:17 msofer Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.88 2007/11/12 02:07:20 hobbs Exp $ */ #include "tclInt.h" @@ -3183,6 +3183,190 @@ TclGetPlatform(void) } /* + *---------------------------------------------------------------------- + * + * TclReToGlob -- + * + * Attempt to convert a regular expression to an equivalent glob pattern. + * + * Results: + * Returns TCL_OK on success, TCL_ERROR on failure. + * If interp is not NULL, an error message is placed in the result. + * On success, the DString will contain an exact equivalent glob pattern. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclReToGlob(Tcl_Interp *interp, + const char *reStr, + int reStrLen, + Tcl_DString *dsPtr, + int *exactPtr) +{ + int anchorLeft, anchorRight; + char *dsStr, *dsStrStart, *msg; + const char *p, *strEnd; + + strEnd = reStr + reStrLen; + Tcl_DStringInit(dsPtr); + + /* + * "***=xxx" == "*xxx*" + */ + + if ((reStrLen >= 4) && (memcmp("***=", reStr, 4) == 0)) { + *exactPtr = 1; + Tcl_DStringAppend(dsPtr, reStr + 4, reStrLen - 4); + return TCL_OK; + } + + /* + * Write to the ds directly without the function overhead. + * An equivalent glob pattern can be no more than reStrLen+2 in size. + */ + + Tcl_DStringSetLength(dsPtr, reStrLen + 2); + dsStrStart = Tcl_DStringValue(dsPtr); + + /* + * Check for anchored REs (ie ^foo$), so we can use string equal if + * possible. Do not alter the start of str so we can free it correctly. + */ + + msg = NULL; + p = reStr; + anchorRight = 0; + dsStr = dsStrStart; + if (*p == '^') { + anchorLeft = 1; + p++; + } else { + anchorLeft = 0; + *dsStr++ = '*'; + } + + for ( ; p < strEnd; p++) { + switch (*p) { + case '\\': + p++; + switch (*p) { + case 'a': + *dsStr++ = '\a'; + break; + case 'b': + *dsStr++ = '\b'; + break; + case 'f': + *dsStr++ = '\f'; + break; + case 'n': + *dsStr++ = '\n'; + break; + case 'r': + *dsStr++ = '\r'; + break; + case 't': + *dsStr++ = '\t'; + break; + case 'v': + *dsStr++ = '\v'; + break; + case 'B': + *dsStr++ = '\\'; + *dsStr++ = '\\'; + anchorLeft = 0; /* prevent exact match */ + break; + case '\\': case '*': case '+': case '?': + case '{': case '}': case '(': case ')': case '[': case ']': + case '.': case '|': case '^': case '$': + *dsStr++ = '\\'; + *dsStr++ = *p; + anchorLeft = 0; /* prevent exact match */ + break; + default: + msg = "invalid escape sequence"; + goto invalidGlob; + } + break; + case '.': + anchorLeft = 0; /* prevent exact match */ + if (p+1 < strEnd) { + if (p[1] == '*') { + p++; + if ((dsStr == dsStrStart) || (dsStr[-1] != '*')) { + *dsStr++ = '*'; + } + continue; + } else if (p[1] == '+') { + p++; + *dsStr++ = '?'; + *dsStr++ = '*'; + continue; + } + } + *dsStr++ = '?'; + break; + case '$': + if (p+1 != strEnd) { + msg = "$ not anchor"; + goto invalidGlob; + } + anchorRight = 1; + break; + case '*': case '+': case '?': case '|': case '^': + case '{': case '}': case '(': case ')': case '[': case ']': + msg = "unhandled RE special char"; + goto invalidGlob; + break; + default: + *dsStr++ = *p; + break; + } + } + if (!anchorRight && ((dsStr == dsStrStart) || (dsStr[-1] != '*'))) { + *dsStr++ = '*'; + } + Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart); + +#ifdef TCL_MEM_DEBUG + /* + * Check if this is a bad RE (do this at the end because it can be + * expensive). + * XXX: Is it possible that we can have a bad RE make it through the + * XXX: above checks? + */ + + if (Tcl_RegExpCompile(NULL, reStr) == NULL) { + msg = "couldn't compile RE"; + goto invalidGlob; + } +#endif + + *exactPtr = (anchorLeft && anchorRight); + +#if 0 + fprintf(stderr, "INPUT RE '%.*s' OUTPUT GLOB '%s' anchor %d:%d \n", + reStrLen, reStr, + Tcl_DStringValue(dsPtr), anchorLeft, anchorRight); + fflush(stderr); +#endif + return TCL_OK; + + invalidGlob: +#if 0 + fprintf(stderr, "INPUT RE '%.*s' NO OUTPUT GLOB %s (%c)\n", + reStrLen, reStr, msg, *p); + fflush(stderr); +#endif + Tcl_DStringFree(dsPtr); + return TCL_ERROR; +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 |