diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2010-02-11 11:14:22 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2010-02-11 11:14:22 (GMT) |
commit | a80d0318d2c6fdaaad538515154acc68edfe3bbc (patch) | |
tree | dc5c33e1b83e8fb8d6bf21e796e20d3ad93e52d2 /generic | |
parent | ee2ddea6e375270013c0bbe70e83853301efc279 (diff) | |
download | tcl-a80d0318d2c6fdaaad538515154acc68edfe3bbc.zip tcl-a80d0318d2c6fdaaad538515154acc68edfe3bbc.tar.gz tcl-a80d0318d2c6fdaaad538515154acc68edfe3bbc.tar.bz2 |
Tcl Bug 2826551 regexp bugs related to -all -line and -start and newlines
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdMZ.c | 49 |
1 files changed, 29 insertions, 20 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 7e42ec3..5cf9761 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.198 2009/12/25 22:45:05 nijtmans Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.199 2010/02/11 11:14:22 dkf Exp $ */ #include "tclInt.h" @@ -108,7 +108,7 @@ Tcl_RegexpObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int i, indices, match, about, offset, all, doinline, numMatchesSaved; - int cflags, eflags, stringLength; + int cflags, eflags, stringLength, matchLength; Tcl_RegExp regExpr; Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; Tcl_RegExpInfo info; @@ -126,7 +126,6 @@ Tcl_RegexpObjCmd( indices = 0; about = 0; cflags = TCL_REG_ADVANCED; - eflags = 0; offset = 0; all = 0; doinline = 0; @@ -250,15 +249,6 @@ Tcl_RegexpObjCmd( return TCL_ERROR; } - if (offset > 0) { - /* - * Add flag if using offset (string is part of a larger string), so - * that "^" won't match. - */ - - eflags |= TCL_REG_NOTBOL; - } - objc -= 2; objv += 2; @@ -286,12 +276,23 @@ Tcl_RegexpObjCmd( */ while (1) { - match = Tcl_RegExpExecObj(interp, regExpr, objPtr, - offset /* offset */, numMatchesSaved, eflags - | ((offset > 0 && - (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n')) - ? TCL_REG_NOTBOL : 0)); + /* + * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing + * TCL_REG_NOTBOL indicates that the character at offset should not be + * considered the start of the line. If for example the pattern {^} is + * passed and -start is positive, then the pattern will not match the + * start of the string unless the previous character is a newline. + */ + if ((offset == 0) || ((offset > 0) && + (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar) '\n'))) { + eflags = 0; + } else { + eflags = TCL_REG_NOTBOL; + } + + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, + numMatchesSaved, eflags); if (match < 0) { return TCL_ERROR; } @@ -385,6 +386,7 @@ Tcl_RegexpObjCmd( } } else { Tcl_Obj *valuePtr; + valuePtr = Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, 0); if (valuePtr == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", @@ -408,12 +410,19 @@ Tcl_RegexpObjCmd( * offset never changes). */ - if (info.matches[0].end == 0) { + matchLength = (info.matches[0].end - info.matches[0].start); + + offset += info.matches[0].end; + + /* + * A match of length zero could happen for {^} {$} or {.*} and in + * these cases we always want to bump the index up one. + */ + + if (matchLength == 0) { offset++; } - offset += info.matches[0].end; all++; - eflags |= TCL_REG_NOTBOL; if (offset >= stringLength) { break; } |