diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2010-08-12 08:55:37 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2010-08-12 08:55:37 (GMT) |
commit | 9f5140ba5656acce75cbb9f43602fdd70cf400c6 (patch) | |
tree | f54d945578648bccdf66c7f87c5c1dc95164125b /generic/tclCmdMZ.c | |
parent | 4678f5b6436d4675fc5ef46fb270aa89bfaf382c (diff) | |
download | tcl-9f5140ba5656acce75cbb9f43602fdd70cf400c6.zip tcl-9f5140ba5656acce75cbb9f43602fdd70cf400c6.tar.gz tcl-9f5140ba5656acce75cbb9f43602fdd70cf400c6.tar.bz2 |
* generic/tclCmdMZ.c (Tcl_RegexpObjCmd): [Bug 2826551, Patch 2948425]:
Backport of updates to make handling of RE line anchors correct.
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r-- | generic/tclCmdMZ.c | 46 |
1 file changed, 27 insertions, 19 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 2011b4b..0c4615a 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.8 2010/04/06 08:26:02 vasiljevic Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.9 2010/08/12 08:55:38 dkf Exp $ */ #include "tclInt.h" @@ -89,7 +89,7 @@ Tcl_RegexpObjCmd( Tcl_Obj *CONST objv[]) /* Argument objects. */ { int i, indices, match, about, offset, all, doinline, numMatchesSaved; - int cflags, eflags, stringLength; + int cflags, eflags, stringLength, matchLength; Tcl_RegExp regExpr; Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; Tcl_RegExpInfo info; @@ -231,15 +231,6 @@ Tcl_RegexpObjCmd( return TCL_ERROR; } - if (offset > 0) { - /* - * Add flag if using offset (string is part of a larger string), so - * that "^" won't match. - */ - - eflags |= TCL_REG_NOTBOL; - } - objc -= 2; objv += 2; @@ -267,12 +258,23 @@ Tcl_RegexpObjCmd( */ while (1) { - match = Tcl_RegExpExecObj(interp, regExpr, objPtr, - offset /* offset */, numMatchesSaved, eflags - | ((offset > 0 && - (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n')) - ? TCL_REG_NOTBOL : 0)); + /* + * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing + * TCL_REG_NOTBOL indicates that the character at offset should not be + * considered the start of the line. If for example the pattern {^} is + * passed and -start is positive, then the pattern will not match the + * start of the string unless the previous character is a newline. + */ + if ((offset == 0) || ((offset > 0) && + (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n'))) { + eflags = 0; + } else { + eflags = TCL_REG_NOTBOL; + } + + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, + numMatchesSaved, eflags); if (match < 0) { return TCL_ERROR; } @@ -389,12 +391,18 @@ Tcl_RegexpObjCmd( * offset never changes). 
*/ - if (info.matches[0].end == 0) { + matchLength = info.matches[0].end - info.matches[0].start; + offset += info.matches[0].end; + + /* + * A match of length zero could happen for {^} {$} or {.*} and in + * these cases we always want to bump the index up one. + */ + + if (matchLength == 0) { offset++; } - offset += info.matches[0].end; all++; - eflags |= TCL_REG_NOTBOL; if (offset >= stringLength) { break; } |