summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: dkf <donal.k.fellows@manchester.ac.uk> 2010-08-12 08:55:37 (GMT)
committer: dkf <donal.k.fellows@manchester.ac.uk> 2010-08-12 08:55:37 (GMT)
commit: 9f5140ba5656acce75cbb9f43602fdd70cf400c6 (patch)
tree: f54d945578648bccdf66c7f87c5c1dc95164125b /generic
parent: 4678f5b6436d4675fc5ef46fb270aa89bfaf382c (diff)
download: tcl-9f5140ba5656acce75cbb9f43602fdd70cf400c6.zip
tcl-9f5140ba5656acce75cbb9f43602fdd70cf400c6.tar.gz
tcl-9f5140ba5656acce75cbb9f43602fdd70cf400c6.tar.bz2
* generic/tclCmdMZ.c (Tcl_RegexpObjCmd): [Bug 2826551, Patch 2948425]:
Backport of updates to make handling of RE line anchors correct.
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclCmdMZ.c | 46
1 files changed, 27 insertions, 19 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 2011b4b..0c4615a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -15,7 +15,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.8 2010/04/06 08:26:02 vasiljevic Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.9 2010/08/12 08:55:38 dkf Exp $
*/
#include "tclInt.h"
@@ -89,7 +89,7 @@ Tcl_RegexpObjCmd(
Tcl_Obj *CONST objv[]) /* Argument objects. */
{
int i, indices, match, about, offset, all, doinline, numMatchesSaved;
- int cflags, eflags, stringLength;
+ int cflags, eflags, stringLength, matchLength;
Tcl_RegExp regExpr;
Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL;
Tcl_RegExpInfo info;
@@ -231,15 +231,6 @@ Tcl_RegexpObjCmd(
return TCL_ERROR;
}
- if (offset > 0) {
- /*
- * Add flag if using offset (string is part of a larger string), so
- * that "^" won't match.
- */
-
- eflags |= TCL_REG_NOTBOL;
- }
-
objc -= 2;
objv += 2;
@@ -267,12 +258,23 @@ Tcl_RegexpObjCmd(
*/
while (1) {
- match = Tcl_RegExpExecObj(interp, regExpr, objPtr,
- offset /* offset */, numMatchesSaved, eflags
- | ((offset > 0 &&
- (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n'))
- ? TCL_REG_NOTBOL : 0));
+ /*
+ * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing
+ * TCL_REG_NOTBOL indicates that the character at offset should not be
+ * considered the start of the line. If for example the pattern {^} is
+ * passed and -start is positive, then the pattern will not match the
+ * start of the string unless the previous character is a newline.
+ */
+ if ((offset == 0) || ((offset > 0) &&
+ (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n'))) {
+ eflags = 0;
+ } else {
+ eflags = TCL_REG_NOTBOL;
+ }
+
+ match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset,
+ numMatchesSaved, eflags);
if (match < 0) {
return TCL_ERROR;
}
@@ -389,12 +391,18 @@ Tcl_RegexpObjCmd(
* offset never changes).
*/
- if (info.matches[0].end == 0) {
+ matchLength = info.matches[0].end - info.matches[0].start;
+ offset += info.matches[0].end;
+
+ /*
+ * A match of length zero could happen for {^} {$} or {.*} and in
+ * these cases we always want to bump the index up one.
+ */
+
+ if (matchLength == 0) {
offset++;
}
- offset += info.matches[0].end;
all++;
- eflags |= TCL_REG_NOTBOL;
if (offset >= stringLength) {
break;
}