summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2010-02-01 11:12:00 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2010-02-01 11:12:00 (GMT)
commitf231dc527e35148e1fb38301a10ccb8614fd1c72 (patch)
tree39bc26fb3acf944bf541192fef351f5c0a327c73
parent31c2cd054d791e99d70c6ff41a6c160142229d1c (diff)
downloadtcl-f231dc527e35148e1fb38301a10ccb8614fd1c72.zip
tcl-f231dc527e35148e1fb38301a10ccb8614fd1c72.tar.gz
tcl-f231dc527e35148e1fb38301a10ccb8614fd1c72.tar.bz2
[Bug 2942697]: Rework the RE engine so that certain pathological patterns are
matched much more rapidly. Many thanks to Tom Lane for dianosing this issue and providing an initial patch.
-rw-r--r--ChangeLog169
-rw-r--r--generic/regexec.c91
2 files changed, 128 insertions, 132 deletions
diff --git a/ChangeLog b/ChangeLog
index 98b9589..a476c12 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,30 +1,38 @@
+2010-02-01 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/regexec.c (ccondissect, crevdissect): [Bug 2942697]: Rework
+ these functions so that certain pathological patterns are matched much
+ more rapidly. Many thanks to Tom Lane for dianosing this issue and
+ providing an initial patch.
+
2010-02-01 Jan Nijtmans <nijtmans@users.sf.net>
- * generic/tclInt.decls Various CYGWIN-related fixes
- * generic/tclInt.h backported from HEAD. Still
- * generic/tclIntPlatDecls.h configure script not modified,
- * generic/tclPort.h so CYGWIN build is still
- * generic/tclTest/c disabled. Reason: although the
- * win/cat.c build succeeds with those changes,
- * win/tclWinDde.c many tests still fail.
- * win/tclWinError.c
- * win/tclWinFile.c
- * win/tclWinPipe.c
- * win/tclWinPort.h
- * win/tclWinReg.c
- * win/tclWinSerial.c
- * win/tclWinSock.c
- * win/tclWinTest.c
- * win/tclWinThrd.c
+ * generic/tclInt.decls: Various CYGWIN-related fixes
+ * generic/tclInt.h: backported from HEAD. Still
+ * generic/tclIntPlatDecls.h: configure script not modified,
+ * generic/tclPort.h: so CYGWIN build is still
+ * generic/tclTest.c: disabled. Reason: although the
+ * win/cat.c: build succeeds with those changes,
+ * win/tclWinDde.c: many tests still fail.
+ * win/tclWinError.c:
+ * win/tclWinFile.c:
+ * win/tclWinPipe.c:
+ * win/tclWinPort.h:
+ * win/tclWinReg.c:
+ * win/tclWinSerial.c:
+ * win/tclWinSock.c:
+ * win/tclWinTest.c:
+ * win/tclWinThrd.c:
2010-01-29 Jan Nijtmans <nijtmans@users.sf.net>
- * generic/tcl.h Use correct TCL_LL_MODIFIER for CYGWIN.
- Formatting (all backported from HEAD)
- * generic/rege_dfa.c: Fix macro conflict on CYGWIN: don't use "small".
- * generic/tclTest.c: Fix gcc 4.4 warning: ignoring return value of
- * unix/tclUnixPipe.c ‘write’
- * unix/tclUnixNotify.c
+ * generic/tcl.h: Use correct TCL_LL_MODIFIER for CYGWIN.
+ Formatting (all backported from HEAD)
+ * generic/rege_dfa.c: Fix macro conflict on CYGWIN: don't use
+ "small".
+ * generic/tclTest.c: Fix gcc 4.4 warning: ignoring return value of
+ * unix/tclUnixPipe.c: ‘write’
+ * unix/tclUnixNotify.c:
2010-01-19 Donal K. Fellows <dkf@users.sf.net>
@@ -47,7 +55,6 @@
internal representation when not needed. Thanks to Alexandre Ferrieux
for this fix.
->>>>>>> 1.3975.2.291
2010-01-06 Jan Nijtmans <nijtmans@users.sf.net>
* generic/tclCompExpr.c: Warning: array subscript has type 'char'
@@ -55,22 +62,22 @@
* libtommath/bn_mp_read_radix.c:
* unix/tclUnixCompat.c: Fix gcc warning: signed and unsigned type
in conditional expression.
- * unix/tcl.m4 Add support for Haiku and CYGWIN dynamical loading
- * unix/configure (regenerated)
- * unix/Makefile.in
- * unix/.cvsignore
- * tests/stack.test Reduced minimum required C-stack size to 2034:
- CYGWIN has this stack size and the test runs fine!
- * generic/tclEnv.c Fix environment tests under CYGWIN
- * generic/tclPort.h
- * tests/env.test
+ * unix/tcl.m4: Add support for Haiku and CYGWIN dynamical loading
+ * unix/configure: (regenerated)
+ * unix/Makefile.in:
+ * unix/.cvsignore:
+ * tests/stack.test: Reduced minimum required C-stack size to 2034:
+ CYGWIN has this stack size and the test runs fine!
+ * generic/tclEnv.c: Fix environment tests under CYGWIN
+ * generic/tclPort.h:
+ * tests/env.test:
2010-01-05 Don Porter <dgp@users.sourceforge.net>
- * generic/tclPathObj.c (TclPathPart): Correct inconsistency between
- * tests/fileName.test (filename-14.31): the string rep and the intrep
- of a path value created by [file rootname]. Thanks to Vitaly Magerya
- for reporting. [Bug 2918610]
+ * generic/tclPathObj.c (TclPathPart): [Bug 2918610]: Correct
+ * tests/fileName.test (filename-14.31): inconsistency between the
+ string rep and the intrep of a path value created by [file rootname].
+ Thanks to Vitaly Magerya for reporting.
2010-01-03 Donal K. Fellows <dkf@users.sf.net>
@@ -152,8 +159,8 @@
2009-12-02 Jan Nijtmans <nijtmans@users.sf.net>
- * tools/genStubs.tcl: Add support for win32 CALLBACK functions
- (needed for Tk bugfix).
+ * tools/genStubs.tcl: Add support for win32 CALLBACK functions (needed
+ for Tk bugfix).
2009-11-30 Donal K. Fellows <dkf@users.sf.net>
@@ -194,22 +201,21 @@
2009-11-12 Andreas Kupries <andreask@activestate.com>
- * generic/tclIO.c (CopyData): [Bug 2895565]. Dropped bogosity
- which used the number of _written_ bytes or character to update
- the counters for the read bytes/characters. See last entry for the
- test case.
+ * generic/tclIO.c (CopyData): [Bug 2895565]: Dropped bogosity which
+ used the number of _written_ bytes or character to update the counters
+ for the read bytes/characters. See last entry for the test case.
2009-11-11 Pat Thoyts <patthoyts@users.sourceforge.net>
- * tests/fCmd.test: Fixed a number of issues for Vista
- * tests/registry.test: and Win7 that are due to restricted
- * tests/winFCmd.test: permissions.
+ * tests/fCmd.test: Fixed a number of issues for Vista and Win7
+ * tests/registry.test: that are due to restricted permissions.
+ * tests/winFCmd.test:
2009-11-11 Don Porter <dgp@users.sourceforge.net>
- * library/http/http.tcl: Update the URL syntax check to
- RFC 3986 compliance on the subject of non-encoded question mark
- characters. [Bug 2891171].
+ * library/http/http.tcl: [Bug 2891171]: Update the URL syntax
+ check to RFC 3986 compliance on the subject of non-encoded question
+ mark characters.
* library/http/pkgIndex.tcl: Bump to http 2.7.5 to avoid any
* unix/Makefile.in: confusion with snapshot "releases"
@@ -258,17 +264,16 @@
2009-11-03 Andreas Kupries <andreask@activestate.com>
- * library/safe.tcl (::safe::InterpSetConfig): [Bug 2854929]. Added
+ * library/safe.tcl (::safe::InterpSetConfig): [Bug 2854929]: Added
code to recursively find deeper paths which may contain modules.
- Required to handle modules with names like 'platform::shell',
- which translate into 'platform/shell-X.tm', i.e arbitrarily deep
+ Required to handle modules with names like 'platform::shell', which
+ translate into 'platform/shell-X.tm', i.e arbitrarily deep
subdirectories.
2009-11-03 Kevin B. Kenny <kennykb@acm.org>
- * library/tzdata/Asia/Novokuznetsk: New tzdata locale for
- Kemerovo oblast', which now keeps Novosibirsk time and not
- Kranoyarsk time.
+ * library/tzdata/Asia/Novokuznetsk: New tzdata locale for Kemerovo
+ oblast', which now keeps Novosibirsk time and not Kranoyarsk time.
* library/tzdata/Asia/Damascus: Syrian DST changes.
* library/tzdata/Asia/Hong_Kong: Hong Kong historic DST corrections.
Olson tzdata2009q.
@@ -290,12 +295,12 @@
to:
typedef unsigned int mp_digit;
For 32-bit builds where "long" and "int" are two names for the same
- thing, this is no change at all. For 64-bit builds, though, this
+ thing, this is no change at all. For 64-bit builds, though, this
causes the dp[] array of an mp_int to be made up of 32-bit elements
- instead of 64-bit elements. This is a huge improvement because
- details elsewhere in the mp_int implementation cause only 28 bits of
- each element to be actually used storing number data. Without this
- change bignums are over 50% wasted space on 64-bit systems.
+ instead of 64-bit elements. This is a huge improvement because details
+ elsewhere in the mp_int implementation cause only 28 bits of each
+ element to be actually used storing number data. Without this change
+ bignums are over 50% wasted space on 64-bit systems.
***POTENTIAL INCOMPATIBILITY***
For 64-bit builds, callers of routines with (mp_digit) or (mp_digit *)
@@ -321,12 +326,12 @@
* tests/fileName.test (fileName-20.[78]): Corrected poor test
hygiene (failure to save and restore the working directory) that
- caused these two tests to fail on Windows (and [Bug 2806250]
- to be reopened).
+ caused these two tests to fail on Windows (and [Bug 2806250] to be
+ reopened).
2009-10-27 Don Porter <dgp@users.sourceforge.net>
- * generic/tclPathObj.c: [Bug 2884203]: Missing refcount on cached
+ * generic/tclPathObj.c: [Bug 2884203]: Missing refcount on cached
normalized path caused crashes.
2009-10-27 Kevin B. Kenny <kennykb@acm.org>
@@ -345,9 +350,9 @@
2009-10-24 Kevin B. Kenny <kennykb@acm.org>
* library/clock.tcl (ProcessPosixTimeZone):
- Corrected a regression in the fix to [Bug 2207436] that caused
- [clock] to apply EU daylight saving time rules in the US. Thanks
- to Karl Lehenbauer for reporting this regression.
+ Corrected a regression in the fix to [Bug 2207436] that caused [clock]
+ to apply EU daylight saving time rules in the US. Thanks to Karl
+ Lehenbauer for reporting this regression.
* tests/clock.test (clock-52.4):
Added a regression test for the above regression.
* library/tzdata/Asia/Dhaka:
@@ -357,9 +362,9 @@
2009-10-23 Andreas Kupries <andreask@activestate.com>
* generic/tclIO.c (FlushChannel): Skip OutputProc for low-level
- 0-length writes. When closing pipes which have already been closed
- not skipping leads to spurious SIG_PIPE signals. Reported by
- Mikhail Teterin <mi+thun@aldan.algebra.com>.
+ 0-length writes. When closing pipes which have already been closed not
+ skipping leads to spurious SIG_PIPE signals. Reported by Mikhail
+ Teterin <mi+thun@aldan.algebra.com>.
2009-10-21 Donal K. Fellows <dkf@users.sf.net>
@@ -368,27 +373,27 @@
2009-10-19 Don Porter <dgp@users.sourceforge.net>
- * generic/tclIO.c: Revised ReadChars and FilterInputBytes routines
- to permit reads to continue up to the string limits of Tcl values.
- Before revisions, large read attempts could panic when as little as
- half the limiting value length was reached. [Patch 2107634]
+ * generic/tclIO.c: Revised ReadChars and FilterInputBytes
+ routines to permit reads to continue up to the string limits of Tcl
+ values. Before revisions, large read attempts could panic when as
+ little as half the limiting value length was reached. [Patch 2107634]
Thanks to Sean Morrison and Bob Parker for their roles in the fix.
2009-10-18 Joe Mistachkin <joe@mistachkin.com>
* tests/thread.test (thread-4.[345]): [Bug 1565466]: Correct tests to
- save their error state before the final call to threadReap just in case
- it triggers an "invalid thread id" error. This error can occur if one
- or more of the target threads has exited prior to the attempt to send
- it an asynchronous exit command.
+ save their error state before the final call to threadReap just in
+ case it triggers an "invalid thread id" error. This error can occur
+ if one or more of the target threads has exited prior to the attempt
+ to send it an asynchronous exit command.
* doc/memory.n: [Bug 988703]: Add mechanism for finding what Tcl_Objs
- * generic/tclCkalloc.c (MemoryCmd): are allocated when built for memory
- * generic/tclInt.decls: debugging. This was previously backported from
- * generic/tclInt.h: Tcl 8.6 with the corrections to fix [Bug 2871908].
- * generic/tclObj.c (ObjData, TclFinalizeThreadObjects): However, there
- were key elements missing. These changes make things consistent between
- branches.
+ * generic/tclCkalloc.c (MemoryCmd): are allocated when built for
+ * generic/tclInt.decls: memory debugging. This was previously
+ * generic/tclInt.h: backported from Tcl 8.6 with the corrections to
+ * generic/tclObj.c (ObjData, TclFinalizeThreadObjects): fix [Bug
+ 2871908]. However, there were key elements missing. These changes make
+ things consistent between branches.
2009-10-17 Donal K. Fellows <dkf@users.sf.net>
diff --git a/generic/regexec.c b/generic/regexec.c
index d39685c..c902209 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -624,27 +624,21 @@ dissect(
case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL);
return REG_OKAY; /* no action, parent did the work */
- break;
case '|': /* alternation */
assert(t->left != NULL);
return altdissect(v, t, begin, end);
- break;
case 'b': /* back ref -- shouldn't be calling us! */
return REG_ASSERT;
- break;
case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL);
return condissect(v, t, begin, end);
- break;
case '(': /* capturing */
assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0);
subset(v, t, begin, end);
return dissect(v, t->left, begin, end);
- break;
default:
return REG_ASSERT;
- break;
}
}
@@ -802,19 +796,15 @@ cdissect(
case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL);
return REG_OKAY; /* no action, parent did the work */
- break;
case '|': /* alternation */
assert(t->left != NULL);
return caltdissect(v, t, begin, end);
- break;
case 'b': /* back ref -- shouldn't be calling us! */
assert(t->left == NULL && t->right == NULL);
return cbrdissect(v, t, begin, end);
- break;
case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL);
return ccondissect(v, t, begin, end);
- break;
case '(': /* capturing */
assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0);
@@ -823,10 +813,8 @@ cdissect(
subset(v, t, begin, end);
}
return er;
- break;
default:
return REG_ASSERT;
- break;
}
}
@@ -843,8 +831,7 @@ ccondissect(
chr *begin, /* beginning of relevant substring */
chr *end) /* end of same */
{
- struct dfa *d;
- struct dfa *d2;
+ struct dfa *d, *d2;
chr *mid;
int er;
@@ -894,15 +881,26 @@ ccondissect(
* Try this midpoint on for size.
*/
- er = cdissect(v, t->left, begin, mid);
- if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
- && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
- break; /* NOTE BREAK OUT */
- }
- if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
- freedfa(d);
- freedfa(d2);
- return er;
+ if (longest(v, d2, mid, end, NULL) == end) {
+ er = cdissect(v, t->left, begin, mid);
+ if (er == REG_OKAY) {
+ er = cdissect(v, t->right, mid, end);
+ if (er == REG_OKAY) {
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_OKAY;
+ }
+ }
+ if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
+ freedfa(d);
+ freedfa(d2);
+ return er;
+ }
}
/*
@@ -935,15 +933,6 @@ ccondissect(
zapmem(v, t->left);
zapmem(v, t->right);
}
-
- /*
- * Satisfaction.
- */
-
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- return REG_OKAY;
}
/*
@@ -1011,15 +1000,26 @@ crevdissect(
* Try this midpoint on for size.
*/
- er = cdissect(v, t->left, begin, mid);
- if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
- && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
- break; /* NOTE BREAK OUT */
- }
- if (er != REG_OKAY && er != REG_NOMATCH) {
- freedfa(d);
- freedfa(d2);
- return er;
+ if (longest(v, d2, mid, end, NULL) == end) {
+ er = cdissect(v, t->left, begin, mid);
+ if (er == REG_OKAY) {
+ er = cdissect(v, t->right, mid, end);
+ if (er == REG_OKAY) {
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_OKAY;
+ }
+ }
+ if (er != REG_OKAY && er != REG_NOMATCH) {
+ freedfa(d);
+ freedfa(d2);
+ return er;
+ }
}
/*
@@ -1052,15 +1052,6 @@ crevdissect(
zapmem(v, t->left);
zapmem(v, t->right);
}
-
- /*
- * Satisfaction.
- */
-
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- return REG_OKAY;
}
/*