From f231dc527e35148e1fb38301a10ccb8614fd1c72 Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 1 Feb 2010 11:12:00 +0000 Subject: [Bug 2942697]: Rework the RE engine so that certain pathological patterns are matched much more rapidly. Many thanks to Tom Lane for dianosing this issue and providing an initial patch. --- ChangeLog | 169 ++++++++++++++++++++++++++++-------------------------- generic/regexec.c | 91 +++++++++++++---------------- 2 files changed, 128 insertions(+), 132 deletions(-) diff --git a/ChangeLog b/ChangeLog index 98b9589..a476c12 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,30 +1,38 @@ +2010-02-01 Donal K. Fellows + + * generic/regexec.c (ccondissect, crevdissect): [Bug 2942697]: Rework + these functions so that certain pathological patterns are matched much + more rapidly. Many thanks to Tom Lane for dianosing this issue and + providing an initial patch. + 2010-02-01 Jan Nijtmans - * generic/tclInt.decls Various CYGWIN-related fixes - * generic/tclInt.h backported from HEAD. Still - * generic/tclIntPlatDecls.h configure script not modified, - * generic/tclPort.h so CYGWIN build is still - * generic/tclTest/c disabled. Reason: although the - * win/cat.c build succeeds with those changes, - * win/tclWinDde.c many tests still fail. - * win/tclWinError.c - * win/tclWinFile.c - * win/tclWinPipe.c - * win/tclWinPort.h - * win/tclWinReg.c - * win/tclWinSerial.c - * win/tclWinSock.c - * win/tclWinTest.c - * win/tclWinThrd.c + * generic/tclInt.decls: Various CYGWIN-related fixes + * generic/tclInt.h: backported from HEAD. Still + * generic/tclIntPlatDecls.h: configure script not modified, + * generic/tclPort.h: so CYGWIN build is still + * generic/tclTest.c: disabled. Reason: although the + * win/cat.c: build succeeds with those changes, + * win/tclWinDde.c: many tests still fail. + * win/tclWinError.c: + * win/tclWinFile.c: + * win/tclWinPipe.c: + * win/tclWinPort.h: + * win/tclWinReg.c: + * win/tclWinSerial.c: + * win/tclWinSock.c: + * win/tclWinTest.c: + * win/tclWinThrd.c: 2010-01-29 Jan Nijtmans - * generic/tcl.h Use correct TCL_LL_MODIFIER for CYGWIN. - Formatting (all backported from HEAD) - * generic/rege_dfa.c: Fix macro conflict on CYGWIN: don't use "small". - * generic/tclTest.c: Fix gcc 4.4 warning: ignoring return value of - * unix/tclUnixPipe.c ‘write’ - * unix/tclUnixNotify.c + * generic/tcl.h: Use correct TCL_LL_MODIFIER for CYGWIN. + Formatting (all backported from HEAD) + * generic/rege_dfa.c: Fix macro conflict on CYGWIN: don't use + "small". + * generic/tclTest.c: Fix gcc 4.4 warning: ignoring return value of + * unix/tclUnixPipe.c: ‘write’ + * unix/tclUnixNotify.c: 2010-01-19 Donal K. Fellows @@ -47,7 +55,6 @@ internal representation when not needed. Thanks to Alexandre Ferrieux for this fix. ->>>>>>> 1.3975.2.291 2010-01-06 Jan Nijtmans * generic/tclCompExpr.c: Warning: array subscript has type 'char' @@ -55,22 +62,22 @@ * libtommath/bn_mp_read_radix.c: * unix/tclUnixCompat.c: Fix gcc warning: signed and unsigned type in conditional expression. - * unix/tcl.m4 Add support for Haiku and CYGWIN dynamical loading - * unix/configure (regenerated) - * unix/Makefile.in - * unix/.cvsignore - * tests/stack.test Reduced minimum required C-stack size to 2034: - CYGWIN has this stack size and the test runs fine! - * generic/tclEnv.c Fix environment tests under CYGWIN - * generic/tclPort.h - * tests/env.test + * unix/tcl.m4: Add support for Haiku and CYGWIN dynamical loading + * unix/configure: (regenerated) + * unix/Makefile.in: + * unix/.cvsignore: + * tests/stack.test: Reduced minimum required C-stack size to 2034: + CYGWIN has this stack size and the test runs fine! + * generic/tclEnv.c: Fix environment tests under CYGWIN + * generic/tclPort.h: + * tests/env.test: 2010-01-05 Don Porter - * generic/tclPathObj.c (TclPathPart): Correct inconsistency between - * tests/fileName.test (filename-14.31): the string rep and the intrep - of a path value created by [file rootname]. Thanks to Vitaly Magerya - for reporting. [Bug 2918610] + * generic/tclPathObj.c (TclPathPart): [Bug 2918610]: Correct + * tests/fileName.test (filename-14.31): inconsistency between the + string rep and the intrep of a path value created by [file rootname]. + Thanks to Vitaly Magerya for reporting. 2010-01-03 Donal K. Fellows @@ -152,8 +159,8 @@ 2009-12-02 Jan Nijtmans - * tools/genStubs.tcl: Add support for win32 CALLBACK functions - (needed for Tk bugfix). + * tools/genStubs.tcl: Add support for win32 CALLBACK functions (needed + for Tk bugfix). 2009-11-30 Donal K. Fellows @@ -194,22 +201,21 @@ 2009-11-12 Andreas Kupries - * generic/tclIO.c (CopyData): [Bug 2895565]. Dropped bogosity - which used the number of _written_ bytes or character to update - the counters for the read bytes/characters. See last entry for the - test case. + * generic/tclIO.c (CopyData): [Bug 2895565]: Dropped bogosity which + used the number of _written_ bytes or character to update the counters + for the read bytes/characters. See last entry for the test case. 2009-11-11 Pat Thoyts - * tests/fCmd.test: Fixed a number of issues for Vista - * tests/registry.test: and Win7 that are due to restricted - * tests/winFCmd.test: permissions. + * tests/fCmd.test: Fixed a number of issues for Vista and Win7 + * tests/registry.test: that are due to restricted permissions. + * tests/winFCmd.test: 2009-11-11 Don Porter - * library/http/http.tcl: Update the URL syntax check to - RFC 3986 compliance on the subject of non-encoded question mark - characters. [Bug 2891171]. + * library/http/http.tcl: [Bug 2891171]: Update the URL syntax + check to RFC 3986 compliance on the subject of non-encoded question + mark characters. * library/http/pkgIndex.tcl: Bump to http 2.7.5 to avoid any * unix/Makefile.in: confusion with snapshot "releases" @@ -258,17 +264,16 @@ 2009-11-03 Andreas Kupries - * library/safe.tcl (::safe::InterpSetConfig): [Bug 2854929]. Added + * library/safe.tcl (::safe::InterpSetConfig): [Bug 2854929]: Added code to recursively find deeper paths which may contain modules. - Required to handle modules with names like 'platform::shell', - which translate into 'platform/shell-X.tm', i.e arbitrarily deep + Required to handle modules with names like 'platform::shell', which + translate into 'platform/shell-X.tm', i.e arbitrarily deep subdirectories. 2009-11-03 Kevin B. Kenny - * library/tzdata/Asia/Novokuznetsk: New tzdata locale for - Kemerovo oblast', which now keeps Novosibirsk time and not - Kranoyarsk time. + * library/tzdata/Asia/Novokuznetsk: New tzdata locale for Kemerovo + oblast', which now keeps Novosibirsk time and not Kranoyarsk time. * library/tzdata/Asia/Damascus: Syrian DST changes. * library/tzdata/Asia/Hong_Kong: Hong Kong historic DST corrections. Olson tzdata2009q. @@ -290,12 +295,12 @@ to: typedef unsigned int mp_digit; For 32-bit builds where "long" and "int" are two names for the same - thing, this is no change at all. For 64-bit builds, though, this + thing, this is no change at all. For 64-bit builds, though, this causes the dp[] array of an mp_int to be made up of 32-bit elements - instead of 64-bit elements. This is a huge improvement because - details elsewhere in the mp_int implementation cause only 28 bits of - each element to be actually used storing number data. Without this - change bignums are over 50% wasted space on 64-bit systems. + instead of 64-bit elements. This is a huge improvement because details + elsewhere in the mp_int implementation cause only 28 bits of each + element to be actually used storing number data. Without this change + bignums are over 50% wasted space on 64-bit systems. ***POTENTIAL INCOMPATIBILITY*** For 64-bit builds, callers of routines with (mp_digit) or (mp_digit *) @@ -321,12 +326,12 @@ * tests/fileName.test (fileName-20.[78]): Corrected poor test hygiene (failure to save and restore the working directory) that - caused these two tests to fail on Windows (and [Bug 2806250] - to be reopened). + caused these two tests to fail on Windows (and [Bug 2806250] to be + reopened). 2009-10-27 Don Porter - * generic/tclPathObj.c: [Bug 2884203]: Missing refcount on cached + * generic/tclPathObj.c: [Bug 2884203]: Missing refcount on cached normalized path caused crashes. 2009-10-27 Kevin B. Kenny @@ -345,9 +350,9 @@ 2009-10-24 Kevin B. Kenny * library/clock.tcl (ProcessPosixTimeZone): - Corrected a regression in the fix to [Bug 2207436] that caused - [clock] to apply EU daylight saving time rules in the US. Thanks - to Karl Lehenbauer for reporting this regression. + Corrected a regression in the fix to [Bug 2207436] that caused [clock] + to apply EU daylight saving time rules in the US. Thanks to Karl + Lehenbauer for reporting this regression. * tests/clock.test (clock-52.4): Added a regression test for the above regression. * library/tzdata/Asia/Dhaka: @@ -357,9 +362,9 @@ 2009-10-23 Andreas Kupries * generic/tclIO.c (FlushChannel): Skip OutputProc for low-level - 0-length writes. When closing pipes which have already been closed - not skipping leads to spurious SIG_PIPE signals. Reported by - Mikhail Teterin . + 0-length writes. When closing pipes which have already been closed not + skipping leads to spurious SIG_PIPE signals. Reported by Mikhail + Teterin . 2009-10-21 Donal K. Fellows @@ -368,27 +373,27 @@ 2009-10-19 Don Porter - * generic/tclIO.c: Revised ReadChars and FilterInputBytes routines - to permit reads to continue up to the string limits of Tcl values. - Before revisions, large read attempts could panic when as little as - half the limiting value length was reached. [Patch 2107634] + * generic/tclIO.c: Revised ReadChars and FilterInputBytes + routines to permit reads to continue up to the string limits of Tcl + values. Before revisions, large read attempts could panic when as + little as half the limiting value length was reached. [Patch 2107634] Thanks to Sean Morrison and Bob Parker for their roles in the fix. 2009-10-18 Joe Mistachkin * tests/thread.test (thread-4.[345]): [Bug 1565466]: Correct tests to - save their error state before the final call to threadReap just in case - it triggers an "invalid thread id" error. This error can occur if one - or more of the target threads has exited prior to the attempt to send - it an asynchronous exit command. + save their error state before the final call to threadReap just in + case it triggers an "invalid thread id" error. This error can occur + if one or more of the target threads has exited prior to the attempt + to send it an asynchronous exit command. * doc/memory.n: [Bug 988703]: Add mechanism for finding what Tcl_Objs - * generic/tclCkalloc.c (MemoryCmd): are allocated when built for memory - * generic/tclInt.decls: debugging. This was previously backported from - * generic/tclInt.h: Tcl 8.6 with the corrections to fix [Bug 2871908]. - * generic/tclObj.c (ObjData, TclFinalizeThreadObjects): However, there - were key elements missing. These changes make things consistent between - branches. + * generic/tclCkalloc.c (MemoryCmd): are allocated when built for + * generic/tclInt.decls: memory debugging. This was previously + * generic/tclInt.h: backported from Tcl 8.6 with the corrections to + * generic/tclObj.c (ObjData, TclFinalizeThreadObjects): fix [Bug + 2871908]. However, there were key elements missing. These changes make + things consistent between branches. 2009-10-17 Donal K. Fellows diff --git a/generic/regexec.c b/generic/regexec.c index d39685c..c902209 100644 --- a/generic/regexec.c +++ b/generic/regexec.c @@ -624,27 +624,21 @@ dissect( case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); return REG_OKAY; /* no action, parent did the work */ - break; case '|': /* alternation */ assert(t->left != NULL); return altdissect(v, t, begin, end); - break; case 'b': /* back ref -- shouldn't be calling us! */ return REG_ASSERT; - break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); return condissect(v, t, begin, end); - break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); subset(v, t, begin, end); return dissect(v, t->left, begin, end); - break; default: return REG_ASSERT; - break; } } @@ -802,19 +796,15 @@ cdissect( case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); return REG_OKAY; /* no action, parent did the work */ - break; case '|': /* alternation */ assert(t->left != NULL); return caltdissect(v, t, begin, end); - break; case 'b': /* back ref -- shouldn't be calling us! */ assert(t->left == NULL && t->right == NULL); return cbrdissect(v, t, begin, end); - break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); return ccondissect(v, t, begin, end); - break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); @@ -823,10 +813,8 @@ cdissect( subset(v, t, begin, end); } return er; - break; default: return REG_ASSERT; - break; } } @@ -843,8 +831,7 @@ ccondissect( chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { - struct dfa *d; - struct dfa *d2; + struct dfa *d, *d2; chr *mid; int er; @@ -894,15 +881,26 @@ ccondissect( * Try this midpoint on for size. */ - er = cdissect(v, t->left, begin, mid); - if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end) - && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) { - break; /* NOTE BREAK OUT */ - } - if ((er != REG_OKAY) && (er != REG_NOMATCH)) { - freedfa(d); - freedfa(d2); - return er; + if (longest(v, d2, mid, end, NULL) == end) { + er = cdissect(v, t->left, begin, mid); + if (er == REG_OKAY) { + er = cdissect(v, t->right, mid, end); + if (er == REG_OKAY) { + /* + * Satisfaction. + */ + + MDEBUG(("successful\n")); + freedfa(d); + freedfa(d2); + return REG_OKAY; + } + } + if ((er != REG_OKAY) && (er != REG_NOMATCH)) { + freedfa(d); + freedfa(d2); + return er; + } } /* @@ -935,15 +933,6 @@ ccondissect( zapmem(v, t->left); zapmem(v, t->right); } - - /* - * Satisfaction. - */ - - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; } /* @@ -1011,15 +1000,26 @@ crevdissect( * Try this midpoint on for size. */ - er = cdissect(v, t->left, begin, mid); - if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end) - && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) { - break; /* NOTE BREAK OUT */ - } - if (er != REG_OKAY && er != REG_NOMATCH) { - freedfa(d); - freedfa(d2); - return er; + if (longest(v, d2, mid, end, NULL) == end) { + er = cdissect(v, t->left, begin, mid); + if (er == REG_OKAY) { + er = cdissect(v, t->right, mid, end); + if (er == REG_OKAY) { + /* + * Satisfaction. + */ + + MDEBUG(("successful\n")); + freedfa(d); + freedfa(d2); + return REG_OKAY; + } + } + if (er != REG_OKAY && er != REG_NOMATCH) { + freedfa(d); + freedfa(d2); + return er; + } } /* @@ -1052,15 +1052,6 @@ crevdissect( zapmem(v, t->left); zapmem(v, t->right); } - - /* - * Satisfaction. - */ - - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; } /* -- cgit v0.12