Replaced the ELLIPSIS implementation with a worst-case linear-time one.

author: Tim Peters <tim.peters@gmail.com> 2004-08-19 08:10:08 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2004-08-19 08:10:08 (GMT)
commit: 26b3ebb515eacb997885ff4fe1b425674429e253 (patch)
tree: b8c937c492db93256726cb43df70afa8c45fc93b /Lib
parent: 1cf3aa6e6615dcddc58e42d4fb214f9e0d90d22c (diff)
download: cpython-26b3ebb515eacb997885ff4fe1b425674429e253.zip
cpython-26b3ebb515eacb997885ff4fe1b425674429e253.tar.gz
cpython-26b3ebb515eacb997885ff4fe1b425674429e253.tar.bz2
2 files changed, 66 insertions, 23 deletions
diff --git a/Lib/doctest.py b/Lib/doctest.py
index f7b29d3..6c5679d 100644
--- a/Lib/doctest.py
+++ b/Lib/doctest.py
@@ -362,6 +362,50 @@ class _SpoofOut(StringIO):
         if hasattr(self, "softspace"):
             del self.softspace
 
+# Worst-case linear-time ellipsis matching.
+def ellipsis_match(want, got):
+    if ELLIPSIS_MARKER not in want:
+        return want == got
+    # Remove \n from ...\n, else the newline will be required,
+    # and (for example) ... on a line by itself can't match
+    # nothing gracefully.
+    want = want.replace(ELLIPSIS_MARKER + '\n', ELLIPSIS_MARKER)
+    # Find "the real" strings.
+    ws = want.split(ELLIPSIS_MARKER)
+    assert len(ws) >= 2
+    # Match.  In general, we only need to find the leftmost non-overlapping
+    # match for each piece.  "Real strings" at the start or end of `want`
+    # are special cases.
+    w = ws[0]
+    if w:
+        # An ellipsis didn't start `want`.  We need to match exactly
+        # at the start.
+        if not got.startswith(w):
+            return False
+        pos = len(w)
+        del ws[0]
+    else:
+        pos = 0
+
+    for w in ws:
+        # w may be '' at times, if there are consecutive ellipses, or
+        # due to an ellipsis at the start or end of `want`.  That's OK.
+        # Search for an empty string succeeds, and doesn't change pos.
+        pos = got.find(w, pos)
+        if pos < 0:
+            return False
+        pos += len(w)
+
+    # If `want` ended with an ellipsis, the tail matches anything.
+    if ws[-1] == '':
+        return True
+    # Else `want` ended with a real string.  If the last real match
+    # exhausted `got`, we win.
+    if pos == len(got):
+        return True
+    # Else maybe we matched the last real string too early.
+    return got.endswith(ws[-1])
+
 ######################################################################
 ## 2. Example & DocTest
 ######################################################################
@@ -1475,22 +1519,9 @@ class OutputChecker:
                 return True
 
         # The ELLIPSIS flag says to let the sequence "..." in `want`
-        # match any substring in `got`.  We implement this by
-        # transforming `want` into a regular expression.
+        # match any substring in `got`.
         if optionflags & ELLIPSIS:
-            # Remove \n from ...\n, else the newline will be required,
-            # and (for example) ... on a line by itself can't match
-            # nothing gracefully.
-            want_re = want.replace(ELLIPSIS_MARKER + '\n', ELLIPSIS_MARKER)
-            # Escape any special regexp characters
-            want_re = re.escape(want_re)
-            # Replace escaped ellipsis markers ('\.\.\.') with .*
-            want_re = want_re.replace(re.escape(ELLIPSIS_MARKER), '.*')
-            # Require that it matches the entire string; and set the
-            # re.DOTALL flag (with '(?s)').
-            want_re = '(?s)^%s$' % want_re
-            # Check if the `want_re` regexp matches got.
-            if re.match(want_re, got):
+            if ellipsis_match(want, got):
                 return True
 
         # We didn't find any match; return false.
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py
index d49e6cf..e96fd2a 100644
--- a/Lib/test/test_doctest.py
+++ b/Lib/test/test_doctest.py
@@ -780,13 +780,8 @@ output to match any substring in the actual output:
     >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test)
     (0, 1)
 
-... should also match nothing gracefully:
-XXX This can be provoked into requiring exponential time by adding more
-XXX ellipses; the implementation should change.  It's much easier to
-XXX provoke exponential time with expected output that doesn't match,
-XXX BTW (then multiple regexp .* thingies each try all possiblities,
-XXX multiplicatively, without hope of success).  That's the real danger,
-XXX that a failing test will appear to be hung.
+... should also match nothing gracefully (note that a regular-expression
+implementation of ELLIPSIS would take a loooong time to match this one!):
 
     >>> for i in range(100):
     ...     print i**2 #doctest: +ELLIPSIS
@@ -794,15 +789,32 @@ XXX that a failing test will appear to be hung.
     ...
     1
     ...
+    ......
+    ...
     36
     ...
     ...
+    ...
     49
     64
-    ......
+    .........
     9801
     ...
 
+... can be surprising; e.g., this test passes:
+
+    >>> for i in range(21): #doctest: +ELLIPSIS
+    ...     print i
+    0
+    1
+    2
+    ...
+    1
+    ...
+    2
+    ...
+    0
+
 The UNIFIED_DIFF flag causes failures that involve multi-line expected
 and actual outputs to be displayed using a unified diff:
author	Tim Peters <tim.peters@gmail.com>	2004-08-19 08:10:08 (GMT)
committer	Tim Peters <tim.peters@gmail.com>	2004-08-19 08:10:08 (GMT)
commit	26b3ebb515eacb997885ff4fe1b425674429e253 (patch)
tree	b8c937c492db93256726cb43df70afa8c45fc93b /Lib
parent	1cf3aa6e6615dcddc58e42d4fb214f9e0d90d22c (diff)
download	cpython-26b3ebb515eacb997885ff4fe1b425674429e253.zip cpython-26b3ebb515eacb997885ff4fe1b425674429e253.tar.gz cpython-26b3ebb515eacb997885ff4fe1b425674429e253.tar.bz2