summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2013-10-25 19:36:10 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2013-10-25 19:36:10 (GMT)
commit79aa68dfc1ab4936107f9528d939e865f94da4b6 (patch)
tree8cfe598860de5ebc6c9490a365884bff634fcd39
parente38b0544c4f89fe3e665721e52c027e006922032 (diff)
downloadcpython-79aa68dfc1ab4936107f9528d939e865f94da4b6.zip
cpython-79aa68dfc1ab4936107f9528d939e865f94da4b6.tar.gz
cpython-79aa68dfc1ab4936107f9528d939e865f94da4b6.tar.bz2
Issue #19387: explain and test the sre overlap table
-rw-r--r--Lib/sre_compile.py28
-rw-r--r--Lib/test/test_re.py22
2 files changed, 41 insertions, 9 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index e08ec66..e194aaa 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -353,6 +353,27 @@ def _simple(av):
lo, hi = av[2].getwidth()
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
+def _generate_overlap_table(prefix):
+ """
+ Generate an overlap table for the following prefix.
+ An overlap table is a table of the same size as the prefix which
+ informs about the potential self-overlap for each index in the prefix:
+ - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
+ - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
+ prefix[0:k]
+ """
+ table = [0] * len(prefix)
+ for i in range(1, len(prefix)):
+ idx = table[i - 1]
+ while prefix[i] != prefix[idx]:
+ if idx == 0:
+ table[i] = 0
+ break
+ idx = table[idx - 1]
+ else:
+ table[i] = idx + 1
+ return table
+
def _compile_info(code, pattern, flags):
# internal: compile an info block. in the current version,
# this contains min/max pattern width, and an optional literal
@@ -449,12 +470,7 @@ def _compile_info(code, pattern, flags):
emit(prefix_skip) # skip
code.extend(prefix)
# generate overlap table
- table = [-1] + ([0]*len(prefix))
- for i in range(len(prefix)):
- table[i+1] = table[i]+1
- while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
- table[i+1] = table[table[i+1]-1]+1
- code.extend(table[1:]) # don't store first entry
+ code.extend(_generate_overlap_table(prefix))
elif charset:
_compile_charset(charset, flags, code)
code[skip] = len(code) - skip
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 5e68585..9ee077d 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -3,10 +3,12 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
import io
import re
from re import Scanner
+import sre_compile
import sre_constants
import sys
import string
import traceback
+import unittest
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@@ -15,8 +17,6 @@ from weakref import proxy
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
-import unittest
-
class S(str):
def __getitem__(self, index):
return S(super().__getitem__(index))
@@ -1140,6 +1140,22 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(1), "")
self.assertEqual(m.group(2), "y")
+
+class ImplementationTest(unittest.TestCase):
+ """
+ Test implementation details of the re module.
+ """
+
+ def test_overlap_table(self):
+ f = sre_compile._generate_overlap_table
+ self.assertEqual(f(""), [])
+ self.assertEqual(f("a"), [0])
+ self.assertEqual(f("abcd"), [0, 0, 0, 0])
+ self.assertEqual(f("aaaa"), [0, 1, 2, 3])
+ self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
+ self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
+
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
@@ -1269,7 +1285,7 @@ def run_re_tests():
def test_main():
- run_unittest(ReTests)
+ run_unittest(__name__)
run_re_tests()
if __name__ == "__main__":