summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/sre_compile.py 8
-rw-r--r-- Lib/test/test_re.py 9
-rw-r--r-- Modules/sre.h 2
3 files changed, 15 insertions, 4 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 7ddc097..fa21d95 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -333,14 +333,16 @@ def _optimize_unicode(charset, fixup):
block = block + 1
data = data + _mk_bitmap(chunk)
header = [block]
- if MAXCODE == 65535:
+ if _sre.CODESIZE == 2:
code = 'H'
else:
- code = 'L'
+ code = 'I'
# Convert block indices to byte array of 256 bytes
mapping = array.array('b', mapping).tostring()
# Convert byte array to word array
- header = header + array.array(code, mapping).tolist()
+ mapping = array.array(code, mapping)
+ assert mapping.itemsize == _sre.CODESIZE
+ header = header + mapping.tolist()
data[0:0] = header
return [(BIGCHARSET, data)]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 9edca6e..2363ce5 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -497,6 +497,15 @@ class ReTests(unittest.TestCase):
self.assert_(re.compile('bug_926075') is not
re.compile(eval("u'bug_926075'")))
+ def test_bug_931848(self):
+ try:
+ unicode
+ except NameError:
+ pass
+ pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+ self.assertEqual(re.compile(pattern).split("a.b.c"),
+ ['a','b','c'])
+
def run_re_tests():
from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
diff --git a/Modules/sre.h b/Modules/sre.h
index ba8500b..4502802 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -16,7 +16,7 @@
/* size of a code word (must be unsigned short or larger, and
large enough to hold a Py_UNICODE character) */
#ifdef Py_UNICODE_WIDE
-#define SRE_CODE unsigned long
+#define SRE_CODE Py_UCS4
#else
#define SRE_CODE unsigned short
#endif