diff options
-rw-r--r--  Lib/sre_compile.py   | 8
-rw-r--r--  Lib/test/test_re.py  | 9
-rw-r--r--  Modules/sre.h        | 2
3 files changed, 15 insertions, 4 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 7ddc097..fa21d95 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -333,14 +333,16 @@ def _optimize_unicode(charset, fixup):
             block = block + 1
             data = data + _mk_bitmap(chunk)
     header = [block]
-    if MAXCODE == 65535:
+    if _sre.CODESIZE == 2:
         code = 'H'
     else:
-        code = 'L'
+        code = 'I'
     # Convert block indices to byte array of 256 bytes
     mapping = array.array('b', mapping).tostring()
     # Convert byte array to word array
-    header = header + array.array(code, mapping).tolist()
+    mapping = array.array(code, mapping)
+    assert mapping.itemsize == _sre.CODESIZE
+    header = header + mapping.tolist()
     data[0:0] = header
     return [(BIGCHARSET, data)]
 
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 9edca6e..2363ce5 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -497,6 +497,15 @@ class ReTests(unittest.TestCase):
         self.assert_(re.compile('bug_926075') is not
                      re.compile(eval("u'bug_926075'")))
 
+    def test_bug_931848(self):
+        try:
+            unicode
+        except NameError:
+            pass
+        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+        self.assertEqual(re.compile(pattern).split("a.b.c"),
+                         ['a','b','c'])
+
 def run_re_tests():
     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff --git a/Modules/sre.h b/Modules/sre.h
index ba8500b..4502802 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -16,7 +16,7 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a Py_UNICODE character) */
 #ifdef Py_UNICODE_WIDE
-#define SRE_CODE unsigned long
+#define SRE_CODE Py_UCS4
 #else
 #define SRE_CODE unsigned short
 #endif