1 file changed, 12 insertions, 45 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index f24f681..1b3e9f8 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -169,13 +169,13 @@ def _compile(code, pattern, flags):
 def _compile_charset(charset, flags, code, fixup=None):
     # compile charset subprogram
     emit = code.append
-    for op, av in _optimize_charset(charset, fixup, flags & SRE_FLAG_UNICODE):
+    for op, av in _optimize_charset(charset, fixup):
         emit(OPCODES[op])
         if op is NEGATE:
             pass
         elif op is LITERAL:
             emit(av)
-        elif op is RANGE:
+        elif op is RANGE or op is RANGE_IGNORE:
             emit(av[0])
             emit(av[1])
         elif op is CHARSET:
@@ -193,7 +193,7 @@ def _compile_charset(charset, flags, code, fixup=None):
             raise error("internal: unsupported set operator")
     emit(OPCODES[FAILURE])
 
-def _optimize_charset(charset, fixup, isunicode):
+def _optimize_charset(charset, fixup):
     # internal: optimize character set
     out = []
     tail = []
@@ -202,10 +202,9 @@ def _optimize_charset(charset, fixup, isunicode):
         while True:
             try:
                 if op is LITERAL:
-                    i = av
                     if fixup:
-                        i = fixup(i)
-                    charmap[i] = 1
+                        av = fixup(av)
+                    charmap[av] = 1
                 elif op is RANGE:
                     r = range(av[0], av[1]+1)
                     if fixup:
@@ -221,21 +220,13 @@ def _optimize_charset(charset, fixup, isunicode):
                     # character set contains non-UCS1 character codes
                     charmap += b'\0' * 0xff00
                     continue
-                # character set contains non-BMP character codes
-                if fixup and isunicode and op is RANGE:
-                    lo, hi = av
-                    ranges = [av]
-                    # There are only two ranges of cased astral characters:
-                    # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi).
-                    _fixup_range(max(0x10000, lo), min(0x11fff, hi),
-                                 ranges, fixup)
-                    for lo, hi in ranges:
-                        if lo == hi:
-                            tail.append((LITERAL, hi))
-                        else:
-                            tail.append((RANGE, (lo, hi)))
-                else:
-                    tail.append((op, av))
+                # Character set contains non-BMP character codes.
+                # There are only two ranges of cased non-BMP characters:
+                # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi),
+                # and for both ranges RANGE_IGNORE works.
+                if fixup and op is RANGE:
+                    op = RANGE_IGNORE
+                tail.append((op, av))
             break
 
     # compress character map
@@ -313,24 +304,6 @@ def _optimize_charset(charset, fixup, isunicode):
     out += tail
     return out
 
-def _fixup_range(lo, hi, ranges, fixup):
-    for i in map(fixup, range(lo, hi+1)):
-        for k, (lo, hi) in enumerate(ranges):
-            if i < lo:
-                if l == lo - 1:
-                    ranges[k] = (i, hi)
-                else:
-                    ranges.insert(k, (i, i))
-                break
-            elif i > hi:
-                if i == hi + 1:
-                    ranges[k] = (lo, i)
-                    break
-            else:
-                break
-        else:
-            ranges.append((i, i))
-
 _CODEBITS = _sre.CODESIZE * 8
 _BITS_TRANS = b'0' + b'1' * 255
 def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
@@ -504,12 +477,6 @@ def compile(p, flags=0):
 
     # print code
 
-    # XXX: <fl> get rid of this limitation!
-    if p.pattern.groups > 100:
-        raise AssertionError(
-            "sorry, but this version only supports 100 named groups"
-            )
-
     # map in either direction
     groupindex = p.pattern.groupdict
     indexgroup = [None] * p.pattern.groups