Change normalize_encoding() to avoid using .translate() or depending on

the string type. It will always return a Unicode string. The algorithm's specification is unchanged.
author: Guido van Rossum <guido@python.org> 2007-06-07 21:43:46 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-06-07 21:43:46 (GMT)
commit: ad5b9de288de6b5f965fdfd4db30753c30f9d5ca (patch)
tree: 91155dd70100bd8d866374735fc466ba3c06ed9d /Lib/encodings
parent: c3b6ac796f160ed27d2d1094ef606514ac3ae898 (diff)
download: cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.zip
cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.gz
cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.bz2
1 files changed, 11 insertions, 14 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 90ff479..221ba82 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -34,12 +34,6 @@ from . import aliases
 _cache = {}
 _unknown = '--unknown--'
 _import_tail = ['*']
-_norm_encoding_map = ('                                              . '
-                      '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
-                      ' abcdefghijklmnopqrstuvwxyz                     '
-                      '                                                '
-                      '                                                '
-                      '                ')
 _aliases = aliases.aliases
 
 class CodecRegistryError(LookupError, SystemError):
@@ -58,14 +52,17 @@ def normalize_encoding(encoding):
         non-ASCII characters, these must be Latin-1 compatible.
 
     """
-    # Make sure we have an 8-bit string, because .translate() works
-    # differently for Unicode strings.
-    if isinstance(encoding, str):
-        # Note that .encode('latin-1') does *not* use the codec
-        # registry, so this call doesn't recurse. (See unicodeobject.c
-        # PyUnicode_AsEncodedString() for details)
-        encoding = encoding.encode('latin-1')
-    return '_'.join(encoding.translate(_norm_encoding_map).split())
+    chars = []
+    punct = False
+    for c in encoding:
+        if c.isalnum() or c == '.':
+            if punct and chars:
+                chars.append('_')
+            chars.append(c)
+            punct = False
+        else:
+            punct = True
+    return ''.join(chars)
 
 def search_function(encoding):
author	Guido van Rossum <guido@python.org>	2007-06-07 21:43:46 (GMT)
committer	Guido van Rossum <guido@python.org>	2007-06-07 21:43:46 (GMT)
commit	ad5b9de288de6b5f965fdfd4db30753c30f9d5ca (patch)
tree	91155dd70100bd8d866374735fc466ba3c06ed9d /Lib/encodings
parent	c3b6ac796f160ed27d2d1094ef606514ac3ae898 (diff)
download	cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.zip cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.gz cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.bz2