summaryrefslogtreecommitdiffstats
path: root/Lib/encodings/__init__.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2007-06-07 21:43:46 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-06-07 21:43:46 (GMT)
commit: ad5b9de288de6b5f965fdfd4db30753c30f9d5ca (patch)
tree: 91155dd70100bd8d866374735fc466ba3c06ed9d /Lib/encodings/__init__.py
parent: c3b6ac796f160ed27d2d1094ef606514ac3ae898 (diff)
download: cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.zip
cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.gz
cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.bz2
Change normalize_encoding() to avoid using .translate() or depending on
the string type. It will always return a Unicode string. The algorithm's specification is unchanged.
Diffstat (limited to 'Lib/encodings/__init__.py')
-rw-r--r-- Lib/encodings/__init__.py 25
1 files changed, 11 insertions, 14 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 90ff479..221ba82 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -34,12 +34,6 @@ from . import aliases
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
-_norm_encoding_map = (' . '
- '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
- ' abcdefghijklmnopqrstuvwxyz '
- ' '
- ' '
- ' ')
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
@@ -58,14 +52,17 @@ def normalize_encoding(encoding):
non-ASCII characters, these must be Latin-1 compatible.
"""
- # Make sure we have an 8-bit string, because .translate() works
- # differently for Unicode strings.
- if isinstance(encoding, str):
- # Note that .encode('latin-1') does *not* use the codec
- # registry, so this call doesn't recurse. (See unicodeobject.c
- # PyUnicode_AsEncodedString() for details)
- encoding = encoding.encode('latin-1')
- return '_'.join(encoding.translate(_norm_encoding_map).split())
+ chars = []
+ punct = False
+ for c in encoding:
+ if c.isalnum() or c == '.':
+ if punct and chars:
+ chars.append('_')
+ chars.append(c)
+ punct = False
+ else:
+ punct = True
+ return ''.join(chars)
def search_function(encoding):