diff options
author | Guido van Rossum <guido@python.org> | 2007-06-07 21:43:46 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-06-07 21:43:46 (GMT) |
commit | ad5b9de288de6b5f965fdfd4db30753c30f9d5ca (patch) | |
tree | 91155dd70100bd8d866374735fc466ba3c06ed9d /Lib/encodings | |
parent | c3b6ac796f160ed27d2d1094ef606514ac3ae898 (diff) | |
download | cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.zip cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.gz cpython-ad5b9de288de6b5f965fdfd4db30753c30f9d5ca.tar.bz2 |
Change normalize_encoding() to avoid using .translate() or depending on
the string type. It will always return a Unicode string. The algorithm's
specification is unchanged.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/__init__.py | 25 |
1 files changed, 11 insertions, 14 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 90ff479..221ba82 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -34,12 +34,6 @@ from . import aliases _cache = {} _unknown = '--unknown--' _import_tail = ['*'] -_norm_encoding_map = (' . ' - '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ ' - ' abcdefghijklmnopqrstuvwxyz ' - ' ' - ' ' - ' ') _aliases = aliases.aliases class CodecRegistryError(LookupError, SystemError): @@ -58,14 +52,17 @@ def normalize_encoding(encoding): non-ASCII characters, these must be Latin-1 compatible. """ - # Make sure we have an 8-bit string, because .translate() works - # differently for Unicode strings. - if isinstance(encoding, str): - # Note that .encode('latin-1') does *not* use the codec - # registry, so this call doesn't recurse. (See unicodeobject.c - # PyUnicode_AsEncodedString() for details) - encoding = encoding.encode('latin-1') - return '_'.join(encoding.translate(_norm_encoding_map).split()) + chars = [] + punct = False + for c in encoding: + if c.isalnum() or c == '.': + if punct and chars: + chars.append('_') + chars.append(c) + punct = False + else: + punct = True + return ''.join(chars) def search_function(encoding): |