diff options
author | Marc-André Lemburg <mal@egenix.com> | 2003-05-16 17:07:51 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2003-05-16 17:07:51 (GMT) |
commit | 282012593510a285fec5b8b5e42b04fef3ffffe0 (patch) | |
tree | d95d49ec6f9a35e2bf7f5d6d3137929c4c4358cd /Lib/encodings | |
parent | 813cec9a620e164d749ae9d36b72d0efd9260c07 (diff) | |
download | cpython-282012593510a285fec5b8b5e42b04fef3ffffe0.zip cpython-282012593510a285fec5b8b5e42b04fef3ffffe0.tar.gz cpython-282012593510a285fec5b8b5e42b04fef3ffffe0.tar.bz2 |
Remove usage of re module from encodings package search function.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/__init__.py | 23 |
1 files changed, 19 insertions, 4 deletions
```diff
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 66bea5c..666afad 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -27,12 +27,17 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
 
 """#"
 
-import codecs, exceptions, re
+import codecs, exceptions, types
 
 _cache = {}
 _unknown = '--unknown--'
 _import_tail = ['*']
-_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
+_norm_encoding_map = ('                                              . '
+                      '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
+                      ' abcdefghijklmnopqrstuvwxyz                     '
+                      '                                                '
+                      '                                                '
+                      '                ')
 
 class CodecRegistryError(exceptions.LookupError,
                          exceptions.SystemError):
@@ -45,10 +50,20 @@ def normalize_encoding(encoding):
         Normalization works as follows: all non-alphanumeric
         characters except the dot used for Python package names are
         collapsed and replaced with a single underscore, e.g. '  -;#'
-        becomes '_'.
+        becomes '_'. Leading and trailing underscores are removed.
+
+        Note that encoding names should be ASCII only; if they do use
+        non-ASCII characters, these must be Latin-1 compatible.
 
     """
-    return '_'.join(_norm_encoding_RE.split(encoding))
+    # Make sure we have an 8-bit string, because .translate() works
+    # differently for Unicode strings.
+    if type(encoding) is types.UnicodeType:
+        # Note that .encode('latin-1') does *not* use the codec
+        # registry, so this call doesn't recurse. (See unicodeobject.c
+        # PyUnicode_AsEncodedString() for details)
+        encoding = encoding.encode('latin-1')
+    return '_'.join(encoding.translate(_norm_encoding_map).split())
 
 def search_function(encoding):
 
```