summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Marc-André Lemburg <mal@egenix.com> 2002-10-04 11:45:38 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2002-10-04 11:45:38 (GMT)
commit: 7012673d676e1576fe7bf66df03252bdd4595590 (patch)
tree: 0a182bef62826e7d578278dab3609a14eb4de7e1 /Lib
parent: 399a6890f5726b83f25fb3b0758dbb79770a1c91 (diff)
download: cpython-7012673d676e1576fe7bf66df03252bdd4595590.zip
cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.gz
cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.bz2
Extending the encoding name normalization to handle more non-alphanumeric
characters.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/encodings/__init__.py | 28
1 file changed, 20 insertions, 8 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 9fc96d7..b928976 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -3,9 +3,9 @@
Standard Python encoding modules are stored in this package
directory.
- Codec modules must have names corresponding to standard lower-case
- encoding names with hyphens mapped to underscores, e.g. 'utf-8' is
- implemented by the module 'utf_8.py'.
+ Codec modules must have names corresponding to normalized encoding
+ names as defined in the normalize_encoding() function below, e.g.
+ 'utf-8' must be implemented by the module 'utf_8.py'.
Each codec module must export the following interface:
@@ -18,9 +18,8 @@
* getaliases() -> sequence of encoding name strings to use as aliases
- Alias names returned by getaliases() must be standard encoding
- names as defined above (lower-case, hyphens converted to
- underscores).
+ Alias names returned by getaliases() must be normalized encoding
+ names as defined by normalize_encoding().
Written by Marc-Andre Lemburg (mal@lemburg.com).
@@ -28,16 +27,29 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
-import codecs,exceptions
+import codecs, exceptions, re
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
+_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]+')  # '+': a whole run of separators is one split point
class CodecRegistryError(exceptions.LookupError,
exceptions.SystemError):
pass  # no body of its own: an instance is both a LookupError and a SystemError
+def normalize_encoding(encoding):
+    """ Normalize an encoding name.
+
+        The name is split at every match of _norm_encoding_RE
+        (anything other than ASCII letters, digits and the dot
+        used for Python package names) and the pieces are
+        re-joined with '_', e.g. 'utf-8' becomes 'utf_8'.
+
+    """
+    pieces = _norm_encoding_RE.split(encoding)
+    return '_'.join(pieces)
+
def search_function(encoding):
# Cache lookup
@@ -51,7 +63,7 @@ def search_function(encoding):
# encoding in the aliases mapping and retry the import using the
# default import module lookup scheme with the alias name.
#
- modname = encoding.replace('-', '_')
+ modname = normalize_encoding(encoding)
try:
mod = __import__('encodings.' + modname,
globals(), locals(), _import_tail)