Extending the encoding name normalization to handle more non-alphanumeric characters.
author: Marc-André Lemburg <mal@egenix.com> 2002-10-04 11:45:38 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2002-10-04 11:45:38 (GMT)
commit: 7012673d676e1576fe7bf66df03252bdd4595590 (patch)
tree: 0a182bef62826e7d578278dab3609a14eb4de7e1 /Lib/encodings/__init__.py
parent: 399a6890f5726b83f25fb3b0758dbb79770a1c91 (diff)
download: cpython-7012673d676e1576fe7bf66df03252bdd4595590.zip
cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.gz
cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.bz2
1 files changed, 20 insertions, 8 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 9fc96d7..b928976 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -3,9 +3,9 @@
     Standard Python encoding modules are stored in this package
     directory.
 
-    Codec modules must have names corresponding to standard lower-case
-    encoding names with hyphens mapped to underscores, e.g. 'utf-8' is
-    implemented by the module 'utf_8.py'.
+    Codec modules must have names corresponding to normalized encoding
+    names as defined in the normalize_encoding() function below, e.g.
+    'utf-8' must be implemented by the module 'utf_8.py'.
 
     Each codec module must export the following interface:
 
@@ -18,9 +18,8 @@
 
     * getaliases() -> sequence of encoding name strings to use as aliases
 
-    Alias names returned by getaliases() must be standard encoding
-    names as defined above (lower-case, hyphens converted to
-    underscores).
+    Alias names returned by getaliases() must be normalized encoding
+    names as defined by normalize_encoding().
 
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 
@@ -28,16 +27,29 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
 
 """#"
 
-import codecs,exceptions
+import codecs, exceptions, re
 
 _cache = {}
 _unknown = '--unknown--'
 _import_tail = ['*']
+_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
 
 class CodecRegistryError(exceptions.LookupError,
                          exceptions.SystemError):
     pass
 
+def normalize_encoding(encoding):
+
+    """ Normalize an encoding name.
+
+        Normalization works as follows: every character other than
+        an ASCII letter, a digit or the dot used for Python package
+        names is replaced with an underscore, e.g. 'utf-8' becomes
+        'utf_8'. Runs are not collapsed: '  -;#' becomes '_____'.
+        
+    """
+    return '_'.join(_norm_encoding_RE.split(encoding))
+
 def search_function(encoding):
 
     # Cache lookup
@@ -51,7 +63,7 @@ def search_function(encoding):
     # encoding in the aliases mapping and retry the import using the
     # default import module lookup scheme with the alias name.
     #
-    modname = encoding.replace('-', '_')
+    modname = normalize_encoding(encoding)
     try:
         mod = __import__('encodings.' + modname,
                          globals(), locals(), _import_tail)
author	Marc-André Lemburg <mal@egenix.com>	2002-10-04 11:45:38 (GMT)
committer	Marc-André Lemburg <mal@egenix.com>	2002-10-04 11:45:38 (GMT)
commit	7012673d676e1576fe7bf66df03252bdd4595590 (patch)
tree	0a182bef62826e7d578278dab3609a14eb4de7e1 /Lib/encodings/__init__.py
parent	399a6890f5726b83f25fb3b0758dbb79770a1c91 (diff)
download	cpython-7012673d676e1576fe7bf66df03252bdd4595590.zip cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.gz cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.bz2