diff options
author | Marc-André Lemburg <mal@egenix.com> | 2002-10-04 11:45:38 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2002-10-04 11:45:38 (GMT) |
commit | 7012673d676e1576fe7bf66df03252bdd4595590 (patch) | |
tree | 0a182bef62826e7d578278dab3609a14eb4de7e1 /Lib | |
parent | 399a6890f5726b83f25fb3b0758dbb79770a1c91 (diff) | |
download | cpython-7012673d676e1576fe7bf66df03252bdd4595590.zip cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.gz cpython-7012673d676e1576fe7bf66df03252bdd4595590.tar.bz2 |
Extending the encoding name normalization to handle more non-alphanumeric
characters.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/encodings/__init__.py | 28 |
1 files changed, 20 insertions, 8 deletions
```diff
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 9fc96d7..b928976 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -3,9 +3,9 @@
     Standard Python encoding modules are stored in this package
     directory.
 
-    Codec modules must have names corresponding to standard lower-case
-    encoding names with hyphens mapped to underscores, e.g. 'utf-8' is
-    implemented by the module 'utf_8.py'.
+    Codec modules must have names corresponding to normalized encoding
+    names as defined in the normalize_encoding() function below, e.g.
+    'utf-8' must be implemented by the module 'utf_8.py'.
 
     Each codec module must export the following interface:
 
@@ -18,9 +18,8 @@
 
     * getaliases() -> sequence of encoding name strings to use as aliases
 
-    Alias names returned by getaliases() must be standard encoding
-    names as defined above (lower-case, hyphens converted to
-    underscores).
+    Alias names returned by getaliases() must be normalized encoding
+    names as defined by normalize_encoding().
 
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 
@@ -28,16 +27,29 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
 
 """#"
 
-import codecs,exceptions
+import codecs, exceptions, re
 
 _cache = {}
 _unknown = '--unknown--'
 _import_tail = ['*']
+_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
 
 class CodecRegistryError(exceptions.LookupError,
                          exceptions.SystemError):
     pass
 
+def normalize_encoding(encoding):
+
+    """ Normalize an encoding name.
+
+        Normalization works as follows: all non-alphanumeric
+        characters except the dot used for Python package names are
+        collapsed and replaced with a single underscore, e.g. ' -;#'
+        becomes '_'.
+
+    """
+    return '_'.join(_norm_encoding_RE.split(encoding))
+
 def search_function(encoding):
 
     # Cache lookup
@@ -51,7 +63,7 @@ def search_function(encoding):
     # encoding in the aliases mapping and retry the import using the
     # default import module lookup scheme with the alias name.
     #
-    modname = encoding.replace('-', '_')
+    modname = normalize_encoding(encoding)
     try:
         mod = __import__('encodings.' + modname,
                          globals(), locals(), _import_tail)
```