Issue #11303: Added shortcuts for utf8 and latin1 encodings.

Documented the list of optimized encodings as a CPython implementation detail.
author: Alexander Belopolsky <alexander.belopolsky@gmail.com> 2011-02-25 19:19:57 (GMT)
committer: Alexander Belopolsky <alexander.belopolsky@gmail.com> 2011-02-25 19:19:57 (GMT)
commit: 1d52146a25fdf534117602e77c7f3f539ea64294 (patch)
tree: 5adfb510b3345e0321770055f224e54ddb33d090
parent: eea22d2d660eae039ab392684ac8a8693f1f2a4d (diff)
download: cpython-1d52146a25fdf534117602e77c7f3f539ea64294.zip
cpython-1d52146a25fdf534117602e77c7f3f539ea64294.tar.gz
cpython-1d52146a25fdf534117602e77c7f3f539ea64294.tar.bz2
2 files changed, 19 insertions, 4 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index c9222ca..0bffdb3 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -904,6 +904,15 @@ is meant to be exhaustive. Notice that spelling alternatives that only differ in
 case or use a hyphen instead of an underscore are also valid aliases; therefore,
 e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec.
 
+.. impl-detail::
+
+   Some common encodings can bypass the codecs lookup machinery to
+   improve performance.  These optimization opportunities are only
+   recognized by CPython for a limited set of aliases: utf-8, utf8,
+   latin-1, latin1, iso-8859-1, mbcs (Windows only), ascii, utf-16,
+   and utf-32.  Using alternative spellings for these encodings may
+   result in slower execution.
+
 Many of the character sets support the same languages. They vary in individual
 characters (e.g. whether the EURO SIGN is supported or not), and in the
 assignment of characters to code positions. For the European languages in
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e7bbd80..48ea0a2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1462,13 +1462,15 @@ PyObject *PyUnicode_Decode(const char *s,
     char lower[11];  /* Enough for any encoding shortcut */
 
     if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_DecodeUTF8(s, size, errors);
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if ((strcmp(lower, "utf-8") == 0) ||
+            (strcmp(lower, "utf8") == 0))
             return PyUnicode_DecodeUTF8(s, size, errors);
         else if ((strcmp(lower, "latin-1") == 0) ||
+                 (strcmp(lower, "latin1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
             return PyUnicode_DecodeLatin1(s, size, errors);
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
@@ -1670,15 +1672,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                    PyUnicode_GET_SIZE(unicode),
+                                    errors);
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if ((strcmp(lower, "utf-8") == 0) ||
+            (strcmp(lower, "utf8") == 0))
             return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                         PyUnicode_GET_SIZE(unicode),
                                         errors);
         else if ((strcmp(lower, "latin-1") == 0) ||
+                 (strcmp(lower, "latin1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
             return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
                                           PyUnicode_GET_SIZE(unicode),
author	Alexander Belopolsky <alexander.belopolsky@gmail.com>	2011-02-25 19:19:57 (GMT)
committer	Alexander Belopolsky <alexander.belopolsky@gmail.com>	2011-02-25 19:19:57 (GMT)
commit	1d52146a25fdf534117602e77c7f3f539ea64294 (patch)
tree	5adfb510b3345e0321770055f224e54ddb33d090
parent	eea22d2d660eae039ab392684ac8a8693f1f2a4d (diff)
download	cpython-1d52146a25fdf534117602e77c7f3f539ea64294.zip cpython-1d52146a25fdf534117602e77c7f3f539ea64294.tar.gz cpython-1d52146a25fdf534117602e77c7f3f539ea64294.tar.bz2