summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Alexander Belopolsky <alexander.belopolsky@gmail.com> 2011-02-25 19:19:57 (GMT)
committer: Alexander Belopolsky <alexander.belopolsky@gmail.com> 2011-02-25 19:19:57 (GMT)
commit: 1d52146a25fdf534117602e77c7f3f539ea64294 (patch)
tree: 5adfb510b3345e0321770055f224e54ddb33d090 /Objects/unicodeobject.c
parent: eea22d2d660eae039ab392684ac8a8693f1f2a4d (diff)
download: cpython-1d52146a25fdf534117602e77c7f3f539ea64294.zip
cpython-1d52146a25fdf534117602e77c7f3f539ea64294.tar.gz
cpython-1d52146a25fdf534117602e77c7f3f539ea64294.tar.bz2
Issue #11303: Added shortcuts for utf8 and latin1 encodings.
Documented the list of optimized encodings as CPython implementation detail.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 14
1 file changed, 10 insertions, 4 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e7bbd80..48ea0a2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1462,13 +1462,15 @@ PyObject *PyUnicode_Decode(const char *s,
char lower[11]; /* Enough for any encoding shortcut */
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ return PyUnicode_DecodeUTF8(s, size, errors);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
- if (strcmp(lower, "utf-8") == 0)
+ if ((strcmp(lower, "utf-8") == 0) ||
+ (strcmp(lower, "utf8") == 0))
return PyUnicode_DecodeUTF8(s, size, errors);
else if ((strcmp(lower, "latin-1") == 0) ||
+ (strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_DecodeLatin1(s, size, errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
@@ -1670,15 +1672,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ errors);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
- if (strcmp(lower, "utf-8") == 0)
+ if ((strcmp(lower, "utf-8") == 0) ||
+ (strcmp(lower, "utf8") == 0))
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
else if ((strcmp(lower, "latin-1") == 0) ||
+ (strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),