summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-10 19:56:30 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-10 19:56:30 (GMT)
commit: 9f4b1e9c50da83b51a4b0c7ee7d7dc3ef94a0cf6 (patch)
tree: dc0742e3af4ceb33102cd7fe0a0deb38588d851f
parent: 240c55f721aee364f7fc341f86c5a25ab5c97095 (diff)
download: cpython-9f4b1e9c50da83b51a4b0c7ee7d7dc3ef94a0cf6.zip
cpython-9f4b1e9c50da83b51a4b0c7ee7d7dc3ef94a0cf6.tar.gz
cpython-9f4b1e9c50da83b51a4b0c7ee7d7dc3ef94a0cf6.tar.bz2
Fix and deprecate the unicode_internal codec
The unicode_internal codec now uses Py_UNICODE instead of the real internal representation (PEP 393: Py_UCS1, Py_UCS2 or Py_UCS4) for backward compatibility.
-rw-r--r-- Doc/library/codecs.rst 2
-rw-r--r-- Doc/whatsnew/3.3.rst 2
-rw-r--r-- Modules/_codecsmodule.c 20
-rw-r--r-- Objects/unicodeobject.c 28
4 files changed, 42 insertions, 10 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 4523c7f..a9fae95 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1173,6 +1173,8 @@ particular, the following variants typically exist:
| unicode_internal | | Return the internal |
| | | representation of the |
| | | operand |
+| | | |
+| | | .. deprecated:: 3.3 |
+--------------------+---------+---------------------------+
The following codecs provide bytes-to-bytes mappings.
diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst
index 911d8d9..7f4517f 100644
--- a/Doc/whatsnew/3.3.rst
+++ b/Doc/whatsnew/3.3.rst
@@ -250,6 +250,8 @@ versions.
(:issue:`12100`)
+The ``unicode_internal`` codec has been deprecated.
+
crypt
-----
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 727cf5e..93cb1b7 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -675,18 +675,30 @@ unicode_internal_encode(PyObject *self,
PyObject *obj;
const char *errors = NULL;
const char *data;
- Py_ssize_t size;
+ Py_ssize_t len, size;
if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
&obj, &errors))
return NULL;
if (PyUnicode_Check(obj)) {
+ Py_UNICODE *u;
+
if (PyUnicode_READY(obj) < 0)
return NULL;
- data = PyUnicode_AS_DATA(obj);
- size = PyUnicode_GET_DATA_SIZE(obj);
- return codec_tuple(PyBytes_FromStringAndSize(data, size),
+
+ if (PyErr_WarnEx(PyExc_DeprecationWarning,
+ "unicode_internal codecs has been deprecated",
+ 1))
+ return NULL;
+
+ u = PyUnicode_AsUnicodeAndSize(obj, &len);
+ if (u == NULL)
+ return NULL;
+ if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
+ return PyErr_NoMemory();
+ size = len * sizeof(Py_UNICODE);
+ return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
PyUnicode_GET_LENGTH(obj));
}
else {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 61534b4..3f580b5 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6237,6 +6237,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
+ if (PyErr_WarnEx(PyExc_DeprecationWarning,
+ "unicode_internal codecs has been deprecated",
+ 1))
+ return NULL;
+
/* XXX overflow detection missing */
v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
if (v == NULL)
@@ -6270,15 +6275,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
errors, &errorHandler,
"unicode_internal", reason,
&starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos)) {
+ &v, &outpos))
goto onError;
- }
+ continue;
}
- else {
- if (unicode_putchar(&v, &outpos, ch) < 0)
- goto onError;
- s += Py_UNICODE_SIZE;
+
+ s += Py_UNICODE_SIZE;
+#ifndef Py_UNICODE_WIDE
+ if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
+ {
+ Py_UCS4 ch2 = *(Py_UNICODE*)s;
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
+ {
+ ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
+ s += Py_UNICODE_SIZE;
+ }
}
+#endif
+
+ if (unicode_putchar(&v, &outpos, ch) < 0)
+ goto onError;
}
if (PyUnicode_Resize(&v, outpos) < 0)