summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-03 23:05:13 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-03 23:05:13 (GMT)
commit: 76a31a6bff3b0e809fbf08dd6e572e4e55b8fcfc (patch)
tree: 1eb08d313fb09cc7bf1ac51e14d3d0f4a20660b7 /Objects/unicodeobject.c
parent: a9e73640bf86f4bbbbee15cd7a1af6299a0c49dd (diff)
download: cpython-76a31a6bff3b0e809fbf08dd6e572e4e55b8fcfc.zip
cpython-76a31a6bff3b0e809fbf08dd6e572e4e55b8fcfc.tar.gz
cpython-76a31a6bff3b0e809fbf08dd6e572e4e55b8fcfc.tar.bz2
Cleanup decode_code_page_stateful() and encode_code_page()
* Fix decode_code_page_errors() result
* Inline decode_code_page() and encode_code_page_chunk()
* Replace the PyUnicodeObject type by PyObject
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 186
1 files changed, 75 insertions, 111 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 46b55909..b0a712d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7006,7 +7006,7 @@ decode_code_page_flags(UINT code_page)
*/
static int
decode_code_page_strict(UINT code_page,
- PyUnicodeObject **v,
+ PyObject **v,
const char *in,
int insize)
{
@@ -7022,7 +7022,7 @@ decode_code_page_strict(UINT code_page,
if (*v == NULL) {
/* Create unicode object */
- *v = _PyUnicode_New(outsize);
+ *v = (PyObject*)_PyUnicode_New(outsize);
if (*v == NULL)
return -1;
out = PyUnicode_AS_UNICODE(*v);
@@ -7030,7 +7030,7 @@ decode_code_page_strict(UINT code_page,
else {
/* Extend unicode object */
Py_ssize_t n = PyUnicode_GET_SIZE(*v);
- if (PyUnicode_Resize((PyObject**)v, n + outsize) < 0)
+ if (PyUnicode_Resize(v, n + outsize) < 0)
return -1;
out = PyUnicode_AS_UNICODE(*v) + n;
}
@@ -7057,9 +7057,8 @@ error:
*/
static int
decode_code_page_errors(UINT code_page,
- PyUnicodeObject **v,
- const char *in,
- int size,
+ PyObject **v,
+ const char *in, const int size,
const char *errors)
{
const char *startin = in;
@@ -7103,7 +7102,7 @@ decode_code_page_errors(UINT code_page,
PyErr_NoMemory();
goto error;
}
- *v = _PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
+ *v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
if (*v == NULL)
goto error;
startout = PyUnicode_AS_UNICODE(*v);
@@ -7115,7 +7114,7 @@ decode_code_page_errors(UINT code_page,
PyErr_NoMemory();
goto error;
}
- if (PyUnicode_Resize((PyObject**)v, n + size * Py_ARRAY_LENGTH(buffer)) < 0)
+ if (PyUnicode_Resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0)
goto error;
startout = PyUnicode_AS_UNICODE(*v) + n;
}
@@ -7173,9 +7172,9 @@ decode_code_page_errors(UINT code_page,
/* Extend unicode object */
outsize = out - startout;
assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
- if (PyUnicode_Resize((PyObject**)v, outsize) < 0)
+ if (PyUnicode_Resize(v, outsize) < 0)
goto error;
- ret = 0;
+ ret = size;
error:
Py_XDECREF(encoding_obj);
@@ -7184,50 +7183,13 @@ error:
return ret;
}
-/*
- * Decode a byte string from a Windows code page into unicode object. If
- * 'final' is set, converts trailing lead-byte too.
- *
- * Returns consumed size if succeed, or raise a WindowsError or
- * UnicodeDecodeError exception and returns -1 on error.
- */
-static int
-decode_code_page(UINT code_page,
- PyUnicodeObject **v,
- const char *s, int size,
- int final, const char *errors)
-{
- int done;
-
- /* Skip trailing lead-byte unless 'final' is set */
- if (size == 0) {
- if (*v == NULL) {
- Py_INCREF(unicode_empty);
- *v = (PyUnicodeObject*)unicode_empty;
- if (*v == NULL)
- return -1;
- }
- return 0;
- }
-
- if (!final && is_dbcs_lead_byte(code_page, s, size - 1))
- --size;
-
- done = decode_code_page_strict(code_page, v, s, size);
- if (done == -2)
- done = decode_code_page_errors(code_page, v, s, size, errors);
- return done;
-}
-
static PyObject *
decode_code_page_stateful(int code_page,
- const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+ const char *s, Py_ssize_t size,
+ const char *errors, Py_ssize_t *consumed)
{
- PyUnicodeObject *v = NULL;
- int done;
+ PyObject *v = NULL;
+ int chunk_size, final, converted, done;
if (code_page < 0) {
PyErr_SetString(PyExc_ValueError, "invalid code page number");
@@ -7237,29 +7199,53 @@ decode_code_page_stateful(int code_page,
if (consumed)
*consumed = 0;
+ do
+ {
#ifdef NEED_RETRY
- retry:
- if (size > INT_MAX)
- done = decode_code_page(code_page, &v, s, INT_MAX, 0, errors);
- else
+ if (size > INT_MAX) {
+ chunk_size = INT_MAX;
+ final = 0;
+ done = 0;
+ }
+ else
#endif
- done = decode_code_page(code_page, &v, s, (int)size, !consumed, errors);
+ {
+ chunk_size = (int)size;
+ final = (consumed == NULL);
+ done = 1;
+ }
- if (done < 0) {
- Py_XDECREF(v);
- return NULL;
- }
+ /* Skip trailing lead-byte unless 'final' is set */
+ if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1))
+ --chunk_size;
- if (consumed)
- *consumed += done;
+ if (chunk_size == 0 && done) {
+ if (v != NULL)
+ break;
+ Py_INCREF(unicode_empty);
+ return unicode_empty;
+ }
-#ifdef NEED_RETRY
- if (size > INT_MAX) {
- s += done;
- size -= done;
- goto retry;
- }
-#endif
+
+ converted = decode_code_page_strict(code_page, &v,
+ s, chunk_size);
+ if (converted == -2)
+ converted = decode_code_page_errors(code_page, &v,
+ s, chunk_size,
+ errors);
+ assert(converted != 0);
+
+ if (converted < 0) {
+ Py_XDECREF(v);
+ return NULL;
+ }
+
+ if (consumed)
+ *consumed += converted;
+
+ s += converted;
+ size -= converted;
+ } while (!done);
#ifndef DONT_MAKE_RESULT_READY
if (_PyUnicode_READY_REPLACE(&v)) {
@@ -7268,7 +7254,7 @@ decode_code_page_stateful(int code_page,
}
#endif
assert(_PyUnicode_CheckConsistency(v, 1));
- return (PyObject *)v;
+ return v;
}
PyObject *
@@ -7583,40 +7569,6 @@ error:
return ret;
}
-/*
- * Encode a Unicode string to a Windows code page into a byte string.
- *
- * Returns consumed characters if succeed, or raise a WindowsError and returns
- * -1 on other error.
- */
-static int
-encode_code_page_chunk(UINT code_page, PyObject **outbytes,
- PyObject *unicode, Py_ssize_t unicode_offset,
- const Py_UNICODE *p, int size,
- const char* errors)
-{
- int done;
-
- if (size == 0) {
- if (*outbytes == NULL) {
- *outbytes = PyBytes_FromStringAndSize(NULL, 0);
- if (*outbytes == NULL)
- return -1;
- }
- return 0;
- }
-
- done = encode_code_page_strict(code_page, outbytes,
- p, size,
- errors);
- if (done == -2)
- done = encode_code_page_errors(code_page, outbytes,
- unicode, unicode_offset,
- p, size,
- errors);
- return done;
-}
-
static PyObject *
encode_code_page(int code_page,
PyObject *unicode,
@@ -7626,7 +7578,7 @@ encode_code_page(int code_page,
Py_ssize_t size;
PyObject *outbytes = NULL;
Py_ssize_t offset;
- int chunk_len, ret;
+ int chunk_len, ret, done;
p = PyUnicode_AsUnicodeAndSize(unicode, &size);
if (p == NULL)
@@ -7637,20 +7589,32 @@ encode_code_page(int code_page,
return NULL;
}
+ if (size == 0)
+ return PyBytes_FromStringAndSize(NULL, 0);
+
offset = 0;
do
{
#ifdef NEED_RETRY
- if (size > INT_MAX)
+ if (size > INT_MAX) {
chunk_len = INT_MAX;
+ done = 0;
+ }
else
#endif
+ {
chunk_len = (int)size;
- ret = encode_code_page_chunk(code_page, &outbytes,
- unicode, offset,
- p, chunk_len,
- errors);
+ done = 1;
+ }
+ ret = encode_code_page_strict(code_page, &outbytes,
+ p, chunk_len,
+ errors);
+ if (ret == -2)
+ ret = encode_code_page_errors(code_page, &outbytes,
+ unicode, offset,
+ p, chunk_len,
+ errors);
if (ret < 0) {
Py_XDECREF(outbytes);
return NULL;
@@ -7659,7 +7623,7 @@ encode_code_page(int code_page,
p += chunk_len;
offset += chunk_len;
size -= chunk_len;
- } while (size != 0);
+ } while (!done);
return outbytes;
}