summaryrefslogtreecommitdiffstats
path: root/Modules/_json.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_json.c')
-rw-r--r--Modules/_json.c439
1 files changed, 257 insertions, 182 deletions
diff --git a/Modules/_json.c b/Modules/_json.c
index 0924873..40c2ced 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -1,5 +1,7 @@
#include "Python.h"
#include "structmember.h"
+#include "accu.h"
+
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
@@ -76,6 +78,21 @@ static PyMemberDef encoder_members[] = {
};
static PyObject *
+join_list_unicode(PyObject *lst)
+{
+ /* return u''.join(lst) */
+ static PyObject *sep = NULL;
+ if (sep == NULL) {
+ sep = PyUnicode_FromStringAndSize("", 0);
+ if (sep == NULL)
+ return NULL;
+ }
+ return PyUnicode_Join(sep, lst);
+}
+
+/* Forward decls */
+
+static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
@@ -101,11 +118,11 @@ encoder_dealloc(PyObject *self);
static int
encoder_clear(PyObject *self);
static int
-encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
static int
-encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
static int
-encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);
static void
@@ -122,13 +139,6 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj);
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
-#define MIN_EXPANSION 6
-#ifdef Py_UNICODE_WIDE
-#define MAX_EXPANSION (2 * MIN_EXPANSION)
-#else
-#define MAX_EXPANSION MIN_EXPANSION
-#endif
-
static int
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
{
@@ -147,7 +157,7 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
}
static Py_ssize_t
-ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
+ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
{
/* Escape unicode code point c to ASCII escape sequences
in char *output. output must have at least 12 bytes unused to
@@ -162,25 +172,23 @@ ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
case '\r': output[chars++] = 'r'; break;
case '\t': output[chars++] = 't'; break;
default:
-#ifdef Py_UNICODE_WIDE
if (c >= 0x10000) {
/* UTF-16 surrogate pair */
- Py_UNICODE v = c - 0x10000;
+ Py_UCS4 v = c - 0x10000;
c = 0xd800 | ((v >> 10) & 0x3ff);
output[chars++] = 'u';
- output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
- output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
- output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
- output[chars++] = "0123456789abcdef"[(c ) & 0xf];
+ output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
+ output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
+ output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
+ output[chars++] = Py_hexdigits[(c ) & 0xf];
c = 0xdc00 | (v & 0x3ff);
output[chars++] = '\\';
}
-#endif
output[chars++] = 'u';
- output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
- output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
- output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
- output[chars++] = "0123456789abcdef"[(c ) & 0xf];
+ output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
+ output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
+ output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
+ output[chars++] = Py_hexdigits[(c ) & 0xf];
}
return chars;
}
@@ -192,54 +200,53 @@ ascii_escape_unicode(PyObject *pystr)
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
- Py_ssize_t max_output_size;
Py_ssize_t chars;
PyObject *rval;
- Py_UNICODE *output;
- Py_UNICODE *input_unicode;
+ void *input;
+ unsigned char *output;
+ int kind;
+
+ if (PyUnicode_READY(pystr) == -1)
+ return NULL;
- input_chars = PyUnicode_GET_SIZE(pystr);
- input_unicode = PyUnicode_AS_UNICODE(pystr);
+ input_chars = PyUnicode_GET_LENGTH(pystr);
+ input = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
- /* One char input can be up to 6 chars output, estimate 4 of these */
- output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
- max_output_size = 2 + (input_chars * MAX_EXPANSION);
- rval = PyUnicode_FromStringAndSize(NULL, output_size);
+ /* Compute the output size */
+ for (i = 0, output_size = 2; i < input_chars; i++) {
+ Py_UCS4 c = PyUnicode_READ(kind, input, i);
+ if (S_CHAR(c))
+ output_size++;
+ else {
+ switch(c) {
+ case '\\': case '"': case '\b': case '\f':
+ case '\n': case '\r': case '\t':
+ output_size += 2; break;
+ default:
+ output_size += c >= 0x10000 ? 12 : 6;
+ }
+ }
+ }
+
+ rval = PyUnicode_New(output_size, 127);
if (rval == NULL) {
return NULL;
}
- output = PyUnicode_AS_UNICODE(rval);
+ output = PyUnicode_1BYTE_DATA(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
- Py_UNICODE c = input_unicode[i];
+ Py_UCS4 c = PyUnicode_READ(kind, input, i);
if (S_CHAR(c)) {
output[chars++] = c;
}
else {
chars = ascii_escape_unichar(c, output, chars);
}
- if (output_size - chars < (1 + MAX_EXPANSION)) {
- /* There's more than four, so let's resize by a lot */
- Py_ssize_t new_output_size = output_size * 2;
- /* This is an upper bound */
- if (new_output_size > max_output_size) {
- new_output_size = max_output_size;
- }
- /* Make sure that the output size changed before resizing */
- if (new_output_size != output_size) {
- output_size = new_output_size;
- if (PyUnicode_Resize(&rval, output_size) == -1) {
- return NULL;
- }
- output = PyUnicode_AS_UNICODE(rval);
- }
- }
}
output[chars++] = '"';
- if (PyUnicode_Resize(&rval, chars) == -1) {
- return NULL;
- }
+ assert(_PyUnicode_CheckConsistency(rval, 1));
return rval;
}
@@ -267,19 +274,6 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
}
static PyObject *
-join_list_unicode(PyObject *lst)
-{
- /* return u''.join(lst) */
- static PyObject *sep = NULL;
- if (sep == NULL) {
- sep = PyUnicode_FromStringAndSize("", 0);
- if (sep == NULL)
- return NULL;
- }
- return PyUnicode_Join(sep, lst);
-}
-
-static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
/* return (rval, idx) tuple, stealing reference to rval */
PyObject *tpl;
@@ -333,22 +327,30 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
Return value is a new PyUnicode
*/
PyObject *rval = NULL;
- Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
+ Py_ssize_t len;
Py_ssize_t begin = end - 1;
- Py_ssize_t next = begin;
- const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
+ Py_ssize_t next /* = begin */;
+ const void *buf;
+ int kind;
PyObject *chunks = NULL;
PyObject *chunk = NULL;
+ if (PyUnicode_READY(pystr) == -1)
+ return 0;
+
+ len = PyUnicode_GET_LENGTH(pystr);
+ buf = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
+
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
- Py_UNICODE c = 0;
+ Py_UCS4 c = 0;
for (next = end; next < len; next++) {
- c = buf[next];
+ c = PyUnicode_READ(kind, buf, next);
if (c == '"' || c == '\\') {
break;
}
@@ -364,7 +366,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
/* Pick up this chunk if it's not zero length */
if (next != end) {
APPEND_OLD_CHUNK
- chunk = PyUnicode_FromUnicode(&buf[end], next - end);
+ chunk = PyUnicode_FromKindAndData(
+ kind,
+ (char*)buf + kind * end,
+ next - end);
if (chunk == NULL) {
goto bail;
}
@@ -378,7 +383,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
- c = buf[next];
+ c = PyUnicode_READ(kind, buf, next);
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
@@ -408,7 +413,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
/* Decode 4 hex digits */
for (; next < end; next++) {
- Py_UNICODE digit = buf[next];
+ Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
c <<= 4;
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
@@ -425,22 +430,22 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
goto bail;
}
}
-#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if ((c & 0xfc00) == 0xd800) {
- Py_UNICODE c2 = 0;
+ Py_UCS4 c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
goto bail;
}
- if (buf[next++] != '\\' || buf[next++] != 'u') {
+ if (PyUnicode_READ(kind, buf, next++) != '\\' ||
+ PyUnicode_READ(kind, buf, next++) != 'u') {
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
goto bail;
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
- Py_UNICODE digit = buf[next];
+ Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
c2 <<= 4;
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
@@ -467,10 +472,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
goto bail;
}
-#endif
}
APPEND_OLD_CHUNK
- chunk = PyUnicode_FromUnicode(&c, 1);
+ chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
if (chunk == NULL) {
goto bail;
}
@@ -608,8 +612,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
Returns a new PyObject (usually a dict, but object_hook can change that)
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ void *str;
+ int kind;
+ Py_ssize_t end_idx;
PyObject *val = NULL;
PyObject *rval = NULL;
PyObject *key = NULL;
@@ -617,6 +622,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
int has_pairs_hook = (s->object_pairs_hook != Py_None);
Py_ssize_t next_idx;
+ if (PyUnicode_READY(pystr) == -1)
+ return NULL;
+
+ str = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
+ end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
+
if (has_pairs_hook)
rval = PyList_New(0);
else
@@ -625,15 +637,15 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
return NULL;
/* skip whitespace after { */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
/* only loop if the object is non-empty */
- if (idx <= end_idx && str[idx] != '}') {
+ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
while (idx <= end_idx) {
PyObject *memokey;
/* read key */
- if (str[idx] != '"') {
+ if (PyUnicode_READ(kind, str, idx) != '"') {
raise_errmsg("Expecting property name", pystr, idx);
goto bail;
}
@@ -653,13 +665,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
idx = next_idx;
/* skip whitespace between key and : delimiter, read :, skip whitespace */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
- if (idx > end_idx || str[idx] != ':') {
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
raise_errmsg("Expecting : delimiter", pystr, idx);
goto bail;
}
idx++;
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx);
@@ -687,26 +699,26 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
idx = next_idx;
/* skip whitespace before } or , */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the object is closed or we didn't get the , delimiter */
if (idx > end_idx) break;
- if (str[idx] == '}') {
+ if (PyUnicode_READ(kind, str, idx) == '}') {
break;
}
- else if (str[idx] != ',') {
+ else if (PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting , delimiter", pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , delimiter */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
}
}
/* verify that idx < end_idx, str[idx] should be '}' */
- if (idx > end_idx || str[idx] != '}') {
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
raise_errmsg("Expecting object", pystr, end_idx);
goto bail;
}
@@ -742,19 +754,27 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
Returns a new PyList
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ void *str;
+ int kind;
+ Py_ssize_t end_idx;
PyObject *val = NULL;
PyObject *rval = PyList_New(0);
Py_ssize_t next_idx;
if (rval == NULL)
return NULL;
+ if (PyUnicode_READY(pystr) == -1)
+ return NULL;
+
+ str = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
+ end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
+
/* skip whitespace after [ */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* only loop if the array is non-empty */
- if (idx <= end_idx && str[idx] != ']') {
+ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
while (idx <= end_idx) {
/* read any JSON term */
@@ -769,26 +789,26 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
idx = next_idx;
/* skip whitespace between term and , */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the array is closed or we didn't get the , delimiter */
if (idx > end_idx) break;
- if (str[idx] == ']') {
+ if (PyUnicode_READ(kind, str, idx) == ']') {
break;
}
- else if (str[idx] != ',') {
+ else if (PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting , delimiter", pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
}
}
- /* verify that idx < end_idx, str[idx] should be ']' */
- if (idx > end_idx || str[idx] != ']') {
+ /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
raise_errmsg("Expecting object", pystr, end_idx);
goto bail;
}
@@ -820,7 +840,7 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *
/* rval = parse_constant(constant) */
rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
- idx += PyUnicode_GET_SIZE(cstr);
+ idx += PyUnicode_GET_LENGTH(cstr);
Py_DECREF(cstr);
*next_idx_ptr = idx;
return rval;
@@ -837,15 +857,24 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
PyInt, PyLong, or PyFloat.
May return other types if parse_int or parse_float are set
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ void *str;
+ int kind;
+ Py_ssize_t end_idx;
Py_ssize_t idx = start;
int is_float = 0;
PyObject *rval;
- PyObject *numstr;
+ PyObject *numstr = NULL;
+ PyObject *custom_func;
+
+ if (PyUnicode_READY(pystr) == -1)
+ return NULL;
+
+ str = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
+ end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
/* read a sign if it's there, make sure it's not the end of the string */
- if (str[idx] == '-') {
+ if (PyUnicode_READ(kind, str, idx) == '-') {
idx++;
if (idx > end_idx) {
PyErr_SetNone(PyExc_StopIteration);
@@ -854,12 +883,12 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
/* read as many integer digits as we find as long as it doesn't start with 0 */
- if (str[idx] >= '1' && str[idx] <= '9') {
+ if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
idx++;
- while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
}
/* if it starts with 0 we only expect one integer digit */
- else if (str[idx] == '0') {
+ else if (PyUnicode_READ(kind, str, idx) == '0') {
idx++;
}
/* no integer digits, error */
@@ -869,25 +898,25 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
/* if the next char is '.' followed by a digit then read all float digits */
- if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+ if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
is_float = 1;
idx += 2;
- while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
}
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
- if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+ if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Py_ssize_t e_start = idx;
idx++;
/* read an exponent sign if present */
- if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+ if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
/* read all digits */
- while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
/* if we got a digit, then parse as float. if not, backtrack */
- if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+ if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
is_float = 1;
}
else {
@@ -895,22 +924,39 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
}
- /* copy the section we determined to be a number */
- numstr = PyUnicode_FromUnicode(&str[start], idx - start);
- if (numstr == NULL)
- return NULL;
- if (is_float) {
- /* parse as a float using a fast path if available, otherwise call user defined method */
- if (s->parse_float != (PyObject *)&PyFloat_Type) {
- rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
- }
- else {
- rval = PyFloat_FromString(numstr);
- }
+ if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
+ custom_func = s->parse_float;
+ else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
+ custom_func = s->parse_int;
+ else
+ custom_func = NULL;
+
+ if (custom_func) {
+ /* copy the section we determined to be a number */
+ numstr = PyUnicode_FromKindAndData(kind,
+ (char*)str + kind * start,
+ idx - start);
+ if (numstr == NULL)
+ return NULL;
+ rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
}
else {
- /* no fast path for unicode -> int, just call */
- rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+ Py_ssize_t i, n;
+ char *buf;
+ /* Straight conversion to ASCII, to avoid costly conversion of
+ decimal unicode digits (which cannot appear here) */
+ n = idx - start;
+ numstr = PyBytes_FromStringAndSize(NULL, n);
+ if (numstr == NULL)
+ return NULL;
+ buf = PyBytes_AS_STRING(numstr);
+ for (i = 0; i < n; i++) {
+ buf[i] = (char) PyUnicode_READ(kind, str, i + start);
+ }
+ if (is_float)
+ rval = PyFloat_FromString(numstr);
+ else
+ rval = PyLong_FromString(buf, NULL, 10);
}
Py_DECREF(numstr);
*next_idx_ptr = idx;
@@ -928,13 +974,23 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
Returns a new PyObject representation of the term.
*/
PyObject *res;
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
+ void *str;
+ int kind;
+ Py_ssize_t length;
+
+ if (PyUnicode_READY(pystr) == -1)
+ return NULL;
+
+ str = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
+ length = PyUnicode_GET_LENGTH(pystr);
+
if (idx >= length) {
PyErr_SetNone(PyExc_StopIteration);
return NULL;
}
- switch (str[idx]) {
+
+ switch (PyUnicode_READ(kind, str, idx)) {
case '"':
/* string */
return scanstring_unicode(pystr, idx + 1,
@@ -958,7 +1014,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
return res;
case 'n':
/* null */
- if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+ if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Py_INCREF(Py_None);
*next_idx_ptr = idx + 4;
return Py_None;
@@ -966,7 +1022,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 't':
/* true */
- if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+ if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Py_INCREF(Py_True);
*next_idx_ptr = idx + 4;
return Py_True;
@@ -974,7 +1030,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 'f':
/* false */
- if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+ if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'l' &&
+ PyUnicode_READ(kind, str, idx + 3) == 's' &&
+ PyUnicode_READ(kind, str, idx + 4) == 'e') {
Py_INCREF(Py_False);
*next_idx_ptr = idx + 5;
return Py_False;
@@ -982,19 +1041,33 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 'N':
/* NaN */
- if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+ if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'N') {
return _parse_constant(s, "NaN", idx, next_idx_ptr);
}
break;
case 'I':
/* Infinity */
- if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+ if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'f' &&
+ PyUnicode_READ(kind, str, idx + 3) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 4) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 5) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 6) == 't' &&
+ PyUnicode_READ(kind, str, idx + 7) == 'y') {
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
}
break;
case '-':
/* -Infinity */
- if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+ if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 3) == 'f' &&
+ PyUnicode_READ(kind, str, idx + 4) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 5) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 6) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 7) == 't' &&
+ PyUnicode_READ(kind, str, idx + 8) == 'y') {
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
}
break;
@@ -1210,22 +1283,22 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
/* Python callable interface to encode_listencode_obj */
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
PyObject *obj;
- PyObject *rval;
Py_ssize_t indent_level;
PyEncoderObject *s;
+ _PyAccu acc;
+
assert(PyEncoder_Check(self));
s = (PyEncoderObject *)self;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
&obj, _convertPyInt_AsSsize_t, &indent_level))
return NULL;
- rval = PyList_New(0);
- if (rval == NULL)
+ if (_PyAccu_Init(&acc))
return NULL;
- if (encoder_listencode_obj(s, rval, obj, indent_level)) {
- Py_DECREF(rval);
+ if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
+ _PyAccu_Destroy(&acc);
return NULL;
}
- return rval;
+ return _PyAccu_FinishAsList(&acc);
}
static PyObject *
@@ -1297,18 +1370,19 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
}
static int
-_steal_list_append(PyObject *lst, PyObject *stolen)
+_steal_accumulate(_PyAccu *acc, PyObject *stolen)
{
/* Append stolen and then decrement its reference count */
- int rval = PyList_Append(lst, stolen);
+ int rval = _PyAccu_Accumulate(acc, stolen);
Py_DECREF(stolen);
return rval;
}
static int
-encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
+encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
+ PyObject *obj, Py_ssize_t indent_level)
{
- /* Encode Python object obj to a JSON term, rval is a PyList */
+ /* Encode Python object obj to a JSON term */
PyObject *newobj;
int rv;
@@ -1316,38 +1390,38 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
PyObject *cstr = _encoded_const(obj);
if (cstr == NULL)
return -1;
- return _steal_list_append(rval, cstr);
+ return _steal_accumulate(acc, cstr);
}
else if (PyUnicode_Check(obj))
{
PyObject *encoded = encoder_encode_string(s, obj);
if (encoded == NULL)
return -1;
- return _steal_list_append(rval, encoded);
+ return _steal_accumulate(acc, encoded);
}
else if (PyLong_Check(obj)) {
PyObject *encoded = PyObject_Str(obj);
if (encoded == NULL)
return -1;
- return _steal_list_append(rval, encoded);
+ return _steal_accumulate(acc, encoded);
}
else if (PyFloat_Check(obj)) {
PyObject *encoded = encoder_encode_float(s, obj);
if (encoded == NULL)
return -1;
- return _steal_list_append(rval, encoded);
+ return _steal_accumulate(acc, encoded);
}
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
if (Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
- rv = encoder_listencode_list(s, rval, obj, indent_level);
+ rv = encoder_listencode_list(s, acc, obj, indent_level);
Py_LeaveRecursiveCall();
return rv;
}
else if (PyDict_Check(obj)) {
if (Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
- rv = encoder_listencode_dict(s, rval, obj, indent_level);
+ rv = encoder_listencode_dict(s, acc, obj, indent_level);
Py_LeaveRecursiveCall();
return rv;
}
@@ -1378,7 +1452,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
if (Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
- rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+ rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Py_LeaveRecursiveCall();
Py_DECREF(newobj);
@@ -1398,9 +1472,10 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
}
static int
-encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
+encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
+ PyObject *dct, Py_ssize_t indent_level)
{
- /* Encode Python dict dct a JSON term, rval is a PyList */
+ /* Encode Python dict dct a JSON term */
static PyObject *open_dict = NULL;
static PyObject *close_dict = NULL;
static PyObject *empty_dict = NULL;
@@ -1420,7 +1495,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
return -1;
}
if (Py_SIZE(dct) == 0)
- return PyList_Append(rval, empty_dict);
+ return _PyAccu_Accumulate(acc, empty_dict);
if (s->markers != Py_None) {
int has_key;
@@ -1438,7 +1513,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
}
}
- if (PyList_Append(rval, open_dict))
+ if (_PyAccu_Accumulate(acc, open_dict))
goto bail;
if (s->indent != Py_None) {
@@ -1525,7 +1600,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
}
if (idx) {
- if (PyList_Append(rval, s->item_separator))
+ if (_PyAccu_Accumulate(acc, s->item_separator))
goto bail;
}
@@ -1533,16 +1608,16 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
Py_CLEAR(kstr);
if (encoded == NULL)
goto bail;
- if (PyList_Append(rval, encoded)) {
+ if (_PyAccu_Accumulate(acc, encoded)) {
Py_DECREF(encoded);
goto bail;
}
Py_DECREF(encoded);
- if (PyList_Append(rval, s->key_separator))
+ if (_PyAccu_Accumulate(acc, s->key_separator))
goto bail;
value = PyTuple_GET_ITEM(item, 1);
- if (encoder_listencode_obj(s, rval, value, indent_level))
+ if (encoder_listencode_obj(s, acc, value, indent_level))
goto bail;
idx += 1;
Py_DECREF(item);
@@ -1556,14 +1631,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
goto bail;
Py_CLEAR(ident);
}
+ /* TODO DOES NOT RUN; dead code
if (s->indent != Py_None) {
- /* TODO: DOES NOT RUN */
indent_level -= 1;
- /*
- yield '\n' + (' ' * (_indent * _current_indent_level))
- */
- }
- if (PyList_Append(rval, close_dict))
+
+ yield '\n' + (' ' * (_indent * _current_indent_level))
+ }*/
+ if (_PyAccu_Accumulate(acc, close_dict))
goto bail;
return 0;
@@ -1577,9 +1651,10 @@ bail:
static int
-encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
+encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
+ PyObject *seq, Py_ssize_t indent_level)
{
- /* Encode Python list seq to a JSON term, rval is a PyList */
+ /* Encode Python list seq to a JSON term */
static PyObject *open_array = NULL;
static PyObject *close_array = NULL;
static PyObject *empty_array = NULL;
@@ -1603,7 +1678,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
num_items = PySequence_Fast_GET_SIZE(s_fast);
if (num_items == 0) {
Py_DECREF(s_fast);
- return PyList_Append(rval, empty_array);
+ return _PyAccu_Accumulate(acc, empty_array);
}
if (s->markers != Py_None) {
@@ -1623,7 +1698,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
}
seq_items = PySequence_Fast_ITEMS(s_fast);
- if (PyList_Append(rval, open_array))
+ if (_PyAccu_Accumulate(acc, open_array))
goto bail;
if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */
@@ -1637,10 +1712,10 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
for (i = 0; i < num_items; i++) {
PyObject *obj = seq_items[i];
if (i) {
- if (PyList_Append(rval, s->item_separator))
+ if (_PyAccu_Accumulate(acc, s->item_separator))
goto bail;
}
- if (encoder_listencode_obj(s, rval, obj, indent_level))
+ if (encoder_listencode_obj(s, acc, obj, indent_level))
goto bail;
}
if (ident != NULL) {
@@ -1648,14 +1723,14 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
goto bail;
Py_CLEAR(ident);
}
+
+ /* TODO: DOES NOT RUN
if (s->indent != Py_None) {
- /* TODO: DOES NOT RUN */
indent_level -= 1;
- /*
- yield '\n' + (' ' * (_indent * _current_indent_level))
- */
- }
- if (PyList_Append(rval, close_array))
+
+ yield '\n' + (' ' * (_indent * _current_indent_level))
+ }*/
+ if (_PyAccu_Accumulate(acc, close_array))
goto bail;
Py_DECREF(s_fast);
return 0;