diff options
Diffstat (limited to 'Modules/_json.c')
-rw-r--r-- | Modules/_json.c | 439 |
1 files changed, 257 insertions, 182 deletions
diff --git a/Modules/_json.c b/Modules/_json.c index 0924873..40c2ced 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1,5 +1,7 @@ #include "Python.h" #include "structmember.h" +#include "accu.h" + #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif @@ -76,6 +78,21 @@ static PyMemberDef encoder_members[] = { }; static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *sep = NULL; + if (sep == NULL) { + sep = PyUnicode_FromStringAndSize("", 0); + if (sep == NULL) + return NULL; + } + return PyUnicode_Join(sep, lst); +} + +/* Forward decls */ + +static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); @@ -101,11 +118,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level); static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level); static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level); static PyObject * _encoded_const(PyObject *obj); static void @@ -122,13 +139,6 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) -#define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif - static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) { @@ -147,7 +157,7 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) } static Py_ssize_t -ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars) +ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to @@ -162,25 +172,23 @@ ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars) case '\r': output[chars++] = 'r'; break; case '\t': output[chars++] = 't'; break; default: -#ifdef Py_UNICODE_WIDE if (c >= 0x10000) { /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; + Py_UCS4 v = c - 0x10000; c = 0xd800 | ((v >> 10) & 0x3ff); output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + output[chars++] = Py_hexdigits[(c >> 12) & 0xf]; + output[chars++] = Py_hexdigits[(c >> 8) & 0xf]; + output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; + output[chars++] = Py_hexdigits[(c ) & 0xf]; c = 0xdc00 | (v & 0x3ff); output[chars++] = '\\'; } -#endif output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + output[chars++] = Py_hexdigits[(c >> 12) & 0xf]; + output[chars++] = Py_hexdigits[(c >> 8) & 0xf]; + output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; + output[chars++] = Py_hexdigits[(c ) & 0xf]; } return chars; } @@ -192,54 +200,53 @@ ascii_escape_unicode(PyObject *pystr) Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; - Py_ssize_t max_output_size; Py_ssize_t chars; PyObject *rval; - Py_UNICODE *output; - Py_UNICODE *input_unicode; + void *input; + unsigned char *output; + int kind; + + if (PyUnicode_READY(pystr) == -1) + return NULL; - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); - rval = PyUnicode_FromStringAndSize(NULL, output_size); + /* Compute the output size */ + for (i = 0, output_size = 2; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); + if (S_CHAR(c)) + output_size++; + else { + switch(c) { + case '\\': case '"': case '\b': case '\f': + case '\n': case '\r': case '\t': + output_size += 2; break; + default: + output_size += c >= 0x10000 ? 12 : 6; + } + } + } + + rval = PyUnicode_New(output_size, 127); if (rval == NULL) { return NULL; } - output = PyUnicode_AS_UNICODE(rval); + output = PyUnicode_1BYTE_DATA(rval); chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { - Py_UNICODE c = input_unicode[i]; + Py_UCS4 c = PyUnicode_READ(kind, input, i); if (S_CHAR(c)) { output[chars++] = c; } else { chars = ascii_escape_unichar(c, output, chars); } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (PyUnicode_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyUnicode_AS_UNICODE(rval); - } - } } output[chars++] = '"'; - if (PyUnicode_Resize(&rval, chars) == -1) { - return NULL; - } + assert(_PyUnicode_CheckConsistency(rval, 1)); return rval; } @@ -267,19 +274,6 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) } static PyObject * -join_list_unicode(PyObject *lst) -{ - /* return u''.join(lst) */ - static PyObject *sep = NULL; - if (sep == NULL) { - sep = PyUnicode_FromStringAndSize("", 0); - if (sep == NULL) - return NULL; - } - return PyUnicode_Join(sep, lst); -} - -static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { /* return (rval, idx) tuple, stealing reference to rval */ PyObject *tpl; @@ -333,22 +327,30 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Return value is a new PyUnicode */ PyObject *rval = NULL; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t len; Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t next /* = begin */; + const void *buf; + int kind; PyObject *chunks = NULL; PyObject *chunk = NULL; + if (PyUnicode_READY(pystr) == -1) + return 0; + + len = PyUnicode_GET_LENGTH(pystr); + buf = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + if (end < 0 || len <= end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } while (1) { /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; + Py_UCS4 c = 0; for (next = end; next < len; next++) { - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c == '"' || c == '\\') { break; } @@ -364,7 +366,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next /* Pick up this chunk if it's not zero length */ if (next != end) { APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&buf[end], next - end); + chunk = PyUnicode_FromKindAndData( + kind, + (char*)buf + kind * end, + next - end); if (chunk == NULL) { goto bail; } @@ -378,7 +383,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next raise_errmsg("Unterminated string starting at", pystr, begin); goto bail; } - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c != 'u') { /* Non-unicode backslash escapes */ end = next + 1; @@ -408,7 +413,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + Py_UCS4 digit = PyUnicode_READ(kind, buf, next); c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -425,22 +430,22 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next goto bail; } } -#ifdef Py_UNICODE_WIDE /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; + Py_UCS4 c2 = 0; if (end + 6 >= len) { raise_errmsg("Unpaired high surrogate", pystr, end - 5); goto bail; } - if (buf[next++] != '\\' || buf[next++] != 'u') { + if (PyUnicode_READ(kind, buf, next++) != '\\' || + PyUnicode_READ(kind, buf, next++) != 'u') { raise_errmsg("Unpaired high surrogate", pystr, end - 5); goto bail; } end += 6; /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + Py_UCS4 digit = PyUnicode_READ(kind, buf, next); c2 <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -467,10 +472,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next raise_errmsg("Unpaired low surrogate", pystr, end - 5); goto bail; } -#endif } APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&c, 1); + chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1); if (chunk == NULL) { goto bail; } @@ -608,8 +612,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss Returns a new PyObject (usually a dict, but object_hook can change that) */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + void *str; + int kind; + Py_ssize_t end_idx; PyObject *val = NULL; PyObject *rval = NULL; PyObject *key = NULL; @@ -617,6 +622,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss int has_pairs_hook = (s->object_pairs_hook != Py_None); Py_ssize_t next_idx; + if (PyUnicode_READY(pystr) == -1) + return NULL; + + str = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + end_idx = PyUnicode_GET_LENGTH(pystr) - 1; + if (has_pairs_hook) rval = PyList_New(0); else @@ -625,15 +637,15 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss return NULL; /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++; /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { while (idx <= end_idx) { PyObject *memokey; /* read key */ - if (str[idx] != '"') { + if (PyUnicode_READ(kind, str, idx) != '"') { raise_errmsg("Expecting property name", pystr, idx); goto bail; } @@ -653,13 +665,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { raise_errmsg("Expecting : delimiter", pystr, idx); goto bail; } idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); @@ -687,26 +699,26 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss idx = next_idx; /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the object is closed or we didn't get the , delimiter */ if (idx > end_idx) break; - if (str[idx] == '}') { + if (PyUnicode_READ(kind, str, idx) == '}') { break; } - else if (str[idx] != ',') { + else if (PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting , delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; } } /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } @@ -742,19 +754,27 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi Returns a new PyList */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + void *str; + int kind; + Py_ssize_t end_idx; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; if (rval == NULL) return NULL; + if (PyUnicode_READY(pystr) == -1) + return NULL; + + str = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + end_idx = PyUnicode_GET_LENGTH(pystr) - 1; + /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { while (idx <= end_idx) { /* read any JSON term */ @@ -769,26 +789,26 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi idx = next_idx; /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the array is closed or we didn't get the , delimiter */ if (idx > end_idx) break; - if (str[idx] == ']') { + if (PyUnicode_READ(kind, str, idx) == ']') { break; } - else if (str[idx] != ',') { + else if (PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting , delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; } } - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { + /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */ + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } @@ -820,7 +840,7 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyUnicode_GET_SIZE(cstr); + idx += PyUnicode_GET_LENGTH(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; @@ -837,15 +857,24 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ PyInt, PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + void *str; + int kind; + Py_ssize_t end_idx; Py_ssize_t idx = start; int is_float = 0; PyObject *rval; - PyObject *numstr; + PyObject *numstr = NULL; + PyObject *custom_func; + + if (PyUnicode_READY(pystr) == -1) + return NULL; + + str = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + end_idx = PyUnicode_GET_LENGTH(pystr) - 1; /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { + if (PyUnicode_READ(kind, str, idx) == '-') { idx++; if (idx > end_idx) { PyErr_SetNone(PyExc_StopIteration); @@ -854,12 +883,12 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { + if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') { idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++; } /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { + else if (PyUnicode_READ(kind, str, idx) == '0') { idx++; } /* no integer digits, error */ @@ -869,25 +898,25 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') { is_float = 1; idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) { Py_ssize_t e_start = idx; idx++; /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++; /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++; /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') { is_float = 1; } else { @@ -895,22 +924,39 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } } - /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromString(numstr); - } + if (is_float && s->parse_float != (PyObject *)&PyFloat_Type) + custom_func = s->parse_float; + else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type) + custom_func = s->parse_int; + else + custom_func = NULL; + + if (custom_func) { + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromKindAndData(kind, + (char*)str + kind * start, + idx - start); + if (numstr == NULL) + return NULL; + rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL); } else { - /* no fast path for unicode -> int, just call */ - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + Py_ssize_t i, n; + char *buf; + /* Straight conversion to ASCII, to avoid costly conversion of + decimal unicode digits (which cannot appear here) */ + n = idx - start; + numstr = PyBytes_FromStringAndSize(NULL, n); + if (numstr == NULL) + return NULL; + buf = PyBytes_AS_STRING(numstr); + for (i = 0; i < n; i++) { + buf[i] = (char) PyUnicode_READ(kind, str, i + start); + } + if (is_float) + rval = PyFloat_FromString(numstr); + else + rval = PyLong_FromString(buf, NULL, 10); } Py_DECREF(numstr); *next_idx_ptr = idx; @@ -928,13 +974,23 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ Returns a new PyObject representation of the term. */ PyObject *res; - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + void *str; + int kind; + Py_ssize_t length; + + if (PyUnicode_READY(pystr) == -1) + return NULL; + + str = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + length = PyUnicode_GET_LENGTH(pystr); + if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } - switch (str[idx]) { + + switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ return scanstring_unicode(pystr, idx + 1, @@ -958,7 +1014,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ return res; case 'n': /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; return Py_None; @@ -966,7 +1022,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 't': /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; return Py_True; @@ -974,7 +1030,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'f': /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; return Py_False; @@ -982,19 +1041,33 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'N': /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') { return _parse_constant(s, "NaN", idx, next_idx_ptr); } break; case 'I': /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { return _parse_constant(s, "Infinity", idx, next_idx_ptr); } break; case '-': /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { return _parse_constant(s, "-Infinity", idx, next_idx_ptr); } break; @@ -1210,22 +1283,22 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds) /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; - PyObject *rval; Py_ssize_t indent_level; PyEncoderObject *s; + _PyAccu acc; + assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; - rval = PyList_New(0); - if (rval == NULL) + if (_PyAccu_Init(&acc)) return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); + if (encoder_listencode_obj(s, &acc, obj, indent_level)) { + _PyAccu_Destroy(&acc); return NULL; } - return rval; + return _PyAccu_FinishAsList(&acc); } static PyObject * @@ -1297,18 +1370,19 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_list_append(PyObject *lst, PyObject *stolen) +_steal_accumulate(_PyAccu *acc, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); + int rval = _PyAccu_Accumulate(acc, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, + PyObject *obj, Py_ssize_t indent_level) { - /* Encode Python object obj to a JSON term, rval is a PyList */ + /* Encode Python object obj to a JSON term */ PyObject *newobj; int rv; @@ -1316,38 +1390,38 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi PyObject *cstr = _encoded_const(obj); if (cstr == NULL) return -1; - return _steal_list_append(rval, cstr); + return _steal_accumulate(acc, cstr); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded == NULL) return -1; - return _steal_list_append(rval, encoded); + return _steal_accumulate(acc, encoded); } else if (PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); if (encoded == NULL) return -1; - return _steal_list_append(rval, encoded); + return _steal_accumulate(acc, encoded); } else if (PyFloat_Check(obj)) { PyObject *encoded = encoder_encode_float(s, obj); if (encoded == NULL) return -1; - return _steal_list_append(rval, encoded); + return _steal_accumulate(acc, encoded); } else if (PyList_Check(obj) || PyTuple_Check(obj)) { if (Py_EnterRecursiveCall(" while encoding a JSON object")) return -1; - rv = encoder_listencode_list(s, rval, obj, indent_level); + rv = encoder_listencode_list(s, acc, obj, indent_level); Py_LeaveRecursiveCall(); return rv; } else if (PyDict_Check(obj)) { if (Py_EnterRecursiveCall(" while encoding a JSON object")) return -1; - rv = encoder_listencode_dict(s, rval, obj, indent_level); + rv = encoder_listencode_dict(s, acc, obj, indent_level); Py_LeaveRecursiveCall(); return rv; } @@ -1378,7 +1452,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi if (Py_EnterRecursiveCall(" while encoding a JSON object")) return -1; - rv = encoder_listencode_obj(s, rval, newobj, indent_level); + rv = encoder_listencode_obj(s, acc, newobj, indent_level); Py_LeaveRecursiveCall(); Py_DECREF(newobj); @@ -1398,9 +1472,10 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi } static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, + PyObject *dct, Py_ssize_t indent_level) { - /* Encode Python dict dct a JSON term, rval is a PyList */ + /* Encode Python dict dct a JSON term */ static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; @@ -1420,7 +1495,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss return -1; } if (Py_SIZE(dct) == 0) - return PyList_Append(rval, empty_dict); + return _PyAccu_Accumulate(acc, empty_dict); if (s->markers != Py_None) { int has_key; @@ -1438,7 +1513,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } } - if (PyList_Append(rval, open_dict)) + if (_PyAccu_Accumulate(acc, open_dict)) goto bail; if (s->indent != Py_None) { @@ -1525,7 +1600,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } if (idx) { - if (PyList_Append(rval, s->item_separator)) + if (_PyAccu_Accumulate(acc, s->item_separator)) goto bail; } @@ -1533,16 +1608,16 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss Py_CLEAR(kstr); if (encoded == NULL) goto bail; - if (PyList_Append(rval, encoded)) { + if (_PyAccu_Accumulate(acc, encoded)) { Py_DECREF(encoded); goto bail; } Py_DECREF(encoded); - if (PyList_Append(rval, s->key_separator)) + if (_PyAccu_Accumulate(acc, s->key_separator)) goto bail; value = PyTuple_GET_ITEM(item, 1); - if (encoder_listencode_obj(s, rval, value, indent_level)) + if (encoder_listencode_obj(s, acc, value, indent_level)) goto bail; idx += 1; Py_DECREF(item); @@ -1556,14 +1631,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss goto bail; Py_CLEAR(ident); } + /* TODO DOES NOT RUN; dead code if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_dict)) + + yield '\n' + (' ' * (_indent * _current_indent_level)) + }*/ + if (_PyAccu_Accumulate(acc, close_dict)) goto bail; return 0; @@ -1577,9 +1651,10 @@ bail: static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, + PyObject *seq, Py_ssize_t indent_level) { - /* Encode Python list seq to a JSON term, rval is a PyList */ + /* Encode Python list seq to a JSON term */ static PyObject *open_array = NULL; static PyObject *close_array = NULL; static PyObject *empty_array = NULL; @@ -1603,7 +1678,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss num_items = PySequence_Fast_GET_SIZE(s_fast); if (num_items == 0) { Py_DECREF(s_fast); - return PyList_Append(rval, empty_array); + return _PyAccu_Accumulate(acc, empty_array); } if (s->markers != Py_None) { @@ -1623,7 +1698,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss } seq_items = PySequence_Fast_ITEMS(s_fast); - if (PyList_Append(rval, open_array)) + if (_PyAccu_Accumulate(acc, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ @@ -1637,10 +1712,10 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss for (i = 0; i < num_items; i++) { PyObject *obj = seq_items[i]; if (i) { - if (PyList_Append(rval, s->item_separator)) + if (_PyAccu_Accumulate(acc, s->item_separator)) goto bail; } - if (encoder_listencode_obj(s, rval, obj, indent_level)) + if (encoder_listencode_obj(s, acc, obj, indent_level)) goto bail; } if (ident != NULL) { @@ -1648,14 +1723,14 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss goto bail; Py_CLEAR(ident); } + + /* TODO: DOES NOT RUN if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_array)) + + yield '\n' + (' ' * (_indent * _current_indent_level)) + }*/ + if (_PyAccu_Accumulate(acc, close_array)) goto bail; Py_DECREF(s_fast); return 0; |