diff options
Diffstat (limited to 'Modules/_json.c')
| -rw-r--r-- | Modules/_json.c | 1072 |
1 files changed, 227 insertions, 845 deletions
diff --git a/Modules/_json.c b/Modules/_json.c index 13f5816..01436b6 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -20,8 +20,6 @@ typedef int Py_ssize_t; #define UNUSED #endif -#define DEFAULT_ENCODING "utf-8" - #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) @@ -32,20 +30,19 @@ static PyTypeObject PyEncoderType; typedef struct _PyScannerObject { PyObject_HEAD - PyObject *encoding; PyObject *strict; PyObject *object_hook; - PyObject *pairs_hook; + PyObject *object_pairs_hook; PyObject *parse_float; PyObject *parse_int; PyObject *parse_constant; + PyObject *memo; } PyScannerObject; static PyMemberDef scanner_members[] = { - {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, - {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY}, {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, @@ -78,18 +75,12 @@ static PyMemberDef encoder_members[] = { {NULL} }; -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * -ascii_escape_str(PyObject *pystr); -static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); void init_json(void); static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); @@ -142,7 +133,7 @@ static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) { /* PyObject to Py_ssize_t converter */ - *size_ptr = PyInt_AsSsize_t(o); + *size_ptr = PyLong_AsSsize_t(o); if (*size_ptr == -1 && PyErr_Occurred()) return 0; return 1; @@ -152,19 +143,19 @@ static PyObject * _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) { /* Py_ssize_t to PyObject converter */ - return PyInt_FromSsize_t(*size_ptr); + return PyLong_FromSsize_t(*size_ptr); } static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ output[chars++] = '\\'; switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; + case '\\': output[chars++] = c; break; + case '"': output[chars++] = c; break; case '\b': output[chars++] = 'b'; break; case '\f': output[chars++] = 'f'; break; case '\n': output[chars++] = 'n'; break; @@ -197,14 +188,14 @@ ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) static PyObject * ascii_escape_unicode(PyObject *pystr) { - /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */ Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; Py_ssize_t max_output_size; Py_ssize_t chars; PyObject *rval; - char *output; + Py_UNICODE *output; Py_UNICODE *input_unicode; input_chars = PyUnicode_GET_SIZE(pystr); @@ -213,20 +204,20 @@ ascii_escape_unicode(PyObject *pystr) /* One char input can be up to 6 chars output, estimate 4 of these */ output_size = 2 + (MIN_EXPANSION * 4) + input_chars; max_output_size = 2 + (input_chars * MAX_EXPANSION); - rval = PyString_FromStringAndSize(NULL, output_size); + rval = PyUnicode_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } - output = PyString_AS_STRING(rval); + output = PyUnicode_AS_UNICODE(rval); chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { Py_UNICODE c = input_unicode[i]; if (S_CHAR(c)) { - output[chars++] = (char)c; + output[chars++] = c; } else { - chars = ascii_escape_char(c, output, chars); + chars = ascii_escape_unichar(c, output, chars); } if (output_size - chars < (1 + MAX_EXPANSION)) { /* There's more than four, so let's resize by a lot */ @@ -238,101 +229,15 @@ ascii_escape_unicode(PyObject *pystr) /* Make sure that the output size changed before resizing */ if (new_output_size != output_size) { output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { + if (PyUnicode_Resize(&rval, output_size) == -1) { return NULL; } - output = PyString_AS_STRING(rval); - } - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static PyObject * -ascii_escape_str(PyObject *pystr) -{ - /* Take a PyString pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - char *input_str; - - input_chars = PyString_GET_SIZE(pystr); - input_str = PyString_AS_STRING(pystr); - - /* Fast path for a string that's already ASCII */ - for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } - } - break; - } - } - - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - } - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); - - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; + output = PyUnicode_AS_UNICODE(rval); } - output = PyString_AS_STRING(rval); } } output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { + if (PyUnicode_Resize(&rval, chars) == -1) { return NULL; } return rval; @@ -365,18 +270,13 @@ static PyObject * join_list_unicode(PyObject *lst) { /* return u''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) + static PyObject *sep = NULL; + if (sep == NULL) { + sep = PyUnicode_FromStringAndSize("", 0); + if (sep == NULL) return NULL; } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); + return PyUnicode_Join(sep, lst); } static PyObject * @@ -390,7 +290,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { if (rval == NULL) { return NULL; } - pyidx = PyInt_FromSsize_t(idx); + pyidx = PyLong_FromSsize_t(idx); if (pyidx == NULL) { Py_DECREF(rval); return NULL; @@ -406,190 +306,20 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } -static PyObject * -scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyString pystr. - end is the index of the first character after the quote. - encoding is the encoding of pystr (must be an ASCII superset) - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyString (if ASCII-only) or PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyString_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next; - char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = (unsigned char)buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); - if (strchunk == NULL) { - goto bail; - } - chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); - Py_DECREF(strchunk); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c2 <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; +#define APPEND_OLD_CHUNK \ + if (chunk != NULL) { \ + if (chunks == NULL) { \ + chunks = PyList_New(0); \ + if (chunks == NULL) { \ + goto bail; \ + } \ + } \ + if (PyList_Append(chunks, chunk)) { \ + Py_DECREF(chunk); \ + goto bail; \ + } \ + Py_CLEAR(chunk); \ } - Py_CLEAR(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) @@ -602,15 +332,14 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Return value is a new PyUnicode */ - PyObject *rval; + PyObject *rval = NULL; Py_ssize_t len = PyUnicode_GET_SIZE(pystr); Py_ssize_t begin = end - 1; - Py_ssize_t next; + Py_ssize_t next = begin; const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } + PyObject *chunks = NULL; + PyObject *chunk = NULL; + if (end < 0 || len <= end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; @@ -618,7 +347,6 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next while (1) { /* Find the end of the string or the next escape */ Py_UNICODE c = 0; - PyObject *chunk = NULL; for (next = end; next < len; next++) { c = buf[next]; if (c == '"' || c == '\\') { @@ -635,15 +363,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } /* Pick up this chunk if it's not zero length */ if (next != end) { + APPEND_OLD_CHUNK chunk = PyUnicode_FromUnicode(&buf[end], next - end); if (chunk == NULL) { goto bail; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); } next++; if (c == '"') { @@ -745,32 +469,39 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } #endif } + APPEND_OLD_CHUNK chunk = PyUnicode_FromUnicode(&c, 1); if (chunk == NULL) { goto bail; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = PyUnicode_FromStringAndSize("", 0); + } + else { + APPEND_OLD_CHUNK + rval = join_list_unicode(chunks); + if (rval == NULL) { goto bail; } - Py_DECREF(chunk); + Py_CLEAR(chunks); } - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; - } - Py_DECREF(chunks); *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; Py_XDECREF(chunks); + Py_XDECREF(chunk); return NULL; } PyDoc_STRVAR(pydoc_scanstring, - "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "scanstring(string, end, strict=True) -> (string, end)\n" "\n" "Scan the string s for a JSON string. End is the index of the\n" "character in s after the quote that started the JSON string.\n" @@ -789,18 +520,11 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) PyObject *rval; Py_ssize_t end; Py_ssize_t next_end = -1; - char *encoding = NULL; int strict = 1; - if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + if (!PyArg_ParseTuple(args, "OO&|i:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &strict)) { return NULL; } - if (encoding == NULL) { - encoding = DEFAULT_ENCODING; - } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); } else { @@ -813,7 +537,7 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) } PyDoc_STRVAR(pydoc_encode_basestring_ascii, - "encode_basestring_ascii(basestring) -> str\n" + "encode_basestring_ascii(string) -> string\n" "\n" "Return an ASCII-only JSON representation of a Python string" ); @@ -821,13 +545,11 @@ PyDoc_STRVAR(pydoc_encode_basestring_ascii, static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) { + PyObject *rval; /* Return an ASCII-only JSON representation of a Python string */ /* METH_O */ - if (PyString_Check(pystr)) { - return ascii_escape_str(pystr); - } - else if (PyUnicode_Check(pystr)) { - return ascii_escape_unicode(pystr); + if (PyUnicode_Check(pystr)) { + rval = ascii_escape_unicode(pystr); } else { PyErr_Format(PyExc_TypeError, @@ -835,6 +557,7 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) Py_TYPE(pystr)->tp_name); return NULL; } + return rval; } static void @@ -851,10 +574,9 @@ scanner_traverse(PyObject *self, visitproc visit, void *arg) PyScannerObject *s; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; - Py_VISIT(s->encoding); Py_VISIT(s->strict); Py_VISIT(s->object_hook); - Py_VISIT(s->pairs_hook); + Py_VISIT(s->object_pairs_hook); Py_VISIT(s->parse_float); Py_VISIT(s->parse_int); Py_VISIT(s->parse_constant); @@ -867,141 +589,17 @@ scanner_clear(PyObject *self) PyScannerObject *s; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; - Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); - Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->object_pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); + Py_CLEAR(s->memo); return 0; } static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyString pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. - - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval; - PyObject *pairs; - PyObject *item; - PyObject *key = NULL; - PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - - pairs = PyList_New(0); - if (pairs == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON data type */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - item = PyTuple_Pack(2, key, val); - if (item == NULL) - goto bail; - Py_CLEAR(key); - Py_CLEAR(val); - if (PyList_Append(pairs, item) == -1) { - Py_DECREF(item); - goto bail; - } - Py_DECREF(item); - idx = next_idx; - - /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - - /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ - if (s->pairs_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); - if (val == NULL) - goto bail; - Py_DECREF(pairs); - *next_idx_ptr = idx + 1; - return val; - } - - rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), - pairs, NULL); - if (rval == NULL) - goto bail; - Py_CLEAR(pairs); - - /* if object_hook is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_XDECREF(pairs); - return NULL; -} - -static PyObject * _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. @@ -1012,16 +610,18 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss */ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *rval; - PyObject *pairs; - PyObject *item; - PyObject *key = NULL; PyObject *val = NULL; + PyObject *rval = NULL; + PyObject *key = NULL; int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->object_pairs_hook != Py_None); Py_ssize_t next_idx; - pairs = PyList_New(0); - if (pairs == NULL) + if (has_pairs_hook) + rval = PyList_New(0); + else + rval = PyDict_New(); + if (rval == NULL) return NULL; /* skip whitespace after { */ @@ -1030,6 +630,8 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { while (idx <= end_idx) { + PyObject *memokey; + /* read key */ if (str[idx] != '"') { raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx); @@ -1038,6 +640,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); if (key == NULL) goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ @@ -1054,16 +666,24 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss if (val == NULL) goto bail; - item = PyTuple_Pack(2, key, val); - if (item == NULL) - goto bail; - Py_CLEAR(key); - Py_CLEAR(val); - if (PyList_Append(pairs, item) == -1) { + if (has_pairs_hook) { + PyObject *item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(rval, item) == -1) { + Py_DECREF(item); + goto bail; + } Py_DECREF(item); - goto bail; } - Py_DECREF(item); + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } idx = next_idx; /* skip whitespace before } or , */ @@ -1091,104 +711,25 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss goto bail; } - /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ - if (s->pairs_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); - if (val == NULL) - goto bail; - Py_DECREF(pairs); - *next_idx_ptr = idx + 1; + *next_idx_ptr = idx + 1; + + if (has_pairs_hook) { + val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL); + Py_DECREF(rval); return val; } - rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), - pairs, NULL); - if (rval == NULL) - goto bail; - Py_CLEAR(pairs); - /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; Py_DECREF(rval); - rval = val; - val = NULL; + return val; } - *next_idx_ptr = idx + 1; return rval; bail: Py_XDECREF(key); Py_XDECREF(val); - Py_XDECREF(pairs); - return NULL; -} - -static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term and de-tuplefy the (rval, idx) */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(rval); return NULL; } @@ -1273,121 +814,19 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * PyObject *cstr; PyObject *rval; /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); + cstr = PyUnicode_InternFromString(constant); if (cstr == NULL) return NULL; /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); + idx += PyUnicode_GET_SIZE(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; } static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyString pystr. - idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - - /* save the index of the 'e' or 'E' just in case we need to backtrack */ - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyString_FromStringAndSize(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - double d = PyOS_string_to_double(PyString_AS_STRING(numstr), - NULL, NULL); - if (d == -1.0 && PyErr_Occurred()) - return NULL; - rval = PyFloat_FromDouble(d); - } - } - else { - /* parse as an int using a fast path if available, otherwise call user defined method */ - if (s->parse_int != (PyObject *)&PyInt_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - else { - rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); - } - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { /* Read a JSON number from PyUnicode pystr. idx is the index of the first character of the number @@ -1466,7 +905,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { - rval = PyFloat_FromString(numstr, NULL); + rval = PyFloat_FromString(numstr); } } else { @@ -1479,93 +918,6 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyString pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. - */ - PyObject *res; - char *str = PyString_AS_STRING(pystr); - Py_ssize_t length = PyString_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - if (Py_EnterRecursiveCall(" while decoding a JSON object " - "from a byte string")) - return NULL; - res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); - Py_LeaveRecursiveCall(); - return res; - case '[': - /* array */ - if (Py_EnterRecursiveCall(" while decoding a JSON array " - "from a byte string")) - return NULL; - res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); - Py_LeaveRecursiveCall(); - return res; - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); -} - -static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read one JSON term (of any kind) from PyUnicode pystr. @@ -1666,10 +1018,7 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) return NULL; - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scan_once_unicode(s, pystr, idx, &next_idx); } else { @@ -1678,6 +1027,9 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) Py_TYPE(pystr)->tp_name); return NULL; } + PyDict_Clear(s->memo); + if (rval == NULL) + return NULL; return _build_rval_index_tuple(rval, next_idx); } @@ -1687,10 +1039,9 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) PyScannerObject *s; s = (PyScannerObject *)type->tp_alloc(type, 0); if (s != NULL) { - s->encoding = NULL; s->strict = NULL; s->object_hook = NULL; - s->pairs_hook = NULL; + s->object_pairs_hook = NULL; s->parse_float = NULL; s->parse_int = NULL; s->parse_constant = NULL; @@ -1712,28 +1063,11 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) return -1; - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); - if (s->encoding == NULL) - goto bail; - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL) - goto bail; - if (!PyString_Check(s->encoding)) { - PyErr_Format(PyExc_TypeError, - "encoding must be a string, not %.80s", - Py_TYPE(s->encoding)->tp_name); - goto bail; + if (s->memo == NULL) { + s->memo = PyDict_New(); + if (s->memo == NULL) + goto bail; } - /* All of these will fail "gracefully" so we don't need to verify them */ s->strict = PyObject_GetAttrString(ctx, "strict"); @@ -1742,8 +1076,8 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); if (s->object_hook == NULL) goto bail; - s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); - if (s->pairs_hook == NULL) + s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->object_pairs_hook == NULL) goto bail; s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); if (s->parse_float == NULL) @@ -1758,10 +1092,9 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) return 0; bail: - Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); - Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->object_pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); @@ -1772,8 +1105,7 @@ PyDoc_STRVAR(scanner_doc, "JSON scanner object"); static PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "_json.Scanner", /* tp_name */ sizeof(PyScannerObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -1903,7 +1235,7 @@ _encoded_const(PyObject *obj) if (obj == Py_None) { static PyObject *s_null = NULL; if (s_null == NULL) { - s_null = PyString_InternFromString("null"); + s_null = PyUnicode_InternFromString("null"); } Py_INCREF(s_null); return s_null; @@ -1911,7 +1243,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_True) { static PyObject *s_true = NULL; if (s_true == NULL) { - s_true = PyString_InternFromString("true"); + s_true = PyUnicode_InternFromString("true"); } Py_INCREF(s_true); return s_true; @@ -1919,7 +1251,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_False) { static PyObject *s_false = NULL; if (s_false == NULL) { - s_false = PyString_InternFromString("false"); + s_false = PyUnicode_InternFromString("false"); } Py_INCREF(s_false); return s_false; @@ -1941,13 +1273,13 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) return NULL; } if (i > 0) { - return PyString_FromString("Infinity"); + return PyUnicode_FromString("Infinity"); } else if (i < 0) { - return PyString_FromString("-Infinity"); + return PyUnicode_FromString("-Infinity"); } else { - return PyString_FromString("NaN"); + return PyUnicode_FromString("NaN"); } } /* Use a better float format here? */ @@ -1986,14 +1318,14 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi return -1; return _steal_list_append(rval, cstr); } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) + else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded == NULL) return -1; return _steal_list_append(rval, encoded); } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { + else if (PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); if (encoded == NULL) return -1; @@ -2074,16 +1406,16 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss static PyObject *empty_dict = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; - PyObject *key = NULL; - PyObject *value = NULL; PyObject *it = NULL; + PyObject *items; + PyObject *item = NULL; int skipkeys; Py_ssize_t idx; if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); + open_dict = PyUnicode_InternFromString("{"); + close_dict = PyUnicode_InternFromString("}"); + empty_dict = PyUnicode_InternFromString("{}"); if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) return -1; } @@ -2119,17 +1451,49 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss */ } - /* TODO: C speedup not implemented for sort_keys */ - - it = PyObject_GetIter(dct); + if (PyObject_IsTrue(s->sort_keys)) { + /* First sort the keys then replace them with (key, value) tuples. */ + Py_ssize_t i, nitems; + items = PyMapping_Keys(dct); + if (items == NULL) + goto bail; + if (!PyList_Check(items)) { + PyErr_SetString(PyExc_ValueError, "keys must return list"); + goto bail; + } + if (PyList_Sort(items) < 0) + goto bail; + nitems = PyList_GET_SIZE(items); + for (i = 0; i < nitems; i++) { + PyObject *key, *value; + key = PyList_GET_ITEM(items, i); + value = PyDict_GetItem(dct, key); + item = PyTuple_Pack(2, key, value); + if (item == NULL) + goto bail; + PyList_SET_ITEM(items, i, item); + Py_DECREF(key); + } + } + else { + items = PyMapping_Items(dct); + } + if (items == NULL) + goto bail; + it = PyObject_GetIter(items); + Py_DECREF(items); if (it == NULL) goto bail; skipkeys = PyObject_IsTrue(s->skipkeys); idx = 0; - while ((key = PyIter_Next(it)) != NULL) { - PyObject *encoded; - - if (PyString_Check(key) || PyUnicode_Check(key)) { + while ((item = PyIter_Next(it)) != NULL) { + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (PyUnicode_Check(key)) { Py_INCREF(key); kstr = key; } @@ -2138,18 +1502,20 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss if (kstr == NULL) goto bail; } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyLong_Check because + True and False are also 1 and 0.*/ + kstr = _encoded_const(key); if (kstr == NULL) goto bail; } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); + else if (PyLong_Check(key)) { + kstr = PyObject_Str(key); if (kstr == NULL) goto bail; } else if (skipkeys) { - Py_DECREF(key); + Py_DECREF(item); continue; } else { @@ -2163,10 +1529,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss goto bail; } - value = PyObject_GetItem(dct, key); - if (value == NULL) - goto bail; - encoded = encoder_encode_string(s, kstr); Py_CLEAR(kstr); if (encoded == NULL) @@ -2178,11 +1540,12 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss Py_DECREF(encoded); if (PyList_Append(rval, s->key_separator)) goto bail; + + value = PyTuple_GET_ITEM(item, 1); if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; idx += 1; - Py_CLEAR(value); - Py_DECREF(key); + Py_DECREF(item); } if (PyErr_Occurred()) goto bail; @@ -2195,9 +1558,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ + indent_level -= 1; /* - indent_level -= 1; - yield '\n' + (' ' * (_indent * _current_indent_level)) */ } @@ -2207,8 +1569,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss bail: Py_XDECREF(it); - Py_XDECREF(key); - Py_XDECREF(value); + Py_XDECREF(item); Py_XDECREF(kstr); Py_XDECREF(ident); return -1; @@ -2227,9 +1588,9 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss Py_ssize_t i; if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); + open_array = PyUnicode_InternFromString("["); + close_array = PyUnicode_InternFromString("]"); + empty_array = PyUnicode_InternFromString("[]"); if (open_array == NULL || close_array == NULL || empty_array == NULL) return -1; } @@ -2285,9 +1646,8 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss } if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ + indent_level -= 1; /* - indent_level -= 1; - yield '\n' + (' ' * (_indent * _current_indent_level)) */ } @@ -2349,8 +1709,7 @@ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") static PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "_json.Encoder", /* tp_name */ sizeof(PyEncoderObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2406,19 +1765,42 @@ static PyMethodDef speedups_methods[] = { PyDoc_STRVAR(module_doc, "json speedups\n"); -void -init_json(void) +static struct PyModuleDef jsonmodule = { + PyModuleDef_HEAD_INIT, + "_json", + module_doc, + -1, + speedups_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyObject* +PyInit__json(void) { - PyObject *m; + PyObject *m = PyModule_Create(&jsonmodule); + if (!m) + return NULL; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) - return; + goto fail; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) - return; - m = Py_InitModule3("_json", speedups_methods, module_doc); + goto fail; Py_INCREF((PyObject*)&PyScannerType); - PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) { + Py_DECREF((PyObject*)&PyScannerType); + goto fail; + } Py_INCREF((PyObject*)&PyEncoderType); - PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) { + Py_DECREF((PyObject*)&PyEncoderType); + goto fail; + } + return m; + fail: + Py_DECREF(m); + return NULL; } |
