summaryrefslogtreecommitdiffstats
path: root/Modules/_json.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_json.c')
-rw-r--r--Modules/_json.c226
1 files changed, 155 insertions, 71 deletions
diff --git a/Modules/_json.c b/Modules/_json.c
index dded2c9..f82af34 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -47,7 +47,7 @@ typedef struct _PyEncoderObject {
PyObject *item_separator;
PyObject *sort_keys;
PyObject *skipkeys;
- int fast_encode;
+ PyCFunction fast_encode;
int allow_nan;
} PyEncoderObject;
@@ -116,8 +116,6 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static PyObject *
-encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj);
-static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
@@ -225,26 +223,122 @@ ascii_escape_unicode(PyObject *pystr)
return rval;
}
+static PyObject *
+escape_unicode(PyObject *pystr)
+{
+ /* Take a PyUnicode pystr and return a new escaped PyUnicode */
+ Py_ssize_t i;
+ Py_ssize_t input_chars;
+ Py_ssize_t output_size;
+ Py_ssize_t chars;
+ PyObject *rval;
+ void *input;
+ int kind;
+ Py_UCS4 maxchar;
+
+ if (PyUnicode_READY(pystr) == -1)
+ return NULL;
+
+ maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
+ input_chars = PyUnicode_GET_LENGTH(pystr);
+ input = PyUnicode_DATA(pystr);
+ kind = PyUnicode_KIND(pystr);
+
+ /* Compute the output size */
+ for (i = 0, output_size = 2; i < input_chars; i++) {
+ Py_UCS4 c = PyUnicode_READ(kind, input, i);
+ Py_ssize_t d;
+ switch (c) {
+ case '\\': case '"': case '\b': case '\f':
+ case '\n': case '\r': case '\t':
+ d = 2;
+ break;
+ default:
+ if (c <= 0x1f)
+ d = 6;
+ else
+ d = 1;
+ }
+ if (output_size > PY_SSIZE_T_MAX - d) {
+ PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
+ return NULL;
+ }
+ output_size += d;
+ }
+
+ rval = PyUnicode_New(output_size, maxchar);
+ if (rval == NULL)
+ return NULL;
+
+ kind = PyUnicode_KIND(rval);
+
+#define ENCODE_OUTPUT do { \
+ chars = 0; \
+ output[chars++] = '"'; \
+ for (i = 0; i < input_chars; i++) { \
+ Py_UCS4 c = PyUnicode_READ(kind, input, i); \
+ switch (c) { \
+ case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
+ case '"': output[chars++] = '\\'; output[chars++] = c; break; \
+ case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
+ case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
+ case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
+ case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
+ case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
+ default: \
+ if (c <= 0x1f) { \
+ output[chars++] = '\\'; \
+ output[chars++] = 'u'; \
+ output[chars++] = '0'; \
+ output[chars++] = '0'; \
+ output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
+ output[chars++] = Py_hexdigits[(c ) & 0xf]; \
+ } else { \
+ output[chars++] = c; \
+ } \
+ } \
+ } \
+ output[chars++] = '"'; \
+ } while (0)
+
+ if (kind == PyUnicode_1BYTE_KIND) {
+ Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
+ ENCODE_OUTPUT;
+ } else if (kind == PyUnicode_2BYTE_KIND) {
+ Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
+ ENCODE_OUTPUT;
+ } else {
+ Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
+ assert(kind == PyUnicode_4BYTE_KIND);
+ ENCODE_OUTPUT;
+ }
+#undef ENCODE_OUTPUT
+
+#ifdef Py_DEBUG
+ assert(_PyUnicode_CheckConsistency(rval, 1));
+#endif
+ return rval;
+}
+
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
- /* Use the Python function json.decoder.errmsg to raise a nice
- looking ValueError exception */
- static PyObject *errmsg_fn = NULL;
- PyObject *pymsg;
- if (errmsg_fn == NULL) {
+ /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
+ static PyObject *JSONDecodeError = NULL;
+ PyObject *exc;
+ if (JSONDecodeError == NULL) {
PyObject *decoder = PyImport_ImportModule("json.decoder");
if (decoder == NULL)
return;
- errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
+ JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Py_DECREF(decoder);
- if (errmsg_fn == NULL)
+ if (JSONDecodeError == NULL)
return;
}
- pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
- if (pymsg) {
- PyErr_SetObject(PyExc_ValueError, pymsg);
- Py_DECREF(pymsg);
+ exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
+ if (exc) {
+ PyErr_SetObject(JSONDecodeError, exc);
+ Py_DECREF(exc);
}
}
@@ -537,6 +631,31 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
return rval;
}
+
+PyDoc_STRVAR(pydoc_encode_basestring,
+ "encode_basestring(string) -> string\n"
+ "\n"
+ "Return a JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
+{
+ PyObject *rval;
+ /* Return a JSON representation of a Python string */
+ /* METH_O */
+ if (PyUnicode_Check(pystr)) {
+ rval = escape_unicode(pystr);
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "first argument must be a string, not %.80s",
+ Py_TYPE(pystr)->tp_name);
+ return NULL;
+ }
+ return rval;
+}
+
static void
scanner_dealloc(PyObject *self)
{
@@ -715,7 +834,7 @@ bail:
static PyObject *
_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
- /* Read a JSON array from PyString pystr.
+ /* Read a JSON array from PyUnicode pystr.
idx is the index of the first character after the opening brace.
*next_idx_ptr is a return-by-reference index to the first character after
the closing brace.
@@ -787,8 +906,8 @@ bail:
}
static PyObject *
-_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
- /* Read a JSON constant from PyString pystr.
+_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON constant.
constant is the constant string that was found
("NaN", "Infinity", "-Infinity").
idx is the index of the first character of the constant
@@ -820,7 +939,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
the number.
Returns a new PyObject representation of that number:
- PyInt, PyLong, or PyFloat.
+ PyLong, or PyFloat.
May return other types if parse_int or parse_float are set
*/
void *str;
@@ -1244,7 +1363,14 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
s->item_separator = item_separator;
s->sort_keys = sort_keys;
s->skipkeys = skipkeys;
- s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
+ s->fast_encode = NULL;
+ if (PyCFunction_Check(s->encoder)) {
+ PyCFunction f = PyCFunction_GetFunction(s->encoder);
+ if (f == (PyCFunction)py_encode_basestring_ascii ||
+ f == (PyCFunction)py_encode_basestring) {
+ s->fast_encode = f;
+ }
+ }
s->allow_nan = allow_nan;
Py_INCREF(s->markers);
@@ -1317,38 +1443,9 @@ _encoded_const(PyObject *obj)
}
static PyObject *
-encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj)
-{
- /* Return the JSON representation of a PyLong and PyLong subclasses.
- Calls int() on PyLong subclasses in case the str() was changed.
- Added specifically to deal with IntEnum. See Issue18264. */
- PyObject *encoded, *longobj;
- if (PyLong_CheckExact(obj)) {
- encoded = PyObject_Str(obj);
- }
- else {
- longobj = PyNumber_Long(obj);
- if (longobj == NULL) {
- PyErr_SetString(
- PyExc_ValueError,
- "Unable to coerce int subclass to int"
- );
- return NULL;
- }
- encoded = PyObject_Str(longobj);
- Py_DECREF(longobj);
- }
- return encoded;
-}
-
-
-static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj)
{
- /* Return the JSON representation of a PyFloat.
- Modified to call float() on float subclasses in case the subclass
- changes the repr. See Issue18264. */
- PyObject *encoded, *floatobj;
+ /* Return the JSON representation of a PyFloat. */
double i = PyFloat_AS_DOUBLE(obj);
if (!Py_IS_FINITE(i)) {
if (!s->allow_nan) {
@@ -1368,24 +1465,7 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
return PyUnicode_FromString("NaN");
}
}
- /* coerce float subclasses to float (primarily for Enum) */
- if (PyFloat_CheckExact(obj)) {
- /* Use a better float format here? */
- encoded = PyObject_Repr(obj);
- }
- else {
- floatobj = PyNumber_Float(obj);
- if (floatobj == NULL) {
- PyErr_SetString(
- PyExc_ValueError,
- "Unable to coerce float subclass to float"
- );
- return NULL;
- }
- encoded = PyObject_Repr(floatobj);
- Py_DECREF(floatobj);
- }
- return encoded;
+ return PyFloat_Type.tp_repr(obj);
}
static PyObject *
@@ -1393,7 +1473,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
{
/* Return the JSON representation of a string */
if (s->fast_encode)
- return py_encode_basestring_ascii(NULL, obj);
+ return s->fast_encode(NULL, obj);
else
return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
}
@@ -1429,7 +1509,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
return _steal_accumulate(acc, encoded);
}
else if (PyLong_Check(obj)) {
- PyObject *encoded = encoder_encode_long(s, obj);
+ PyObject *encoded = PyLong_Type.tp_str(obj);
if (encoded == NULL)
return -1;
return _steal_accumulate(acc, encoded);
@@ -1594,7 +1674,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
goto bail;
}
else if (PyLong_Check(key)) {
- kstr = encoder_encode_long(s, key);
+ kstr = PyLong_Type.tp_str(key);
if (kstr == NULL) {
goto bail;
}
@@ -1840,6 +1920,10 @@ static PyMethodDef speedups_methods[] = {
(PyCFunction)py_encode_basestring_ascii,
METH_O,
pydoc_encode_basestring_ascii},
+ {"encode_basestring",
+ (PyCFunction)py_encode_basestring,
+ METH_O,
+ pydoc_encode_basestring},
{"scanstring",
(PyCFunction)py_scanstring,
METH_VARARGS,
@@ -1862,7 +1946,7 @@ static struct PyModuleDef jsonmodule = {
NULL
};
-PyObject*
+PyMODINIT_FUNC
PyInit__json(void)
{
PyObject *m = PyModule_Create(&jsonmodule);