summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Lib/json/encoder.py 9
-rw-r--r-- Lib/test/test_json/test_encode_basestring_ascii.py 3
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_json.c 133
5 files changed, 142 insertions, 7 deletions
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index 0513838..26e9eb2 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -7,6 +7,10 @@ try:
except ImportError:
c_encode_basestring_ascii = None
try:
+ from _json import encode_basestring as c_encode_basestring
+except ImportError:
+ c_encode_basestring = None
+try:
from _json import make_encoder as c_make_encoder
except ImportError:
c_make_encoder = None
@@ -30,7 +34,7 @@ for i in range(0x20):
INFINITY = float('inf')
FLOAT_REPR = repr
-def encode_basestring(s):
+def py_encode_basestring(s):
"""Return a JSON representation of a Python string
"""
@@ -39,6 +43,9 @@ def encode_basestring(s):
return '"' + ESCAPE.sub(replace, s) + '"'
+encode_basestring = (c_encode_basestring or py_encode_basestring)
+
+
def py_encode_basestring_ascii(s):
"""Return an ASCII-only JSON representation of a Python string
diff --git a/Lib/test/test_json/test_encode_basestring_ascii.py b/Lib/test/test_json/test_encode_basestring_ascii.py
index 480afd6..3a4ad18 100644
--- a/Lib/test/test_json/test_encode_basestring_ascii.py
+++ b/Lib/test/test_json/test_encode_basestring_ascii.py
@@ -11,9 +11,6 @@ CASES = [
(' s p a c e d ', '" s p a c e d "'),
('\U0001d120', '"\\ud834\\udd20"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
- ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
- ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
- ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
diff --git a/Misc/ACKS b/Misc/ACKS
index 42ff010..95f12ef 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -626,6 +626,7 @@ Ali Ikinci
Aaron Iles
Lars Immisch
Bobby Impollonia
+Naoki Inada
Meador Inge
Peter Ingebretson
Tony Ingraldi
diff --git a/Misc/NEWS b/Misc/NEWS
index 9a10abf..3c71d9e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -203,6 +203,9 @@ Core and Builtins
Library
-------
+- Issue #23206: Make ``json.dumps(..., ensure_ascii=False)`` as fast as the
+ default case of ``ensure_ascii=True``. Patch by Naoki Inada.
+
- Issue #23185: Add math.inf and math.nan constants.
- Issue #23186: Add ssl.SSLObject.shared_ciphers() and
diff --git a/Modules/_json.c b/Modules/_json.c
index 1580ee6..9430990 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -47,7 +47,7 @@ typedef struct _PyEncoderObject {
PyObject *item_separator;
PyObject *sort_keys;
PyObject *skipkeys;
- int fast_encode;
+ PyCFunction fast_encode;
int allow_nan;
} PyEncoderObject;
@@ -218,6 +218,97 @@ ascii_escape_unicode(PyObject *pystr)
return rval;
}
+static PyObject *
+escape_unicode(PyObject *pystr)
+{
+    /* Take a PyUnicode pystr and return a new escaped PyUnicode.
+     *
+     * The result is the input wrapped in double quotes.  Backslash and
+     * double quote are prefixed with a backslash; \b \f \n \r \t use their
+     * two-character escapes; any other character <= 0x1f is written as
+     * \u00XX.  Every other character is copied through unchanged (no
+     * ASCII-only escaping is performed here).
+     *
+     * Returns NULL on failure: when PyUnicode_READY() or PyUnicode_New()
+     * fails, or (with OverflowError set) when the escaped length would not
+     * fit in a Py_ssize_t.
+     */
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t output_size;
+    Py_ssize_t chars;
+    PyObject *rval;
+    void *input;
+    int kind;
+    Py_UCS4 maxchar;
+
+    if (PyUnicode_READY(pystr) == -1)
+        return NULL;
+
+    maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
+    input_chars = PyUnicode_GET_LENGTH(pystr);
+    input = PyUnicode_DATA(pystr);
+    kind = PyUnicode_KIND(pystr);
+
+    /* Compute the output size; start at 2 for the surrounding quotes. */
+    for (i = 0, output_size = 2; i < input_chars; i++) {
+        Py_UCS4 c = PyUnicode_READ(kind, input, i);
+        Py_ssize_t d;  /* number of output characters this input char needs */
+        switch (c) {
+        case '\\': case '"': case '\b': case '\f':
+        case '\n': case '\r': case '\t':
+            d = 2;
+            break;
+        default:
+            if (c <= 0x1f)
+                d = 6;
+            else
+                d = 1;
+        }
+        /* Check before adding: an input made mostly of control characters
+           grows up to six-fold, which can exceed PY_SSIZE_T_MAX.  A wrapped
+           size would under-allocate rval and the write loop below would
+           run past the end of its buffer. */
+        if (output_size > PY_SSIZE_T_MAX - d) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "string is too long to escape");
+            return NULL;
+        }
+        output_size += d;
+    }
+
+    rval = PyUnicode_New(output_size, maxchar);
+    if (rval == NULL)
+        return NULL;
+
+    /* rval was created with the input's maxchar, and this kind is used
+       below both to read the input and to pick the output pointer type. */
+    kind = PyUnicode_KIND(rval);
+
+    /* The write loop is a macro so that it can be instantiated once per
+       output character width (the type of `output` differs per branch). */
+#define ENCODE_OUTPUT do { \
+        chars = 0; \
+        output[chars++] = '"'; \
+        for (i = 0; i < input_chars; i++) { \
+            Py_UCS4 c = PyUnicode_READ(kind, input, i); \
+            switch (c) { \
+            case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
+            case '"': output[chars++] = '\\'; output[chars++] = c; break; \
+            case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
+            case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
+            case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
+            case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
+            case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
+            default: \
+                if (c <= 0x1f) { \
+                    output[chars++] = '\\'; \
+                    output[chars++] = 'u'; \
+                    output[chars++] = '0'; \
+                    output[chars++] = '0'; \
+                    output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
+                    output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
+                } else { \
+                    output[chars++] = c; \
+                } \
+            } \
+        } \
+        output[chars++] = '"'; \
+    } while (0)
+
+    if (kind == PyUnicode_1BYTE_KIND) {
+        Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
+        ENCODE_OUTPUT;
+    } else if (kind == PyUnicode_2BYTE_KIND) {
+        Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
+        ENCODE_OUTPUT;
+    } else {
+        Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
+        assert(kind == PyUnicode_4BYTE_KIND);
+        ENCODE_OUTPUT;
+    }
+#undef ENCODE_OUTPUT
+
+#ifdef Py_DEBUG
+    assert(_PyUnicode_CheckConsistency(rval, 1));
+#endif
+    return rval;
+}
+
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
@@ -530,6 +621,31 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
return rval;
}
+
+PyDoc_STRVAR(pydoc_encode_basestring,
+ "encode_basestring(string) -> string\n"
+ "\n"
+ "Return a JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
+{
+    /* METH_O entry point: return a JSON representation of a Python
+       string.  Anything that is not a str is rejected with TypeError;
+       otherwise the result (or NULL on failure) comes straight from
+       escape_unicode(). */
+    if (!PyUnicode_Check(pystr)) {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    return escape_unicode(pystr);
+}
+
static void
scanner_dealloc(PyObject *self)
{
@@ -1223,7 +1339,14 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
s->item_separator = item_separator;
s->sort_keys = sort_keys;
s->skipkeys = skipkeys;
- s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
+ s->fast_encode = NULL;
+ if (PyCFunction_Check(s->encoder)) {
+ PyCFunction f = PyCFunction_GetFunction(s->encoder);
+ if (f == (PyCFunction)py_encode_basestring_ascii ||
+ f == (PyCFunction)py_encode_basestring) {
+ s->fast_encode = f;
+ }
+ }
s->allow_nan = PyObject_IsTrue(allow_nan);
Py_INCREF(s->markers);
@@ -1372,7 +1495,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
{
/* Return the JSON representation of a string */
if (s->fast_encode)
- return py_encode_basestring_ascii(NULL, obj);
+ return s->fast_encode(NULL, obj);
else
return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
}
@@ -1840,6 +1963,10 @@ static PyMethodDef speedups_methods[] = {
(PyCFunction)py_encode_basestring_ascii,
METH_O,
pydoc_encode_basestring_ascii},
+ {"encode_basestring",
+ (PyCFunction)py_encode_basestring,
+ METH_O,
+ pydoc_encode_basestring},
{"scanstring",
(PyCFunction)py_scanstring,
METH_VARARGS,