diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-09-29 21:50:23 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-09-29 21:50:23 (GMT) |
commit | c806fdcd8b9b4a4e42c647d1a9155485a2fae502 (patch) | |
tree | 5a438efe1102a6a2ea9438fa612a8c3b3a3b4883 | |
parent | 121aab4ba13e2a26d0414da862cdf887e3a0395b (diff) | |
download | cpython-c806fdcd8b9b4a4e42c647d1a9155485a2fae502.zip cpython-c806fdcd8b9b4a4e42c647d1a9155485a2fae502.tar.gz cpython-c806fdcd8b9b4a4e42c647d1a9155485a2fae502.tar.bz2 |
raw_unicode_escape() uses the new Unicode API
-rw-r--r-- | Modules/_pickle.c | 67 |
1 file changed, 21 insertions, 46 deletions
```diff
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 52965a4..7bee1e1 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1769,33 +1769,37 @@ save_bytes(PicklerObject *self, PyObject *obj)
 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
    backslash and newline characters to \uXXXX escapes. */
 static PyObject *
-raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
+raw_unicode_escape(PyObject *obj)
 {
+    static const char *hexdigits = "0123456789abcdef";
     PyObject *repr, *result;
     char *p;
-    char *q;
+    Py_ssize_t i, size, expandsize;
+    void *data;
+    unsigned int kind;
 
-    static const char *hexdigits = "0123456789abcdef";
+    if (PyUnicode_READY(obj))
+        return NULL;
 
-#ifdef Py_UNICODE_WIDE
-    const Py_ssize_t expandsize = 10;
-#else
-    const Py_ssize_t expandsize = 6;
-#endif
+    size = PyUnicode_GET_LENGTH(obj);
+    data = PyUnicode_DATA(obj);
+    kind = PyUnicode_KIND(obj);
+    if (kind == PyUnicode_4BYTE_KIND)
+        expandsize = 10;
+    else
+        expandsize = 6;
 
     if (size > PY_SSIZE_T_MAX / expandsize)
         return PyErr_NoMemory();
-
     repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
     if (repr == NULL)
         return NULL;
     if (size == 0)
         goto done;
 
-    p = q = PyByteArray_AS_STRING(repr);
-    while (size-- > 0) {
-        Py_UNICODE ch = *s++;
-#ifdef Py_UNICODE_WIDE
+    p = PyByteArray_AS_STRING(repr);
+    for (i=0; i < size; i++) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
         /* Map 32-bit characters to '\Uxxxxxxxx' */
         if (ch >= 0x10000) {
             *p++ = '\\';
@@ -1809,36 +1813,8 @@ raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
             *p++ = hexdigits[(ch >> 4) & 0xf];
             *p++ = hexdigits[ch & 15];
         }
-        else
-#else
-        /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
-        if (ch >= 0xD800 && ch < 0xDC00) {
-            Py_UNICODE ch2;
-            Py_UCS4 ucs;
-
-            ch2 = *s++;
-            size--;
-            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
-                ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
-                *p++ = '\\';
-                *p++ = 'U';
-                *p++ = hexdigits[(ucs >> 28) & 0xf];
-                *p++ = hexdigits[(ucs >> 24) & 0xf];
-                *p++ = hexdigits[(ucs >> 20) & 0xf];
-                *p++ = hexdigits[(ucs >> 16) & 0xf];
-                *p++ = hexdigits[(ucs >> 12) & 0xf];
-                *p++ = hexdigits[(ucs >> 8) & 0xf];
-                *p++ = hexdigits[(ucs >> 4) & 0xf];
-                *p++ = hexdigits[ucs & 0xf];
-                continue;
-            }
-            /* Fall through: isolated surrogates are copied as-is */
-            s--;
-            size++;
-        }
-#endif
         /* Map 16-bit characters to '\uxxxx' */
-        if (ch >= 256 || ch == '\\' || ch == '\n') {
+        else if (ch >= 256 || ch == '\\' || ch == '\n') {
             *p++ = '\\';
             *p++ = 'u';
             *p++ = hexdigits[(ch >> 12) & 0xf];
@@ -1850,9 +1826,9 @@ raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
         else
             *p++ = (char) ch;
     }
-    size = p - q;
+    size = p - PyByteArray_AS_STRING(repr);
 
- done:
+done:
     result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
     Py_DECREF(repr);
     return result;
@@ -1893,8 +1869,7 @@ save_unicode(PicklerObject *self, PyObject *obj)
     else {
         const char unicode_op = UNICODE;
 
-        encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
-                                     PyUnicode_GET_SIZE(obj));
+        encoded = raw_unicode_escape(obj);
         if (encoded == NULL)
             goto error;
 
```