diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-21 21:52:58 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-21 21:52:58 (GMT) |
commit | 42bf77537e612737c7e7e2495c3c481e92391a42 (patch) | |
tree | 860ee37c9a22144d4f9712d8f4b8f2d9288304d9 /Objects | |
parent | 6dd381eb62278f75de7ba01626813de84cd248e7 (diff) | |
download | cpython-42bf77537e612737c7e7e2495c3c481e92391a42.zip cpython-42bf77537e612737c7e7e2495c3c481e92391a42.tar.gz cpython-42bf77537e612737c7e7e2495c3c481e92391a42.tar.bz2 |
Rewrite PyUnicode_EncodeDecimal() to use the new Unicode API
Add tests for PyUnicode_EncodeDecimal() and
PyUnicode_TransformDecimalToASCII().
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 97 |
1 file changed, 53 insertions, 44 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9dedf0b..bcd5b64 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8829,7 +8829,6 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, char *output, const char *errors) { - Py_UNICODE *p, *end; PyObject *errorHandler = NULL; PyObject *exc = NULL; PyObject *unicode; @@ -8838,47 +8837,50 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, /* the following variable is used for caching string comparisons * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ int known_errorHandler = -1; + Py_ssize_t i, j; + enum PyUnicode_Kind kind; + void *data; if (output == NULL) { PyErr_BadArgument(); return -1; } - p = s; - end = s + length; - while (p < end) { - register Py_UNICODE ch = *p; + unicode = PyUnicode_FromUnicode(s, length); + if (unicode == NULL) + return -1; + + if (PyUnicode_READY(unicode) < 0) + goto onError; + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + + for (i=0; i < length; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); int decimal; - PyObject *repunicode; - Py_ssize_t repsize; - Py_ssize_t newpos; - Py_UNICODE *uni2; - Py_UNICODE *collstart; - Py_UNICODE *collend; + Py_ssize_t startpos, endpos; if (Py_UNICODE_ISSPACE(ch)) { *output++ = ' '; - ++p; continue; } decimal = Py_UNICODE_TODECIMAL(ch); if (decimal >= 0) { *output++ = '0' + decimal; - ++p; continue; } if (0 < ch && ch < 256) { *output++ = (char)ch; - ++p; continue; } /* All other characters are considered unencodable */ - collstart = p; - collend = p+1; - while (collend < end) { - if ((0 < *collend && *collend < 256) || - !Py_UNICODE_ISSPACE(*collend) || - Py_UNICODE_TODECIMAL(*collend)) + startpos = i; + endpos = i+1; + for (; endpos < length; endpos++) { + ch = PyUnicode_READ(kind, data, endpos); + if ((0 < ch && ch < 256) || + !Py_UNICODE_ISSPACE(ch) || + Py_UNICODE_TODECIMAL(ch)) break; } /* cache callback name lookup @@ -8897,33 +8899,33 @@ 
PyUnicode_EncodeDecimal(Py_UNICODE *s, } switch (known_errorHandler) { case 1: /* strict */ - unicode = PyUnicode_FromUnicode(s, length); - if (unicode == NULL) - goto onError; - raise_encode_exception(&exc, encoding, unicode, collstart-s, collend-s, reason); - Py_DECREF(unicode); + raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason); goto onError; case 2: /* replace */ - for (p = collstart; p < collend; ++p) + for (j=startpos; j < endpos; j++) *output++ = '?'; /* fall through */ case 3: /* ignore */ - p = collend; + i = endpos; break; case 4: /* xmlcharrefreplace */ - /* generate replacement (temporarily (mis)uses p) */ - for (p = collstart; p < collend; ++p) - output += sprintf(output, "&#%d;", (int)*p); - p = collend; + /* generate replacement */ + for (j=startpos; j < endpos; j++) { + ch = PyUnicode_READ(kind, data, i); + output += sprintf(output, "&#%d;", (int)ch); + i++; + } break; default: - unicode = PyUnicode_FromUnicode(s, length); - if (unicode == NULL) - goto onError; + { + PyObject *repunicode; + Py_ssize_t repsize, newpos, k; + enum PyUnicode_Kind repkind; + void *repdata; + repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, encoding, reason, unicode, &exc, - collstart-s, collend-s, &newpos); - Py_DECREF(unicode); + startpos, endpos, &newpos); if (repunicode == NULL) goto onError; if (!PyUnicode_Check(repunicode)) { @@ -8932,10 +8934,17 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, Py_DECREF(repunicode); goto onError; } + if (PyUnicode_READY(repunicode) < 0) { + Py_DECREF(repunicode); + goto onError; + } + repkind = PyUnicode_KIND(repunicode); + repdata = PyUnicode_DATA(repunicode); + /* generate replacement */ repsize = PyUnicode_GET_SIZE(repunicode); - for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) { - Py_UNICODE ch = *uni2; + for (k=0; k<repsize; k++) { + ch = PyUnicode_READ(repkind, repdata, k); if (Py_UNICODE_ISSPACE(ch)) *output++ = ' '; else { @@ -8946,29 +8955,29 @@ 
PyUnicode_EncodeDecimal(Py_UNICODE *s, *output++ = (char)ch; else { Py_DECREF(repunicode); - unicode = PyUnicode_FromUnicode(s, length); - if (unicode == NULL) - goto onError; raise_encode_exception(&exc, encoding, - unicode, collstart-s, collend-s, reason); - Py_DECREF(unicode); + unicode, startpos, endpos, + reason); goto onError; } } } - p = s + newpos; + i = newpos; Py_DECREF(repunicode); } + } } /* 0-terminate the output string */ *output++ = '\0'; Py_XDECREF(exc); Py_XDECREF(errorHandler); + Py_DECREF(unicode); return 0; onError: Py_XDECREF(exc); Py_XDECREF(errorHandler); + Py_DECREF(unicode); return -1; } |