diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 572 |
1 files changed, 276 insertions, 296 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3bf1932..5234901 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -727,7 +727,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_DEC_REFTOTAL; _Py_ForgetReference(unicode); - new_unicode = (PyObject *)PyObject_REALLOC((char *)unicode, new_size); + new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size); if (new_unicode == NULL) { _Py_NewReference(unicode); PyErr_NoMemory(); @@ -816,7 +816,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ - if (length > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { + if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) { PyErr_NoMemory(); return -1; } @@ -888,7 +888,7 @@ _PyUnicode_New(Py_ssize_t length) } /* Ensure we won't overflow the size. */ - if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { + if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { return (PyUnicodeObject *)PyErr_NoMemory(); } if (length < 0) { @@ -2239,7 +2239,7 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, if (copy_null) targetlen++; if (!target) { - if (PY_SSIZE_T_MAX / sizeof(Py_UCS4) < targetlen) { + if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UCS4) < targetlen) { PyErr_NoMemory(); return NULL; } @@ -2313,35 +2313,6 @@ PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) #endif /* HAVE_WCHAR_H */ -static void -makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, - char c) -{ - *fmt++ = '%'; - if (longflag) - *fmt++ = 'l'; - else if (longlongflag) { - /* longlongflag should only ever be nonzero on machines with - HAVE_LONG_LONG defined */ -#ifdef HAVE_LONG_LONG - char *f = PY_FORMAT_LONG_LONG; - while (*f) - *fmt++ = *f++; -#else - /* we shouldn't ever get here */ - assert(0); - *fmt++ = 'l'; -#endif - } - else if (size_tflag) { - char *f = PY_FORMAT_SIZE_T; - while (*f) - *fmt++ = *f++; - } - *fmt++ = c; - *fmt = '\0'; -} - /* maximum number of characters required for output of %lld or %p. We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, plus 1 for the sign. 53/22 is an upper bound for log10(256). */ @@ -2517,48 +2488,42 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, case 'x': { /* used by sprintf */ - char fmt[10]; /* should be enough for "%0lld\0" */ char buffer[MAX_LONG_LONG_CHARS]; Py_ssize_t arglen; if (*f == 'u') { - makefmt(fmt, longflag, longlongflag, size_tflag, *f); - if (longflag) - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%lu", va_arg(*vargs, unsigned long)); #ifdef HAVE_LONG_LONG else if (longlongflag) - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%" PY_FORMAT_LONG_LONG "u", va_arg(*vargs, unsigned PY_LONG_LONG)); #endif else if (size_tflag) - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%" PY_FORMAT_SIZE_T "u", va_arg(*vargs, size_t)); else - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%u", va_arg(*vargs, unsigned int)); } else if (*f == 'x') { - makefmt(fmt, 0, 0, 0, 'x'); - len = sprintf(buffer, fmt, va_arg(*vargs, int)); + len = sprintf(buffer, "%x", va_arg(*vargs, int)); } else { - makefmt(fmt, longflag, longlongflag, size_tflag, *f); - if (longflag) - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%li", va_arg(*vargs, long)); #ifdef HAVE_LONG_LONG else if (longlongflag) - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%" PY_FORMAT_LONG_LONG "i", va_arg(*vargs, PY_LONG_LONG)); #endif else if (size_tflag) - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%" PY_FORMAT_SIZE_T "i", va_arg(*vargs, Py_ssize_t)); else - len = sprintf(buffer, fmt, + len = sprintf(buffer, "%i", va_arg(*vargs, int)); } assert(len >= 0); @@ -2852,7 +2817,7 @@ PyUnicode_AsWideCharString(PyObject *unicode, buflen = unicode_aswidechar(unicode, NULL, 0); if (buflen == -1) return NULL; - if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) { + if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < buflen) { PyErr_NoMemory(); return NULL; } @@ -3247,7 +3212,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) wlen2 = wcslen(wstr); if (wlen2 != wlen) { PyMem_Free(wstr); - PyErr_SetString(PyExc_TypeError, "embedded null character"); + PyErr_SetString(PyExc_ValueError, "embedded null character"); return NULL; } @@ -3255,7 +3220,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) /* "surrogateescape" error handler */ char *str; - str = _Py_wchar2char(wstr, &error_pos); + str = Py_EncodeLocale(wstr, &error_pos); if (str == NULL) { if (error_pos == (size_t)-1) { PyErr_NoMemory(); @@ -3308,7 +3273,7 @@ encode_error: if (errmsg != NULL) { size_t errlen; - wstr = _Py_char2wchar(errmsg, &errlen); + wstr = Py_DecodeLocale(errmsg, &errlen); if (wstr != NULL) { reason = PyUnicode_FromWideChar(wstr, errlen); PyMem_RawFree(wstr); @@ -3483,7 +3448,7 @@ mbstowcs_errorpos(const char *str, size_t len) memset(&mbs, 0, sizeof mbs); while (len) { - converted = mbrtowc(&ch, (char*)str, len, &mbs); + converted = mbrtowc(&ch, str, len, &mbs); if (converted == 0) /* Reached end of string */ break; @@ -3519,14 +3484,14 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, if (locale_error_handler(errors, &surrogateescape) < 0) return NULL; - if (str[len] != '\0' || len != strlen(str)) { - PyErr_SetString(PyExc_TypeError, "embedded null character"); + if (str[len] != '\0' || (size_t)len != strlen(str)) { + PyErr_SetString(PyExc_ValueError, "embedded null byte"); return NULL; } if (surrogateescape) { /* "surrogateescape" error handler */ - wstr = _Py_char2wchar(str, &wlen); + wstr = Py_DecodeLocale(str, &wlen); if (wstr == NULL) { if (wlen == (size_t)-1) PyErr_NoMemory(); @@ -3581,7 +3546,7 @@ decode_error: error_pos = mbstowcs_errorpos(str, len); if (errmsg != NULL) { size_t errlen; - wstr = _Py_char2wchar(errmsg, &errlen); + wstr = Py_DecodeLocale(errmsg, &errlen); if (wstr != NULL) { reason = PyUnicode_FromWideChar(wstr, errlen); PyMem_RawFree(wstr); @@ -3696,8 +3661,8 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) } size = PyBytes_GET_SIZE(output); data = PyBytes_AS_STRING(output); - if (size != strlen(data)) { - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + if ((size_t)size != strlen(data)) { + PyErr_SetString(PyExc_ValueError, "embedded null byte"); Py_DECREF(output); return 0; } @@ -3741,7 +3706,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr) } if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output), PyUnicode_GET_LENGTH(output), 0, 1) >= 0) { - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + PyErr_SetString(PyExc_ValueError, "embedded null character"); Py_DECREF(output); return 0; } @@ -6849,28 +6814,6 @@ code_page_name(UINT code_page, PyObject **obj) return PyBytes_AS_STRING(*obj); } -static int -is_dbcs_lead_byte(UINT code_page, const char *s, int offset) -{ - const char *curr = s + offset; - const char *prev; - - if (!IsDBCSLeadByteEx(code_page, *curr)) - return 0; - - prev = CharPrevExA(code_page, s, curr, 0); - if (prev == curr) - return 1; - /* FIXME: This code is limited to "true" double-byte encodings, - as it assumes an incomplete character consists of a single - byte. */ - if (curr - prev == 2) - return 1; - if (!IsDBCSLeadByteEx(code_page, *prev)) - return 1; - return 0; -} - static DWORD decode_code_page_flags(UINT code_page) { @@ -6945,7 +6888,7 @@ static int decode_code_page_errors(UINT code_page, PyObject **v, const char *in, const int size, - const char *errors) + const char *errors, int final) { const char *startin = in; const char *endin = in + size; @@ -6972,7 +6915,7 @@ decode_code_page_errors(UINT code_page, if (encoding == NULL) return -1; - if (errors == NULL || strcmp(errors, "strict") == 0) { + if ((errors == NULL || strcmp(errors, "strict") == 0) && final) { /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a UnicodeDecodeError. */ make_decode_exception(&exc, encoding, in, size, 0, 0, reason); @@ -7035,6 +6978,10 @@ decode_code_page_errors(UINT code_page, if (outsize <= 0) { Py_ssize_t startinpos, endinpos, outpos; + /* last character in partial decode? */ + if (in + insize >= endin && !final) + break; + startinpos = in - startin; endinpos = startinpos + 1; outpos = out - PyUnicode_AS_UNICODE(*v); @@ -7063,7 +7010,8 @@ decode_code_page_errors(UINT code_page, assert(outsize <= PyUnicode_WSTR_LENGTH(*v)); if (unicode_resize(v, outsize) < 0) goto error; - ret = size; + /* (in - startin) <= size and size is an int */ + ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int); error: Py_XDECREF(encoding_obj); @@ -7104,24 +7052,19 @@ decode_code_page_stateful(int code_page, done = 1; } - /* Skip trailing lead-byte unless 'final' is set */ - if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1)) - --chunk_size; - if (chunk_size == 0 && done) { if (v != NULL) break; _Py_RETURN_UNICODE_EMPTY(); } - converted = decode_code_page_strict(code_page, &v, s, chunk_size); if (converted == -2) converted = decode_code_page_errors(code_page, &v, s, chunk_size, - errors); - assert(converted != 0); + errors, final); + assert(converted != 0 || done); if (converted < 0) { Py_XDECREF(v); @@ -8528,10 +8471,10 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result) } else if (PyLong_Check(x)) { long value = PyLong_AS_LONG(x); - long max = PyUnicode_GetMax(); - if (value < 0 || value > max) { - PyErr_Format(PyExc_TypeError, - "character mapping must be in range(0x%x)", max+1); + if (value < 0 || value > MAX_UNICODE) { + PyErr_Format(PyExc_ValueError, + "character mapping must be in range(0x%x)", + MAX_UNICODE+1); Py_DECREF(x); return -1; } @@ -8550,76 +8493,168 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result) return -1; } } -/* ensure that *outobj is at least requiredsize characters long, - if not reallocate and adjust various state variables. - Return 0 on success, -1 on error */ + +/* lookup the character, write the result into the writer. + Return 1 if the result was written into the writer, return 0 if the mapping + was undefined, raise an exception return -1 on error. */ static int -charmaptranslate_makespace(Py_UCS4 **outobj, Py_ssize_t *psize, - Py_ssize_t requiredsize) -{ - Py_ssize_t oldsize = *psize; - Py_UCS4 *new_outobj; - if (requiredsize > oldsize) { - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2 * oldsize) - requiredsize = 2 * oldsize; - new_outobj = PyMem_Realloc(*outobj, requiredsize * sizeof(Py_UCS4)); - if (new_outobj == 0) +charmaptranslate_output(Py_UCS4 ch, PyObject *mapping, + _PyUnicodeWriter *writer) +{ + PyObject *item; + + if (charmaptranslate_lookup(ch, mapping, &item)) + return -1; + + if (item == NULL) { + /* not found => default to 1:1 mapping */ + if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) { return -1; - *outobj = new_outobj; - *psize = requiredsize; + } + return 1; } - return 0; + + if (item == Py_None) { + Py_DECREF(item); + return 0; + } + + if (PyLong_Check(item)) { + long ch = (Py_UCS4)PyLong_AS_LONG(item); + /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already + used it */ + if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) { + Py_DECREF(item); + return -1; + } + Py_DECREF(item); + return 1; + } + + if (!PyUnicode_Check(item)) { + Py_DECREF(item); + return -1; + } + + if (_PyUnicodeWriter_WriteStr(writer, item) < 0) { + Py_DECREF(item); + return -1; + } + + Py_DECREF(item); + return 1; } -/* lookup the character, put the result in the output string and adjust - various state variables. Return a new reference to the object that - was put in the output buffer in *result, or Py_None, if the mapping was - undefined (in which case no character was written). - The called must decref result. - Return 0 on success, -1 on error. */ + static int -charmaptranslate_output(PyObject *input, Py_ssize_t ipos, - PyObject *mapping, Py_UCS4 **output, - Py_ssize_t *osize, Py_ssize_t *opos, - PyObject **res) +unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch, + Py_UCS1 *translate) { - Py_UCS4 curinp = PyUnicode_READ_CHAR(input, ipos); - if (charmaptranslate_lookup(curinp, mapping, res)) + PyObject *item = NULL; + int ret = 0; + + if (charmaptranslate_lookup(ch, mapping, &item)) { return -1; - if (*res==NULL) { + } + + if (item == Py_None) { + /* deletion */ + translate[ch] = 0xfe; + } + else if (item == NULL) { /* not found => default to 1:1 mapping */ - (*output)[(*opos)++] = curinp; + translate[ch] = ch; + return 1; } - else if (*res==Py_None) - ; - else if (PyLong_Check(*res)) { - /* no overflow check, because we know that the space is enough */ - (*output)[(*opos)++] = (Py_UCS4)PyLong_AS_LONG(*res); + else if (PyLong_Check(item)) { + long replace = PyLong_AS_LONG(item); + /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already + used it */ + if (127 < replace) { + /* invalid character or character outside ASCII: + skip the fast translate */ + goto exit; + } + translate[ch] = (Py_UCS1)replace; } - else if (PyUnicode_Check(*res)) { - Py_ssize_t repsize; - if (PyUnicode_READY(*res) == -1) + else if (PyUnicode_Check(item)) { + Py_UCS4 replace; + + if (PyUnicode_READY(item) == -1) { + Py_DECREF(item); return -1; - repsize = PyUnicode_GET_LENGTH(*res); - if (repsize==1) { - /* no overflow check, because we know that the space is enough */ - (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, 0); } - else if (repsize!=0) { - /* more than one character */ - Py_ssize_t requiredsize = *opos + - (PyUnicode_GET_LENGTH(input) - ipos) + - repsize - 1; - Py_ssize_t i; - if (charmaptranslate_makespace(output, osize, requiredsize)) + if (PyUnicode_GET_LENGTH(item) != 1) + goto exit; + + replace = PyUnicode_READ_CHAR(item, 0); + if (replace > 127) + goto exit; + translate[ch] = (Py_UCS1)replace; + } + else { + /* not None, NULL, long or unicode */ + goto exit; + } + ret = 1; + + exit: + Py_DECREF(item); + return ret; +} + +/* Fast path for ascii => ascii translation. Return 1 if the whole string + was translated into writer, return 0 if the input string was partially + translated into writer, raise an exception and return -1 on error. */ +static int +unicode_fast_translate(PyObject *input, PyObject *mapping, + _PyUnicodeWriter *writer, int ignore) +{ + Py_UCS1 ascii_table[128], ch, ch2; + Py_ssize_t len; + Py_UCS1 *in, *end, *out; + int res = 0; + + if (PyUnicode_READY(input) == -1) + return -1; + if (!PyUnicode_IS_ASCII(input)) + return 0; + len = PyUnicode_GET_LENGTH(input); + + memset(ascii_table, 0xff, 128); + + in = PyUnicode_1BYTE_DATA(input); + end = in + len; + + assert(PyUnicode_IS_ASCII(writer->buffer)); + assert(PyUnicode_GET_LENGTH(writer->buffer) == len); + out = PyUnicode_1BYTE_DATA(writer->buffer); + + for (; in < end; in++) { + ch = *in; + ch2 = ascii_table[ch]; + if (ch2 == 0xff) { + int translate = unicode_fast_translate_lookup(mapping, ch, + ascii_table); + if (translate < 0) return -1; - for(i = 0; i < repsize; i++) - (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, i); + if (translate == 0) + goto exit; + ch2 = ascii_table[ch]; } + if (ch2 == 0xfe) { + if (ignore) + continue; + goto exit; + } + assert(ch2 < 128); + *out = ch2; + out++; } - else - return -1; - return 0; + res = 1; + +exit: + writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer); + return res; } PyObject * @@ -8628,22 +8663,17 @@ _PyUnicode_TranslateCharmap(PyObject *input, const char *errors) { /* input object */ - char *idata; + char *data; Py_ssize_t size, i; int kind; /* output buffer */ - Py_UCS4 *output = NULL; - Py_ssize_t osize; - PyObject *res; - /* current output position */ - Py_ssize_t opos; + _PyUnicodeWriter writer; + /* error handler */ char *reason = "character maps to <undefined>"; PyObject *errorHandler = NULL; PyObject *exc = NULL; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, - * 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; + int ignore; + int res; if (mapping == NULL) { PyErr_BadArgument(); @@ -8652,10 +8682,9 @@ _PyUnicode_TranslateCharmap(PyObject *input, if (PyUnicode_READY(input) == -1) return NULL; - idata = (char*)PyUnicode_DATA(input); + data = (char*)PyUnicode_DATA(input); kind = PyUnicode_KIND(input); size = PyUnicode_GET_LENGTH(input); - i = 0; if (size == 0) { Py_INCREF(input); @@ -8664,121 +8693,81 @@ _PyUnicode_TranslateCharmap(PyObject *input, /* allocate enough for a simple 1:1 translation without replacements, if we need more, we'll resize */ - osize = size; - output = PyMem_Malloc(osize * sizeof(Py_UCS4)); - opos = 0; - if (output == NULL) { - PyErr_NoMemory(); + _PyUnicodeWriter_Init(&writer); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; + + ignore = (errors != NULL && strcmp(errors, "ignore") == 0); + + res = unicode_fast_translate(input, mapping, &writer, ignore); + if (res < 0) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; } + if (res == 1) + return _PyUnicodeWriter_Finish(&writer); + i = writer.pos; while (i<size) { /* try to encode it */ - PyObject *x = NULL; - if (charmaptranslate_output(input, i, mapping, - &output, &osize, &opos, &x)) { - Py_XDECREF(x); + int translate; + PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ + Py_ssize_t newpos; + /* startpos for collecting untranslatable chars */ + Py_ssize_t collstart; + Py_ssize_t collend; + Py_UCS4 ch; + + ch = PyUnicode_READ(kind, data, i); + translate = charmaptranslate_output(ch, mapping, &writer); + if (translate < 0) goto onError; - } - Py_XDECREF(x); - if (x!=Py_None) /* it worked => adjust input pointer */ + + if (translate != 0) { + /* it worked => adjust input pointer */ ++i; - else { /* untranslatable character */ - PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ - Py_ssize_t repsize; - Py_ssize_t newpos; - Py_ssize_t uni2; - /* startpos for collecting untranslatable chars */ - Py_ssize_t collstart = i; - Py_ssize_t collend = i+1; - Py_ssize_t coll; - - /* find all untranslatable characters */ - while (collend < size) { - if (charmaptranslate_lookup(PyUnicode_READ(kind,idata, collend), mapping, &x)) - goto onError; - Py_XDECREF(x); - if (x!=Py_None) - break; - ++collend; - } - /* cache callback name lookup - * (if not done yet, i.e. it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ - make_translate_exception(&exc, - input, collstart, collend, reason); - if (exc != NULL) - PyCodec_StrictErrors(exc); + continue; + } + + /* untranslatable character */ + collstart = i; + collend = i+1; + + /* find all untranslatable characters */ + while (collend < size) { + PyObject *x; + ch = PyUnicode_READ(kind, data, collend); + if (charmaptranslate_lookup(ch, mapping, &x)) goto onError; - case 2: /* replace */ - /* No need to check for space, this is a 1:1 replacement */ - for (coll = collstart; coll<collend; coll++) - output[opos++] = '?'; - /* fall through */ - case 3: /* ignore */ - i = collend; - break; - case 4: /* xmlcharrefreplace */ - /* generate replacement (temporarily (mis)uses i) */ - for (i = collstart; i < collend; ++i) { - char buffer[2+29+1+1]; - char *cp; - sprintf(buffer, "&#%d;", PyUnicode_READ(kind, idata, i)); - if (charmaptranslate_makespace(&output, &osize, - opos+strlen(buffer)+(size-collend))) - goto onError; - for (cp = buffer; *cp; ++cp) - output[opos++] = *cp; - } - i = collend; + Py_XDECREF(x); + if (x != Py_None) break; - default: - repunicode = unicode_translate_call_errorhandler(errors, &errorHandler, - reason, input, &exc, - collstart, collend, &newpos); - if (repunicode == NULL) - goto onError; - if (PyUnicode_READY(repunicode) == -1) { - Py_DECREF(repunicode); - goto onError; - } - /* generate replacement */ - repsize = PyUnicode_GET_LENGTH(repunicode); - if (charmaptranslate_makespace(&output, &osize, - opos+repsize+(size-collend))) { - Py_DECREF(repunicode); - goto onError; - } - for (uni2 = 0; repsize-->0; ++uni2) - output[opos++] = PyUnicode_READ_CHAR(repunicode, uni2); - i = newpos; + ++collend; + } + + if (ignore) { + i = collend; + } + else { + repunicode = unicode_translate_call_errorhandler(errors, &errorHandler, + reason, input, &exc, + collstart, collend, &newpos); + if (repunicode == NULL) + goto onError; + if (_PyUnicodeWriter_WriteStr(&writer, repunicode) < 0) { Py_DECREF(repunicode); + goto onError; } + Py_DECREF(repunicode); + i = newpos; } } - res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, opos); - if (!res) - goto onError; - PyMem_Free(output); Py_XDECREF(exc); Py_XDECREF(errorHandler); - return res; + return _PyUnicodeWriter_Finish(&writer); onError: - PyMem_Free(output); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(exc); Py_XDECREF(errorHandler); return NULL; @@ -8880,7 +8869,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, maxchar = 127; for (i = 0; i < length; i++) { - Py_UNICODE ch = s[i]; + Py_UCS4 ch = s[i]; if (ch > 127) { int decimal = Py_UNICODE_TODECIMAL(ch); if (decimal >= 0) @@ -8897,7 +8886,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, data = PyUnicode_DATA(decimal); /* Iterate over code points */ for (i = 0; i < length; i++) { - Py_UNICODE ch = s[i]; + Py_UCS4 ch = s[i]; if (ch > 127) { int decimal = Py_UNICODE_TODECIMAL(ch); if (decimal >= 0) @@ -10843,7 +10832,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) void *data = PyUnicode_DATA(uni); /* Compare Unicode string and source character set string */ for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++) - if (chr != str[i]) + if (chr != (unsigned char)str[i]) return (chr < (unsigned char)(str[i])) ? -1 : 1; /* This check keeps Python strings that end in '\0' from comparing equal to C strings identical up to that point. */ @@ -14051,24 +14040,14 @@ mainformatlong(PyObject *v, if (!PyNumber_Check(v)) goto wrongtype; - /* make sure number is a type of integer */ - /* if not, issue deprecation warning for now */ + /* make sure number is a type of integer for o, x, and X */ if (!PyLong_Check(v)) { if (type == 'o' || type == 'x' || type == 'X') { iobj = PyNumber_Index(v); if (iobj == NULL) { - PyErr_Clear(); - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "automatic int conversions have been deprecated", - 1)) { - return -1; - } - iobj = PyNumber_Long(v); - if (iobj == NULL ) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - goto wrongtype; - return -1; - } + if (PyErr_ExceptionMatches(PyExc_TypeError)) + goto wrongtype; + return -1; } } else { @@ -14129,10 +14108,23 @@ mainformatlong(PyObject *v, return 0; wrongtype: - PyErr_Format(PyExc_TypeError, - "%%%c format: a number is required, " - "not %.200s", - type, Py_TYPE(v)->tp_name); + switch(type) + { + case 'o': + case 'x': + case 'X': + PyErr_Format(PyExc_TypeError, + "%%%c format: an integer is required, " + "not %.200s", + type, Py_TYPE(v)->tp_name); + break; + default: + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", + type, Py_TYPE(v)->tp_name); + break; + } return -1; } @@ -14150,22 +14142,10 @@ formatchar(PyObject *v) PyObject *iobj; long x; /* make sure number is a type of integer */ - /* if not, issue deprecation warning for now */ if (!PyLong_Check(v)) { iobj = PyNumber_Index(v); if (iobj == NULL) { - PyErr_Clear(); - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "automatic int conversions have been deprecated", - 1)) { - return -1; - } - iobj = PyNumber_Long(v); - if (iobj == NULL ) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - goto onError; - return -1; - } + goto onError; } v = iobj; Py_DECREF(iobj); @@ -15419,7 +15399,7 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode) if (u == NULL) return NULL; /* Ensure we won't overflow the size. */ - if (len > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { + if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { PyErr_NoMemory(); return NULL; } |