diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 859 |
1 files changed, 466 insertions, 393 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7a70a5e..4361908 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -142,16 +142,18 @@ const unsigned char _Py_ascii_whitespace[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; -static PyObject *unicode_encode_call_errorhandler(const char *errors, +static PyObject * +unicode_encode_call_errorhandler(const char *errors, PyObject **errorHandler,const char *encoding, const char *reason, const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos); -static void raise_encode_exception(PyObject **exceptionObject, - const char *encoding, - const Py_UNICODE *unicode, Py_ssize_t size, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason); +static void +raise_encode_exception(PyObject **exceptionObject, + const char *encoding, + const Py_UNICODE *unicode, Py_ssize_t size, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason); /* Same for linebreaks */ static unsigned char ascii_linebreak[] = { @@ -223,7 +225,8 @@ static BLOOM_MASK bloom_linebreak; ((ch) < 128U ? ascii_linebreak[(ch)] : \ (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) -Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) +Py_LOCAL_INLINE(BLOOM_MASK) +make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) { /* calculate simple bloom-style bitmask for a given unicode string */ @@ -237,7 +240,8 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) return mask; } -Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen) +Py_LOCAL_INLINE(int) +unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen) { Py_ssize_t i; @@ -253,9 +257,9 @@ Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t /* --- Unicode Object ----------------------------------------------------- */ -static -int unicode_resize(register PyUnicodeObject *unicode, - Py_ssize_t length) +static int +unicode_resize(register PyUnicodeObject *unicode, + Py_ssize_t length) { void *oldstr; @@ -311,8 +315,8 @@ int unicode_resize(register PyUnicodeObject *unicode, */ -static -PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) +static PyUnicodeObject * +_PyUnicode_New(Py_ssize_t length) { register PyUnicodeObject *unicode; @@ -383,8 +387,8 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) return NULL; } -static -void unicode_dealloc(register PyUnicodeObject *unicode) +static void +unicode_dealloc(register PyUnicodeObject *unicode) { switch (PyUnicode_CHECK_INTERNED(unicode)) { case SSTATE_NOT_INTERNED: @@ -428,8 +432,8 @@ void unicode_dealloc(register PyUnicodeObject *unicode) } } -static -int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length) +static int +_PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length) { register PyUnicodeObject *v; @@ -464,13 +468,14 @@ int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length) return unicode_resize(v, length); } -int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) +int +PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) { return _PyUnicode_Resize((PyUnicodeObject **)unicode, length); } -PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, - Py_ssize_t size) +PyObject * +PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) { PyUnicodeObject *unicode; @@ -511,7 +516,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, return (PyObject *)unicode; } -PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) +PyObject * +PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) { PyUnicodeObject *unicode; @@ -558,7 +564,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) return (PyObject *)unicode; } -PyObject *PyUnicode_FromString(const char *u) +PyObject * +PyUnicode_FromString(const char *u) { size_t size = strlen(u); if (size > PY_SSIZE_T_MAX) { @@ -580,8 +587,8 @@ PyObject *PyUnicode_FromString(const char *u) /* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need to convert from UTF32 to UTF16. */ -PyObject *PyUnicode_FromWideChar(register const wchar_t *w, - Py_ssize_t size) +PyObject * +PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) { PyUnicodeObject *unicode; register Py_ssize_t i; @@ -631,8 +638,8 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w, #else -PyObject *PyUnicode_FromWideChar(register const wchar_t *w, - Py_ssize_t size) +PyObject * +PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) { PyUnicodeObject *unicode; @@ -707,6 +714,71 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, *fmt = '\0'; } +/* helper for PyUnicode_FromFormatV() */ + +static const char* +parse_format_flags(const char *f, + int *p_width, int *p_precision, + int *p_longflag, int *p_longlongflag, int *p_size_tflag) +{ + int width, precision, longflag, longlongflag, size_tflag; + + /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */ + f++; + width = 0; + while (Py_ISDIGIT((unsigned)*f)) + width = (width*10) + *f++ - '0'; + precision = 0; + if (*f == '.') { + f++; + while (Py_ISDIGIT((unsigned)*f)) + precision = (precision*10) + *f++ - '0'; + if (*f == '%') { + /* "%.3%s" => f points to "3" */ + f--; + } + } + if (*f == '\0') { + /* bogus format "%.1" => go backward, f points to "1" */ + f--; + } + if (p_width != NULL) + *p_width = width; + if (p_precision != NULL) + *p_precision = precision; + + /* Handle %ld, %lu, %lld and %llu. */ + longflag = 0; + longlongflag = 0; + size_tflag = 0; + + if (*f == 'l') { + if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') { + longflag = 1; + ++f; + } +#ifdef HAVE_LONG_LONG + else if (f[1] == 'l' && + (f[2] == 'd' || f[2] == 'u' || f[2] == 'i')) { + longlongflag = 1; + f += 2; + } +#endif + } + /* handle the size_t flag. */ + else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u' || f[1] == 'i')) { + size_tflag = 1; + ++f; + } + if (p_longflag != NULL) + *p_longflag = longflag; + if (p_longlongflag != NULL) + *p_longlongflag = longlongflag; + if (p_size_tflag != NULL) + *p_size_tflag = size_tflag; + return f; +} + #define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;} /* size of fixed-size buffer for formatting single arguments */ @@ -750,15 +822,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) * result in an array) */ for (f = format; *f; f++) { if (*f == '%') { - if (*(f+1)=='%') - continue; - if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V') - ++callcount; - while (Py_ISDIGIT((unsigned)*f)) - width = (width*10) + *f++ - '0'; - while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)) - ; - if (*f == 's') + /* skip width or width.precision (eg. "1.2" of "%1.2f") */ + f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL); + if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') ++callcount; } else if (128 <= (unsigned char)*f) { @@ -783,33 +849,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) for (f = format; *f; f++) { if (*f == '%') { #ifdef HAVE_LONG_LONG - int longlongflag = 0; + int longlongflag; #endif - const char* p = f; - width = 0; - while (Py_ISDIGIT((unsigned)*f)) - width = (width*10) + *f++ - '0'; - while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)) - ; - - /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since - * they don't affect the amount of space we reserve. - */ - if (*f == 'l') { - if (f[1] == 'd' || f[1] == 'u') { - ++f; - } -#ifdef HAVE_LONG_LONG - else if (f[1] == 'l' && - (f[2] == 'd' || f[2] == 'u')) { - longlongflag = 1; - f += 2; - } -#endif - } - else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { - ++f; - } + const char* p; + + p = f; + f = parse_format_flags(f, &width, NULL, + NULL, &longlongflag, NULL); switch (*f) { case 'c': @@ -974,40 +1020,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) for (f = format; *f; f++) { if (*f == '%') { - const char* p = f++; - int longflag = 0; - int longlongflag = 0; - int size_tflag = 0; - zeropad = (*f == '0'); - /* parse the width.precision part */ - width = 0; - while (Py_ISDIGIT((unsigned)*f)) - width = (width*10) + *f++ - '0'; - precision = 0; - if (*f == '.') { - f++; - while (Py_ISDIGIT((unsigned)*f)) - precision = (precision*10) + *f++ - '0'; - } - /* Handle %ld, %lu, %lld and %llu. */ - if (*f == 'l') { - if (f[1] == 'd' || f[1] == 'u') { - longflag = 1; - ++f; - } -#ifdef HAVE_LONG_LONG - else if (f[1] == 'l' && - (f[2] == 'd' || f[2] == 'u')) { - longlongflag = 1; - f += 2; - } -#endif - } - /* handle the size_t flag. */ - if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { - size_tflag = 1; - ++f; - } + const char* p; + int longflag; + int longlongflag; + int size_tflag; + + p = f; + zeropad = (f[1] == '0'); + f = parse_format_flags(f, &width, &precision, + &longflag, &longlongflag, &size_tflag); switch (*f) { case 'c': @@ -1023,9 +1044,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) *s++ = ordinal; break; } + case 'i': case 'd': makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, 'd'); + width, precision, *f); if (longflag) sprintf(realbuffer, fmt, va_arg(vargs, long)); #ifdef HAVE_LONG_LONG @@ -1054,11 +1076,6 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) sprintf(realbuffer, fmt, va_arg(vargs, unsigned int)); appendstring(realbuffer); break; - case 'i': - makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'i'); - sprintf(realbuffer, fmt, va_arg(vargs, int)); - appendstring(realbuffer); - break; case 'x': makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x'); sprintf(realbuffer, fmt, va_arg(vargs, int)); @@ -1336,7 +1353,8 @@ PyUnicode_AsWideCharString(PyObject *unicode, #endif -PyObject *PyUnicode_FromOrdinal(int ordinal) +PyObject * +PyUnicode_FromOrdinal(int ordinal) { Py_UNICODE s[2]; @@ -1359,7 +1377,8 @@ PyObject *PyUnicode_FromOrdinal(int ordinal) return PyUnicode_FromUnicode(s, 1); } -PyObject *PyUnicode_FromObject(register PyObject *obj) +PyObject * +PyUnicode_FromObject(register PyObject *obj) { /* XXX Perhaps we should make this API an alias of PyObject_Str() instead ?! */ @@ -1379,9 +1398,10 @@ PyObject *PyUnicode_FromObject(register PyObject *obj) return NULL; } -PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_FromEncodedObject(register PyObject *obj, + const char *encoding, + const char *errors) { Py_buffer buffer; PyObject *v; @@ -1464,23 +1484,26 @@ normalize_encoding(const char *encoding, return 1; } -PyObject *PyUnicode_Decode(const char *s, - Py_ssize_t size, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_Decode(const char *s, + Py_ssize_t size, + const char *encoding, + const char *errors) { PyObject *buffer = NULL, *unicode; Py_buffer info; char lower[11]; /* Enough for any encoding shortcut */ if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); + return PyUnicode_DecodeUTF8(s, size, errors); /* Shortcuts for common default encodings */ if (normalize_encoding(encoding, lower, sizeof(lower))) { - if (strcmp(lower, "utf-8") == 0) + if ((strcmp(lower, "utf-8") == 0) || + (strcmp(lower, "utf8") == 0)) return PyUnicode_DecodeUTF8(s, size, errors); else if ((strcmp(lower, "latin-1") == 0) || + (strcmp(lower, "latin1") == 0) || (strcmp(lower, "iso-8859-1") == 0)) return PyUnicode_DecodeLatin1(s, size, errors); #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) @@ -1520,9 +1543,10 @@ PyObject *PyUnicode_Decode(const char *s, return NULL; } -PyObject *PyUnicode_AsDecodedObject(PyObject *unicode, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_AsDecodedObject(PyObject *unicode, + const char *encoding, + const char *errors) { PyObject *v; @@ -1544,9 +1568,10 @@ PyObject *PyUnicode_AsDecodedObject(PyObject *unicode, return NULL; } -PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_AsDecodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) { PyObject *v; @@ -1575,10 +1600,11 @@ PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode, return NULL; } -PyObject *PyUnicode_Encode(const Py_UNICODE *s, - Py_ssize_t size, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_Encode(const Py_UNICODE *s, + Py_ssize_t size, + const char *encoding, + const char *errors) { PyObject *v, *unicode; @@ -1590,9 +1616,10 @@ PyObject *PyUnicode_Encode(const Py_UNICODE *s, return v; } -PyObject *PyUnicode_AsEncodedObject(PyObject *unicode, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_AsEncodedObject(PyObject *unicode, + const char *encoding, + const char *errors) { PyObject *v; @@ -1679,9 +1706,10 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) #endif } -PyObject *PyUnicode_AsEncodedString(PyObject *unicode, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_AsEncodedString(PyObject *unicode, + const char *encoding, + const char *errors) { PyObject *v; char lower[11]; /* Enough for any encoding shortcut */ @@ -1691,16 +1719,29 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Shortcuts for common default encodings */ - if (normalize_encoding(encoding, lower, sizeof(lower))) { - if (strcmp(lower, "utf-8") == 0) + if (encoding == NULL) { + if (errors == NULL || strcmp(errors, "strict") == 0) + return PyUnicode_AsUTF8String(unicode); + else return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), errors); + } + + /* Shortcuts for common default encodings */ + if (normalize_encoding(encoding, lower, sizeof(lower))) { + if ((strcmp(lower, "utf-8") == 0) || + (strcmp(lower, "utf8") == 0)) + { + if (errors == NULL || strcmp(errors, "strict") == 0) + return PyUnicode_AsUTF8String(unicode); + else + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + errors); + } else if ((strcmp(lower, "latin-1") == 0) || + (strcmp(lower, "latin1") == 0) || (strcmp(lower, "iso-8859-1") == 0)) return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), @@ -1716,21 +1757,6 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, PyUnicode_GET_SIZE(unicode), errors); } - /* During bootstrap, we may need to find the encodings - package, to load the file system encoding, and require the - file system encoding in order to load the encodings - package. - - Break out of this dependency by assuming that the path to - the encodings module is ASCII-only. XXX could try wcstombs - instead, if the file system encoding is the locale's - encoding. */ - if (Py_FileSystemDefaultEncoding && - strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 && - !PyThreadState_GET()->interp->codecs_initialized) - return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - errors); /* Encode via the codec registry */ v = PyCodec_Encode(unicode, encoding, errors); @@ -1766,9 +1792,10 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } -PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode, - const char *encoding, - const char *errors) +PyObject * +PyUnicode_AsEncodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) { PyObject *v; @@ -1797,14 +1824,12 @@ PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode, return NULL; } -PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, - const char *errors) +PyObject * +_PyUnicode_AsDefaultEncodedString(PyObject *unicode) { PyObject *v = ((PyUnicodeObject *)unicode)->defenc; if (v) return v; - if (errors != NULL) - Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString"); v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), NULL); @@ -1955,7 +1980,7 @@ _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) PyErr_BadArgument(); return NULL; } - bytes = _PyUnicode_AsDefaultEncodedString(unicode, NULL); + bytes = _PyUnicode_AsDefaultEncodedString(unicode); if (bytes == NULL) return NULL; if (psize != NULL) @@ -1969,7 +1994,8 @@ _PyUnicode_AsString(PyObject *unicode) return _PyUnicode_AsStringAndSize(unicode, NULL); } -Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode) +Py_UNICODE * +PyUnicode_AsUnicode(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -1981,7 +2007,8 @@ Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode) return NULL; } -Py_ssize_t PyUnicode_GetSize(PyObject *unicode) +Py_ssize_t +PyUnicode_GetSize(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -1993,7 +2020,8 @@ Py_ssize_t PyUnicode_GetSize(PyObject *unicode) return -1; } -const char *PyUnicode_GetDefaultEncoding(void) +const char * +PyUnicode_GetDefaultEncoding(void) { return "utf-8"; } @@ -2032,12 +2060,12 @@ onError: return 0 on success, -1 on error */ -static -int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, - const char *encoding, const char *reason, - const char **input, const char **inend, Py_ssize_t *startinpos, - Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, - PyUnicodeObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr) +static int +unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, + const char *encoding, const char *reason, + const char **input, const char **inend, Py_ssize_t *startinpos, + Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, + PyUnicodeObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr) { static char *argparse = "O!n;decoding error handler must return (str, int) tuple"; @@ -2207,9 +2235,10 @@ char utf7_category[128] = { (directWS && (utf7_category[(c)] == 2)) || \ (directO && (utf7_category[(c)] == 1)))) -PyObject *PyUnicode_DecodeUTF7(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeUTF7(const char *s, + Py_ssize_t size, + const char *errors) { return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); } @@ -2221,10 +2250,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, * all the shift state (seen bits, number of bits seen, high * surrogate). */ -PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) +PyObject * +PyUnicode_DecodeUTF7Stateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) { const char *starts = s; Py_ssize_t startinpos; @@ -2411,11 +2441,12 @@ utf7Error: } -PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, - Py_ssize_t size, - int base64SetO, - int base64WhiteSpace, - const char *errors) +PyObject * +PyUnicode_EncodeUTF7(const Py_UNICODE *s, + Py_ssize_t size, + int base64SetO, + int base64WhiteSpace, + const char *errors) { PyObject *v; /* It might be possible to tighten this worst case */ @@ -2536,9 +2567,10 @@ char utf8_code_length[256] = { 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-F4 + F5-FF */ }; -PyObject *PyUnicode_DecodeUTF8(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeUTF8(const char *s, + Py_ssize_t size, + const char *errors) { return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); } @@ -2556,10 +2588,11 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, # error C 'long' size should be either 4 or 8! #endif -PyObject *PyUnicode_DecodeUTF8Stateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) +PyObject * +PyUnicode_DecodeUTF8Stateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) { const char *starts = s; int n; @@ -3065,15 +3098,19 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, #undef MAX_SHORT_UNICHARS } -PyObject *PyUnicode_AsUTF8String(PyObject *unicode) +PyObject * +PyUnicode_AsUTF8String(PyObject *unicode) { + PyObject *utf8; if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); return NULL; } - return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - NULL); + utf8 = _PyUnicode_AsDefaultEncodedString(unicode); + if (utf8 == NULL) + return NULL; + Py_INCREF(utf8); + return utf8; } /* --- UTF-32 Codec ------------------------------------------------------- */ @@ -3339,7 +3376,8 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, #undef STORECHAR } -PyObject *PyUnicode_AsUTF32String(PyObject *unicode) +PyObject * +PyUnicode_AsUTF32String(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -3729,7 +3767,8 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, #undef STORECHAR } -PyObject *PyUnicode_AsUTF16String(PyObject *unicode) +PyObject * +PyUnicode_AsUTF16String(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -3745,9 +3784,10 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode) static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; -PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeUnicodeEscape(const char *s, + Py_ssize_t size, + const char *errors) { const char *starts = s; Py_ssize_t startinpos; @@ -4003,8 +4043,9 @@ Py_LOCAL_INLINE(const Py_UNICODE *) findchar(const Py_UNICODE *s, static const char *hexdigits = "0123456789abcdef"; -PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, - Py_ssize_t size) +PyObject * +PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, + Py_ssize_t size) { PyObject *repr; char *p; @@ -4144,7 +4185,8 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, return repr; } -PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode) +PyObject * +PyUnicode_AsUnicodeEscapeString(PyObject *unicode) { PyObject *s; if (!PyUnicode_Check(unicode)) { @@ -4158,9 +4200,10 @@ PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode) /* --- Raw Unicode Escape Codec ------------------------------------------- */ -PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeRawUnicodeEscape(const char *s, + Py_ssize_t size, + const char *errors) { const char *starts = s; Py_ssize_t startinpos; @@ -4275,8 +4318,9 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, return NULL; } -PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, - Py_ssize_t size) +PyObject * +PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, + Py_ssize_t size) { PyObject *repr; char *p; @@ -4363,7 +4407,8 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, return repr; } -PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) +PyObject * +PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) { PyObject *s; if (!PyUnicode_Check(unicode)) { @@ -4378,9 +4423,10 @@ PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) /* --- Unicode Internal Codec ------------------------------------------- */ -PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +_PyUnicode_DecodeUnicodeInternal(const char *s, + Py_ssize_t size, + const char *errors) { const char *starts = s; Py_ssize_t startinpos; @@ -4456,9 +4502,10 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, /* --- Latin-1 Codec ------------------------------------------------------ */ -PyObject *PyUnicode_DecodeLatin1(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeLatin1(const char *s, + Py_ssize_t size, + const char *errors) { PyUnicodeObject *v; Py_UNICODE *p; @@ -4498,11 +4545,12 @@ PyObject *PyUnicode_DecodeLatin1(const char *s, } /* create or adjust a UnicodeEncodeError */ -static void make_encode_exception(PyObject **exceptionObject, - const char *encoding, - const Py_UNICODE *unicode, Py_ssize_t size, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) +static void +make_encode_exception(PyObject **exceptionObject, + const char *encoding, + const Py_UNICODE *unicode, Py_ssize_t size, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) { if (*exceptionObject == NULL) { *exceptionObject = PyUnicodeEncodeError_Create( @@ -4523,11 +4571,12 @@ static void make_encode_exception(PyObject **exceptionObject, } /* raises a UnicodeEncodeError */ -static void raise_encode_exception(PyObject **exceptionObject, - const char *encoding, - const Py_UNICODE *unicode, Py_ssize_t size, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) +static void +raise_encode_exception(PyObject **exceptionObject, + const char *encoding, + const Py_UNICODE *unicode, Py_ssize_t size, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) { make_encode_exception(exceptionObject, encoding, unicode, size, startpos, endpos, reason); @@ -4539,12 +4588,13 @@ static void raise_encode_exception(PyObject **exceptionObject, build arguments, call the callback and check the arguments, put the result into newpos and return the replacement string, which has to be freed by the caller */ -static PyObject *unicode_encode_call_errorhandler(const char *errors, - PyObject **errorHandler, - const char *encoding, const char *reason, - const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, - Py_ssize_t startpos, Py_ssize_t endpos, - Py_ssize_t *newpos) +static PyObject * +unicode_encode_call_errorhandler(const char *errors, + PyObject **errorHandler, + const char *encoding, const char *reason, + const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, + Py_ssize_t startpos, Py_ssize_t endpos, + Py_ssize_t *newpos) { static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple"; @@ -4593,10 +4643,11 @@ static PyObject *unicode_encode_call_errorhandler(const char *errors, return resunicode; } -static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors, - int limit) +static PyObject * +unicode_encode_ucs1(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors, + int limit) { /* output object */ PyObject *res; @@ -4789,14 +4840,16 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, return NULL; } -PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_EncodeLatin1(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) { return unicode_encode_ucs1(p, size, errors, 256); } -PyObject *PyUnicode_AsLatin1String(PyObject *unicode) +PyObject * +PyUnicode_AsLatin1String(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -4809,9 +4862,10 @@ PyObject *PyUnicode_AsLatin1String(PyObject *unicode) /* --- 7-bit ASCII Codec -------------------------------------------------- */ -PyObject *PyUnicode_DecodeASCII(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeASCII(const char *s, + Py_ssize_t size, + const char *errors) { const char *starts = s; PyUnicodeObject *v; @@ -4868,14 +4922,16 @@ PyObject *PyUnicode_DecodeASCII(const char *s, return NULL; } -PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_EncodeASCII(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) { return unicode_encode_ucs1(p, size, errors, 128); } -PyObject *PyUnicode_AsASCIIString(PyObject *unicode) +PyObject * +PyUnicode_AsASCIIString(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -4899,7 +4955,8 @@ PyObject *PyUnicode_AsASCIIString(PyObject *unicode) b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte encodings, see IsDBCSLeadByteEx documentation. */ -static int is_dbcs_lead_byte(const char *s, int offset) +static int +is_dbcs_lead_byte(const char *s, int offset) { const char *curr = s + offset; @@ -4914,11 +4971,12 @@ static int is_dbcs_lead_byte(const char *s, int offset) * Decode MBCS string into unicode object. If 'final' is set, converts * trailing lead-byte too. Returns consumed size if succeed, -1 otherwise. */ -static int decode_mbcs(PyUnicodeObject **v, - const char *s, /* MBCS string */ - int size, /* sizeof MBCS string */ - int final, - const char *errors) +static int +decode_mbcs(PyUnicodeObject **v, + const char *s, /* MBCS string */ + int size, /* sizeof MBCS string */ + int final, + const char *errors) { Py_UNICODE *p; Py_ssize_t n; @@ -4997,10 +5055,11 @@ mbcs_decode_error: return -1; } -PyObject *PyUnicode_DecodeMBCSStateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) +PyObject * +PyUnicode_DecodeMBCSStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) { PyUnicodeObject *v = NULL; int done; @@ -5035,9 +5094,10 @@ PyObject *PyUnicode_DecodeMBCSStateful(const char *s, return (PyObject *)v; } -PyObject *PyUnicode_DecodeMBCS(const char *s, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_DecodeMBCS(const char *s, + Py_ssize_t size, + const char *errors) { return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); } @@ -5046,10 +5106,11 @@ PyObject *PyUnicode_DecodeMBCS(const char *s, * Convert unicode into string object (MBCS). * Returns 0 if succeed, -1 otherwise. */ -static int encode_mbcs(PyObject **repr, - const Py_UNICODE *p, /* unicode */ - int size, /* size of unicode */ - const char* errors) +static int +encode_mbcs(PyObject **repr, + const Py_UNICODE *p, /* unicode */ + int size, /* size of unicode */ + const char* errors) { BOOL usedDefaultChar = FALSE; BOOL *pusedDefaultChar; @@ -5122,9 +5183,10 @@ mbcs_encode_error: return -1; } -PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) +PyObject * +PyUnicode_EncodeMBCS(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) { PyObject *repr = NULL; int ret; @@ -5153,7 +5215,8 @@ PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, return repr; } -PyObject *PyUnicode_AsMBCSString(PyObject *unicode) +PyObject * +PyUnicode_AsMBCSString(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); @@ -5170,10 +5233,11 @@ PyObject *PyUnicode_AsMBCSString(PyObject *unicode) /* --- Character Mapping Codec -------------------------------------------- */ -PyObject *PyUnicode_DecodeCharmap(const char *s, - Py_ssize_t size, - PyObject *mapping, - const char *errors) +PyObject * +PyUnicode_DecodeCharmap(const char *s, + Py_ssize_t size, + PyObject *mapping, + const char *errors) { const char *starts = s; Py_ssize_t startinpos; @@ -5333,7 +5397,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, /* Charmap encoding: the lookup table */ -struct encoding_map{ +struct encoding_map { PyObject_HEAD unsigned char level1[32]; int count2, count3; @@ -5460,7 +5524,6 @@ PyUnicode_BuildEncodingMap(PyObject* string) if (!result) return NULL; for (i = 0; i < 256; i++) { - key = value = NULL; key = PyLong_FromLong(decode[i]); value = PyLong_FromLong(i); if (!key || !value) @@ -5548,7 +5611,8 @@ encoding_map_lookup(Py_UNICODE c, PyObject *mapping) /* Lookup the character ch in the mapping. If the character can't be found, Py_None is returned (or NULL, if another error occurred). */ -static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping) +static PyObject * +charmapencode_lookup(Py_UNICODE c, PyObject *mapping) { PyObject *w = PyLong_FromLong((long)c); PyObject *x; @@ -5605,16 +5669,16 @@ charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requireds typedef enum charmapencode_result { enc_SUCCESS, enc_FAILED, enc_EXCEPTION -}charmapencode_result; +} charmapencode_result; /* lookup the character, put the result in the output string and adjust various state variables. Resize the output bytes object if not enough space is available. Return a new reference to the object that was put in the output buffer, or Py_None, if the mapping was undefined (in which case no character was written) or NULL, if a reallocation error occurred. The caller must decref the result */ -static -charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, - PyObject **outobj, Py_ssize_t *outpos) +static charmapencode_result +charmapencode_output(Py_UNICODE c, PyObject *mapping, + PyObject **outobj, Py_ssize_t *outpos) { PyObject *rep; char *outstart; @@ -5670,8 +5734,8 @@ charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, /* handle an error in PyUnicode_EncodeCharmap Return 0 on success, -1 on error */ -static -int charmap_encoding_error( +static int +charmap_encoding_error( const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping, PyObject **exceptionObject, int *known_errorHandler, PyObject **errorHandler, const char *errors, @@ -5805,10 +5869,11 @@ int charmap_encoding_error( return 0; } -PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, - Py_ssize_t size, - PyObject *mapping, - const char *errors) +PyObject * +PyUnicode_EncodeCharmap(const Py_UNICODE *p, + Py_ssize_t size, + PyObject *mapping, + const char *errors) { /* output object */ PyObject *res = NULL; @@ -5869,8 +5934,9 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, return NULL; } -PyObject *PyUnicode_AsCharmapString(PyObject *unicode, - PyObject *mapping) +PyObject * +PyUnicode_AsCharmapString(PyObject *unicode, + PyObject *mapping) { if (!PyUnicode_Check(unicode) || mapping == NULL) { PyErr_BadArgument(); @@ -5883,10 +5949,11 @@ PyObject *PyUnicode_AsCharmapString(PyObject *unicode, } /* create or adjust a UnicodeTranslateError */ -static void make_translate_exception(PyObject **exceptionObject, - const Py_UNICODE *unicode, Py_ssize_t size, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) +static void +make_translate_exception(PyObject **exceptionObject, + const Py_UNICODE *unicode, Py_ssize_t size, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) { if (*exceptionObject == NULL) { *exceptionObject = PyUnicodeTranslateError_Create( @@ -5907,10 +5974,11 @@ static void make_translate_exception(PyObject **exceptionObject, } /* raises a UnicodeTranslateError */ -static void raise_translate_exception(PyObject **exceptionObject, - const Py_UNICODE *unicode, Py_ssize_t size, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) +static void +raise_translate_exception(PyObject **exceptionObject, + const Py_UNICODE *unicode, Py_ssize_t size, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) { make_translate_exception(exceptionObject, unicode, size, startpos, endpos, reason); @@ -5922,12 +5990,13 @@ static void raise_translate_exception(PyObject **exceptionObject, build arguments, call the callback and check the arguments, put the result into newpos and return the replacement string, which has to be freed by the caller */ -static PyObject *unicode_translate_call_errorhandler(const char *errors, - PyObject **errorHandler, - const char *reason, - const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, - Py_ssize_t startpos, Py_ssize_t endpos, - Py_ssize_t *newpos) +static PyObject * +unicode_translate_call_errorhandler(const char *errors, + PyObject **errorHandler, + const char *reason, + const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, + Py_ssize_t startpos, Py_ssize_t endpos, + Py_ssize_t *newpos) { static char *argparse = "O!n;translating error handler must return (str, int) tuple"; @@ -5977,8 +6046,8 @@ static PyObject *unicode_translate_call_errorhandler(const char *errors, /* Lookup the character ch in the mapping and put the result in result, which must be decrefed by the caller. Return 0 on success, -1 on error */ -static -int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) +static int +charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) { PyObject *w = PyLong_FromLong((long)c); PyObject *x; @@ -6027,8 +6096,8 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) /* ensure that *outobj is at least requiredsize characters long, if not reallocate and adjust various state variables. Return 0 on success, -1 on error */ -static -int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, +static int +charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, Py_ssize_t requiredsize) { Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj); @@ -6050,10 +6119,10 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, undefined (in which case no character was written). The called must decref result. Return 0 on success, -1 on error. */ -static -int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp, - Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp, - PyObject **res) +static int +charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp, + Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp, + PyObject **res) { if (charmaptranslate_lookup(*curinp, mapping, res)) return -1; @@ -6089,10 +6158,11 @@ int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp return 0; } -PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, - Py_ssize_t size, - PyObject *mapping, - const char *errors) +PyObject * +PyUnicode_TranslateCharmap(const Py_UNICODE *p, + Py_ssize_t size, + PyObject *mapping, + const char *errors) { /* output object */ PyObject *res = NULL; @@ -6231,9 +6301,10 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, return NULL; } -PyObject *PyUnicode_Translate(PyObject *str, - PyObject *mapping, - const char *errors) +PyObject * +PyUnicode_Translate(PyObject *str, + PyObject *mapping, + const char *errors) { PyObject *result; @@ -6278,10 +6349,11 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, } /* --- Decimal Encoder ---------------------------------------------------- */ -int PyUnicode_EncodeDecimal(Py_UNICODE *s, - Py_ssize_t length, - char *output, - const char *errors) +int +PyUnicode_EncodeDecimal(Py_UNICODE *s, + Py_ssize_t length, + char *output, + const char *errors) { Py_UNICODE *p, *end; PyObject *errorHandler = NULL; @@ -6442,10 +6514,11 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, start = 0; \ } -Py_ssize_t PyUnicode_Count(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) +Py_ssize_t +PyUnicode_Count(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end) { Py_ssize_t result; PyUnicodeObject* str_obj; @@ -6472,11 +6545,12 @@ Py_ssize_t PyUnicode_Count(PyObject *str, return result; } -Py_ssize_t PyUnicode_Find(PyObject *str, - PyObject *sub, - Py_ssize_t start, - Py_ssize_t end, - int direction) +Py_ssize_t +PyUnicode_Find(PyObject *str, + PyObject *sub, + Py_ssize_t start, + Py_ssize_t end, + int direction) { Py_ssize_t result; @@ -6508,12 +6582,12 @@ Py_ssize_t PyUnicode_Find(PyObject *str, return result; } -static -int tailmatch(PyUnicodeObject *self, - PyUnicodeObject *substring, - Py_ssize_t start, - Py_ssize_t end, - int direction) +static int +tailmatch(PyUnicodeObject *self, + PyUnicodeObject *substring, + Py_ssize_t start, + Py_ssize_t end, + int direction) { if (substring->length == 0) return 1; @@ -6534,11 +6608,12 @@ int tailmatch(PyUnicodeObject *self, return 0; } -Py_ssize_t PyUnicode_Tailmatch(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end, - int direction) +Py_ssize_t +PyUnicode_Tailmatch(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end, + int direction) { Py_ssize_t result; @@ -6562,9 +6637,9 @@ Py_ssize_t PyUnicode_Tailmatch(PyObject *str, /* Apply fixfct filter to the Unicode object self and return a reference to the modified object */ -static -PyObject *fixup(PyUnicodeObject *self, - int (*fixfct)(PyUnicodeObject *s)) +static PyObject * +fixup(PyUnicodeObject *self, + int (*fixfct)(PyUnicodeObject *s)) { PyUnicodeObject *u; @@ -6586,8 +6661,8 @@ PyObject *fixup(PyUnicodeObject *self, return (PyObject*) u; } -static -int fixupper(PyUnicodeObject *self) +static int +fixupper(PyUnicodeObject *self) { Py_ssize_t len = self->length; Py_UNICODE *s = self->str; @@ -6607,8 +6682,8 @@ int fixupper(PyUnicodeObject *self) return status; } -static -int fixlower(PyUnicodeObject *self) +static int +fixlower(PyUnicodeObject *self) { Py_ssize_t len = self->length; Py_UNICODE *s = self->str; @@ -6628,8 +6703,8 @@ int fixlower(PyUnicodeObject *self) return status; } -static -int fixswapcase(PyUnicodeObject *self) +static int +fixswapcase(PyUnicodeObject *self) { Py_ssize_t len = self->length; Py_UNICODE *s = self->str; @@ -6649,8 +6724,8 @@ int fixswapcase(PyUnicodeObject *self) return status; } -static -int fixcapitalize(PyUnicodeObject *self) +static int +fixcapitalize(PyUnicodeObject *self) { Py_ssize_t len = self->length; Py_UNICODE *s = self->str; @@ -6673,8 +6748,8 @@ int fixcapitalize(PyUnicodeObject *self) return status; } -static -int fixtitle(PyUnicodeObject *self) +static int +fixtitle(PyUnicodeObject *self) { register Py_UNICODE *p = PyUnicode_AS_UNICODE(self); register Py_UNICODE *e; @@ -6824,11 +6899,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) return NULL; } -static -PyUnicodeObject *pad(PyUnicodeObject *self, - Py_ssize_t left, - Py_ssize_t right, - Py_UNICODE fill) +static PyUnicodeObject * +pad(PyUnicodeObject *self, + Py_ssize_t left, + Py_ssize_t right, + Py_UNICODE fill) { PyUnicodeObject *u; @@ -6859,7 +6934,8 @@ PyUnicodeObject *pad(PyUnicodeObject *self, return u; } -PyObject *PyUnicode_Splitlines(PyObject *string, int keepends) +PyObject * +PyUnicode_Splitlines(PyObject *string, int keepends) { PyObject *list; @@ -6875,10 +6951,10 @@ PyObject *PyUnicode_Splitlines(PyObject *string, int keepends) return list; } -static -PyObject *split(PyUnicodeObject *self, - PyUnicodeObject *substring, - Py_ssize_t maxcount) +static PyObject * +split(PyUnicodeObject *self, + PyUnicodeObject *substring, + Py_ssize_t maxcount) { if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; @@ -6895,10 +6971,10 @@ PyObject *split(PyUnicodeObject *self, ); } -static -PyObject *rsplit(PyUnicodeObject *self, - PyUnicodeObject *substring, - Py_ssize_t maxcount) +static PyObject * +rsplit(PyUnicodeObject *self, + PyUnicodeObject *substring, + Py_ssize_t maxcount) { if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; @@ -6915,11 +6991,11 @@ PyObject *rsplit(PyUnicodeObject *self, ); } -static -PyObject *replace(PyUnicodeObject *self, - PyUnicodeObject *str1, - PyUnicodeObject *str2, - Py_ssize_t maxcount) +static PyObject * +replace(PyUnicodeObject *self, + PyUnicodeObject *str1, + PyUnicodeObject *str2, + Py_ssize_t maxcount) { PyUnicodeObject *u; @@ -6977,7 +7053,7 @@ PyObject *replace(PyUnicodeObject *self, } } else { - Py_ssize_t n, i, j, e; + Py_ssize_t n, i, j; Py_ssize_t product, new_size, delta; Py_UNICODE *p; @@ -7009,7 +7085,6 @@ PyObject *replace(PyUnicodeObject *self, return NULL; i = 0; p = u->str; - e = self->length - str1->length; if (str1->length > 0) { while (n-- > 0) { /* look for next match */ @@ -7254,8 +7329,8 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) #endif -int PyUnicode_Compare(PyObject *left, - PyObject *right) +int +PyUnicode_Compare(PyObject *left, PyObject *right) { if (PyUnicode_Check(left) && PyUnicode_Check(right)) return unicode_compare((PyUnicodeObject *)left, @@ -7291,16 +7366,14 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) #define TEST_COND(cond) \ ((cond) ? Py_True : Py_False) -PyObject *PyUnicode_RichCompare(PyObject *left, - PyObject *right, - int op) +PyObject * +PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) { int result; if (PyUnicode_Check(left) && PyUnicode_Check(right)) { PyObject *v; - if (((PyUnicodeObject *) left)->length != - ((PyUnicodeObject *) right)->length) { + if (PyUnicode_GET_SIZE(left) != PyUnicode_GET_SIZE(right)) { if (op == Py_EQ) { Py_INCREF(Py_False); return Py_False; @@ -7348,8 +7421,8 @@ PyObject *PyUnicode_RichCompare(PyObject *left, return Py_NotImplemented; } -int PyUnicode_Contains(PyObject *container, - PyObject *element) +int +PyUnicode_Contains(PyObject *container, PyObject *element) { PyObject *str, *sub; int result; @@ -7379,8 +7452,8 @@ int PyUnicode_Contains(PyObject *container, /* Concat to string or Unicode object giving a new Unicode object. */ -PyObject *PyUnicode_Concat(PyObject *left, - PyObject *right) +PyObject * +PyUnicode_Concat(PyObject *left, PyObject *right) { PyUnicodeObject *u = NULL, *v = NULL, *w; @@ -8293,10 +8366,11 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len) return (PyObject*) u; } -PyObject *PyUnicode_Replace(PyObject *obj, - PyObject *subobj, - PyObject *replobj, - Py_ssize_t maxcount) +PyObject * +PyUnicode_Replace(PyObject *obj, + PyObject *subobj, + PyObject *replobj, + Py_ssize_t maxcount) { PyObject *self; PyObject *str1; @@ -8360,8 +8434,8 @@ unicode_replace(PyUnicodeObject *self, PyObject *args) return result; } -static -PyObject *unicode_repr(PyObject *unicode) +static PyObject * +unicode_repr(PyObject *unicode) { PyObject *repr; Py_UNICODE *p; @@ -8596,9 +8670,8 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args) return (PyObject*) pad(self, width - self->length, 0, fillchar); } -PyObject *PyUnicode_Split(PyObject *s, - PyObject *sep, - Py_ssize_t maxsplit) +PyObject * +PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) { PyObject *result; @@ -8727,9 +8800,8 @@ unicode_rpartition(PyUnicodeObject *self, PyObject *separator) return PyUnicode_RPartition((PyObject *)self, separator); } -PyObject *PyUnicode_RSplit(PyObject *s, - PyObject *sep, - Py_ssize_t maxsplit) +PyObject * +PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) { PyObject *result; @@ -9426,8 +9498,8 @@ formatchar(Py_UNICODE *buf, */ #define FORMATBUFLEN (size_t)10 -PyObject *PyUnicode_Format(PyObject *format, - PyObject *args) +PyObject * +PyUnicode_Format(PyObject *format, PyObject *args) { Py_UNICODE *fmt, *res; Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx; @@ -10118,7 +10190,8 @@ PyUnicode_InternFromString(const char *cp) return s; } -void _Py_ReleaseInternedUnicodeStrings(void) +void +_Py_ReleaseInternedUnicodeStrings(void) { PyObject *keys; PyUnicodeObject *s; |