diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-08-05 20:26:11 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-08-05 20:26:11 (GMT) |
commit | 9c121069d3a61868f4586ad2ba2e5435a82af061 (patch) | |
tree | 2b855fe92ed298ec849c14a4f01a9c0402a6fff7 /Objects/unicodeobject.c | |
parent | 64ce5052e1c2495bcbc78f732e8ece2f4c8375ac (diff) | |
download | cpython-9c121069d3a61868f4586ad2ba2e5435a82af061.zip cpython-9c121069d3a61868f4586ad2ba2e5435a82af061.tar.gz cpython-9c121069d3a61868f4586ad2ba2e5435a82af061.tar.bz2 |
Change PyUnicode_FromString[AndSize] to expect UTF-8.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 20 |
1 file changed, 8 insertions, 12 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d1b5747..27fedca 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -427,7 +427,9 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) { PyUnicodeObject *unicode; /* If the Unicode data is known at construction time, we can apply - some optimizations which share commonly used objects. */ + some optimizations which share commonly used objects. + Also, this means the input must be UTF-8, so fall back to the + UTF-8 decoder at the end. */ if (u != NULL) { /* Optimization for empty strings */ @@ -436,8 +438,9 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) return (PyObject *)unicode_empty; } - /* Single characters are shared when using this constructor */ - if (size == 1) { + /* Single characters are shared when using this constructor. + Restrict to ASCII, since the input must be UTF-8. */ + if (size == 1 && Py_CHARMASK(*u) < 128) { unicode = unicode_latin1[Py_CHARMASK(*u)]; if (!unicode) { unicode = _PyUnicode_New(1); @@ -449,21 +452,14 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) Py_INCREF(unicode); return (PyObject *)unicode; } + + return PyUnicode_DecodeUTF8(u, size, NULL); } unicode = _PyUnicode_New(size); if (!unicode) return NULL; - /* Copy the Unicode data into the new object */ - if (u != NULL) { - Py_UNICODE *p = unicode->str; - while (size--) - *p++ = Py_CHARMASK(*u++); - /* Don't need to write trailing 0 because - that's already done by _PyUnicode_New */ - } - return (PyObject *)unicode; } |