diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-05-05 12:00:46 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-05-05 12:00:46 (GMT) |
commit | acaa5a16d6cd9a94e7e111761264eef14a033d2c (patch) | |
tree | c2c2156bc64012da26a254caebdaf1a35b96f706 | |
parent | 1255ed62bfc2f9f8f5c50935c21cbe0a34e12cc7 (diff) | |
download | cpython-acaa5a16d6cd9a94e7e111761264eef14a033d2c.zip cpython-acaa5a16d6cd9a94e7e111761264eef14a033d2c.tar.gz cpython-acaa5a16d6cd9a94e7e111761264eef14a033d2c.tar.bz2 |
Add PyUnicode_FromString(), which creates a unicode object from a
const char * (i.e. 0-terminated latin-1 encoded bytes).
-rw-r--r-- | Doc/api/concrete.tex | 11 | ||||
-rw-r--r-- | Include/unicodeobject.h | 8 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 45 |
3 files changed, 64 insertions, 0 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index cdf6856..e1ab3ec 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -995,6 +995,17 @@ use these APIs: \var{u} is \NULL{}. \end{cfuncdesc} +\begin{cfuncdesc}{PyObject*}{PyUnicode_FromString}{const char *u} + Create a Unicode Object from the char buffer \var{u}. + \var{u} must be 0-terminated, the bytes will be interpreted as + being latin-1 encoded. \var{u} may also be \NULL{} which causes the + contents to be undefined. It is the user's responsibility to fill + in the needed data. The buffer is copied into the new object. + If the buffer is not \NULL{}, the return value might be a shared object. + Therefore, modification of the resulting Unicode object is only allowed + when \var{u} is \NULL{}. +\end{cfuncdesc} + \begin{cfuncdesc}{Py_UNICODE*}{PyUnicode_AsUnicode}{PyObject *unicode} Return a read-only pointer to the Unicode object's internal \ctype{Py_UNICODE} buffer, \NULL{} if \var{unicode} is not a Unicode diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index c12cb96..9d0cabf 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -172,6 +172,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromObject PyUnicodeUCS2_FromObject # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode +# define PyUnicode_FromString PyUnicodeUCS2_FromString # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS2_GetMax @@ -250,6 +251,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromObject PyUnicodeUCS4_FromObject # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode +# define PyUnicode_FromString PyUnicodeUCS4_FromString # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar # define PyUnicode_GetDefaultEncoding 
PyUnicodeUCS4_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS4_GetMax @@ -427,6 +429,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( Py_ssize_t size /* size of buffer */ ); +/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated + Latin-1 encoded bytes */ +PyAPI_FUNC(PyObject*) PyUnicode_FromString( + const char *u /* string */ + ); + /* Return a read-only pointer to the Unicode object's internal Py_UNICODE buffer. */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 45c52cc..c9a922d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -393,6 +393,51 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, return (PyObject *)unicode; }
+/* Create a Unicode object from the 0-terminated bytes in u, mapping each
+   byte to the code point of the same value (i.e. decoding as Latin-1).
+   Returns the object, or NULL if _PyUnicode_New() fails.  The empty and
+   single-character results may be shared, cached objects.  A NULL u has
+   no bytes to measure or copy, so it is treated like "". */
+PyObject *PyUnicode_FromString(const char *u)
+{
+    PyUnicodeObject *unicode;
+    Py_UNICODE *p;
+    Py_ssize_t size, i;
+
+    /* Test for NULL before calling strlen(): strlen(NULL) is undefined. */
+    if (u == NULL)
+        u = "";
+    size = strlen(u);
+
+    /* Optimization for empty strings */
+    if (size == 0 && unicode_empty != NULL) {
+        Py_INCREF(unicode_empty);
+        return (PyObject *)unicode_empty;
+    }
+
+    /* Single character Unicode objects are shared when using this
+       constructor.  Every byte is in the Latin-1 range, so no range test
+       is needed, but plain char may be signed: go through unsigned char
+       so bytes >= 0x80 give an index in 128..255, not a negative one. */
+    if (size == 1) {
+        unsigned char ch = (unsigned char)*u;
+
+        unicode = unicode_latin1[ch];
+        if (!unicode) {
+            unicode = _PyUnicode_New(1);
+            if (!unicode)
+                return NULL;
+            unicode->str[0] = ch;
+            unicode_latin1[ch] = unicode;
+        }
+        Py_INCREF(unicode);
+        return (PyObject *)unicode;
+    }
+
+    unicode = _PyUnicode_New(size);
+    if (!unicode)
+        return NULL;
+
+    /* Widen one byte into one Py_UNICODE element.  Writing through a
+       char * would pack the bytes into the front of the buffer instead.
+       NOTE(review): the terminator is not copied; this assumes
+       _PyUnicode_New() already terminates the buffer -- confirm. */
+    p = unicode->str;
+    for (i = 0; i < size; i++)
+        p[i] = (unsigned char)u[i];
+
+    return (PyObject *)unicode;
+}
+
 #ifdef HAVE_WCHAR_H PyObject *PyUnicode_FromWideChar(register const wchar_t *w, |