summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2007-05-05 12:00:46 (GMT)
committerWalter Dörwald <walter@livinglogic.de>2007-05-05 12:00:46 (GMT)
commitacaa5a16d6cd9a94e7e111761264eef14a033d2c (patch)
treec2c2156bc64012da26a254caebdaf1a35b96f706
parent1255ed62bfc2f9f8f5c50935c21cbe0a34e12cc7 (diff)
downloadcpython-acaa5a16d6cd9a94e7e111761264eef14a033d2c.zip
cpython-acaa5a16d6cd9a94e7e111761264eef14a033d2c.tar.gz
cpython-acaa5a16d6cd9a94e7e111761264eef14a033d2c.tar.bz2
Add PyUnicode_FromString(), which creates a Unicode object from a
const char * (i.e. 0-terminated latin-1 encoded bytes).
-rw-r--r--Doc/api/concrete.tex11
-rw-r--r--Include/unicodeobject.h8
-rw-r--r--Objects/unicodeobject.c45
3 files changed, 64 insertions, 0 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index cdf6856..e1ab3ec 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -995,6 +995,17 @@ use these APIs:
\var{u} is \NULL{}.
\end{cfuncdesc}
+\begin{cfuncdesc}{PyObject*}{PyUnicode_FromString}{const char *u}
+ Create a Unicode Object from the char buffer \var{u}.
+ \var{u} must be 0-terminated, the bytes will be interpreted as
+ being latin-1 encoded. \var{u} may also be \NULL{} which causes the
+ contents to be undefined. It is the user's responsibility to fill
+ in the needed data. The buffer is copied into the new object.
+ If the buffer is not \NULL{}, the return value might be a shared object.
+ Therefore, modification of the resulting Unicode object is only allowed
+ when \var{u} is \NULL{}.
+\end{cfuncdesc}
+
\begin{cfuncdesc}{Py_UNICODE*}{PyUnicode_AsUnicode}{PyObject *unicode}
Return a read-only pointer to the Unicode object's internal
\ctype{Py_UNICODE} buffer, \NULL{} if \var{unicode} is not a Unicode
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index c12cb96..9d0cabf 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -172,6 +172,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
+# define PyUnicode_FromString PyUnicodeUCS2_FromString
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
@@ -250,6 +251,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
+# define PyUnicode_FromString PyUnicodeUCS4_FromString
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
@@ -427,6 +429,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Py_ssize_t size /* size of buffer */
);
+/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
+ Latin-1 encoded bytes; the length is taken from the terminator. */
+PyAPI_FUNC(PyObject*) PyUnicode_FromString(
+ const char *u /* null-terminated Latin-1 bytes */
+ );
+
/* Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer. */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 45c52cc..c9a922d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -393,6 +393,51 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
return (PyObject *)unicode;
}
+/* Create a Unicode object from u, a 0-terminated string whose bytes are
+   interpreted as Latin-1 (one byte becomes one Py_UNICODE code unit).
+
+   Returns NULL if _PyUnicode_New() fails.  For an empty or one-byte
+   string the result may be a shared object and must not be modified.
+   A NULL u is treated as having length 0 rather than being passed to
+   strlen(), which would be undefined behaviour. */
+PyObject *PyUnicode_FromString(const char *u)
+{
+    PyUnicodeObject *unicode;
+    /* Read the input as unsigned char: plain char may be signed, which
+       would turn bytes 0x80..0xFF into negative table indices and
+       sign-extended (wrong) code points. */
+    const unsigned char *s = (const unsigned char *)u;
+    Py_ssize_t size = (u != NULL) ? (Py_ssize_t)strlen(u) : 0;
+
+    /* If the Unicode data is known at construction time, we can apply
+       some optimizations which share commonly used objects. */
+    if (s != NULL) {
+
+        /* Optimization for empty strings */
+        if (size == 0 && unicode_empty != NULL) {
+            Py_INCREF(unicode_empty);
+            return (PyObject *)unicode_empty;
+        }
+
+        /* Single character Unicode objects in the Latin-1 range are
+           shared when using this constructor.  Every byte value is in
+           that range, so no upper-bound test is needed. */
+        if (size == 1) {
+            unicode = unicode_latin1[*s];
+            if (!unicode) {
+                unicode = _PyUnicode_New(1);
+                if (!unicode)
+                    return NULL;
+                unicode->str[0] = *s;
+                unicode_latin1[*s] = unicode;
+            }
+            Py_INCREF(unicode);
+            return (PyObject *)unicode;
+        }
+    }
+
+    unicode = _PyUnicode_New(size);
+    if (!unicode)
+        return NULL;
+
+    /* Copy the data into the new object, widening each byte into its own
+       Py_UNICODE slot.  Like the original loop, this also stores the
+       terminating 0. */
+    if (s != NULL) {
+        Py_UNICODE *p = unicode->str;
+        while ((*p++ = *s++) != 0)
+            ;
+    }
+
+    return (PyObject *)unicode;
+}
+
#ifdef HAVE_WCHAR_H
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,