- SF #962502: Add two more methods for unicode type: width() and iswide() for East Asian width manipulation. (Inspired by David Goodger, Reviewed by Martin v. Loewis)
- Move _PyUnicode_TypeRecord.flags to the end of the struct so that no padding is added for UCS-4 builds. (Suggested by Martin v. Loewis)
author: Hye-Shik Chang <hyeshik@gmail.com> 2004-06-02 16:49:17 (GMT)
committer: Hye-Shik Chang <hyeshik@gmail.com> 2004-06-02 16:49:17 (GMT)
commit: 974ed7cfa50b666c9ab91f7a3f8f26049d387107 (patch)
tree: d821c74c26231d988f34764d0fdfe3494036ee95 /Objects/unicodeobject.c
parent: b6568b91fdf7de1377dba395c6725a7307b818ee (diff)
download: cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.zip
cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.gz
cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.bz2
1 files changed, 68 insertions, 1 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ada01fc..0858525 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -655,6 +655,27 @@ int PyUnicode_GetSize(PyObject *unicode)
     return -1;
 }
 
+/* Return the number of fixed-width columns needed for a unicode object:
+   two per Py_UNICODE_ISWIDE() character, one per any other character.
+   A non-unicode argument yields -1 after PyErr_BadArgument(). */
+int PyUnicode_GetWidth(PyObject *unicode)
+{
+    const Py_UNICODE *cur, *end;
+    int columns = 0;
+
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return -1;
+    }
+
+    cur = PyUnicode_AS_UNICODE(unicode);
+    end = cur + PyUnicode_GET_SIZE(unicode);
+    for (; cur < end; cur++) {
+	columns += Py_UNICODE_ISWIDE(*cur) ? 2 : 1;
+    }
+    return columns;
+}
+
 const char *PyUnicode_GetDefaultEncoding(void)
 {
     return unicode_default_encoding;
@@ -5316,6 +5337,35 @@ unicode_isnumeric(PyUnicodeObject *self)
     return PyBool_FromLong(1);
 }
 
+PyDoc_STRVAR(iswide__doc__,
+"S.iswide() -> bool\n\
+\n\
+Return True if all characters in S are wide width\n\
+and there is at least one character in S, False otherwise.");
+
+/* Implementation of unicode.iswide(): returns True when S is non-empty
+   and Py_UNICODE_ISWIDE() holds for every character, False otherwise. */
+static PyObject*
+unicode_iswide(PyUnicodeObject *self)
+{
+    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
+    register const Py_UNICODE *e = p + PyUnicode_GET_SIZE(self);
+
+    /* Special case for empty strings.  The length is read with the
+       unicode accessor; PyString_GET_SIZE would reinterpret self as a
+       PyStringObject. */
+    if (PyUnicode_GET_SIZE(self) == 0)
+	Py_RETURN_FALSE;
+
+    /* A single narrow character is enough to reject the string; this
+       loop also covers the one-character case. */
+    for (; p < e; p++) {
+	if (!Py_UNICODE_ISWIDE(*p))
+	    Py_RETURN_FALSE;
+    }
+    Py_RETURN_TRUE;
+}
+
 PyDoc_STRVAR(join__doc__,
 "S.join(sequence) -> unicode\n\
 \n\
@@ -5335,7 +5385,7 @@ unicode_length(PyUnicodeObject *self)
 }
 
 PyDoc_STRVAR(ljust__doc__,
-"S.ljust(width[, fillchar]) -> unicode\n\
+"S.ljust(width[, fillchar]) -> unicode\n\
 \n\
 Return S left justified in a Unicode string of length width. Padding is\n\
 done using the specified fill character (default is a space).");
@@ -5927,6 +5977,21 @@ unicode_upper(PyUnicodeObject *self)
     return fixup(self, fixupper);
 }
 
+PyDoc_STRVAR(width__doc__,
+"S.width() -> int\n\
+\n\
+Return a fixed-width representation length of S.");
+
+/* unicode.width(): wrap PyUnicode_GetWidth() in a Python int. */
+static PyObject*
+unicode_width(PyObject *self)
+{
+    int width = PyUnicode_GetWidth(self);
+    if (width == -1)
+	return NULL;	/* propagate the failure reported by the helper */
+    return PyInt_FromLong((long)width);
+}
+
 PyDoc_STRVAR(zfill__doc__,
 "S.zfill(width) -> unicode\n\
 \n\
@@ -6090,6 +6155,8 @@ static PyMethodDef unicode_methods[] = {
     {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
     {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
     {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+    {"iswide", (PyCFunction) unicode_iswide, METH_NOARGS, iswide__doc__},
+    {"width", (PyCFunction) unicode_width, METH_NOARGS, width__doc__},
     {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
author	Hye-Shik Chang <hyeshik@gmail.com>	2004-06-02 16:49:17 (GMT)
committer	Hye-Shik Chang <hyeshik@gmail.com>	2004-06-02 16:49:17 (GMT)
commit	974ed7cfa50b666c9ab91f7a3f8f26049d387107 (patch)
tree	d821c74c26231d988f34764d0fdfe3494036ee95 /Objects/unicodeobject.c
parent	b6568b91fdf7de1377dba395c6725a7307b818ee (diff)
download	cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.zip cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.gz cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.bz2