summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorHye-Shik Chang <hyeshik@gmail.com>2004-06-02 16:49:17 (GMT)
committerHye-Shik Chang <hyeshik@gmail.com>2004-06-02 16:49:17 (GMT)
commit974ed7cfa50b666c9ab91f7a3f8f26049d387107 (patch)
treed821c74c26231d988f34764d0fdfe3494036ee95 /Objects/unicodeobject.c
parentb6568b91fdf7de1377dba395c6725a7307b818ee (diff)
downloadcpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.zip
cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.gz
cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.bz2
- SF #962502: Add two more methods to the unicode type, width() and
iswide(), for East Asian width manipulation. (Inspired by David Goodger, reviewed by Martin v. Loewis) - Move _PyUnicode_TypeRecord.flags to the end of the struct so that no padding is added for UCS-4 builds. (Suggested by Martin v. Loewis)
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c69
1 files changed, 68 insertions, 1 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ada01fc..0858525 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -655,6 +655,27 @@ int PyUnicode_GetSize(PyObject *unicode)
return -1;
}
+/* Compute the fixed-width display length of a unicode object.
+
+   Each element for which Py_UNICODE_ISWIDE() is true contributes 2 to
+   the result; every other element contributes 1.
+
+   Returns the accumulated width, or -1 after calling PyErr_BadArgument()
+   when the argument fails PyUnicode_Check(). */
+int PyUnicode_GetWidth(PyObject *unicode)
+{
+    const Py_UNICODE *cur, *end;
+    int columns = 0;
+
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_BadArgument();
+        return -1;
+    }
+
+    cur = PyUnicode_AS_UNICODE(unicode);
+    end = cur + PyUnicode_GET_SIZE(unicode);
+    while (cur < end) {
+        /* Advance separately so the macro argument has no side effects. */
+        columns += Py_UNICODE_ISWIDE(*cur) ? 2 : 1;
+        cur++;
+    }
+
+    return columns;
+}
+
const char *PyUnicode_GetDefaultEncoding(void)
{
return unicode_default_encoding;
@@ -5316,6 +5337,35 @@ unicode_isnumeric(PyUnicodeObject *self)
return PyBool_FromLong(1);
}
+PyDoc_STRVAR(iswide__doc__,
+"S.iswide() -> bool\n\
+\n\
+Return True if all characters in S are wide width\n\
+and there is at least one character in S, False otherwise.");
+
+/* Implements S.iswide(): yields True when S is non-empty and
+   Py_UNICODE_ISWIDE() holds for every element of its Py_UNICODE buffer,
+   and False otherwise. */
+static PyObject*
+unicode_iswide(PyUnicodeObject *self)
+{
+    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
+    register const Py_UNICODE *e;
+
+    /* Shortcut for single character strings */
+    if (PyUnicode_GET_SIZE(self) == 1 &&
+        Py_UNICODE_ISWIDE(*p))
+        Py_RETURN_TRUE;
+
+    /* Special case for empty strings.  self is a unicode object, so its
+       length is read with the PyUnicode_* accessor, matching the other
+       size checks in this function, rather than the PyString_* one. */
+    if (PyUnicode_GET_SIZE(self) == 0)
+        Py_RETURN_FALSE;
+
+    e = p + PyUnicode_GET_SIZE(self);
+    for (; p < e; p++) {
+        if (!Py_UNICODE_ISWIDE(*p))
+            Py_RETURN_FALSE;
+    }
+    Py_RETURN_TRUE;
+}
+
PyDoc_STRVAR(join__doc__,
"S.join(sequence) -> unicode\n\
\n\
@@ -5335,7 +5385,7 @@ unicode_length(PyUnicodeObject *self)
}
PyDoc_STRVAR(ljust__doc__,
"S.ljust(width[, fillchar]) -> unicode\n\
\n\
Return S left justified in a Unicode string of length width. Padding is\n\
done using the specified fill character (default is a space).");
@@ -5927,6 +5977,21 @@ unicode_upper(PyUnicodeObject *self)
return fixup(self, fixupper);
}
+PyDoc_STRVAR(width__doc__,
+"S.width() -> int\n\
+\n\
+Return a fixed-width representation length of S.");
+
+/* Implements S.width() by wrapping PyUnicode_GetWidth(): returns the
+   computed width converted with PyInt_FromLong(), or NULL when
+   PyUnicode_GetWidth() signals failure by returning -1. */
+static PyObject*
+unicode_width(PyObject *self)
+{
+    int width = PyUnicode_GetWidth(self);
+    if (width == -1)
+        return NULL;
+    else
+        return PyInt_FromLong((long)width);
+}
+
PyDoc_STRVAR(zfill__doc__,
"S.zfill(width) -> unicode\n\
\n\
@@ -6090,6 +6155,8 @@ static PyMethodDef unicode_methods[] = {
{"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
{"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
{"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+ {"iswide", (PyCFunction) unicode_iswide, METH_NOARGS, iswide__doc__},
+ {"width", (PyCFunction) unicode_width, METH_NOARGS, width__doc__},
{"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},