diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2004-06-02 16:49:17 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2004-06-02 16:49:17 (GMT) |
commit | 974ed7cfa50b666c9ab91f7a3f8f26049d387107 (patch) | |
tree | d821c74c26231d988f34764d0fdfe3494036ee95 /Objects/unicodeobject.c | |
parent | b6568b91fdf7de1377dba395c6725a7307b818ee (diff) | |
download | cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.zip cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.gz cpython-974ed7cfa50b666c9ab91f7a3f8f26049d387107.tar.bz2 |
- SF #962502: Add two more methods for unicode type; width() and
iswide() for east asian width manipulation. (Inspired by David
Goodger, Reviewed by Martin v. Loewis)
- Move _PyUnicode_TypeRecord.flags to the end of the struct so that
no padding is added for UCS-4 builds. (Suggested by Martin v. Loewis)
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ada01fc..0858525 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -655,6 +655,32 @@ int PyUnicode_GetSize(PyObject *unicode)
     return -1;
 }
 
+/* Return the display width of UNICODE in fixed-width columns: East Asian
+   wide characters (per Py_UNICODE_ISWIDE) count as two columns, every
+   other character as one.  Returns -1 with an exception set if UNICODE
+   is not a unicode object. */
+int PyUnicode_GetWidth(PyObject *unicode)
+{
+    const Py_UNICODE *p, *e;
+    int width;
+
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_BadArgument();
+        return -1;
+    }
+
+    p = PyUnicode_AS_UNICODE(unicode);
+    e = p + PyUnicode_GET_SIZE(unicode);
+    for (width = 0; p < e; p++) {
+        if (Py_UNICODE_ISWIDE(*p))
+            width += 2;
+        else
+            width++;
+    }
+
+    return width;
+}
+
 const char *PyUnicode_GetDefaultEncoding(void)
 {
     return unicode_default_encoding;
@@ -5316,6 +5342,36 @@ unicode_isnumeric(PyUnicodeObject *self)
     return PyBool_FromLong(1);
 }
 
+PyDoc_STRVAR(iswide__doc__,
+"S.iswide() -> bool\n\
+\n\
+Return True if all characters in S are wide width\n\
+and there is at least one character in S, False otherwise.");
+
+static PyObject*
+unicode_iswide(PyUnicodeObject *self)
+{
+    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
+    register const Py_UNICODE *e;
+
+    /* Shortcut for single character strings */
+    if (PyUnicode_GET_SIZE(self) == 1 &&
+        Py_UNICODE_ISWIDE(*p))
+        Py_RETURN_TRUE;
+
+    /* Special case for empty strings: an empty string is never wide */
+    if (PyUnicode_GET_SIZE(self) == 0)
+        Py_RETURN_FALSE;
+
+    /* Every character must be wide for the whole string to be wide */
+    e = p + PyUnicode_GET_SIZE(self);
+    for (; p < e; p++) {
+        if (!Py_UNICODE_ISWIDE(*p))
+            Py_RETURN_FALSE;
+    }
+    Py_RETURN_TRUE;
+}
+
 PyDoc_STRVAR(join__doc__,
 "S.join(sequence) -> unicode\n\
 \n\
@@ -5927,6 +5983,23 @@ unicode_upper(PyUnicodeObject *self)
     return fixup(self, fixupper);
 }
 
+PyDoc_STRVAR(width__doc__,
+"S.width() -> int\n\
+\n\
+Return a fixed-width representation length of S.");
+
+static PyObject*
+unicode_width(PyObject *self)
+{
+    /* PyUnicode_GetWidth() returns -1 only on error, with the exception
+       already set, so just propagate the failure. */
+    int width = PyUnicode_GetWidth(self);
+    if (width == -1)
+        return NULL;
+    else
+        return PyInt_FromLong((long)width);
+}
+
 PyDoc_STRVAR(zfill__doc__,
 "S.zfill(width) -> unicode\n\
 \n\
@@ -6090,6 +6163,8 @@ static PyMethodDef unicode_methods[] = {
     {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
     {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
     {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+    {"iswide", (PyCFunction) unicode_iswide, METH_NOARGS, iswide__doc__},
+    {"width", (PyCFunction) unicode_width, METH_NOARGS, width__doc__},
     {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},