summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/stringobject.c | 40
-rw-r--r-- Objects/unicodeobject.c | 221
2 files changed, 191 insertions, 70 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 6a0eece..d3c9e4b 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1005,7 +1005,9 @@ static PyBufferProcs string_as_buffer = {
#define BOTHSTRIP 2
/* Arrays indexed by above */
-static const char *stripname[] = {"lstrip", "rstrip", "strip"};
+static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
+
+#define STRIPNAME(i) (stripformat[i]+3)
static PyObject *
@@ -1449,15 +1451,26 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{
PyObject *sep = NULL;
- if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep))
+ if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
return NULL;
if (sep != NULL && sep != Py_None) {
- /* XXX What about Unicode? */
- if (!PyString_Check(sep)) {
+ if (PyString_Check(sep))
+ return do_xstrip(self, striptype, sep);
+ else if (PyUnicode_Check(sep)) {
+ PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
+ PyObject *res;
+ if (uniself==NULL)
+ return NULL;
+ res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
+ striptype, sep);
+ Py_DECREF(uniself);
+ return res;
+ }
+ else {
PyErr_Format(PyExc_TypeError,
- "%s arg must be None or string",
- stripname[striptype]);
+ "%s arg must be None, str or unicode",
+ STRIPNAME(striptype));
return NULL;
}
return do_xstrip(self, striptype, sep);
@@ -1468,11 +1481,12 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
static char strip__doc__[] =
-"S.strip([sep]) -> string\n\
+"S.strip([sep]) -> string or unicode\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\
-If sep is given and not None, remove characters in sep instead.";
+If sep is given and not None, remove characters in sep instead.\n\
+If sep is unicode, S will be converted to unicode before stripping";
static PyObject *
string_strip(PyStringObject *self, PyObject *args)
@@ -1485,10 +1499,11 @@ string_strip(PyStringObject *self, PyObject *args)
static char lstrip__doc__[] =
-"S.lstrip([sep]) -> string\n\
+"S.lstrip([sep]) -> string or unicode\n\
\n\
Return a copy of the string S with leading whitespace removed.\n\
-If sep is given and not None, remove characters in sep instead.";
+If sep is given and not None, remove characters in sep instead.\n\
+If sep is unicode, S will be converted to unicode before stripping";
static PyObject *
string_lstrip(PyStringObject *self, PyObject *args)
@@ -1501,10 +1516,11 @@ string_lstrip(PyStringObject *self, PyObject *args)
static char rstrip__doc__[] =
-"S.rstrip([sep]) -> string\n\
+"S.rstrip([sep]) -> string or unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.\n\
-If sep is given and not None, remove characters in sep instead.";
+If sep is given and not None, remove characters in sep instead.\n\
+If sep is unicode, S will be converted to unicode before stripping";
static PyObject *
string_rstrip(PyStringObject *self, PyObject *args)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8dbca6d..2fe9668 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3504,35 +3504,6 @@ PyObject *split(PyUnicodeObject *self,
}
static
-PyObject *strip(PyUnicodeObject *self,
- int left,
- int right)
-{
- Py_UNICODE *p = self->str;
- int start = 0;
- int end = self->length;
-
- if (left)
- while (start < end && Py_UNICODE_ISSPACE(p[start]))
- start++;
-
- if (right)
- while (end > start && Py_UNICODE_ISSPACE(p[end-1]))
- end--;
-
- if (start == 0 && end == self->length && PyUnicode_CheckExact(self)) {
- /* couldn't strip anything off, return original string */
- Py_INCREF(self);
- return (PyObject*) self;
- }
-
- return (PyObject*) PyUnicode_FromUnicode(
- self->str + start,
- end - start
- );
-}
-
-static
PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *str1,
PyUnicodeObject *str2,
@@ -4464,17 +4435,173 @@ unicode_lower(PyUnicodeObject *self)
return fixup(self, fixlower);
}
+#define LEFTSTRIP 0    /* strip the left end only */
+#define RIGHTSTRIP 1   /* strip the right end only */
+#define BOTHSTRIP 2    /* strip both ends */
+
+/* Arrays indexed by above.
+ * Each entry is the format string that do_argstrip() hands to
+ * PyArg_ParseTuple for the corresponding strip variant.  STRIPNAME(i)
+ * skips the three-character "|O:" prefix of entry i, leaving just the
+ * method name for use in error messages. */
+static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
+
+#define STRIPNAME(i) (stripformat[i]+3)
+
+/* Return a pointer to the first occurrence of c among the first n
+   elements of s, or NULL when c does not occur there (including the
+   case n == 0). */
+static const Py_UNICODE *
+unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n)
+{
+    /* The index has the same type as the bound: a signed int index
+       would be converted to unsigned for the comparison and could
+       overflow before reaching a large n. */
+    size_t i;
+
+    for (i = 0; i < n; ++i) {
+        if (s[i] == c)
+            return s + i;
+    }
+    return NULL;
+}
+
+/* Strip from self every leading and/or trailing character that occurs
+   in sepobj; striptype (LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP) selects
+   which end(s) are trimmed.  sepobj is read through the PyUnicode_*
+   accessor macros without a type check here.  Not static, so that
+   str.strip(unicode) in stringobject.c can call it. */
+PyObject *
+_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
+{
+    Py_UNICODE *text = PyUnicode_AS_UNICODE(self);
+    int textlen = PyUnicode_GET_SIZE(self);
+    Py_UNICODE *chars = PyUnicode_AS_UNICODE(sepobj);
+    int nchars = PyUnicode_GET_SIZE(sepobj);
+    int start = 0;
+    int end = textlen;
+
+    /* Advance past leading characters that are members of sepobj. */
+    if (striptype != RIGHTSTRIP) {
+        while (start < textlen &&
+               unicode_memchr(chars, text[start], nchars) != NULL)
+            start++;
+    }
+
+    /* Back up over trailing members, never moving left of start. */
+    if (striptype != LEFTSTRIP) {
+        while (end > start &&
+               unicode_memchr(chars, text[end-1], nchars) != NULL)
+            end--;
+    }
+
+    /* Nothing was removed from an exact unicode object: hand back a
+       new reference to self instead of copying. */
+    if (start == 0 && end == textlen && PyUnicode_CheckExact(self)) {
+        Py_INCREF(self);
+        return (PyObject *)self;
+    }
+    return PyUnicode_FromUnicode(text + start, end - start);
+}
+
+
+/* Strip whitespace (as classified by Py_UNICODE_ISSPACE) from the
+   end(s) of self selected by striptype: LEFTSTRIP, RIGHTSTRIP or
+   BOTHSTRIP. */
+static PyObject *
+do_strip(PyUnicodeObject *self, int striptype)
+{
+    Py_UNICODE *text = PyUnicode_AS_UNICODE(self);
+    int textlen = PyUnicode_GET_SIZE(self);
+    int start = 0;
+    int end = textlen;
+
+    /* Skip leading whitespace. */
+    if (striptype != RIGHTSTRIP) {
+        while (start < textlen && Py_UNICODE_ISSPACE(text[start]))
+            start++;
+    }
+
+    /* Drop trailing whitespace, never moving left of start. */
+    if (striptype != LEFTSTRIP) {
+        while (end > start && Py_UNICODE_ISSPACE(text[end-1]))
+            end--;
+    }
+
+    /* Nothing was removed from an exact unicode object: hand back a
+       new reference to self instead of copying. */
+    if (start == 0 && end == textlen && PyUnicode_CheckExact(self)) {
+        Py_INCREF(self);
+        return (PyObject *)self;
+    }
+    return PyUnicode_FromUnicode(text + start, end - start);
+}
+
+
+/* Shared back end of unicode strip/lstrip/rstrip for calls that were
+   given arguments.  Parses the optional sep argument from args, then:
+     - sep missing or None: strip whitespace via do_strip();
+     - sep unicode: strip its characters via _PyUnicode_XStrip();
+     - sep str: convert it with PyUnicode_FromObject() first;
+     - anything else: raise TypeError.
+   Returns NULL with an exception set on failure. */
+static PyObject *
+do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
+{
+    PyObject *sep = NULL;
+    PyObject *converted;
+    PyObject *result;
+
+    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
+        return NULL;
+
+    /* No usable separator: fall back to whitespace stripping. */
+    if (sep == NULL || sep == Py_None)
+        return do_strip(self, striptype);
+
+    if (PyUnicode_Check(sep))
+        return _PyUnicode_XStrip(self, striptype, sep);
+
+    if (!PyString_Check(sep)) {
+        PyErr_Format(PyExc_TypeError,
+                     "%s arg must be None, unicode or str",
+                     STRIPNAME(striptype));
+        return NULL;
+    }
+
+    /* sep is a str: strip using a temporary unicode copy of it. */
+    converted = PyUnicode_FromObject(sep);
+    if (converted == NULL)
+        return NULL;
+    result = _PyUnicode_XStrip(self, striptype, converted);
+    Py_DECREF(converted);
+    return result;
+}
+
+
+static char strip__doc__[] =
+"S.strip([sep]) -> unicode\n\
+\n\
+Return a copy of the string S with leading and trailing\n\
+whitespace removed.\n\
+If sep is given and not None, remove characters in sep instead.\n\
+If sep is a str, it will be converted to unicode before stripping";
+
+/* Method body for S.strip([sep]): trims both ends of self. */
+static PyObject *
+unicode_strip(PyUnicodeObject *self, PyObject *args)
+{
+    /* A non-empty argument tuple takes the argument-parsing path. */
+    if (PyTuple_GET_SIZE(args) != 0)
+        return do_argstrip(self, BOTHSTRIP, args);
+
+    /* Common case: no arguments, strip whitespace directly. */
+    return do_strip(self, BOTHSTRIP);
+}
+
+
static char lstrip__doc__[] =
-"S.lstrip() -> unicode\n\
+"S.lstrip([sep]) -> unicode\n\
\n\
-Return a copy of the string S with leading whitespace removed.";
+Return a copy of the string S with leading whitespace removed.\n\
+If sep is given and not None, remove characters in sep instead.\n\
+If sep is a str, it will be converted to unicode before stripping";
static PyObject *
-unicode_lstrip(PyUnicodeObject *self)
+unicode_lstrip(PyUnicodeObject *self, PyObject *args)
{
- return strip(self, 1, 0);
+ if (PyTuple_GET_SIZE(args) == 0)
+ return do_strip(self, LEFTSTRIP); /* Common case: no arguments, so strip leading whitespace */
+ else
+ return do_argstrip(self, LEFTSTRIP, args); /* parse the optional sep argument and strip accordingly */
}
+
+static char rstrip__doc__[] =
+"S.rstrip([sep]) -> unicode\n\
+\n\
+Return a copy of the string S with trailing whitespace removed.\n\
+If sep is given and not None, remove characters in sep instead.\n\
+If sep is a str, it will be converted to unicode before stripping";
+
+/* Method body for S.rstrip([sep]): trims the right end of self. */
+static PyObject *
+unicode_rstrip(PyUnicodeObject *self, PyObject *args)
+{
+    /* A non-empty argument tuple takes the argument-parsing path. */
+    if (PyTuple_GET_SIZE(args) != 0)
+        return do_argstrip(self, RIGHTSTRIP, args);
+
+    /* Common case: no arguments, strip whitespace directly. */
+    return do_strip(self, RIGHTSTRIP);
+}
+
+
static PyObject*
unicode_repeat(PyUnicodeObject *str, int len)
{
@@ -4677,17 +4804,6 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args)
return (PyObject*) pad(self, width - self->length, 0, ' ');
}
-static char rstrip__doc__[] =
-"S.rstrip() -> unicode\n\
-\n\
-Return a copy of the string S with trailing whitespace removed.";
-
-static PyObject *
-unicode_rstrip(PyUnicodeObject *self)
-{
- return strip(self, 0, 1);
-}
-
static PyObject*
unicode_slice(PyUnicodeObject *self, int start, int end)
{
@@ -4783,17 +4899,6 @@ PyObject *unicode_str(PyUnicodeObject *self)
return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
}
-static char strip__doc__[] =
-"S.strip() -> unicode\n\
-\n\
-Return a copy of S with leading and trailing whitespace removed.";
-
-static PyObject *
-unicode_strip(PyUnicodeObject *self)
-{
- return strip(self, 1, 1);
-}
-
static char swapcase__doc__[] =
"S.swapcase() -> unicode\n\
\n\
@@ -4966,14 +5071,14 @@ static PyMethodDef unicode_methods[] = {
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
- {"lstrip", (PyCFunction) unicode_lstrip, METH_NOARGS, lstrip__doc__},
+ {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
{"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
- {"rstrip", (PyCFunction) unicode_rstrip, METH_NOARGS, rstrip__doc__},
+ {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
{"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
- {"strip", (PyCFunction) unicode_strip, METH_NOARGS, strip__doc__},
+ {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
{"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
{"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
{"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},