RFE #1491485: str/unicode.endswith()/startswith() now accept a tuple as first argument.

author: Georg Brandl <georg@python.org> 2006-06-09 18:45:48 (GMT)
committer: Georg Brandl <georg@python.org> 2006-06-09 18:45:48 (GMT)
commit: 242508160eb6520cca0f7a831449987f3ea1fba0 (patch)
tree: 0f75dd6fcef4bf7627c07df3fd21eeb746cdc271
parent: 932f5afbe8567047c74496662170c0e370416ec3 (diff)
download: cpython-242508160eb6520cca0f7a831449987f3ea1fba0.zip
cpython-242508160eb6520cca0f7a831449987f3ea1fba0.tar.gz
cpython-242508160eb6520cca0f7a831449987f3ea1fba0.tar.bz2
5 files changed, 182 insertions, 83 deletions
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
index 576a5ad..1fe2f60 100644
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -618,8 +618,11 @@ For a list of possible encodings, see section~\ref{standard-encodings}.
 
 \begin{methoddesc}[string]{endswith}{suffix\optional{, start\optional{, end}}}
 Return \code{True} if the string ends with the specified \var{suffix},
-otherwise return \code{False}.  With optional \var{start}, test beginning at
+otherwise return \code{False}.  \var{suffix} can also be a tuple of
+suffixes to look for.  With optional \var{start}, test beginning at
 that position.  With optional \var{end}, stop comparing at that position.
+
+\versionchanged[Accept tuples as \var{suffix}]{2.5}
 \end{methoddesc}
 
 \begin{methoddesc}[string]{expandtabs}{\optional{tabsize}}
@@ -829,9 +832,12 @@ boundaries.  Line breaks are not included in the resulting list unless
 \begin{methoddesc}[string]{startswith}{prefix\optional{,
                                        start\optional{, end}}}
 Return \code{True} if string starts with the \var{prefix}, otherwise
-return \code{False}.  With optional \var{start}, test string beginning at
+return \code{False}.  \var{prefix} can also be a tuple of
+suffixes to look for.  With optional \var{start}, test string beginning at
 that position.  With optional \var{end}, stop comparing string at that
 position.
+
+\versionchanged[Accept tuples as \var{prefix}]{2.5}
 \end{methoddesc}
 
 \begin{methoddesc}[string]{strip}{\optional{chars}}
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index aaa2dc2..5a1cc8a 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -819,6 +819,21 @@ class MixinStrUnicodeUserStringTest:
         self.checkraises(TypeError, 'hello', 'startswith')
         self.checkraises(TypeError, 'hello', 'startswith', 42)
 
+        # test tuple arguments
+        self.checkequal(True, 'hello', 'startswith', ('he', 'ha'))
+        self.checkequal(False, 'hello', 'startswith', ('lo', 'llo'))
+        self.checkequal(True, 'hello', 'startswith', ('hellox', 'hello'))
+        self.checkequal(False, 'hello', 'startswith', ())
+        self.checkequal(True, 'helloworld', 'startswith', ('hellowo',
+                                                           'rld', 'lowo'), 3)
+        self.checkequal(False, 'helloworld', 'startswith', ('hellowo', 'ello',
+                                                            'rld'), 3)
+        self.checkequal(True, 'hello', 'startswith', ('lo', 'he'), 0, -1)
+        self.checkequal(False, 'hello', 'startswith', ('he', 'hel'), 0, 1)
+        self.checkequal(True, 'hello', 'startswith', ('he', 'hel'), 0, 2)
+
+        self.checkraises(TypeError, 'hello', 'startswith', (42,))
+
     def test_endswith(self):
         self.checkequal(True, 'hello', 'endswith', 'lo')
         self.checkequal(False, 'hello', 'endswith', 'he')
@@ -853,6 +868,21 @@ class MixinStrUnicodeUserStringTest:
         self.checkraises(TypeError, 'hello', 'endswith')
         self.checkraises(TypeError, 'hello', 'endswith', 42)
 
+        # test tuple arguments
+        self.checkequal(False, 'hello', 'endswith', ('he', 'ha'))
+        self.checkequal(True, 'hello', 'endswith', ('lo', 'llo'))
+        self.checkequal(True, 'hello', 'endswith', ('hellox', 'hello'))
+        self.checkequal(False, 'hello', 'endswith', ())
+        self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
+                                                           'rld', 'lowo'), 3)
+        self.checkequal(False, 'helloworld', 'endswith', ('hellowo', 'ello',
+                                                            'rld'), 3, -1)
+        self.checkequal(True, 'hello', 'endswith', ('hell', 'ell'), 0, -1)
+        self.checkequal(False, 'hello', 'endswith', ('he', 'hel'), 0, 1)
+        self.checkequal(True, 'hello', 'endswith', ('he', 'hell'), 0, 4)
+
+        self.checkraises(TypeError, 'hello', 'endswith', (42,))
+
     def test___contains__(self):
         self.checkequal(True, '', '__contains__', '')         # vereq('' in '', True)
         self.checkequal(True, 'abc', '__contains__', '')      # vereq('' in 'abc', True)
@@ -872,7 +902,7 @@ class MixinStrUnicodeUserStringTest:
         self.checkequal(u'abc', 'abc', '__getitem__', slice(0, 1000))
         self.checkequal(u'a', 'abc', '__getitem__', slice(0, 1))
         self.checkequal(u'', 'abc', '__getitem__', slice(0, 0))
-        # FIXME What about negative indizes? This is handled differently by [] and __getitem__(slice)
+        # FIXME What about negative indices? This is handled differently by [] and __getitem__(slice)
 
         self.checkraises(TypeError, 'abc', '__getitem__', 'def')
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 2fb9256..3bd732e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.5 beta 1?
 Core and builtins
 -----------------
 
+- The string and unicode methods startswith() and endswith() now accept
+  a tuple of prefixes/suffixes to look for. Implements RFE #1491485.
+
 - Buffer objects, at the C level, never used the char buffer
   implementation even when the char buffer for the wrapped object was
   explicitly requested (originally returned the read or write buffer).
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index a980345..831d54a 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -3099,54 +3099,96 @@ string_replace(PyStringObject *self, PyObject *args)
 
 /** End DALKE **/
 
+/* Matches the end (direction > 0) or start (direction < 0) of self
+ * against substr, using the start and end arguments. Returns
+ * -1 on error, 0 if not found and 1 if found.
+ */
+Py_LOCAL(int)
+_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
+		  Py_ssize_t end, int direction)
+{
+	Py_ssize_t len = PyString_GET_SIZE(self);
+	Py_ssize_t slen;
+	const char* sub;
+	const char* str;
+
+	if (PyString_Check(substr)) {
+		sub = PyString_AS_STRING(substr);
+		slen = PyString_GET_SIZE(substr);
+	}
+#ifdef Py_USING_UNICODE
+	else if (PyUnicode_Check(substr))
+		return PyUnicode_Tailmatch((PyObject *)self,
+					   substr, start, end, direction);
+#endif
+	else if (PyObject_AsCharBuffer(substr, &sub, &slen))
+		return -1;
+	str = PyString_AS_STRING(self);
+
+	string_adjust_indices(&start, &end, len);
+
+	if (direction < 0) {
+		/* startswith */
+		if (start+slen > len)
+			return 0;
+
+		if (end-start >= slen)
+			return ! memcmp(str+start, sub, slen);
+		else
+			return 0;
+	} else {
+		/* endswith */
+		if (end-start < slen || start > len)
+			return 0;
+
+		if (end-slen > start)
+			start = end - slen;
+		if (end-start >= slen)
+			return ! memcmp(str+start, sub, slen);
+		else
+			return 0;
+	}
+}
+
+
 PyDoc_STRVAR(startswith__doc__,
 "S.startswith(prefix[, start[, end]]) -> bool\n\
 \n\
 Return True if S starts with the specified prefix, False otherwise.\n\
 With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.");
+With optional end, stop comparing S at that position.\n\
+prefix can also be a tuple of strings to try.");
 
 static PyObject *
 string_startswith(PyStringObject *self, PyObject *args)
 {
-	const char* str = PyString_AS_STRING(self);
-	Py_ssize_t len = PyString_GET_SIZE(self);
-	const char* prefix;
-	Py_ssize_t plen;
 	Py_ssize_t start = 0;
 	Py_ssize_t end = PY_SSIZE_T_MAX;
 	PyObject *subobj;
+	int result;
 
 	if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
 		return NULL;
-	if (PyString_Check(subobj)) {
-		prefix = PyString_AS_STRING(subobj);
-		plen = PyString_GET_SIZE(subobj);
-	}
-#ifdef Py_USING_UNICODE
-	else if (PyUnicode_Check(subobj)) {
-	    	Py_ssize_t rc;
-		rc = PyUnicode_Tailmatch((PyObject *)self,
-					  subobj, start, end, -1);
-		if (rc == -1)
-			return NULL;
-		else
-			return PyBool_FromLong((long) rc);
+	if (PyTuple_Check(subobj)) {
+		Py_ssize_t i;
+		for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
+			result = _string_tailmatch(self,
+					PyTuple_GET_ITEM(subobj, i),
+					start, end, -1);
+			if (result == -1)
+				return NULL;
+			else if (result) {
+				Py_RETURN_TRUE;
+			}
+		}
+		Py_RETURN_FALSE;
 	}
-#endif
-	else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
+	result = _string_tailmatch(self, subobj, start, end, -1);
+	if (result == -1)
 		return NULL;
-
-	string_adjust_indices(&start, &end, len);
-
-	if (start+plen > len)
-		return PyBool_FromLong(0);
-
-	if (end-start >= plen)
-		return PyBool_FromLong(!memcmp(str+start, prefix, plen));
 	else
-		return PyBool_FromLong(0);
+		return PyBool_FromLong(result);
 }
 
 
@@ -3155,51 +3197,39 @@ PyDoc_STRVAR(endswith__doc__,
 \n\
 Return True if S ends with the specified suffix, False otherwise.\n\
 With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.");
+With optional end, stop comparing S at that position.\n\
+suffix can also be a tuple of strings to try.");
 
 static PyObject *
 string_endswith(PyStringObject *self, PyObject *args)
 {
-	const char* str = PyString_AS_STRING(self);
-	Py_ssize_t len = PyString_GET_SIZE(self);
-	const char* suffix;
-	Py_ssize_t slen;
 	Py_ssize_t start = 0;
 	Py_ssize_t end = PY_SSIZE_T_MAX;
 	PyObject *subobj;
+	int result;
 
 	if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
 		return NULL;
-	if (PyString_Check(subobj)) {
-		suffix = PyString_AS_STRING(subobj);
-		slen = PyString_GET_SIZE(subobj);
-	}
-#ifdef Py_USING_UNICODE
-	else if (PyUnicode_Check(subobj)) {
-	    	Py_ssize_t rc;
-		rc = PyUnicode_Tailmatch((PyObject *)self,
-					  subobj, start, end, +1);
-		if (rc == -1)
-			return NULL;
-		else
-			return PyBool_FromLong((long) rc);
+	if (PyTuple_Check(subobj)) {
+		Py_ssize_t i;
+		for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
+			result = _string_tailmatch(self,
+					PyTuple_GET_ITEM(subobj, i),
+					start, end, +1);
+			if (result == -1)
+				return NULL;
+			else if (result) {
+				Py_RETURN_TRUE;
+			}
+		}
+		Py_RETURN_FALSE;
 	}
-#endif
-	else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
+	result = _string_tailmatch(self, subobj, start, end, +1);
+	if (result == -1)
 		return NULL;
-
-	string_adjust_indices(&start, &end, len);
-
-	if (end-start < slen || start > len)
-		return PyBool_FromLong(0);
-
-	if (end-slen > start)
-		start = end - slen;
-	if (end-start >= slen)
-		return PyBool_FromLong(!memcmp(str+start, suffix, slen));
 	else
-		return PyBool_FromLong(0);
+		return PyBool_FromLong(result);
 }
 
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 970e69f..bf2425c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6667,29 +6667,44 @@ PyDoc_STRVAR(startswith__doc__,
 \n\
 Return True if S starts with the specified prefix, False otherwise.\n\
 With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.");
+With optional end, stop comparing S at that position.\n\
+prefix can also be a tuple of strings to try.");
 
 static PyObject *
 unicode_startswith(PyUnicodeObject *self,
 		   PyObject *args)
 {
+    PyObject *subobj;
     PyUnicodeObject *substring;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
-    PyObject *result;
+    int result;
 
-    if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &substring,
+    if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
 	return NULL;
-    substring = (PyUnicodeObject *)PyUnicode_FromObject(
-						(PyObject *)substring);
+    if (PyTuple_Check(subobj)) {
+        Py_ssize_t i;
+        for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
+            substring = (PyUnicodeObject *)PyUnicode_FromObject(
+                            PyTuple_GET_ITEM(subobj, i));
+            if (substring == NULL)
+                return NULL;
+            result = tailmatch(self, substring, start, end, -1);
+            Py_DECREF(substring);
+            if (result) {
+                Py_RETURN_TRUE;
+            }
+        }
+        /* nothing matched */
+        Py_RETURN_FALSE;
+    }
+    substring = (PyUnicodeObject *)PyUnicode_FromObject(subobj);
     if (substring == NULL)
-	return NULL;
-
-    result = PyBool_FromLong(tailmatch(self, substring, start, end, -1));
-
+         return NULL;
+    result = tailmatch(self, substring, start, end, -1);
     Py_DECREF(substring);
-    return result;
+    return PyBool_FromLong(result);
 }
 
 
@@ -6698,29 +6713,44 @@ PyDoc_STRVAR(endswith__doc__,
 \n\
 Return True if S ends with the specified suffix, False otherwise.\n\
 With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.");
+With optional end, stop comparing S at that position.\n\
+suffix can also be a tuple of strings to try.");
 
 static PyObject *
 unicode_endswith(PyUnicodeObject *self,
 		 PyObject *args)
 {
+    PyObject *subobj;
     PyUnicodeObject *substring;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
-    PyObject *result;
+    int result;
 
-    if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &substring,
-		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
+    if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
+        _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
 	return NULL;
-    substring = (PyUnicodeObject *)PyUnicode_FromObject(
-						(PyObject *)substring);
+    if (PyTuple_Check(subobj)) {
+        Py_ssize_t i;
+        for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
+            substring = (PyUnicodeObject *)PyUnicode_FromObject(
+                            PyTuple_GET_ITEM(subobj, i));
+            if (substring == NULL)
+            return NULL;
+            result = tailmatch(self, substring, start, end, +1);
+            Py_DECREF(substring);
+            if (result) {
+                Py_RETURN_TRUE;
+            }
+        }
+        Py_RETURN_FALSE;
+    }
+    substring = (PyUnicodeObject *)PyUnicode_FromObject(subobj);
     if (substring == NULL)
-	return NULL;
-
-    result = PyBool_FromLong(tailmatch(self, substring, start, end, +1));
+    return NULL;
 
+    result = tailmatch(self, substring, start, end, +1);
     Py_DECREF(substring);
-    return result;
+    return PyBool_FromLong(result);
 }
author	Georg Brandl <georg@python.org>	2006-06-09 18:45:48 (GMT)
committer	Georg Brandl <georg@python.org>	2006-06-09 18:45:48 (GMT)
commit	242508160eb6520cca0f7a831449987f3ea1fba0 (patch)
tree	0f75dd6fcef4bf7627c07df3fd21eeb746cdc271
parent	932f5afbe8567047c74496662170c0e370416ec3 (diff)
download	cpython-242508160eb6520cca0f7a831449987f3ea1fba0.zip cpython-242508160eb6520cca0f7a831449987f3ea1fba0.tar.gz cpython-242508160eb6520cca0f7a831449987f3ea1fba0.tar.bz2