summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org>, 2002-08-06 16:58:21 (GMT)
committer: Barry Warsaw <barry@python.org>, 2002-08-06 16:58:21 (GMT)
commit: 817918cc3c10d0ed6b14e0e3f2bc0c5227c508cd (patch)
tree: 5dcdd3861db33fde0a76f275c09d40cba9c6fa22
parent: b57089cdf8b63b38ca736785c9fcc38a9fce89da (diff)
download: cpython-817918cc3c10d0ed6b14e0e3f2bc0c5227c508cd.zip
cpython-817918cc3c10d0ed6b14e0e3f2bc0c5227c508cd.tar.gz
cpython-817918cc3c10d0ed6b14e0e3f2bc0c5227c508cd.tar.bz2
Committing patch #591250, which makes "str1 in str2" work when str1 is a
string longer than 1 character.
-rw-r--r-- Doc/lib/libstdtypes.tex 21
-rw-r--r-- Lib/test/string_tests.py 22
-rw-r--r-- Lib/test/test_contains.py 54
-rw-r--r-- Lib/test/test_string.py 1
-rw-r--r-- Lib/test/test_unicode.py 75
-rwxr-xr-x Lib/test/test_userstring.py 1
-rw-r--r-- Objects/stringobject.c 25
-rw-r--r-- Objects/unicodeobject.c 40
8 files changed, 140 insertions, 99 deletions
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
index 87d5402..df602cd 100644
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority
and \var{j} are integers:
\begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
- \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{}
+ \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)}
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
-equal to \var{x}, else \code{1}}{}
+equal to \var{x}, else \code{1}}{(1)}
\hline
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
- \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)}
+ \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
\hline
- \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)}
- \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)}
+ \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
+ \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)}
\hline
\lineiii{len(\var{s})}{length of \var{s}}{}
\lineiii{min(\var{s})}{smallest item of \var{s}}{}
@@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{}
Notes:
\begin{description}
-\item[(1)] Values of \var{n} less than \code{0} are treated as
+\item[(1)] When \var{s} is a string or Unicode string object the
+\code{in} and \code{not in} operations act like a substring test. In
+Python versions before 2.3, \var{x} had to be a string of length 1.
+In Python 2.3 and beyond, \var{x} may be a string of any length.
+
+\item[(2)] Values of \var{n} less than \code{0} are treated as
\code{0} (which yields an empty sequence of the same type as
\var{s}). Note also that the copies are shallow; nested structures
are not copied. This often haunts new Python programmers; consider:
@@ -489,12 +494,12 @@ Notes:
[[3], [5], [7]]
\end{verbatim}
-\item[(2)] If \var{i} or \var{j} is negative, the index is relative to
+\item[(3)] If \var{i} or \var{j} is negative, the index is relative to
the end of the string: \code{len(\var{s}) + \var{i}} or
\code{len(\var{s}) + \var{j}} is substituted. But note that \code{-0} is
still \code{0}.
-\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as
+\item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as
the sequence of items with index \var{k} such that \code{\var{i} <=
\var{k} < \var{j}}. If \var{i} or \var{j} is greater than
\code{len(\var{s})}, use \code{len(\var{s})}. If \var{i} is omitted,
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 47d7510..836836b1 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1,7 +1,7 @@
"""Common tests shared by test_string and test_userstring"""
import string
-from test.test_support import verify, verbose, TestFailed, have_unicode
+from test.test_support import verify, vereq, verbose, TestFailed, have_unicode
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
@@ -295,3 +295,23 @@ def run_method_tests(test):
data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
verify('hello world'.encode('zlib') == data)
verify(data.decode('zlib') == 'hello world')
+
+def test_exception(lhs, rhs, msg):
+ try:
+ lhs in rhs
+ except TypeError:
+ pass
+ else:
+ raise TestFailed, msg
+
+def run_contains_tests(test):
+ vereq('' in '', True)
+ vereq('' in 'abc', True)
+ vereq('\0' in 'abc', False)
+ vereq('\0' in '\0abc', True)
+ vereq('\0' in 'abc\0', True)
+ vereq('a' in '\0abc', True)
+ vereq('asdf' in 'asdf', True)
+ vereq('asdf' in 'asd', False)
+ vereq('asdf' in '', False)
+
diff --git a/Lib/test/test_contains.py b/Lib/test/test_contains.py
index 9abed15..04eedf1 100644
--- a/Lib/test/test_contains.py
+++ b/Lib/test/test_contains.py
@@ -45,17 +45,8 @@ except TypeError:
check('c' in 'abc', "'c' not in 'abc'")
check('d' not in 'abc', "'d' in 'abc'")
-try:
- '' in 'abc'
- check(0, "'' in 'abc' did not raise error")
-except TypeError:
- pass
-
-try:
- 'ab' in 'abc'
- check(0, "'ab' in 'abc' did not raise error")
-except TypeError:
- pass
+check('' in '', "'' not in ''")
+check('' in 'abc', "'' not in 'abc'")
try:
None in 'abc'
@@ -71,17 +62,12 @@ if have_unicode:
check('c' in unicode('abc'), "'c' not in u'abc'")
check('d' not in unicode('abc'), "'d' in u'abc'")
- try:
- '' in unicode('abc')
- check(0, "'' in u'abc' did not raise error")
- except TypeError:
- pass
-
- try:
- 'ab' in unicode('abc')
- check(0, "'ab' in u'abc' did not raise error")
- except TypeError:
- pass
+ check('' in unicode(''), "'' not in u''")
+ check(unicode('') in '', "u'' not in ''")
+ check(unicode('') in unicode(''), "u'' not in u''")
+ check('' in unicode('abc'), "'' not in u'abc'")
+ check(unicode('') in 'abc', "u'' not in 'abc'")
+ check(unicode('') in unicode('abc'), "u'' not in u'abc'")
try:
None in unicode('abc')
@@ -94,35 +80,11 @@ if have_unicode:
check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
- try:
- unicode('') in unicode('abc')
- check(0, "u'' in u'abc' did not raise error")
- except TypeError:
- pass
-
- try:
- unicode('ab') in unicode('abc')
- check(0, "u'ab' in u'abc' did not raise error")
- except TypeError:
- pass
-
# Test Unicode char in string
check(unicode('c') in 'abc', "u'c' not in 'abc'")
check(unicode('d') not in 'abc', "u'd' in 'abc'")
- try:
- unicode('') in 'abc'
- check(0, "u'' in 'abc' did not raise error")
- except TypeError:
- pass
-
- try:
- unicode('ab') in 'abc'
- check(0, "u'ab' in 'abc' did not raise error")
- except TypeError:
- pass
-
# A collection of tests on builtin sequence types
a = range(10)
for i in a:
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py
index af8c1bc..c92f5f7 100644
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -51,6 +51,7 @@ def test(name, input, output, *args):
string_tests.run_module_tests(test)
string_tests.run_method_tests(test)
+string_tests.run_contains_tests(test)
string.whitespace
string.lowercase
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 028e97a..f38467a 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
-from test.test_support import verify, verbose, TestFailed
+from test.test_support import verify, vereq, verbose, TestFailed
import sys, string
if not sys.platform.startswith('java'):
@@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c
# Contains:
print 'Testing Unicode contains method...',
-verify(('a' in u'abdb') == 1)
-verify(('a' in u'bdab') == 1)
-verify(('a' in u'bdaba') == 1)
-verify(('a' in u'bdba') == 1)
-verify(('a' in u'bdba') == 1)
-verify((u'a' in u'bdba') == 1)
-verify((u'a' in u'bdb') == 0)
-verify((u'a' in 'bdb') == 0)
-verify((u'a' in 'bdba') == 1)
-verify((u'a' in ('a',1,None)) == 1)
-verify((u'a' in (1,None,'a')) == 1)
-verify((u'a' in (1,None,u'a')) == 1)
-verify(('a' in ('a',1,None)) == 1)
-verify(('a' in (1,None,'a')) == 1)
-verify(('a' in (1,None,u'a')) == 1)
-verify(('a' in ('x',1,u'y')) == 0)
-verify(('a' in ('x',1,None)) == 0)
+vereq(('a' in u'abdb'), True)
+vereq(('a' in u'bdab'), True)
+vereq(('a' in u'bdaba'), True)
+vereq(('a' in u'bdba'), True)
+vereq(('a' in u'bdba'), True)
+vereq((u'a' in u'bdba'), True)
+vereq((u'a' in u'bdb'), False)
+vereq((u'a' in 'bdb'), False)
+vereq((u'a' in 'bdba'), True)
+vereq((u'a' in ('a',1,None)), True)
+vereq((u'a' in (1,None,'a')), True)
+vereq((u'a' in (1,None,u'a')), True)
+vereq(('a' in ('a',1,None)), True)
+vereq(('a' in (1,None,'a')), True)
+vereq(('a' in (1,None,u'a')), True)
+vereq(('a' in ('x',1,u'y')), False)
+vereq(('a' in ('x',1,None)), False)
print 'done.'
# Formatting:
@@ -758,3 +758,42 @@ print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
+
+def test_exception(lhs, rhs, msg):
+ try:
+ lhs in rhs
+ except TypeError:
+ pass
+ else:
+ raise TestFailed, msg
+
+def run_contains_tests():
+ vereq(u'' in '', True)
+ vereq('' in u'', True)
+ vereq(u'' in u'', True)
+ vereq(u'' in 'abc', True)
+ vereq('' in u'abc', True)
+ vereq(u'' in u'abc', True)
+ vereq(u'\0' in 'abc', False)
+ vereq('\0' in u'abc', False)
+ vereq(u'\0' in u'abc', False)
+ vereq(u'\0' in '\0abc', True)
+ vereq('\0' in u'\0abc', True)
+ vereq(u'\0' in u'\0abc', True)
+ vereq(u'\0' in 'abc\0', True)
+ vereq('\0' in u'abc\0', True)
+ vereq(u'\0' in u'abc\0', True)
+ vereq(u'a' in '\0abc', True)
+ vereq('a' in u'\0abc', True)
+ vereq(u'a' in u'\0abc', True)
+ vereq(u'asdf' in 'asdf', True)
+ vereq('asdf' in u'asdf', True)
+ vereq(u'asdf' in u'asdf', True)
+ vereq(u'asdf' in 'asd', False)
+ vereq('asdf' in u'asd', False)
+ vereq(u'asdf' in u'asd', False)
+ vereq(u'asdf' in '', False)
+ vereq('asdf' in u'', False)
+ vereq(u'asdf' in u'', False)
+
+run_contains_tests()
diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py
index 78af807..5492f2e 100755
--- a/Lib/test/test_userstring.py
+++ b/Lib/test/test_userstring.py
@@ -41,3 +41,4 @@ def test(methodname, input, output, *args):
print (methodname, input, output, args, res[0], res[1], res[2])
string_tests.run_method_tests(test)
+string_tests.run_contains_tests(test)
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 3c1b303..1d5277c 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j)
static int
string_contains(PyObject *a, PyObject *el)
{
- register char *s, *end;
- register char c;
+ const char *lhs, *rhs, *end;
+ int size;
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(el))
return PyUnicode_Contains(a, el);
#endif
- if (!PyString_Check(el) || PyString_Size(el) != 1) {
+ if (!PyString_Check(el)) {
PyErr_SetString(PyExc_TypeError,
- "'in <string>' requires character as left operand");
+ "'in <string>' requires string as left operand");
return -1;
}
- c = PyString_AsString(el)[0];
- s = PyString_AsString(a);
- end = s + PyString_Size(a);
- while (s < end) {
- if (c == *s++)
+ size = PyString_Size(el);
+ rhs = PyString_AS_STRING(el);
+ lhs = PyString_AS_STRING(a);
+
+ /* optimize for a single character */
+ if (size == 1)
+ return memchr(lhs, *rhs, PyString_Size(a)) != NULL;
+
+ end = lhs + (PyString_Size(a) - size);
+ while (lhs <= end) {
+ if (memcmp(lhs++, rhs, size) == 0)
return 1;
}
+
return 0;
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 6ca709b..a577bfd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
PyUnicodeObject *u = NULL, *v = NULL;
- int result;
- register const Py_UNICODE *p, *e;
- register Py_UNICODE ch;
+ int result, size;
+ register const Py_UNICODE *lhs, *end, *rhs;
/* Coerce the two arguments */
v = (PyUnicodeObject *)PyUnicode_FromObject(element);
if (v == NULL) {
PyErr_SetString(PyExc_TypeError,
- "'in <string>' requires character as left operand");
+ "'in <string>' requires string as left operand");
goto onError;
}
u = (PyUnicodeObject *)PyUnicode_FromObject(container);
@@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container,
goto onError;
}
- /* Check v in u */
- if (PyUnicode_GET_SIZE(v) != 1) {
- PyErr_SetString(PyExc_TypeError,
- "'in <string>' requires character as left operand");
- goto onError;
- }
- ch = *PyUnicode_AS_UNICODE(v);
- p = PyUnicode_AS_UNICODE(u);
- e = p + PyUnicode_GET_SIZE(u);
+ size = PyUnicode_GET_SIZE(v);
+ rhs = PyUnicode_AS_UNICODE(v);
+ lhs = PyUnicode_AS_UNICODE(u);
+
result = 0;
- while (p < e) {
- if (*p++ == ch) {
- result = 1;
- break;
+ if (size == 1) {
+ end = lhs + PyUnicode_GET_SIZE(u);
+ while (lhs < end) {
+ if (*lhs++ == *rhs) {
+ result = 1;
+ break;
+ }
+ }
+ }
+ else {
+ end = lhs + (PyUnicode_GET_SIZE(u) - size);
+ while (lhs <= end) {
+ if (memcmp(lhs++, rhs, size) == 0) {
+ result = 1;
+ break;
+ }
}
}