summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Heimes <christian@cheimes.de>2007-12-10 15:50:56 (GMT)
committerChristian Heimes <christian@cheimes.de>2007-12-10 15:50:56 (GMT)
commit0ded5b54bb499865fb4ab8c0ac3d0977df9a334d (patch)
treea395f726d50a71b1bf363d5a041224ad51e80691
parent110194048e2d6a42f5954c39c8646ba93b66a7e1 (diff)
downloadcpython-0ded5b54bb499865fb4ab8c0ac3d0977df9a334d.zip
cpython-0ded5b54bb499865fb4ab8c0ac3d0977df9a334d.tar.gz
cpython-0ded5b54bb499865fb4ab8c0ac3d0977df9a334d.tar.bz2
Fixed issue #1564: The set implementation should special-case PyUnicode instead of PyString
I moved the unicode_eq to stringlib/eq.h to keep the function static and possible inline for setobject.c and dictobject.h. I also removed the unused _PyString_Eq method. If it's required in the future it can be added to eq.h
-rw-r--r--Include/stringobject.h1
-rw-r--r--Lib/test/test_set.py42
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/dictobject.c20
-rw-r--r--Objects/setobject.c33
-rw-r--r--Objects/stringlib/eq.h21
-rw-r--r--Objects/stringobject.c10
7 files changed, 85 insertions, 45 deletions
diff --git a/Include/stringobject.h b/Include/stringobject.h
index 184bcdd..6a63576 100644
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -58,7 +58,6 @@ PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
-PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
int, char**, int*);
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
index ba5801d..e761f19 100644
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -7,6 +7,7 @@ import pickle
import os
from random import randrange, shuffle
import sys
+import warnings
class PassThru(Exception):
pass
@@ -817,6 +818,44 @@ class TestBasicOpsTriple(TestBasicOps):
self.length = 3
self.repr = None
+#------------------------------------------------------------------------------
+
+class TestBasicOpsString(TestBasicOps):
+ def setUp(self):
+ self.case = "string set"
+ self.values = ["a", "b", "c"]
+ self.set = set(self.values)
+ self.dup = set(self.values)
+ self.length = 3
+ self.repr = "{'a', 'c', 'b'}"
+
+#------------------------------------------------------------------------------
+
+class TestBasicOpsBytes(TestBasicOps):
+ def setUp(self):
+ self.case = "string set"
+ self.values = [b"a", b"b", b"c"]
+ self.set = set(self.values)
+ self.dup = set(self.values)
+ self.length = 3
+ self.repr = "{b'a', b'c', b'b'}"
+
+#------------------------------------------------------------------------------
+
+class TestBasicOpsMixedStringBytes(TestBasicOps):
+ def setUp(self):
+ self.warning_filters = warnings.filters[:]
+ warnings.simplefilter('ignore', BytesWarning)
+ self.case = "string and bytes set"
+ self.values = ["a", "b", b"a", b"b"]
+ self.set = set(self.values)
+ self.dup = set(self.values)
+ self.length = 4
+ self.repr = "{'a', b'a', 'b', b'b'}"
+
+ def tearDown(self):
+ warnings.filters = self.warning_filters
+
#==============================================================================
def baditer():
@@ -1581,6 +1620,9 @@ def test_main(verbose=None):
TestBasicOpsSingleton,
TestBasicOpsTuple,
TestBasicOpsTriple,
+ TestBasicOpsString,
+ TestBasicOpsBytes,
+ TestBasicOpsMixedStringBytes,
TestBinaryOps,
TestUpdateOps,
TestMutate,
diff --git a/Misc/NEWS b/Misc/NEWS
index f5ab868..bd8a3e7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -14,6 +14,9 @@ Core and Builtins
- Issue #1573: Improper use of the keyword-only syntax makes the parser crash
+- Issue #1564: The set implementation should special-case PyUnicode instead
+ of PyString
+
Extension Modules
-----------------
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index ae400b6..05252d7 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -8,6 +8,7 @@
*/
#include "Python.h"
+#include "stringlib/eq.h"
/* Set a key error with the specified argument, wrapping it in a
@@ -327,25 +328,6 @@ lookdict(PyDictObject *mp, PyObject *key, register long hash)
return 0;
}
-/* Return 1 if two unicode objects are equal, 0 if not. */
-static int
-unicode_eq(PyObject *aa, PyObject *bb)
-{
- PyUnicodeObject *a = (PyUnicodeObject *)aa;
- PyUnicodeObject *b = (PyUnicodeObject *)bb;
-
- if (a->length != b->length)
- return 0;
- if (a->length == 0)
- return 1;
- if (a->str[0] != b->str[0])
- return 0;
- if (a->length == 1)
- return 1;
- return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
-}
-
-
/*
* Hacked up version of lookdict which can assume keys are always
* unicodes; this assumption allows testing for errors during
diff --git a/Objects/setobject.c b/Objects/setobject.c
index f22268e..cd63968 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -9,6 +9,7 @@
#include "Python.h"
#include "structmember.h"
+#include "stringlib/eq.h"
/* Set a key error with the specified argument, wrapping it in a
* tuple automatically so that tuple keys are not unpacked as the
@@ -55,6 +56,7 @@ _PySet_Dummy(void)
static PySetObject *free_sets[MAXFREESETS];
static int num_free_sets = 0;
+
/*
The basic lookup function used by all operations.
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -144,12 +146,12 @@ set_lookkey(PySetObject *so, PyObject *key, register long hash)
}
/*
- * Hacked up version of set_lookkey which can assume keys are always strings;
- * This means we can always use _PyString_Eq directly and not have to check to
+ * Hacked up version of set_lookkey which can assume keys are always unicode;
+ * This means we can always use unicode_eq directly and not have to check to
* see if the comparison altered the table.
*/
static setentry *
-set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash)
{
register Py_ssize_t i;
register size_t perturb;
@@ -158,11 +160,11 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
setentry *table = so->table;
register setentry *entry;
- /* Make sure this function doesn't have to handle non-string keys,
+ /* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
strings is to override __eq__, and for speed we don't cater to
that here. */
- if (!PyString_CheckExact(key)) {
+ if (!PyUnicode_CheckExact(key)) {
so->lookup = set_lookkey;
return set_lookkey(so, key, hash);
}
@@ -173,7 +175,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
if (entry->key == dummy)
freeslot = entry;
else {
- if (entry->hash == hash && _PyString_Eq(entry->key, key))
+ if (entry->hash == hash && unicode_eq(entry->key, key))
return entry;
freeslot = NULL;
}
@@ -188,7 +190,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
if (entry->key == key
|| (entry->hash == hash
&& entry->key != dummy
- && _PyString_Eq(entry->key, key)))
+ && unicode_eq(entry->key, key)))
return entry;
if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
@@ -375,8 +377,8 @@ set_add_key(register PySetObject *so, PyObject *key)
register long hash;
register Py_ssize_t n_used;
- if (!PyString_CheckExact(key) ||
- (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyUnicodeObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
@@ -422,8 +424,9 @@ set_discard_key(PySetObject *so, PyObject *key)
PyObject *old_key;
assert (PyAnySet_Check(so));
- if (!PyString_CheckExact(key) ||
- (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyUnicodeObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
@@ -668,8 +671,8 @@ set_contains_key(PySetObject *so, PyObject *key)
long hash;
setentry *entry;
- if (!PyString_CheckExact(key) ||
- (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyUnicodeObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
@@ -989,7 +992,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
INIT_NONZERO_SET_SLOTS(so);
}
- so->lookup = set_lookkey_string;
+ so->lookup = set_lookkey_unicode;
so->weakreflist = NULL;
if (iterable != NULL) {
@@ -1352,7 +1355,7 @@ set_isdisjoint(PySetObject *so, PyObject *other)
while ((key = PyIter_Next(it)) != NULL) {
int rv;
setentry entry;
- long hash = PyObject_Hash(key);
+ long hash = PyObject_Hash(key);;
if (hash == -1) {
Py_DECREF(key);
diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h
new file mode 100644
index 0000000..4e989dc
--- /dev/null
+++ b/Objects/stringlib/eq.h
@@ -0,0 +1,21 @@
+/* Fast unicode equal function optimized for dictobject.c and setobject.c */
+
+/* Return 1 if two unicode objects are equal, 0 if not.
+ * unicode_eq() is called when the hash of two unicode objects is equal.
+ */
+Py_LOCAL_INLINE(int)
+unicode_eq(PyObject *aa, PyObject *bb)
+{
+ register PyUnicodeObject *a = (PyUnicodeObject *)aa;
+ register PyUnicodeObject *b = (PyUnicodeObject *)bb;
+
+ if (a->length != b->length)
+ return 0;
+ if (a->length == 0)
+ return 1;
+ if (a->str[0] != b->str[0])
+ return 0;
+ if (a->length == 1)
+ return 1;
+ return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
+}
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 855f6cd..619804b 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -877,16 +877,6 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
return result;
}
-int
-_PyString_Eq(PyObject *o1, PyObject *o2)
-{
- PyStringObject *a = (PyStringObject*) o1;
- PyStringObject *b = (PyStringObject*) o2;
- return Py_Size(a) == Py_Size(b)
- && *a->ob_sval == *b->ob_sval
- && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
-}
-
static long
string_hash(PyStringObject *a)
{