diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 2 | ||||
-rw-r--r-- | Objects/setobject.c | 537 | ||||
-rw-r--r-- | Objects/stringlib/unicode_format.h | 2 | ||||
-rw-r--r-- | Objects/structseq.c | 24 | ||||
-rw-r--r-- | Objects/typeobject.c | 46 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 252 |
6 files changed, 513 insertions, 350 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index 31cc0a8..039a4ca 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2131,7 +2131,7 @@ PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) /* PyObject_Call() must not be called with an exception set, because it may clear it (directly or indirectly) and so the - caller looses its exception */ + caller loses its exception */ assert(!PyErr_Occurred()); call = func->ob_type->tp_call; diff --git a/Objects/setobject.c b/Objects/setobject.c index 704d7e2..03bb230 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -29,7 +29,6 @@ #include "Python.h" #include "structmember.h" -#include "stringlib/eq.h" /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -51,19 +50,20 @@ static PyObject _dummy_struct; static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { - setentry *table = so->table; - setentry *freeslot = NULL; + setentry *table; setentry *entry; - size_t perturb = hash; + size_t perturb; size_t mask = so->mask; size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */ size_t j; int cmp; - entry = &table[i]; + entry = &so->table[i]; if (entry->key == NULL) return entry; + perturb = hash; + while (1) { if (entry->hash == hash) { PyObject *startkey = entry->key; @@ -73,8 +73,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && unicode_eq(startkey, key)) + && _PyUnicode_EQ(startkey, key)) return entry; + table = so->table; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); @@ -86,14 +87,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; mask = so->mask; /* help avoid a register spill */ } - if (entry->hash == -1 && freeslot == NULL) - freeslot = entry; if (i + LINEAR_PROBES <= mask) { for (j = 0 ; j < LINEAR_PROBES ; j++) { entry++; - if (entry->key == NULL) - goto found_null; + if (entry->hash == 0 && entry->key == NULL) + return entry; if (entry->hash == hash) { PyObject *startkey = entry->key; assert(startkey != dummy); @@ -101,8 +100,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && unicode_eq(startkey, key)) + && _PyUnicode_EQ(startkey, key)) return entry; + table = so->table; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); @@ -114,7 +114,104 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; mask = so->mask; } - if (entry->hash == -1 && freeslot == NULL) + } + } + + perturb >>= PERTURB_SHIFT; + i = (i * 5 + 1 + perturb) & mask; + + entry = &so->table[i]; + if (entry->key == NULL) + return entry; + } +} + +static int set_table_resize(PySetObject *, Py_ssize_t); + +static int +set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *table; + setentry *freeslot; + setentry *entry; + size_t perturb; + size_t mask; + size_t i; /* Unsigned for defined overflow behavior */ + size_t j; + int cmp; + + /* Pre-increment is necessary to prevent arbitrary code in the rich + comparison from deallocating the key just before the insertion. */ + Py_INCREF(key); + + restart: + + mask = so->mask; + i = (size_t)hash & mask; + + entry = &so->table[i]; + if (entry->key == NULL) + goto found_unused; + + freeslot = NULL; + perturb = hash; + + while (1) { + if (entry->hash == hash) { + PyObject *startkey = entry->key; + /* startkey cannot be a dummy because the dummy hash field is -1 */ + assert(startkey != dummy); + if (startkey == key) + goto found_active; + if (PyUnicode_CheckExact(startkey) + && PyUnicode_CheckExact(key) + && _PyUnicode_EQ(startkey, key)) + goto found_active; + table = so->table; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp > 0) /* likely */ + goto found_active; + if (cmp < 0) + goto comparison_error; + /* Continuing the search from the current entry only makes + sense if the table and entry are unchanged; otherwise, + we have to restart from the beginning */ + if (table != so->table || entry->key != startkey) + goto restart; + mask = so->mask; /* help avoid a register spill */ + } + else if (entry->hash == -1 && freeslot == NULL) + freeslot = entry; + + if (i + LINEAR_PROBES <= mask) { + for (j = 0 ; j < LINEAR_PROBES ; j++) { + entry++; + if (entry->hash == 0 && entry->key == NULL) + goto found_unused_or_dummy; + if (entry->hash == hash) { + PyObject *startkey = entry->key; + assert(startkey != dummy); + if (startkey == key) + goto found_active; + if (PyUnicode_CheckExact(startkey) + && PyUnicode_CheckExact(key) + && _PyUnicode_EQ(startkey, key)) + goto found_active; + table = so->table; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp > 0) + goto found_active; + if (cmp < 0) + goto comparison_error; + if (table != so->table || entry->key != startkey) + goto restart; + mask = so->mask; + } + else if (entry->hash == -1 && freeslot == NULL) freeslot = entry; } } @@ -122,12 +219,35 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) perturb >>= PERTURB_SHIFT; i = (i * 5 + 1 + perturb) & mask; - entry = &table[i]; + entry = &so->table[i]; if (entry->key == NULL) - goto found_null; + goto found_unused_or_dummy; } - found_null: - return freeslot == NULL ? entry : freeslot; + + found_unused_or_dummy: + if (freeslot == NULL) + goto found_unused; + so->used++; + freeslot->key = key; + freeslot->hash = hash; + return 0; + + found_unused: + so->fill++; + so->used++; + entry->key = key; + entry->hash = hash; + if ((size_t)so->fill*3 < mask*2) + return 0; + return set_table_resize(so, so->used); + + found_active: + Py_DECREF(key); + return 0; + + comparison_error: + Py_DECREF(key); + return -1; } /* @@ -172,38 +292,6 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) /* ======== End logic for probing the hash table ========================== */ /* ======================================================================== */ - -/* -Internal routine to insert a new key into the table. -Used by the public insert routine. -Eats a reference to key. -*/ -static int -set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) -{ - setentry *entry; - - entry = set_lookkey(so, key, hash); - if (entry == NULL) - return -1; - if (entry->key == NULL) { - /* UNUSED */ - entry->key = key; - entry->hash = hash; - so->fill++; - so->used++; - } else if (entry->key == dummy) { - /* DUMMY */ - entry->key = key; - entry->hash = hash; - so->used++; - } else { - /* ACTIVE */ - Py_DECREF(key); - } - return 0; -} - /* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may @@ -220,6 +308,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) setentry small_copy[PySet_MINSIZE]; assert(minused >= 0); + minused = (minused > 50000) ? minused * 2 : minused * 4; /* Find the smallest table size > minused. */ /* XXX speed-up with intrinsics */ @@ -295,31 +384,42 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) return 0; } -/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ +static int +set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *entry; + + entry = set_lookkey(so, key, hash); + if (entry != NULL) + return entry->key != NULL; + return -1; +} + +#define DISCARD_NOTFOUND 0 +#define DISCARD_FOUND 1 static int -set_add_entry(PySetObject *so, setentry *entry) +set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash) { - Py_ssize_t n_used; - PyObject *key = entry->key; - Py_hash_t hash = entry->hash; + setentry *entry; + PyObject *old_key; - assert(so->fill <= so->mask); /* at least one empty slot */ - n_used = so->used; - Py_INCREF(key); - if (set_insert_key(so, key, hash)) { - Py_DECREF(key); + entry = set_lookkey(so, key, hash); + if (entry == NULL) return -1; - } - if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) - return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + if (entry->key == NULL) + return DISCARD_NOTFOUND; + old_key = entry->key; + entry->key = dummy; + entry->hash = -1; + so->used--; + Py_DECREF(old_key); + return DISCARD_FOUND; } static int set_add_key(PySetObject *so, PyObject *key) { - setentry entry; Py_hash_t hash; if (!PyUnicode_CheckExact(key) || @@ -328,50 +428,35 @@ set_add_key(PySetObject *so, PyObject *key) if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_add_entry(so, &entry); + return set_add_entry(so, key, hash); } -#define DISCARD_NOTFOUND 0 -#define DISCARD_FOUND 1 - static int -set_discard_entry(PySetObject *so, setentry *oldentry) +set_contains_key(PySetObject *so, PyObject *key) { - setentry *entry; - PyObject *old_key; + Py_hash_t hash; - entry = set_lookkey(so, oldentry->key, oldentry->hash); - if (entry == NULL) - return -1; - if (entry->key == NULL || entry->key == dummy) - return DISCARD_NOTFOUND; - old_key = entry->key; - entry->key = dummy; - entry->hash = -1; - so->used--; - Py_DECREF(old_key); - return DISCARD_FOUND; + if (!PyUnicode_CheckExact(key) || + (hash = ((PyASCIIObject *) key)->hash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + } + return set_contains_entry(so, key, hash); } static int set_discard_key(PySetObject *so, PyObject *key) { - setentry entry; Py_hash_t hash; - assert (PyAnySet_Check(so)); - if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_discard_entry(so, &entry); + return set_discard_entry(so, key, hash); } static void @@ -452,20 +537,22 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { Py_ssize_t i; Py_ssize_t mask; - setentry *table; + setentry *entry; assert (PyAnySet_Check(so)); i = *pos_ptr; assert(i >= 0); - table = so->table; mask = so->mask; - while (i <= mask && (table[i].key == NULL || table[i].key == dummy)) + entry = &so->table[i]; + while (i <= mask && (entry->key == NULL || entry->key == dummy)) { i++; + entry++; + } *pos_ptr = i+1; if (i > mask) return 0; - assert(table[i].key != NULL); - *entry_ptr = &table[i]; + assert(entry != NULL); + *entry_ptr = entry; return 1; } @@ -563,8 +650,8 @@ set_merge(PySetObject *so, PyObject *otherset) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if ((so->fill + other->used)*3 >= (so->mask+1)*2) { - if (set_table_resize(so, (so->used + other->used)*2) != 0) + if ((so->fill + other->used)*3 >= so->mask*2) { + if (set_table_resize(so, so->used + other->used) != 0) return -1; } so_entry = so->table; @@ -604,46 +691,13 @@ set_merge(PySetObject *so, PyObject *otherset) other_entry = &other->table[i]; key = other_entry->key; if (key != NULL && key != dummy) { - Py_INCREF(key); - if (set_insert_key(so, key, other_entry->hash)) { - Py_DECREF(key); + if (set_add_entry(so, key, other_entry->hash)) return -1; - } } } return 0; } -static int -set_contains_entry(PySetObject *so, setentry *entry) -{ - PyObject *key; - setentry *lu_entry; - - lu_entry = set_lookkey(so, entry->key, entry->hash); - if (lu_entry == NULL) - return -1; - key = lu_entry->key; - return key != NULL && key != dummy; -} - -static int -set_contains_key(PySetObject *so, PyObject *key) -{ - setentry entry; - Py_hash_t hash; - - if (!PyUnicode_CheckExact(key) || - (hash = ((PyASCIIObject *) key)->hash) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; - } - entry.key = key; - entry.hash = hash; - return set_contains_entry(so, &entry); -} - static PyObject * set_pop(PySetObject *so) { @@ -685,43 +739,64 @@ set_traverse(PySetObject *so, visitproc visit, void *arg) return 0; } -static Py_hash_t -frozenset_hash(PyObject *self) +/* Work to increase the bit dispersion for closely spaced hash values. + This is important because some use cases have many combinations of a + small number of elements with nearby hashes so that many distinct + combinations collapse to only a handful of distinct hash values. */ + +static Py_uhash_t +_shuffle_bits(Py_uhash_t h) { - /* Most of the constants in this hash algorithm are randomly choosen - large primes with "interesting bit patterns" and that passed - tests for good collision statistics on a variety of problematic - datasets such as: + return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; +} - ps = [] - for r in range(21): - ps += itertools.combinations(range(20), r) - num_distinct_hashes = len({hash(frozenset(s)) for s in ps}) +/* Most of the constants in this hash algorithm are randomly chosen + large primes with "interesting bit patterns" and that passed tests + for good collision statistics on a variety of problematic datasets + including powersets and graph structures (such as David Eppstein's + graph recipes in Lib/test/test_set.py) */ - */ +static Py_hash_t +frozenset_hash(PyObject *self) +{ PySetObject *so = (PySetObject *)self; - Py_uhash_t h, hash = 1927868237UL; + Py_uhash_t hash = 0; setentry *entry; - Py_ssize_t pos = 0; if (so->hash != -1) return so->hash; - hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1; - while (set_next(so, &pos, &entry)) { - /* Work to increase the bit dispersion for closely spaced hash - values. This is important because some use cases have many - combinations of a small number of elements with nearby - hashes so that many distinct combinations collapse to only - a handful of distinct hash values. */ - h = entry->hash; - hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; - } - /* Make the final result spread-out in a different pattern - than the algorithm for tuples or other python objects. */ + /* Xor-in shuffled bits from every entry's hash field because xor is + commutative and a frozenset hash should be independent of order. + + For speed, include null entries and dummy entries and then + subtract out their effect afterwards so that the final hash + depends only on active entries. This allows the code to be + vectorized by the compiler and it saves the unpredictable + branches that would arise when trying to exclude null and dummy + entries on every iteration. */ + + for (entry = so->table; entry <= &so->table[so->mask]; entry++) + hash ^= _shuffle_bits(entry->hash); + + /* Remove the effect of an odd number of NULL entries */ + if ((so->mask + 1 - so->fill) & 1) + hash ^= _shuffle_bits(0); + + /* Remove the effect of an odd number of dummy entries */ + if ((so->fill - so->used) & 1) + hash ^= _shuffle_bits(-1); + + /* Factor in the number of active entries */ + hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL; + + /* Disperse patterns arising in nested frozensets */ hash = hash * 69069U + 907133923UL; + + /* -1 is reserved as an error code */ if (hash == (Py_uhash_t)-1) hash = 590923713UL; + so->hash = hash; return hash; } @@ -868,7 +943,7 @@ PyTypeObject PySetIter_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc)setiter_traverse, /* tp_traverse */ 0, /* tp_clear */ @@ -913,18 +988,14 @@ set_update_internal(PySetObject *so, PyObject *other) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if (dictsize == -1) + if (dictsize < 0) return -1; - if ((so->fill + dictsize)*3 >= (so->mask+1)*2) { - if (set_table_resize(so, (so->used + dictsize)*2) != 0) + if ((so->fill + dictsize)*3 >= so->mask*2) { + if (set_table_resize(so, so->used + dictsize) != 0) return -1; } while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - setentry an_entry; - - an_entry.hash = hash; - an_entry.key = key; - if (set_add_entry(so, &an_entry)) + if (set_add_entry(so, key, hash)) return -1; } return 0; @@ -1204,6 +1275,8 @@ set_intersection(PySetObject *so, PyObject *other) { PySetObject *result; PyObject *key, *it, *tmp; + Py_hash_t hash; + int rv; if ((PyObject *)so == other) return set_copy(so); @@ -1223,13 +1296,15 @@ set_intersection(PySetObject *so, PyObject *other) } while (set_next((PySetObject *)other, &pos, &entry)) { - int rv = set_contains_entry(so, entry); - if (rv == -1) { + key = entry->key; + hash = entry->hash; + rv = set_contains_entry(so, key, hash); + if (rv < 0) { Py_DECREF(result); return NULL; } if (rv) { - if (set_add_entry(result, entry)) { + if (set_add_entry(result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1245,32 +1320,15 @@ set_intersection(PySetObject *so, PyObject *other) } while ((key = PyIter_Next(it)) != NULL) { - int rv; - setentry entry; - Py_hash_t hash = PyObject_Hash(key); - - if (hash == -1) { - Py_DECREF(it); - Py_DECREF(result); - Py_DECREF(key); - return NULL; - } - entry.hash = hash; - entry.key = key; - rv = set_contains_entry(so, &entry); - if (rv == -1) { - Py_DECREF(it); - Py_DECREF(result); - Py_DECREF(key); - return NULL; - } + hash = PyObject_Hash(key); + if (hash == -1) + goto error; + rv = set_contains_entry(so, key, hash); + if (rv < 0) + goto error; if (rv) { - if (set_add_entry(result, &entry)) { - Py_DECREF(it); - Py_DECREF(result); - Py_DECREF(key); - return NULL; - } + if (set_add_entry(result, key, hash)) + goto error; } Py_DECREF(key); } @@ -1280,6 +1338,11 @@ set_intersection(PySetObject *so, PyObject *other) return NULL; } return (PyObject *)result; + error: + Py_DECREF(it); + Py_DECREF(result); + Py_DECREF(key); + return NULL; } static PyObject * @@ -1366,6 +1429,7 @@ static PyObject * set_isdisjoint(PySetObject *so, PyObject *other) { PyObject *key, *it, *tmp; + int rv; if ((PyObject *)so == other) { if (PySet_GET_SIZE(so) == 0) @@ -1384,8 +1448,8 @@ set_isdisjoint(PySetObject *so, PyObject *other) other = tmp; } while (set_next((PySetObject *)other, &pos, &entry)) { - int rv = set_contains_entry(so, entry); - if (rv == -1) + rv = set_contains_entry(so, entry->key, entry->hash); + if (rv < 0) return NULL; if (rv) Py_RETURN_FALSE; @@ -1398,8 +1462,6 @@ set_isdisjoint(PySetObject *so, PyObject *other) return NULL; while ((key = PyIter_Next(it)) != NULL) { - int rv; - setentry entry; Py_hash_t hash = PyObject_Hash(key); if (hash == -1) { @@ -1407,11 +1469,9 @@ set_isdisjoint(PySetObject *so, PyObject *other) Py_DECREF(it); return NULL; } - entry.hash = hash; - entry.key = key; - rv = set_contains_entry(so, &entry); + rv = set_contains_entry(so, key, hash); Py_DECREF(key); - if (rv == -1) { + if (rv < 0) { Py_DECREF(it); return NULL; } @@ -1440,7 +1500,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) Py_ssize_t pos = 0; while (set_next((PySetObject *)other, &pos, &entry)) - if (set_discard_entry(so, entry) == -1) + if (set_discard_entry(so, entry->key, entry->hash) < 0) return -1; } else { PyObject *key, *it; @@ -1449,7 +1509,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) return -1; while ((key = PyIter_Next(it)) != NULL) { - if (set_discard_key(so, key) == -1) { + if (set_discard_key(so, key) < 0) { Py_DECREF(it); Py_DECREF(key); return -1; @@ -1460,10 +1520,10 @@ set_difference_update_internal(PySetObject *so, PyObject *other) if (PyErr_Occurred()) return -1; } - /* If more than 1/5 are dummies, then resize them away. */ - if ((so->fill - so->used) * 5 < so->mask) + /* If more than 1/4th are dummies, then resize them away. */ + if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4) return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + return set_table_resize(so, so->used); } static PyObject * @@ -1490,7 +1550,7 @@ set_copy_and_difference(PySetObject *so, PyObject *other) result = set_copy(so); if (result == NULL) return NULL; - if (set_difference_update_internal((PySetObject *) result, other) != -1) + if (set_difference_update_internal((PySetObject *) result, other) == 0) return result; Py_DECREF(result); return NULL; @@ -1500,8 +1560,11 @@ static PyObject * set_difference(PySetObject *so, PyObject *other) { PyObject *result; + PyObject *key; + Py_hash_t hash; setentry *entry; Py_ssize_t pos = 0; + int rv; if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) { return set_copy_and_difference(so, other); @@ -1519,17 +1582,15 @@ set_difference(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { while (set_next(so, &pos, &entry)) { - setentry entrycopy; - int rv; - entrycopy.hash = entry->hash; - entrycopy.key = entry->key; - rv = _PyDict_Contains(other, entry->key, entry->hash); + key = entry->key; + hash = entry->hash; + rv = _PyDict_Contains(other, key, hash); if (rv < 0) { Py_DECREF(result); return NULL; } if (!rv) { - if (set_add_entry((PySetObject *)result, &entrycopy)) { + if (set_add_entry((PySetObject *)result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1540,13 +1601,15 @@ set_difference(PySetObject *so, PyObject *other) /* Iterate over so, checking for common elements in other. */ while (set_next(so, &pos, &entry)) { - int rv = set_contains_entry((PySetObject *)other, entry); - if (rv == -1) { + key = entry->key; + hash = entry->hash; + rv = set_contains_entry((PySetObject *)other, key, hash); + if (rv < 0) { Py_DECREF(result); return NULL; } if (!rv) { - if (set_add_entry((PySetObject *)result, entry)) { + if (set_add_entry((PySetObject *)result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1608,29 +1671,24 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) PySetObject *otherset; PyObject *key; Py_ssize_t pos = 0; + Py_hash_t hash; setentry *entry; + int rv; if ((PyObject *)so == other) return set_clear(so); if (PyDict_CheckExact(other)) { PyObject *value; - int rv; - Py_hash_t hash; while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - setentry an_entry; - Py_INCREF(key); - an_entry.hash = hash; - an_entry.key = key; - - rv = set_discard_entry(so, &an_entry); - if (rv == -1) { + rv = set_discard_entry(so, key, hash); + if (rv < 0) { Py_DECREF(key); return NULL; } if (rv == DISCARD_NOTFOUND) { - if (set_add_entry(so, &an_entry)) { + if (set_add_entry(so, key, hash)) { Py_DECREF(key); return NULL; } @@ -1650,13 +1708,15 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) } while (set_next(otherset, &pos, &entry)) { - int rv = set_discard_entry(so, entry); - if (rv == -1) { + key = entry->key; + hash = entry->hash; + rv = set_discard_entry(so, key, hash); + if (rv < 0) { Py_DECREF(otherset); return NULL; } if (rv == DISCARD_NOTFOUND) { - if (set_add_entry(so, entry)) { + if (set_add_entry(so, key, hash)) { Py_DECREF(otherset); return NULL; } @@ -1718,6 +1778,7 @@ set_issubset(PySetObject *so, PyObject *other) { setentry *entry; Py_ssize_t pos = 0; + int rv; if (!PyAnySet_Check(other)) { PyObject *tmp, *result; @@ -1732,8 +1793,8 @@ set_issubset(PySetObject *so, PyObject *other) Py_RETURN_FALSE; while (set_next(so, &pos, &entry)) { - int rv = set_contains_entry((PySetObject *)other, entry); - if (rv == -1) + rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash); + if (rv < 0) return NULL; if (!rv) Py_RETURN_FALSE; @@ -1824,7 +1885,7 @@ set_contains(PySetObject *so, PyObject *key) int rv; rv = set_contains_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return -1; PyErr_Clear(); @@ -1843,7 +1904,7 @@ set_direct_contains(PySetObject *so, PyObject *key) long result; result = set_contains(so, key); - if (result == -1) + if (result < 0) return NULL; return PyBool_FromLong(result); } @@ -1857,7 +1918,7 @@ set_remove(PySetObject *so, PyObject *key) int rv; rv = set_discard_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); @@ -1866,7 +1927,7 @@ set_remove(PySetObject *so, PyObject *key) return NULL; rv = set_discard_key(so, tmpkey); Py_DECREF(tmpkey); - if (rv == -1) + if (rv < 0) return NULL; } @@ -1889,7 +1950,7 @@ set_discard(PySetObject *so, PyObject *key) int rv; rv = set_discard_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); @@ -1898,7 +1959,7 @@ set_discard(PySetObject *so, PyObject *key) return NULL; rv = set_discard_key(so, tmpkey); Py_DECREF(tmpkey); - if (rv == -1) + if (rv < 0) return NULL; } Py_RETURN_NONE; @@ -2125,7 +2186,7 @@ static PyMethodDef frozenset_methods[] = { copy_doc}, {"difference", (PyCFunction)set_difference_multi, METH_VARARGS, difference_doc}, - {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS, + {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS, intersection_doc}, {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O, isdisjoint_doc}, @@ -2196,7 +2257,7 @@ PyTypeObject PyFrozenSet_Type = { (traverseproc)set_traverse, /* tp_traverse */ (inquiry)set_clear_internal, /* tp_clear */ (richcmpfunc)set_richcompare, /* tp_richcompare */ - offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ + offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ (getiterfunc)set_iter, /* tp_iter */ 0, /* tp_iternext */ frozenset_methods, /* tp_methods */ diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index aec221a..d72e47d 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -67,7 +67,7 @@ SubString_new_object(SubString *str) return PyUnicode_Substring(str->str, str->start, str->end); } -/* return a new string. if str->str is NULL, return None */ +/* return a new string. if str->str is NULL, return a new empty string */ Py_LOCAL_INLINE(PyObject *) SubString_new_object_or_empty(SubString *str) { diff --git a/Objects/structseq.c b/Objects/structseq.c index 664344b..7209738 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -16,14 +16,14 @@ _Py_IDENTIFIER(n_fields); _Py_IDENTIFIER(n_unnamed_fields); #define VISIBLE_SIZE(op) Py_SIZE(op) -#define VISIBLE_SIZE_TP(tp) PyLong_AsLong( \ +#define VISIBLE_SIZE_TP(tp) PyLong_AsSsize_t( \ _PyDict_GetItemId((tp)->tp_dict, &PyId_n_sequence_fields)) -#define REAL_SIZE_TP(tp) PyLong_AsLong( \ +#define REAL_SIZE_TP(tp) PyLong_AsSsize_t( \ _PyDict_GetItemId((tp)->tp_dict, &PyId_n_fields)) #define REAL_SIZE(op) REAL_SIZE_TP(Py_TYPE(op)) -#define UNNAMED_FIELDS_TP(tp) PyLong_AsLong( \ +#define UNNAMED_FIELDS_TP(tp) PyLong_AsSsize_t( \ _PyDict_GetItemId((tp)->tp_dict, &PyId_n_unnamed_fields)) #define UNNAMED_FIELDS(op) UNNAMED_FIELDS_TP(Py_TYPE(op)) @@ -164,7 +164,8 @@ structseq_repr(PyStructSequence *obj) #define TYPE_MAXSIZE 100 PyTypeObject *typ = Py_TYPE(obj); - int i, removelast = 0; + Py_ssize_t i; + int removelast = 0; Py_ssize_t len; char buf[REPR_BUFFER_SIZE]; char *endofbuf, *pbuf = buf; @@ -236,8 +237,7 @@ structseq_reduce(PyStructSequence* self) PyObject* tup = NULL; PyObject* dict = NULL; PyObject* result; - Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields; - int i; + Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields, i; n_fields = REAL_SIZE(self); n_visible_fields = VISIBLE_SIZE(self); @@ -325,7 +325,7 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc) { PyObject *dict; PyMemberDef* members; - int n_members, n_unnamed_members, i, k; + Py_ssize_t n_members, n_unnamed_members, i, k; PyObject *v; #ifdef Py_TRACE_REFS @@ -373,9 +373,9 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc) Py_INCREF(type); dict = type->tp_dict; -#define SET_DICT_FROM_INT(key, value) \ +#define SET_DICT_FROM_SIZE(key, value) \ do { \ - v = PyLong_FromLong((long) value); \ + v = PyLong_FromSsize_t(value); \ if (v == NULL) \ return -1; \ if (PyDict_SetItemString(dict, key, v) < 0) { \ @@ -385,9 +385,9 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc) Py_DECREF(v); \ } while (0) - SET_DICT_FROM_INT(visible_length_key, desc->n_in_sequence); - SET_DICT_FROM_INT(real_length_key, n_members); - SET_DICT_FROM_INT(unnamed_fields_key, n_unnamed_members); + SET_DICT_FROM_SIZE(visible_length_key, desc->n_in_sequence); + SET_DICT_FROM_SIZE(real_length_key, n_members); + SET_DICT_FROM_SIZE(unnamed_fields_key, n_unnamed_members); return 0; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bb83530..a311e99 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -906,25 +906,33 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) #endif obj = type->tp_new(type, args, kwds); - if (obj != NULL) { - /* Ugly exception: when the call was type(something), - don't call tp_init on the result. */ - if (type == &PyType_Type && - PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1 && - (kwds == NULL || - (PyDict_Check(kwds) && PyDict_Size(kwds) == 0))) - return obj; - /* If the returned object is not an instance of type, - it won't be initialized. */ - if (!PyType_IsSubtype(Py_TYPE(obj), type)) - return obj; - type = Py_TYPE(obj); - if (type->tp_init != NULL) { - int res = type->tp_init(obj, args, kwds); - if (res < 0) { - Py_DECREF(obj); - obj = NULL; - } + obj = _Py_CheckFunctionResult((PyObject*)type, obj, NULL); + if (obj == NULL) + return NULL; + + /* Ugly exception: when the call was type(something), + don't call tp_init on the result. */ + if (type == &PyType_Type && + PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1 && + (kwds == NULL || + (PyDict_Check(kwds) && PyDict_Size(kwds) == 0))) + return obj; + + /* If the returned object is not an instance of type, + it won't be initialized. */ + if (!PyType_IsSubtype(Py_TYPE(obj), type)) + return obj; + + type = Py_TYPE(obj); + if (type->tp_init != NULL) { + int res = type->tp_init(obj, args, kwds); + if (res < 0) { + assert(PyErr_Occurred()); + Py_DECREF(obj); + obj = NULL; + } + else { + assert(!PyErr_Occurred()); } } return obj; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9223c99..6657cd4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "Python.h" #include "ucnhash.h" #include "bytes_methods.h" +#include "stringlib/eq.h" #ifdef MS_WINDOWS #include <windows.h> @@ -292,6 +293,34 @@ static unsigned char ascii_linebreak[] = { #include "clinic/unicodeobject.c.h" +typedef enum { + _Py_ERROR_UNKNOWN=0, + _Py_ERROR_STRICT, + _Py_ERROR_SURROGATEESCAPE, + _Py_ERROR_REPLACE, + _Py_ERROR_IGNORE, + _Py_ERROR_XMLCHARREFREPLACE, + _Py_ERROR_OTHER +} _Py_error_handler; + +static _Py_error_handler +get_error_handler(const char *errors) +{ + if (errors == NULL) + return _Py_ERROR_STRICT; + if (strcmp(errors, "strict") == 0) + return _Py_ERROR_STRICT; + if (strcmp(errors, "surrogateescape") == 0) + return _Py_ERROR_SURROGATEESCAPE; + if (strcmp(errors, "ignore") == 0) + return _Py_ERROR_IGNORE; + if (strcmp(errors, "replace") == 0) + return _Py_ERROR_REPLACE; + if (strcmp(errors, "xmlcharrefreplace") == 0) + return _Py_ERROR_XMLCHARREFREPLACE; + return _Py_ERROR_OTHER; +} + /* The max unicode value is always 0x10FFFF while using the PEP-393 API. This function is kept for backward compatibility with the old API. */ Py_UNICODE @@ -3162,24 +3191,22 @@ wcstombs_errorpos(const wchar_t *wstr) static int locale_error_handler(const char *errors, int *surrogateescape) { - if (errors == NULL) { - *surrogateescape = 0; - return 0; - } - - if (strcmp(errors, "strict") == 0) { + _Py_error_handler error_handler = get_error_handler(errors); + switch (error_handler) + { + case _Py_ERROR_STRICT: *surrogateescape = 0; return 0; - } - if (strcmp(errors, "surrogateescape") == 0) { + case _Py_ERROR_SURROGATEESCAPE: *surrogateescape = 1; return 0; + default: + PyErr_Format(PyExc_ValueError, + "only 'strict' and 'surrogateescape' error handlers " + "are supported, not '%s'", + errors); + return -1; } - PyErr_Format(PyExc_ValueError, - "only 'strict' and 'surrogateescape' error handlers " - "are supported, not '%s'", - errors); - return -1; } PyObject * @@ -6388,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors, static PyObject * unicode_encode_ucs1(PyObject *unicode, const char *errors, - unsigned int limit) + const Py_UCS4 limit) { /* input state */ Py_ssize_t pos=0, size; @@ -6402,11 +6429,9 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t ressize; const char *encoding = (limit == 256) ? "latin-1" : "ascii"; const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; - PyObject *errorHandler = NULL; + PyObject *error_handler_obj = NULL; PyObject *exc = NULL; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; + _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; if (PyUnicode_READY(unicode) == -1) return NULL; @@ -6424,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode, ressize = size; while (pos < size) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos); + Py_UCS4 ch = PyUnicode_READ(kind, data, pos); /* can we encode this? */ - if (c<limit) { + if (ch < limit) { /* no overflow check, because we know that the space is enough */ - *str++ = (char)c; + *str++ = (char)ch; ++pos; } else { @@ -6440,38 +6465,35 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t collstart = pos; Py_ssize_t collend = pos; /* find all unecodable characters */ + while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; + /* cache callback name lookup (if not done yet, i.e. it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ + if (error_handler == _Py_ERROR_UNKNOWN) + error_handler = get_error_handler(errors); + + switch (error_handler) { + case _Py_ERROR_STRICT: raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); goto onError; - case 2: /* replace */ + + case _Py_ERROR_REPLACE: while (collstart++ < collend) - *str++ = '?'; /* fall through */ - case 3: /* ignore */ + *str++ = '?'; + /* fall through ignore error handler */ + case _Py_ERROR_IGNORE: pos = collend; break; - case 4: /* xmlcharrefreplace */ + + case _Py_ERROR_XMLCHARREFREPLACE: respos = str - PyBytes_AS_STRING(res); requiredsize = respos; /* determine replacement size */ for (i = collstart; i < collend; ++i) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); Py_ssize_t incr; + + ch = PyUnicode_READ(kind, data, i); if (ch < 10) incr = 2+1+1; else if (ch < 100) @@ -6509,13 +6531,31 @@ unicode_encode_ucs1(PyObject *unicode, } pos = collend; break; + + case _Py_ERROR_SURROGATEESCAPE: + for (i = collstart; i < collend; ++i) { + ch = PyUnicode_READ(kind, data, i); + if (ch < 0xdc80 || 0xdcff < ch) { + /* Not a UTF-8b surrogate */ + break; + } + *str++ = (char)(ch - 0xdc00); + ++pos; + } + if (i >= collend) + break; + collstart = pos; + assert(collstart != collend); + /* fallback to general error handling */ + default: - repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, + repunicode = unicode_encode_call_errorhandler(errors, &error_handler_obj, encoding, reason, unicode, &exc, collstart, collend, &newpos); if (repunicode == NULL || (PyUnicode_Check(repunicode) && PyUnicode_READY(repunicode) == -1)) goto onError; + if (PyBytes_Check(repunicode)) { /* Directly copy bytes result to output. */ repsize = PyBytes_Size(repunicode); @@ -6539,6 +6579,7 @@ unicode_encode_ucs1(PyObject *unicode, Py_DECREF(repunicode); break; } + /* need more space? (at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ @@ -6561,17 +6602,18 @@ unicode_encode_ucs1(PyObject *unicode, str = PyBytes_AS_STRING(res) + respos; ressize = requiredsize; } + /* check if there is anything unencodable in the replacement and copy it to the output */ for (i = 0; repsize-->0; ++i, ++str) { - c = PyUnicode_READ_CHAR(repunicode, i); - if (c >= limit) { + ch = PyUnicode_READ_CHAR(repunicode, i); + if (ch >= limit) { raise_encode_exception(&exc, encoding, unicode, pos, pos+1, reason); Py_DECREF(repunicode); goto onError; } - *str = (char)c; + *str = (char)ch; } pos = newpos; Py_DECREF(repunicode); @@ -6586,7 +6628,7 @@ unicode_encode_ucs1(PyObject *unicode, goto onError; } - Py_XDECREF(errorHandler); + Py_XDECREF(error_handler_obj); Py_XDECREF(exc); return res; @@ -6596,7 +6638,7 @@ unicode_encode_ucs1(PyObject *unicode, onError: Py_XDECREF(res); - Py_XDECREF(errorHandler); + Py_XDECREF(error_handler_obj); Py_XDECREF(exc); return NULL; } @@ -6656,8 +6698,9 @@ PyUnicode_DecodeASCII(const char *s, Py_ssize_t endinpos; Py_ssize_t outpos; const char *e; - PyObject *errorHandler = NULL; + PyObject *error_handler_obj = NULL; PyObject *exc = NULL; + _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; if (size == 0) _Py_RETURN_UNICODE_EMPTY(); @@ -6686,12 +6729,42 @@ PyUnicode_DecodeASCII(const char *s, PyUnicode_WRITE(kind, data, writer.pos, c); writer.pos++; ++s; + continue; } - else { + + /* byte outsize range 0x00..0x7f: call the error handler */ + + if (error_handler == _Py_ERROR_UNKNOWN) + error_handler = get_error_handler(errors); + + switch (error_handler) + { + case _Py_ERROR_REPLACE: + case _Py_ERROR_SURROGATEESCAPE: + /* Fast-path: the error handler only writes one character, + but we may switch to UCS2 at the first write */ + if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0) + goto onError; + kind = writer.kind; + data = writer.data; + + if (error_handler == _Py_ERROR_REPLACE) + PyUnicode_WRITE(kind, data, writer.pos, 0xfffd); + else + PyUnicode_WRITE(kind, data, writer.pos, c + 0xdc00); + writer.pos++; + ++s; + break; + + case _Py_ERROR_IGNORE: + ++s; + break; + + default: startinpos = s-starts; endinpos = startinpos + 1; if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, + errors, &error_handler_obj, "ascii", "ordinal not in range(128)", &starts, &e, &startinpos, &endinpos, &exc, &s, &writer)) @@ -6700,13 +6773,13 @@ PyUnicode_DecodeASCII(const char *s, data = writer.data; } } - Py_XDECREF(errorHandler); + Py_XDECREF(error_handler_obj); Py_XDECREF(exc); return _PyUnicodeWriter_Finish(&writer); onError: _PyUnicodeWriter_Dealloc(&writer); - Py_XDECREF(errorHandler); + Py_XDECREF(error_handler_obj); Py_XDECREF(exc); return NULL; } @@ -8072,7 +8145,7 @@ static int charmap_encoding_error( PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping, PyObject **exceptionObject, - int *known_errorHandler, PyObject **errorHandler, const char *errors, + _Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors, PyObject **res, Py_ssize_t *respos) { PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ @@ -8119,23 +8192,15 @@ charmap_encoding_error( } /* cache callback name lookup * (if not done yet, i.e. it's the first error) */ - if (*known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - *known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - *known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - *known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - *known_errorHandler = 4; - else - *known_errorHandler = 0; - } - switch (*known_errorHandler) { - case 1: /* strict */ + if (*error_handler == _Py_ERROR_UNKNOWN) + *error_handler = get_error_handler(errors); + + switch (*error_handler) { + case _Py_ERROR_STRICT: raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); return -1; - case 2: /* replace */ + + case _Py_ERROR_REPLACE: for (collpos = collstartpos; collpos<collendpos; ++collpos) { x = charmapencode_output('?', mapping, res, respos); if (x==enc_EXCEPTION) { @@ -8147,10 +8212,11 @@ charmap_encoding_error( } } /* fall through */ - case 3: /* ignore */ + case _Py_ERROR_IGNORE: *inpos = collendpos; break; - case 4: /* xmlcharrefreplace */ + + case _Py_ERROR_XMLCHARREFREPLACE: /* generate replacement (temporarily (mis)uses p) */ for (collpos = collstartpos; collpos < collendpos; ++collpos) { char buffer[2+29+1+1]; @@ -8168,8 +8234,9 @@ charmap_encoding_error( } *inpos = collendpos; break; + default: - repunicode = unicode_encode_call_errorhandler(errors, errorHandler, + repunicode = unicode_encode_call_errorhandler(errors, error_handler_obj, encoding, reason, unicode, exceptionObject, collstartpos, collendpos, &newpos); if (repunicode == NULL) @@ -8232,12 +8299,9 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, Py_ssize_t size; /* current output position */ Py_ssize_t respos = 0; - PyObject *errorHandler = NULL; + PyObject *error_handler_obj = NULL; PyObject *exc = NULL; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, - * 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; + _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; void *data; int kind; @@ -8268,7 +8332,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, if (x==enc_FAILED) { /* unencodable character */ if (charmap_encoding_error(unicode, &inpos, mapping, &exc, - &known_errorHandler, &errorHandler, errors, + &error_handler, &error_handler_obj, errors, &res, &respos)) { goto onError; } @@ -8284,13 +8348,13 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, goto onError; Py_XDECREF(exc); - Py_XDECREF(errorHandler); + Py_XDECREF(error_handler_obj); return res; onError: Py_XDECREF(res); Py_XDECREF(exc); - Py_XDECREF(errorHandler); + Py_XDECREF(error_handler_obj); return NULL; } @@ -10887,6 +10951,12 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) } int +_PyUnicode_EQ(PyObject *aa, PyObject *bb) +{ + return unicode_eq(aa, bb); +} + +int PyUnicode_Contains(PyObject *container, PyObject *element) { PyObject *str, *sub; @@ -13256,7 +13326,9 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t newlen; PyObject *newbuffer; - assert(length > 0); + /* ensure that the _PyUnicodeWriter_Prepare macro was used */ + assert((maxchar > writer->maxchar && length >= 0) + || length > 0); if (length > PY_SSIZE_T_MAX - writer->pos) { PyErr_NoMemory(); @@ -13322,6 +13394,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, #undef OVERALLOCATE_FACTOR } +int +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + enum PyUnicode_Kind kind) +{ + Py_UCS4 maxchar; + + /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */ + assert(writer->kind < kind); + + switch (kind) + { + case PyUnicode_1BYTE_KIND: maxchar = 0xff; break; + case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break; + case PyUnicode_4BYTE_KIND: maxchar = 0x10ffff; break; + default: + assert(0 && "invalid kind"); + return -1; + } + + return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar); +} + Py_LOCAL_INLINE(int) _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch) { |