summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r--Objects/abstract.c11
-rw-r--r--Objects/bytearrayobject.c995
-rw-r--r--Objects/bytes_methods.c430
-rw-r--r--Objects/bytesobject.c2245
-rw-r--r--Objects/clinic/bytesobject.c.h22
-rw-r--r--Objects/codeobject.c11
-rw-r--r--Objects/descrobject.c2
-rw-r--r--Objects/dictobject.c63
-rw-r--r--Objects/exceptions.c22
-rw-r--r--Objects/floatobject.c59
-rw-r--r--Objects/frameobject.c8
-rw-r--r--Objects/funcobject.c35
-rw-r--r--Objects/genobject.c14
-rw-r--r--Objects/listobject.c22
-rw-r--r--Objects/listsort.txt2
-rw-r--r--Objects/lnotab_notes.txt47
-rw-r--r--Objects/longobject.c212
-rw-r--r--Objects/memoryobject.c4
-rw-r--r--Objects/object.c66
-rw-r--r--Objects/obmalloc.c283
-rw-r--r--Objects/odictobject.c23
-rw-r--r--Objects/setobject.c615
-rw-r--r--Objects/stringlib/codecs.h207
-rw-r--r--Objects/stringlib/ctype.h5
-rw-r--r--Objects/stringlib/fastsearch.h150
-rw-r--r--Objects/stringlib/find.h82
-rw-r--r--Objects/stringlib/find_max_char.h5
-rw-r--r--Objects/stringlib/join.h2
-rw-r--r--Objects/stringlib/localeutil.h4
-rw-r--r--Objects/stringlib/transmogrify.h611
-rw-r--r--Objects/stringlib/unicode_format.h2
-rw-r--r--Objects/structseq.c30
-rw-r--r--Objects/tupleobject.c5
-rw-r--r--Objects/typeobject.c100
-rw-r--r--Objects/unicodeobject.c1450
-rw-r--r--Objects/weakrefobject.c2
36 files changed, 3867 insertions, 3979 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index a0362e7..3e1ff97 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -141,8 +141,11 @@ PyObject_GetItem(PyObject *o, PyObject *key)
return null_error();
m = o->ob_type->tp_as_mapping;
- if (m && m->mp_subscript)
- return m->mp_subscript(o, key);
+ if (m && m->mp_subscript) {
+ PyObject *item = m->mp_subscript(o, key);
+ assert((item != NULL) ^ (PyErr_Occurred() != NULL));
+ return item;
+ }
if (o->ob_type->tp_as_sequence) {
if (PyIndex_Check(key)) {
@@ -1544,8 +1547,10 @@ PySequence_GetItem(PyObject *s, Py_ssize_t i)
if (i < 0) {
if (m->sq_length) {
Py_ssize_t l = (*m->sq_length)(s);
- if (l < 0)
+ if (l < 0) {
+ assert(PyErr_Occurred());
return NULL;
+ }
i += l;
}
}
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 277be59..b7dfd6f 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -279,31 +279,6 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
return (PyObject *)result;
}
-static PyObject *
-bytearray_format(PyByteArrayObject *self, PyObject *args)
-{
- PyObject *bytes_in, *bytes_out, *res;
- char *bytestring;
-
- if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
- }
- bytestring = PyByteArray_AS_STRING(self);
- bytes_in = PyBytes_FromString(bytestring);
- if (bytes_in == NULL)
- return NULL;
- bytes_out = _PyBytes_Format(bytes_in, args);
- Py_DECREF(bytes_in);
- if (bytes_out == NULL)
- return NULL;
- res = PyByteArray_FromObject(bytes_out);
- Py_DECREF(bytes_out);
- if (res == NULL)
- return NULL;
- return res;
-}
-
/* Functions stuffed into the type object */
static Py_ssize_t
@@ -1122,161 +1097,27 @@ bytearray_dealloc(PyByteArrayObject *self)
#include "stringlib/transmogrify.h"
-/* The following Py_LOCAL_INLINE and Py_LOCAL functions
-were copied from the old char* style string object. */
-
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len) \
- if (end > len) \
- end = len; \
- else if (end < 0) { \
- end += len; \
- if (end < 0) \
- end = 0; \
- } \
- if (start < 0) { \
- start += len; \
- if (start < 0) \
- start = 0; \
- }
-
-Py_LOCAL_INLINE(Py_ssize_t)
-bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
-{
- PyObject *subobj;
- char byte;
- Py_buffer subbuf;
- const char *sub;
- Py_ssize_t len, sub_len;
- Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
- Py_ssize_t res;
-
- if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
- args, &subobj, &byte, &start, &end))
- return -2;
-
- if (subobj) {
- if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
- return -2;
-
- sub = subbuf.buf;
- sub_len = subbuf.len;
- }
- else {
- sub = &byte;
- sub_len = 1;
- }
- len = PyByteArray_GET_SIZE(self);
-
- ADJUST_INDICES(start, end, len);
- if (end - start < sub_len)
- res = -1;
- else if (sub_len == 1
-#ifndef HAVE_MEMRCHR
- && dir > 0
-#endif
- ) {
- unsigned char needle = *sub;
- int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
- res = stringlib_fastsearch_memchr_1char(
- PyByteArray_AS_STRING(self) + start, end - start,
- needle, needle, mode);
- if (res >= 0)
- res += start;
- }
- else {
- if (dir > 0)
- res = stringlib_find_slice(
- PyByteArray_AS_STRING(self), len,
- sub, sub_len, start, end);
- else
- res = stringlib_rfind_slice(
- PyByteArray_AS_STRING(self), len,
- sub, sub_len, start, end);
- }
-
- if (subobj)
- PyBuffer_Release(&subbuf);
-
- return res;
-}
-
-PyDoc_STRVAR(find__doc__,
-"B.find(sub[, start[, end]]) -> int\n\
-\n\
-Return the lowest index in B where subsection sub is found,\n\
-such that sub is contained within B[start,end]. Optional\n\
-arguments start and end are interpreted as in slice notation.\n\
-\n\
-Return -1 on failure.");
-
static PyObject *
bytearray_find(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t result = bytearray_find_internal(self, args, +1);
- if (result == -2)
- return NULL;
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_find(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
-PyDoc_STRVAR(count__doc__,
-"B.count(sub[, start[, end]]) -> int\n\
-\n\
-Return the number of non-overlapping occurrences of subsection sub in\n\
-bytes B[start:end]. Optional arguments start and end are interpreted\n\
-as in slice notation.");
-
static PyObject *
bytearray_count(PyByteArrayObject *self, PyObject *args)
{
- PyObject *sub_obj;
- const char *str = PyByteArray_AS_STRING(self), *sub;
- Py_ssize_t sub_len;
- char byte;
- Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
-
- Py_buffer vsub;
- PyObject *count_obj;
-
- if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
- &start, &end))
- return NULL;
-
- if (sub_obj) {
- if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
- return NULL;
-
- sub = vsub.buf;
- sub_len = vsub.len;
- }
- else {
- sub = &byte;
- sub_len = 1;
- }
-
- ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
-
- count_obj = PyLong_FromSsize_t(
- stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
- );
-
- if (sub_obj)
- PyBuffer_Release(&vsub);
-
- return count_obj;
+ return _Py_bytes_count(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
/*[clinic input]
bytearray.clear
- self: self(type="PyByteArrayObject *")
-
Remove all items from the bytearray.
[clinic start generated code]*/
static PyObject *
bytearray_clear_impl(PyByteArrayObject *self)
-/*[clinic end generated code: output=85c2fe6aede0956c input=e524fd330abcdc18]*/
+/*[clinic end generated code: output=85c2fe6aede0956c input=ed6edae9de447ac4]*/
{
if (PyByteArray_Resize((PyObject *)self, 0) < 0)
return NULL;
@@ -1286,236 +1127,57 @@ bytearray_clear_impl(PyByteArrayObject *self)
/*[clinic input]
bytearray.copy
- self: self(type="PyByteArrayObject *")
-
Return a copy of B.
[clinic start generated code]*/
static PyObject *
bytearray_copy_impl(PyByteArrayObject *self)
-/*[clinic end generated code: output=68cfbcfed484c132 input=6d5d2975aa0f33f3]*/
+/*[clinic end generated code: output=68cfbcfed484c132 input=6597b0c01bccaa9e]*/
{
return PyByteArray_FromStringAndSize(PyByteArray_AS_STRING((PyObject *)self),
PyByteArray_GET_SIZE(self));
}
-PyDoc_STRVAR(index__doc__,
-"B.index(sub[, start[, end]]) -> int\n\
-\n\
-Like B.find() but raise ValueError when the subsection is not found.");
-
static PyObject *
bytearray_index(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t result = bytearray_find_internal(self, args, +1);
- if (result == -2)
- return NULL;
- if (result == -1) {
- PyErr_SetString(PyExc_ValueError,
- "subsection not found");
- return NULL;
- }
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_index(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
-
-PyDoc_STRVAR(rfind__doc__,
-"B.rfind(sub[, start[, end]]) -> int\n\
-\n\
-Return the highest index in B where subsection sub is found,\n\
-such that sub is contained within B[start,end]. Optional\n\
-arguments start and end are interpreted as in slice notation.\n\
-\n\
-Return -1 on failure.");
-
static PyObject *
bytearray_rfind(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t result = bytearray_find_internal(self, args, -1);
- if (result == -2)
- return NULL;
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_rfind(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
-
-PyDoc_STRVAR(rindex__doc__,
-"B.rindex(sub[, start[, end]]) -> int\n\
-\n\
-Like B.rfind() but raise ValueError when the subsection is not found.");
-
static PyObject *
bytearray_rindex(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t result = bytearray_find_internal(self, args, -1);
- if (result == -2)
- return NULL;
- if (result == -1) {
- PyErr_SetString(PyExc_ValueError,
- "subsection not found");
- return NULL;
- }
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_rindex(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
-
static int
bytearray_contains(PyObject *self, PyObject *arg)
{
- Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
- if (ival == -1 && PyErr_Occurred()) {
- Py_buffer varg;
- Py_ssize_t pos;
- PyErr_Clear();
- if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
- return -1;
- pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
- varg.buf, varg.len, 0);
- PyBuffer_Release(&varg);
- return pos >= 0;
- }
- if (ival < 0 || ival >= 256) {
- PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
- return -1;
- }
-
- return memchr(PyByteArray_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
+ return _Py_bytes_contains(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), arg);
}
-
-/* Matches the end (direction >= 0) or start (direction < 0) of self
- * against substr, using the start and end arguments. Returns
- * -1 on error, 0 if not found and 1 if found.
- */
-Py_LOCAL(int)
-_bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
- Py_ssize_t end, int direction)
-{
- Py_ssize_t len = PyByteArray_GET_SIZE(self);
- const char* str;
- Py_buffer vsubstr;
- int rv = 0;
-
- str = PyByteArray_AS_STRING(self);
-
- if (PyObject_GetBuffer(substr, &vsubstr, PyBUF_SIMPLE) != 0)
- return -1;
-
- ADJUST_INDICES(start, end, len);
-
- if (direction < 0) {
- /* startswith */
- if (start+vsubstr.len > len) {
- goto done;
- }
- } else {
- /* endswith */
- if (end-start < vsubstr.len || start > len) {
- goto done;
- }
-
- if (end-vsubstr.len > start)
- start = end - vsubstr.len;
- }
- if (end-start >= vsubstr.len)
- rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
-
-done:
- PyBuffer_Release(&vsubstr);
- return rv;
-}
-
-
-PyDoc_STRVAR(startswith__doc__,
-"B.startswith(prefix[, start[, end]]) -> bool\n\
-\n\
-Return True if B starts with the specified prefix, False otherwise.\n\
-With optional start, test B beginning at that position.\n\
-With optional end, stop comparing B at that position.\n\
-prefix can also be a tuple of bytes to try.");
-
static PyObject *
bytearray_startswith(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *subobj;
- int result;
-
- if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
- return NULL;
- if (PyTuple_Check(subobj)) {
- Py_ssize_t i;
- for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- result = _bytearray_tailmatch(self,
- PyTuple_GET_ITEM(subobj, i),
- start, end, -1);
- if (result == -1)
- return NULL;
- else if (result) {
- Py_RETURN_TRUE;
- }
- }
- Py_RETURN_FALSE;
- }
- result = _bytearray_tailmatch(self, subobj, start, end, -1);
- if (result == -1) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
- "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
- return NULL;
- }
- else
- return PyBool_FromLong(result);
+ return _Py_bytes_startswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
-PyDoc_STRVAR(endswith__doc__,
-"B.endswith(suffix[, start[, end]]) -> bool\n\
-\n\
-Return True if B ends with the specified suffix, False otherwise.\n\
-With optional start, test B beginning at that position.\n\
-With optional end, stop comparing B at that position.\n\
-suffix can also be a tuple of bytes to try.");
-
static PyObject *
bytearray_endswith(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *subobj;
- int result;
-
- if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
- return NULL;
- if (PyTuple_Check(subobj)) {
- Py_ssize_t i;
- for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- result = _bytearray_tailmatch(self,
- PyTuple_GET_ITEM(subobj, i),
- start, end, +1);
- if (result == -1)
- return NULL;
- else if (result) {
- Py_RETURN_TRUE;
- }
- }
- Py_RETURN_FALSE;
- }
- result = _bytearray_tailmatch(self, subobj, start, end, +1);
- if (result == -1) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
- "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
- return NULL;
- }
- else
- return PyBool_FromLong(result);
+ return _Py_bytes_endswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
}
/*[clinic input]
bytearray.translate
- self: self(type="PyByteArrayObject *")
table: object
Translation table, which must be a bytes object of length 256.
[
@@ -1532,7 +1194,7 @@ The remaining characters are mapped through the given translation table.
static PyObject *
bytearray_translate_impl(PyByteArrayObject *self, PyObject *table,
int group_right_1, PyObject *deletechars)
-/*[clinic end generated code: output=2bebc86a9a1ff083 input=b749ad85f4860824]*/
+/*[clinic end generated code: output=2bebc86a9a1ff083 input=846a01671bccc1c5]*/
{
char *input, *output;
const char *table_chars;
@@ -1575,7 +1237,7 @@ bytearray_translate_impl(PyByteArrayObject *self, PyObject *table,
result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
if (result == NULL)
goto done;
- output_start = output = PyByteArray_AsString(result);
+ output_start = output = PyByteArray_AS_STRING(result);
input = PyByteArray_AS_STRING(input_obj);
if (vdel.len == 0 && table_chars != NULL) {
@@ -1645,493 +1307,6 @@ bytearray_maketrans_impl(Py_buffer *frm, Py_buffer *to)
}
-/* find and count characters and substrings */
-
-#define findchar(target, target_len, c) \
- ((char *)memchr((const void *)(target), c, target_len))
-
-
-/* Bytes ops must return a string, create a copy */
-Py_LOCAL(PyByteArrayObject *)
-return_self(PyByteArrayObject *self)
-{
- /* always return a new bytearray */
- return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
- PyByteArray_AS_STRING(self),
- PyByteArray_GET_SIZE(self));
-}
-
-Py_LOCAL_INLINE(Py_ssize_t)
-countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
-{
- Py_ssize_t count=0;
- const char *start=target;
- const char *end=target+target_len;
-
- while ( (start=findchar(start, end-start, c)) != NULL ) {
- count++;
- if (count >= maxcount)
- break;
- start += 1;
- }
- return count;
-}
-
-
-/* Algorithms for different cases of string replacement */
-
-/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
-Py_LOCAL(PyByteArrayObject *)
-replace_interleave(PyByteArrayObject *self,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, i;
- PyByteArrayObject *result;
-
- self_len = PyByteArray_GET_SIZE(self);
-
- /* 1 at the end plus 1 after every character;
- count = min(maxcount, self_len + 1) */
- if (maxcount <= self_len)
- count = maxcount;
- else
- /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
- count = self_len + 1;
-
- /* Check for overflow */
- /* result_len = count * to_len + self_len; */
- assert(count > 0);
- if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError,
- "replace string is too long");
- return NULL;
- }
- result_len = count * to_len + self_len;
-
- if (! (result = (PyByteArrayObject *)
- PyByteArray_FromStringAndSize(NULL, result_len)) )
- return NULL;
-
- self_s = PyByteArray_AS_STRING(self);
- result_s = PyByteArray_AS_STRING(result);
-
- /* TODO: special case single character, which doesn't need memcpy */
-
- /* Lay the first one down (guaranteed this will occur) */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- count -= 1;
-
- for (i=0; i<count; i++) {
- *result_s++ = *self_s++;
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- }
-
- /* Copy the rest of the original string */
- Py_MEMCPY(result_s, self_s, self_len-i);
-
- return result;
-}
-
-/* Special case for deleting a single character */
-/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
-Py_LOCAL(PyByteArrayObject *)
-replace_delete_single_character(PyByteArrayObject *self,
- char from_c, Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count;
- PyByteArrayObject *result;
-
- self_len = PyByteArray_GET_SIZE(self);
- self_s = PyByteArray_AS_STRING(self);
-
- count = countchar(self_s, self_len, from_c, maxcount);
- if (count == 0) {
- return return_self(self);
- }
-
- result_len = self_len - count; /* from_len == 1 */
- assert(result_len>=0);
-
- if ( (result = (PyByteArrayObject *)
- PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
- return NULL;
- result_s = PyByteArray_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- next = findchar(start, end-start, from_c);
- if (next == NULL)
- break;
- Py_MEMCPY(result_s, start, next-start);
- result_s += (next-start);
- start = next+1;
- }
- Py_MEMCPY(result_s, start, end-start);
-
- return result;
-}
-
-/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
-
-Py_LOCAL(PyByteArrayObject *)
-replace_delete_substring(PyByteArrayObject *self,
- const char *from_s, Py_ssize_t from_len,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset;
- PyByteArrayObject *result;
-
- self_len = PyByteArray_GET_SIZE(self);
- self_s = PyByteArray_AS_STRING(self);
-
- count = stringlib_count(self_s, self_len,
- from_s, from_len,
- maxcount);
-
- if (count == 0) {
- /* no matches */
- return return_self(self);
- }
-
- result_len = self_len - (count * from_len);
- assert (result_len>=0);
-
- if ( (result = (PyByteArrayObject *)
- PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
- return NULL;
-
- result_s = PyByteArray_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- offset = stringlib_find(start, end-start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- next = start + offset;
-
- Py_MEMCPY(result_s, start, next-start);
-
- result_s += (next-start);
- start = next+from_len;
- }
- Py_MEMCPY(result_s, start, end-start);
- return result;
-}
-
-/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
-Py_LOCAL(PyByteArrayObject *)
-replace_single_character_in_place(PyByteArrayObject *self,
- char from_c, char to_c,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s, *start, *end, *next;
- Py_ssize_t self_len;
- PyByteArrayObject *result;
-
- /* The result string will be the same size */
- self_s = PyByteArray_AS_STRING(self);
- self_len = PyByteArray_GET_SIZE(self);
-
- next = findchar(self_s, self_len, from_c);
-
- if (next == NULL) {
- /* No matches; return the original bytes */
- return return_self(self);
- }
-
- /* Need to make a new bytes */
- result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
- if (result == NULL)
- return NULL;
- result_s = PyByteArray_AS_STRING(result);
- Py_MEMCPY(result_s, self_s, self_len);
-
- /* change everything in-place, starting with this one */
- start = result_s + (next-self_s);
- *start = to_c;
- start++;
- end = result_s + self_len;
-
- while (--maxcount > 0) {
- next = findchar(start, end-start, from_c);
- if (next == NULL)
- break;
- *next = to_c;
- start = next+1;
- }
-
- return result;
-}
-
-/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
-Py_LOCAL(PyByteArrayObject *)
-replace_substring_in_place(PyByteArrayObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *result_s, *start, *end;
- char *self_s;
- Py_ssize_t self_len, offset;
- PyByteArrayObject *result;
-
- /* The result bytes will be the same size */
-
- self_s = PyByteArray_AS_STRING(self);
- self_len = PyByteArray_GET_SIZE(self);
-
- offset = stringlib_find(self_s, self_len,
- from_s, from_len,
- 0);
- if (offset == -1) {
- /* No matches; return the original bytes */
- return return_self(self);
- }
-
- /* Need to make a new bytes */
- result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
- if (result == NULL)
- return NULL;
- result_s = PyByteArray_AS_STRING(result);
- Py_MEMCPY(result_s, self_s, self_len);
-
- /* change everything in-place, starting with this one */
- start = result_s + offset;
- Py_MEMCPY(start, to_s, from_len);
- start += from_len;
- end = result_s + self_len;
-
- while ( --maxcount > 0) {
- offset = stringlib_find(start, end-start,
- from_s, from_len,
- 0);
- if (offset==-1)
- break;
- Py_MEMCPY(start+offset, to_s, from_len);
- start += offset+from_len;
- }
-
- return result;
-}
-
-/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
-Py_LOCAL(PyByteArrayObject *)
-replace_single_character(PyByteArrayObject *self,
- char from_c,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count;
- PyByteArrayObject *result;
-
- self_s = PyByteArray_AS_STRING(self);
- self_len = PyByteArray_GET_SIZE(self);
-
- count = countchar(self_s, self_len, from_c, maxcount);
- if (count == 0) {
- /* no matches, return unchanged */
- return return_self(self);
- }
-
- /* use the difference between current and new, hence the "-1" */
- /* result_len = self_len + count * (to_len-1) */
- assert(count > 0);
- if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
- return NULL;
- }
- result_len = self_len + count * (to_len - 1);
-
- if ( (result = (PyByteArrayObject *)
- PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
- return NULL;
- result_s = PyByteArray_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- next = findchar(start, end-start, from_c);
- if (next == NULL)
- break;
-
- if (next == start) {
- /* replace with the 'to' */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start += 1;
- } else {
- /* copy the unchanged old then the 'to' */
- Py_MEMCPY(result_s, start, next-start);
- result_s += (next-start);
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start = next+1;
- }
- }
- /* Copy the remainder of the remaining bytes */
- Py_MEMCPY(result_s, start, end-start);
-
- return result;
-}
-
-/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
-Py_LOCAL(PyByteArrayObject *)
-replace_substring(PyByteArrayObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset;
- PyByteArrayObject *result;
-
- self_s = PyByteArray_AS_STRING(self);
- self_len = PyByteArray_GET_SIZE(self);
-
- count = stringlib_count(self_s, self_len,
- from_s, from_len,
- maxcount);
-
- if (count == 0) {
- /* no matches, return unchanged */
- return return_self(self);
- }
-
- /* Check for overflow */
- /* result_len = self_len + count * (to_len-from_len) */
- assert(count > 0);
- if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
- return NULL;
- }
- result_len = self_len + count * (to_len - from_len);
-
- if ( (result = (PyByteArrayObject *)
- PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
- return NULL;
- result_s = PyByteArray_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- offset = stringlib_find(start, end-start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- next = start+offset;
- if (next == start) {
- /* replace with the 'to' */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start += from_len;
- } else {
- /* copy the unchanged old then the 'to' */
- Py_MEMCPY(result_s, start, next-start);
- result_s += (next-start);
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start = next+from_len;
- }
- }
- /* Copy the remainder of the remaining bytes */
- Py_MEMCPY(result_s, start, end-start);
-
- return result;
-}
-
-
-Py_LOCAL(PyByteArrayObject *)
-replace(PyByteArrayObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- if (maxcount < 0) {
- maxcount = PY_SSIZE_T_MAX;
- } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
- /* nothing to do; return the original bytes */
- return return_self(self);
- }
-
- if (maxcount == 0 ||
- (from_len == 0 && to_len == 0)) {
- /* nothing to do; return the original bytes */
- return return_self(self);
- }
-
- /* Handle zero-length special cases */
-
- if (from_len == 0) {
- /* insert the 'to' bytes everywhere. */
- /* >>> "Python".replace("", ".") */
- /* '.P.y.t.h.o.n.' */
- return replace_interleave(self, to_s, to_len, maxcount);
- }
-
- /* Except for "".replace("", "A") == "A" there is no way beyond this */
- /* point for an empty self bytes to generate a non-empty bytes */
- /* Special case so the remaining code always gets a non-empty bytes */
- if (PyByteArray_GET_SIZE(self) == 0) {
- return return_self(self);
- }
-
- if (to_len == 0) {
- /* delete all occurrences of 'from' bytes */
- if (from_len == 1) {
- return replace_delete_single_character(
- self, from_s[0], maxcount);
- } else {
- return replace_delete_substring(self, from_s, from_len, maxcount);
- }
- }
-
- /* Handle special case where both bytes have the same length */
-
- if (from_len == to_len) {
- if (from_len == 1) {
- return replace_single_character_in_place(
- self,
- from_s[0],
- to_s[0],
- maxcount);
- } else {
- return replace_substring_in_place(
- self, from_s, from_len, to_s, to_len, maxcount);
- }
- }
-
- /* Otherwise use the more generic algorithms */
- if (from_len == 1) {
- return replace_single_character(self, from_s[0],
- to_s, to_len, maxcount);
- } else {
- /* len('from')>=2, len('to')>=1 */
- return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
- }
-}
-
-
/*[clinic input]
bytearray.replace
@@ -2153,9 +1328,9 @@ bytearray_replace_impl(PyByteArrayObject *self, Py_buffer *old,
Py_buffer *new, Py_ssize_t count)
/*[clinic end generated code: output=d39884c4dc59412a input=aa379d988637c7fb]*/
{
- return (PyObject *)replace((PyByteArrayObject *) self,
- old->buf, old->len,
- new->buf, new->len, count);
+ return stringlib_replace((PyObject *)self,
+ (const char *)old->buf, old->len,
+ (const char *)new->buf, new->len, count);
}
/*[clinic input]
@@ -2203,7 +1378,6 @@ bytearray_split_impl(PyByteArrayObject *self, PyObject *sep,
/*[clinic input]
bytearray.partition
- self: self(type="PyByteArrayObject *")
sep: object
/
@@ -2219,7 +1393,7 @@ bytearray object and two empty bytearray objects.
static PyObject *
bytearray_partition(PyByteArrayObject *self, PyObject *sep)
-/*[clinic end generated code: output=45d2525ddd35f957 input=7d7fe37b1696d506]*/
+/*[clinic end generated code: output=45d2525ddd35f957 input=86f89223892b70b5]*/
{
PyObject *bytesep, *result;
@@ -2241,7 +1415,6 @@ bytearray_partition(PyByteArrayObject *self, PyObject *sep)
/*[clinic input]
bytearray.rpartition
- self: self(type="PyByteArrayObject *")
sep: object
/
@@ -2257,7 +1430,7 @@ objects and the original bytearray object.
static PyObject *
bytearray_rpartition(PyByteArrayObject *self, PyObject *sep)
-/*[clinic end generated code: output=440de3c9426115e8 input=9b8cd540c1b75853]*/
+/*[clinic end generated code: output=440de3c9426115e8 input=5f4094f2de87c8f3]*/
{
PyObject *bytesep, *result;
@@ -2315,14 +1488,12 @@ bytearray_rsplit_impl(PyByteArrayObject *self, PyObject *sep,
/*[clinic input]
bytearray.reverse
- self: self(type="PyByteArrayObject *")
-
Reverse the order of the values in B in place.
[clinic start generated code]*/
static PyObject *
bytearray_reverse_impl(PyByteArrayObject *self)
-/*[clinic end generated code: output=9f7616f29ab309d3 input=7933a499b8597bd1]*/
+/*[clinic end generated code: output=9f7616f29ab309d3 input=543356319fc78557]*/
{
char swap, *head, *tail;
Py_ssize_t i, j, n = Py_SIZE(self);
@@ -2351,7 +1522,6 @@ class bytesvalue_converter(CConverter):
/*[clinic input]
bytearray.insert
- self: self(type="PyByteArrayObject *")
index: Py_ssize_t
The index where the value is to be inserted.
item: bytesvalue
@@ -2363,7 +1533,7 @@ Insert a single item into the bytearray before the given index.
static PyObject *
bytearray_insert_impl(PyByteArrayObject *self, Py_ssize_t index, int item)
-/*[clinic end generated code: output=76c775a70e7b07b7 input=833766836ba30e1e]*/
+/*[clinic end generated code: output=76c775a70e7b07b7 input=b2b5d07e9de6c070]*/
{
Py_ssize_t n = Py_SIZE(self);
char *buf;
@@ -2393,7 +1563,6 @@ bytearray_insert_impl(PyByteArrayObject *self, Py_ssize_t index, int item)
/*[clinic input]
bytearray.append
- self: self(type="PyByteArrayObject *")
item: bytesvalue
The item to be appended.
/
@@ -2403,7 +1572,7 @@ Append a single item to the end of the bytearray.
static PyObject *
bytearray_append_impl(PyByteArrayObject *self, int item)
-/*[clinic end generated code: output=a154e19ed1886cb6 input=ae56ea87380407cc]*/
+/*[clinic end generated code: output=a154e19ed1886cb6 input=20d6bec3d1340593]*/
{
Py_ssize_t n = Py_SIZE(self);
@@ -2423,7 +1592,6 @@ bytearray_append_impl(PyByteArrayObject *self, int item)
/*[clinic input]
bytearray.extend
- self: self(type="PyByteArrayObject *")
iterable_of_ints: object
The iterable of items to append.
/
@@ -2433,7 +1601,7 @@ Append all the items from the iterator or sequence to the end of the bytearray.
static PyObject *
bytearray_extend(PyByteArrayObject *self, PyObject *iterable_of_ints)
-/*[clinic end generated code: output=98155dbe249170b1 input=ce83a5d75b70d850]*/
+/*[clinic end generated code: output=98155dbe249170b1 input=c617b3a93249ba28]*/
{
PyObject *it, *item, *bytearray_obj;
Py_ssize_t buf_size = 0, len = 0;
@@ -2508,7 +1676,6 @@ bytearray_extend(PyByteArrayObject *self, PyObject *iterable_of_ints)
/*[clinic input]
bytearray.pop
- self: self(type="PyByteArrayObject *")
index: Py_ssize_t = -1
The index from where to remove the item.
-1 (the default value) means remove the last item.
@@ -2521,7 +1688,7 @@ If no index argument is given, will pop the last item.
static PyObject *
bytearray_pop_impl(PyByteArrayObject *self, Py_ssize_t index)
-/*[clinic end generated code: output=e0ccd401f8021da8 input=0797e6c0ca9d5a85]*/
+/*[clinic end generated code: output=e0ccd401f8021da8 input=3591df2d06c0d237]*/
{
int value;
Py_ssize_t n = Py_SIZE(self);
@@ -2553,7 +1720,6 @@ bytearray_pop_impl(PyByteArrayObject *self, Py_ssize_t index)
/*[clinic input]
bytearray.remove
- self: self(type="PyByteArrayObject *")
value: bytesvalue
The value to remove.
/
@@ -2563,20 +1729,20 @@ Remove the first occurrence of a value in the bytearray.
static PyObject *
bytearray_remove_impl(PyByteArrayObject *self, int value)
-/*[clinic end generated code: output=d659e37866709c13 input=47560b11fd856c24]*/
+/*[clinic end generated code: output=d659e37866709c13 input=121831240cd51ddf]*/
{
- Py_ssize_t n = Py_SIZE(self);
+ Py_ssize_t where, n = Py_SIZE(self);
char *buf = PyByteArray_AS_STRING(self);
- char *where = memchr(buf, value, n);
- if (!where) {
+ where = stringlib_find_char(buf, n, value);
+ if (where < 0) {
PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
return NULL;
}
if (!_canresize(self))
return NULL;
- memmove(where, where + 1, buf + n - where);
+ memmove(buf + where, buf + where + 1, n - where);
if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
return NULL;
@@ -2586,8 +1752,8 @@ bytearray_remove_impl(PyByteArrayObject *self, int value)
/* XXX These two helpers could be optimized if argsize == 1 */
static Py_ssize_t
-lstrip_helper(char *myptr, Py_ssize_t mysize,
- void *argptr, Py_ssize_t argsize)
+lstrip_helper(const char *myptr, Py_ssize_t mysize,
+ const void *argptr, Py_ssize_t argsize)
{
Py_ssize_t i = 0;
while (i < mysize && memchr(argptr, (unsigned char) myptr[i], argsize))
@@ -2596,8 +1762,8 @@ lstrip_helper(char *myptr, Py_ssize_t mysize,
}
static Py_ssize_t
-rstrip_helper(char *myptr, Py_ssize_t mysize,
- void *argptr, Py_ssize_t argsize)
+rstrip_helper(const char *myptr, Py_ssize_t mysize,
+ const void *argptr, Py_ssize_t argsize)
{
Py_ssize_t i = mysize - 1;
while (i >= 0 && memchr(argptr, (unsigned char) myptr[i], argsize))
@@ -2798,22 +1964,6 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends)
);
}
-static int
-hex_digit_to_int(Py_UCS4 c)
-{
- if (c >= 128)
- return -1;
- if (Py_ISDIGIT(c))
- return c - '0';
- else {
- if (Py_ISUPPER(c))
- c = Py_TOLOWER(c);
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- }
- return -1;
-}
-
/*[clinic input]
@classmethod
bytearray.fromhex
@@ -2832,48 +1982,7 @@ static PyObject *
bytearray_fromhex_impl(PyObject*cls, PyObject *string)
/*[clinic end generated code: output=df3da60129b3700c input=907bbd2d34d9367a]*/
{
- PyObject *newbytes;
- char *buf;
- Py_ssize_t hexlen, byteslen, i, j;
- int top, bot;
- void *data;
- unsigned int kind;
-
- assert(PyUnicode_Check(string));
- if (PyUnicode_READY(string))
- return NULL;
- kind = PyUnicode_KIND(string);
- data = PyUnicode_DATA(string);
- hexlen = PyUnicode_GET_LENGTH(string);
-
- byteslen = hexlen/2; /* This overestimates if there are spaces */
- newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
- if (!newbytes)
- return NULL;
- buf = PyByteArray_AS_STRING(newbytes);
- for (i = j = 0; i < hexlen; i += 2) {
- /* skip over spaces in the input */
- while (PyUnicode_READ(kind, data, i) == ' ')
- i++;
- if (i >= hexlen)
- break;
- top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
- bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
- if (top == -1 || bot == -1) {
- PyErr_Format(PyExc_ValueError,
- "non-hexadecimal number found in "
- "fromhex() arg at position %zd", i);
- goto error;
- }
- buf[j++] = (top << 4) + bot;
- }
- if (PyByteArray_Resize(newbytes, j) < 0)
- goto error;
- return newbytes;
-
- error:
- Py_DECREF(newbytes);
- return NULL;
+ return _PyBytes_FromHex(string, 1);
}
PyDoc_STRVAR(hex__doc__,
@@ -2928,14 +2037,12 @@ _common_reduce(PyByteArrayObject *self, int proto)
/*[clinic input]
bytearray.__reduce__ as bytearray_reduce
- self: self(type="PyByteArrayObject *")
-
Return state information for pickling.
[clinic start generated code]*/
static PyObject *
bytearray_reduce_impl(PyByteArrayObject *self)
-/*[clinic end generated code: output=52bf304086464cab input=fbb07de4d102a03a]*/
+/*[clinic end generated code: output=52bf304086464cab input=44b5737ada62dd3f]*/
{
return _common_reduce(self, 2);
}
@@ -2943,7 +2050,6 @@ bytearray_reduce_impl(PyByteArrayObject *self)
/*[clinic input]
bytearray.__reduce_ex__ as bytearray_reduce_ex
- self: self(type="PyByteArrayObject *")
proto: int = 0
/
@@ -2952,7 +2058,7 @@ Return state information for pickling.
static PyObject *
bytearray_reduce_ex_impl(PyByteArrayObject *self, int proto)
-/*[clinic end generated code: output=52eac33377197520 input=0e091a42ca6dbd91]*/
+/*[clinic end generated code: output=52eac33377197520 input=f129bc1a1aa151ee]*/
{
return _common_reduce(self, proto);
}
@@ -2960,14 +2066,12 @@ bytearray_reduce_ex_impl(PyByteArrayObject *self, int proto)
/*[clinic input]
bytearray.__sizeof__ as bytearray_sizeof
- self: self(type="PyByteArrayObject *")
-
Returns the size of the bytearray object in memory, in bytes.
[clinic start generated code]*/
static PyObject *
bytearray_sizeof_impl(PyByteArrayObject *self)
-/*[clinic end generated code: output=738abdd17951c427 input=6b23d305362b462b]*/
+/*[clinic end generated code: output=738abdd17951c427 input=e27320fd98a4bc5a]*/
{
Py_ssize_t res;
@@ -3008,19 +2112,22 @@ bytearray_methods[] = {
BYTEARRAY_APPEND_METHODDEF
{"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
_Py_capitalize__doc__},
- {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
+ {"center", (PyCFunction)stringlib_center, METH_VARARGS, _Py_center__doc__},
BYTEARRAY_CLEAR_METHODDEF
BYTEARRAY_COPY_METHODDEF
- {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
+ {"count", (PyCFunction)bytearray_count, METH_VARARGS,
+ _Py_count__doc__},
BYTEARRAY_DECODE_METHODDEF
- {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
+ {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS,
+ _Py_endswith__doc__},
{"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
- expandtabs__doc__},
+ _Py_expandtabs__doc__},
BYTEARRAY_EXTEND_METHODDEF
- {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
+ {"find", (PyCFunction)bytearray_find, METH_VARARGS,
+ _Py_find__doc__},
BYTEARRAY_FROMHEX_METHODDEF
{"hex", (PyCFunction)bytearray_hex, METH_NOARGS, hex__doc__},
- {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
+ {"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__},
BYTEARRAY_INSERT_METHODDEF
{"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
_Py_isalnum__doc__},
@@ -3037,7 +2144,7 @@ bytearray_methods[] = {
{"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
_Py_isupper__doc__},
BYTEARRAY_JOIN_METHODDEF
- {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
+ {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
{"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
BYTEARRAY_LSTRIP_METHODDEF
BYTEARRAY_MAKETRANS_METHODDEF
@@ -3046,23 +2153,23 @@ bytearray_methods[] = {
BYTEARRAY_REMOVE_METHODDEF
BYTEARRAY_REPLACE_METHODDEF
BYTEARRAY_REVERSE_METHODDEF
- {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
- {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
- {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
+ {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, _Py_rfind__doc__},
+ {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, _Py_rindex__doc__},
+ {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
BYTEARRAY_RPARTITION_METHODDEF
BYTEARRAY_RSPLIT_METHODDEF
BYTEARRAY_RSTRIP_METHODDEF
BYTEARRAY_SPLIT_METHODDEF
BYTEARRAY_SPLITLINES_METHODDEF
{"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
- startswith__doc__},
+ _Py_startswith__doc__},
BYTEARRAY_STRIP_METHODDEF
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
BYTEARRAY_TRANSLATE_METHODDEF
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
- {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
+ {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
{NULL}
};
@@ -3071,7 +2178,7 @@ bytearray_mod(PyObject *v, PyObject *w)
{
if (!PyByteArray_Check(v))
Py_RETURN_NOTIMPLEMENTED;
- return bytearray_format((PyByteArrayObject *)v, w);
+ return _PyBytes_FormatEx(PyByteArray_AS_STRING(v), PyByteArray_GET_SIZE(v), w, 1);
}
static PyNumberMethods bytearray_as_number = {
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c
index a299915..fe666c6 100644
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -277,7 +277,7 @@ Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
characters, all remaining cased characters have lowercase.");
void
-_Py_bytes_title(char *result, char *s, Py_ssize_t len)
+_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
{
Py_ssize_t i;
int previous_is_cased = 0;
@@ -306,7 +306,7 @@ Return a copy of B with only its first character capitalized (ASCII)\n\
and the rest lower-cased.");
void
-_Py_bytes_capitalize(char *result, char *s, Py_ssize_t len)
+_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
{
Py_ssize_t i;
@@ -336,7 +336,7 @@ Return a copy of B with uppercase ASCII characters converted\n\
to lowercase ASCII and vice versa.");
void
-_Py_bytes_swapcase(char *result, char *s, Py_ssize_t len)
+_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
{
Py_ssize_t i;
@@ -387,3 +387,427 @@ _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
return res;
}
+
+#define FASTSEARCH fastsearch
+#define STRINGLIB(F) stringlib_##F
+#define STRINGLIB_CHAR char
+#define STRINGLIB_SIZEOF_CHAR 1
+
+#include "stringlib/fastsearch.h"
+#include "stringlib/count.h"
+#include "stringlib/find.h"
+
+/*
+Wraps stringlib_parse_args_finds() and additionally checks whether the
+first argument is an integer in range(0, 256).
+
+If this is the case, writes the integer value to the byte parameter
+and sets subobj to NULL. Otherwise, sets the first argument to subobj
+and doesn't touch byte. The other parameters are similar to those of
+stringlib_parse_args_finds().
+*/
+
+Py_LOCAL_INLINE(int)
+parse_args_finds_byte(const char *function_name, PyObject *args,
+ PyObject **subobj, char *byte,
+ Py_ssize_t *start, Py_ssize_t *end)
+{
+ PyObject *tmp_subobj;
+ Py_ssize_t ival;
+ PyObject *err;
+
+ if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
+ start, end))
+ return 0;
+
+ if (!PyNumber_Check(tmp_subobj)) {
+ *subobj = tmp_subobj;
+ return 1;
+ }
+
+ ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError);
+ if (ival == -1) {
+ err = PyErr_Occurred();
+ if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) {
+ PyErr_Clear();
+ *subobj = tmp_subobj;
+ return 1;
+ }
+ }
+
+ if (ival < 0 || ival > 255) {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return 0;
+ }
+
+ *subobj = NULL;
+ *byte = (char)ival;
+ return 1;
+}
+
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len) \
+ if (end > len) \
+ end = len; \
+ else if (end < 0) { \
+ end += len; \
+ if (end < 0) \
+ end = 0; \
+ } \
+ if (start < 0) { \
+ start += len; \
+ if (start < 0) \
+ start = 0; \
+ }
+
+Py_LOCAL_INLINE(Py_ssize_t)
+find_internal(const char *str, Py_ssize_t len,
+ const char *function_name, PyObject *args, int dir)
+{
+ PyObject *subobj;
+ char byte;
+ Py_buffer subbuf;
+ const char *sub;
+ Py_ssize_t sub_len;
+ Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
+ Py_ssize_t res;
+
+ if (!parse_args_finds_byte(function_name, args,
+ &subobj, &byte, &start, &end))
+ return -2;
+
+ if (subobj) {
+ if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
+ return -2;
+
+ sub = subbuf.buf;
+ sub_len = subbuf.len;
+ }
+ else {
+ sub = &byte;
+ sub_len = 1;
+ }
+
+ ADJUST_INDICES(start, end, len);
+ if (end - start < sub_len)
+ res = -1;
+ else if (sub_len == 1) {
+ if (dir > 0)
+ res = stringlib_find_char(
+ str + start, end - start,
+ *sub);
+ else
+ res = stringlib_rfind_char(
+ str + start, end - start,
+ *sub);
+ if (res >= 0)
+ res += start;
+ }
+ else {
+ if (dir > 0)
+ res = stringlib_find_slice(
+ str, len,
+ sub, sub_len, start, end);
+ else
+ res = stringlib_rfind_slice(
+ str, len,
+ sub, sub_len, start, end);
+ }
+
+ if (subobj)
+ PyBuffer_Release(&subbuf);
+
+ return res;
+}
+
+PyDoc_STRVAR_shared(_Py_find__doc__,
+"B.find(sub[, start[, end]]) -> int\n\
+\n\
+Return the lowest index in B where subsection sub is found,\n\
+such that sub is contained within B[start,end]. Optional\n\
+arguments start and end are interpreted as in slice notation.\n\
+\n\
+Return -1 on failure.");
+
+PyObject *
+_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
+{
+ Py_ssize_t result = find_internal(str, len, "find", args, +1);
+ if (result == -2)
+ return NULL;
+ return PyLong_FromSsize_t(result);
+}
+
+PyDoc_STRVAR_shared(_Py_index__doc__,
+"B.index(sub[, start[, end]]) -> int\n\
+\n\
+Like B.find() but raise ValueError when the subsection is not found.");
+
+PyObject *
+_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
+{
+ Py_ssize_t result = find_internal(str, len, "index", args, +1);
+ if (result == -2)
+ return NULL;
+ if (result == -1) {
+ PyErr_SetString(PyExc_ValueError,
+ "subsection not found");
+ return NULL;
+ }
+ return PyLong_FromSsize_t(result);
+}
+
+PyDoc_STRVAR_shared(_Py_rfind__doc__,
+"B.rfind(sub[, start[, end]]) -> int\n\
+\n\
+Return the highest index in B where subsection sub is found,\n\
+such that sub is contained within B[start,end]. Optional\n\
+arguments start and end are interpreted as in slice notation.\n\
+\n\
+Return -1 on failure.");
+
+PyObject *
+_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
+{
+ Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
+ if (result == -2)
+ return NULL;
+ return PyLong_FromSsize_t(result);
+}
+
+PyDoc_STRVAR_shared(_Py_rindex__doc__,
+"B.rindex(sub[, start[, end]]) -> int\n\
+\n\
+Like B.rfind() but raise ValueError when the subsection is not found.");
+
+PyObject *
+_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
+{
+ Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
+ if (result == -2)
+ return NULL;
+ if (result == -1) {
+ PyErr_SetString(PyExc_ValueError,
+ "subsection not found");
+ return NULL;
+ }
+ return PyLong_FromSsize_t(result);
+}
+
+PyDoc_STRVAR_shared(_Py_count__doc__,
+"B.count(sub[, start[, end]]) -> int\n\
+\n\
+Return the number of non-overlapping occurrences of subsection sub in\n\
+bytes B[start:end]. Optional arguments start and end are interpreted\n\
+as in slice notation.");
+
+PyObject *
+_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
+{
+ PyObject *sub_obj;
+ const char *sub;
+ Py_ssize_t sub_len;
+ char byte;
+ Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
+
+ Py_buffer vsub;
+ PyObject *count_obj;
+
+ if (!parse_args_finds_byte("count", args,
+ &sub_obj, &byte, &start, &end))
+ return NULL;
+
+ if (sub_obj) {
+ if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
+ return NULL;
+
+ sub = vsub.buf;
+ sub_len = vsub.len;
+ }
+ else {
+ sub = &byte;
+ sub_len = 1;
+ }
+
+ ADJUST_INDICES(start, end, len);
+
+ count_obj = PyLong_FromSsize_t(
+ stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
+ );
+
+ if (sub_obj)
+ PyBuffer_Release(&vsub);
+
+ return count_obj;
+}
+
+int
+_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
+{
+ Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
+ if (ival == -1 && PyErr_Occurred()) {
+ Py_buffer varg;
+ Py_ssize_t pos;
+ PyErr_Clear();
+ if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
+ return -1;
+ pos = stringlib_find(str, len,
+ varg.buf, varg.len, 0);
+ PyBuffer_Release(&varg);
+ return pos >= 0;
+ }
+ if (ival < 0 || ival >= 256) {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+ }
+
+ return memchr(str, (int) ival, len) != NULL;
+}
+
+
+/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
+ * against substr, using the start and end arguments. Returns
+ * -1 on error, 0 if not found and 1 if found.
+ */
+Py_LOCAL(int)
+tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
+ Py_ssize_t start, Py_ssize_t end, int direction)
+{
+ Py_buffer sub_view = {NULL, NULL};
+ const char *sub;
+ Py_ssize_t slen;
+
+ if (PyBytes_Check(substr)) {
+ sub = PyBytes_AS_STRING(substr);
+ slen = PyBytes_GET_SIZE(substr);
+ }
+ else {
+ if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
+ return -1;
+ sub = sub_view.buf;
+ slen = sub_view.len;
+ }
+
+ ADJUST_INDICES(start, end, len);
+
+ if (direction < 0) {
+ /* startswith */
+ if (start + slen > len)
+ goto notfound;
+ } else {
+ /* endswith */
+ if (end - start < slen || start > len)
+ goto notfound;
+
+ if (end - slen > start)
+ start = end - slen;
+ }
+ if (end - start < slen)
+ goto notfound;
+ if (memcmp(str + start, sub, slen) != 0)
+ goto notfound;
+
+ PyBuffer_Release(&sub_view);
+ return 1;
+
+notfound:
+ PyBuffer_Release(&sub_view);
+ return 0;
+}
+
+Py_LOCAL(PyObject *)
+_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
+ const char *function_name, PyObject *args,
+ int direction)
+{
+ Py_ssize_t start = 0;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
+ PyObject *subobj;
+ int result;
+
+ if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
+ return NULL;
+ if (PyTuple_Check(subobj)) {
+ Py_ssize_t i;
+ for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
+ result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
+ start, end, direction);
+ if (result == -1)
+ return NULL;
+ else if (result) {
+ Py_RETURN_TRUE;
+ }
+ }
+ Py_RETURN_FALSE;
+ }
+ result = tailmatch(str, len, subobj, start, end, direction);
+ if (result == -1) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError))
+ PyErr_Format(PyExc_TypeError,
+ "%s first arg must be bytes or a tuple of bytes, "
+ "not %s",
+ function_name, Py_TYPE(subobj)->tp_name);
+ return NULL;
+ }
+ else
+ return PyBool_FromLong(result);
+}
+
+PyDoc_STRVAR_shared(_Py_startswith__doc__,
+"B.startswith(prefix[, start[, end]]) -> bool\n\
+\n\
+Return True if B starts with the specified prefix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
+prefix can also be a tuple of bytes to try.");
+
+PyObject *
+_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
+{
+ return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
+}
+
+PyDoc_STRVAR_shared(_Py_endswith__doc__,
+"B.endswith(suffix[, start[, end]]) -> bool\n\
+\n\
+Return True if B ends with the specified suffix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
+suffix can also be a tuple of bytes to try.");
+
+PyObject *
+_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
+{
+ return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
+}
+
+PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
+"B.expandtabs(tabsize=8) -> copy of B\n\
+\n\
+Return a copy of B where all tab characters are expanded using spaces.\n\
+If tabsize is not given, a tab size of 8 characters is assumed.");
+
+PyDoc_STRVAR_shared(_Py_ljust__doc__,
+"B.ljust(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B left justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
+
+PyDoc_STRVAR_shared(_Py_rjust__doc__,
+"B.rjust(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B right justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
+
+PyDoc_STRVAR_shared(_Py_center__doc__,
+"B.center(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B centered in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
+
+PyDoc_STRVAR_shared(_Py_zfill__doc__,
+"B.zfill(width) -> copy of B\n"
+"\n"
+"Pad a numeric string B with zeros on the left, to fill a field\n"
+"of the specified width. B is never truncated.");
+
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 495c3eb..aeddf53 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -9,9 +9,9 @@
#include <stddef.h>
/*[clinic input]
-class bytes "PyBytesObject*" "&PyBytes_Type"
+class bytes "PyBytesObject *" "&PyBytes_Type"
[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
#include "clinic/bytesobject.c.h"
@@ -30,6 +30,10 @@ static PyBytesObject *nullstring;
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
+/* Forward declaration */
+Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
+ char *str);
+
/*
For PyBytes_FromString(), the parameter `str' points to a null-terminated
string containing exactly `size' bytes.
@@ -174,190 +178,184 @@ PyBytes_FromString(const char *str)
PyObject *
PyBytes_FromFormatV(const char *format, va_list vargs)
{
- va_list count;
- Py_ssize_t n = 0;
- const char* f;
char *s;
- PyObject* string;
+ const char *f;
+ const char *p;
+ Py_ssize_t prec;
+ int longflag;
+ int size_tflag;
+ /* Longest 64-bit formatted numbers:
+ - "18446744073709551615\0" (21 bytes)
+ - "-9223372036854775808\0" (21 bytes)
+ Decimal takes the most space (it isn't enough for octal.)
+
+ Longest 64-bit pointer representation:
+ "0xffffffffffffffff\0" (19 bytes). */
+ char buffer[21];
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
+
+ s = _PyBytesWriter_Alloc(&writer, strlen(format));
+ if (s == NULL)
+ return NULL;
+ writer.overallocate = 1;
+
+#define WRITE_BYTES(str) \
+ do { \
+ s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
+ if (s == NULL) \
+ goto error; \
+ } while (0)
- Py_VA_COPY(count, vargs);
- /* step 1: figure out how large a buffer we need */
for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p = f;
- while (*++f && *f != '%' && !Py_ISALPHA(*f))
- ;
-
- /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
- * they don't affect the amount of space we reserve.
- */
- if ((*f == 'l' || *f == 'z') &&
- (f[1] == 'd' || f[1] == 'u'))
- ++f;
-
- switch (*f) {
- case 'c':
- {
- int c = va_arg(count, int);
- if (c < 0 || c > 255) {
- PyErr_SetString(PyExc_OverflowError,
- "PyBytes_FromFormatV(): %c format "
- "expects an integer in range [0; 255]");
- return NULL;
- }
- n++;
- break;
+ if (*f != '%') {
+ *s++ = *f;
+ continue;
+ }
+
+ p = f++;
+
+ /* ignore the width (ex: 10 in "%10s") */
+ while (Py_ISDIGIT(*f))
+ f++;
+
+ /* parse the precision (ex: 10 in "%.10s") */
+ prec = 0;
+ if (*f == '.') {
+ f++;
+ for (; Py_ISDIGIT(*f); f++) {
+ prec = (prec * 10) + (*f - '0');
}
- case '%':
- n++;
- break;
- case 'd': case 'u': case 'i': case 'x':
- (void) va_arg(count, int);
- /* 20 bytes is enough to hold a 64-bit
- integer. Decimal takes the most space.
- This isn't enough for octal. */
- n += 20;
- break;
- case 's':
- s = va_arg(count, char*);
- n += strlen(s);
- break;
- case 'p':
- (void) va_arg(count, int);
- /* maximum 64-bit pointer representation:
- * 0xffffffffffffffff
- * so 19 characters is enough.
- * XXX I count 18 -- what's the extra for?
- */
- n += 19;
- break;
- default:
- /* if we stumble upon an unknown
- formatting code, copy the rest of
- the format string to the output
- string. (we cannot just skip the
- code, since there's no way to know
- what's in the argument list) */
- n += strlen(p);
- goto expand;
+ }
+
+ while (*f && *f != '%' && !Py_ISALPHA(*f))
+ f++;
+
+ /* handle the long flag ('l'), but only for %ld and %lu.
+ others can be added when necessary. */
+ longflag = 0;
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
+ longflag = 1;
+ ++f;
+ }
+
+ /* handle the size_t flag ('z'). */
+ size_tflag = 0;
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+ size_tflag = 1;
+ ++f;
+ }
+
+ /* substract bytes preallocated for the format string
+ (ex: 2 for "%s") */
+ writer.min_size -= (f - p + 1);
+
+ switch (*f) {
+ case 'c':
+ {
+ int c = va_arg(vargs, int);
+ if (c < 0 || c > 255) {
+ PyErr_SetString(PyExc_OverflowError,
+ "PyBytes_FromFormatV(): %c format "
+ "expects an integer in range [0; 255]");
+ goto error;
}
- } else
- n++;
- }
- expand:
- /* step 2: fill the buffer */
- /* Since we've analyzed how much space we need for the worst case,
- use sprintf directly instead of the slower PyOS_snprintf. */
- string = PyBytes_FromStringAndSize(NULL, n);
- if (!string)
- return NULL;
+ writer.min_size++;
+ *s++ = (unsigned char)c;
+ break;
+ }
- s = PyBytes_AsString(string);
+ case 'd':
+ if (longflag)
+ sprintf(buffer, "%ld", va_arg(vargs, long));
+ else if (size_tflag)
+ sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
+ va_arg(vargs, Py_ssize_t));
+ else
+ sprintf(buffer, "%d", va_arg(vargs, int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
- for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p = f++;
+ case 'u':
+ if (longflag)
+ sprintf(buffer, "%lu",
+ va_arg(vargs, unsigned long));
+ else if (size_tflag)
+ sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
+ va_arg(vargs, size_t));
+ else
+ sprintf(buffer, "%u",
+ va_arg(vargs, unsigned int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
+
+ case 'i':
+ sprintf(buffer, "%i", va_arg(vargs, int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
+
+ case 'x':
+ sprintf(buffer, "%x", va_arg(vargs, int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
+
+ case 's':
+ {
Py_ssize_t i;
- int longflag = 0;
- int size_tflag = 0;
- /* parse the width.precision part (we're only
- interested in the precision value, if any) */
- n = 0;
- while (Py_ISDIGIT(*f))
- n = (n*10) + *f++ - '0';
- if (*f == '.') {
- f++;
- n = 0;
- while (Py_ISDIGIT(*f))
- n = (n*10) + *f++ - '0';
- }
- while (*f && *f != '%' && !Py_ISALPHA(*f))
- f++;
- /* handle the long flag, but only for %ld and %lu.
- others can be added when necessary. */
- if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
- longflag = 1;
- ++f;
- }
- /* handle the size_t flag. */
- if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
- size_tflag = 1;
- ++f;
- }
- switch (*f) {
- case 'c':
- {
- int c = va_arg(vargs, int);
- /* c has been checked for overflow in the first step */
- *s++ = (unsigned char)c;
- break;
+ p = va_arg(vargs, const char*);
+ i = strlen(p);
+ if (prec > 0 && i > prec)
+ i = prec;
+ s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
+ if (s == NULL)
+ goto error;
+ break;
+ }
+
+ case 'p':
+ sprintf(buffer, "%p", va_arg(vargs, void*));
+ assert(strlen(buffer) < sizeof(buffer));
+ /* %p is ill-defined: ensure leading 0x. */
+ if (buffer[1] == 'X')
+ buffer[1] = 'x';
+ else if (buffer[1] != 'x') {
+ memmove(buffer+2, buffer, strlen(buffer)+1);
+ buffer[0] = '0';
+ buffer[1] = 'x';
}
- case 'd':
- if (longflag)
- sprintf(s, "%ld", va_arg(vargs, long));
- else if (size_tflag)
- sprintf(s, "%" PY_FORMAT_SIZE_T "d",
- va_arg(vargs, Py_ssize_t));
- else
- sprintf(s, "%d", va_arg(vargs, int));
- s += strlen(s);
- break;
- case 'u':
- if (longflag)
- sprintf(s, "%lu",
- va_arg(vargs, unsigned long));
- else if (size_tflag)
- sprintf(s, "%" PY_FORMAT_SIZE_T "u",
- va_arg(vargs, size_t));
- else
- sprintf(s, "%u",
- va_arg(vargs, unsigned int));
- s += strlen(s);
- break;
- case 'i':
- sprintf(s, "%i", va_arg(vargs, int));
- s += strlen(s);
- break;
- case 'x':
- sprintf(s, "%x", va_arg(vargs, int));
- s += strlen(s);
- break;
- case 's':
- p = va_arg(vargs, char*);
- i = strlen(p);
- if (n > 0 && i > n)
- i = n;
- Py_MEMCPY(s, p, i);
- s += i;
- break;
- case 'p':
- sprintf(s, "%p", va_arg(vargs, void*));
- /* %p is ill-defined: ensure leading 0x. */
- if (s[1] == 'X')
- s[1] = 'x';
- else if (s[1] != 'x') {
- memmove(s+2, s, strlen(s)+1);
- s[0] = '0';
- s[1] = 'x';
- }
- s += strlen(s);
- break;
- case '%':
- *s++ = '%';
- break;
- default:
- strcpy(s, p);
- s += strlen(s);
- goto end;
+ WRITE_BYTES(buffer);
+ break;
+
+ case '%':
+ writer.min_size++;
+ *s++ = '%';
+ break;
+
+ default:
+ if (*f == 0) {
+ /* fix min_size if we reached the end of the format string */
+ writer.min_size++;
}
- } else
- *s++ = *f;
+
+ /* invalid format string: copy unformatted string and exit */
+ WRITE_BYTES(p);
+ return _PyBytesWriter_Finish(&writer, s);
+ }
}
- end:
- _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
- return string;
+#undef WRITE_BYTES
+
+ return _PyBytesWriter_Finish(&writer, s);
+
+ error:
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
}
PyObject *
@@ -409,12 +407,14 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
/* Returns a new reference to a PyBytes object, or NULL on failure. */
-static PyObject *
-formatfloat(PyObject *v, int flags, int prec, int type)
+static char*
+formatfloat(PyObject *v, int flags, int prec, int type,
+ PyObject **p_result, _PyBytesWriter *writer, char *str)
{
char *p;
PyObject *result;
double x;
+ size_t len;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) {
@@ -431,9 +431,22 @@ formatfloat(PyObject *v, int flags, int prec, int type)
if (p == NULL)
return NULL;
- result = PyBytes_FromStringAndSize(p, strlen(p));
+
+ len = strlen(p);
+ if (writer != NULL) {
+ str = _PyBytesWriter_Prepare(writer, str, len);
+ if (str == NULL)
+ return NULL;
+ Py_MEMCPY(str, p, len);
+ PyMem_Free(p);
+ str += len;
+ return str;
+ }
+
+ result = PyBytes_FromStringAndSize(p, len);
PyMem_Free(p);
- return result;
+ *p_result = result;
+ return str;
}
static PyObject *
@@ -473,11 +486,11 @@ formatlong(PyObject *v, int flags, int prec, int type)
static int
byte_converter(PyObject *arg, char *p)
{
- if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
+ if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
*p = PyBytes_AS_STRING(arg)[0];
return 1;
}
- else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
+ else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
*p = PyByteArray_AS_STRING(arg)[0];
return 1;
}
@@ -557,36 +570,36 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
return NULL;
}
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the ints &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
PyObject *
-_PyBytes_Format(PyObject *format, PyObject *args)
+_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
+ PyObject *args, int use_bytearray)
{
- char *fmt, *res;
+ const char *fmt;
+ char *res;
Py_ssize_t arglen, argidx;
- Py_ssize_t reslen, rescnt, fmtcnt;
+ Py_ssize_t fmtcnt;
int args_owned = 0;
- PyObject *result;
PyObject *dict = NULL;
- if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+ _PyBytesWriter writer;
+
+ if (args == NULL) {
PyErr_BadInternalCall();
return NULL;
}
- fmt = PyBytes_AS_STRING(format);
- fmtcnt = PyBytes_GET_SIZE(format);
- reslen = rescnt = fmtcnt + 100;
- result = PyBytes_FromStringAndSize((char *)NULL, reslen);
- if (result == NULL)
+ fmt = format;
+ fmtcnt = format_len;
+
+ _PyBytesWriter_Init(&writer);
+ writer.use_bytearray = use_bytearray;
+
+ res = _PyBytesWriter_Alloc(&writer, fmtcnt);
+ if (res == NULL)
return NULL;
- res = PyBytes_AsString(result);
+ if (!use_bytearray)
+ writer.overallocate = 1;
+
if (PyTuple_Check(args)) {
arglen = PyTuple_GET_SIZE(args);
argidx = 0;
@@ -600,18 +613,23 @@ _PyBytes_Format(PyObject *format, PyObject *args)
!PyByteArray_Check(args)) {
dict = args;
}
+
while (--fmtcnt >= 0) {
if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyBytes_Resize(&result, reslen))
- return NULL;
- res = PyBytes_AS_STRING(result)
- + reslen - rescnt;
- --rescnt;
- }
- *res++ = *fmt++;
+ Py_ssize_t len;
+ char *pos;
+
+ pos = strchr(fmt + 1, '%');
+ if (pos != NULL)
+ len = pos - fmt;
+ else
+ len = format_len - (fmt - format);
+ assert(len != 0);
+
+ Py_MEMCPY(res, fmt, len);
+ res += len;
+ fmt += len;
+ fmtcnt -= (len - 1);
}
else {
/* Got a format specifier */
@@ -626,10 +644,14 @@ _PyBytes_Format(PyObject *format, PyObject *args)
int sign;
Py_ssize_t len = 0;
char onechar; /* For byte_converter() */
+ Py_ssize_t alloc;
+#ifdef Py_DEBUG
+ char *before;
+#endif
fmt++;
if (*fmt == '(') {
- char *keystart;
+ const char *keystart;
Py_ssize_t keylen;
PyObject *key;
int pcount = 1;
@@ -673,6 +695,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
arglen = -1;
argidx = -2;
}
+
+ /* Parse flags. Example: "%+i" => flags=F_SIGN. */
while (--fmtcnt >= 0) {
switch (c = *fmt++) {
case '-': flags |= F_LJUST; continue;
@@ -683,6 +707,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
}
break;
}
+
+ /* Parse width. Example: "%10s" => width=10 */
if (c == '*') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
@@ -717,6 +743,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
width = width*10 + (c - '0');
}
}
+
+ /* Parse precision. Example: "%.3f" => prec=3 */
if (c == '.') {
prec = 0;
if (--fmtcnt >= 0)
@@ -771,13 +799,19 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (v == NULL)
goto error;
}
+
+ if (fmtcnt < 0) {
+ /* last writer: disable writer overallocation */
+ writer.overallocate = 0;
+ }
+
sign = 0;
fill = ' ';
switch (c) {
case '%':
- pbuf = "%";
- len = 1;
- break;
+ *res++ = '%';
+ continue;
+
case 'r':
// %r is only for 2/3 code; 3 only code should use %a
case 'a':
@@ -790,6 +824,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (prec >= 0 && len > prec)
len = prec;
break;
+
case 's':
// %s is only for 2/3 code; 3 only code should use %b
case 'b':
@@ -799,12 +834,49 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (prec >= 0 && len > prec)
len = prec;
break;
+
case 'i':
case 'd':
case 'u':
case 'o':
case 'x':
case 'X':
+ if (PyLong_CheckExact(v)
+ && width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK))
+ && c != 'X')
+ {
+ /* Fast path */
+ int alternate = flags & F_ALT;
+ int base;
+
+ switch(c)
+ {
+ default:
+ assert(0 && "'type' not in [diuoxX]");
+ case 'd':
+ case 'i':
+ case 'u':
+ base = 10;
+ break;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ }
+
+ /* Fast path */
+ writer.min_size -= 2; /* size preallocated for "%d" */
+ res = _PyLong_FormatBytesWriter(&writer, res,
+ v, base, alternate);
+ if (res == NULL)
+ goto error;
+ continue;
+ }
+
temp = formatlong(v, flags, prec, c);
if (!temp)
goto error;
@@ -815,14 +887,25 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (flags & F_ZERO)
fill = '0';
break;
+
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
- temp = formatfloat(v, flags, prec, c);
- if (temp == NULL)
+ if (width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ writer.min_size -= 2; /* size preallocated for "%f" */
+ res = formatfloat(v, flags, prec, c, NULL, &writer, res);
+ if (res == NULL)
+ goto error;
+ continue;
+ }
+
+ if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
goto error;
pbuf = PyBytes_AS_STRING(temp);
len = PyBytes_GET_SIZE(temp);
@@ -830,21 +913,28 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (flags & F_ZERO)
fill = '0';
break;
+
case 'c':
pbuf = &onechar;
len = byte_converter(v, &onechar);
if (!len)
goto error;
+ if (width == -1) {
+ /* Fast path */
+ *res++ = onechar;
+ continue;
+ }
break;
+
default:
PyErr_Format(PyExc_ValueError,
"unsupported format character '%c' (0x%x) "
"at index %zd",
c, c,
- (Py_ssize_t)(fmt - 1 -
- PyBytes_AsString(format)));
+ (Py_ssize_t)(fmt - 1 - format));
goto error;
}
+
if (sign) {
if (*pbuf == '-' || *pbuf == '+') {
sign = *pbuf++;
@@ -859,29 +949,31 @@ _PyBytes_Format(PyObject *format, PyObject *args)
}
if (width < len)
width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_DECREF(result);
- Py_XDECREF(temp);
- return PyErr_NoMemory();
- }
- if (_PyBytes_Resize(&result, reslen)) {
- Py_XDECREF(temp);
- return NULL;
- }
- res = PyBytes_AS_STRING(result)
- + reslen - rescnt;
+
+ alloc = width;
+ if (sign != 0 && len == width)
+ alloc++;
+ /* 2: size preallocated for %s */
+ if (alloc > 2) {
+ res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
+ if (res == NULL)
+ goto error;
}
+#ifdef Py_DEBUG
+ before = res;
+#endif
+
+ /* Write the sign if needed */
if (sign) {
if (fill != ' ')
*res++ = sign;
- rescnt--;
if (width > len)
width--;
}
+
+ /* Write the numeric prefix for "x", "X" and "o" formats
+ if the alternate form is used.
+ For example, write "0x" for the "%#x" format. */
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
@@ -889,18 +981,21 @@ _PyBytes_Format(PyObject *format, PyObject *args)
*res++ = *pbuf++;
*res++ = *pbuf++;
}
- rescnt -= 2;
width -= 2;
if (width < 0)
width = 0;
len -= 2;
}
+
+ /* Pad left with the fill character if needed */
if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
+ memset(res, fill, width - len);
+ res += (width - len);
+ width = len;
}
+
+ /* If padding with spaces: write sign if needed and/or numeric
+ prefix if the alternate form is used */
if (fill == ' ') {
if (sign)
*res++ = sign;
@@ -912,13 +1007,17 @@ _PyBytes_Format(PyObject *format, PyObject *args)
*res++ = *pbuf++;
}
}
+
+ /* Copy bytes */
Py_MEMCPY(res, pbuf, len);
res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
+
+ /* Pad right with the fill character if needed */
+ if (width > len) {
+ memset(res, ' ', width - len);
+ res += (width - len);
}
+
if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting");
@@ -926,22 +1025,31 @@ _PyBytes_Format(PyObject *format, PyObject *args)
goto error;
}
Py_XDECREF(temp);
+
+#ifdef Py_DEBUG
+ /* check that we computed the exact size for this write */
+ assert((res - before) == alloc);
+#endif
} /* '%' */
+
+ /* If overallocation was disabled, ensure that it was the last
+ write. Otherwise, we missed an optimization */
+ assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
} /* until end */
+
if (argidx < arglen && !dict) {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting");
goto error;
}
+
if (args_owned) {
Py_DECREF(args);
}
- if (_PyBytes_Resize(&result, reslen - rescnt))
- return NULL;
- return result;
+ return _PyBytesWriter_Finish(&writer, res);
error:
- Py_DECREF(result);
+ _PyBytesWriter_Dealloc(&writer);
if (args_owned) {
Py_DECREF(args);
}
@@ -961,6 +1069,42 @@ bytes_dealloc(PyObject *op)
the string is UTF-8 encoded and should be re-encoded in the
specified encoding. */
+static char *
+_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
+ const char *errors, const char *recode_encoding,
+ _PyBytesWriter *writer, char *p)
+{
+ PyObject *u, *w;
+ const char* t;
+
+ t = *s;
+ /* Decode non-ASCII bytes as UTF-8. */
+ while (t < end && (*t & 0x80))
+ t++;
+ u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
+ if (u == NULL)
+ return NULL;
+
+ /* Recode them in target encoding. */
+ w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
+ Py_DECREF(u);
+ if (w == NULL)
+ return NULL;
+ assert(PyBytes_Check(w));
+
+ /* Append bytes to output buffer. */
+ writer->min_size--; /* substract 1 preallocated byte */
+ p = _PyBytesWriter_WriteBytes(writer, p,
+ PyBytes_AS_STRING(w),
+ PyBytes_GET_SIZE(w));
+ Py_DECREF(w);
+ if (p == NULL)
+ return NULL;
+
+ *s = t;
+ return p;
+}
+
PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
@@ -968,54 +1112,42 @@ PyObject *PyBytes_DecodeEscape(const char *s,
const char *recode_encoding)
{
int c;
- char *p, *buf;
+ char *p;
const char *end;
- PyObject *v;
- Py_ssize_t newlen = recode_encoding ? 4*len:len;
- v = PyBytes_FromStringAndSize((char *)NULL, newlen);
- if (v == NULL)
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
+
+ p = _PyBytesWriter_Alloc(&writer, len);
+ if (p == NULL)
return NULL;
- p = buf = PyBytes_AsString(v);
+ writer.overallocate = 1;
+
end = s + len;
while (s < end) {
if (*s != '\\') {
non_esc:
- if (recode_encoding && (*s & 0x80)) {
- PyObject *u, *w;
- char *r;
- const char* t;
- Py_ssize_t rn;
- t = s;
- /* Decode non-ASCII bytes as UTF-8. */
- while (t < end && (*t & 0x80)) t++;
- u = PyUnicode_DecodeUTF8(s, t - s, errors);
- if(!u) goto failed;
-
- /* Recode them in target encoding. */
- w = PyUnicode_AsEncodedString(
- u, recode_encoding, errors);
- Py_DECREF(u);
- if (!w) goto failed;
-
- /* Append bytes to output buffer. */
- assert(PyBytes_Check(w));
- r = PyBytes_AS_STRING(w);
- rn = PyBytes_GET_SIZE(w);
- Py_MEMCPY(p, r, rn);
- p += rn;
- Py_DECREF(w);
- s = t;
- } else {
+ if (!(recode_encoding && (*s & 0x80))) {
*p++ = *s++;
}
+ else {
+ /* non-ASCII character and need to recode */
+ p = _PyBytes_DecodeEscapeRecode(&s, end,
+ errors, recode_encoding,
+ &writer, p);
+ if (p == NULL)
+ goto failed;
+ }
continue;
}
+
s++;
- if (s==end) {
+ if (s == end) {
PyErr_SetString(PyExc_ValueError,
"Trailing \\ in string");
goto failed;
}
+
switch (*s++) {
/* XXX This assumes ASCII! */
case '\n': break;
@@ -1040,28 +1172,18 @@ PyObject *PyBytes_DecodeEscape(const char *s,
*p++ = c;
break;
case 'x':
- if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
- unsigned int x = 0;
- c = Py_CHARMASK(*s);
- s++;
- if (Py_ISDIGIT(c))
- x = c - '0';
- else if (Py_ISLOWER(c))
- x = 10 + c - 'a';
- else
- x = 10 + c - 'A';
- x = x << 4;
- c = Py_CHARMASK(*s);
- s++;
- if (Py_ISDIGIT(c))
- x += c - '0';
- else if (Py_ISLOWER(c))
- x += 10 + c - 'a';
- else
- x += 10 + c - 'A';
- *p++ = x;
- break;
+ if (s+1 < end) {
+ int digit1, digit2;
+ digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
+ digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
+ if (digit1 < 16 && digit2 < 16) {
+ *p++ = (unsigned char)((digit1 << 4) + digit2);
+ s += 2;
+ break;
+ }
}
+ /* invalid hexadecimal digits */
+
if (!errors || strcmp(errors, "strict") == 0) {
PyErr_Format(PyExc_ValueError,
"invalid \\x escape at position %d",
@@ -1083,6 +1205,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
if (s < end && Py_ISXDIGIT(s[0]))
s++; /* and a hexdigit */
break;
+
default:
*p++ = '\\';
s--;
@@ -1090,11 +1213,11 @@ PyObject *PyBytes_DecodeEscape(const char *s,
UTF-8 bytes may follow. */
}
}
- if (p-buf < newlen)
- _PyBytes_Resize(&v, p - buf);
- return v;
+
+ return _PyBytesWriter_Finish(&writer, p);
+
failed:
- Py_DECREF(v);
+ _PyBytesWriter_Dealloc(&writer);
return NULL;
}
@@ -1365,24 +1488,7 @@ bytes_repeat(PyBytesObject *a, Py_ssize_t n)
static int
bytes_contains(PyObject *self, PyObject *arg)
{
- Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
- if (ival == -1 && PyErr_Occurred()) {
- Py_buffer varg;
- Py_ssize_t pos;
- PyErr_Clear();
- if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
- return -1;
- pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
- varg.buf, varg.len, 0);
- PyBuffer_Release(&varg);
- return pos >= 0;
- }
- if (ival < 0 || ival >= 256) {
- PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
- return -1;
- }
-
- return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
+ return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
}
static PyObject *
@@ -1629,8 +1735,8 @@ Return a list of the sections in the bytes, using sep as the delimiter.
[clinic start generated code]*/
static PyObject *
-bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
-/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
+bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
+/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
{
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
const char *s = PyBytes_AS_STRING(self), *sub;
@@ -1654,7 +1760,6 @@ bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
/*[clinic input]
bytes.partition
- self: self(type="PyBytesObject *")
sep: Py_buffer
/
@@ -1670,7 +1775,7 @@ object and two empty bytes objects.
static PyObject *
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
-/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
+/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
{
return stringlib_partition(
(PyObject*) self,
@@ -1682,7 +1787,6 @@ bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
/*[clinic input]
bytes.rpartition
- self: self(type="PyBytesObject *")
sep: Py_buffer
/
@@ -1698,7 +1802,7 @@ objects and the original bytes object.
static PyObject *
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
-/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
+/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
{
return stringlib_rpartition(
(PyObject*) self,
@@ -1716,8 +1820,8 @@ Splitting is done starting at the end of the bytes and working to the front.
[clinic start generated code]*/
static PyObject *
-bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
-/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
+bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
+/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
{
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
const char *s = PyBytes_AS_STRING(self), *sub;
@@ -1755,8 +1859,8 @@ Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
[clinic start generated code]*/
static PyObject *
-bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
-/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
+bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
+/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
{
return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
}
@@ -1769,158 +1873,30 @@ _PyBytes_Join(PyObject *sep, PyObject *x)
return bytes_join((PyBytesObject*)sep, x);
}
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len) \
- if (end > len) \
- end = len; \
- else if (end < 0) { \
- end += len; \
- if (end < 0) \
- end = 0; \
- } \
- if (start < 0) { \
- start += len; \
- if (start < 0) \
- start = 0; \
- }
-
-Py_LOCAL_INLINE(Py_ssize_t)
-bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
-{
- PyObject *subobj;
- char byte;
- Py_buffer subbuf;
- const char *sub;
- Py_ssize_t len, sub_len;
- Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
- Py_ssize_t res;
-
- if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
- args, &subobj, &byte, &start, &end))
- return -2;
-
- if (subobj) {
- if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
- return -2;
-
- sub = subbuf.buf;
- sub_len = subbuf.len;
- }
- else {
- sub = &byte;
- sub_len = 1;
- }
- len = PyBytes_GET_SIZE(self);
-
- ADJUST_INDICES(start, end, len);
- if (end - start < sub_len)
- res = -1;
- else if (sub_len == 1
-#ifndef HAVE_MEMRCHR
- && dir > 0
-#endif
- ) {
- unsigned char needle = *sub;
- int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
- res = stringlib_fastsearch_memchr_1char(
- PyBytes_AS_STRING(self) + start, end - start,
- needle, needle, mode);
- if (res >= 0)
- res += start;
- }
- else {
- if (dir > 0)
- res = stringlib_find_slice(
- PyBytes_AS_STRING(self), len,
- sub, sub_len, start, end);
- else
- res = stringlib_rfind_slice(
- PyBytes_AS_STRING(self), len,
- sub, sub_len, start, end);
- }
-
- if (subobj)
- PyBuffer_Release(&subbuf);
-
- return res;
-}
-
-
-PyDoc_STRVAR(find__doc__,
-"B.find(sub[, start[, end]]) -> int\n\
-\n\
-Return the lowest index in B where substring sub is found,\n\
-such that sub is contained within B[start:end]. Optional\n\
-arguments start and end are interpreted as in slice notation.\n\
-\n\
-Return -1 on failure.");
-
static PyObject *
bytes_find(PyBytesObject *self, PyObject *args)
{
- Py_ssize_t result = bytes_find_internal(self, args, +1);
- if (result == -2)
- return NULL;
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
-
-PyDoc_STRVAR(index__doc__,
-"B.index(sub[, start[, end]]) -> int\n\
-\n\
-Like B.find() but raise ValueError when the substring is not found.");
-
static PyObject *
bytes_index(PyBytesObject *self, PyObject *args)
{
- Py_ssize_t result = bytes_find_internal(self, args, +1);
- if (result == -2)
- return NULL;
- if (result == -1) {
- PyErr_SetString(PyExc_ValueError,
- "substring not found");
- return NULL;
- }
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
-PyDoc_STRVAR(rfind__doc__,
-"B.rfind(sub[, start[, end]]) -> int\n\
-\n\
-Return the highest index in B where substring sub is found,\n\
-such that sub is contained within B[start:end]. Optional\n\
-arguments start and end are interpreted as in slice notation.\n\
-\n\
-Return -1 on failure.");
-
static PyObject *
bytes_rfind(PyBytesObject *self, PyObject *args)
{
- Py_ssize_t result = bytes_find_internal(self, args, -1);
- if (result == -2)
- return NULL;
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
-PyDoc_STRVAR(rindex__doc__,
-"B.rindex(sub[, start[, end]]) -> int\n\
-\n\
-Like B.rfind() but raise ValueError when the substring is not found.");
-
static PyObject *
bytes_rindex(PyBytesObject *self, PyObject *args)
{
- Py_ssize_t result = bytes_find_internal(self, args, -1);
- if (result == -2)
- return NULL;
- if (result == -1) {
- PyErr_SetString(PyExc_ValueError,
- "substring not found");
- return NULL;
- }
- return PyLong_FromSsize_t(result);
+ return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
@@ -2007,7 +1983,6 @@ do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
/*[clinic input]
bytes.strip
- self: self(type="PyBytesObject *")
bytes: object = None
/
@@ -2018,7 +1993,7 @@ If the argument is omitted or None, strip leading and trailing ASCII whitespace.
static PyObject *
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
-/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
+/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
{
return do_argstrip(self, BOTHSTRIP, bytes);
}
@@ -2026,7 +2001,6 @@ bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic input]
bytes.lstrip
- self: self(type="PyBytesObject *")
bytes: object = None
/
@@ -2037,7 +2011,7 @@ If the argument is omitted or None, strip leading ASCII whitespace.
static PyObject *
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
-/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
+/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
{
return do_argstrip(self, LEFTSTRIP, bytes);
}
@@ -2045,7 +2019,6 @@ bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic input]
bytes.rstrip
- self: self(type="PyBytesObject *")
bytes: object = None
/
@@ -2056,64 +2029,22 @@ If the argument is omitted or None, strip trailing ASCII whitespace.
static PyObject *
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
-/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
+/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
{
return do_argstrip(self, RIGHTSTRIP, bytes);
}
-PyDoc_STRVAR(count__doc__,
-"B.count(sub[, start[, end]]) -> int\n\
-\n\
-Return the number of non-overlapping occurrences of substring sub in\n\
-string B[start:end]. Optional arguments start and end are interpreted\n\
-as in slice notation.");
-
static PyObject *
bytes_count(PyBytesObject *self, PyObject *args)
{
- PyObject *sub_obj;
- const char *str = PyBytes_AS_STRING(self), *sub;
- Py_ssize_t sub_len;
- char byte;
- Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
-
- Py_buffer vsub;
- PyObject *count_obj;
-
- if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
- &start, &end))
- return NULL;
-
- if (sub_obj) {
- if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
- return NULL;
-
- sub = vsub.buf;
- sub_len = vsub.len;
- }
- else {
- sub = &byte;
- sub_len = 1;
- }
-
- ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
-
- count_obj = PyLong_FromSsize_t(
- stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
- );
-
- if (sub_obj)
- PyBuffer_Release(&vsub);
-
- return count_obj;
+ return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
/*[clinic input]
bytes.translate
- self: self(type="PyBytesObject *")
table: object
Translation table, which must be a bytes object of length 256.
[
@@ -2130,7 +2061,7 @@ The remaining characters are mapped through the given translation table.
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
PyObject *deletechars)
-/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
+/*[clinic end generated code: output=233df850eb50bf8d input=ca20edf39d780d49]*/
{
char *input, *output;
Py_buffer table_view = {NULL, NULL};
@@ -2191,7 +2122,7 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
PyBuffer_Release(&table_view);
return NULL;
}
- output_start = output = PyBytes_AsString(result);
+ output_start = output = PyBytes_AS_STRING(result);
input = PyBytes_AS_STRING(input_obj);
if (dellen == 0 && table_chars != NULL) {
@@ -2267,498 +2198,6 @@ bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
return _Py_bytes_maketrans(frm, to);
}
-/* find and count characters and substrings */
-
-#define findchar(target, target_len, c) \
- ((char *)memchr((const void *)(target), c, target_len))
-
-/* String ops must return a string. */
-/* If the object is subclass of string, create a copy */
-Py_LOCAL(PyBytesObject *)
-return_self(PyBytesObject *self)
-{
- if (PyBytes_CheckExact(self)) {
- Py_INCREF(self);
- return self;
- }
- return (PyBytesObject *)PyBytes_FromStringAndSize(
- PyBytes_AS_STRING(self),
- PyBytes_GET_SIZE(self));
-}
-
-Py_LOCAL_INLINE(Py_ssize_t)
-countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
-{
- Py_ssize_t count=0;
- const char *start=target;
- const char *end=target+target_len;
-
- while ( (start=findchar(start, end-start, c)) != NULL ) {
- count++;
- if (count >= maxcount)
- break;
- start += 1;
- }
- return count;
-}
-
-
-/* Algorithms for different cases of string replacement */
-
-/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
-Py_LOCAL(PyBytesObject *)
-replace_interleave(PyBytesObject *self,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, i;
- PyBytesObject *result;
-
- self_len = PyBytes_GET_SIZE(self);
-
- /* 1 at the end plus 1 after every character;
- count = min(maxcount, self_len + 1) */
- if (maxcount <= self_len)
- count = maxcount;
- else
- /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
- count = self_len + 1;
-
- /* Check for overflow */
- /* result_len = count * to_len + self_len; */
- assert(count > 0);
- if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError,
- "replacement bytes are too long");
- return NULL;
- }
- result_len = count * to_len + self_len;
-
- if (! (result = (PyBytesObject *)
- PyBytes_FromStringAndSize(NULL, result_len)) )
- return NULL;
-
- self_s = PyBytes_AS_STRING(self);
- result_s = PyBytes_AS_STRING(result);
-
- /* TODO: special case single character, which doesn't need memcpy */
-
- /* Lay the first one down (guaranteed this will occur) */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- count -= 1;
-
- for (i=0; i<count; i++) {
- *result_s++ = *self_s++;
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- }
-
- /* Copy the rest of the original string */
- Py_MEMCPY(result_s, self_s, self_len-i);
-
- return result;
-}
-
-/* Special case for deleting a single character */
-/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
-Py_LOCAL(PyBytesObject *)
-replace_delete_single_character(PyBytesObject *self,
- char from_c, Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count;
- PyBytesObject *result;
-
- self_len = PyBytes_GET_SIZE(self);
- self_s = PyBytes_AS_STRING(self);
-
- count = countchar(self_s, self_len, from_c, maxcount);
- if (count == 0) {
- return return_self(self);
- }
-
- result_len = self_len - count; /* from_len == 1 */
- assert(result_len>=0);
-
- if ( (result = (PyBytesObject *)
- PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
- return NULL;
- result_s = PyBytes_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- next = findchar(start, end-start, from_c);
- if (next == NULL)
- break;
- Py_MEMCPY(result_s, start, next-start);
- result_s += (next-start);
- start = next+1;
- }
- Py_MEMCPY(result_s, start, end-start);
-
- return result;
-}
-
-/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
-
-Py_LOCAL(PyBytesObject *)
-replace_delete_substring(PyBytesObject *self,
- const char *from_s, Py_ssize_t from_len,
- Py_ssize_t maxcount) {
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset;
- PyBytesObject *result;
-
- self_len = PyBytes_GET_SIZE(self);
- self_s = PyBytes_AS_STRING(self);
-
- count = stringlib_count(self_s, self_len,
- from_s, from_len,
- maxcount);
-
- if (count == 0) {
- /* no matches */
- return return_self(self);
- }
-
- result_len = self_len - (count * from_len);
- assert (result_len>=0);
-
- if ( (result = (PyBytesObject *)
- PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
- return NULL;
-
- result_s = PyBytes_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- offset = stringlib_find(start, end-start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- next = start + offset;
-
- Py_MEMCPY(result_s, start, next-start);
-
- result_s += (next-start);
- start = next+from_len;
- }
- Py_MEMCPY(result_s, start, end-start);
- return result;
-}
-
-/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
-Py_LOCAL(PyBytesObject *)
-replace_single_character_in_place(PyBytesObject *self,
- char from_c, char to_c,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s, *start, *end, *next;
- Py_ssize_t self_len;
- PyBytesObject *result;
-
- /* The result string will be the same size */
- self_s = PyBytes_AS_STRING(self);
- self_len = PyBytes_GET_SIZE(self);
-
- next = findchar(self_s, self_len, from_c);
-
- if (next == NULL) {
- /* No matches; return the original string */
- return return_self(self);
- }
-
- /* Need to make a new string */
- result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
- if (result == NULL)
- return NULL;
- result_s = PyBytes_AS_STRING(result);
- Py_MEMCPY(result_s, self_s, self_len);
-
- /* change everything in-place, starting with this one */
- start = result_s + (next-self_s);
- *start = to_c;
- start++;
- end = result_s + self_len;
-
- while (--maxcount > 0) {
- next = findchar(start, end-start, from_c);
- if (next == NULL)
- break;
- *next = to_c;
- start = next+1;
- }
-
- return result;
-}
-
-/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
-Py_LOCAL(PyBytesObject *)
-replace_substring_in_place(PyBytesObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *result_s, *start, *end;
- char *self_s;
- Py_ssize_t self_len, offset;
- PyBytesObject *result;
-
- /* The result string will be the same size */
-
- self_s = PyBytes_AS_STRING(self);
- self_len = PyBytes_GET_SIZE(self);
-
- offset = stringlib_find(self_s, self_len,
- from_s, from_len,
- 0);
- if (offset == -1) {
- /* No matches; return the original string */
- return return_self(self);
- }
-
- /* Need to make a new string */
- result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
- if (result == NULL)
- return NULL;
- result_s = PyBytes_AS_STRING(result);
- Py_MEMCPY(result_s, self_s, self_len);
-
- /* change everything in-place, starting with this one */
- start = result_s + offset;
- Py_MEMCPY(start, to_s, from_len);
- start += from_len;
- end = result_s + self_len;
-
- while ( --maxcount > 0) {
- offset = stringlib_find(start, end-start,
- from_s, from_len,
- 0);
- if (offset==-1)
- break;
- Py_MEMCPY(start+offset, to_s, from_len);
- start += offset+from_len;
- }
-
- return result;
-}
-
-/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
-Py_LOCAL(PyBytesObject *)
-replace_single_character(PyBytesObject *self,
- char from_c,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count;
- PyBytesObject *result;
-
- self_s = PyBytes_AS_STRING(self);
- self_len = PyBytes_GET_SIZE(self);
-
- count = countchar(self_s, self_len, from_c, maxcount);
- if (count == 0) {
- /* no matches, return unchanged */
- return return_self(self);
- }
-
- /* use the difference between current and new, hence the "-1" */
- /* result_len = self_len + count * (to_len-1) */
- assert(count > 0);
- if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError,
- "replacement bytes are too long");
- return NULL;
- }
- result_len = self_len + count * (to_len - 1);
-
- if ( (result = (PyBytesObject *)
- PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
- return NULL;
- result_s = PyBytes_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- next = findchar(start, end-start, from_c);
- if (next == NULL)
- break;
-
- if (next == start) {
- /* replace with the 'to' */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start += 1;
- } else {
- /* copy the unchanged old then the 'to' */
- Py_MEMCPY(result_s, start, next-start);
- result_s += (next-start);
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start = next+1;
- }
- }
- /* Copy the remainder of the remaining string */
- Py_MEMCPY(result_s, start, end-start);
-
- return result;
-}
-
-/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
-Py_LOCAL(PyBytesObject *)
-replace_substring(PyBytesObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount) {
- char *self_s, *result_s;
- char *start, *next, *end;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset;
- PyBytesObject *result;
-
- self_s = PyBytes_AS_STRING(self);
- self_len = PyBytes_GET_SIZE(self);
-
- count = stringlib_count(self_s, self_len,
- from_s, from_len,
- maxcount);
-
- if (count == 0) {
- /* no matches, return unchanged */
- return return_self(self);
- }
-
- /* Check for overflow */
- /* result_len = self_len + count * (to_len-from_len) */
- assert(count > 0);
- if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError,
- "replacement bytes are too long");
- return NULL;
- }
- result_len = self_len + count * (to_len-from_len);
-
- if ( (result = (PyBytesObject *)
- PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
- return NULL;
- result_s = PyBytes_AS_STRING(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- offset = stringlib_find(start, end-start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- next = start+offset;
- if (next == start) {
- /* replace with the 'to' */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start += from_len;
- } else {
- /* copy the unchanged old then the 'to' */
- Py_MEMCPY(result_s, start, next-start);
- result_s += (next-start);
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- start = next+from_len;
- }
- }
- /* Copy the remainder of the remaining string */
- Py_MEMCPY(result_s, start, end-start);
-
- return result;
-}
-
-
-Py_LOCAL(PyBytesObject *)
-replace(PyBytesObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- if (maxcount < 0) {
- maxcount = PY_SSIZE_T_MAX;
- } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
- /* nothing to do; return the original string */
- return return_self(self);
- }
-
- if (maxcount == 0 ||
- (from_len == 0 && to_len == 0)) {
- /* nothing to do; return the original string */
- return return_self(self);
- }
-
- /* Handle zero-length special cases */
-
- if (from_len == 0) {
- /* insert the 'to' string everywhere. */
- /* >>> "Python".replace("", ".") */
- /* '.P.y.t.h.o.n.' */
- return replace_interleave(self, to_s, to_len, maxcount);
- }
-
- /* Except for "".replace("", "A") == "A" there is no way beyond this */
- /* point for an empty self string to generate a non-empty string */
- /* Special case so the remaining code always gets a non-empty string */
- if (PyBytes_GET_SIZE(self) == 0) {
- return return_self(self);
- }
-
- if (to_len == 0) {
- /* delete all occurrences of 'from' string */
- if (from_len == 1) {
- return replace_delete_single_character(
- self, from_s[0], maxcount);
- } else {
- return replace_delete_substring(self, from_s,
- from_len, maxcount);
- }
- }
-
- /* Handle special case where both strings have the same length */
-
- if (from_len == to_len) {
- if (from_len == 1) {
- return replace_single_character_in_place(
- self,
- from_s[0],
- to_s[0],
- maxcount);
- } else {
- return replace_substring_in_place(
- self, from_s, from_len, to_s, to_len,
- maxcount);
- }
- }
-
- /* Otherwise use the more generic algorithms */
- if (from_len == 1) {
- return replace_single_character(self, from_s[0],
- to_s, to_len, maxcount);
- } else {
- /* len('from')>=2, len('to')>=1 */
- return replace_substring(self, from_s, from_len, to_s, to_len,
- maxcount);
- }
-}
-
/*[clinic input]
bytes.replace
@@ -2777,156 +2216,28 @@ replaced.
[clinic start generated code]*/
static PyObject *
-bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
+bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Py_ssize_t count)
-/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
+/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
{
- return (PyObject *)replace((PyBytesObject *) self,
- (const char *)old->buf, old->len,
- (const char *)new->buf, new->len, count);
+ return stringlib_replace((PyObject *)self,
+ (const char *)old->buf, old->len,
+ (const char *)new->buf, new->len, count);
}
/** End DALKE **/
-/* Matches the end (direction >= 0) or start (direction < 0) of self
- * against substr, using the start and end arguments. Returns
- * -1 on error, 0 if not found and 1 if found.
- */
-Py_LOCAL(int)
-_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
- Py_ssize_t end, int direction)
-{
- Py_ssize_t len = PyBytes_GET_SIZE(self);
- Py_ssize_t slen;
- Py_buffer sub_view = {NULL, NULL};
- const char* sub;
- const char* str;
-
- if (PyBytes_Check(substr)) {
- sub = PyBytes_AS_STRING(substr);
- slen = PyBytes_GET_SIZE(substr);
- }
- else {
- if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
- return -1;
- sub = sub_view.buf;
- slen = sub_view.len;
- }
- str = PyBytes_AS_STRING(self);
-
- ADJUST_INDICES(start, end, len);
-
- if (direction < 0) {
- /* startswith */
- if (start+slen > len)
- goto notfound;
- } else {
- /* endswith */
- if (end-start < slen || start > len)
- goto notfound;
-
- if (end-slen > start)
- start = end - slen;
- }
- if (end-start < slen)
- goto notfound;
- if (memcmp(str+start, sub, slen) != 0)
- goto notfound;
-
- PyBuffer_Release(&sub_view);
- return 1;
-
-notfound:
- PyBuffer_Release(&sub_view);
- return 0;
-}
-
-
-PyDoc_STRVAR(startswith__doc__,
-"B.startswith(prefix[, start[, end]]) -> bool\n\
-\n\
-Return True if B starts with the specified prefix, False otherwise.\n\
-With optional start, test B beginning at that position.\n\
-With optional end, stop comparing B at that position.\n\
-prefix can also be a tuple of bytes to try.");
static PyObject *
bytes_startswith(PyBytesObject *self, PyObject *args)
{
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *subobj;
- int result;
-
- if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
- return NULL;
- if (PyTuple_Check(subobj)) {
- Py_ssize_t i;
- for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- result = _bytes_tailmatch(self,
- PyTuple_GET_ITEM(subobj, i),
- start, end, -1);
- if (result == -1)
- return NULL;
- else if (result) {
- Py_RETURN_TRUE;
- }
- }
- Py_RETURN_FALSE;
- }
- result = _bytes_tailmatch(self, subobj, start, end, -1);
- if (result == -1) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
- "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
- return NULL;
- }
- else
- return PyBool_FromLong(result);
+ return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
-
-PyDoc_STRVAR(endswith__doc__,
-"B.endswith(suffix[, start[, end]]) -> bool\n\
-\n\
-Return True if B ends with the specified suffix, False otherwise.\n\
-With optional start, test B beginning at that position.\n\
-With optional end, stop comparing B at that position.\n\
-suffix can also be a tuple of bytes to try.");
-
static PyObject *
bytes_endswith(PyBytesObject *self, PyObject *args)
{
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *subobj;
- int result;
-
- if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
- return NULL;
- if (PyTuple_Check(subobj)) {
- Py_ssize_t i;
- for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- result = _bytes_tailmatch(self,
- PyTuple_GET_ITEM(subobj, i),
- start, end, +1);
- if (result == -1)
- return NULL;
- else if (result) {
- Py_RETURN_TRUE;
- }
- }
- Py_RETURN_FALSE;
- }
- result = _bytes_tailmatch(self, subobj, start, end, +1);
- if (result == -1) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
- "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
- return NULL;
- }
- else
- return PyBool_FromLong(result);
+ return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
@@ -2946,9 +2257,9 @@ Decode the bytes using the codec registered for encoding.
[clinic start generated code]*/
static PyObject *
-bytes_decode_impl(PyBytesObject*self, const char *encoding,
+bytes_decode_impl(PyBytesObject *self, const char *encoding,
const char *errors)
-/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
+/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
{
return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
}
@@ -2966,8 +2277,8 @@ true.
[clinic start generated code]*/
static PyObject *
-bytes_splitlines_impl(PyBytesObject*self, int keepends)
-/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
+bytes_splitlines_impl(PyBytesObject *self, int keepends)
+/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
{
return stringlib_splitlines(
(PyObject*) self, PyBytes_AS_STRING(self),
@@ -2975,22 +2286,6 @@ bytes_splitlines_impl(PyBytesObject*self, int keepends)
);
}
-static int
-hex_digit_to_int(Py_UCS4 c)
-{
- if (c >= 128)
- return -1;
- if (Py_ISDIGIT(c))
- return c - '0';
- else {
- if (Py_ISUPPER(c))
- c = Py_TOLOWER(c);
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- }
- return -1;
-}
-
/*[clinic input]
@classmethod
bytes.fromhex
@@ -3008,47 +2303,83 @@ static PyObject *
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
{
- PyObject *newstring;
+ return _PyBytes_FromHex(string, 0);
+}
+
+PyObject*
+_PyBytes_FromHex(PyObject *string, int use_bytearray)
+{
char *buf;
- Py_ssize_t hexlen, byteslen, i, j;
- int top, bot;
- void *data;
- unsigned int kind;
+ Py_ssize_t hexlen, invalid_char;
+ unsigned int top, bot;
+ Py_UCS1 *str, *end;
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
+ writer.use_bytearray = use_bytearray;
assert(PyUnicode_Check(string));
if (PyUnicode_READY(string))
return NULL;
- kind = PyUnicode_KIND(string);
- data = PyUnicode_DATA(string);
hexlen = PyUnicode_GET_LENGTH(string);
- byteslen = hexlen/2; /* This overestimates if there are spaces */
- newstring = PyBytes_FromStringAndSize(NULL, byteslen);
- if (!newstring)
+ if (!PyUnicode_IS_ASCII(string)) {
+ void *data = PyUnicode_DATA(string);
+ unsigned int kind = PyUnicode_KIND(string);
+ Py_ssize_t i;
+
+ /* search for the first non-ASCII character */
+ for (i = 0; i < hexlen; i++) {
+ if (PyUnicode_READ(kind, data, i) >= 128)
+ break;
+ }
+ invalid_char = i;
+ goto error;
+ }
+
+ assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
+ str = PyUnicode_1BYTE_DATA(string);
+
+ /* This overestimates if there are spaces */
+ buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
+ if (buf == NULL)
return NULL;
- buf = PyBytes_AS_STRING(newstring);
- for (i = j = 0; i < hexlen; i += 2) {
+
+ end = str + hexlen;
+ while (str < end) {
/* skip over spaces in the input */
- while (PyUnicode_READ(kind, data, i) == ' ')
- i++;
- if (i >= hexlen)
- break;
- top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
- bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
- if (top == -1 || bot == -1) {
- PyErr_Format(PyExc_ValueError,
- "non-hexadecimal number found in "
- "fromhex() arg at position %zd", i);
+ if (*str == ' ') {
+ do {
+ str++;
+ } while (*str == ' ');
+ if (str >= end)
+ break;
+ }
+
+ top = _PyLong_DigitValue[*str];
+ if (top >= 16) {
+ invalid_char = str - PyUnicode_1BYTE_DATA(string);
+ goto error;
+ }
+ str++;
+
+ bot = _PyLong_DigitValue[*str];
+ if (bot >= 16) {
+ invalid_char = str - PyUnicode_1BYTE_DATA(string);
goto error;
}
- buf[j++] = (top << 4) + bot;
+ str++;
+
+ *buf++ = (unsigned char)((top << 4) + bot);
}
- if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
- goto error;
- return newstring;
+
+ return _PyBytesWriter_Finish(&writer, buf);
error:
- Py_XDECREF(newstring);
+ PyErr_Format(PyExc_ValueError,
+ "non-hexadecimal number found in "
+ "fromhex() arg at position %zd", invalid_char);
+ _PyBytesWriter_Dealloc(&writer);
return NULL;
}
@@ -3078,17 +2409,20 @@ bytes_methods[] = {
{"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
{"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
_Py_capitalize__doc__},
- {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
- {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
+ {"center", (PyCFunction)stringlib_center, METH_VARARGS,
+ _Py_center__doc__},
+ {"count", (PyCFunction)bytes_count, METH_VARARGS,
+ _Py_count__doc__},
BYTES_DECODE_METHODDEF
{"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
- endswith__doc__},
+ _Py_endswith__doc__},
{"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
- expandtabs__doc__},
- {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
+ _Py_expandtabs__doc__},
+ {"find", (PyCFunction)bytes_find, METH_VARARGS,
+ _Py_find__doc__},
BYTES_FROMHEX_METHODDEF
{"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
- {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
+ {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
{"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
_Py_isalnum__doc__},
{"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
@@ -3104,38 +2438,40 @@ bytes_methods[] = {
{"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
_Py_isupper__doc__},
BYTES_JOIN_METHODDEF
- {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
+ {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
{"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
BYTES_LSTRIP_METHODDEF
BYTES_MAKETRANS_METHODDEF
BYTES_PARTITION_METHODDEF
BYTES_REPLACE_METHODDEF
- {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
- {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
- {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
+ {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
+ {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
+ {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
BYTES_RPARTITION_METHODDEF
BYTES_RSPLIT_METHODDEF
BYTES_RSTRIP_METHODDEF
BYTES_SPLIT_METHODDEF
BYTES_SPLITLINES_METHODDEF
{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
- startswith__doc__},
+ _Py_startswith__doc__},
BYTES_STRIP_METHODDEF
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
BYTES_TRANSLATE_METHODDEF
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
- {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
+ {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
{NULL, NULL} /* sentinel */
};
static PyObject *
-bytes_mod(PyObject *v, PyObject *w)
+bytes_mod(PyObject *self, PyObject *arg)
{
- if (!PyBytes_Check(v))
+ if (!PyBytes_Check(self)) {
Py_RETURN_NOTIMPLEMENTED;
- return _PyBytes_Format(v, w);
+ }
+ return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
+ arg, 0);
}
static PyNumberMethods bytes_as_number = {
@@ -3244,108 +2580,93 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return PyBytes_FromObject(x);
}
-PyObject *
-PyBytes_FromObject(PyObject *x)
+static PyObject*
+_PyBytes_FromBuffer(PyObject *x)
{
- PyObject *new, *it;
- Py_ssize_t i, size;
+ PyObject *new;
+ Py_buffer view;
- if (x == NULL) {
- PyErr_BadInternalCall();
+ if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
return NULL;
- }
- if (PyBytes_CheckExact(x)) {
- Py_INCREF(x);
- return x;
- }
+ new = PyBytes_FromStringAndSize(NULL, view.len);
+ if (!new)
+ goto fail;
+ if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
+ &view, view.len, 'C') < 0)
+ goto fail;
+ PyBuffer_Release(&view);
+ return new;
- /* Use the modern buffer interface */
- if (PyObject_CheckBuffer(x)) {
- Py_buffer view;
- if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
- return NULL;
- new = PyBytes_FromStringAndSize(NULL, view.len);
- if (!new)
- goto fail;
- if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
- &view, view.len, 'C') < 0)
- goto fail;
- PyBuffer_Release(&view);
- return new;
- fail:
- Py_XDECREF(new);
- PyBuffer_Release(&view);
- return NULL;
- }
- if (PyUnicode_Check(x)) {
- PyErr_SetString(PyExc_TypeError,
- "cannot convert unicode object to bytes");
- return NULL;
- }
+fail:
+ Py_XDECREF(new);
+ PyBuffer_Release(&view);
+ return NULL;
+}
- if (PyList_CheckExact(x)) {
- new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
- if (new == NULL)
- return NULL;
- for (i = 0; i < Py_SIZE(x); i++) {
- Py_ssize_t value = PyNumber_AsSsize_t(
- PyList_GET_ITEM(x, i), PyExc_ValueError);
- if (value == -1 && PyErr_Occurred()) {
- Py_DECREF(new);
- return NULL;
- }
- if (value < 0 || value >= 256) {
- PyErr_SetString(PyExc_ValueError,
- "bytes must be in range(0, 256)");
- Py_DECREF(new);
- return NULL;
- }
- ((PyBytesObject *)new)->ob_sval[i] = (char) value;
- }
- return new;
- }
- if (PyTuple_CheckExact(x)) {
- new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
- if (new == NULL)
- return NULL;
- for (i = 0; i < Py_SIZE(x); i++) {
- Py_ssize_t value = PyNumber_AsSsize_t(
- PyTuple_GET_ITEM(x, i), PyExc_ValueError);
- if (value == -1 && PyErr_Occurred()) {
- Py_DECREF(new);
- return NULL;
- }
- if (value < 0 || value >= 256) {
- PyErr_SetString(PyExc_ValueError,
- "bytes must be in range(0, 256)");
- Py_DECREF(new);
- return NULL;
- }
- ((PyBytesObject *)new)->ob_sval[i] = (char) value;
- }
- return new;
- }
+#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
+ do { \
+ PyObject *bytes; \
+ Py_ssize_t i; \
+ Py_ssize_t value; \
+ char *str; \
+ PyObject *item; \
+ \
+ bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
+ if (bytes == NULL) \
+ return NULL; \
+ str = ((PyBytesObject *)bytes)->ob_sval; \
+ \
+ for (i = 0; i < Py_SIZE(x); i++) { \
+ item = GET_ITEM((x), i); \
+ value = PyNumber_AsSsize_t(item, PyExc_ValueError); \
+ if (value == -1 && PyErr_Occurred()) \
+ goto error; \
+ \
+ if (value < 0 || value >= 256) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "bytes must be in range(0, 256)"); \
+ goto error; \
+ } \
+ *str++ = (char) value; \
+ } \
+ return bytes; \
+ \
+ error: \
+ Py_DECREF(bytes); \
+ return NULL; \
+ } while (0)
+
+static PyObject*
+_PyBytes_FromList(PyObject *x)
+{
+ _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
+}
+
+static PyObject*
+_PyBytes_FromTuple(PyObject *x)
+{
+ _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
+}
+
+static PyObject *
+_PyBytes_FromIterator(PyObject *it, PyObject *x)
+{
+ char *str;
+ Py_ssize_t i, size;
+ _PyBytesWriter writer;
/* For iterator version, create a string object and resize as needed */
size = PyObject_LengthHint(x, 64);
if (size == -1 && PyErr_Occurred())
return NULL;
- /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
- returning a shared empty bytes string. This required because we
- want to call _PyBytes_Resize() the returned object, which we can
- only do on bytes objects with refcount == 1. */
- if (size == 0)
- size = 1;
- new = PyBytes_FromStringAndSize(NULL, size);
- if (new == NULL)
- return NULL;
- assert(Py_REFCNT(new) == 1);
- /* Get the iterator */
- it = PyObject_GetIter(x);
- if (it == NULL)
- goto error;
+ _PyBytesWriter_Init(&writer);
+ str = _PyBytesWriter_Alloc(&writer, size);
+ if (str == NULL)
+ return NULL;
+ writer.overallocate = 1;
+ size = writer.allocated;
/* Run the iterator to exhaustion */
for (i = 0; ; i++) {
@@ -3375,21 +2696,58 @@ PyBytes_FromObject(PyObject *x)
/* Append the byte */
if (i >= size) {
- size = 2 * size + 1;
- if (_PyBytes_Resize(&new, size) < 0)
- goto error;
+ str = _PyBytesWriter_Resize(&writer, str, size+1);
+ if (str == NULL)
+ return NULL;
+ size = writer.allocated;
}
- ((PyBytesObject *)new)->ob_sval[i] = (char) value;
+ *str++ = (char) value;
}
- _PyBytes_Resize(&new, i);
- /* Clean up and return success */
- Py_DECREF(it);
- return new;
+ return _PyBytesWriter_Finish(&writer, str);
error:
- Py_XDECREF(it);
- Py_XDECREF(new);
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
+}
+
+PyObject *
+PyBytes_FromObject(PyObject *x)
+{
+ PyObject *it, *result;
+
+ if (x == NULL) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ if (PyBytes_CheckExact(x)) {
+ Py_INCREF(x);
+ return x;
+ }
+
+ /* Use the modern buffer interface */
+ if (PyObject_CheckBuffer(x))
+ return _PyBytes_FromBuffer(x);
+
+ if (PyList_CheckExact(x))
+ return _PyBytes_FromList(x);
+
+ if (PyTuple_CheckExact(x))
+ return _PyBytes_FromTuple(x);
+
+ if (!PyUnicode_Check(x)) {
+ it = PyObject_GetIter(x);
+ if (it != NULL) {
+ result = _PyBytes_FromIterator(it, x);
+ Py_DECREF(it);
+ return result;
+ }
+ }
+
+ PyErr_Format(PyExc_TypeError,
+ "cannot convert '%.200s' object to bytes",
+ x->ob_type->tp_name);
return NULL;
}
@@ -3740,3 +3098,282 @@ bytes_iter(PyObject *seq)
_PyObject_GC_TRACK(it);
return (PyObject *)it;
}
+
+
+/* _PyBytesWriter API */
+
+#ifdef MS_WINDOWS
+ /* On Windows, overallocate by 50% is the best factor */
+# define OVERALLOCATE_FACTOR 2
+#else
+ /* On Linux, overallocate by 25% is the best factor */
+# define OVERALLOCATE_FACTOR 4
+#endif
+
+void
+_PyBytesWriter_Init(_PyBytesWriter *writer)
+{
+ /* Set all attributes before small_buffer to 0 */
+ memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
+#ifdef Py_DEBUG
+ memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
+#endif
+}
+
+void
+_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
+{
+ Py_CLEAR(writer->buffer);
+}
+
+Py_LOCAL_INLINE(char*)
+_PyBytesWriter_AsString(_PyBytesWriter *writer)
+{
+ if (writer->use_small_buffer) {
+ assert(writer->buffer == NULL);
+ return writer->small_buffer;
+ }
+ else if (writer->use_bytearray) {
+ assert(writer->buffer != NULL);
+ return PyByteArray_AS_STRING(writer->buffer);
+ }
+ else {
+ assert(writer->buffer != NULL);
+ return PyBytes_AS_STRING(writer->buffer);
+ }
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
+{
+ char *start = _PyBytesWriter_AsString(writer);
+ assert(str != NULL);
+ assert(str >= start);
+ assert(str - start <= writer->allocated);
+ return str - start;
+}
+
+Py_LOCAL_INLINE(void)
+_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
+{
+#ifdef Py_DEBUG
+ char *start, *end;
+
+ if (writer->use_small_buffer) {
+ assert(writer->buffer == NULL);
+ }
+ else {
+ assert(writer->buffer != NULL);
+ if (writer->use_bytearray)
+ assert(PyByteArray_CheckExact(writer->buffer));
+ else
+ assert(PyBytes_CheckExact(writer->buffer));
+ assert(Py_REFCNT(writer->buffer) == 1);
+ }
+
+ if (writer->use_bytearray) {
+ /* bytearray has its own overallocation algorithm,
+ writer overallocation must be disabled */
+ assert(!writer->overallocate);
+ }
+
+ assert(0 <= writer->allocated);
+ assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
+ /* the last byte must always be null */
+ start = _PyBytesWriter_AsString(writer);
+ assert(start[writer->allocated] == 0);
+
+ end = start + writer->allocated;
+ assert(str != NULL);
+ assert(start <= str && str <= end);
+#endif
+}
+
+void*
+_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
+{
+ Py_ssize_t allocated, pos;
+
+ _PyBytesWriter_CheckConsistency(writer, str);
+ assert(writer->allocated < size);
+
+ allocated = size;
+ if (writer->overallocate
+ && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
+ /* overallocate to limit the number of realloc() */
+ allocated += allocated / OVERALLOCATE_FACTOR;
+ }
+
+ pos = _PyBytesWriter_GetSize(writer, str);
+ if (!writer->use_small_buffer) {
+ if (writer->use_bytearray) {
+ if (PyByteArray_Resize(writer->buffer, allocated))
+ goto error;
+ /* writer->allocated can be smaller than writer->buffer->ob_alloc,
+ but we cannot use ob_alloc because bytes may need to be moved
+ to use the whole buffer. bytearray uses an internal optimization
+ to avoid moving or copying bytes when bytes are removed at the
+ beginning (ex: del bytearray[:1]). */
+ }
+ else {
+ if (_PyBytes_Resize(&writer->buffer, allocated))
+ goto error;
+ }
+ }
+ else {
+ /* convert from stack buffer to bytes object buffer */
+ assert(writer->buffer == NULL);
+
+ if (writer->use_bytearray)
+ writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
+ else
+ writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
+ if (writer->buffer == NULL)
+ goto error;
+
+ if (pos != 0) {
+ char *dest;
+ if (writer->use_bytearray)
+ dest = PyByteArray_AS_STRING(writer->buffer);
+ else
+ dest = PyBytes_AS_STRING(writer->buffer);
+ Py_MEMCPY(dest,
+ writer->small_buffer,
+ pos);
+ }
+
+ writer->use_small_buffer = 0;
+#ifdef Py_DEBUG
+ memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
+#endif
+ }
+ writer->allocated = allocated;
+
+ str = _PyBytesWriter_AsString(writer) + pos;
+ _PyBytesWriter_CheckConsistency(writer, str);
+ return str;
+
+error:
+ _PyBytesWriter_Dealloc(writer);
+ return NULL;
+}
+
+void*
+_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
+{
+ Py_ssize_t new_min_size;
+
+ _PyBytesWriter_CheckConsistency(writer, str);
+ assert(size >= 0);
+
+ if (size == 0) {
+ /* nothing to do */
+ return str;
+ }
+
+ if (writer->min_size > PY_SSIZE_T_MAX - size) {
+ PyErr_NoMemory();
+ _PyBytesWriter_Dealloc(writer);
+ return NULL;
+ }
+ new_min_size = writer->min_size + size;
+
+ if (new_min_size > writer->allocated)
+ str = _PyBytesWriter_Resize(writer, str, new_min_size);
+
+ writer->min_size = new_min_size;
+ return str;
+}
+
+/* Allocate the buffer to write size bytes.
+ Return the pointer to the beginning of buffer data.
+ Raise an exception and return NULL on error. */
+void*
+_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
+{
+ /* ensure that _PyBytesWriter_Alloc() is only called once */
+ assert(writer->min_size == 0 && writer->buffer == NULL);
+ assert(size >= 0);
+
+ writer->use_small_buffer = 1;
+#ifdef Py_DEBUG
+ writer->allocated = sizeof(writer->small_buffer) - 1;
+ /* In debug mode, don't use the full small buffer because it is less
+ efficient than bytes and bytearray objects to detect buffer underflow
+ and buffer overflow. Use 10 bytes of the small buffer to test also
+ code using the smaller buffer in debug mode.
+
+ Don't modify the _PyBytesWriter structure (use a shorter small buffer)
+ in debug mode to also be able to detect stack overflow when running
+ tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
+ if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
+ stack overflow. */
+ writer->allocated = Py_MIN(writer->allocated, 10);
+ /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
+ to detect buffer overflow */
+ writer->small_buffer[writer->allocated] = 0;
+#else
+ writer->allocated = sizeof(writer->small_buffer);
+#endif
+ return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
+}
+
+PyObject *
+_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
+{
+ Py_ssize_t size;
+ PyObject *result;
+
+ _PyBytesWriter_CheckConsistency(writer, str);
+
+ size = _PyBytesWriter_GetSize(writer, str);
+ if (size == 0 && !writer->use_bytearray) {
+ Py_CLEAR(writer->buffer);
+ /* Get the empty byte string singleton */
+ result = PyBytes_FromStringAndSize(NULL, 0);
+ }
+ else if (writer->use_small_buffer) {
+ if (writer->use_bytearray) {
+ result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
+ }
+ else {
+ result = PyBytes_FromStringAndSize(writer->small_buffer, size);
+ }
+ }
+ else {
+ result = writer->buffer;
+ writer->buffer = NULL;
+
+ if (size != writer->allocated) {
+ if (writer->use_bytearray) {
+ if (PyByteArray_Resize(result, size)) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ }
+ else {
+ if (_PyBytes_Resize(&result, size)) {
+ assert(result == NULL);
+ return NULL;
+ }
+ }
+ }
+ }
+ return result;
+}
+
+void*
+_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
+ const void *bytes, Py_ssize_t size)
+{
+ char *str = (char *)ptr;
+
+ str = _PyBytesWriter_Prepare(writer, str, size);
+ if (str == NULL)
+ return NULL;
+
+ Py_MEMCPY(str, bytes, size);
+ str += size;
+
+ return str;
+}
diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h
index 5a1a5e9..95ce817 100644
--- a/Objects/clinic/bytesobject.c.h
+++ b/Objects/clinic/bytesobject.c.h
@@ -20,10 +20,10 @@ PyDoc_STRVAR(bytes_split__doc__,
{"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
static PyObject *
-bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
+bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit);
static PyObject *
-bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
+bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
static char *_keywords[] = {"sep", "maxsplit", NULL};
@@ -133,10 +133,10 @@ PyDoc_STRVAR(bytes_rsplit__doc__,
{"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
static PyObject *
-bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
+bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit);
static PyObject *
-bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
+bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
static char *_keywords[] = {"sep", "maxsplit", NULL};
@@ -359,11 +359,11 @@ PyDoc_STRVAR(bytes_replace__doc__,
{"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
static PyObject *
-bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
+bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Py_ssize_t count);
static PyObject *
-bytes_replace(PyBytesObject*self, PyObject *args)
+bytes_replace(PyBytesObject *self, PyObject *args)
{
PyObject *return_value = NULL;
Py_buffer old = {NULL, NULL};
@@ -405,11 +405,11 @@ PyDoc_STRVAR(bytes_decode__doc__,
{"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
static PyObject *
-bytes_decode_impl(PyBytesObject*self, const char *encoding,
+bytes_decode_impl(PyBytesObject *self, const char *encoding,
const char *errors);
static PyObject *
-bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
+bytes_decode(PyBytesObject *self, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
static char *_keywords[] = {"encoding", "errors", NULL};
@@ -438,10 +438,10 @@ PyDoc_STRVAR(bytes_splitlines__doc__,
{"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
static PyObject *
-bytes_splitlines_impl(PyBytesObject*self, int keepends);
+bytes_splitlines_impl(PyBytesObject *self, int keepends);
static PyObject *
-bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
+bytes_splitlines(PyBytesObject *self, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
static char *_keywords[] = {"keepends", NULL};
@@ -484,4 +484,4 @@ bytes_fromhex(PyTypeObject *type, PyObject *arg)
exit:
return return_value;
}
-/*[clinic end generated code: output=bd0ce8f25d7e18f4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=d0e9f5a1c0682910 input=a9049054013a1b77]*/
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 964ae62..f089f75 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -694,7 +694,8 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq)
addr += *p++;
if (addr > addrq)
break;
- line += *p++;
+ line += (signed char)*p;
+ p++;
}
return line;
}
@@ -729,17 +730,19 @@ _PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
if (addr + *p > lasti)
break;
addr += *p++;
- if (*p)
+ if ((signed char)*p)
bounds->ap_lower = addr;
- line += *p++;
+ line += (signed char)*p;
+ p++;
--size;
}
if (size > 0) {
while (--size >= 0) {
addr += *p++;
- if (*p++)
+ if ((signed char)*p)
break;
+ p++;
}
bounds->ap_upper = addr;
}
diff --git a/Objects/descrobject.c b/Objects/descrobject.c
index da68e3b..4bc73b9 100644
--- a/Objects/descrobject.c
+++ b/Objects/descrobject.c
@@ -22,7 +22,7 @@ descr_name(PyDescrObject *descr)
}
static PyObject *
-descr_repr(PyDescrObject *descr, char *format)
+descr_repr(PyDescrObject *descr, const char *format)
{
PyObject *name = NULL;
if (descr->d_name != NULL && PyUnicode_Check(descr->d_name))
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index d774586..31c45ef 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -324,7 +324,7 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size)
assert(size >= PyDict_MINSIZE_SPLIT);
assert(IS_POWER_OF_2(size));
- dk = PyMem_MALLOC(sizeof(PyDictKeysObject) +
+ dk = PyObject_MALLOC(sizeof(PyDictKeysObject) +
sizeof(PyDictKeyEntry) * (size-1));
if (dk == NULL) {
PyErr_NoMemory();
@@ -353,7 +353,7 @@ free_keys_object(PyDictKeysObject *keys)
Py_XDECREF(entries[i].me_key);
Py_XDECREF(entries[i].me_value);
}
- PyMem_FREE(keys);
+ PyObject_FREE(keys);
}
#define new_values(size) PyMem_NEW(PyObject *, size)
@@ -964,7 +964,7 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
}
}
assert(oldkeys->dk_refcnt == 1);
- DK_DEBUG_DECREF PyMem_FREE(oldkeys);
+ DK_DEBUG_DECREF PyObject_FREE(oldkeys);
}
return 0;
}
@@ -1163,39 +1163,42 @@ _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
return PyDict_GetItemWithError(dp, kv);
}
-/* Fast version of global value lookup.
+/* Fast version of global value lookup (LOAD_GLOBAL).
* Lookup in globals, then builtins.
+ *
+ * Raise an exception and return NULL if an error occurred (ex: computing the
+ * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
+ * exist. Return the value if the key exists.
*/
PyObject *
_PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
{
- PyObject *x;
- if (PyUnicode_CheckExact(key)) {
- PyObject **value_addr;
- Py_hash_t hash = ((PyASCIIObject *)key)->hash;
- if (hash != -1) {
- PyDictKeyEntry *e;
- e = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr);
- if (e == NULL) {
- return NULL;
- }
- x = *value_addr;
- if (x != NULL)
- return x;
- e = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr);
- if (e == NULL) {
- return NULL;
- }
- x = *value_addr;
- return x;
- }
+ Py_hash_t hash;
+ PyDictKeyEntry *entry;
+ PyObject **value_addr;
+ PyObject *value;
+
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyASCIIObject *) key)->hash) == -1)
+ {
+ hash = PyObject_Hash(key);
+ if (hash == -1)
+ return NULL;
}
- x = PyDict_GetItemWithError((PyObject *)globals, key);
- if (x != NULL)
- return x;
- if (PyErr_Occurred())
+
+ /* namespace 1: globals */
+ entry = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr);
+ if (entry == NULL)
return NULL;
- return PyDict_GetItemWithError((PyObject *)builtins, key);
+ value = *value_addr;
+ if (value != NULL)
+ return value;
+
+ /* namespace 2: builtins */
+ entry = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr);
+ if (entry == NULL)
+ return NULL;
+ return *value_addr;
}
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@@ -1920,7 +1923,7 @@ dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
}
static int
-dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
+dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, const char *methname)
{
PyObject *arg = NULL;
int result = 0;
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index d03aada..2c7688c 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -59,15 +59,11 @@ BaseException_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static int
BaseException_init(PyBaseExceptionObject *self, PyObject *args, PyObject *kwds)
{
- PyObject *tmp;
-
if (!_PyArg_NoKeywords(Py_TYPE(self)->tp_name, kwds))
return -1;
- tmp = self->args;
- self->args = args;
- Py_INCREF(self->args);
- Py_XDECREF(tmp);
+ Py_INCREF(args);
+ Py_XSETREF(self->args, args);
return 0;
}
@@ -328,11 +324,10 @@ PyException_GetCause(PyObject *self) {
/* Steals a reference to cause */
void
-PyException_SetCause(PyObject *self, PyObject *cause) {
- PyObject *old_cause = ((PyBaseExceptionObject *)self)->cause;
- ((PyBaseExceptionObject *)self)->cause = cause;
+PyException_SetCause(PyObject *self, PyObject *cause)
+{
((PyBaseExceptionObject *)self)->suppress_context = 1;
- Py_XDECREF(old_cause);
+ Py_XSETREF(((PyBaseExceptionObject *)self)->cause, cause);
}
PyObject *
@@ -344,10 +339,9 @@ PyException_GetContext(PyObject *self) {
/* Steals a reference to context */
void
-PyException_SetContext(PyObject *self, PyObject *context) {
- PyObject *old_context = ((PyBaseExceptionObject *)self)->context;
- ((PyBaseExceptionObject *)self)->context = context;
- Py_XDECREF(old_context);
+PyException_SetContext(PyObject *self, PyObject *context)
+{
+ Py_XSETREF(((PyBaseExceptionObject *)self)->context, context);
}
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index d92bec3..5b2742a 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -1195,7 +1195,7 @@ Return a hexadecimal representation of a floating-point number.\n\
static PyObject *
float_fromhex(PyObject *cls, PyObject *arg)
{
- PyObject *result_as_float, *result;
+ PyObject *result;
double x;
long exp, top_exp, lsb, key_digit;
char *s, *coeff_start, *s_store, *coeff_end, *exp_start, *s_end;
@@ -1410,11 +1410,10 @@ float_fromhex(PyObject *cls, PyObject *arg)
s++;
if (s != s_end)
goto parse_error;
- result_as_float = Py_BuildValue("(d)", negate ? -x : x);
- if (result_as_float == NULL)
- return NULL;
- result = PyObject_CallObject(cls, result_as_float);
- Py_DECREF(result_as_float);
+ result = PyFloat_FromDouble(negate ? -x : x);
+ if (cls != (PyObject *)&PyFloat_Type && result != NULL) {
+ Py_SETREF(result, PyObject_CallFunctionObjArgs(cls, result, NULL));
+ }
return result;
overflow_error:
@@ -1451,29 +1450,23 @@ float_as_integer_ratio(PyObject *v, PyObject *unused)
int exponent;
int i;
- PyObject *prev;
PyObject *py_exponent = NULL;
PyObject *numerator = NULL;
PyObject *denominator = NULL;
PyObject *result_pair = NULL;
PyNumberMethods *long_methods = PyLong_Type.tp_as_number;
-#define INPLACE_UPDATE(obj, call) \
- prev = obj; \
- obj = call; \
- Py_DECREF(prev); \
-
CONVERT_TO_DOUBLE(v, self);
if (Py_IS_INFINITY(self)) {
- PyErr_SetString(PyExc_OverflowError,
- "Cannot pass infinity to float.as_integer_ratio.");
- return NULL;
+ PyErr_SetString(PyExc_OverflowError,
+ "cannot convert Infinity to integer ratio");
+ return NULL;
}
if (Py_IS_NAN(self)) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot pass NaN to float.as_integer_ratio.");
- return NULL;
+ PyErr_SetString(PyExc_ValueError,
+ "cannot convert NaN to integer ratio");
+ return NULL;
}
PyFPE_START_PROTECT("as_integer_ratio", goto error);
@@ -1489,29 +1482,31 @@ float_as_integer_ratio(PyObject *v, PyObject *unused)
to be truncated by PyLong_FromDouble(). */
numerator = PyLong_FromDouble(float_part);
- if (numerator == NULL) goto error;
+ if (numerator == NULL)
+ goto error;
+ denominator = PyLong_FromLong(1);
+ if (denominator == NULL)
+ goto error;
+ py_exponent = PyLong_FromLong(Py_ABS(exponent));
+ if (py_exponent == NULL)
+ goto error;
/* fold in 2**exponent */
- denominator = PyLong_FromLong(1);
- py_exponent = PyLong_FromLong(labs((long)exponent));
- if (py_exponent == NULL) goto error;
- INPLACE_UPDATE(py_exponent,
- long_methods->nb_lshift(denominator, py_exponent));
- if (py_exponent == NULL) goto error;
if (exponent > 0) {
- INPLACE_UPDATE(numerator,
- long_methods->nb_multiply(numerator, py_exponent));
- if (numerator == NULL) goto error;
+ Py_SETREF(numerator,
+ long_methods->nb_lshift(numerator, py_exponent));
+ if (numerator == NULL)
+ goto error;
}
else {
- Py_DECREF(denominator);
- denominator = py_exponent;
- py_exponent = NULL;
+ Py_SETREF(denominator,
+ long_methods->nb_lshift(denominator, py_exponent));
+ if (denominator == NULL)
+ goto error;
}
result_pair = PyTuple_Pack(2, numerator, denominator);
-#undef INPLACE_UPDATE
error:
Py_XDECREF(py_exponent);
Py_XDECREF(denominator);
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index bdf06db..a4a862a 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -137,7 +137,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
new_lasti = -1;
for (offset = 0; offset < lnotab_len; offset += 2) {
addr += lnotab[offset];
- line += lnotab[offset+1];
+ line += (signed char)lnotab[offset+1];
if (line >= new_lineno) {
new_lasti = addr;
new_lineno = line;
@@ -349,15 +349,11 @@ frame_gettrace(PyFrameObject *f, void *closure)
static int
frame_settrace(PyFrameObject *f, PyObject* v, void *closure)
{
- PyObject* old_value;
-
/* We rely on f_lineno being accurate when f_trace is set. */
f->f_lineno = PyFrame_GetLineNumber(f);
- old_value = f->f_trace;
Py_XINCREF(v);
- f->f_trace = v;
- Py_XDECREF(old_value);
+ Py_XSETREF(f->f_trace, v);
return 0;
}
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index e6c327d..261c16d 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -249,7 +249,6 @@ func_get_code(PyFunctionObject *op)
static int
func_set_code(PyFunctionObject *op, PyObject *value)
{
- PyObject *tmp;
Py_ssize_t nfree, nclosure;
/* Not legal to del f.func_code or to set it to anything
@@ -270,10 +269,8 @@ func_set_code(PyFunctionObject *op, PyObject *value)
nclosure, nfree);
return -1;
}
- tmp = op->func_code;
Py_INCREF(value);
- op->func_code = value;
- Py_DECREF(tmp);
+ Py_XSETREF(op->func_code, value);
return 0;
}
@@ -287,8 +284,6 @@ func_get_name(PyFunctionObject *op)
static int
func_set_name(PyFunctionObject *op, PyObject *value)
{
- PyObject *tmp;
-
/* Not legal to del f.func_name or to set it to anything
* other than a string object. */
if (value == NULL || !PyUnicode_Check(value)) {
@@ -296,10 +291,8 @@ func_set_name(PyFunctionObject *op, PyObject *value)
"__name__ must be set to a string object");
return -1;
}
- tmp = op->func_name;
Py_INCREF(value);
- op->func_name = value;
- Py_DECREF(tmp);
+ Py_XSETREF(op->func_name, value);
return 0;
}
@@ -313,8 +306,6 @@ func_get_qualname(PyFunctionObject *op)
static int
func_set_qualname(PyFunctionObject *op, PyObject *value)
{
- PyObject *tmp;
-
/* Not legal to del f.__qualname__ or to set it to anything
* other than a string object. */
if (value == NULL || !PyUnicode_Check(value)) {
@@ -322,10 +313,8 @@ func_set_qualname(PyFunctionObject *op, PyObject *value)
"__qualname__ must be set to a string object");
return -1;
}
- tmp = op->func_qualname;
Py_INCREF(value);
- op->func_qualname = value;
- Py_DECREF(tmp);
+ Py_XSETREF(op->func_qualname, value);
return 0;
}
@@ -343,8 +332,6 @@ func_get_defaults(PyFunctionObject *op)
static int
func_set_defaults(PyFunctionObject *op, PyObject *value)
{
- PyObject *tmp;
-
/* Legal to del f.func_defaults.
* Can only set func_defaults to NULL or a tuple. */
if (value == Py_None)
@@ -354,10 +341,8 @@ func_set_defaults(PyFunctionObject *op, PyObject *value)
"__defaults__ must be set to a tuple object");
return -1;
}
- tmp = op->func_defaults;
Py_XINCREF(value);
- op->func_defaults = value;
- Py_XDECREF(tmp);
+ Py_XSETREF(op->func_defaults, value);
return 0;
}
@@ -375,8 +360,6 @@ func_get_kwdefaults(PyFunctionObject *op)
static int
func_set_kwdefaults(PyFunctionObject *op, PyObject *value)
{
- PyObject *tmp;
-
if (value == Py_None)
value = NULL;
/* Legal to del f.func_kwdefaults.
@@ -386,10 +369,8 @@ func_set_kwdefaults(PyFunctionObject *op, PyObject *value)
"__kwdefaults__ must be set to a dict object");
return -1;
}
- tmp = op->func_kwdefaults;
Py_XINCREF(value);
- op->func_kwdefaults = value;
- Py_XDECREF(tmp);
+ Py_XSETREF(op->func_kwdefaults, value);
return 0;
}
@@ -408,8 +389,6 @@ func_get_annotations(PyFunctionObject *op)
static int
func_set_annotations(PyFunctionObject *op, PyObject *value)
{
- PyObject *tmp;
-
if (value == Py_None)
value = NULL;
/* Legal to del f.func_annotations.
@@ -420,10 +399,8 @@ func_set_annotations(PyFunctionObject *op, PyObject *value)
"__annotations__ must be set to a dict object");
return -1;
}
- tmp = op->func_annotations;
Py_XINCREF(value);
- op->func_annotations = value;
- Py_XDECREF(tmp);
+ Py_XSETREF(op->func_annotations, value);
return 0;
}
diff --git a/Objects/genobject.c b/Objects/genobject.c
index f74d044..c94a6ed 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -187,7 +187,7 @@ gen_send_ex(PyGenObject *gen, PyObject *arg, int exc, int closing)
/* Pop the exception before issuing a warning. */
PyErr_Fetch(&exc, &val, &tb);
- if (PyErr_WarnFormat(PyExc_PendingDeprecationWarning, 1,
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"generator '%.50S' raised StopIteration",
gen->gi_qualname)) {
/* Warning was converted to an error. */
@@ -519,8 +519,6 @@ gen_get_name(PyGenObject *op)
static int
gen_set_name(PyGenObject *op, PyObject *value)
{
- PyObject *tmp;
-
/* Not legal to del gen.gi_name or to set it to anything
* other than a string object. */
if (value == NULL || !PyUnicode_Check(value)) {
@@ -528,10 +526,8 @@ gen_set_name(PyGenObject *op, PyObject *value)
"__name__ must be set to a string object");
return -1;
}
- tmp = op->gi_name;
Py_INCREF(value);
- op->gi_name = value;
- Py_DECREF(tmp);
+ Py_XSETREF(op->gi_name, value);
return 0;
}
@@ -545,8 +541,6 @@ gen_get_qualname(PyGenObject *op)
static int
gen_set_qualname(PyGenObject *op, PyObject *value)
{
- PyObject *tmp;
-
/* Not legal to del gen.__qualname__ or to set it to anything
* other than a string object. */
if (value == NULL || !PyUnicode_Check(value)) {
@@ -554,10 +548,8 @@ gen_set_qualname(PyGenObject *op, PyObject *value)
"__qualname__ must be set to a string object");
return -1;
}
- tmp = op->gi_qualname;
Py_INCREF(value);
- op->gi_qualname = value;
- Py_DECREF(tmp);
+ Py_XSETREF(op->gi_qualname, value);
return 0;
}
diff --git a/Objects/listobject.c b/Objects/listobject.c
index d688179..6e2d026 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -216,7 +216,6 @@ int
PyList_SetItem(PyObject *op, Py_ssize_t i,
PyObject *newitem)
{
- PyObject *olditem;
PyObject **p;
if (!PyList_Check(op)) {
Py_XDECREF(newitem);
@@ -230,9 +229,7 @@ PyList_SetItem(PyObject *op, Py_ssize_t i,
return -1;
}
p = ((PyListObject *)op) -> ob_item + i;
- olditem = *p;
- *p = newitem;
- Py_XDECREF(olditem);
+ Py_XSETREF(*p, newitem);
return 0;
}
@@ -251,7 +248,7 @@ ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
return -1;
}
- if (list_resize(self, n+1) == -1)
+ if (list_resize(self, n+1) < 0)
return -1;
if (where < 0) {
@@ -291,7 +288,7 @@ app1(PyListObject *self, PyObject *v)
return -1;
}
- if (list_resize(self, n+1) == -1)
+ if (list_resize(self, n+1) < 0)
return -1;
Py_INCREF(v);
@@ -711,7 +708,7 @@ list_inplace_repeat(PyListObject *self, Py_ssize_t n)
return PyErr_NoMemory();
}
- if (list_resize(self, size*n) == -1)
+ if (list_resize(self, size*n) < 0)
return NULL;
p = size;
@@ -730,7 +727,6 @@ list_inplace_repeat(PyListObject *self, Py_ssize_t n)
static int
list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v)
{
- PyObject *old_value;
if (i < 0 || i >= Py_SIZE(a)) {
PyErr_SetString(PyExc_IndexError,
"list assignment index out of range");
@@ -739,9 +735,7 @@ list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v)
if (v == NULL)
return list_ass_slice(a, i, i+1, v);
Py_INCREF(v);
- old_value = a->ob_item[i];
- a->ob_item[i] = v;
- Py_DECREF(old_value);
+ Py_SETREF(a->ob_item[i], v);
return 0;
}
@@ -804,7 +798,7 @@ listextend(PyListObject *self, PyObject *b)
Py_RETURN_NONE;
}
m = Py_SIZE(self);
- if (list_resize(self, m + n) == -1) {
+ if (list_resize(self, m + n) < 0) {
Py_DECREF(b);
return NULL;
}
@@ -832,7 +826,7 @@ listextend(PyListObject *self, PyObject *b)
/* Guess a result list size. */
n = PyObject_LengthHint(b, 8);
- if (n == -1) {
+ if (n < 0) {
Py_DECREF(it);
return NULL;
}
@@ -840,7 +834,7 @@ listextend(PyListObject *self, PyObject *b)
mn = m + n;
if (mn >= m) {
/* Make room. */
- if (list_resize(self, mn) == -1)
+ if (list_resize(self, mn) < 0)
goto error;
/* Make the list sane again. */
Py_SIZE(self) = m;
diff --git a/Objects/listsort.txt b/Objects/listsort.txt
index 832e4f2..fef982f 100644
--- a/Objects/listsort.txt
+++ b/Objects/listsort.txt
@@ -486,7 +486,7 @@ sub-run, yet finding such very efficiently when they exist.
I first learned about the galloping strategy in a related context; see:
"Adaptive Set Intersections, Unions, and Differences" (2000)
- Erik D. Demaine, Alejandro López-Ortiz, J. Ian Munro
+ Erik D. Demaine, Alejandro López-Ortiz, J. Ian Munro
and its followup(s). An earlier paper called the same strategy
"exponential search":
diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt
index d247edd..5153757 100644
--- a/Objects/lnotab_notes.txt
+++ b/Objects/lnotab_notes.txt
@@ -12,42 +12,47 @@ pairs. The details are important and delicate, best illustrated by example:
0 1
6 2
50 7
- 350 307
- 361 308
+ 350 207
+ 361 208
Instead of storing these numbers literally, we compress the list by storing only
-the increments from one row to the next. Conceptually, the stored list might
+the difference from one row to the next. Conceptually, the stored list might
look like:
- 0, 1, 6, 1, 44, 5, 300, 300, 11, 1
+ 0, 1, 6, 1, 44, 5, 300, 200, 11, 1
-The above doesn't really work, but it's a start. Note that an unsigned byte
-can't hold negative values, or values larger than 255, and the above example
-contains two such values. So we make two tweaks:
+The above doesn't really work, but it's a start. An unsigned byte (byte code
+offset) can't hold negative values, or values larger than 255, a signed byte
+(line number) can't hold values larger than 127 or less than -128, and the
+above example contains two such values. So we make two tweaks:
- (a) there's a deep assumption that byte code offsets and their corresponding
- line #s both increase monotonically, and
- (b) if at least one column jumps by more than 255 from one row to the next,
- more than one pair is written to the table. In case #b, there's no way to know
- from looking at the table later how many were written. That's the delicate
- part. A user of co_lnotab desiring to find the source line number
- corresponding to a bytecode address A should do something like this
+ (a) there's a deep assumption that byte code offsets increase monotonically,
+ and
+ (b) if byte code offset jumps by more than 255 from one row to the next, or if
+ source code line number jumps by more than 127 or less than -128 from one row
+ to the next, more than one pair is written to the table. In case #b,
+ there's no way to know from looking at the table later how many were written.
+ That's the delicate part. A user of co_lnotab desiring to find the source
+ line number corresponding to a bytecode address A should do something like
+ this:
lineno = addr = 0
for addr_incr, line_incr in co_lnotab:
addr += addr_incr
if addr > A:
return lineno
+ if line_incr >= 0x80:
+ line_incr -= 0x100
lineno += line_incr
(In C, this is implemented by PyCode_Addr2Line().) In order for this to work,
when the addr field increments by more than 255, the line # increment in each
pair generated must be 0 until the remaining addr increment is < 256. So, in
the example above, assemble_lnotab in compile.c should not (as was actually done
-until 2.2) expand 300, 300 to
+until 2.2) expand 300, 200 to
255, 255, 45, 45,
but to
- 255, 0, 45, 255, 0, 45.
+ 255, 0, 45, 128, 0, 72.
The above is sufficient to reconstruct line numbers for tracebacks, but not for
line tracing. Tracing is handled by PyCode_CheckLineNumber() in codeobject.c
@@ -90,16 +95,16 @@ which compiles to this:
6 POP_JUMP_IF_FALSE 17
3 9 LOAD_CONST 1 (1)
- 12 PRINT_ITEM
+ 12 PRINT_ITEM
- 4 13 BREAK_LOOP
+ 4 13 BREAK_LOOP
14 JUMP_ABSOLUTE 3
- >> 17 POP_BLOCK
+ >> 17 POP_BLOCK
6 18 LOAD_CONST 2 (2)
- 21 PRINT_ITEM
+ 21 PRINT_ITEM
>> 22 LOAD_CONST 0 (None)
- 25 RETURN_VALUE
+ 25 RETURN_VALUE
If 'a' is false, execution will jump to the POP_BLOCK instruction at offset 17
and the co_lnotab will claim that execution has moved to line 4, which is wrong.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index f68d15e..14d2974 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1582,10 +1582,12 @@ divrem1(PyLongObject *a, digit n, digit *prem)
static int
long_to_decimal_string_internal(PyObject *aa,
PyObject **p_output,
- _PyUnicodeWriter *writer)
+ _PyUnicodeWriter *writer,
+ _PyBytesWriter *bytes_writer,
+ char **bytes_str)
{
PyLongObject *scratch, *a;
- PyObject *str;
+ PyObject *str = NULL;
Py_ssize_t size, strlen, size_a, i, j;
digit *pout, *pin, rem, tenpow;
int negative;
@@ -1662,7 +1664,13 @@ long_to_decimal_string_internal(PyObject *aa,
return -1;
}
kind = writer->kind;
- str = NULL;
+ }
+ else if (bytes_writer) {
+ *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen);
+ if (*bytes_str == NULL) {
+ Py_DECREF(scratch);
+ return -1;
+ }
}
else {
str = PyUnicode_New(strlen, '9');
@@ -1673,13 +1681,8 @@ long_to_decimal_string_internal(PyObject *aa,
kind = PyUnicode_KIND(str);
}
-#define WRITE_DIGITS(TYPE) \
+#define WRITE_DIGITS(p) \
do { \
- if (writer) \
- p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
- else \
- p = (TYPE*)PyUnicode_DATA(str) + strlen; \
- \
/* pout[0] through pout[size-2] contribute exactly \
_PyLong_DECIMAL_SHIFT digits each */ \
for (i=0; i < size - 1; i++) { \
@@ -1699,6 +1702,16 @@ long_to_decimal_string_internal(PyObject *aa,
/* and sign */ \
if (negative) \
*--p = '-'; \
+ } while (0)
+
+#define WRITE_UNICODE_DIGITS(TYPE) \
+ do { \
+ if (writer) \
+ p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
+ else \
+ p = (TYPE*)PyUnicode_DATA(str) + strlen; \
+ \
+ WRITE_DIGITS(p); \
\
/* check we've counted correctly */ \
if (writer) \
@@ -1708,25 +1721,34 @@ long_to_decimal_string_internal(PyObject *aa,
} while (0)
/* fill the string right-to-left */
- if (kind == PyUnicode_1BYTE_KIND) {
+ if (bytes_writer) {
+ char *p = *bytes_str + strlen;
+ WRITE_DIGITS(p);
+ assert(p == *bytes_str);
+ }
+ else if (kind == PyUnicode_1BYTE_KIND) {
Py_UCS1 *p;
- WRITE_DIGITS(Py_UCS1);
+ WRITE_UNICODE_DIGITS(Py_UCS1);
}
else if (kind == PyUnicode_2BYTE_KIND) {
Py_UCS2 *p;
- WRITE_DIGITS(Py_UCS2);
+ WRITE_UNICODE_DIGITS(Py_UCS2);
}
else {
Py_UCS4 *p;
assert (kind == PyUnicode_4BYTE_KIND);
- WRITE_DIGITS(Py_UCS4);
+ WRITE_UNICODE_DIGITS(Py_UCS4);
}
#undef WRITE_DIGITS
+#undef WRITE_UNICODE_DIGITS
Py_DECREF(scratch);
if (writer) {
writer->pos += strlen;
}
+ else if (bytes_writer) {
+ (*bytes_str) += strlen;
+ }
else {
assert(_PyUnicode_CheckConsistency(str, 1));
*p_output = (PyObject *)str;
@@ -1738,7 +1760,7 @@ static PyObject *
long_to_decimal_string(PyObject *aa)
{
PyObject *v;
- if (long_to_decimal_string_internal(aa, &v, NULL) == -1)
+ if (long_to_decimal_string_internal(aa, &v, NULL, NULL, NULL) == -1)
return NULL;
return v;
}
@@ -1750,10 +1772,11 @@ long_to_decimal_string(PyObject *aa)
static int
long_format_binary(PyObject *aa, int base, int alternate,
- PyObject **p_output, _PyUnicodeWriter *writer)
+ PyObject **p_output, _PyUnicodeWriter *writer,
+ _PyBytesWriter *bytes_writer, char **bytes_str)
{
PyLongObject *a = (PyLongObject *)aa;
- PyObject *v;
+ PyObject *v = NULL;
Py_ssize_t sz;
Py_ssize_t size_a;
enum PyUnicode_Kind kind;
@@ -1810,7 +1833,11 @@ long_format_binary(PyObject *aa, int base, int alternate,
if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1)
return -1;
kind = writer->kind;
- v = NULL;
+ }
+ else if (bytes_writer) {
+ *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz);
+ if (*bytes_str == NULL)
+ return -1;
}
else {
v = PyUnicode_New(sz, 'x');
@@ -1819,13 +1846,8 @@ long_format_binary(PyObject *aa, int base, int alternate,
kind = PyUnicode_KIND(v);
}
-#define WRITE_DIGITS(TYPE) \
+#define WRITE_DIGITS(p) \
do { \
- if (writer) \
- p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
- else \
- p = (TYPE*)PyUnicode_DATA(v) + sz; \
- \
if (size_a == 0) { \
*--p = '0'; \
} \
@@ -1860,30 +1882,50 @@ long_format_binary(PyObject *aa, int base, int alternate,
} \
if (negative) \
*--p = '-'; \
+ } while (0)
+
+#define WRITE_UNICODE_DIGITS(TYPE) \
+ do { \
+ if (writer) \
+ p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
+ else \
+ p = (TYPE*)PyUnicode_DATA(v) + sz; \
+ \
+ WRITE_DIGITS(p); \
+ \
if (writer) \
assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
else \
assert(p == (TYPE*)PyUnicode_DATA(v)); \
} while (0)
- if (kind == PyUnicode_1BYTE_KIND) {
+ if (bytes_writer) {
+ char *p = *bytes_str + sz;
+ WRITE_DIGITS(p);
+ assert(p == *bytes_str);
+ }
+ else if (kind == PyUnicode_1BYTE_KIND) {
Py_UCS1 *p;
- WRITE_DIGITS(Py_UCS1);
+ WRITE_UNICODE_DIGITS(Py_UCS1);
}
else if (kind == PyUnicode_2BYTE_KIND) {
Py_UCS2 *p;
- WRITE_DIGITS(Py_UCS2);
+ WRITE_UNICODE_DIGITS(Py_UCS2);
}
else {
Py_UCS4 *p;
assert (kind == PyUnicode_4BYTE_KIND);
- WRITE_DIGITS(Py_UCS4);
+ WRITE_UNICODE_DIGITS(Py_UCS4);
}
#undef WRITE_DIGITS
+#undef WRITE_UNICODE_DIGITS
if (writer) {
writer->pos += sz;
}
+ else if (bytes_writer) {
+ (*bytes_str) += sz;
+ }
else {
assert(_PyUnicode_CheckConsistency(v, 1));
*p_output = v;
@@ -1897,9 +1939,9 @@ _PyLong_Format(PyObject *obj, int base)
PyObject *str;
int err;
if (base == 10)
- err = long_to_decimal_string_internal(obj, &str, NULL);
+ err = long_to_decimal_string_internal(obj, &str, NULL, NULL, NULL);
else
- err = long_format_binary(obj, base, 1, &str, NULL);
+ err = long_format_binary(obj, base, 1, &str, NULL, NULL, NULL);
if (err == -1)
return NULL;
return str;
@@ -1911,9 +1953,31 @@ _PyLong_FormatWriter(_PyUnicodeWriter *writer,
int base, int alternate)
{
if (base == 10)
- return long_to_decimal_string_internal(obj, NULL, writer);
+ return long_to_decimal_string_internal(obj, NULL, writer,
+ NULL, NULL);
+ else
+ return long_format_binary(obj, base, alternate, NULL, writer,
+ NULL, NULL);
+}
+
+char*
+_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str,
+ PyObject *obj,
+ int base, int alternate)
+{
+ char *str2;
+ int res;
+ str2 = str;
+ if (base == 10)
+ res = long_to_decimal_string_internal(obj, NULL, NULL,
+ writer, &str2);
else
- return long_format_binary(obj, base, alternate, NULL, writer);
+ res = long_format_binary(obj, base, alternate, NULL, NULL,
+ writer, &str2);
+ if (res < 0)
+ return NULL;
+ assert(str2 != NULL);
+ return str2;
}
/* Table of digit values for 8-bit string -> integer conversion.
@@ -2705,6 +2769,13 @@ PyLong_AsDouble(PyObject *v)
PyErr_SetString(PyExc_TypeError, "an integer is required");
return -1.0;
}
+ if (Py_ABS(Py_SIZE(v)) <= 1) {
+ /* Fast path; single digit long (31 bits) will cast safely
+ to double. This improves performance of FP/long operations
+ by 20%.
+ */
+ return (double)MEDIUM_VALUE((PyLongObject *)v);
+ }
x = _PyLong_Frexp((PyLongObject *)v, &exponent);
if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) {
PyErr_SetString(PyExc_OverflowError,
@@ -3431,6 +3502,52 @@ long_mul(PyLongObject *a, PyLongObject *b)
return (PyObject *)z;
}
+/* Fast modulo division for single-digit longs. */
+static PyObject *
+fast_mod(PyLongObject *a, PyLongObject *b)
+{
+ sdigit left = a->ob_digit[0];
+ sdigit right = b->ob_digit[0];
+ sdigit mod;
+
+ assert(Py_ABS(Py_SIZE(a)) == 1);
+ assert(Py_ABS(Py_SIZE(b)) == 1);
+
+ if (Py_SIZE(a) == Py_SIZE(b)) {
+ /* 'a' and 'b' have the same sign. */
+ mod = left % right;
+ }
+ else {
+ /* Either 'a' or 'b' is negative. */
+ mod = right - 1 - (left - 1) % right;
+ }
+
+ return PyLong_FromLong(mod * (sdigit)Py_SIZE(b));
+}
+
+/* Fast floor division for single-digit longs. */
+static PyObject *
+fast_floor_div(PyLongObject *a, PyLongObject *b)
+{
+ sdigit left = a->ob_digit[0];
+ sdigit right = b->ob_digit[0];
+ sdigit div;
+
+ assert(Py_ABS(Py_SIZE(a)) == 1);
+ assert(Py_ABS(Py_SIZE(b)) == 1);
+
+ if (Py_SIZE(a) == Py_SIZE(b)) {
+ /* 'a' and 'b' have the same sign. */
+ div = left / right;
+ }
+ else {
+ /* Either 'a' or 'b' is negative. */
+ div = -1 - (left - 1) / right;
+ }
+
+ return PyLong_FromLong(div);
+}
+
/* The / and % operators are now defined in terms of divmod().
The expression a mod b has the value a - b*floor(a/b).
The long_divrem function gives the remainder after division of
@@ -3458,6 +3575,30 @@ l_divmod(PyLongObject *v, PyLongObject *w,
{
PyLongObject *div, *mod;
+ if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) {
+ /* Fast path for single-digit longs */
+ div = NULL;
+ if (pdiv != NULL) {
+ div = (PyLongObject *)fast_floor_div(v, w);
+ if (div == NULL) {
+ return -1;
+ }
+ }
+ if (pmod != NULL) {
+ mod = (PyLongObject *)fast_mod(v, w);
+ if (mod == NULL) {
+ Py_XDECREF(div);
+ return -1;
+ }
+ *pmod = mod;
+ }
+ if (pdiv != NULL) {
+ /* We only want to set `*pdiv` when `*pmod` is
+ set successfully. */
+ *pdiv = div;
+ }
+ return 0;
+ }
if (long_divrem(v, w, &div, &mod) < 0)
return -1;
if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) ||
@@ -3502,6 +3643,11 @@ long_div(PyObject *a, PyObject *b)
PyLongObject *div;
CHECK_BINOP(a, b);
+
+ if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
+ return fast_floor_div((PyLongObject*)a, (PyLongObject*)b);
+ }
+
if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, NULL) < 0)
div = NULL;
return (PyObject *)div;
@@ -3777,6 +3923,10 @@ long_mod(PyObject *a, PyObject *b)
CHECK_BINOP(a, b);
+ if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
+ return fast_mod((PyLongObject*)a, (PyLongObject*)b);
+ }
+
if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0)
mod = NULL;
return (PyObject *)mod;
diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c
index 10162cb..e355a83 100644
--- a/Objects/memoryobject.c
+++ b/Objects/memoryobject.c
@@ -1133,7 +1133,7 @@ get_native_fmtchar(char *result, const char *fmt)
return -1;
}
-Py_LOCAL_INLINE(char *)
+Py_LOCAL_INLINE(const char *)
get_native_fmtstr(const char *fmt)
{
int at = 0;
@@ -1221,7 +1221,7 @@ cast_to_1D(PyMemoryViewObject *mv, PyObject *format)
goto out;
}
- view->format = get_native_fmtstr(PyBytes_AS_STRING(asciifmt));
+ view->format = (char *)get_native_fmtstr(PyBytes_AS_STRING(asciifmt));
if (view->format == NULL) {
/* NOT_REACHED: get_native_fmtchar() already validates the format. */
PyErr_SetString(PyExc_RuntimeError,
diff --git a/Objects/object.c b/Objects/object.c
index 6fc4df1..cc1b2ff 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -644,7 +644,7 @@ PyObject_Bytes(PyObject *v)
/* Map rich comparison operators to their swapped version, e.g. LT <--> GT */
int _Py_SwappedOp[] = {Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE};
-static char *opstrings[] = {"<", "<=", "==", "!=", ">", ">="};
+static const char * const opstrings[] = {"<", "<=", "==", "!=", ">", ">="};
/* Perform a rich comparison, raising TypeError when the requested comparison
operator is not supported. */
@@ -686,11 +686,10 @@ do_richcompare(PyObject *v, PyObject *w, int op)
res = (v != w) ? Py_True : Py_False;
break;
default:
- /* XXX Special-case None so it doesn't show as NoneType() */
PyErr_Format(PyExc_TypeError,
- "unorderable types: %.100s() %s %.100s()",
- v->ob_type->tp_name,
+ "'%s' not supported between instances of '%.100s' and '%.100s'",
opstrings[op],
+ v->ob_type->tp_name,
w->ob_type->tp_name);
return NULL;
}
@@ -1041,8 +1040,7 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict)
name->ob_type->tp_name);
return NULL;
}
- else
- Py_INCREF(name);
+ Py_INCREF(name);
if (tp->tp_dict == NULL) {
if (PyType_Ready(tp) < 0)
@@ -1050,10 +1048,10 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict)
}
descr = _PyType_Lookup(tp, name);
- Py_XINCREF(descr);
f = NULL;
if (descr != NULL) {
+ Py_INCREF(descr);
f = descr->ob_type->tp_descr_get;
if (f != NULL && PyDescr_IsData(descr)) {
res = f(descr, obj, (PyObject *)obj->ob_type);
@@ -1073,8 +1071,9 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict)
if (tsize < 0)
tsize = -tsize;
size = _PyObject_VAR_SIZE(tp, tsize);
+ assert(size <= PY_SSIZE_T_MAX);
- dictoffset += (long)size;
+ dictoffset += (Py_ssize_t)size;
assert(dictoffset > 0);
assert(dictoffset % SIZEOF_VOID_P == 0);
}
@@ -1142,12 +1141,11 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name,
Py_INCREF(name);
descr = _PyType_Lookup(tp, name);
- Py_XINCREF(descr);
- f = NULL;
if (descr != NULL) {
+ Py_INCREF(descr);
f = descr->ob_type->tp_descr_set;
- if (f != NULL && PyDescr_IsData(descr)) {
+ if (f != NULL) {
res = f(descr, obj, value);
goto done;
}
@@ -1155,40 +1153,32 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name,
if (dict == NULL) {
dictptr = _PyObject_GetDictPtr(obj);
- if (dictptr != NULL) {
- res = _PyObjectDict_SetItem(Py_TYPE(obj), dictptr, name, value);
- if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError))
- PyErr_SetObject(PyExc_AttributeError, name);
+ if (dictptr == NULL) {
+ if (descr == NULL) {
+ PyErr_Format(PyExc_AttributeError,
+ "'%.100s' object has no attribute '%U'",
+ tp->tp_name, name);
+ }
+ else {
+ PyErr_Format(PyExc_AttributeError,
+ "'%.50s' object attribute '%U' is read-only",
+ tp->tp_name, name);
+ }
goto done;
}
+ res = _PyObjectDict_SetItem(tp, dictptr, name, value);
}
- if (dict != NULL) {
+ else {
Py_INCREF(dict);
if (value == NULL)
res = PyDict_DelItem(dict, name);
else
res = PyDict_SetItem(dict, name, value);
Py_DECREF(dict);
- if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError))
- PyErr_SetObject(PyExc_AttributeError, name);
- goto done;
- }
-
- if (f != NULL) {
- res = f(descr, obj, value);
- goto done;
}
+ if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError))
+ PyErr_SetObject(PyExc_AttributeError, name);
- if (descr == NULL) {
- PyErr_Format(PyExc_AttributeError,
- "'%.100s' object has no attribute '%U'",
- tp->tp_name, name);
- goto done;
- }
-
- PyErr_Format(PyExc_AttributeError,
- "'%.50s' object attribute '%U' is read-only",
- tp->tp_name, name);
done:
Py_XDECREF(descr);
Py_DECREF(name);
@@ -1204,7 +1194,7 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value)
int
PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context)
{
- PyObject *dict, **dictptr = _PyObject_GetDictPtr(obj);
+ PyObject **dictptr = _PyObject_GetDictPtr(obj);
if (dictptr == NULL) {
PyErr_SetString(PyExc_AttributeError,
"This object has no __dict__");
@@ -1220,10 +1210,8 @@ PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context)
"not a '%.200s'", Py_TYPE(value)->tp_name);
return -1;
}
- dict = *dictptr;
- Py_XINCREF(value);
- *dictptr = value;
- Py_XDECREF(dict);
+ Py_INCREF(value);
+ Py_XSETREF(*dictptr, value);
return 0;
}
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 7cc889f..3f95133 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1,17 +1,38 @@
#include "Python.h"
+
+/* Defined in tracemalloc.c */
+extern void _PyMem_DumpTraceback(int fd, const void *ptr);
+
+
/* Python's malloc wrappers (see pymem.h) */
-#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */
+/*
+ * Basic types
+ * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
+ */
+#undef uchar
+#define uchar unsigned char /* assuming == 8 bits */
+
+#undef uint
+#define uint unsigned int /* assuming >= 16 bits */
+
+#undef uptr
+#define uptr Py_uintptr_t
+
/* Forward declaration */
+static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
+static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
+static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
+static void _PyMem_DebugRawFree(void *ctx, void *p);
+
static void* _PyMem_DebugMalloc(void *ctx, size_t size);
static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
-static void _PyMem_DebugFree(void *ctx, void *p);
static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
+static void _PyMem_DebugFree(void *ctx, void *p);
static void _PyObject_DebugDumpAddress(const void *p);
static void _PyMem_DebugCheckAddress(char api_id, const void *p);
-#endif
#if defined(__has_feature) /* Clang */
#if __has_feature(address_sanitizer) /* is ASAN enabled? */
@@ -145,9 +166,8 @@ _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
#else
# define PYOBJ_FUNCS PYRAW_FUNCS
#endif
-#define PYMEM_FUNCS PYRAW_FUNCS
+#define PYMEM_FUNCS PYOBJ_FUNCS
-#ifdef PYMALLOC_DEBUG
typedef struct {
/* We tag each block with an API ID in order to tag API violations */
char api_id;
@@ -163,19 +183,21 @@ static struct {
{'o', {NULL, PYOBJ_FUNCS}}
};
-#define PYDBG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
-#endif
+#define PYRAWDBG_FUNCS \
+ _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree
+#define PYDBG_FUNCS \
+ _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
static PyMemAllocatorEx _PyMem_Raw = {
-#ifdef PYMALLOC_DEBUG
- &_PyMem_Debug.raw, PYDBG_FUNCS
+#ifdef Py_DEBUG
+ &_PyMem_Debug.raw, PYRAWDBG_FUNCS
#else
NULL, PYRAW_FUNCS
#endif
};
static PyMemAllocatorEx _PyMem = {
-#ifdef PYMALLOC_DEBUG
+#ifdef Py_DEBUG
&_PyMem_Debug.mem, PYDBG_FUNCS
#else
NULL, PYMEM_FUNCS
@@ -183,16 +205,76 @@ static PyMemAllocatorEx _PyMem = {
};
static PyMemAllocatorEx _PyObject = {
-#ifdef PYMALLOC_DEBUG
+#ifdef Py_DEBUG
&_PyMem_Debug.obj, PYDBG_FUNCS
#else
NULL, PYOBJ_FUNCS
#endif
};
+int
+_PyMem_SetupAllocators(const char *opt)
+{
+ if (opt == NULL || *opt == '\0') {
+ /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
+ options): use default allocators */
+#ifdef Py_DEBUG
+# ifdef WITH_PYMALLOC
+ opt = "pymalloc_debug";
+# else
+ opt = "malloc_debug";
+# endif
+#else
+ /* !Py_DEBUG */
+# ifdef WITH_PYMALLOC
+ opt = "pymalloc";
+# else
+ opt = "malloc";
+# endif
+#endif
+ }
+
+ if (strcmp(opt, "debug") == 0) {
+ PyMem_SetupDebugHooks();
+ }
+ else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0)
+ {
+ PyMemAllocatorEx alloc = {NULL, PYRAW_FUNCS};
+
+ PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
+
+ if (strcmp(opt, "malloc_debug") == 0)
+ PyMem_SetupDebugHooks();
+ }
+#ifdef WITH_PYMALLOC
+ else if (strcmp(opt, "pymalloc") == 0
+ || strcmp(opt, "pymalloc_debug") == 0)
+ {
+ PyMemAllocatorEx raw_alloc = {NULL, PYRAW_FUNCS};
+ PyMemAllocatorEx mem_alloc = {NULL, PYMEM_FUNCS};
+ PyMemAllocatorEx obj_alloc = {NULL, PYOBJ_FUNCS};
+
+ PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &raw_alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &mem_alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &obj_alloc);
+
+ if (strcmp(opt, "pymalloc_debug") == 0)
+ PyMem_SetupDebugHooks();
+ }
+#endif
+ else {
+ /* unknown allocator */
+ return -1;
+ }
+ return 0;
+}
+
#undef PYRAW_FUNCS
#undef PYMEM_FUNCS
#undef PYOBJ_FUNCS
+#undef PYRAWDBG_FUNCS
#undef PYDBG_FUNCS
static PyObjectArenaAllocator _PyObject_Arena = {NULL,
@@ -205,23 +287,46 @@ static PyObjectArenaAllocator _PyObject_Arena = {NULL,
#endif
};
+#ifdef WITH_PYMALLOC
+static int
+_PyMem_DebugEnabled(void)
+{
+ return (_PyObject.malloc == _PyMem_DebugMalloc);
+}
+
+int
+_PyMem_PymallocEnabled(void)
+{
+ if (_PyMem_DebugEnabled()) {
+ return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
+ }
+ else {
+ return (_PyObject.malloc == _PyObject_Malloc);
+ }
+}
+#endif
+
void
PyMem_SetupDebugHooks(void)
{
-#ifdef PYMALLOC_DEBUG
PyMemAllocatorEx alloc;
- alloc.malloc = _PyMem_DebugMalloc;
- alloc.calloc = _PyMem_DebugCalloc;
- alloc.realloc = _PyMem_DebugRealloc;
- alloc.free = _PyMem_DebugFree;
+ alloc.malloc = _PyMem_DebugRawMalloc;
+ alloc.calloc = _PyMem_DebugRawCalloc;
+ alloc.realloc = _PyMem_DebugRawRealloc;
+ alloc.free = _PyMem_DebugRawFree;
- if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) {
+ if (_PyMem_Raw.malloc != _PyMem_DebugRawMalloc) {
alloc.ctx = &_PyMem_Debug.raw;
PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
}
+ alloc.malloc = _PyMem_DebugMalloc;
+ alloc.calloc = _PyMem_DebugCalloc;
+ alloc.realloc = _PyMem_DebugRealloc;
+ alloc.free = _PyMem_DebugFree;
+
if (_PyMem.malloc != _PyMem_DebugMalloc) {
alloc.ctx = &_PyMem_Debug.mem;
PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
@@ -233,7 +338,6 @@ PyMem_SetupDebugHooks(void)
PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
}
-#endif
}
void
@@ -264,7 +368,6 @@ PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
/* ignore unknown domain */
}
-
}
void
@@ -642,22 +745,6 @@ static int running_on_valgrind = -1;
#define SIMPLELOCK_LOCK(lock) /* acquire released lock */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
-/*
- * Basic types
- * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
- */
-#undef uchar
-#define uchar unsigned char /* assuming == 8 bits */
-
-#undef uint
-#define uint unsigned int /* assuming >= 16 bits */
-
-#undef ulong
-#define ulong unsigned long /* assuming >= 32 bits */
-
-#undef uptr
-#define uptr Py_uintptr_t
-
/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uchar block;
@@ -949,11 +1036,15 @@ new_arena(void)
struct arena_object* arenaobj;
uint excess; /* number of bytes above pool alignment */
void *address;
+ static int debug_stats = -1;
-#ifdef PYMALLOC_DEBUG
- if (Py_GETENV("PYTHONMALLOCSTATS"))
+ if (debug_stats == -1) {
+ char *opt = Py_GETENV("PYTHONMALLOCSTATS");
+ debug_stats = (opt != NULL && *opt != '\0');
+ }
+ if (debug_stats)
_PyObject_DebugMallocStats(stderr);
-#endif
+
if (unused_arena_objects == NULL) {
uint i;
uint numarenas;
@@ -1709,7 +1800,7 @@ _Py_GetAllocatedBlocks(void)
#endif /* WITH_PYMALLOC */
-#ifdef PYMALLOC_DEBUG
+
/*==========================================================================*/
/* A x-platform debugging allocator. This doesn't manage memory directly,
* it wraps a real allocator, adding extra debugging info to the memory blocks.
@@ -1767,31 +1858,6 @@ write_size_t(void *p, size_t n)
}
}
-#ifdef Py_DEBUG
-/* Is target in the list? The list is traversed via the nextpool pointers.
- * The list may be NULL-terminated, or circular. Return 1 if target is in
- * list, else 0.
- */
-static int
-pool_is_in_list(const poolp target, poolp list)
-{
- poolp origlist = list;
- assert(target != NULL);
- if (list == NULL)
- return 0;
- do {
- if (target == list)
- return 1;
- list = list->nextpool;
- } while (list != NULL && list != origlist);
- return 0;
-}
-
-#else
-#define pool_is_in_list(X, Y) 1
-
-#endif /* Py_DEBUG */
-
/* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and
fills them with useful stuff, here calling the underlying malloc's result p:
@@ -1819,7 +1885,7 @@ p[2*S+n+S: 2*S+n+2*S]
*/
static void *
-_PyMem_DebugAlloc(int use_calloc, void *ctx, size_t nbytes)
+_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uchar *p; /* base address of malloc'ed block */
@@ -1856,18 +1922,18 @@ _PyMem_DebugAlloc(int use_calloc, void *ctx, size_t nbytes)
}
static void *
-_PyMem_DebugMalloc(void *ctx, size_t nbytes)
+_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
{
- return _PyMem_DebugAlloc(0, ctx, nbytes);
+ return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}
static void *
-_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
+_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
{
size_t nbytes;
assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
nbytes = nelem * elsize;
- return _PyMem_DebugAlloc(1, ctx, nbytes);
+ return _PyMem_DebugRawAlloc(1, ctx, nbytes);
}
/* The debug free first checks the 2*SST bytes on each end for sanity (in
@@ -1876,7 +1942,7 @@ _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
Then calls the underlying free.
*/
static void
-_PyMem_DebugFree(void *ctx, void *p)
+_PyMem_DebugRawFree(void *ctx, void *p)
{
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */
@@ -1893,7 +1959,7 @@ _PyMem_DebugFree(void *ctx, void *p)
}
static void *
-_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes)
+_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uchar *q = (uchar *)p, *oldq;
@@ -1903,7 +1969,7 @@ _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes)
int i;
if (p == NULL)
- return _PyMem_DebugAlloc(0, ctx, nbytes);
+ return _PyMem_DebugRawAlloc(0, ctx, nbytes);
_PyMem_DebugCheckAddress(api->api_id, p);
bumpserialno();
@@ -1946,6 +2012,44 @@ _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes)
return q;
}
+static void
+_PyMem_DebugCheckGIL(void)
+{
+#ifdef WITH_THREAD
+ if (!PyGILState_Check())
+ Py_FatalError("Python memory allocator called "
+ "without holding the GIL");
+#endif
+}
+
+static void *
+_PyMem_DebugMalloc(void *ctx, size_t nbytes)
+{
+ _PyMem_DebugCheckGIL();
+ return _PyMem_DebugRawMalloc(ctx, nbytes);
+}
+
+static void *
+_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
+{
+ _PyMem_DebugCheckGIL();
+ return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
+}
+
+static void
+_PyMem_DebugFree(void *ctx, void *ptr)
+{
+ _PyMem_DebugCheckGIL();
+ _PyMem_DebugRawFree(ctx, ptr);
+}
+
+static void *
+_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
+{
+ _PyMem_DebugCheckGIL();
+ return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
+}
+
/* Check the forbidden bytes on both ends of the memory allocated for p.
* If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
* and call Py_FatalError to kill the program.
@@ -2104,9 +2208,12 @@ _PyObject_DebugDumpAddress(const void *p)
}
fputc('\n', stderr);
}
+ fputc('\n', stderr);
+
+ fflush(stderr);
+ _PyMem_DumpTraceback(fileno(stderr), p);
}
-#endif /* PYMALLOC_DEBUG */
static size_t
printone(FILE *out, const char* msg, size_t value)
@@ -2158,8 +2265,30 @@ _PyDebugAllocatorStats(FILE *out,
(void)printone(out, buf2, num_blocks * sizeof_block);
}
+
#ifdef WITH_PYMALLOC
+#ifdef Py_DEBUG
+/* Is target in the list? The list is traversed via the nextpool pointers.
+ * The list may be NULL-terminated, or circular. Return 1 if target is in
+ * list, else 0.
+ */
+static int
+pool_is_in_list(const poolp target, poolp list)
+{
+ poolp origlist = list;
+ assert(target != NULL);
+ if (list == NULL)
+ return 0;
+ do {
+ if (target == list)
+ return 1;
+ list = list->nextpool;
+ } while (list != NULL && list != origlist);
+ return 0;
+}
+#endif
+
/* Print summary info to "out" about the state of pymalloc's structures.
* In Py_DEBUG mode, also perform some expensive internal consistency
* checks.
@@ -2233,7 +2362,9 @@ _PyObject_DebugMallocStats(FILE *out)
if (p->ref.count == 0) {
/* currently unused */
+#ifdef Py_DEBUG
assert(pool_is_in_list(p, arenas[i].freepools));
+#endif
continue;
}
++numpools[sz];
@@ -2273,9 +2404,8 @@ _PyObject_DebugMallocStats(FILE *out)
quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
}
fputc('\n', out);
-#ifdef PYMALLOC_DEBUG
- (void)printone(out, "# times object malloc called", serialno);
-#endif
+ if (_PyMem_DebugEnabled())
+ (void)printone(out, "# times object malloc called", serialno);
(void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
(void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
(void)printone(out, "# arenas highwater mark", narenas_highwater);
@@ -2303,6 +2433,7 @@ _PyObject_DebugMallocStats(FILE *out)
#endif /* #ifdef WITH_PYMALLOC */
+
#ifdef Py_USING_MEMORY_DEBUGGER
/* Make this function last so gcc won't inline it since the definition is
* after the reference.
diff --git a/Objects/odictobject.c b/Objects/odictobject.c
index 1abdd02..dccbb3e 100644
--- a/Objects/odictobject.c
+++ b/Objects/odictobject.c
@@ -1424,14 +1424,13 @@ static PyMethodDef odict_methods[] = {
* OrderedDict members
*/
-/* tp_members */
+/* tp_getset */
-static PyMemberDef odict_members[] = {
- {"__dict__", T_OBJECT, offsetof(PyODictObject, od_inst_dict), READONLY},
- {0}
+static PyGetSetDef odict_getset[] = {
+ {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict},
+ {NULL}
};
-
/* ----------------------------------------------
* OrderedDict type slot methods
*/
@@ -1653,20 +1652,12 @@ odict_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
odict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- PyObject *dict;
PyODictObject *od;
- dict = PyDict_New();
- if (dict == NULL)
- return NULL;
-
od = (PyODictObject *)PyDict_Type.tp_new(type, args, kwds);
- if (od == NULL) {
- Py_DECREF(dict);
+ if (od == NULL)
return NULL;
- }
- od->od_inst_dict = dict;
/* type constructor fills the memory with zeros (see
PyType_GenericAlloc()), there is no need to set them to zero again */
if (_odict_resize(od) < 0) {
@@ -1708,8 +1699,8 @@ PyTypeObject PyODict_Type = {
(getiterfunc)odict_iter, /* tp_iter */
0, /* tp_iternext */
odict_methods, /* tp_methods */
- odict_members, /* tp_members */
- 0, /* tp_getset */
+ 0, /* tp_members */
+ odict_getset, /* tp_getset */
&PyDict_Type, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 4ef692d..6dd403f 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -26,7 +26,6 @@
#include "Python.h"
#include "structmember.h"
-#include "stringlib/eq.h"
/* Object used as dummy key to fill deleted entries */
static PyObject _dummy_struct;
@@ -48,19 +47,20 @@ static PyObject _dummy_struct;
static setentry *
set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- setentry *table = so->table;
- setentry *freeslot = NULL;
+ setentry *table;
setentry *entry;
- size_t perturb = hash;
+ size_t perturb;
size_t mask = so->mask;
size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
size_t j;
int cmp;
- entry = &table[i];
+ entry = &so->table[i];
if (entry->key == NULL)
return entry;
+ perturb = hash;
+
while (1) {
if (entry->hash == hash) {
PyObject *startkey = entry->key;
@@ -70,8 +70,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
- && unicode_eq(startkey, key))
+ && _PyUnicode_EQ(startkey, key))
return entry;
+ table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
@@ -83,14 +84,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
mask = so->mask; /* help avoid a register spill */
}
- if (entry->hash == -1 && freeslot == NULL)
- freeslot = entry;
if (i + LINEAR_PROBES <= mask) {
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
- if (entry->key == NULL)
- goto found_null;
+ if (entry->hash == 0 && entry->key == NULL)
+ return entry;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
@@ -98,8 +97,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
- && unicode_eq(startkey, key))
+ && _PyUnicode_EQ(startkey, key))
return entry;
+ table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
@@ -111,7 +111,104 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
mask = so->mask;
}
- if (entry->hash == -1 && freeslot == NULL)
+ }
+ }
+
+ perturb >>= PERTURB_SHIFT;
+ i = (i * 5 + 1 + perturb) & mask;
+
+ entry = &so->table[i];
+ if (entry->key == NULL)
+ return entry;
+ }
+}
+
+static int set_table_resize(PySetObject *, Py_ssize_t);
+
+static int
+set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+ setentry *table;
+ setentry *freeslot;
+ setentry *entry;
+ size_t perturb;
+ size_t mask;
+ size_t i; /* Unsigned for defined overflow behavior */
+ size_t j;
+ int cmp;
+
+ /* Pre-increment is necessary to prevent arbitrary code in the rich
+ comparison from deallocating the key just before the insertion. */
+ Py_INCREF(key);
+
+ restart:
+
+ mask = so->mask;
+ i = (size_t)hash & mask;
+
+ entry = &so->table[i];
+ if (entry->key == NULL)
+ goto found_unused;
+
+ freeslot = NULL;
+ perturb = hash;
+
+ while (1) {
+ if (entry->hash == hash) {
+ PyObject *startkey = entry->key;
+ /* startkey cannot be a dummy because the dummy hash field is -1 */
+ assert(startkey != dummy);
+ if (startkey == key)
+ goto found_active;
+ if (PyUnicode_CheckExact(startkey)
+ && PyUnicode_CheckExact(key)
+ && _PyUnicode_EQ(startkey, key))
+ goto found_active;
+ table = so->table;
+ Py_INCREF(startkey);
+ cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+ Py_DECREF(startkey);
+ if (cmp > 0) /* likely */
+ goto found_active;
+ if (cmp < 0)
+ goto comparison_error;
+ /* Continuing the search from the current entry only makes
+ sense if the table and entry are unchanged; otherwise,
+ we have to restart from the beginning */
+ if (table != so->table || entry->key != startkey)
+ goto restart;
+ mask = so->mask; /* help avoid a register spill */
+ }
+ else if (entry->hash == -1 && freeslot == NULL)
+ freeslot = entry;
+
+ if (i + LINEAR_PROBES <= mask) {
+ for (j = 0 ; j < LINEAR_PROBES ; j++) {
+ entry++;
+ if (entry->hash == 0 && entry->key == NULL)
+ goto found_unused_or_dummy;
+ if (entry->hash == hash) {
+ PyObject *startkey = entry->key;
+ assert(startkey != dummy);
+ if (startkey == key)
+ goto found_active;
+ if (PyUnicode_CheckExact(startkey)
+ && PyUnicode_CheckExact(key)
+ && _PyUnicode_EQ(startkey, key))
+ goto found_active;
+ table = so->table;
+ Py_INCREF(startkey);
+ cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+ Py_DECREF(startkey);
+ if (cmp > 0)
+ goto found_active;
+ if (cmp < 0)
+ goto comparison_error;
+ if (table != so->table || entry->key != startkey)
+ goto restart;
+ mask = so->mask;
+ }
+ else if (entry->hash == -1 && freeslot == NULL)
freeslot = entry;
}
}
@@ -119,29 +216,51 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
perturb >>= PERTURB_SHIFT;
i = (i * 5 + 1 + perturb) & mask;
- entry = &table[i];
+ entry = &so->table[i];
if (entry->key == NULL)
- goto found_null;
+ goto found_unused_or_dummy;
}
- found_null:
- return freeslot == NULL ? entry : freeslot;
+
+ found_unused_or_dummy:
+ if (freeslot == NULL)
+ goto found_unused;
+ so->used++;
+ freeslot->key = key;
+ freeslot->hash = hash;
+ return 0;
+
+ found_unused:
+ so->fill++;
+ so->used++;
+ entry->key = key;
+ entry->hash = hash;
+ if ((size_t)so->fill*3 < mask*2)
+ return 0;
+ return set_table_resize(so, so->used);
+
+ found_active:
+ Py_DECREF(key);
+ return 0;
+
+ comparison_error:
+ Py_DECREF(key);
+ return -1;
}
/*
Internal routine used by set_table_resize() to insert an item which is
known to be absent from the set. This routine also assumes that
the set contains no deleted entries. Besides the performance benefit,
-using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
-Note that no refcounts are changed by this routine; if needed, the caller
-is responsible for incref'ing `key`.
+there is also safety benefit since using set_add_entry() risks making
+a callback in the middle of a set_table_resize(), see issue 1456209.
+The caller is responsible for updating the key's reference count and
+the setobject's fill and used fields.
*/
static void
-set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
{
- setentry *table = so->table;
setentry *entry;
size_t perturb = hash;
- size_t mask = (size_t)so->mask;
size_t i = (size_t)hash & mask;
size_t j;
@@ -162,45 +281,11 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
found_null:
entry->key = key;
entry->hash = hash;
- so->fill++;
- so->used++;
}
/* ======== End logic for probing the hash table ========================== */
/* ======================================================================== */
-
-/*
-Internal routine to insert a new key into the table.
-Used by the public insert routine.
-Eats a reference to key.
-*/
-static int
-set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
-{
- setentry *entry;
-
- entry = set_lookkey(so, key, hash);
- if (entry == NULL)
- return -1;
- if (entry->key == NULL) {
- /* UNUSED */
- entry->key = key;
- entry->hash = hash;
- so->fill++;
- so->used++;
- } else if (entry->key == dummy) {
- /* DUMMY */
- entry->key = key;
- entry->hash = hash;
- so->used++;
- } else {
- /* ACTIVE */
- Py_DECREF(key);
- }
- return 0;
-}
-
/*
Restructure the table by allocating a new table and reinserting all
keys again. When entries have been deleted, the new table may
@@ -213,10 +298,13 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
setentry *oldtable, *newtable, *entry;
Py_ssize_t oldfill = so->fill;
Py_ssize_t oldused = so->used;
+ Py_ssize_t oldmask = so->mask;
+ size_t newmask;
int is_oldtable_malloced;
setentry small_copy[PySet_MINSIZE];
assert(minused >= 0);
+ minused = (minused > 50000) ? minused * 2 : minused * 4;
/* Find the smallest table size > minused. */
/* XXX speed-up with intrinsics */
@@ -264,25 +352,24 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
/* Make the set empty, using the new table. */
assert(newtable != oldtable);
memset(newtable, 0, sizeof(setentry) * newsize);
- so->fill = 0;
- so->used = 0;
+ so->fill = oldused;
+ so->used = oldused;
so->mask = newsize - 1;
so->table = newtable;
/* Copy the data over; this is refcount-neutral for active entries;
dummy entries aren't copied over, of course */
+ newmask = (size_t)so->mask;
if (oldfill == oldused) {
- for (entry = oldtable; oldused > 0; entry++) {
+ for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
if (entry->key != NULL) {
- oldused--;
- set_insert_clean(so, entry->key, entry->hash);
+ set_insert_clean(newtable, newmask, entry->key, entry->hash);
}
}
} else {
- for (entry = oldtable; oldused > 0; entry++) {
+ for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
if (entry->key != NULL && entry->key != dummy) {
- oldused--;
- set_insert_clean(so, entry->key, entry->hash);
+ set_insert_clean(newtable, newmask, entry->key, entry->hash);
}
}
}
@@ -292,31 +379,42 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
return 0;
}
-/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
+static int
+set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+ setentry *entry;
+
+ entry = set_lookkey(so, key, hash);
+ if (entry != NULL)
+ return entry->key != NULL;
+ return -1;
+}
+
+#define DISCARD_NOTFOUND 0
+#define DISCARD_FOUND 1
static int
-set_add_entry(PySetObject *so, setentry *entry)
+set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- Py_ssize_t n_used;
- PyObject *key = entry->key;
- Py_hash_t hash = entry->hash;
+ setentry *entry;
+ PyObject *old_key;
- assert(so->fill <= so->mask); /* at least one empty slot */
- n_used = so->used;
- Py_INCREF(key);
- if (set_insert_key(so, key, hash)) {
- Py_DECREF(key);
+ entry = set_lookkey(so, key, hash);
+ if (entry == NULL)
return -1;
- }
- if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
- return 0;
- return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+ if (entry->key == NULL)
+ return DISCARD_NOTFOUND;
+ old_key = entry->key;
+ entry->key = dummy;
+ entry->hash = -1;
+ so->used--;
+ Py_DECREF(old_key);
+ return DISCARD_FOUND;
}
static int
set_add_key(PySetObject *so, PyObject *key)
{
- setentry entry;
Py_hash_t hash;
if (!PyUnicode_CheckExact(key) ||
@@ -325,50 +423,35 @@ set_add_key(PySetObject *so, PyObject *key)
if (hash == -1)
return -1;
}
- entry.key = key;
- entry.hash = hash;
- return set_add_entry(so, &entry);
+ return set_add_entry(so, key, hash);
}
-#define DISCARD_NOTFOUND 0
-#define DISCARD_FOUND 1
-
static int
-set_discard_entry(PySetObject *so, setentry *oldentry)
+set_contains_key(PySetObject *so, PyObject *key)
{
- setentry *entry;
- PyObject *old_key;
+ Py_hash_t hash;
- entry = set_lookkey(so, oldentry->key, oldentry->hash);
- if (entry == NULL)
- return -1;
- if (entry->key == NULL || entry->key == dummy)
- return DISCARD_NOTFOUND;
- old_key = entry->key;
- entry->key = dummy;
- entry->hash = -1;
- so->used--;
- Py_DECREF(old_key);
- return DISCARD_FOUND;
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyASCIIObject *) key)->hash) == -1) {
+ hash = PyObject_Hash(key);
+ if (hash == -1)
+ return -1;
+ }
+ return set_contains_entry(so, key, hash);
}
static int
set_discard_key(PySetObject *so, PyObject *key)
{
- setentry entry;
Py_hash_t hash;
- assert (PyAnySet_Check(so));
-
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
}
- entry.key = key;
- entry.hash = hash;
- return set_discard_entry(so, &entry);
+ return set_discard_entry(so, key, hash);
}
static void
@@ -449,20 +532,22 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
{
Py_ssize_t i;
Py_ssize_t mask;
- setentry *table;
+ setentry *entry;
assert (PyAnySet_Check(so));
i = *pos_ptr;
assert(i >= 0);
- table = so->table;
mask = so->mask;
- while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
+ entry = &so->table[i];
+ while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
i++;
+ entry++;
+ }
*pos_ptr = i+1;
if (i > mask)
return 0;
- assert(table[i].key != NULL);
- *entry_ptr = &table[i];
+ assert(entry != NULL);
+ *entry_ptr = entry;
return 1;
}
@@ -560,8 +645,8 @@ set_merge(PySetObject *so, PyObject *otherset)
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
- if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
- if (set_table_resize(so, (so->used + other->used)*2) != 0)
+ if ((so->fill + other->used)*3 >= so->mask*2) {
+ if (set_table_resize(so, so->used + other->used) != 0)
return -1;
}
so_entry = so->table;
@@ -586,11 +671,15 @@ set_merge(PySetObject *so, PyObject *otherset)
/* If our table is empty, we can use set_insert_clean() */
if (so->fill == 0) {
- for (i = 0; i <= other->mask; i++, other_entry++) {
+ setentry *newtable = so->table;
+ size_t newmask = (size_t)so->mask;
+ so->fill = other->used;
+ so->used = other->used;
+ for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
key = other_entry->key;
if (key != NULL && key != dummy) {
Py_INCREF(key);
- set_insert_clean(so, key, other_entry->hash);
+ set_insert_clean(newtable, newmask, key, other_entry->hash);
}
}
return 0;
@@ -601,46 +690,13 @@ set_merge(PySetObject *so, PyObject *otherset)
other_entry = &other->table[i];
key = other_entry->key;
if (key != NULL && key != dummy) {
- Py_INCREF(key);
- if (set_insert_key(so, key, other_entry->hash)) {
- Py_DECREF(key);
+ if (set_add_entry(so, key, other_entry->hash))
return -1;
- }
}
}
return 0;
}
-static int
-set_contains_entry(PySetObject *so, setentry *entry)
-{
- PyObject *key;
- setentry *lu_entry;
-
- lu_entry = set_lookkey(so, entry->key, entry->hash);
- if (lu_entry == NULL)
- return -1;
- key = lu_entry->key;
- return key != NULL && key != dummy;
-}
-
-static int
-set_contains_key(PySetObject *so, PyObject *key)
-{
- setentry entry;
- Py_hash_t hash;
-
- if (!PyUnicode_CheckExact(key) ||
- (hash = ((PyASCIIObject *) key)->hash) == -1) {
- hash = PyObject_Hash(key);
- if (hash == -1)
- return -1;
- }
- entry.key = key;
- entry.hash = hash;
- return set_contains_entry(so, &entry);
-}
-
static PyObject *
set_pop(PySetObject *so)
{
@@ -682,43 +738,64 @@ set_traverse(PySetObject *so, visitproc visit, void *arg)
return 0;
}
-static Py_hash_t
-frozenset_hash(PyObject *self)
+/* Work to increase the bit dispersion for closely spaced hash values.
+ This is important because some use cases have many combinations of a
+ small number of elements with nearby hashes so that many distinct
+ combinations collapse to only a handful of distinct hash values. */
+
+static Py_uhash_t
+_shuffle_bits(Py_uhash_t h)
{
- /* Most of the constants in this hash algorithm are randomly choosen
- large primes with "interesting bit patterns" and that passed
- tests for good collision statistics on a variety of problematic
- datasets such as:
+ return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
+}
- ps = []
- for r in range(21):
- ps += itertools.combinations(range(20), r)
- num_distinct_hashes = len({hash(frozenset(s)) for s in ps})
+/* Most of the constants in this hash algorithm are randomly chosen
+ large primes with "interesting bit patterns" and that passed tests
+ for good collision statistics on a variety of problematic datasets
+ including powersets and graph structures (such as David Eppstein's
+ graph recipes in Lib/test/test_set.py) */
- */
+static Py_hash_t
+frozenset_hash(PyObject *self)
+{
PySetObject *so = (PySetObject *)self;
- Py_uhash_t h, hash = 1927868237UL;
+ Py_uhash_t hash = 0;
setentry *entry;
- Py_ssize_t pos = 0;
if (so->hash != -1)
return so->hash;
- hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1;
- while (set_next(so, &pos, &entry)) {
- /* Work to increase the bit dispersion for closely spaced hash
- values. This is important because some use cases have many
- combinations of a small number of elements with nearby
- hashes so that many distinct combinations collapse to only
- a handful of distinct hash values. */
- h = entry->hash;
- hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
- }
- /* Make the final result spread-out in a different pattern
- than the algorithm for tuples or other python objects. */
+ /* Xor-in shuffled bits from every entry's hash field because xor is
+ commutative and a frozenset hash should be independent of order.
+
+ For speed, include null entries and dummy entries and then
+ subtract out their effect afterwards so that the final hash
+ depends only on active entries. This allows the code to be
+ vectorized by the compiler and it saves the unpredictable
+ branches that would arise when trying to exclude null and dummy
+ entries on every iteration. */
+
+ for (entry = so->table; entry <= &so->table[so->mask]; entry++)
+ hash ^= _shuffle_bits(entry->hash);
+
+ /* Remove the effect of an odd number of NULL entries */
+ if ((so->mask + 1 - so->fill) & 1)
+ hash ^= _shuffle_bits(0);
+
+ /* Remove the effect of an odd number of dummy entries */
+ if ((so->fill - so->used) & 1)
+ hash ^= _shuffle_bits(-1);
+
+ /* Factor in the number of active entries */
+ hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
+
+ /* Disperse patterns arising in nested frozensets */
hash = hash * 69069U + 907133923UL;
+
+ /* -1 is reserved as an error code */
if (hash == (Py_uhash_t)-1)
hash = 590923713UL;
+
so->hash = hash;
return hash;
}
@@ -865,7 +942,7 @@ PyTypeObject PySetIter_Type = {
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)setiter_traverse, /* tp_traverse */
0, /* tp_clear */
@@ -910,18 +987,14 @@ set_update_internal(PySetObject *so, PyObject *other)
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
- if (dictsize == -1)
+ if (dictsize < 0)
return -1;
- if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
- if (set_table_resize(so, (so->used + dictsize)*2) != 0)
+ if ((so->fill + dictsize)*3 >= so->mask*2) {
+ if (set_table_resize(so, so->used + dictsize) != 0)
return -1;
}
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
- setentry an_entry;
-
- an_entry.hash = hash;
- an_entry.key = key;
- if (set_add_entry(so, &an_entry))
+ if (set_add_entry(so, key, hash))
return -1;
}
return 0;
@@ -970,9 +1043,8 @@ PyDoc_STRVAR(update_doc,
static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{
- PySetObject *so = NULL;
+ PySetObject *so;
- /* create PySetObject structure */
so = (PySetObject *)type->tp_alloc(type, 0);
if (so == NULL)
return NULL;
@@ -1015,7 +1087,8 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *iterable = NULL, *result;
- if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
+ if (kwds != NULL && type == &PyFrozenSet_Type
+ && !_PyArg_NoKeywords("frozenset()", kwds))
return NULL;
if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
@@ -1042,24 +1115,9 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return emptyfrozenset;
}
-int
-PySet_ClearFreeList(void)
-{
- return 0;
-}
-
-void
-PySet_Fini(void)
-{
- Py_CLEAR(emptyfrozenset);
-}
-
static PyObject *
set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
- return NULL;
-
return make_new_set(type, NULL);
}
@@ -1201,6 +1259,8 @@ set_intersection(PySetObject *so, PyObject *other)
{
PySetObject *result;
PyObject *key, *it, *tmp;
+ Py_hash_t hash;
+ int rv;
if ((PyObject *)so == other)
return set_copy(so);
@@ -1220,13 +1280,15 @@ set_intersection(PySetObject *so, PyObject *other)
}
while (set_next((PySetObject *)other, &pos, &entry)) {
- int rv = set_contains_entry(so, entry);
- if (rv == -1) {
+ key = entry->key;
+ hash = entry->hash;
+ rv = set_contains_entry(so, key, hash);
+ if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (rv) {
- if (set_add_entry(result, entry)) {
+ if (set_add_entry(result, key, hash)) {
Py_DECREF(result);
return NULL;
}
@@ -1242,32 +1304,15 @@ set_intersection(PySetObject *so, PyObject *other)
}
while ((key = PyIter_Next(it)) != NULL) {
- int rv;
- setentry entry;
- Py_hash_t hash = PyObject_Hash(key);
-
- if (hash == -1) {
- Py_DECREF(it);
- Py_DECREF(result);
- Py_DECREF(key);
- return NULL;
- }
- entry.hash = hash;
- entry.key = key;
- rv = set_contains_entry(so, &entry);
- if (rv == -1) {
- Py_DECREF(it);
- Py_DECREF(result);
- Py_DECREF(key);
- return NULL;
- }
+ hash = PyObject_Hash(key);
+ if (hash == -1)
+ goto error;
+ rv = set_contains_entry(so, key, hash);
+ if (rv < 0)
+ goto error;
if (rv) {
- if (set_add_entry(result, &entry)) {
- Py_DECREF(it);
- Py_DECREF(result);
- Py_DECREF(key);
- return NULL;
- }
+ if (set_add_entry(result, key, hash))
+ goto error;
}
Py_DECREF(key);
}
@@ -1277,6 +1322,11 @@ set_intersection(PySetObject *so, PyObject *other)
return NULL;
}
return (PyObject *)result;
+ error:
+ Py_DECREF(it);
+ Py_DECREF(result);
+ Py_DECREF(key);
+ return NULL;
}
static PyObject *
@@ -1363,6 +1413,7 @@ static PyObject *
set_isdisjoint(PySetObject *so, PyObject *other)
{
PyObject *key, *it, *tmp;
+ int rv;
if ((PyObject *)so == other) {
if (PySet_GET_SIZE(so) == 0)
@@ -1381,8 +1432,8 @@ set_isdisjoint(PySetObject *so, PyObject *other)
other = tmp;
}
while (set_next((PySetObject *)other, &pos, &entry)) {
- int rv = set_contains_entry(so, entry);
- if (rv == -1)
+ rv = set_contains_entry(so, entry->key, entry->hash);
+ if (rv < 0)
return NULL;
if (rv)
Py_RETURN_FALSE;
@@ -1395,8 +1446,6 @@ set_isdisjoint(PySetObject *so, PyObject *other)
return NULL;
while ((key = PyIter_Next(it)) != NULL) {
- int rv;
- setentry entry;
Py_hash_t hash = PyObject_Hash(key);
if (hash == -1) {
@@ -1404,11 +1453,9 @@ set_isdisjoint(PySetObject *so, PyObject *other)
Py_DECREF(it);
return NULL;
}
- entry.hash = hash;
- entry.key = key;
- rv = set_contains_entry(so, &entry);
+ rv = set_contains_entry(so, key, hash);
Py_DECREF(key);
- if (rv == -1) {
+ if (rv < 0) {
Py_DECREF(it);
return NULL;
}
@@ -1437,7 +1484,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
Py_ssize_t pos = 0;
while (set_next((PySetObject *)other, &pos, &entry))
- if (set_discard_entry(so, entry) == -1)
+ if (set_discard_entry(so, entry->key, entry->hash) < 0)
return -1;
} else {
PyObject *key, *it;
@@ -1446,7 +1493,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
return -1;
while ((key = PyIter_Next(it)) != NULL) {
- if (set_discard_key(so, key) == -1) {
+ if (set_discard_key(so, key) < 0) {
Py_DECREF(it);
Py_DECREF(key);
return -1;
@@ -1457,10 +1504,10 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
if (PyErr_Occurred())
return -1;
}
- /* If more than 1/5 are dummies, then resize them away. */
- if ((so->fill - so->used) * 5 < so->mask)
+ /* If more than 1/4th are dummies, then resize them away. */
+ if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
return 0;
- return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+ return set_table_resize(so, so->used);
}
static PyObject *
@@ -1487,7 +1534,7 @@ set_copy_and_difference(PySetObject *so, PyObject *other)
result = set_copy(so);
if (result == NULL)
return NULL;
- if (set_difference_update_internal((PySetObject *) result, other) != -1)
+ if (set_difference_update_internal((PySetObject *) result, other) == 0)
return result;
Py_DECREF(result);
return NULL;
@@ -1497,8 +1544,11 @@ static PyObject *
set_difference(PySetObject *so, PyObject *other)
{
PyObject *result;
+ PyObject *key;
+ Py_hash_t hash;
setentry *entry;
Py_ssize_t pos = 0;
+ int rv;
if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
return set_copy_and_difference(so, other);
@@ -1516,17 +1566,15 @@ set_difference(PySetObject *so, PyObject *other)
if (PyDict_CheckExact(other)) {
while (set_next(so, &pos, &entry)) {
- setentry entrycopy;
- int rv;
- entrycopy.hash = entry->hash;
- entrycopy.key = entry->key;
- rv = _PyDict_Contains(other, entry->key, entry->hash);
+ key = entry->key;
+ hash = entry->hash;
+ rv = _PyDict_Contains(other, key, hash);
if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
- if (set_add_entry((PySetObject *)result, &entrycopy)) {
+ if (set_add_entry((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}
@@ -1537,13 +1585,15 @@ set_difference(PySetObject *so, PyObject *other)
/* Iterate over so, checking for common elements in other. */
while (set_next(so, &pos, &entry)) {
- int rv = set_contains_entry((PySetObject *)other, entry);
- if (rv == -1) {
+ key = entry->key;
+ hash = entry->hash;
+ rv = set_contains_entry((PySetObject *)other, key, hash);
+ if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
- if (set_add_entry((PySetObject *)result, entry)) {
+ if (set_add_entry((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}
@@ -1605,29 +1655,24 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
PySetObject *otherset;
PyObject *key;
Py_ssize_t pos = 0;
+ Py_hash_t hash;
setentry *entry;
+ int rv;
if ((PyObject *)so == other)
return set_clear(so);
if (PyDict_CheckExact(other)) {
PyObject *value;
- int rv;
- Py_hash_t hash;
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
- setentry an_entry;
-
Py_INCREF(key);
- an_entry.hash = hash;
- an_entry.key = key;
-
- rv = set_discard_entry(so, &an_entry);
- if (rv == -1) {
+ rv = set_discard_entry(so, key, hash);
+ if (rv < 0) {
Py_DECREF(key);
return NULL;
}
if (rv == DISCARD_NOTFOUND) {
- if (set_add_entry(so, &an_entry)) {
+ if (set_add_entry(so, key, hash)) {
Py_DECREF(key);
return NULL;
}
@@ -1647,13 +1692,15 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
}
while (set_next(otherset, &pos, &entry)) {
- int rv = set_discard_entry(so, entry);
- if (rv == -1) {
+ key = entry->key;
+ hash = entry->hash;
+ rv = set_discard_entry(so, key, hash);
+ if (rv < 0) {
Py_DECREF(otherset);
return NULL;
}
if (rv == DISCARD_NOTFOUND) {
- if (set_add_entry(so, entry)) {
+ if (set_add_entry(so, key, hash)) {
Py_DECREF(otherset);
return NULL;
}
@@ -1715,6 +1762,7 @@ set_issubset(PySetObject *so, PyObject *other)
{
setentry *entry;
Py_ssize_t pos = 0;
+ int rv;
if (!PyAnySet_Check(other)) {
PyObject *tmp, *result;
@@ -1729,8 +1777,8 @@ set_issubset(PySetObject *so, PyObject *other)
Py_RETURN_FALSE;
while (set_next(so, &pos, &entry)) {
- int rv = set_contains_entry((PySetObject *)other, entry);
- if (rv == -1)
+ rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
+ if (rv < 0)
return NULL;
if (!rv)
Py_RETURN_FALSE;
@@ -1821,7 +1869,7 @@ set_contains(PySetObject *so, PyObject *key)
int rv;
rv = set_contains_key(so, key);
- if (rv == -1) {
+ if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return -1;
PyErr_Clear();
@@ -1840,7 +1888,7 @@ set_direct_contains(PySetObject *so, PyObject *key)
long result;
result = set_contains(so, key);
- if (result == -1)
+ if (result < 0)
return NULL;
return PyBool_FromLong(result);
}
@@ -1854,7 +1902,7 @@ set_remove(PySetObject *so, PyObject *key)
int rv;
rv = set_discard_key(so, key);
- if (rv == -1) {
+ if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
@@ -1863,7 +1911,7 @@ set_remove(PySetObject *so, PyObject *key)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
- if (rv == -1)
+ if (rv < 0)
return NULL;
}
@@ -1886,7 +1934,7 @@ set_discard(PySetObject *so, PyObject *key)
int rv;
rv = set_discard_key(so, key);
- if (rv == -1) {
+ if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
@@ -1895,7 +1943,7 @@ set_discard(PySetObject *so, PyObject *key)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
- if (rv == -1)
+ if (rv < 0)
return NULL;
}
Py_RETURN_NONE;
@@ -1949,13 +1997,12 @@ set_init(PySetObject *self, PyObject *args, PyObject *kwds)
{
PyObject *iterable = NULL;
- if (!PyAnySet_Check(self))
- return -1;
- if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds))
+ if (kwds != NULL && !_PyArg_NoKeywords("set()", kwds))
return -1;
if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
return -1;
- set_clear_internal(self);
+ if (self->fill)
+ set_clear_internal(self);
self->hash = -1;
if (iterable == NULL)
return 0;
@@ -2122,7 +2169,7 @@ static PyMethodDef frozenset_methods[] = {
copy_doc},
{"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
difference_doc},
- {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
+ {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
intersection_doc},
{"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
isdisjoint_doc},
@@ -2193,7 +2240,7 @@ PyTypeObject PyFrozenSet_Type = {
(traverseproc)set_traverse, /* tp_traverse */
(inquiry)set_clear_internal, /* tp_clear */
(richcmpfunc)set_richcompare, /* tp_richcompare */
- offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
+ offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
(getiterfunc)set_iter, /* tp_iter */
0, /* tp_iternext */
frozenset_methods, /* tp_methods */
@@ -2277,6 +2324,18 @@ PySet_Add(PyObject *anyset, PyObject *key)
}
int
+PySet_ClearFreeList(void)
+{
+ return 0;
+}
+
+void
+PySet_Fini(void)
+{
+ Py_CLEAR(emptyfrozenset);
+}
+
+int
_PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
{
setentry *entry;
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 0fc6b58..2846d7e 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -1,6 +1,8 @@
/* stringlib: codec implementations */
-#if STRINGLIB_IS_UNICODE
+#if !STRINGLIB_IS_UNICODE
+# error "codecs.h is specific to Unicode"
+#endif
/* Mask to quickly check whether a C 'long' contains a
non-ASCII, UTF8-encoded char. */
@@ -263,50 +265,34 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
Py_ssize_t i; /* index into s of next input byte */
- PyObject *result; /* result string object */
char *p; /* next free byte in output buffer */
- Py_ssize_t nallocated; /* number of result bytes allocated */
- Py_ssize_t nneeded; /* number of result bytes needed */
#if STRINGLIB_SIZEOF_CHAR > 1
- PyObject *errorHandler = NULL;
+ PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
#endif
#if STRINGLIB_SIZEOF_CHAR == 1
const Py_ssize_t max_char_size = 2;
- char stackbuf[MAX_SHORT_UNICHARS * 2];
#elif STRINGLIB_SIZEOF_CHAR == 2
const Py_ssize_t max_char_size = 3;
- char stackbuf[MAX_SHORT_UNICHARS * 3];
#else /* STRINGLIB_SIZEOF_CHAR == 4 */
const Py_ssize_t max_char_size = 4;
- char stackbuf[MAX_SHORT_UNICHARS * 4];
#endif
+ _PyBytesWriter writer;
assert(size >= 0);
+ _PyBytesWriter_Init(&writer);
- if (size <= MAX_SHORT_UNICHARS) {
- /* Write into the stack buffer; nallocated can't overflow.
- * At the end, we'll allocate exactly as much heap space as it
- * turns out we need.
- */
- nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
- result = NULL; /* will allocate after we're done */
- p = stackbuf;
- }
- else {
- if (size > PY_SSIZE_T_MAX / max_char_size) {
- /* integer overflow */
- return PyErr_NoMemory();
- }
- /* Overallocate on the heap, and give the excess back at the end. */
- nallocated = size * max_char_size;
- result = PyBytes_FromStringAndSize(NULL, nallocated);
- if (result == NULL)
- return NULL;
- p = PyBytes_AS_STRING(result);
+ if (size > PY_SSIZE_T_MAX / max_char_size) {
+ /* integer overflow */
+ return PyErr_NoMemory();
}
+ p = _PyBytesWriter_Alloc(&writer, size * max_char_size);
+ if (p == NULL)
+ return NULL;
+
for (i = 0; i < size;) {
Py_UCS4 ch = data[i++];
@@ -326,72 +312,118 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
}
#if STRINGLIB_SIZEOF_CHAR > 1
else if (Py_UNICODE_IS_SURROGATE(ch)) {
- Py_ssize_t newpos;
- Py_ssize_t repsize, k, startpos;
+ Py_ssize_t startpos, endpos, newpos;
+ Py_ssize_t k;
+ if (error_handler == _Py_ERROR_UNKNOWN)
+ error_handler = get_error_handler(errors);
+
startpos = i-1;
- rep = unicode_encode_call_errorhandler(
- errors, &errorHandler, "utf-8", "surrogates not allowed",
- unicode, &exc, startpos, startpos+1, &newpos);
- if (!rep)
- goto error;
-
- if (PyBytes_Check(rep))
- repsize = PyBytes_GET_SIZE(rep);
- else
- repsize = PyUnicode_GET_LENGTH(rep);
+ endpos = startpos+1;
+
+ while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))
+ endpos++;
+
+ /* Only overallocate the buffer if it's not the last write */
+ writer.overallocate = (endpos < size);
+
+ switch (error_handler)
+ {
+ case _Py_ERROR_REPLACE:
+ memset(p, '?', endpos - startpos);
+ p += (endpos - startpos);
+ /* fall through the ignore handler */
+ case _Py_ERROR_IGNORE:
+ i += (endpos - startpos - 1);
+ break;
- if (repsize > max_char_size) {
- Py_ssize_t offset;
+ case _Py_ERROR_SURROGATEPASS:
+ for (k=startpos; k<endpos; k++) {
+ ch = data[k];
+ *p++ = (char)(0xe0 | (ch >> 12));
+ *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+ *p++ = (char)(0x80 | (ch & 0x3f));
+ }
+ i += (endpos - startpos - 1);
+ break;
- if (result == NULL)
- offset = p - stackbuf;
- else
- offset = p - PyBytes_AS_STRING(result);
+ case _Py_ERROR_BACKSLASHREPLACE:
+ /* substract preallocated bytes */
+ writer.min_size -= max_char_size * (endpos - startpos);
+ p = backslashreplace(&writer, p,
+ unicode, startpos, endpos);
+ if (p == NULL)
+ goto error;
+ i += (endpos - startpos - 1);
+ break;
- if (nallocated > PY_SSIZE_T_MAX - repsize + max_char_size) {
- /* integer overflow */
- PyErr_NoMemory();
+ case _Py_ERROR_XMLCHARREFREPLACE:
+ /* substract preallocated bytes */
+ writer.min_size -= max_char_size * (endpos - startpos);
+ p = xmlcharrefreplace(&writer, p,
+ unicode, startpos, endpos);
+ if (p == NULL)
goto error;
+ i += (endpos - startpos - 1);
+ break;
+
+ case _Py_ERROR_SURROGATEESCAPE:
+ for (k=startpos; k<endpos; k++) {
+ ch = data[k];
+ if (!(0xDC80 <= ch && ch <= 0xDCFF))
+ break;
+ *p++ = (char)(ch & 0xff);
}
- nallocated += repsize - max_char_size;
- if (result != NULL) {
- if (_PyBytes_Resize(&result, nallocated) < 0)
- goto error;
- } else {
- result = PyBytes_FromStringAndSize(NULL, nallocated);
- if (result == NULL)
- goto error;
- Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset);
+ if (k >= endpos) {
+ i += (endpos - startpos - 1);
+ break;
}
- p = PyBytes_AS_STRING(result) + offset;
- }
+ startpos = k;
+ assert(startpos < endpos);
+ /* fall through the default handler */
+ default:
+ rep = unicode_encode_call_errorhandler(
+ errors, &error_handler_obj, "utf-8", "surrogates not allowed",
+ unicode, &exc, startpos, endpos, &newpos);
+ if (!rep)
+ goto error;
- if (PyBytes_Check(rep)) {
- char *prep = PyBytes_AS_STRING(rep);
- for(k = repsize; k > 0; k--)
- *p++ = *prep++;
- } else /* rep is unicode */ {
- enum PyUnicode_Kind repkind;
- void *repdata;
+ /* substract preallocated bytes */
+ writer.min_size -= max_char_size;
- if (PyUnicode_READY(rep) < 0)
- goto error;
- repkind = PyUnicode_KIND(rep);
- repdata = PyUnicode_DATA(rep);
+ if (PyBytes_Check(rep)) {
+ p = _PyBytesWriter_WriteBytes(&writer, p,
+ PyBytes_AS_STRING(rep),
+ PyBytes_GET_SIZE(rep));
+ }
+ else {
+ /* rep is unicode */
+ if (PyUnicode_READY(rep) < 0)
+ goto error;
- for(k=0; k<repsize; k++) {
- Py_UCS4 c = PyUnicode_READ(repkind, repdata, k);
- if (0x80 <= c) {
+ if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, "utf-8",
unicode,
i-1, i,
"surrogates not allowed");
goto error;
}
- *p++ = (char)c;
+
+ assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+ p = _PyBytesWriter_WriteBytes(&writer, p,
+ PyUnicode_DATA(rep),
+ PyUnicode_GET_LENGTH(rep));
}
+
+ if (p == NULL)
+ goto error;
+ Py_CLEAR(rep);
+
+ i = newpos;
}
- Py_CLEAR(rep);
+
+ /* If overallocation was disabled, ensure that it was the last
+ write. Otherwise, we missed an optimization */
+ assert(writer.overallocate || i == size);
}
else
#if STRINGLIB_SIZEOF_CHAR > 2
@@ -416,31 +448,18 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
#endif /* STRINGLIB_SIZEOF_CHAR > 1 */
}
- if (result == NULL) {
- /* This was stack allocated. */
- nneeded = p - stackbuf;
- assert(nneeded <= nallocated);
- result = PyBytes_FromStringAndSize(stackbuf, nneeded);
- }
- else {
- /* Cut back to size actually needed. */
- nneeded = p - PyBytes_AS_STRING(result);
- assert(nneeded <= nallocated);
- _PyBytes_Resize(&result, nneeded);
- }
-
#if STRINGLIB_SIZEOF_CHAR > 1
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
#endif
- return result;
+ return _PyBytesWriter_Finish(&writer, p);
#if STRINGLIB_SIZEOF_CHAR > 1
error:
Py_XDECREF(rep);
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
- Py_XDECREF(result);
+ _PyBytesWriter_Dealloc(&writer);
return NULL;
#endif
@@ -806,5 +825,3 @@ STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
#undef SWAB4
#endif
-
-#endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h
index 739cf3d..f054625 100644
--- a/Objects/stringlib/ctype.h
+++ b/Objects/stringlib/ctype.h
@@ -1,5 +1,6 @@
-/* NOTE: this API is -ONLY- for use with single byte character strings. */
-/* Do not use it with Unicode. */
+#if STRINGLIB_IS_UNICODE
+# error "ctype.h only compatible with byte-wise strings"
+#endif
#include "bytes_methods.h"
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index cda68e7..98165ad 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -32,52 +32,98 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
- STRINGLIB_CHAR ch, unsigned char needle,
- int mode)
+STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
- if (mode == FAST_SEARCH) {
- const STRINGLIB_CHAR *ptr = s;
- const STRINGLIB_CHAR *e = s + n;
- while (ptr < e) {
- void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch)
- return (ptr - s);
- /* False positive */
- ptr++;
- }
+ const STRINGLIB_CHAR *p, *e;
+
+ p = s;
+ e = s + n;
+ if (n > 10) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+ p = memchr(s, ch, n);
+ if (p != NULL)
+ return (p - s);
return -1;
+#else
+ /* use memchr if we can choose a needle without two many likely
+ false positives */
+ unsigned char needle = ch & 0xff;
+ /* If looking for a multiple of 256, we'd have too
+ many false positives looking for the '\0' byte in UCS2
+ and UCS4 representations. */
+ if (needle != 0) {
+ while (p < e) {
+ void *candidate = memchr(p, needle,
+ (e - p) * sizeof(STRINGLIB_CHAR));
+ if (candidate == NULL)
+ return -1;
+ p = (const STRINGLIB_CHAR *)
+ _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+ if (*p == ch)
+ return (p - s);
+ /* False positive */
+ p++;
+ }
+ return -1;
+ }
+#endif
}
+ while (p < e) {
+ if (*p == ch)
+ return (p - s);
+ p++;
+ }
+ return -1;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+ const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR
/* memrchr() is a GNU extension, available since glibc 2.1.91.
it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop in FASTSEARCH below */
- else if (mode == FAST_RSEARCH) {
- while (n > 0) {
- const STRINGLIB_CHAR *found;
- void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- n = found - s;
- if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch)
- return n;
- /* False positive */
- }
+ faster than our hand-written loop below */
+
+ if (n > 10) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+ p = memrchr(s, ch, n);
+ if (p != NULL)
+ return (p - s);
return -1;
- }
+#else
+ /* use memrchr if we can choose a needle without two many likely
+ false positives */
+ unsigned char needle = ch & 0xff;
+ /* If looking for a multiple of 256, we'd have too
+ many false positives looking for the '\0' byte in UCS2
+ and UCS4 representations. */
+ if (needle != 0) {
+ while (n > 0) {
+ void *candidate = memrchr(s, needle,
+ n * sizeof(STRINGLIB_CHAR));
+ if (candidate == NULL)
+ return -1;
+ p = (const STRINGLIB_CHAR *)
+ _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+ n = p - s;
+ if (*p == ch)
+ return n;
+ /* False positive */
+ }
+ return -1;
+ }
#endif
- else {
- assert(0); /* Should never get here */
- return 0;
}
-
-#undef DO_MEMCHR
+#endif /* HAVE_MEMRCHR */
+ p = s + n;
+ while (p > s) {
+ p--;
+ if (*p == ch)
+ return (p - s);
+ }
+ return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
@@ -99,25 +145,11 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (m <= 0)
return -1;
/* use special case for 1-character strings */
- if (n > 10 && (mode == FAST_SEARCH
-#ifdef HAVE_MEMRCHR
- || mode == FAST_RSEARCH
-#endif
- )) {
- /* use memchr if we can choose a needle without two many likely
- false positives */
- unsigned char needle;
- needle = p[0] & 0xff;
-#if STRINGLIB_SIZEOF_CHAR > 1
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0)
-#endif
- return STRINGLIB(fastsearch_memchr_1char)
- (s, n, p[0], needle, mode);
- }
- if (mode == FAST_COUNT) {
+ if (mode == FAST_SEARCH)
+ return STRINGLIB(find_char)(s, n, p[0]);
+ else if (mode == FAST_RSEARCH)
+ return STRINGLIB(rfind_char)(s, n, p[0]);
+ else { /* FAST_COUNT */
for (i = 0; i < n; i++)
if (s[i] == p[0]) {
count++;
@@ -125,14 +157,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return maxcount;
}
return count;
- } else if (mode == FAST_SEARCH) {
- for (i = 0; i < n; i++)
- if (s[i] == p[0])
- return i;
- } else { /* FAST_RSEARCH */
- for (i = n - 1; i > -1; i--)
- if (s[i] == p[0])
- return i;
}
return -1;
}
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 14815f6..509b929 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -117,85 +117,3 @@ STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
}
#undef FORMAT_BUFFER_SIZE
-
-#if STRINGLIB_IS_UNICODE
-
-/*
-Wraps stringlib_parse_args_finds() and additionally ensures that the
-first argument is a unicode object.
-
-Note that we receive a pointer to the pointer of the substring object,
-so when we create that object in this function we don't DECREF it,
-because it continues living in the caller functions (those functions,
-after finishing using the substring, must DECREF it).
-*/
-
-Py_LOCAL_INLINE(int)
-STRINGLIB(parse_args_finds_unicode)(const char * function_name, PyObject *args,
- PyObject **substring,
- Py_ssize_t *start, Py_ssize_t *end)
-{
- PyObject *tmp_substring;
-
- if(STRINGLIB(parse_args_finds)(function_name, args, &tmp_substring,
- start, end)) {
- tmp_substring = PyUnicode_FromObject(tmp_substring);
- if (!tmp_substring)
- return 0;
- *substring = tmp_substring;
- return 1;
- }
- return 0;
-}
-
-#else /* !STRINGLIB_IS_UNICODE */
-
-/*
-Wraps stringlib_parse_args_finds() and additionally checks whether the
-first argument is an integer in range(0, 256).
-
-If this is the case, writes the integer value to the byte parameter
-and sets subobj to NULL. Otherwise, sets the first argument to subobj
-and doesn't touch byte. The other parameters are similar to those of
-stringlib_parse_args_finds().
-*/
-
-Py_LOCAL_INLINE(int)
-STRINGLIB(parse_args_finds_byte)(const char *function_name, PyObject *args,
- PyObject **subobj, char *byte,
- Py_ssize_t *start, Py_ssize_t *end)
-{
- PyObject *tmp_subobj;
- Py_ssize_t ival;
- PyObject *err;
-
- if(!STRINGLIB(parse_args_finds)(function_name, args, &tmp_subobj,
- start, end))
- return 0;
-
- if (!PyNumber_Check(tmp_subobj)) {
- *subobj = tmp_subobj;
- return 1;
- }
-
- ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError);
- if (ival == -1) {
- err = PyErr_Occurred();
- if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) {
- PyErr_Clear();
- *subobj = tmp_subobj;
- return 1;
- }
- }
-
- if (ival < 0 || ival > 255) {
- PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
- return 0;
- }
-
- *subobj = NULL;
- *byte = (char)ival;
- return 1;
-}
-
-#endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
index eb3fe88..8ccbc30 100644
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -1,6 +1,8 @@
/* Finding the optimal width of unicode characters in a buffer */
-#if STRINGLIB_IS_UNICODE
+#if !STRINGLIB_IS_UNICODE
+# error "find_max_char.h is specific to Unicode"
+#endif
/* Mask to quickly check whether a C 'long' contains a
non-ASCII, UTF8-encoded char. */
@@ -129,5 +131,4 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
#undef MAX_CHAR_UCS4
#endif /* STRINGLIB_SIZEOF_CHAR == 1 */
-#endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
index cbf81be..90f966d 100644
--- a/Objects/stringlib/join.h
+++ b/Objects/stringlib/join.h
@@ -1,6 +1,6 @@
/* stringlib: bytes joining implementation */
-#if STRINGLIB_SIZEOF_CHAR != 1
+#if STRINGLIB_IS_UNICODE
#error join.h only compatible with byte-wise strings
#endif
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h
index 6e2f073..df501ed 100644
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
@@ -2,8 +2,8 @@
#include <locale.h>
-#ifndef STRINGLIB_IS_UNICODE
-# error "localeutil is specific to Unicode"
+#if !STRINGLIB_IS_UNICODE
+# error "localeutil.h is specific to Unicode"
#endif
typedef struct {
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index b559b53..625507d 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -1,14 +1,21 @@
-/* NOTE: this API is -ONLY- for use with single byte character strings. */
-/* Do not use it with Unicode. */
+#if STRINGLIB_IS_UNICODE
+# error "transmogrify.h only compatible with byte-wise strings"
+#endif
/* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */
-PyDoc_STRVAR(expandtabs__doc__,
-"B.expandtabs(tabsize=8) -> copy of B\n\
-\n\
-Return a copy of B where all tab characters are expanded using spaces.\n\
-If tabsize is not given, a tab size of 8 characters is assumed.");
+Py_LOCAL_INLINE(PyObject *)
+return_self(PyObject *self)
+{
+#if !STRINGLIB_MUTABLE
+ if (STRINGLIB_CHECK_EXACT(self)) {
+ Py_INCREF(self);
+ return self;
+ }
+#endif
+ return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
static PyObject*
stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
@@ -93,39 +100,25 @@ pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
if (right < 0)
right = 0;
- if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
-#if STRINGLIB_MUTABLE
- /* We're defined as returning a copy; If the object is mutable
- * that means we must make an identical copy. */
- return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-#else
- Py_INCREF(self);
- return (PyObject *)self;
-#endif /* STRINGLIB_MUTABLE */
+ if (left == 0 && right == 0) {
+ return return_self(self);
}
- u = STRINGLIB_NEW(NULL,
- left + STRINGLIB_LEN(self) + right);
+ u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
if (u) {
if (left)
memset(STRINGLIB_STR(u), fill, left);
Py_MEMCPY(STRINGLIB_STR(u) + left,
- STRINGLIB_STR(self),
- STRINGLIB_LEN(self));
+ STRINGLIB_STR(self),
+ STRINGLIB_LEN(self));
if (right)
memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
- fill, right);
+ fill, right);
}
return u;
}
-PyDoc_STRVAR(ljust__doc__,
-"B.ljust(width[, fillchar]) -> copy of B\n"
-"\n"
-"Return B left justified in a string of length width. Padding is\n"
-"done using the specified fill character (default is a space).");
-
static PyObject *
stringlib_ljust(PyObject *self, PyObject *args)
{
@@ -135,27 +128,14 @@ stringlib_ljust(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
return NULL;
- if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
-#if STRINGLIB_MUTABLE
- /* We're defined as returning a copy; If the object is mutable
- * that means we must make an identical copy. */
- return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-#else
- Py_INCREF(self);
- return (PyObject*) self;
-#endif
+ if (STRINGLIB_LEN(self) >= width) {
+ return return_self(self);
}
return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
}
-PyDoc_STRVAR(rjust__doc__,
-"B.rjust(width[, fillchar]) -> copy of B\n"
-"\n"
-"Return B right justified in a string of length width. Padding is\n"
-"done using the specified fill character (default is a space)");
-
static PyObject *
stringlib_rjust(PyObject *self, PyObject *args)
{
@@ -165,27 +145,14 @@ stringlib_rjust(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
return NULL;
- if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
-#if STRINGLIB_MUTABLE
- /* We're defined as returning a copy; If the object is mutable
- * that means we must make an identical copy. */
- return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-#else
- Py_INCREF(self);
- return (PyObject*) self;
-#endif
+ if (STRINGLIB_LEN(self) >= width) {
+ return return_self(self);
}
return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
}
-PyDoc_STRVAR(center__doc__,
-"B.center(width[, fillchar]) -> copy of B\n"
-"\n"
-"Return B centered in a string of length width. Padding is\n"
-"done using the specified fill character (default is a space).");
-
static PyObject *
stringlib_center(PyObject *self, PyObject *args)
{
@@ -196,15 +163,8 @@ stringlib_center(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
return NULL;
- if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
-#if STRINGLIB_MUTABLE
- /* We're defined as returning a copy; If the object is mutable
- * that means we must make an identical copy. */
- return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-#else
- Py_INCREF(self);
- return (PyObject*) self;
-#endif
+ if (STRINGLIB_LEN(self) >= width) {
+ return return_self(self);
}
marg = width - STRINGLIB_LEN(self);
@@ -213,12 +173,6 @@ stringlib_center(PyObject *self, PyObject *args)
return pad(self, left, marg - left, fillchar);
}
-PyDoc_STRVAR(zfill__doc__,
-"B.zfill(width) -> copy of B\n"
-"\n"
-"Pad a numeric string B with zeros on the left, to fill a field\n"
-"of the specified width. B is never truncated.");
-
static PyObject *
stringlib_zfill(PyObject *self, PyObject *args)
{
@@ -231,21 +185,7 @@ stringlib_zfill(PyObject *self, PyObject *args)
return NULL;
if (STRINGLIB_LEN(self) >= width) {
- if (STRINGLIB_CHECK_EXACT(self)) {
-#if STRINGLIB_MUTABLE
- /* We're defined as returning a copy; If the object is mutable
- * that means we must make an identical copy. */
- return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-#else
- Py_INCREF(self);
- return (PyObject*) self;
-#endif
- }
- else
- return STRINGLIB_NEW(
- STRINGLIB_STR(self),
- STRINGLIB_LEN(self)
- );
+ return return_self(self);
}
fill = width - STRINGLIB_LEN(self);
@@ -262,5 +202,500 @@ stringlib_zfill(PyObject *self, PyObject *args)
p[fill] = '0';
}
- return (PyObject*) s;
+ return s;
+}
+
+
+/* find and count characters and substrings */
+
+#define findchar(target, target_len, c) \
+ ((char *)memchr((const void *)(target), c, target_len))
+
+
+Py_LOCAL_INLINE(Py_ssize_t)
+countchar(const char *target, Py_ssize_t target_len, char c,
+ Py_ssize_t maxcount)
+{
+ Py_ssize_t count = 0;
+ const char *start = target;
+ const char *end = target + target_len;
+
+ while ((start = findchar(start, end - start, c)) != NULL) {
+ count++;
+ if (count >= maxcount)
+ break;
+ start += 1;
+ }
+ return count;
+}
+
+
+/* Algorithms for different cases of string replacement */
+
+/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
+Py_LOCAL(PyObject *)
+stringlib_replace_interleave(PyObject *self,
+ const char *to_s, Py_ssize_t to_len,
+ Py_ssize_t maxcount)
+{
+ const char *self_s;
+ char *result_s;
+ Py_ssize_t self_len, result_len;
+ Py_ssize_t count, i;
+ PyObject *result;
+
+ self_len = STRINGLIB_LEN(self);
+
+ /* 1 at the end plus 1 after every character;
+ count = min(maxcount, self_len + 1) */
+ if (maxcount <= self_len) {
+ count = maxcount;
+ }
+ else {
+ /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
+ count = self_len + 1;
+ }
+
+ /* Check for overflow */
+ /* result_len = count * to_len + self_len; */
+ assert(count > 0);
+ if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
+ PyErr_SetString(PyExc_OverflowError,
+ "replace bytes are too long");
+ return NULL;
+ }
+ result_len = count * to_len + self_len;
+ result = STRINGLIB_NEW(NULL, result_len);
+ if (result == NULL) {
+ return NULL;
+ }
+
+ self_s = STRINGLIB_STR(self);
+ result_s = STRINGLIB_STR(result);
+
+ if (to_len > 1) {
+ /* Lay the first one down (guaranteed this will occur) */
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ count -= 1;
+
+ for (i = 0; i < count; i++) {
+ *result_s++ = *self_s++;
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ }
+ }
+ else {
+ result_s[0] = to_s[0];
+ result_s += to_len;
+ count -= 1;
+ for (i = 0; i < count; i++) {
+ *result_s++ = *self_s++;
+ result_s[0] = to_s[0];
+ result_s += to_len;
+ }
+ }
+
+ /* Copy the rest of the original string */
+ Py_MEMCPY(result_s, self_s, self_len - i);
+
+ return result;
}
+
+/* Special case for deleting a single character */
+/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
+Py_LOCAL(PyObject *)
+stringlib_replace_delete_single_character(PyObject *self,
+ char from_c, Py_ssize_t maxcount)
+{
+ const char *self_s, *start, *next, *end;
+ char *result_s;
+ Py_ssize_t self_len, result_len;
+ Py_ssize_t count;
+ PyObject *result;
+
+ self_len = STRINGLIB_LEN(self);
+ self_s = STRINGLIB_STR(self);
+
+ count = countchar(self_s, self_len, from_c, maxcount);
+ if (count == 0) {
+ return return_self(self);
+ }
+
+ result_len = self_len - count; /* from_len == 1 */
+ assert(result_len>=0);
+
+ result = STRINGLIB_NEW(NULL, result_len);
+ if (result == NULL) {
+ return NULL;
+ }
+ result_s = STRINGLIB_STR(result);
+
+ start = self_s;
+ end = self_s + self_len;
+ while (count-- > 0) {
+ next = findchar(start, end - start, from_c);
+ if (next == NULL)
+ break;
+ Py_MEMCPY(result_s, start, next - start);
+ result_s += (next - start);
+ start = next + 1;
+ }
+ Py_MEMCPY(result_s, start, end - start);
+
+ return result;
+}
+
+/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
+
+Py_LOCAL(PyObject *)
+stringlib_replace_delete_substring(PyObject *self,
+ const char *from_s, Py_ssize_t from_len,
+ Py_ssize_t maxcount)
+{
+ const char *self_s, *start, *next, *end;
+ char *result_s;
+ Py_ssize_t self_len, result_len;
+ Py_ssize_t count, offset;
+ PyObject *result;
+
+ self_len = STRINGLIB_LEN(self);
+ self_s = STRINGLIB_STR(self);
+
+ count = stringlib_count(self_s, self_len,
+ from_s, from_len,
+ maxcount);
+
+ if (count == 0) {
+ /* no matches */
+ return return_self(self);
+ }
+
+ result_len = self_len - (count * from_len);
+ assert (result_len>=0);
+
+ result = STRINGLIB_NEW(NULL, result_len);
+ if (result == NULL) {
+ return NULL;
+ }
+ result_s = STRINGLIB_STR(result);
+
+ start = self_s;
+ end = self_s + self_len;
+ while (count-- > 0) {
+ offset = stringlib_find(start, end - start,
+ from_s, from_len,
+ 0);
+ if (offset == -1)
+ break;
+ next = start + offset;
+
+ Py_MEMCPY(result_s, start, next - start);
+
+ result_s += (next - start);
+ start = next + from_len;
+ }
+ Py_MEMCPY(result_s, start, end - start);
+ return result;
+}
+
+/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
+Py_LOCAL(PyObject *)
+stringlib_replace_single_character_in_place(PyObject *self,
+ char from_c, char to_c,
+ Py_ssize_t maxcount)
+{
+ const char *self_s, *end;
+ char *result_s, *start, *next;
+ Py_ssize_t self_len;
+ PyObject *result;
+
+ /* The result string will be the same size */
+ self_s = STRINGLIB_STR(self);
+ self_len = STRINGLIB_LEN(self);
+
+ next = findchar(self_s, self_len, from_c);
+
+ if (next == NULL) {
+ /* No matches; return the original bytes */
+ return return_self(self);
+ }
+
+ /* Need to make a new bytes */
+ result = STRINGLIB_NEW(NULL, self_len);
+ if (result == NULL) {
+ return NULL;
+ }
+ result_s = STRINGLIB_STR(result);
+ Py_MEMCPY(result_s, self_s, self_len);
+
+ /* change everything in-place, starting with this one */
+ start = result_s + (next - self_s);
+ *start = to_c;
+ start++;
+ end = result_s + self_len;
+
+ while (--maxcount > 0) {
+ next = findchar(start, end - start, from_c);
+ if (next == NULL)
+ break;
+ *next = to_c;
+ start = next + 1;
+ }
+
+ return result;
+}
+
+/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
+Py_LOCAL(PyObject *)
+stringlib_replace_substring_in_place(PyObject *self,
+ const char *from_s, Py_ssize_t from_len,
+ const char *to_s, Py_ssize_t to_len,
+ Py_ssize_t maxcount)
+{
+ const char *self_s, *end;
+ char *result_s, *start;
+ Py_ssize_t self_len, offset;
+ PyObject *result;
+
+ /* The result bytes will be the same size */
+
+ self_s = STRINGLIB_STR(self);
+ self_len = STRINGLIB_LEN(self);
+
+ offset = stringlib_find(self_s, self_len,
+ from_s, from_len,
+ 0);
+ if (offset == -1) {
+ /* No matches; return the original bytes */
+ return return_self(self);
+ }
+
+ /* Need to make a new bytes */
+ result = STRINGLIB_NEW(NULL, self_len);
+ if (result == NULL) {
+ return NULL;
+ }
+ result_s = STRINGLIB_STR(result);
+ Py_MEMCPY(result_s, self_s, self_len);
+
+ /* change everything in-place, starting with this one */
+ start = result_s + offset;
+ Py_MEMCPY(start, to_s, from_len);
+ start += from_len;
+ end = result_s + self_len;
+
+ while ( --maxcount > 0) {
+ offset = stringlib_find(start, end - start,
+ from_s, from_len,
+ 0);
+ if (offset == -1)
+ break;
+ Py_MEMCPY(start + offset, to_s, from_len);
+ start += offset + from_len;
+ }
+
+ return result;
+}
+
+/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
+Py_LOCAL(PyObject *)
+stringlib_replace_single_character(PyObject *self,
+ char from_c,
+ const char *to_s, Py_ssize_t to_len,
+ Py_ssize_t maxcount)
+{
+ const char *self_s, *start, *next, *end;
+ char *result_s;
+ Py_ssize_t self_len, result_len;
+ Py_ssize_t count;
+ PyObject *result;
+
+ self_s = STRINGLIB_STR(self);
+ self_len = STRINGLIB_LEN(self);
+
+ count = countchar(self_s, self_len, from_c, maxcount);
+ if (count == 0) {
+ /* no matches, return unchanged */
+ return return_self(self);
+ }
+
+ /* use the difference between current and new, hence the "-1" */
+ /* result_len = self_len + count * (to_len-1) */
+ assert(count > 0);
+ if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
+ PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
+ return NULL;
+ }
+ result_len = self_len + count * (to_len - 1);
+
+ result = STRINGLIB_NEW(NULL, result_len);
+ if (result == NULL) {
+ return NULL;
+ }
+ result_s = STRINGLIB_STR(result);
+
+ start = self_s;
+ end = self_s + self_len;
+ while (count-- > 0) {
+ next = findchar(start, end - start, from_c);
+ if (next == NULL)
+ break;
+
+ if (next == start) {
+ /* replace with the 'to' */
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ start += 1;
+ } else {
+ /* copy the unchanged old then the 'to' */
+ Py_MEMCPY(result_s, start, next - start);
+ result_s += (next - start);
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ start = next + 1;
+ }
+ }
+ /* Copy the remainder of the remaining bytes */
+ Py_MEMCPY(result_s, start, end - start);
+
+ return result;
+}
+
+/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
+Py_LOCAL(PyObject *)
+stringlib_replace_substring(PyObject *self,
+ const char *from_s, Py_ssize_t from_len,
+ const char *to_s, Py_ssize_t to_len,
+ Py_ssize_t maxcount)
+{
+ const char *self_s, *start, *next, *end;
+ char *result_s;
+ Py_ssize_t self_len, result_len;
+ Py_ssize_t count, offset;
+ PyObject *result;
+
+ self_s = STRINGLIB_STR(self);
+ self_len = STRINGLIB_LEN(self);
+
+ count = stringlib_count(self_s, self_len,
+ from_s, from_len,
+ maxcount);
+
+ if (count == 0) {
+ /* no matches, return unchanged */
+ return return_self(self);
+ }
+
+ /* Check for overflow */
+ /* result_len = self_len + count * (to_len-from_len) */
+ assert(count > 0);
+ if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
+ PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
+ return NULL;
+ }
+ result_len = self_len + count * (to_len - from_len);
+
+ result = STRINGLIB_NEW(NULL, result_len);
+ if (result == NULL) {
+ return NULL;
+ }
+ result_s = STRINGLIB_STR(result);
+
+ start = self_s;
+ end = self_s + self_len;
+ while (count-- > 0) {
+ offset = stringlib_find(start, end - start,
+ from_s, from_len,
+ 0);
+ if (offset == -1)
+ break;
+ next = start + offset;
+ if (next == start) {
+ /* replace with the 'to' */
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ start += from_len;
+ } else {
+ /* copy the unchanged old then the 'to' */
+ Py_MEMCPY(result_s, start, next - start);
+ result_s += (next - start);
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ start = next + from_len;
+ }
+ }
+ /* Copy the remainder of the remaining bytes */
+ Py_MEMCPY(result_s, start, end - start);
+
+ return result;
+}
+
+
+Py_LOCAL(PyObject *)
+stringlib_replace(PyObject *self,
+ const char *from_s, Py_ssize_t from_len,
+ const char *to_s, Py_ssize_t to_len,
+ Py_ssize_t maxcount)
+{
+ if (maxcount < 0) {
+ maxcount = PY_SSIZE_T_MAX;
+ } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
+ /* nothing to do; return the original bytes */
+ return return_self(self);
+ }
+
+ /* Handle zero-length special cases */
+ if (from_len == 0) {
+ if (to_len == 0) {
+ /* nothing to do; return the original bytes */
+ return return_self(self);
+ }
+ /* insert the 'to' bytes everywhere. */
+ /* >>> b"Python".replace(b"", b".") */
+ /* b'.P.y.t.h.o.n.' */
+ return stringlib_replace_interleave(self, to_s, to_len, maxcount);
+ }
+
+ /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */
+ /* point for an empty self bytes to generate a non-empty bytes */
+ /* Special case so the remaining code always gets a non-empty bytes */
+ if (STRINGLIB_LEN(self) == 0) {
+ return return_self(self);
+ }
+
+ if (to_len == 0) {
+ /* delete all occurrences of 'from' bytes */
+ if (from_len == 1) {
+ return stringlib_replace_delete_single_character(
+ self, from_s[0], maxcount);
+ } else {
+ return stringlib_replace_delete_substring(
+ self, from_s, from_len, maxcount);
+ }
+ }
+
+ /* Handle special case where both bytes have the same length */
+
+ if (from_len == to_len) {
+ if (from_len == 1) {
+ return stringlib_replace_single_character_in_place(
+ self, from_s[0], to_s[0], maxcount);
+ } else {
+ return stringlib_replace_substring_in_place(
+ self, from_s, from_len, to_s, to_len, maxcount);
+ }
+ }
+
+ /* Otherwise use the more generic algorithms */
+ if (from_len == 1) {
+ return stringlib_replace_single_character(
+ self, from_s[0], to_s, to_len, maxcount);
+ } else {
+ /* len('from')>=2, len('to')>=1 */
+ return stringlib_replace_substring(
+ self, from_s, from_len, to_s, to_len, maxcount);
+ }
+}
+
+#undef findchar
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index be09b5f..14fa28e 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -67,7 +67,7 @@ SubString_new_object(SubString *str)
return PyUnicode_Substring(str->str, str->start, str->end);
}
-/* return a new string. if str->str is NULL, return None */
+/* return a new string. if str->str is NULL, return a new empty string */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString *str)
{
diff --git a/Objects/structseq.c b/Objects/structseq.c
index 664344b..e315cba 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -4,9 +4,9 @@
#include "Python.h"
#include "structmember.h"
-static char visible_length_key[] = "n_sequence_fields";
-static char real_length_key[] = "n_fields";
-static char unnamed_fields_key[] = "n_unnamed_fields";
+static const char visible_length_key[] = "n_sequence_fields";
+static const char real_length_key[] = "n_fields";
+static const char unnamed_fields_key[] = "n_unnamed_fields";
/* Fields with this name have only a field index, not a field name.
They are only allowed for indices < n_visible_fields. */
@@ -16,14 +16,14 @@ _Py_IDENTIFIER(n_fields);
_Py_IDENTIFIER(n_unnamed_fields);
#define VISIBLE_SIZE(op) Py_SIZE(op)
-#define VISIBLE_SIZE_TP(tp) PyLong_AsLong( \
+#define VISIBLE_SIZE_TP(tp) PyLong_AsSsize_t( \
_PyDict_GetItemId((tp)->tp_dict, &PyId_n_sequence_fields))
-#define REAL_SIZE_TP(tp) PyLong_AsLong( \
+#define REAL_SIZE_TP(tp) PyLong_AsSsize_t( \
_PyDict_GetItemId((tp)->tp_dict, &PyId_n_fields))
#define REAL_SIZE(op) REAL_SIZE_TP(Py_TYPE(op))
-#define UNNAMED_FIELDS_TP(tp) PyLong_AsLong( \
+#define UNNAMED_FIELDS_TP(tp) PyLong_AsSsize_t( \
_PyDict_GetItemId((tp)->tp_dict, &PyId_n_unnamed_fields))
#define UNNAMED_FIELDS(op) UNNAMED_FIELDS_TP(Py_TYPE(op))
@@ -164,7 +164,8 @@ structseq_repr(PyStructSequence *obj)
#define TYPE_MAXSIZE 100
PyTypeObject *typ = Py_TYPE(obj);
- int i, removelast = 0;
+ Py_ssize_t i;
+ int removelast = 0;
Py_ssize_t len;
char buf[REPR_BUFFER_SIZE];
char *endofbuf, *pbuf = buf;
@@ -236,8 +237,7 @@ structseq_reduce(PyStructSequence* self)
PyObject* tup = NULL;
PyObject* dict = NULL;
PyObject* result;
- Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields;
- int i;
+ Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields, i;
n_fields = REAL_SIZE(self);
n_visible_fields = VISIBLE_SIZE(self);
@@ -325,7 +325,7 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc)
{
PyObject *dict;
PyMemberDef* members;
- int n_members, n_unnamed_members, i, k;
+ Py_ssize_t n_members, n_unnamed_members, i, k;
PyObject *v;
#ifdef Py_TRACE_REFS
@@ -373,9 +373,9 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc)
Py_INCREF(type);
dict = type->tp_dict;
-#define SET_DICT_FROM_INT(key, value) \
+#define SET_DICT_FROM_SIZE(key, value) \
do { \
- v = PyLong_FromLong((long) value); \
+ v = PyLong_FromSsize_t(value); \
if (v == NULL) \
return -1; \
if (PyDict_SetItemString(dict, key, v) < 0) { \
@@ -385,9 +385,9 @@ PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc)
Py_DECREF(v); \
} while (0)
- SET_DICT_FROM_INT(visible_length_key, desc->n_in_sequence);
- SET_DICT_FROM_INT(real_length_key, n_members);
- SET_DICT_FROM_INT(unnamed_fields_key, n_unnamed_members);
+ SET_DICT_FROM_SIZE(visible_length_key, desc->n_in_sequence);
+ SET_DICT_FROM_SIZE(real_length_key, n_members);
+ SET_DICT_FROM_SIZE(unnamed_fields_key, n_unnamed_members);
return 0;
}
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 7920fec..a7774e2 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -149,7 +149,6 @@ PyTuple_GetItem(PyObject *op, Py_ssize_t i)
int
PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
{
- PyObject *olditem;
PyObject **p;
if (!PyTuple_Check(op) || op->ob_refcnt != 1) {
Py_XDECREF(newitem);
@@ -163,9 +162,7 @@ PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
return -1;
}
p = ((PyTupleObject *)op) -> ob_item + i;
- olditem = *p;
- *p = newitem;
- Py_XDECREF(olditem);
+ Py_XSETREF(*p, newitem);
return 0;
}
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 0a82f3a..9e7b4e6 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -548,7 +548,7 @@ type_get_bases(PyTypeObject *type, void *context)
static PyTypeObject *best_base(PyObject *);
static int mro_internal(PyTypeObject *, PyObject **);
Py_LOCAL_INLINE(int) type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *);
-static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, char *);
+static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *);
static int add_subclass(PyTypeObject*, PyTypeObject*);
static int add_all_subclasses(PyTypeObject *type, PyObject *bases);
static void remove_subclass(PyTypeObject *, PyTypeObject *);
@@ -888,25 +888,33 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
#endif
obj = type->tp_new(type, args, kwds);
- if (obj != NULL) {
- /* Ugly exception: when the call was type(something),
- don't call tp_init on the result. */
- if (type == &PyType_Type &&
- PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1 &&
- (kwds == NULL ||
- (PyDict_Check(kwds) && PyDict_Size(kwds) == 0)))
- return obj;
- /* If the returned object is not an instance of type,
- it won't be initialized. */
- if (!PyType_IsSubtype(Py_TYPE(obj), type))
- return obj;
- type = Py_TYPE(obj);
- if (type->tp_init != NULL) {
- int res = type->tp_init(obj, args, kwds);
- if (res < 0) {
- Py_DECREF(obj);
- obj = NULL;
- }
+ obj = _Py_CheckFunctionResult((PyObject*)type, obj, NULL);
+ if (obj == NULL)
+ return NULL;
+
+ /* Ugly exception: when the call was type(something),
+ don't call tp_init on the result. */
+ if (type == &PyType_Type &&
+ PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1 &&
+ (kwds == NULL ||
+ (PyDict_Check(kwds) && PyDict_Size(kwds) == 0)))
+ return obj;
+
+ /* If the returned object is not an instance of type,
+ it won't be initialized. */
+ if (!PyType_IsSubtype(Py_TYPE(obj), type))
+ return obj;
+
+ type = Py_TYPE(obj);
+ if (type->tp_init != NULL) {
+ int res = type->tp_init(obj, args, kwds);
+ if (res < 0) {
+ assert(PyErr_Occurred());
+ Py_DECREF(obj);
+ obj = NULL;
+ }
+ else {
+ assert(!PyErr_Occurred());
}
}
return obj;
@@ -1411,7 +1419,7 @@ _PyObject_LookupSpecial(PyObject *self, _Py_Identifier *attrid)
as lookup_method to cache the interned name string object. */
static PyObject *
-call_method(PyObject *o, _Py_Identifier *nameid, char *format, ...)
+call_method(PyObject *o, _Py_Identifier *nameid, const char *format, ...)
{
va_list va;
PyObject *args, *func = 0, *retval;
@@ -1447,7 +1455,7 @@ call_method(PyObject *o, _Py_Identifier *nameid, char *format, ...)
/* Clone of call_method() that returns NotImplemented when the lookup fails. */
static PyObject *
-call_maybe(PyObject *o, _Py_Identifier *nameid, char *format, ...)
+call_maybe(PyObject *o, _Py_Identifier *nameid, const char *format, ...)
{
va_list va;
PyObject *args, *func = 0, *retval;
@@ -2084,7 +2092,7 @@ subtype_dict(PyObject *obj, void *context)
static int
subtype_setdict(PyObject *obj, PyObject *value, void *context)
{
- PyObject *dict, **dictptr;
+ PyObject **dictptr;
PyTypeObject *base;
base = get_builtin_base_with_dict(Py_TYPE(obj));
@@ -2115,10 +2123,8 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context)
"not a '%.200s'", Py_TYPE(value)->tp_name);
return -1;
}
- dict = *dictptr;
Py_XINCREF(value);
- *dictptr = value;
- Py_XDECREF(dict);
+ Py_XSETREF(*dictptr, value);
return 0;
}
@@ -2671,7 +2677,7 @@ error:
return NULL;
}
-static short slotoffsets[] = {
+static const short slotoffsets[] = {
-1, /* invalid slot */
#include "typeslots.inc"
};
@@ -3590,7 +3596,7 @@ same_slots_added(PyTypeObject *a, PyTypeObject *b)
}
static int
-compatible_for_assignment(PyTypeObject* oldto, PyTypeObject* newto, char* attr)
+compatible_for_assignment(PyTypeObject* oldto, PyTypeObject* newto, const char* attr)
{
PyTypeObject *newbase, *oldbase;
@@ -3866,6 +3872,24 @@ _PyObject_GetState(PyObject *obj, int required)
}
assert(slotnames == Py_None || PyList_Check(slotnames));
+ if (required) {
+ Py_ssize_t basicsize = PyBaseObject_Type.tp_basicsize;
+ if (obj->ob_type->tp_dictoffset)
+ basicsize += sizeof(PyObject *);
+ if (obj->ob_type->tp_weaklistoffset)
+ basicsize += sizeof(PyObject *);
+ if (slotnames != Py_None)
+ basicsize += sizeof(PyObject *) * Py_SIZE(slotnames);
+ if (obj->ob_type->tp_basicsize > basicsize) {
+ Py_DECREF(slotnames);
+ Py_DECREF(state);
+ PyErr_Format(PyExc_TypeError,
+ "can't pickle %.200s objects",
+ Py_TYPE(obj)->tp_name);
+ return NULL;
+ }
+ }
+
if (slotnames != Py_None && Py_SIZE(slotnames) > 0) {
PyObject *slots;
Py_ssize_t slotnames_size, i;
@@ -3920,7 +3944,7 @@ _PyObject_GetState(PyObject *obj, int required)
}
/* If we found some slot attributes, pack them in a tuple along
- the orginal attribute dictionary. */
+ the original attribute dictionary. */
if (PyDict_Size(slots) > 0) {
PyObject *state2;
@@ -4089,7 +4113,7 @@ _PyObject_GetItemsIter(PyObject *obj, PyObject **listitems,
}
static PyObject *
-reduce_newobj(PyObject *obj, int proto)
+reduce_newobj(PyObject *obj)
{
PyObject *args = NULL, *kwargs = NULL;
PyObject *copyreg;
@@ -4142,7 +4166,7 @@ reduce_newobj(PyObject *obj, int proto)
}
Py_XDECREF(args);
}
- else if (proto >= 4) {
+ else {
_Py_IDENTIFIER(__newobj_ex__);
newobj = _PyObject_GetAttrId(copyreg, &PyId___newobj_ex__);
@@ -4160,16 +4184,6 @@ reduce_newobj(PyObject *obj, int proto)
return NULL;
}
}
- else {
- PyErr_SetString(PyExc_ValueError,
- "must use protocol 4 or greater to copy this "
- "object; since __getnewargs_ex__ returned "
- "keyword arguments.");
- Py_DECREF(args);
- Py_DECREF(kwargs);
- Py_DECREF(copyreg);
- return NULL;
- }
state = _PyObject_GetState(obj,
!hasargs && !PyList_Check(obj) && !PyDict_Check(obj));
@@ -4215,7 +4229,7 @@ _common_reduce(PyObject *self, int proto)
PyObject *copyreg, *res;
if (proto >= 2)
- return reduce_newobj(self, proto);
+ return reduce_newobj(self);
copyreg = import_copyreg();
if (!copyreg)
@@ -5342,7 +5356,7 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped)
/* Helper to check for object.__setattr__ or __delattr__ applied to a type.
This is called the Carlo Verre hack after its discoverer. */
static int
-hackcheck(PyObject *self, setattrofunc func, char *what)
+hackcheck(PyObject *self, setattrofunc func, const char *what)
{
PyTypeObject *type = Py_TYPE(self);
while (type && type->tp_flags & Py_TPFLAGS_HEAPTYPE)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index b146da9..1e7cba6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "Python.h"
#include "ucnhash.h"
#include "bytes_methods.h"
+#include "stringlib/eq.h"
#ifdef MS_WINDOWS
#include <windows.h>
@@ -162,6 +163,14 @@ extern "C" {
*_to++ = (to_type) *_iter++; \
} while (0)
+#ifdef MS_WINDOWS
+ /* On Windows, overallocate by 50% is the best factor */
+# define OVERALLOCATE_FACTOR 2
+#else
+ /* On Linux, overallocate by 25% is the best factor */
+# define OVERALLOCATE_FACTOR 4
+#endif
+
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
@@ -263,7 +272,7 @@ raise_encode_exception(PyObject **exceptionObject,
const char *reason);
/* Same for linebreaks */
-static unsigned char ascii_linebreak[] = {
+static const unsigned char ascii_linebreak[] = {
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x000A, * LINE FEED */
/* 0x000B, * LINE TABULATION */
@@ -292,6 +301,38 @@ static unsigned char ascii_linebreak[] = {
#include "clinic/unicodeobject.c.h"
+typedef enum {
+ _Py_ERROR_UNKNOWN=0,
+ _Py_ERROR_STRICT,
+ _Py_ERROR_SURROGATEESCAPE,
+ _Py_ERROR_REPLACE,
+ _Py_ERROR_IGNORE,
+ _Py_ERROR_BACKSLASHREPLACE,
+ _Py_ERROR_SURROGATEPASS,
+ _Py_ERROR_XMLCHARREFREPLACE,
+ _Py_ERROR_OTHER
+} _Py_error_handler;
+
+static _Py_error_handler
+get_error_handler(const char *errors)
+{
+ if (errors == NULL || strcmp(errors, "strict") == 0)
+ return _Py_ERROR_STRICT;
+ if (strcmp(errors, "surrogateescape") == 0)
+ return _Py_ERROR_SURROGATEESCAPE;
+ if (strcmp(errors, "replace") == 0)
+ return _Py_ERROR_REPLACE;
+ if (strcmp(errors, "ignore") == 0)
+ return _Py_ERROR_IGNORE;
+ if (strcmp(errors, "backslashreplace") == 0)
+ return _Py_ERROR_BACKSLASHREPLACE;
+ if (strcmp(errors, "surrogatepass") == 0)
+ return _Py_ERROR_SURROGATEPASS;
+ if (strcmp(errors, "xmlcharrefreplace") == 0)
+ return _Py_ERROR_XMLCHARREFREPLACE;
+ return _Py_ERROR_OTHER;
+}
+
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
This function is kept for backward compatibility with the old API. */
Py_UNICODE
@@ -521,6 +562,129 @@ unicode_result_unchanged(PyObject *unicode)
return _PyUnicode_Copy(unicode);
}
+/* Implementation of the "backslashreplace" error handler for 8-bit encodings:
+ ASCII, Latin1, UTF-8, etc. */
+static char*
+backslashreplace(_PyBytesWriter *writer, char *str,
+ PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
+{
+ Py_ssize_t size, i;
+ Py_UCS4 ch;
+ enum PyUnicode_Kind kind;
+ void *data;
+
+ assert(PyUnicode_IS_READY(unicode));
+ kind = PyUnicode_KIND(unicode);
+ data = PyUnicode_DATA(unicode);
+
+ size = 0;
+ /* determine replacement size */
+ for (i = collstart; i < collend; ++i) {
+ Py_ssize_t incr;
+
+ ch = PyUnicode_READ(kind, data, i);
+ if (ch < 0x100)
+ incr = 2+2;
+ else if (ch < 0x10000)
+ incr = 2+4;
+ else {
+ assert(ch <= MAX_UNICODE);
+ incr = 2+8;
+ }
+ if (size > PY_SSIZE_T_MAX - incr) {
+ PyErr_SetString(PyExc_OverflowError,
+ "encoded result is too long for a Python string");
+ return NULL;
+ }
+ size += incr;
+ }
+
+ str = _PyBytesWriter_Prepare(writer, str, size);
+ if (str == NULL)
+ return NULL;
+
+ /* generate replacement */
+ for (i = collstart; i < collend; ++i) {
+ ch = PyUnicode_READ(kind, data, i);
+ *str++ = '\\';
+ if (ch >= 0x00010000) {
+ *str++ = 'U';
+ *str++ = Py_hexdigits[(ch>>28)&0xf];
+ *str++ = Py_hexdigits[(ch>>24)&0xf];
+ *str++ = Py_hexdigits[(ch>>20)&0xf];
+ *str++ = Py_hexdigits[(ch>>16)&0xf];
+ *str++ = Py_hexdigits[(ch>>12)&0xf];
+ *str++ = Py_hexdigits[(ch>>8)&0xf];
+ }
+ else if (ch >= 0x100) {
+ *str++ = 'u';
+ *str++ = Py_hexdigits[(ch>>12)&0xf];
+ *str++ = Py_hexdigits[(ch>>8)&0xf];
+ }
+ else
+ *str++ = 'x';
+ *str++ = Py_hexdigits[(ch>>4)&0xf];
+ *str++ = Py_hexdigits[ch&0xf];
+ }
+ return str;
+}
+
+/* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings:
+ ASCII, Latin1, UTF-8, etc. */
+static char*
+xmlcharrefreplace(_PyBytesWriter *writer, char *str,
+ PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
+{
+ Py_ssize_t size, i;
+ Py_UCS4 ch;
+ enum PyUnicode_Kind kind;
+ void *data;
+
+ assert(PyUnicode_IS_READY(unicode));
+ kind = PyUnicode_KIND(unicode);
+ data = PyUnicode_DATA(unicode);
+
+ size = 0;
+ /* determine replacement size */
+ for (i = collstart; i < collend; ++i) {
+ Py_ssize_t incr;
+
+ ch = PyUnicode_READ(kind, data, i);
+ if (ch < 10)
+ incr = 2+1+1;
+ else if (ch < 100)
+ incr = 2+2+1;
+ else if (ch < 1000)
+ incr = 2+3+1;
+ else if (ch < 10000)
+ incr = 2+4+1;
+ else if (ch < 100000)
+ incr = 2+5+1;
+ else if (ch < 1000000)
+ incr = 2+6+1;
+ else {
+ assert(ch <= MAX_UNICODE);
+ incr = 2+7+1;
+ }
+ if (size > PY_SSIZE_T_MAX - incr) {
+ PyErr_SetString(PyExc_OverflowError,
+ "encoded result is too long for a Python string");
+ return NULL;
+ }
+ size += incr;
+ }
+
+ str = _PyBytesWriter_Prepare(writer, str, size);
+ if (str == NULL)
+ return NULL;
+
+ /* generate replacement */
+ for (i = collstart; i < collend; ++i) {
+ str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
+ }
+ return str;
+}
+
/* --- Bloom Filters ----------------------------------------------------- */
/* stuff to implement simple "bloom filters" for Unicode characters.
@@ -587,6 +751,18 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#undef BLOOM_UPDATE
}
+static int
+ensure_unicode(PyObject *obj)
+{
+ if (!PyUnicode_Check(obj)) {
+ PyErr_Format(PyExc_TypeError,
+ "must be str, not %.100s",
+ Py_TYPE(obj)->tp_name);
+ return -1;
+ }
+ return PyUnicode_READY(obj);
+}
+
/* Compilation of templated routines */
#include "stringlib/asciilib.h"
@@ -647,27 +823,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
Py_ssize_t size, Py_UCS4 ch,
int direction)
{
- int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
-
switch (kind) {
case PyUnicode_1BYTE_KIND:
- {
- Py_UCS1 ch1 = (Py_UCS1) ch;
- if (ch1 == ch)
- return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
- else
- return -1;
- }
+ if ((Py_UCS1) ch != ch)
+ return -1;
+ if (direction > 0)
+ return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
+ else
+ return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
case PyUnicode_2BYTE_KIND:
- {
- Py_UCS2 ch2 = (Py_UCS2) ch;
- if (ch2 == ch)
- return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
- else
- return -1;
- }
+ if ((Py_UCS2) ch != ch)
+ return -1;
+ if (direction > 0)
+ return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
+ else
+ return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
case PyUnicode_4BYTE_KIND:
- return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
+ if (direction > 0)
+ return ucs4lib_find_char((Py_UCS4 *) s, size, ch);
+ else
+ return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch);
default:
assert(0);
return -1;
@@ -2903,7 +3078,7 @@ PyUnicode_FromEncodedObject(PyObject *obj,
/* Retrieve a bytes buffer view through the PEP 3118 buffer interface */
if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) {
PyErr_Format(PyExc_TypeError,
- "coercing to str: need a bytes-like object, %.80s found",
+ "decoding to str: need a bytes-like object, %.80s found",
Py_TYPE(obj)->tp_name);
return NULL;
}
@@ -3167,24 +3342,22 @@ wcstombs_errorpos(const wchar_t *wstr)
static int
locale_error_handler(const char *errors, int *surrogateescape)
{
- if (errors == NULL) {
- *surrogateescape = 0;
- return 0;
- }
-
- if (strcmp(errors, "strict") == 0) {
+ _Py_error_handler error_handler = get_error_handler(errors);
+ switch (error_handler)
+ {
+ case _Py_ERROR_STRICT:
*surrogateescape = 0;
return 0;
- }
- if (strcmp(errors, "surrogateescape") == 0) {
+ case _Py_ERROR_SURROGATEESCAPE:
*surrogateescape = 1;
return 0;
+ default:
+ PyErr_Format(PyExc_ValueError,
+ "only 'strict' and 'surrogateescape' error handlers "
+ "are supported, not '%s'",
+ errors);
+ return -1;
}
- PyErr_Format(PyExc_ValueError,
- "only 'strict' and 'surrogateescape' error handlers "
- "are supported, not '%s'",
- errors);
- return -1;
}
PyObject *
@@ -3626,19 +3799,17 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
output = arg;
Py_INCREF(output);
}
- else {
- arg = PyUnicode_FromObject(arg);
- if (!arg)
- return 0;
+ else if (PyUnicode_Check(arg)) {
output = PyUnicode_EncodeFSDefault(arg);
- Py_DECREF(arg);
if (!output)
return 0;
- if (!PyBytes_Check(output)) {
- Py_DECREF(output);
- PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
- return 0;
- }
+ assert(PyBytes_Check(output));
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "must be str or bytes, not %.100s",
+ Py_TYPE(arg)->tp_name);
+ return 0;
}
size = PyBytes_GET_SIZE(output);
data = PyBytes_AS_STRING(output);
@@ -3710,7 +3881,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
if (PyUnicode_UTF8(unicode) == NULL) {
assert(!PyUnicode_IS_COMPACT_ASCII(unicode));
- bytes = _PyUnicode_AsUTF8String(unicode, "strict");
+ bytes = _PyUnicode_AsUTF8String(unicode, NULL);
if (bytes == NULL)
return NULL;
_PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1);
@@ -3976,7 +4147,7 @@ unicode_decode_call_errorhandler_wchar(
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
PyObject **output, Py_ssize_t *outpos)
{
- static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
+ static const char *argparse = "O!n;decoding error handler must return (str, int) tuple";
PyObject *restuple = NULL;
PyObject *repunicode = NULL;
@@ -4084,7 +4255,7 @@ unicode_decode_call_errorhandler_writer(
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
_PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */)
{
- static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
+ static const char *argparse = "O!n;decoding error handler must return (str, int) tuple";
PyObject *restuple = NULL;
PyObject *repunicode = NULL;
@@ -4690,8 +4861,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
Py_ssize_t startinpos;
Py_ssize_t endinpos;
const char *errmsg = "";
- PyObject *errorHandler = NULL;
+ PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
if (size == 0) {
if (consumed)
@@ -4716,6 +4888,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
while (s < end) {
Py_UCS4 ch;
int kind = writer.kind;
+
if (kind == PyUnicode_1BYTE_KIND) {
if (PyUnicode_IS_ASCII(writer.buffer))
ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos);
@@ -4754,24 +4927,56 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
continue;
}
- if (unicode_decode_call_errorhandler_writer(
- errors, &errorHandler,
- "utf-8", errmsg,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &writer))
- goto onError;
+ if (error_handler == _Py_ERROR_UNKNOWN)
+ error_handler = get_error_handler(errors);
+
+ switch (error_handler) {
+ case _Py_ERROR_IGNORE:
+ s += (endinpos - startinpos);
+ break;
+
+ case _Py_ERROR_REPLACE:
+ if (_PyUnicodeWriter_WriteCharInline(&writer, 0xfffd) < 0)
+ goto onError;
+ s += (endinpos - startinpos);
+ break;
+
+ case _Py_ERROR_SURROGATEESCAPE:
+ {
+ Py_ssize_t i;
+
+ if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0)
+ goto onError;
+ for (i=startinpos; i<endinpos; i++) {
+ ch = (Py_UCS4)(unsigned char)(starts[i]);
+ PyUnicode_WRITE(writer.kind, writer.data, writer.pos,
+ ch + 0xdc00);
+ writer.pos++;
+ }
+ s += (endinpos - startinpos);
+ break;
+ }
+
+ default:
+ if (unicode_decode_call_errorhandler_writer(
+ errors, &error_handler_obj,
+ "utf-8", errmsg,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &writer))
+ goto onError;
+ }
}
End:
if (consumed)
*consumed = s - starts;
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
@@ -5862,11 +6067,10 @@ PyObject *
PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
{
Py_ssize_t i, len;
- PyObject *repr;
char *p;
int kind;
void *data;
- Py_ssize_t expandsize = 0;
+ _PyBytesWriter writer;
/* Initial allocation is based on the longest-possible character
escape.
@@ -5882,35 +6086,28 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
}
if (PyUnicode_READY(unicode) == -1)
return NULL;
+
+ _PyBytesWriter_Init(&writer);
+
len = PyUnicode_GET_LENGTH(unicode);
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
- switch (kind) {
- case PyUnicode_1BYTE_KIND: expandsize = 4; break;
- case PyUnicode_2BYTE_KIND: expandsize = 6; break;
- case PyUnicode_4BYTE_KIND: expandsize = 10; break;
- }
-
- if (len == 0)
- return PyBytes_FromStringAndSize(NULL, 0);
-
- if (len > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
- return PyErr_NoMemory();
-
- repr = PyBytes_FromStringAndSize(NULL,
- 2
- + expandsize*len
- + 1);
- if (repr == NULL)
- return NULL;
- p = PyBytes_AS_STRING(repr);
+ p = _PyBytesWriter_Alloc(&writer, len);
+ if (p == NULL)
+ goto error;
+ writer.overallocate = 1;
for (i = 0; i < len; i++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
/* Escape backslashes */
if (ch == '\\') {
+ /* -1: substract 1 preallocated byte */
+ p = _PyBytesWriter_Prepare(&writer, p, 2-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = (char) ch;
continue;
@@ -5919,6 +6116,11 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
/* Map 21-bit characters to '\U00xxxxxx' */
else if (ch >= 0x10000) {
assert(ch <= MAX_UNICODE);
+
+ p = _PyBytesWriter_Prepare(&writer, p, 10-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'U';
*p++ = Py_hexdigits[(ch >> 28) & 0x0000000F];
@@ -5934,6 +6136,10 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
/* Map 16-bit characters to '\uxxxx' */
if (ch >= 256) {
+ p = _PyBytesWriter_Prepare(&writer, p, 6-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'u';
*p++ = Py_hexdigits[(ch >> 12) & 0x000F];
@@ -5944,20 +6150,37 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
/* Map special whitespace to '\t', \n', '\r' */
else if (ch == '\t') {
+ p = _PyBytesWriter_Prepare(&writer, p, 2-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 't';
}
else if (ch == '\n') {
+ p = _PyBytesWriter_Prepare(&writer, p, 2-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'n';
}
else if (ch == '\r') {
+ p = _PyBytesWriter_Prepare(&writer, p, 2-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'r';
}
/* Map non-printable US ASCII to '\xhh' */
else if (ch < ' ' || ch >= 0x7F) {
+ /* -1: substract 1 preallocated byte */
+ p = _PyBytesWriter_Prepare(&writer, p, 4-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'x';
*p++ = Py_hexdigits[(ch >> 4) & 0x000F];
@@ -5969,10 +6192,11 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
*p++ = (char) ch;
}
- assert(p - PyBytes_AS_STRING(repr) > 0);
- if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
- return NULL;
- return repr;
+ return _PyBytesWriter_Finish(&writer, p);
+
+error:
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
}
PyObject *
@@ -6101,13 +6325,12 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
PyObject *
PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
{
- PyObject *repr;
char *p;
- char *q;
- Py_ssize_t expandsize, pos;
+ Py_ssize_t pos;
int kind;
void *data;
Py_ssize_t len;
+ _PyBytesWriter writer;
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
@@ -6115,28 +6338,29 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
}
if (PyUnicode_READY(unicode) == -1)
return NULL;
+
+ _PyBytesWriter_Init(&writer);
+
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
len = PyUnicode_GET_LENGTH(unicode);
- /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6
- bytes, and 1 byte characters 4. */
- expandsize = kind * 2 + 2;
- if (len > PY_SSIZE_T_MAX / expandsize)
- return PyErr_NoMemory();
-
- repr = PyBytes_FromStringAndSize(NULL, expandsize * len);
- if (repr == NULL)
- return NULL;
- if (len == 0)
- return repr;
+ p = _PyBytesWriter_Alloc(&writer, len);
+ if (p == NULL)
+ goto error;
+ writer.overallocate = 1;
- p = q = PyBytes_AS_STRING(repr);
for (pos = 0; pos < len; pos++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
/* Map 32-bit characters to '\Uxxxxxxxx' */
if (ch >= 0x10000) {
assert(ch <= MAX_UNICODE);
+
+ /* -1: substract 1 preallocated byte */
+ p = _PyBytesWriter_Prepare(&writer, p, 10-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'U';
*p++ = Py_hexdigits[(ch >> 28) & 0xf];
@@ -6150,6 +6374,11 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
}
/* Map 16-bit characters to '\uxxxx' */
else if (ch >= 256) {
+ /* -1: substract 1 preallocated byte */
+ p = _PyBytesWriter_Prepare(&writer, p, 6-1);
+ if (p == NULL)
+ goto error;
+
*p++ = '\\';
*p++ = 'u';
*p++ = Py_hexdigits[(ch >> 12) & 0xf];
@@ -6162,10 +6391,11 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
*p++ = (char) ch;
}
- assert(p > q);
- if (_PyBytes_Resize(&repr, p - q) < 0)
- return NULL;
- return repr;
+ return _PyBytesWriter_Finish(&writer, p);
+
+error:
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
}
PyObject *
@@ -6342,7 +6572,7 @@ unicode_encode_call_errorhandler(const char *errors,
Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos)
{
- static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
+ static const char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
Py_ssize_t len;
PyObject *restuple;
PyObject *resunicode;
@@ -6396,25 +6626,22 @@ unicode_encode_call_errorhandler(const char *errors,
static PyObject *
unicode_encode_ucs1(PyObject *unicode,
const char *errors,
- unsigned int limit)
+ const Py_UCS4 limit)
{
/* input state */
Py_ssize_t pos=0, size;
int kind;
void *data;
- /* output object */
- PyObject *res;
/* pointer into the output */
char *str;
- /* current output position */
- Py_ssize_t ressize;
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
- PyObject *errorHandler = NULL;
+ PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
- /* the following variable is used for caching string comparisons
- * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
- int known_errorHandler = -1;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
+ PyObject *rep = NULL;
+ /* output object */
+ _PyBytesWriter writer;
if (PyUnicode_READY(unicode) == -1)
return NULL;
@@ -6425,186 +6652,157 @@ unicode_encode_ucs1(PyObject *unicode,
replacements, if we need more, we'll resize */
if (size == 0)
return PyBytes_FromStringAndSize(NULL, 0);
- res = PyBytes_FromStringAndSize(NULL, size);
- if (res == NULL)
+
+ _PyBytesWriter_Init(&writer);
+ str = _PyBytesWriter_Alloc(&writer, size);
+ if (str == NULL)
return NULL;
- str = PyBytes_AS_STRING(res);
- ressize = size;
while (pos < size) {
- Py_UCS4 c = PyUnicode_READ(kind, data, pos);
+ Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
/* can we encode this? */
- if (c<limit) {
+ if (ch < limit) {
/* no overflow check, because we know that the space is enough */
- *str++ = (char)c;
+ *str++ = (char)ch;
++pos;
}
else {
- Py_ssize_t requiredsize;
- PyObject *repunicode;
- Py_ssize_t repsize, newpos, respos, i;
+ Py_ssize_t newpos, i;
/* startpos for collecting unencodable chars */
Py_ssize_t collstart = pos;
- Py_ssize_t collend = pos;
+ Py_ssize_t collend = collstart + 1;
/* find all unecodable characters */
+
while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
++collend;
+
+ /* Only overallocate the buffer if it's not the last write */
+ writer.overallocate = (collend < size);
+
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
- if (known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- known_errorHandler = 4;
- else
- known_errorHandler = 0;
- }
- switch (known_errorHandler) {
- case 1: /* strict */
+ if (error_handler == _Py_ERROR_UNKNOWN)
+ error_handler = get_error_handler(errors);
+
+ switch (error_handler) {
+ case _Py_ERROR_STRICT:
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
goto onError;
- case 2: /* replace */
- while (collstart++ < collend)
- *str++ = '?'; /* fall through */
- case 3: /* ignore */
+
+ case _Py_ERROR_REPLACE:
+ memset(str, '?', collend - collstart);
+ str += (collend - collstart);
+ /* fall through ignore error handler */
+ case _Py_ERROR_IGNORE:
pos = collend;
break;
- case 4: /* xmlcharrefreplace */
- respos = str - PyBytes_AS_STRING(res);
- requiredsize = respos;
- /* determine replacement size */
+
+ case _Py_ERROR_BACKSLASHREPLACE:
+ /* substract preallocated bytes */
+ writer.min_size -= (collend - collstart);
+ str = backslashreplace(&writer, str,
+ unicode, collstart, collend);
+ if (str == NULL)
+ goto onError;
+ pos = collend;
+ break;
+
+ case _Py_ERROR_XMLCHARREFREPLACE:
+ /* substract preallocated bytes */
+ writer.min_size -= (collend - collstart);
+ str = xmlcharrefreplace(&writer, str,
+ unicode, collstart, collend);
+ if (str == NULL)
+ goto onError;
+ pos = collend;
+ break;
+
+ case _Py_ERROR_SURROGATEESCAPE:
for (i = collstart; i < collend; ++i) {
- Py_UCS4 ch = PyUnicode_READ(kind, data, i);
- Py_ssize_t incr;
- if (ch < 10)
- incr = 2+1+1;
- else if (ch < 100)
- incr = 2+2+1;
- else if (ch < 1000)
- incr = 2+3+1;
- else if (ch < 10000)
- incr = 2+4+1;
- else if (ch < 100000)
- incr = 2+5+1;
- else if (ch < 1000000)
- incr = 2+6+1;
- else {
- assert(ch <= MAX_UNICODE);
- incr = 2+7+1;
+ ch = PyUnicode_READ(kind, data, i);
+ if (ch < 0xdc80 || 0xdcff < ch) {
+ /* Not a UTF-8b surrogate */
+ break;
}
- if (requiredsize > PY_SSIZE_T_MAX - incr)
- goto overflow;
- requiredsize += incr;
- }
- if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
- goto overflow;
- requiredsize += size - collend;
- if (requiredsize > ressize) {
- if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
- requiredsize = 2*ressize;
- if (_PyBytes_Resize(&res, requiredsize))
- goto onError;
- str = PyBytes_AS_STRING(res) + respos;
- ressize = requiredsize;
+ *str++ = (char)(ch - 0xdc00);
+ ++pos;
}
- /* generate replacement */
- for (i = collstart; i < collend; ++i) {
- str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
- }
- pos = collend;
- break;
+ if (i >= collend)
+ break;
+ collstart = pos;
+ assert(collstart != collend);
+ /* fallback to general error handling */
+
default:
- repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
- encoding, reason, unicode, &exc,
- collstart, collend, &newpos);
- if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
- PyUnicode_READY(repunicode) == -1))
+ rep = unicode_encode_call_errorhandler(errors, &error_handler_obj,
+ encoding, reason, unicode, &exc,
+ collstart, collend, &newpos);
+ if (rep == NULL)
goto onError;
- if (PyBytes_Check(repunicode)) {
+
+ /* substract preallocated bytes */
+ writer.min_size -= 1;
+
+ if (PyBytes_Check(rep)) {
/* Directly copy bytes result to output. */
- repsize = PyBytes_Size(repunicode);
- if (repsize > 1) {
- /* Make room for all additional bytes. */
- respos = str - PyBytes_AS_STRING(res);
- if (ressize > PY_SSIZE_T_MAX - repsize - 1) {
- Py_DECREF(repunicode);
- goto overflow;
- }
- if (_PyBytes_Resize(&res, ressize+repsize-1)) {
- Py_DECREF(repunicode);
- goto onError;
- }
- str = PyBytes_AS_STRING(res) + respos;
- ressize += repsize-1;
- }
- memcpy(str, PyBytes_AsString(repunicode), repsize);
- str += repsize;
- pos = newpos;
- Py_DECREF(repunicode);
- break;
- }
- /* need more space? (at least enough for what we
- have+the replacement+the rest of the string, so
- we won't have to check space for encodable characters) */
- respos = str - PyBytes_AS_STRING(res);
- repsize = PyUnicode_GET_LENGTH(repunicode);
- requiredsize = respos;
- if (requiredsize > PY_SSIZE_T_MAX - repsize)
- goto overflow;
- requiredsize += repsize;
- if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
- goto overflow;
- requiredsize += size - collend;
- if (requiredsize > ressize) {
- if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
- requiredsize = 2*ressize;
- if (_PyBytes_Resize(&res, requiredsize)) {
- Py_DECREF(repunicode);
+ str = _PyBytesWriter_WriteBytes(&writer, str,
+ PyBytes_AS_STRING(rep),
+ PyBytes_GET_SIZE(rep));
+ if (str == NULL)
goto onError;
- }
- str = PyBytes_AS_STRING(res) + respos;
- ressize = requiredsize;
}
- /* check if there is anything unencodable in the replacement
- and copy it to the output */
- for (i = 0; repsize-->0; ++i, ++str) {
- c = PyUnicode_READ_CHAR(repunicode, i);
- if (c >= limit) {
- raise_encode_exception(&exc, encoding, unicode,
- pos, pos+1, reason);
- Py_DECREF(repunicode);
+ else {
+ assert(PyUnicode_Check(rep));
+
+ if (PyUnicode_READY(rep) < 0)
goto onError;
+
+ if (PyUnicode_IS_ASCII(rep)) {
+ /* Fast path: all characters are smaller than limit */
+ assert(limit >= 128);
+ assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+ str = _PyBytesWriter_WriteBytes(&writer, str,
+ PyUnicode_DATA(rep),
+ PyUnicode_GET_LENGTH(rep));
+ }
+ else {
+ Py_ssize_t repsize = PyUnicode_GET_LENGTH(rep);
+
+ str = _PyBytesWriter_Prepare(&writer, str, repsize);
+ if (str == NULL)
+ goto onError;
+
+ /* check if there is anything unencodable in the
+ replacement and copy it to the output */
+ for (i = 0; repsize-->0; ++i, ++str) {
+ ch = PyUnicode_READ_CHAR(rep, i);
+ if (ch >= limit) {
+ raise_encode_exception(&exc, encoding, unicode,
+ pos, pos+1, reason);
+ goto onError;
+ }
+ *str = (char)ch;
+ }
}
- *str = (char)c;
}
pos = newpos;
- Py_DECREF(repunicode);
+ Py_CLEAR(rep);
}
+
+ /* If overallocation was disabled, ensure that it was the last
+ write. Otherwise, we missed an optimization */
+ assert(writer.overallocate || pos == size);
}
}
- /* Resize if we allocated to much */
- size = str - PyBytes_AS_STRING(res);
- if (size < ressize) { /* If this falls res will be NULL */
- assert(size >= 0);
- if (_PyBytes_Resize(&res, size) < 0)
- goto onError;
- }
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
- return res;
-
- overflow:
- PyErr_SetString(PyExc_OverflowError,
- "encoded result is too long for a Python string");
+ return _PyBytesWriter_Finish(&writer, str);
onError:
- Py_XDECREF(res);
- Py_XDECREF(errorHandler);
+ Py_XDECREF(rep);
+ _PyBytesWriter_Dealloc(&writer);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
return NULL;
}
@@ -6664,8 +6862,9 @@ PyUnicode_DecodeASCII(const char *s,
Py_ssize_t endinpos;
Py_ssize_t outpos;
const char *e;
- PyObject *errorHandler = NULL;
+ PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
@@ -6694,12 +6893,42 @@ PyUnicode_DecodeASCII(const char *s,
PyUnicode_WRITE(kind, data, writer.pos, c);
writer.pos++;
++s;
+ continue;
}
- else {
+
+ /* byte outsize range 0x00..0x7f: call the error handler */
+
+ if (error_handler == _Py_ERROR_UNKNOWN)
+ error_handler = get_error_handler(errors);
+
+ switch (error_handler)
+ {
+ case _Py_ERROR_REPLACE:
+ case _Py_ERROR_SURROGATEESCAPE:
+ /* Fast-path: the error handler only writes one character,
+ but we may switch to UCS2 at the first write */
+ if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0)
+ goto onError;
+ kind = writer.kind;
+ data = writer.data;
+
+ if (error_handler == _Py_ERROR_REPLACE)
+ PyUnicode_WRITE(kind, data, writer.pos, 0xfffd);
+ else
+ PyUnicode_WRITE(kind, data, writer.pos, c + 0xdc00);
+ writer.pos++;
+ ++s;
+ break;
+
+ case _Py_ERROR_IGNORE:
+ ++s;
+ break;
+
+ default:
startinpos = s-starts;
endinpos = startinpos + 1;
if (unicode_decode_call_errorhandler_writer(
- errors, &errorHandler,
+ errors, &error_handler_obj,
"ascii", "ordinal not in range(128)",
&starts, &e, &startinpos, &endinpos, &exc, &s,
&writer))
@@ -6708,13 +6937,13 @@ PyUnicode_DecodeASCII(const char *s,
data = writer.data;
}
}
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
return _PyUnicodeWriter_Finish(&writer);
onError:
_PyUnicodeWriter_Dealloc(&writer);
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
return NULL;
}
@@ -6769,7 +6998,7 @@ PyUnicode_AsASCIIString(PyObject *unicode)
# define WC_ERR_INVALID_CHARS 0x0080
#endif
-static char*
+static const char*
code_page_name(UINT code_page, PyObject **obj)
{
*obj = NULL;
@@ -6877,7 +7106,7 @@ decode_code_page_errors(UINT code_page,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *encoding_obj = NULL;
- char *encoding;
+ const char *encoding;
DWORD err;
int ret = -1;
@@ -7113,7 +7342,6 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
BOOL usedDefaultChar = FALSE;
BOOL *pusedDefaultChar = &usedDefaultChar;
int outsize;
- PyObject *exc = NULL;
wchar_t *p;
Py_ssize_t size;
const DWORD flags = encode_code_page_flags(code_page, NULL);
@@ -7222,7 +7450,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *encoding_obj = NULL;
- char *encoding;
+ const char *encoding;
Py_ssize_t newpos, newoutsize;
PyObject *rep;
int ret = -1;
@@ -8080,7 +8308,7 @@ static int
charmap_encoding_error(
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject,
- int *known_errorHandler, PyObject **errorHandler, const char *errors,
+ _Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
PyObject **res, Py_ssize_t *respos)
{
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
@@ -8127,23 +8355,15 @@ charmap_encoding_error(
}
/* cache callback name lookup
* (if not done yet, i.e. it's the first error) */
- if (*known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- *known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- *known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- *known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- *known_errorHandler = 4;
- else
- *known_errorHandler = 0;
- }
- switch (*known_errorHandler) {
- case 1: /* strict */
+ if (*error_handler == _Py_ERROR_UNKNOWN)
+ *error_handler = get_error_handler(errors);
+
+ switch (*error_handler) {
+ case _Py_ERROR_STRICT:
raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1;
- case 2: /* replace */
+
+ case _Py_ERROR_REPLACE:
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
x = charmapencode_output('?', mapping, res, respos);
if (x==enc_EXCEPTION) {
@@ -8155,10 +8375,11 @@ charmap_encoding_error(
}
}
/* fall through */
- case 3: /* ignore */
+ case _Py_ERROR_IGNORE:
*inpos = collendpos;
break;
- case 4: /* xmlcharrefreplace */
+
+ case _Py_ERROR_XMLCHARREFREPLACE:
/* generate replacement (temporarily (mis)uses p) */
for (collpos = collstartpos; collpos < collendpos; ++collpos) {
char buffer[2+29+1+1];
@@ -8176,8 +8397,9 @@ charmap_encoding_error(
}
*inpos = collendpos;
break;
+
default:
- repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
+ repunicode = unicode_encode_call_errorhandler(errors, error_handler_obj,
encoding, reason, unicode, exceptionObject,
collstartpos, collendpos, &newpos);
if (repunicode == NULL)
@@ -8240,12 +8462,9 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
Py_ssize_t size;
/* current output position */
Py_ssize_t respos = 0;
- PyObject *errorHandler = NULL;
+ PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
- /* the following variable is used for caching string comparisons
- * -1=not initialized, 0=unknown, 1=strict, 2=replace,
- * 3=ignore, 4=xmlcharrefreplace */
- int known_errorHandler = -1;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
void *data;
int kind;
@@ -8276,7 +8495,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
if (x==enc_FAILED) { /* unencodable character */
if (charmap_encoding_error(unicode, &inpos, mapping,
&exc,
- &known_errorHandler, &errorHandler, errors,
+ &error_handler, &error_handler_obj, errors,
&res, &respos)) {
goto onError;
}
@@ -8292,13 +8511,13 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
goto onError;
Py_XDECREF(exc);
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
return res;
onError:
Py_XDECREF(res);
Py_XDECREF(exc);
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
return NULL;
}
@@ -8365,7 +8584,7 @@ unicode_translate_call_errorhandler(const char *errors,
Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos)
{
- static char *argparse = "O!n;translating error handler must return (str, int) tuple";
+ static const char *argparse = "O!n;translating error handler must return (str, int) tuple";
Py_ssize_t i_newpos;
PyObject *restuple;
@@ -8622,7 +8841,7 @@ exit:
return res;
}
-PyObject *
+static PyObject *
_PyUnicode_TranslateCharmap(PyObject *input,
PyObject *mapping,
const char *errors)
@@ -8651,10 +8870,8 @@ _PyUnicode_TranslateCharmap(PyObject *input,
kind = PyUnicode_KIND(input);
size = PyUnicode_GET_LENGTH(input);
- if (size == 0) {
- Py_INCREF(input);
- return input;
- }
+ if (size == 0)
+ return PyUnicode_FromObject(input);
/* allocate enough for a simple 1:1 translation without
replacements, if we need more, we'll resize */
@@ -8765,14 +8982,9 @@ PyUnicode_Translate(PyObject *str,
PyObject *mapping,
const char *errors)
{
- PyObject *result;
-
- str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (ensure_unicode(str) < 0)
return NULL;
- result = _PyUnicode_TranslateCharmap(str, mapping, errors);
- Py_DECREF(str);
- return result;
+ return _PyUnicode_TranslateCharmap(str, mapping, errors);
}
static Py_UCS4
@@ -8954,9 +9166,10 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
}
static Py_ssize_t
-any_find_slice(int direction, PyObject* s1, PyObject* s2,
+any_find_slice(PyObject* s1, PyObject* s2,
Py_ssize_t start,
- Py_ssize_t end)
+ Py_ssize_t end,
+ int direction)
{
int kind1, kind2;
void *buf1, *buf2;
@@ -9125,54 +9338,35 @@ PyUnicode_Count(PyObject *str,
Py_ssize_t end)
{
Py_ssize_t result;
- PyObject* str_obj;
- PyObject* sub_obj;
int kind1, kind2;
void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2;
- str_obj = PyUnicode_FromObject(str);
- if (!str_obj)
+ if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
return -1;
- sub_obj = PyUnicode_FromObject(substr);
- if (!sub_obj) {
- Py_DECREF(str_obj);
- return -1;
- }
- if (PyUnicode_READY(sub_obj) == -1 || PyUnicode_READY(str_obj) == -1) {
- Py_DECREF(sub_obj);
- Py_DECREF(str_obj);
- return -1;
- }
- kind1 = PyUnicode_KIND(str_obj);
- kind2 = PyUnicode_KIND(sub_obj);
- if (kind1 < kind2) {
- Py_DECREF(sub_obj);
- Py_DECREF(str_obj);
+ kind1 = PyUnicode_KIND(str);
+ kind2 = PyUnicode_KIND(substr);
+ if (kind1 < kind2)
return 0;
- }
- len1 = PyUnicode_GET_LENGTH(str_obj);
- len2 = PyUnicode_GET_LENGTH(sub_obj);
+ len1 = PyUnicode_GET_LENGTH(str);
+ len2 = PyUnicode_GET_LENGTH(substr);
ADJUST_INDICES(start, end, len1);
- if (end - start < len2) {
- Py_DECREF(sub_obj);
- Py_DECREF(str_obj);
+ if (end - start < len2)
return 0;
- }
- buf1 = PyUnicode_DATA(str_obj);
- buf2 = PyUnicode_DATA(sub_obj);
+ buf1 = PyUnicode_DATA(str);
+ buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(sub_obj, kind1);
+ buf2 = _PyUnicode_AsKind(substr, kind1);
if (!buf2)
goto onError;
}
switch (kind1) {
case PyUnicode_1BYTE_KIND:
- if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj))
+ if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr))
result = asciilib_count(
((Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
@@ -9199,16 +9393,11 @@ PyUnicode_Count(PyObject *str,
assert(0); result = 0;
}
- Py_DECREF(sub_obj);
- Py_DECREF(str_obj);
-
if (kind2 != kind1)
PyMem_Free(buf2);
return result;
onError:
- Py_DECREF(sub_obj);
- Py_DECREF(str_obj);
if (kind2 != kind1 && buf2)
PyMem_Free(buf2);
return -1;
@@ -9216,35 +9405,15 @@ PyUnicode_Count(PyObject *str,
Py_ssize_t
PyUnicode_Find(PyObject *str,
- PyObject *sub,
+ PyObject *substr,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{
- Py_ssize_t result;
-
- str = PyUnicode_FromObject(str);
- if (!str)
- return -2;
- sub = PyUnicode_FromObject(sub);
- if (!sub) {
- Py_DECREF(str);
+ if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
return -2;
- }
- if (PyUnicode_READY(sub) == -1 || PyUnicode_READY(str) == -1) {
- Py_DECREF(sub);
- Py_DECREF(str);
- return -2;
- }
- result = any_find_slice(direction,
- str, sub, start, end
- );
-
- Py_DECREF(str);
- Py_DECREF(sub);
-
- return result;
+ return any_find_slice(str, substr, start, end, direction);
}
Py_ssize_t
@@ -9347,22 +9516,10 @@ PyUnicode_Tailmatch(PyObject *str,
Py_ssize_t end,
int direction)
{
- Py_ssize_t result;
-
- str = PyUnicode_FromObject(str);
- if (str == NULL)
- return -1;
- substr = PyUnicode_FromObject(substr);
- if (substr == NULL) {
- Py_DECREF(str);
+ if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
return -1;
- }
- result = tailmatch(str, substr,
- start, end, direction);
- Py_DECREF(str);
- Py_DECREF(substr);
- return result;
+ return tailmatch(str, substr, start, end, direction);
}
/* Apply fixfct filter to the Unicode object self and return a
@@ -9968,13 +10125,8 @@ PyUnicode_Splitlines(PyObject *string, int keepends)
{
PyObject *list;
- string = PyUnicode_FromObject(string);
- if (string == NULL)
+ if (ensure_unicode(string) < 0)
return NULL;
- if (PyUnicode_READY(string) == -1) {
- Py_DECREF(string);
- return NULL;
- }
switch (PyUnicode_KIND(string)) {
case PyUnicode_1BYTE_KIND:
@@ -10001,7 +10153,6 @@ PyUnicode_Splitlines(PyObject *string, int keepends)
assert(0);
list = 0;
}
- Py_DECREF(string);
return list;
}
@@ -10562,28 +10713,27 @@ unicode_casefold(PyObject *self)
}
-/* Argument converter. Coerces to a single unicode character */
+/* Argument converter. Accepts a single Unicode character. */
static int
convert_uc(PyObject *obj, void *addr)
{
Py_UCS4 *fillcharloc = (Py_UCS4 *)addr;
- PyObject *uniobj;
- uniobj = PyUnicode_FromObject(obj);
- if (uniobj == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "The fill character cannot be converted to Unicode");
+ if (!PyUnicode_Check(obj)) {
+ PyErr_Format(PyExc_TypeError,
+ "The fill character must be a unicode character, "
+ "not %.100s", Py_TYPE(obj)->tp_name);
return 0;
}
- if (PyUnicode_GET_LENGTH(uniobj) != 1) {
+ if (PyUnicode_READY(obj) < 0)
+ return 0;
+ if (PyUnicode_GET_LENGTH(obj) != 1) {
PyErr_SetString(PyExc_TypeError,
"The fill character must be exactly one character long");
- Py_DECREF(uniobj);
return 0;
}
- *fillcharloc = PyUnicode_READ_CHAR(uniobj, 0);
- Py_DECREF(uniobj);
+ *fillcharloc = PyUnicode_READ_CHAR(obj, 0);
return 1;
}
@@ -10899,59 +11049,49 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
}
int
-PyUnicode_Contains(PyObject *container, PyObject *element)
+_PyUnicode_EQ(PyObject *aa, PyObject *bb)
+{
+ return unicode_eq(aa, bb);
+}
+
+int
+PyUnicode_Contains(PyObject *str, PyObject *substr)
{
- PyObject *str, *sub;
int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2;
int result;
- /* Coerce the two arguments */
- sub = PyUnicode_FromObject(element);
- if (!sub) {
+ if (!PyUnicode_Check(substr)) {
PyErr_Format(PyExc_TypeError,
- "'in <string>' requires string as left operand, not %s",
- element->ob_type->tp_name);
+ "'in <string>' requires string as left operand, not %.100s",
+ Py_TYPE(substr)->tp_name);
return -1;
}
-
- str = PyUnicode_FromObject(container);
- if (!str) {
- Py_DECREF(sub);
+ if (PyUnicode_READY(substr) == -1)
+ return -1;
+ if (ensure_unicode(str) < 0)
return -1;
- }
kind1 = PyUnicode_KIND(str);
- kind2 = PyUnicode_KIND(sub);
- if (kind1 < kind2) {
- Py_DECREF(sub);
- Py_DECREF(str);
+ kind2 = PyUnicode_KIND(substr);
+ if (kind1 < kind2)
return 0;
- }
len1 = PyUnicode_GET_LENGTH(str);
- len2 = PyUnicode_GET_LENGTH(sub);
- if (len1 < len2) {
- Py_DECREF(sub);
- Py_DECREF(str);
+ len2 = PyUnicode_GET_LENGTH(substr);
+ if (len1 < len2)
return 0;
- }
buf1 = PyUnicode_DATA(str);
- buf2 = PyUnicode_DATA(sub);
+ buf2 = PyUnicode_DATA(substr);
if (len2 == 1) {
Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1;
- Py_DECREF(sub);
- Py_DECREF(str);
return result;
}
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(sub, kind1);
- if (!buf2) {
- Py_DECREF(sub);
- Py_DECREF(str);
+ buf2 = _PyUnicode_AsKind(substr, kind1);
+ if (!buf2)
return -1;
- }
}
switch (kind1) {
@@ -10969,9 +11109,6 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
assert(0);
}
- Py_DECREF(str);
- Py_DECREF(sub);
-
if (kind2 != kind1)
PyMem_Free(buf2);
@@ -10983,56 +11120,40 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
PyObject *
PyUnicode_Concat(PyObject *left, PyObject *right)
{
- PyObject *u = NULL, *v = NULL, *w;
+ PyObject *result;
Py_UCS4 maxchar, maxchar2;
- Py_ssize_t u_len, v_len, new_len;
+ Py_ssize_t left_len, right_len, new_len;
- /* Coerce the two arguments */
- u = PyUnicode_FromObject(left);
- if (u == NULL)
- goto onError;
- v = PyUnicode_FromObject(right);
- if (v == NULL)
- goto onError;
+ if (ensure_unicode(left) < 0 || ensure_unicode(right) < 0)
+ return NULL;
/* Shortcuts */
- if (v == unicode_empty) {
- Py_DECREF(v);
- return u;
- }
- if (u == unicode_empty) {
- Py_DECREF(u);
- return v;
- }
+ if (left == unicode_empty)
+ return PyUnicode_FromObject(right);
+ if (right == unicode_empty)
+ return PyUnicode_FromObject(left);
- u_len = PyUnicode_GET_LENGTH(u);
- v_len = PyUnicode_GET_LENGTH(v);
- if (u_len > PY_SSIZE_T_MAX - v_len) {
+ left_len = PyUnicode_GET_LENGTH(left);
+ right_len = PyUnicode_GET_LENGTH(right);
+ if (left_len > PY_SSIZE_T_MAX - right_len) {
PyErr_SetString(PyExc_OverflowError,
"strings are too large to concat");
- goto onError;
+ return NULL;
}
- new_len = u_len + v_len;
+ new_len = left_len + right_len;
- maxchar = PyUnicode_MAX_CHAR_VALUE(u);
- maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
+ maxchar = PyUnicode_MAX_CHAR_VALUE(left);
+ maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
maxchar = Py_MAX(maxchar, maxchar2);
/* Concat the two Unicode strings */
- w = PyUnicode_New(new_len, maxchar);
- if (w == NULL)
- goto onError;
- _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len);
- _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len);
- Py_DECREF(u);
- Py_DECREF(v);
- assert(_PyUnicode_CheckConsistency(w, 1));
- return w;
-
- onError:
- Py_XDECREF(u);
- Py_XDECREF(v);
- return NULL;
+ result = PyUnicode_New(new_len, maxchar);
+ if (result == NULL)
+ return NULL;
+ _PyUnicode_FastCopyCharacters(result, 0, left, 0, left_len);
+ _PyUnicode_FastCopyCharacters(result, left_len, right, 0, right_len);
+ assert(_PyUnicode_CheckConsistency(result, 1));
+ return result;
}
void
@@ -11123,6 +11244,25 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
Py_XDECREF(right);
}
+/*
+Wraps stringlib_parse_args_finds() and additionally ensures that the
+first argument is a unicode object.
+*/
+
+Py_LOCAL_INLINE(int)
+parse_args_finds_unicode(const char * function_name, PyObject *args,
+ PyObject **substring,
+ Py_ssize_t *start, Py_ssize_t *end)
+{
+ if(stringlib_parse_args_finds(function_name, args, substring,
+ start, end)) {
+ if (ensure_unicode(*substring) < 0)
+ return 0;
+ return 1;
+ }
+ return 0;
+}
+
PyDoc_STRVAR(count__doc__,
"S.count(sub[, start[, end]]) -> int\n\
\n\
@@ -11141,31 +11281,26 @@ unicode_count(PyObject *self, PyObject *args)
void *buf1, *buf2;
Py_ssize_t len1, len2, iresult;
- if (!stringlib_parse_args_finds_unicode("count", args, &substring,
- &start, &end))
+ if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
return NULL;
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- if (kind1 < kind2) {
- Py_DECREF(substring);
+ if (kind1 < kind2)
return PyLong_FromLong(0);
- }
+
len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
ADJUST_INDICES(start, end, len1);
- if (end - start < len2) {
- Py_DECREF(substring);
+ if (end - start < len2)
return PyLong_FromLong(0);
- }
+
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
- if (!buf2) {
- Py_DECREF(substring);
+ if (!buf2)
return NULL;
- }
}
switch (kind1) {
case PyUnicode_1BYTE_KIND:
@@ -11195,8 +11330,6 @@ unicode_count(PyObject *self, PyObject *args)
if (kind2 != kind1)
PyMem_Free(buf2);
- Py_DECREF(substring);
-
return result;
}
@@ -11330,22 +11463,13 @@ unicode_find(PyObject *self, PyObject *args)
Py_ssize_t end = 0;
Py_ssize_t result;
- if (!stringlib_parse_args_finds_unicode("find", args, &substring,
- &start, &end))
+ if (!parse_args_finds_unicode("find", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1) {
- Py_DECREF(substring);
- return NULL;
- }
- if (PyUnicode_READY(substring) == -1) {
- Py_DECREF(substring);
+ if (PyUnicode_READY(self) == -1)
return NULL;
- }
- result = any_find_slice(1, self, substring, start, end);
-
- Py_DECREF(substring);
+ result = any_find_slice(self, substring, start, end, 1);
if (result == -2)
return NULL;
@@ -11418,22 +11542,13 @@ unicode_index(PyObject *self, PyObject *args)
Py_ssize_t start = 0;
Py_ssize_t end = 0;
- if (!stringlib_parse_args_finds_unicode("index", args, &substring,
- &start, &end))
+ if (!parse_args_finds_unicode("index", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1) {
- Py_DECREF(substring);
- return NULL;
- }
- if (PyUnicode_READY(substring) == -1) {
- Py_DECREF(substring);
+ if (PyUnicode_READY(self) == -1)
return NULL;
- }
- result = any_find_slice(1, self, substring, start, end);
-
- Py_DECREF(substring);
+ result = any_find_slice(self, substring, start, end, 1);
if (result == -2)
return NULL;
@@ -11947,7 +12062,7 @@ unicode_lower(PyObject *self)
#define BOTHSTRIP 2
/* Arrays indexed by above */
-static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
+static const char * const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
@@ -12242,40 +12357,15 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
}
PyObject *
-PyUnicode_Replace(PyObject *obj,
- PyObject *subobj,
- PyObject *replobj,
+PyUnicode_Replace(PyObject *str,
+ PyObject *substr,
+ PyObject *replstr,
Py_ssize_t maxcount)
{
- PyObject *self;
- PyObject *str1;
- PyObject *str2;
- PyObject *result;
-
- self = PyUnicode_FromObject(obj);
- if (self == NULL)
+ if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0 ||
+ ensure_unicode(replstr) < 0)
return NULL;
- str1 = PyUnicode_FromObject(subobj);
- if (str1 == NULL) {
- Py_DECREF(self);
- return NULL;
- }
- str2 = PyUnicode_FromObject(replobj);
- if (str2 == NULL) {
- Py_DECREF(self);
- Py_DECREF(str1);
- return NULL;
- }
- if (PyUnicode_READY(self) == -1 ||
- PyUnicode_READY(str1) == -1 ||
- PyUnicode_READY(str2) == -1)
- result = NULL;
- else
- result = replace(self, str1, str2, maxcount);
- Py_DECREF(self);
- Py_DECREF(str1);
- Py_DECREF(str2);
- return result;
+ return replace(str, substr, replstr, maxcount);
}
PyDoc_STRVAR(replace__doc__,
@@ -12291,28 +12381,12 @@ unicode_replace(PyObject *self, PyObject *args)
PyObject *str1;
PyObject *str2;
Py_ssize_t maxcount = -1;
- PyObject *result;
- if (!PyArg_ParseTuple(args, "OO|n:replace", &str1, &str2, &maxcount))
+ if (!PyArg_ParseTuple(args, "UU|n:replace", &str1, &str2, &maxcount))
return NULL;
if (PyUnicode_READY(self) == -1)
return NULL;
- str1 = PyUnicode_FromObject(str1);
- if (str1 == NULL)
- return NULL;
- str2 = PyUnicode_FromObject(str2);
- if (str2 == NULL) {
- Py_DECREF(str1);
- return NULL;
- }
- if (PyUnicode_READY(str1) == -1 || PyUnicode_READY(str2) == -1)
- result = NULL;
- else
- result = replace(self, str1, str2, maxcount);
-
- Py_DECREF(str1);
- Py_DECREF(str2);
- return result;
+ return replace(self, str1, str2, maxcount);
}
static PyObject *
@@ -12497,22 +12571,13 @@ unicode_rfind(PyObject *self, PyObject *args)
Py_ssize_t end = 0;
Py_ssize_t result;
- if (!stringlib_parse_args_finds_unicode("rfind", args, &substring,
- &start, &end))
+ if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1) {
- Py_DECREF(substring);
- return NULL;
- }
- if (PyUnicode_READY(substring) == -1) {
- Py_DECREF(substring);
+ if (PyUnicode_READY(self) == -1)
return NULL;
- }
- result = any_find_slice(-1, self, substring, start, end);
-
- Py_DECREF(substring);
+ result = any_find_slice(self, substring, start, end, -1);
if (result == -2)
return NULL;
@@ -12534,22 +12599,13 @@ unicode_rindex(PyObject *self, PyObject *args)
Py_ssize_t end = 0;
Py_ssize_t result;
- if (!stringlib_parse_args_finds_unicode("rindex", args, &substring,
- &start, &end))
+ if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1) {
- Py_DECREF(substring);
- return NULL;
- }
- if (PyUnicode_READY(substring) == -1) {
- Py_DECREF(substring);
+ if (PyUnicode_READY(self) == -1)
return NULL;
- }
-
- result = any_find_slice(-1, self, substring, start, end);
- Py_DECREF(substring);
+ result = any_find_slice(self, substring, start, end, -1);
if (result == -2)
return NULL;
@@ -12589,24 +12645,10 @@ unicode_rjust(PyObject *self, PyObject *args)
PyObject *
PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
{
- PyObject *result;
-
- s = PyUnicode_FromObject(s);
- if (s == NULL)
+ if (ensure_unicode(s) < 0 || (sep != NULL && ensure_unicode(sep) < 0))
return NULL;
- if (sep != NULL) {
- sep = PyUnicode_FromObject(sep);
- if (sep == NULL) {
- Py_DECREF(s);
- return NULL;
- }
- }
-
- result = split(s, sep, maxsplit);
- Py_DECREF(s);
- Py_XDECREF(sep);
- return result;
+ return split(s, sep, maxsplit);
}
PyDoc_STRVAR(split__doc__,
@@ -12631,35 +12673,26 @@ unicode_split(PyObject *self, PyObject *args, PyObject *kwds)
if (substring == Py_None)
return split(self, NULL, maxcount);
- else if (PyUnicode_Check(substring))
+
+ if (PyUnicode_Check(substring))
return split(self, substring, maxcount);
- else
- return PyUnicode_Split(self, substring, maxcount);
+
+ PyErr_Format(PyExc_TypeError,
+ "must be str or None, not %.100s",
+ Py_TYPE(substring)->tp_name);
+ return NULL;
}
PyObject *
-PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
+PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
{
- PyObject* str_obj;
- PyObject* sep_obj;
PyObject* out;
int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2;
- str_obj = PyUnicode_FromObject(str_in);
- if (!str_obj)
+ if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
return NULL;
- sep_obj = PyUnicode_FromObject(sep_in);
- if (!sep_obj) {
- Py_DECREF(str_obj);
- return NULL;
- }
- if (PyUnicode_READY(sep_obj) == -1 || PyUnicode_READY(str_obj) == -1) {
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
- return NULL;
- }
kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj);
@@ -12673,8 +12706,6 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty);
Py_DECREF(unicode_empty);
}
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
return out;
}
buf1 = PyUnicode_DATA(str_obj);
@@ -12682,7 +12713,7 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
if (!buf2)
- goto onError;
+ return NULL;
}
switch (kind1) {
@@ -12703,39 +12734,23 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
out = 0;
}
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
if (kind2 != kind1)
PyMem_Free(buf2);
return out;
- onError:
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
- if (kind2 != kind1 && buf2)
- PyMem_Free(buf2);
- return NULL;
}
PyObject *
-PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
+PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
{
- PyObject* str_obj;
- PyObject* sep_obj;
PyObject* out;
int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2;
- str_obj = PyUnicode_FromObject(str_in);
- if (!str_obj)
- return NULL;
- sep_obj = PyUnicode_FromObject(sep_in);
- if (!sep_obj) {
- Py_DECREF(str_obj);
+ if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
return NULL;
- }
kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj);
@@ -12749,8 +12764,6 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj);
Py_DECREF(unicode_empty);
}
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
return out;
}
buf1 = PyUnicode_DATA(str_obj);
@@ -12758,7 +12771,7 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
if (!buf2)
- goto onError;
+ return NULL;
}
switch (kind1) {
@@ -12779,18 +12792,10 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
out = 0;
}
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
if (kind2 != kind1)
PyMem_Free(buf2);
return out;
- onError:
- Py_DECREF(sep_obj);
- Py_DECREF(str_obj);
- if (kind2 != kind1 && buf2)
- PyMem_Free(buf2);
- return NULL;
}
PyDoc_STRVAR(partition__doc__,
@@ -12822,24 +12827,10 @@ unicode_rpartition(PyObject *self, PyObject *separator)
PyObject *
PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
{
- PyObject *result;
-
- s = PyUnicode_FromObject(s);
- if (s == NULL)
+ if (ensure_unicode(s) < 0 || (sep != NULL && ensure_unicode(sep) < 0))
return NULL;
- if (sep != NULL) {
- sep = PyUnicode_FromObject(sep);
- if (sep == NULL) {
- Py_DECREF(s);
- return NULL;
- }
- }
- result = rsplit(s, sep, maxsplit);
-
- Py_DECREF(s);
- Py_XDECREF(sep);
- return result;
+ return rsplit(s, sep, maxsplit);
}
PyDoc_STRVAR(rsplit__doc__,
@@ -12864,10 +12855,14 @@ unicode_rsplit(PyObject *self, PyObject *args, PyObject *kwds)
if (substring == Py_None)
return rsplit(self, NULL, maxcount);
- else if (PyUnicode_Check(substring))
+
+ if (PyUnicode_Check(substring))
return rsplit(self, substring, maxcount);
- else
- return PyUnicode_RSplit(self, substring, maxcount);
+
+ PyErr_Format(PyExc_TypeError,
+ "must be str or None, not %.100s",
+ Py_TYPE(substring)->tp_name);
+ return NULL;
}
PyDoc_STRVAR(splitlines__doc__,
@@ -13148,11 +13143,15 @@ unicode_startswith(PyObject *self,
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- substring = PyUnicode_FromObject(PyTuple_GET_ITEM(subobj, i));
- if (substring == NULL)
+ substring = PyTuple_GET_ITEM(subobj, i);
+ if (!PyUnicode_Check(substring)) {
+ PyErr_Format(PyExc_TypeError,
+ "tuple for startswith must only contain str, "
+ "not %.100s",
+ Py_TYPE(substring)->tp_name);
return NULL;
+ }
result = tailmatch(self, substring, start, end, -1);
- Py_DECREF(substring);
if (result == -1)
return NULL;
if (result) {
@@ -13162,15 +13161,13 @@ unicode_startswith(PyObject *self,
/* nothing matched */
Py_RETURN_FALSE;
}
- substring = PyUnicode_FromObject(subobj);
- if (substring == NULL) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError, "startswith first arg must be str or "
- "a tuple of str, not %s", Py_TYPE(subobj)->tp_name);
+ if (!PyUnicode_Check(subobj)) {
+ PyErr_Format(PyExc_TypeError,
+ "startswith first arg must be str or "
+ "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name);
return NULL;
}
- result = tailmatch(self, substring, start, end, -1);
- Py_DECREF(substring);
+ result = tailmatch(self, subobj, start, end, -1);
if (result == -1)
return NULL;
return PyBool_FromLong(result);
@@ -13200,12 +13197,15 @@ unicode_endswith(PyObject *self,
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
- substring = PyUnicode_FromObject(
- PyTuple_GET_ITEM(subobj, i));
- if (substring == NULL)
+ substring = PyTuple_GET_ITEM(subobj, i);
+ if (!PyUnicode_Check(substring)) {
+ PyErr_Format(PyExc_TypeError,
+ "tuple for endswith must only contain str, "
+ "not %.100s",
+ Py_TYPE(substring)->tp_name);
return NULL;
+ }
result = tailmatch(self, substring, start, end, +1);
- Py_DECREF(substring);
if (result == -1)
return NULL;
if (result) {
@@ -13214,15 +13214,13 @@ unicode_endswith(PyObject *self,
}
Py_RETURN_FALSE;
}
- substring = PyUnicode_FromObject(subobj);
- if (substring == NULL) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError, "endswith first arg must be str or "
- "a tuple of str, not %s", Py_TYPE(subobj)->tp_name);
+ if (!PyUnicode_Check(subobj)) {
+ PyErr_Format(PyExc_TypeError,
+ "endswith first arg must be str or "
+ "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name);
return NULL;
}
- result = tailmatch(self, substring, start, end, +1);
- Py_DECREF(substring);
+ result = tailmatch(self, subobj, start, end, +1);
if (result == -1)
return NULL;
return PyBool_FromLong(result);
@@ -13231,44 +13229,50 @@ unicode_endswith(PyObject *self,
Py_LOCAL_INLINE(void)
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
{
- if (!writer->readonly)
+ writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
+ writer->data = PyUnicode_DATA(writer->buffer);
+
+ if (!writer->readonly) {
+ writer->kind = PyUnicode_KIND(writer->buffer);
writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+ }
else {
+ /* use a value smaller than PyUnicode_1BYTE_KIND() so
+ _PyUnicodeWriter_PrepareKind() will copy the buffer. */
+ writer->kind = PyUnicode_WCHAR_KIND;
+ assert(writer->kind <= PyUnicode_1BYTE_KIND);
+
/* Copy-on-write mode: set buffer size to 0 so
* _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
* next write. */
writer->size = 0;
}
- writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
- writer->data = PyUnicode_DATA(writer->buffer);
- writer->kind = PyUnicode_KIND(writer->buffer);
}
void
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
{
memset(writer, 0, sizeof(*writer));
-#ifdef Py_DEBUG
- writer->kind = 5; /* invalid kind */
-#endif
+
+ /* ASCII is the bare minimum */
writer->min_char = 127;
+
+ /* use a value smaller than PyUnicode_1BYTE_KIND() so
+ _PyUnicodeWriter_PrepareKind() will copy the buffer. */
+ writer->kind = PyUnicode_WCHAR_KIND;
+ assert(writer->kind <= PyUnicode_1BYTE_KIND);
}
int
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar)
{
-#ifdef MS_WINDOWS
- /* On Windows, overallocate by 50% is the best factor */
-# define OVERALLOCATE_FACTOR 2
-#else
- /* On Linux, overallocate by 25% is the best factor */
-# define OVERALLOCATE_FACTOR 4
-#endif
Py_ssize_t newlen;
PyObject *newbuffer;
- assert(length > 0);
+ /* ensure that the _PyUnicodeWriter_Prepare macro was used */
+ assert((maxchar > writer->maxchar && length >= 0)
+ || length > 0);
if (length > PY_SSIZE_T_MAX - writer->pos) {
PyErr_NoMemory();
@@ -13334,6 +13338,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
#undef OVERALLOCATE_FACTOR
}
+int
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+ enum PyUnicode_Kind kind)
+{
+ Py_UCS4 maxchar;
+
+ /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
+ assert(writer->kind < kind);
+
+ switch (kind)
+ {
+ case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
+ case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
+ case PyUnicode_4BYTE_KIND: maxchar = 0x10ffff; break;
+ default:
+ assert(0 && "invalid kind");
+ return -1;
+ }
+
+ return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
+}
+
Py_LOCAL_INLINE(int)
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
{
@@ -13504,17 +13530,26 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
assert(PyUnicode_GET_LENGTH(str) == writer->pos);
return str;
}
- if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
- PyObject *newbuffer;
- newbuffer = resize_compact(writer->buffer, writer->pos);
- if (newbuffer == NULL) {
- Py_CLEAR(writer->buffer);
- return NULL;
+ if (writer->pos == 0) {
+ Py_CLEAR(writer->buffer);
+
+ /* Get the empty Unicode string singleton ('') */
+ _Py_INCREF_UNICODE_EMPTY();
+ str = unicode_empty;
+ }
+ else {
+ str = writer->buffer;
+ writer->buffer = NULL;
+
+ if (PyUnicode_GET_LENGTH(str) != writer->pos) {
+ PyObject *str2;
+ str2 = resize_compact(str, writer->pos);
+ if (str2 == NULL)
+ return NULL;
+ str = str2;
}
- writer->buffer = newbuffer;
}
- str = writer->buffer;
- writer->buffer = NULL;
+
assert(_PyUnicode_CheckConsistency(str, 1));
return unicode_result_ready(str);
}
@@ -14655,13 +14690,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
return NULL;
}
- ctx.fmtstr = PyUnicode_FromObject(format);
- if (ctx.fmtstr == NULL)
- return NULL;
- if (PyUnicode_READY(ctx.fmtstr) == -1) {
- Py_DECREF(ctx.fmtstr);
+ if (ensure_unicode(format) < 0)
return NULL;
- }
+
+ ctx.fmtstr = format;
ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
@@ -14721,11 +14753,9 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (ctx.args_owned) {
Py_DECREF(ctx.args);
}
- Py_DECREF(ctx.fmtstr);
return _PyUnicodeWriter_Finish(&ctx.writer);
onError:
- Py_DECREF(ctx.fmtstr);
_PyUnicodeWriter_Dealloc(&ctx.writer);
if (ctx.args_owned) {
Py_DECREF(ctx.args);
diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c
index 7e6f364..f75b1e8 100644
--- a/Objects/weakrefobject.c
+++ b/Objects/weakrefobject.c
@@ -265,7 +265,7 @@ insert_head(PyWeakReference *newref, PyWeakReference **list)
}
static int
-parse_weakref_init_args(char *funcname, PyObject *args, PyObject *kwargs,
+parse_weakref_init_args(const char *funcname, PyObject *args, PyObject *kwargs,
PyObject **obp, PyObject **callbackp)
{
return PyArg_UnpackTuple(args, funcname, 1, 2, obp, callbackp);