summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r--Objects/abstract.c122
-rw-r--r--Objects/bytearrayobject.c74
-rw-r--r--Objects/bytesobject.c111
-rw-r--r--Objects/complexobject.c14
-rw-r--r--Objects/descrobject.c10
-rw-r--r--Objects/dictobject.c101
-rw-r--r--Objects/exceptions.c2
-rw-r--r--Objects/fileobject.c2
-rw-r--r--Objects/floatobject.c24
-rw-r--r--Objects/frameobject.c19
-rw-r--r--Objects/funcobject.c101
-rw-r--r--Objects/genobject.c103
-rw-r--r--Objects/iterobject.c13
-rw-r--r--Objects/listobject.c41
-rw-r--r--Objects/longobject.c147
-rw-r--r--Objects/methodobject.c53
-rw-r--r--Objects/moduleobject.c62
-rw-r--r--Objects/namespaceobject.c60
-rw-r--r--Objects/object.c145
-rw-r--r--Objects/obmalloc.c558
-rw-r--r--Objects/rangeobject.c205
-rw-r--r--Objects/setobject.c1
-rw-r--r--Objects/sliceobject.c191
-rw-r--r--Objects/stringlib/asciilib.h1
-rw-r--r--Objects/stringlib/codecs.h6
-rw-r--r--Objects/stringlib/fastsearch.h8
-rw-r--r--Objects/stringlib/join.h133
-rw-r--r--Objects/stringlib/replace.h53
-rw-r--r--Objects/stringlib/stringdefs.h1
-rw-r--r--Objects/stringlib/ucs1lib.h1
-rw-r--r--Objects/stringlib/ucs2lib.h1
-rw-r--r--Objects/stringlib/ucs4lib.h1
-rw-r--r--Objects/stringlib/undef.h1
-rw-r--r--Objects/stringlib/unicode_format.h139
-rw-r--r--Objects/stringlib/unicodedefs.h1
-rw-r--r--Objects/structseq.c86
-rw-r--r--Objects/tupleobject.c9
-rw-r--r--Objects/typeobject.c192
-rw-r--r--Objects/unicodeobject.c4080
-rw-r--r--Objects/unicodetype_db.h13
-rw-r--r--Objects/weakrefobject.c7
41 files changed, 3907 insertions, 2985 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 7f1808f..5f11c3d 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -64,49 +64,70 @@ PyObject_Length(PyObject *o)
}
#define PyObject_Length PyObject_Size
+int
+_PyObject_HasLen(PyObject *o) {
+ return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) ||
+ (Py_TYPE(o)->tp_as_mapping && Py_TYPE(o)->tp_as_mapping->mp_length);
+}
/* The length hint function returns a non-negative value from o.__len__()
- or o.__length_hint__(). If those methods aren't found or return a negative
- value, then the defaultvalue is returned. If one of the calls fails,
+ or o.__length_hint__(). If those methods aren't found the defaultvalue is
+ returned. If one of the calls fails with an exception other than TypeError
this function returns -1.
*/
Py_ssize_t
-_PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
+PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
{
+ PyObject *hint, *result;
+ Py_ssize_t res;
_Py_IDENTIFIER(__length_hint__);
- PyObject *ro, *hintmeth;
- Py_ssize_t rv;
-
- /* try o.__len__() */
- rv = PyObject_Size(o);
- if (rv >= 0)
- return rv;
- if (PyErr_Occurred()) {
- if (!PyErr_ExceptionMatches(PyExc_TypeError))
+ res = PyObject_Length(o);
+ if (res < 0 && PyErr_Occurred()) {
+ if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
return -1;
+ }
PyErr_Clear();
}
-
- /* try o.__length_hint__() */
- hintmeth = _PyObject_LookupSpecial(o, &PyId___length_hint__);
- if (hintmeth == NULL) {
- if (PyErr_Occurred())
+ else {
+ return res;
+ }
+ hint = _PyObject_LookupSpecial(o, &PyId___length_hint__);
+ if (hint == NULL) {
+ if (PyErr_Occurred()) {
return -1;
- else
+ }
+ return defaultvalue;
+ }
+ result = PyObject_CallFunctionObjArgs(hint, NULL);
+ Py_DECREF(hint);
+ if (result == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+ PyErr_Clear();
return defaultvalue;
+ }
+ return -1;
}
- ro = PyObject_CallFunctionObjArgs(hintmeth, NULL);
- Py_DECREF(hintmeth);
- if (ro == NULL) {
- if (!PyErr_ExceptionMatches(PyExc_TypeError))
- return -1;
- PyErr_Clear();
+ else if (result == Py_NotImplemented) {
+ Py_DECREF(result);
return defaultvalue;
}
- rv = PyLong_Check(ro) ? PyLong_AsSsize_t(ro) : defaultvalue;
- Py_DECREF(ro);
- return rv;
+ if (!PyLong_Check(result)) {
+ PyErr_Format(PyExc_TypeError, "__length_hint__ must be an integer, not %.100s",
+ Py_TYPE(result)->tp_name);
+ Py_DECREF(result);
+ return -1;
+ }
+ res = PyLong_AsSsize_t(result);
+ Py_DECREF(result);
+ if (res < 0 && PyErr_Occurred()) {
+ return -1;
+ }
+ if (res < 0) {
+ PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0");
+ return -1;
+ }
+ return res;
}
PyObject *
@@ -1217,7 +1238,7 @@ PyNumber_AsSsize_t(PyObject *item, PyObject *err)
to be an int or have an __int__ method. Steals integral's
reference. error_format will be used to create the TypeError if integral
isn't actually an Integral instance. error_format should be a format string
- that can accept a char* naming integral's type.
+ that can accept a char* naming integral's type.
*/
static PyObject *
convert_integral_to_int(PyObject *integral, const char *error_format)
@@ -1236,7 +1257,7 @@ convert_integral_to_int(PyObject *integral, const char *error_format)
}
PyErr_Format(PyExc_TypeError, error_format, Py_TYPE(integral)->tp_name);
Py_DECREF(integral);
- return NULL;
+ return NULL;
}
@@ -1670,7 +1691,7 @@ PySequence_Tuple(PyObject *v)
return NULL;
/* Guess result size and allocate space. */
- n = _PyObject_LengthHint(v, 10);
+ n = PyObject_LengthHint(v, 10);
if (n == -1)
goto Fail;
result = PyTuple_New(n);
@@ -2064,10 +2085,15 @@ PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw)
return NULL;
result = (*call)(func, arg, kw);
Py_LeaveRecursiveCall();
- if (result == NULL && !PyErr_Occurred())
+#ifdef NDEBUG
+ if (result == NULL && !PyErr_Occurred()) {
PyErr_SetString(
PyExc_SystemError,
"NULL result without error in PyObject_Call");
+ }
+#else
+ assert(result != NULL || PyErr_Occurred());
+#endif
return result;
}
PyErr_Format(PyExc_TypeError, "'%.200s' object is not callable",
@@ -2102,7 +2128,7 @@ call_function_tail(PyObject *callable, PyObject *args)
}
PyObject *
-PyObject_CallFunction(PyObject *callable, char *format, ...)
+PyObject_CallFunction(PyObject *callable, const char *format, ...)
{
va_list va;
PyObject *args;
@@ -2122,7 +2148,7 @@ PyObject_CallFunction(PyObject *callable, char *format, ...)
}
PyObject *
-_PyObject_CallFunction_SizeT(PyObject *callable, char *format, ...)
+_PyObject_CallFunction_SizeT(PyObject *callable, const char *format, ...)
{
va_list va;
PyObject *args;
@@ -2142,7 +2168,7 @@ _PyObject_CallFunction_SizeT(PyObject *callable, char *format, ...)
}
static PyObject*
-callmethod(PyObject* func, char *format, va_list va, int is_size_t)
+callmethod(PyObject* func, const char *format, va_list va, int is_size_t)
{
PyObject *retval = NULL;
PyObject *args;
@@ -2171,7 +2197,7 @@ callmethod(PyObject* func, char *format, va_list va, int is_size_t)
}
PyObject *
-PyObject_CallMethod(PyObject *o, char *name, char *format, ...)
+PyObject_CallMethod(PyObject *o, const char *name, const char *format, ...)
{
va_list va;
PyObject *func = NULL;
@@ -2192,7 +2218,8 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...)
}
PyObject *
-_PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...)
+_PyObject_CallMethodId(PyObject *o, _Py_Identifier *name,
+ const char *format, ...)
{
va_list va;
PyObject *func = NULL;
@@ -2213,7 +2240,8 @@ _PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...)
}
PyObject *
-_PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...)
+_PyObject_CallMethod_SizeT(PyObject *o, const char *name,
+ const char *format, ...)
{
va_list va;
PyObject *func = NULL;
@@ -2233,7 +2261,8 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...)
}
PyObject *
-_PyObject_CallMethodId_SizeT(PyObject *o, _Py_Identifier *name, char *format, ...)
+_PyObject_CallMethodId_SizeT(PyObject *o, _Py_Identifier *name,
+ const char *format, ...)
{
va_list va;
PyObject *func = NULL;
@@ -2303,7 +2332,7 @@ PyObject_CallMethodObjArgs(PyObject *callable, PyObject *name, ...)
}
PyObject *
-_PyObject_CallMethodObjIdArgs(PyObject *callable,
+_PyObject_CallMethodIdObjArgs(PyObject *callable,
struct _Py_Identifier *name, ...)
{
PyObject *args, *tmp;
@@ -2678,8 +2707,8 @@ PyIter_Next(PyObject *iter)
* NULL terminated string pointers with a NULL char* terminating the array.
* (ie: an argv or env list)
*
- * Memory allocated for the returned list is allocated using malloc() and MUST
- * be freed by the caller using a free() loop or _Py_FreeCharPArray().
+ * Memory allocated for the returned list is allocated using PyMem_Malloc()
+ * and MUST be freed by _Py_FreeCharPArray().
*/
char *const *
_PySequence_BytesToCharpArray(PyObject* self)
@@ -2687,6 +2716,7 @@ _PySequence_BytesToCharpArray(PyObject* self)
char **array;
Py_ssize_t i, argc;
PyObject *item = NULL;
+ Py_ssize_t size;
argc = PySequence_Size(self);
if (argc == -1)
@@ -2699,7 +2729,7 @@ _PySequence_BytesToCharpArray(PyObject* self)
return NULL;
}
- array = malloc((argc + 1) * sizeof(char *));
+ array = PyMem_Malloc((argc + 1) * sizeof(char *));
if (array == NULL) {
PyErr_NoMemory();
return NULL;
@@ -2718,11 +2748,13 @@ _PySequence_BytesToCharpArray(PyObject* self)
array[i] = NULL;
goto fail;
}
- array[i] = strdup(data);
+ size = PyBytes_GET_SIZE(item) + 1;
+ array[i] = PyMem_Malloc(size);
if (!array[i]) {
PyErr_NoMemory();
goto fail;
}
+ memcpy(array[i], data, size);
Py_DECREF(item);
}
array[argc] = NULL;
@@ -2742,7 +2774,7 @@ _Py_FreeCharPArray(char *const array[])
{
Py_ssize_t i;
for (i = 0; array[i] != NULL; ++i) {
- free(array[i]);
+ PyMem_Free(array[i]);
}
- free((void*)array);
+ PyMem_Free((void*)array);
}
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 60b2811..aa38924 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1038,6 +1038,7 @@ bytearray_dealloc(PyByteArrayObject *self)
#define FASTSEARCH fastsearch
#define STRINGLIB(F) stringlib_##F
#define STRINGLIB_CHAR char
+#define STRINGLIB_SIZEOF_CHAR 1
#define STRINGLIB_LEN PyByteArray_GET_SIZE
#define STRINGLIB_STR PyByteArray_AS_STRING
#define STRINGLIB_NEW PyByteArray_FromStringAndSize
@@ -1049,6 +1050,7 @@ bytearray_dealloc(PyByteArrayObject *self)
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/join.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/ctype.h"
@@ -2291,7 +2293,7 @@ bytearray_extend(PyByteArrayObject *self, PyObject *arg)
return NULL;
/* Try to determine the length of the argument. 32 is arbitrary. */
- buf_size = _PyObject_LengthHint(arg, 32);
+ buf_size = PyObject_LengthHint(arg, 32);
if (buf_size == -1) {
Py_DECREF(it);
return NULL;
@@ -2578,73 +2580,9 @@ Concatenate any number of bytes/bytearray objects, with B\n\
in between each pair, and return the result as a new bytearray.");
static PyObject *
-bytearray_join(PyByteArrayObject *self, PyObject *it)
-{
- PyObject *seq;
- Py_ssize_t mysize = Py_SIZE(self);
- Py_ssize_t i;
- Py_ssize_t n;
- PyObject **items;
- Py_ssize_t totalsize = 0;
- PyObject *result;
- char *dest;
-
- seq = PySequence_Fast(it, "can only join an iterable");
- if (seq == NULL)
- return NULL;
- n = PySequence_Fast_GET_SIZE(seq);
- items = PySequence_Fast_ITEMS(seq);
-
- /* Compute the total size, and check that they are all bytes */
- /* XXX Shouldn't we use _getbuffer() on these items instead? */
- for (i = 0; i < n; i++) {
- PyObject *obj = items[i];
- if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
- PyErr_Format(PyExc_TypeError,
- "can only join an iterable of bytes "
- "(item %ld has type '%.100s')",
- /* XXX %ld isn't right on Win64 */
- (long)i, Py_TYPE(obj)->tp_name);
- goto error;
- }
- if (i > 0)
- totalsize += mysize;
- totalsize += Py_SIZE(obj);
- if (totalsize < 0) {
- PyErr_NoMemory();
- goto error;
- }
- }
-
- /* Allocate the result, and copy the bytes */
- result = PyByteArray_FromStringAndSize(NULL, totalsize);
- if (result == NULL)
- goto error;
- dest = PyByteArray_AS_STRING(result);
- for (i = 0; i < n; i++) {
- PyObject *obj = items[i];
- Py_ssize_t size = Py_SIZE(obj);
- char *buf;
- if (PyByteArray_Check(obj))
- buf = PyByteArray_AS_STRING(obj);
- else
- buf = PyBytes_AS_STRING(obj);
- if (i) {
- memcpy(dest, self->ob_bytes, mysize);
- dest += mysize;
- }
- memcpy(dest, buf, size);
- dest += size;
- }
-
- /* Done */
- Py_DECREF(seq);
- return result;
-
- /* Error handling */
- error:
- Py_DECREF(seq);
- return NULL;
+bytearray_join(PyObject *self, PyObject *iterable)
+{
+ return stringlib_bytes_join(self, iterable);
}
PyDoc_STRVAR(splitlines__doc__,
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 47898fe..056ac36 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -10,9 +10,18 @@
static Py_ssize_t
_getbuffer(PyObject *obj, Py_buffer *view)
{
- PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
-
- if (buffer == NULL || buffer->bf_getbuffer == NULL)
+ PyBufferProcs *bufferprocs;
+ if (PyBytes_CheckExact(obj)) {
+ /* Fast path, e.g. for .join() of many bytes objects */
+ Py_INCREF(obj);
+ view->obj = obj;
+ view->buf = PyBytes_AS_STRING(obj);
+ view->len = PyBytes_GET_SIZE(obj);
+ return view->len;
+ }
+
+ bufferprocs = Py_TYPE(obj)->tp_as_buffer;
+ if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
{
PyErr_Format(PyExc_TypeError,
"Type %.100s doesn't support the buffer API",
@@ -20,7 +29,7 @@ _getbuffer(PyObject *obj, Py_buffer *view)
return -1;
}
- if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
+ if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
return -1;
return view->len;
}
@@ -560,6 +569,7 @@ PyBytes_AsStringAndSize(register PyObject *obj,
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/join.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/ctype.h"
@@ -1112,94 +1122,9 @@ Concatenate any number of bytes objects, with B in between each pair.\n\
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
static PyObject *
-bytes_join(PyObject *self, PyObject *orig)
+bytes_join(PyObject *self, PyObject *iterable)
{
- char *sep = PyBytes_AS_STRING(self);
- const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
- PyObject *res = NULL;
- char *p;
- Py_ssize_t seqlen = 0;
- size_t sz = 0;
- Py_ssize_t i;
- PyObject *seq, *item;
-
- seq = PySequence_Fast(orig, "");
- if (seq == NULL) {
- return NULL;
- }
-
- seqlen = PySequence_Size(seq);
- if (seqlen == 0) {
- Py_DECREF(seq);
- return PyBytes_FromString("");
- }
- if (seqlen == 1) {
- item = PySequence_Fast_GET_ITEM(seq, 0);
- if (PyBytes_CheckExact(item)) {
- Py_INCREF(item);
- Py_DECREF(seq);
- return item;
- }
- }
-
- /* There are at least two things to join, or else we have a subclass
- * of the builtin types in the sequence.
- * Do a pre-pass to figure out the total amount of space we'll
- * need (sz), and see whether all argument are bytes.
- */
- /* XXX Shouldn't we use _getbuffer() on these items instead? */
- for (i = 0; i < seqlen; i++) {
- const size_t old_sz = sz;
- item = PySequence_Fast_GET_ITEM(seq, i);
- if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected bytes,"
- " %.80s found",
- i, Py_TYPE(item)->tp_name);
- Py_DECREF(seq);
- return NULL;
- }
- sz += Py_SIZE(item);
- if (i != 0)
- sz += seplen;
- if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
- PyErr_SetString(PyExc_OverflowError,
- "join() result is too long for bytes");
- Py_DECREF(seq);
- return NULL;
- }
- }
-
- /* Allocate result space. */
- res = PyBytes_FromStringAndSize((char*)NULL, sz);
- if (res == NULL) {
- Py_DECREF(seq);
- return NULL;
- }
-
- /* Catenate everything. */
- /* I'm not worried about a PyByteArray item growing because there's
- nowhere in this function where we release the GIL. */
- p = PyBytes_AS_STRING(res);
- for (i = 0; i < seqlen; ++i) {
- size_t n;
- char *q;
- if (i) {
- Py_MEMCPY(p, sep, seplen);
- p += seplen;
- }
- item = PySequence_Fast_GET_ITEM(seq, i);
- n = Py_SIZE(item);
- if (PyBytes_Check(item))
- q = PyBytes_AS_STRING(item);
- else
- q = PyByteArray_AS_STRING(item);
- Py_MEMCPY(p, q, n);
- p += n;
- }
-
- Py_DECREF(seq);
- return res;
+ return stringlib_bytes_join(self, iterable);
}
PyObject *
@@ -2316,8 +2241,6 @@ bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
return NULL;
- if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
return PyUnicode_FromEncodedObject(self, encoding, errors);
}
@@ -2679,7 +2602,7 @@ PyBytes_FromObject(PyObject *x)
}
/* For iterator version, create a string object and resize as needed */
- size = _PyObject_LengthHint(x, 64);
+ size = PyObject_LengthHint(x, 64);
if (size == -1 && PyErr_Occurred())
return NULL;
/* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index 403c60c..54838cc 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -271,6 +271,12 @@ try_complex_special_method(PyObject *op) {
if (f) {
PyObject *res = PyObject_CallFunctionObjArgs(f, NULL);
Py_DECREF(f);
+ if (res != NULL && !PyComplex_Check(res)) {
+ PyErr_SetString(PyExc_TypeError,
+ "__complex__ should return a complex object");
+ Py_DECREF(res);
+ return NULL;
+ }
return res;
}
return NULL;
@@ -296,12 +302,6 @@ PyComplex_AsCComplex(PyObject *op)
newop = try_complex_special_method(op);
if (newop) {
- if (!PyComplex_Check(newop)) {
- PyErr_SetString(PyExc_TypeError,
- "__complex__ should return a complex object");
- Py_DECREF(newop);
- return cv;
- }
cv = ((PyComplexObject *)newop)->cval;
Py_DECREF(newop);
return cv;
@@ -705,7 +705,7 @@ complex__format__(PyObject* self, PyObject* args)
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
- _PyUnicodeWriter_Init(&writer, 0);
+ _PyUnicodeWriter_Init(&writer);
ret = _PyComplex_FormatAdvancedWriter(
&writer,
self,
diff --git a/Objects/descrobject.c b/Objects/descrobject.c
index abcc002..d4f8048 100644
--- a/Objects/descrobject.c
+++ b/Objects/descrobject.c
@@ -115,7 +115,7 @@ classmethod_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type)
((PyTypeObject *)type)->tp_name);
return NULL;
}
- return PyCFunction_New(descr->d_method, type);
+ return PyCFunction_NewEx(descr->d_method, type, NULL);
}
static PyObject *
@@ -125,7 +125,7 @@ method_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type)
if (descr_check((PyDescrObject *)descr, obj, &res))
return res;
- return PyCFunction_New(descr->d_method, obj);
+ return PyCFunction_NewEx(descr->d_method, obj, NULL);
}
static PyObject *
@@ -239,7 +239,7 @@ methoddescr_call(PyMethodDescrObject *descr, PyObject *args, PyObject *kwds)
return NULL;
}
- func = PyCFunction_New(descr->d_method, self);
+ func = PyCFunction_NewEx(descr->d_method, self, NULL);
if (func == NULL)
return NULL;
args = PyTuple_GetSlice(args, 1, argc);
@@ -292,7 +292,7 @@ classmethoddescr_call(PyMethodDescrObject *descr, PyObject *args,
return NULL;
}
- func = PyCFunction_New(descr->d_method, self);
+ func = PyCFunction_NewEx(descr->d_method, self, NULL);
if (func == NULL)
return NULL;
args = PyTuple_GetSlice(args, 1, argc);
@@ -1009,7 +1009,7 @@ wrapper_dealloc(wrapperobject *wp)
static PyObject *
wrapper_richcompare(PyObject *a, PyObject *b, int op)
{
- int result;
+ Py_intptr_t result;
PyObject *v;
PyWrapperDescrObject *a_descr, *b_descr;
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 7aa5ea8..e0cb8ac 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -305,9 +305,9 @@ PyDict_Fini(void)
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
*/
-/* GROWTH_RATE. Growth rate upon hitting maximum load.
- * Currently set to used*2 + capacity/2.
- * This means that dicts double in size when growing without deletions,
+/* GROWTH_RATE. Growth rate upon hitting maximum load.
+ * Currently set to used*2 + capacity/2.
+ * This means that dicts double in size when growing without deletions,
* but have more head room when the number of deletions is on a par with the
* number of insertions.
* Raising this to used*4 doubles memory consumption depending on the size of
@@ -389,6 +389,7 @@ static PyObject *
new_dict(PyDictKeysObject *keys, PyObject **values)
{
PyDictObject *mp;
+ assert(keys != NULL);
if (numfree) {
mp = free_list[--numfree];
assert (mp != NULL);
@@ -431,7 +432,10 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
PyObject *
PyDict_New(void)
{
- return new_dict(new_keys_object(PyDict_MINSIZE_COMBINED), NULL);
+ PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+ if (keys == NULL)
+ return NULL;
+ return new_dict(keys, NULL);
}
/*
@@ -1391,7 +1395,7 @@ dict_dealloc(PyDictObject *mp)
}
DK_DECREF(keys);
}
- else {
+ else if (keys != NULL) {
assert(keys->dk_refcnt == 1);
DK_DECREF(keys);
}
@@ -1439,6 +1443,9 @@ dict_repr(PyDictObject *mp)
Py_INCREF(value);
s = PyObject_Repr(key);
PyUnicode_Append(&s, colon);
+ if (s == NULL)
+ goto Done;
+
PyUnicode_AppendAndDel(&s, PyObject_Repr(value));
Py_DECREF(key);
Py_DECREF(value);
@@ -2118,13 +2125,18 @@ dict_equal(PyDictObject *a, PyDictObject *b)
if (aval != NULL) {
int cmp;
PyObject *bval;
+ PyObject **vaddr;
PyObject *key = ep->me_key;
/* temporarily bump aval's refcount to ensure it stays
alive until we're done with it */
Py_INCREF(aval);
/* ditto for key */
Py_INCREF(key);
- bval = PyDict_GetItemWithError((PyObject *)b, key);
+ /* reuse the known hash value */
+ if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr) == NULL)
+ bval = NULL;
+ else
+ bval = *vaddr;
Py_DECREF(key);
if (bval == NULL) {
Py_DECREF(aval);
@@ -2210,19 +2222,19 @@ dict_get(register PyDictObject *mp, PyObject *args)
return val;
}
-static PyObject *
-dict_setdefault(register PyDictObject *mp, PyObject *args)
+PyObject *
+PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
{
- PyObject *key;
- PyObject *failobj = Py_None;
+ PyDictObject *mp = (PyDictObject *)d;
PyObject *val = NULL;
Py_hash_t hash;
PyDictKeyEntry *ep;
PyObject **value_addr;
- if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
+ if (!PyDict_Check(d)) {
+ PyErr_BadInternalCall();
return NULL;
-
+ }
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
@@ -2240,20 +2252,32 @@ dict_setdefault(register PyDictObject *mp, PyObject *args)
return NULL;
ep = find_empty_slot(mp, key, hash, &value_addr);
}
- Py_INCREF(failobj);
+ Py_INCREF(defaultobj);
Py_INCREF(key);
- MAINTAIN_TRACKING(mp, key, failobj);
+ MAINTAIN_TRACKING(mp, key, defaultobj);
ep->me_key = key;
ep->me_hash = hash;
- *value_addr = failobj;
- val = failobj;
+ *value_addr = defaultobj;
+ val = defaultobj;
mp->ma_keys->dk_usable--;
mp->ma_used++;
}
- Py_INCREF(val);
return val;
}
+static PyObject *
+dict_setdefault(PyDictObject *mp, PyObject *args)
+{
+ PyObject *key, *val;
+ PyObject *defaultobj = Py_None;
+
+ if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &defaultobj))
+ return NULL;
+
+ val = PyDict_SetDefault((PyObject *)mp, key, defaultobj);
+ Py_XINCREF(val);
+ return val;
+}
static PyObject *
dict_clear(register PyDictObject *mp)
@@ -2460,10 +2484,10 @@ PyDoc_STRVAR(popitem__doc__,
2-tuple; but raise KeyError if D is empty.");
PyDoc_STRVAR(update__doc__,
-"D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n"
-"If E present and has a .keys() method, does: for k in E: D[k] = E[k]\n\
-If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v\n\
-In either case, this is followed by: for k in F: D[k] = F[k]");
+"D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n\
+If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\
+If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\
+In either case, this is followed by: for k in F: D[k] = F[k]");
PyDoc_STRVAR(fromkeys__doc__,
"dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.\n\
@@ -2568,22 +2592,23 @@ static PyObject *
dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *self;
+ PyDictObject *d;
assert(type != NULL && type->tp_alloc != NULL);
self = type->tp_alloc(type, 0);
- if (self != NULL) {
- PyDictObject *d = (PyDictObject *)self;
- d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
- /* XXX - Should we raise a no-memory error? */
- if (d->ma_keys == NULL) {
- DK_INCREF(Py_EMPTY_KEYS);
- d->ma_keys = Py_EMPTY_KEYS;
- d->ma_values = empty_values;
- }
- d->ma_used = 0;
- /* The object has been implicitly tracked by tp_alloc */
- if (type == &PyDict_Type)
- _PyObject_GC_UNTRACK(d);
+ if (self == NULL)
+ return NULL;
+ d = (PyDictObject *)self;
+
+ /* The object has been implicitly tracked by tp_alloc */
+ if (type == &PyDict_Type)
+ _PyObject_GC_UNTRACK(d);
+
+ d->ma_used = 0;
+ d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+ if (d->ma_keys == NULL) {
+ Py_DECREF(self);
+ return NULL;
}
return self;
}
@@ -2659,8 +2684,10 @@ _PyDict_GetItemId(PyObject *dp, struct _Py_Identifier *key)
{
PyObject *kv;
kv = _PyUnicode_FromId(key); /* borrowed */
- if (kv == NULL)
+ if (kv == NULL) {
+ PyErr_Clear();
return NULL;
+ }
return PyDict_GetItem(dp, kv);
}
@@ -2671,8 +2698,10 @@ PyDict_GetItemString(PyObject *v, const char *key)
{
PyObject *kv, *rv;
kv = PyUnicode_FromString(key);
- if (kv == NULL)
+ if (kv == NULL) {
+ PyErr_Clear();
return NULL;
+ }
rv = PyDict_GetItem(v, kv);
Py_DECREF(kv);
return rv;
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 6b04700..79bbb8f 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2327,7 +2327,7 @@ PyObject *PyExc_RecursionErrorInst = NULL;
}
#ifdef MS_WINDOWS
-#include <Winsock2.h>
+#include <winsock2.h>
/* The following constants were added to errno.h in VS2010 but have
preferred WSA equivalents. */
#undef EADDRINUSE
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 3a31314..e0c5bfe 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -390,7 +390,7 @@ stdprinter_write(PyStdPrinter_Object *self, PyObject *args)
Py_BEGIN_ALLOW_THREADS
errno = 0;
-#if defined(MS_WIN64) || defined(MS_WINDOWS)
+#ifdef MS_WINDOWS
if (n > INT_MAX)
n = INT_MAX;
n = write(self->fd, c, (int)n);
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index b571ca8..7ee2034 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -9,11 +9,6 @@
#include <ctype.h>
#include <float.h>
-#undef MAX
-#undef MIN
-#define MAX(x, y) ((x) < (y) ? (y) : (x))
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
/* Special free list
free_list is a singly-linked list of available PyFloatObjects, linked
@@ -1131,7 +1126,7 @@ float_hex(PyObject *v)
}
m = frexp(fabs(x), &e);
- shift = 1 - MAX(DBL_MIN_EXP - e, 0);
+ shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0);
m = ldexp(m, shift);
e -= shift;
@@ -1285,8 +1280,8 @@ float_fromhex(PyObject *cls, PyObject *arg)
fdigits = coeff_end - s_store;
if (ndigits == 0)
goto parse_error;
- if (ndigits > MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2,
- LONG_MAX/2 + 1 - DBL_MAX_EXP)/4)
+ if (ndigits > Py_MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2,
+ LONG_MAX/2 + 1 - DBL_MAX_EXP)/4)
goto insane_length_error;
/* [p <exponent>] */
@@ -1342,7 +1337,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
/* lsb = exponent of least significant bit of the *rounded* value.
This is top_exp - DBL_MANT_DIG unless result is subnormal. */
- lsb = MAX(top_exp, (long)DBL_MIN_EXP) - DBL_MANT_DIG;
+ lsb = Py_MAX(top_exp, (long)DBL_MIN_EXP) - DBL_MANT_DIG;
x = 0.0;
if (exp >= lsb) {
@@ -1711,7 +1706,7 @@ float__format__(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
- _PyUnicodeWriter_Init(&writer, 0);
+ _PyUnicodeWriter_Init(&writer);
ret = _PyFloat_FormatAdvancedWriter(
&writer,
self,
@@ -1858,7 +1853,7 @@ PyTypeObject PyFloat_Type = {
float_new, /* tp_new */
};
-void
+int
_PyFloat_Init(void)
{
/* We attempt to determine if this machine is using IEEE
@@ -1908,8 +1903,11 @@ _PyFloat_Init(void)
float_format = detected_float_format;
/* Init float info */
- if (FloatInfoType.tp_name == 0)
- PyStructSequence_InitType(&FloatInfoType, &floatinfo_desc);
+ if (FloatInfoType.tp_name == NULL) {
+ if (PyStructSequence_InitType2(&FloatInfoType, &floatinfo_desc) < 0)
+ return 0;
+ }
+ return 1;
}
int
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 808e595..d3b59f1 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -7,11 +7,6 @@
#include "opcode.h"
#include "structmember.h"
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
#define OFF(x) offsetof(PyFrameObject, x)
static PyMemberDef frame_memberlist[] = {
@@ -160,8 +155,8 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
/* We're now ready to look at the bytecode. */
PyBytes_AsStringAndSize(f->f_code->co_code, (char **)&code, &code_len);
- min_addr = MIN(new_lasti, f->f_lasti);
- max_addr = MAX(new_lasti, f->f_lasti);
+ min_addr = Py_MIN(new_lasti, f->f_lasti);
+ max_addr = Py_MAX(new_lasti, f->f_lasti);
/* You can't jump onto a line with an 'except' statement on it -
* they expect to have an exception on the top of the stack, which
@@ -293,7 +288,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
break;
}
- min_delta_iblock = MIN(min_delta_iblock, delta_iblock);
+ min_delta_iblock = Py_MIN(min_delta_iblock, delta_iblock);
if (op >= HAVE_ARGUMENT) {
addr += 2;
@@ -466,7 +461,7 @@ static int
frame_traverse(PyFrameObject *f, visitproc visit, void *arg)
{
PyObject **fastlocals, **p;
- int i, slots;
+ Py_ssize_t i, slots;
Py_VISIT(f->f_back);
Py_VISIT(f->f_code);
@@ -496,7 +491,7 @@ static void
frame_clear(PyFrameObject *f)
{
PyObject **fastlocals, **p, **oldtop;
- int i, slots;
+ Py_ssize_t i, slots;
/* Before anything else, make sure that this frame is clearly marked
* as being defunct! Else, e.g., a generator reachable from this
@@ -848,7 +843,7 @@ PyFrame_FastToLocals(PyFrameObject *f)
PyObject *error_type, *error_value, *error_traceback;
PyCodeObject *co;
Py_ssize_t j;
- int ncells, nfreevars;
+ Py_ssize_t ncells, nfreevars;
if (f == NULL)
return;
locals = f->f_locals;
@@ -900,7 +895,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
PyObject *error_type, *error_value, *error_traceback;
PyCodeObject *co;
Py_ssize_t j;
- int ncells, nfreevars;
+ Py_ssize_t ncells, nfreevars;
if (f == NULL)
return;
locals = f->f_locals;
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 49415b9..b043934 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -8,60 +8,59 @@
PyObject *
PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)
{
- PyFunctionObject *op = PyObject_GC_New(PyFunctionObject,
- &PyFunction_Type);
- static PyObject *__name__ = 0;
- if (op != NULL) {
- PyObject *doc;
- PyObject *consts;
- PyObject *module;
- op->func_weakreflist = NULL;
- Py_INCREF(code);
- op->func_code = code;
- Py_INCREF(globals);
- op->func_globals = globals;
- op->func_name = ((PyCodeObject *)code)->co_name;
- Py_INCREF(op->func_name);
- op->func_defaults = NULL; /* No default arguments */
- op->func_kwdefaults = NULL; /* No keyword only defaults */
- op->func_closure = NULL;
- consts = ((PyCodeObject *)code)->co_consts;
- if (PyTuple_Size(consts) >= 1) {
- doc = PyTuple_GetItem(consts, 0);
- if (!PyUnicode_Check(doc))
- doc = Py_None;
- }
- else
+ PyFunctionObject *op;
+ PyObject *doc, *consts, *module;
+ static PyObject *__name__ = NULL; /* interned "__name__" key, created on first call */
+
+ if (__name__ == NULL) { /* done before op is allocated, so failing here needs no cleanup */
+ __name__ = PyUnicode_InternFromString("__name__");
+ if (__name__ == NULL)
+ return NULL;
+ }
+
+ op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
+ if (op == NULL)
+ return NULL;
+
+ op->func_weakreflist = NULL;
+ Py_INCREF(code);
+ op->func_code = code;
+ Py_INCREF(globals);
+ op->func_globals = globals;
+ op->func_name = ((PyCodeObject *)code)->co_name;
+ Py_INCREF(op->func_name);
+ op->func_defaults = NULL; /* No default arguments */
+ op->func_kwdefaults = NULL; /* No keyword only defaults */
+ op->func_closure = NULL;
+
+ consts = ((PyCodeObject *)code)->co_consts;
+ if (PyTuple_Size(consts) >= 1) {
+ doc = PyTuple_GetItem(consts, 0); /* first constant is the docstring when it is a str */
+ if (!PyUnicode_Check(doc))
doc = Py_None;
- Py_INCREF(doc);
- op->func_doc = doc;
- op->func_dict = NULL;
- op->func_module = NULL;
- op->func_annotations = NULL;
-
- /* __module__: If module name is in globals, use it.
- Otherwise, use None.
- */
- if (!__name__) {
- __name__ = PyUnicode_InternFromString("__name__");
- if (!__name__) {
- Py_DECREF(op);
- return NULL;
- }
- }
- module = PyDict_GetItem(globals, __name__);
- if (module) {
- Py_INCREF(module);
- op->func_module = module;
- }
- if (qualname)
- op->func_qualname = qualname;
- else
- op->func_qualname = op->func_name;
- Py_INCREF(op->func_qualname);
}
else
- return NULL;
+ doc = Py_None;
+ Py_INCREF(doc);
+ op->func_doc = doc;
+
+ op->func_dict = NULL;
+ op->func_module = NULL;
+ op->func_annotations = NULL;
+
+ /* __module__: If module name is in globals, use it.
+ Otherwise, use None. */
+ module = PyDict_GetItem(globals, __name__);
+ if (module) {
+ Py_INCREF(module);
+ op->func_module = module;
+ }
+ if (qualname)
+ op->func_qualname = qualname;
+ else /* no qualname supplied: fall back to the plain function name */
+ op->func_qualname = op->func_name;
+ Py_INCREF(op->func_qualname);
+
_PyObject_GC_TRACK(op);
return (PyObject *)op;
}
diff --git a/Objects/genobject.c b/Objects/genobject.c
index 016bfa2..dfd90aa 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -16,6 +16,31 @@ gen_traverse(PyGenObject *gen, visitproc visit, void *arg)
}
static void
+gen_finalize(PyObject *self)
+{
+    PyGenObject *gen = (PyGenObject *)self;
+    PyObject *exc_type, *exc_value, *exc_tb;
+    PyObject *closed;
+
+    /* tp_finalize for generators: close a paused generator.  One with no
+       frame, or whose frame has no stack top, is not paused: nothing to do. */
+    if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL)
+        return;
+
+    /* Set aside any exception already in flight. */
+    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
+
+    closed = gen_close(gen, NULL);
+    if (closed != NULL)
+        Py_DECREF(closed);
+    else
+        PyErr_WriteUnraisable(self);
+
+    /* Reinstate the exception set aside above. */
+    PyErr_Restore(exc_type, exc_value, exc_tb);
+}
+
+static void
gen_dealloc(PyGenObject *gen)
{
PyObject *self = (PyObject *) gen;
@@ -27,12 +52,8 @@ gen_dealloc(PyGenObject *gen)
_PyObject_GC_TRACK(self);
- if (gen->gi_frame != NULL && gen->gi_frame->f_stacktop != NULL) {
- /* Generator is paused, so we need to close */
- Py_TYPE(gen)->tp_del(self);
- if (self->ob_refcnt > 0)
- return; /* resurrected. :( */
- }
+ if (PyObject_CallFinalizerFromDealloc(self))
+ return; /* resurrected. :( */
_PyObject_GC_UNTRACK(self);
Py_CLEAR(gen->gi_frame);
@@ -40,7 +61,6 @@ gen_dealloc(PyGenObject *gen)
PyObject_GC_Del(gen);
}
-
static PyObject *
gen_send_ex(PyGenObject *gen, PyObject *arg, int exc)
{
@@ -222,68 +242,6 @@ gen_close(PyGenObject *gen, PyObject *args)
return NULL;
}
-static void
-gen_del(PyObject *self)
-{
- PyObject *res;
- PyObject *error_type, *error_value, *error_traceback;
- PyGenObject *gen = (PyGenObject *)self;
-
- if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL)
- /* Generator isn't paused, so no need to close */
- return;
-
- /* Temporarily resurrect the object. */
- assert(self->ob_refcnt == 0);
- self->ob_refcnt = 1;
-
- /* Save the current exception, if any. */
- PyErr_Fetch(&error_type, &error_value, &error_traceback);
-
- res = gen_close(gen, NULL);
-
- if (res == NULL)
- PyErr_WriteUnraisable(self);
- else
- Py_DECREF(res);
-
- /* Restore the saved exception. */
- PyErr_Restore(error_type, error_value, error_traceback);
-
- /* Undo the temporary resurrection; can't use DECREF here, it would
- * cause a recursive call.
- */
- assert(self->ob_refcnt > 0);
- if (--self->ob_refcnt == 0)
- return; /* this is the normal path out */
-
- /* close() resurrected it! Make it look like the original Py_DECREF
- * never happened.
- */
- {
- Py_ssize_t refcnt = self->ob_refcnt;
- _Py_NewReference(self);
- self->ob_refcnt = refcnt;
- }
- assert(PyType_IS_GC(Py_TYPE(self)) &&
- _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED);
-
- /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
- * we need to undo that. */
- _Py_DEC_REFTOTAL;
- /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
- * chain, so no more to do there.
- * If COUNT_ALLOCS, the original decref bumped tp_frees, and
- * _Py_NewReference bumped tp_allocs: both of those need to be
- * undone.
- */
-#ifdef COUNT_ALLOCS
- --(Py_TYPE(self)->tp_frees);
- --(Py_TYPE(self)->tp_allocs);
-#endif
-}
-
-
PyDoc_STRVAR(throw_doc,
"throw(typ[,val[,tb]]) -> raise exception in generator,\n\
@@ -517,7 +475,8 @@ PyTypeObject PyGen_Type = {
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+ Py_TPFLAGS_HAVE_FINALIZE, /* tp_flags */
0, /* tp_doc */
(traverseproc)gen_traverse, /* tp_traverse */
0, /* tp_clear */
@@ -544,7 +503,9 @@ PyTypeObject PyGen_Type = {
0, /* tp_cache */
0, /* tp_subclasses */
0, /* tp_weaklist */
- gen_del, /* tp_del */
+ 0, /* tp_del */
+ 0, /* tp_version_tag */
+ gen_finalize, /* tp_finalize */
};
PyObject *
diff --git a/Objects/iterobject.c b/Objects/iterobject.c
index 3cfbeaf..77ff810 100644
--- a/Objects/iterobject.c
+++ b/Objects/iterobject.c
@@ -4,7 +4,7 @@
typedef struct {
PyObject_HEAD
- long it_index;
+ Py_ssize_t it_index;
PyObject *it_seq; /* Set to NULL when iterator is exhausted */
} seqiterobject;
@@ -76,9 +76,14 @@ iter_len(seqiterobject *it)
Py_ssize_t seqsize, len;
if (it->it_seq) {
- seqsize = PySequence_Size(it->it_seq);
- if (seqsize == -1)
- return NULL;
+ if (_PyObject_HasLen(it->it_seq)) {
+ seqsize = PySequence_Size(it->it_seq);
+ if (seqsize == -1)
+ return NULL;
+ }
+ else {
+ Py_RETURN_NOTIMPLEMENTED;
+ }
len = seqsize - it->it_index;
if (len >= 0)
return PyLong_FromSsize_t(len);
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 6e0d094..2f203b3 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -644,9 +644,14 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
memcpy(recycle, &item[ilow], s);
if (d < 0) { /* Delete -d items */
- memmove(&item[ihigh+d], &item[ihigh],
- (Py_SIZE(a) - ihigh)*sizeof(PyObject *));
- list_resize(a, Py_SIZE(a) + d);
+ Py_ssize_t tail;
+ tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
+ memmove(&item[ihigh+d], &item[ihigh], tail);
+ if (list_resize(a, Py_SIZE(a) + d) < 0) {
+ memmove(&item[ihigh], &item[ihigh+d], tail);
+ memcpy(&item[ilow], recycle, s);
+ goto Error;
+ }
item = a->ob_item;
}
else if (d > 0) { /* Insert d items */
@@ -826,7 +831,7 @@ listextend(PyListObject *self, PyObject *b)
iternext = *it->ob_type->tp_iternext;
/* Guess a result list size. */
- n = _PyObject_LengthHint(b, 8);
+ n = PyObject_LengthHint(b, 8);
if (n == -1) {
Py_DECREF(it);
return NULL;
@@ -871,8 +876,10 @@ listextend(PyListObject *self, PyObject *b)
}
/* Cut back result list if initial guess was too large. */
- if (Py_SIZE(self) < self->allocated)
- list_resize(self, Py_SIZE(self)); /* shrinking can't fail */
+ if (Py_SIZE(self) < self->allocated) {
+ if (list_resize(self, Py_SIZE(self)) < 0)
+ goto error;
+ }
Py_DECREF(it);
Py_RETURN_NONE;
@@ -925,17 +932,17 @@ listpop(PyListObject *self, PyObject *args)
v = self->ob_item[i];
if (i == Py_SIZE(self) - 1) {
status = list_resize(self, Py_SIZE(self) - 1);
- assert(status >= 0);
- return v; /* and v now owns the reference the list had */
+ if (status >= 0)
+ return v; /* and v now owns the reference the list had */
+ else
+ return NULL;
}
Py_INCREF(v);
status = list_ass_slice(self, i, i+1, (PyObject *)NULL);
- assert(status >= 0);
- /* Use status, so that in a release build compilers don't
- * complain about the unused name.
- */
- (void) status;
-
+ if (status < 0) {
+ Py_DECREF(v);
+ return NULL;
+ }
return v;
}
@@ -2660,7 +2667,7 @@ PyTypeObject PyList_Type = {
typedef struct {
PyObject_HEAD
- long it_index;
+ Py_ssize_t it_index;
PyListObject *it_seq; /* Set to NULL when iterator is exhausted */
} listiterobject;
@@ -2797,7 +2804,7 @@ listiter_reduce(listiterobject *it)
static PyObject *
listiter_setstate(listiterobject *it, PyObject *state)
{
- long index = PyLong_AsLong(state);
+ Py_ssize_t index = PyLong_AsSsize_t(state);
if (index == -1 && PyErr_Occurred())
return NULL;
if (it->it_seq != NULL) {
@@ -2958,7 +2965,7 @@ listiter_reduce_general(void *_it, int forward)
if (forward) {
listiterobject *it = (listiterobject *)_it;
if (it->it_seq)
- return Py_BuildValue("N(O)l", _PyObject_GetBuiltin("iter"),
+ return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
it->it_seq, it->it_index);
} else {
listreviterobject *it = (listreviterobject *)_it;
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 30ffc94..e0d641a0 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -17,7 +17,8 @@
#endif
/* convert a PyLong of size 1, 0 or -1 to an sdigit */
-#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \
+#define MEDIUM_VALUE(x) (assert(-1 <= Py_SIZE(x) && Py_SIZE(x) <= 1), \
+ Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \
(Py_SIZE(x) == 0 ? (sdigit)0 : \
(sdigit)(x)->ob_digit[0]))
#define ABS(x) ((x) < 0 ? -(x) : (x))
@@ -70,11 +71,21 @@ maybe_small_long(PyLongObject *v)
/* If a freshly-allocated long is already shared, it must
be a small integer, so negating it must go to PyLong_FromLong */
-#define NEGATE(x) \
- do if (Py_REFCNT(x) == 1) Py_SIZE(x) = -Py_SIZE(x); \
- else { PyObject* tmp=PyLong_FromLong(-MEDIUM_VALUE(x)); \
- Py_DECREF(x); (x) = (PyLongObject*)tmp; } \
- while(0)
+Py_LOCAL_INLINE(void)
+_PyLong_Negate(PyLongObject **x_p)
+{
+    PyLongObject *orig = *x_p;
+
+    if (Py_REFCNT(orig) != 1) {
+        /* Shared, hence a small int: build the negated value afresh.
+           If that fails *x_p ends up NULL, so callers must check it. */
+        *x_p = (PyLongObject *)PyLong_FromLong(-MEDIUM_VALUE(orig));
+        Py_DECREF(orig);
+        return;
+    }
+    Py_SIZE(orig) = -Py_SIZE(orig);   /* sole reference: flip the sign in place */
+}
+
/* For long multiplication, use the O(N**2) school algorithm unless
* both operands contain more than KARATSUBA_CUTOFF digits (this
* being an internal Python long digit, in base BASE).
@@ -89,11 +100,6 @@ maybe_small_long(PyLongObject *v)
*/
#define FIVEARY_CUTOFF 8
-#undef MIN
-#undef MAX
-#define MAX(x, y) ((x) < (y) ? (y) : (x))
-#define MIN(x, y) ((x) > (y) ? (y) : (x))
-
#define SIGCHECK(PyTryBlock) \
do { \
if (PyErr_CheckSignals()) PyTryBlock \
@@ -543,7 +549,7 @@ PyLong_AsUnsignedLong(PyObject *vv)
x = (x << PyLong_SHIFT) | v->ob_digit[i];
if ((x >> PyLong_SHIFT) != prev) {
PyErr_SetString(PyExc_OverflowError,
- "python int too large to convert "
+ "Python int too large to convert "
"to C unsigned long");
return (unsigned long) -1;
}
@@ -954,9 +960,6 @@ PyObject *
PyLong_FromVoidPtr(void *p)
{
#if SIZEOF_VOID_P <= SIZEOF_LONG
- /* special-case null pointer */
- if (!p)
- return PyLong_FromLong(0);
return PyLong_FromUnsignedLong((unsigned long)(Py_uintptr_t)p);
#else
@@ -966,9 +969,6 @@ PyLong_FromVoidPtr(void *p)
#if SIZEOF_LONG_LONG < SIZEOF_VOID_P
# error "PyLong_FromVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)"
#endif
- /* special-case null pointer */
- if (!p)
- return PyLong_FromLong(0);
return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)(Py_uintptr_t)p);
#endif /* SIZEOF_VOID_P <= SIZEOF_LONG */
@@ -1014,7 +1014,6 @@ PyLong_AsVoidPtr(PyObject *vv)
* rewritten to use the newer PyLong_{As,From}ByteArray API.
*/
-#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one
#define PY_ABS_LLONG_MIN (0-(unsigned PY_LONG_LONG)PY_LLONG_MIN)
/* Create a new long int object from a C PY_LONG_LONG int. */
@@ -1167,7 +1166,6 @@ PyLong_AsLongLong(PyObject *vv)
{
PyLongObject *v;
PY_LONG_LONG bytes;
- int one = 1;
int res;
if (vv == NULL) {
@@ -1202,7 +1200,7 @@ PyLong_AsLongLong(PyObject *vv)
case 1: return v->ob_digit[0];
}
res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes,
- SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1);
+ SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1);
/* Plan 9 can't handle PY_LONG_LONG in ? : expressions */
if (res < 0)
@@ -1219,7 +1217,6 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
{
PyLongObject *v;
unsigned PY_LONG_LONG bytes;
- int one = 1;
int res;
if (vv == NULL) {
@@ -1238,7 +1235,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
}
res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes,
- SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0);
+ SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0);
/* Plan 9 can't handle PY_LONG_LONG in ? : expressions */
if (res < 0)
@@ -1314,7 +1311,6 @@ PyLong_AsUnsignedLongLongMask(register PyObject *op)
return (unsigned PY_LONG_LONG)-1;
}
}
-#undef IS_LITTLE_ENDIAN
/* Get a C long long int from a long int object or any object that has an
__int__ method.
@@ -1606,7 +1602,7 @@ long_to_decimal_string_internal(PyObject *aa,
*/
if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
PyErr_SetString(PyExc_OverflowError,
- "long is too large to format");
+ "int too large to format");
return -1;
}
/* the expression size_a * PyLong_SHIFT is now safe from overflow */
@@ -1676,7 +1672,6 @@ long_to_decimal_string_internal(PyObject *aa,
else \
p = (TYPE*)PyUnicode_DATA(str) + strlen; \
\
- *p = '\0'; \
/* pout[0] through pout[size-2] contribute exactly \
_PyLong_DECIMAL_SHIFT digits each */ \
for (i=0; i < size - 1; i++) { \
@@ -1790,7 +1785,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
/* Ensure overflow doesn't occur during computation of sz. */
if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
PyErr_SetString(PyExc_OverflowError,
- "int is too large to format");
+ "int too large to format");
return -1;
}
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
@@ -2292,7 +2287,7 @@ digit beyond the first.
if (strobj == NULL)
return NULL;
PyErr_Format(PyExc_ValueError,
- "invalid literal for int() with base %d: %R",
+ "invalid literal for int() with base %d: %.200R",
base, strobj);
Py_DECREF(strobj);
return NULL;
@@ -2316,7 +2311,7 @@ _PyLong_FromBytes(const char *s, Py_ssize_t len, int base)
strobj = PyBytes_FromStringAndSize(s, Py_MIN(len, 200));
if (strobj != NULL) {
PyErr_Format(PyExc_ValueError,
- "invalid literal for int() with base %d: %R",
+ "invalid literal for int() with base %d: %.200R",
base, strobj);
Py_DECREF(strobj);
}
@@ -2337,7 +2332,7 @@ PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
PyObject *
PyLong_FromUnicodeObject(PyObject *u, int base)
{
- PyObject *result, *asciidig, *strobj;
+ PyObject *result, *asciidig;
char *buffer, *end = NULL;
Py_ssize_t buflen;
@@ -2359,13 +2354,9 @@ PyLong_FromUnicodeObject(PyObject *u, int base)
Py_DECREF(asciidig);
Py_XDECREF(result);
}
- strobj = PySequence_GetSlice(u, 0, 200);
- if (strobj != NULL) {
- PyErr_Format(PyExc_ValueError,
- "invalid literal for int() with base %d: %R",
- base, strobj);
- Py_DECREF(strobj);
- }
+ PyErr_Format(PyExc_ValueError,
+ "invalid literal for int() with base %d: %.200R",
+ base, u);
return NULL;
}
@@ -2419,10 +2410,21 @@ long_divrem(PyLongObject *a, PyLongObject *b,
The quotient z has the sign of a*b;
the remainder r has the sign of a,
so a = b*z + r. */
- if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0))
- NEGATE(z);
- if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0)
- NEGATE(*prem);
+ if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0)) {
+ _PyLong_Negate(&z);
+ if (z == NULL) {
+ Py_CLEAR(*prem);
+ return -1;
+ }
+ }
+ if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) {
+ _PyLong_Negate(prem);
+ if (*prem == NULL) {
+ Py_DECREF(z);
+ Py_CLEAR(*prem);
+ return -1;
+ }
+ }
*pdiv = maybe_small_long(z);
return 0;
}
@@ -2698,7 +2700,7 @@ PyLong_AsDouble(PyObject *v)
x = _PyLong_Frexp((PyLongObject *)v, &exponent);
if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) {
PyErr_SetString(PyExc_OverflowError,
- "long int too large to convert to float");
+ "int too large to convert to float");
return -1.0;
}
return ldexp(x, (int)exponent);
@@ -2918,8 +2920,11 @@ x_sub(PyLongObject *a, PyLongObject *b)
borrow &= 1; /* Keep only one sign bit */
}
assert(borrow == 0);
- if (sign < 0)
- NEGATE(z);
+ if (sign < 0) {
+ _PyLong_Negate(&z);
+ if (z == NULL)
+ return NULL;
+ }
return long_normalize(z);
}
@@ -3086,7 +3091,7 @@ kmul_split(PyLongObject *n,
Py_ssize_t size_lo, size_hi;
const Py_ssize_t size_n = ABS(Py_SIZE(n));
- size_lo = MIN(size_n, size);
+ size_lo = Py_MIN(size_n, size);
size_hi = size_n - size_lo;
if ((hi = _PyLong_New(size_hi)) == NULL)
@@ -3357,7 +3362,7 @@ k_lopsided_mul(PyLongObject *a, PyLongObject *b)
nbdone = 0;
while (bsize > 0) {
PyLongObject *product;
- const Py_ssize_t nbtouse = MIN(bsize, asize);
+ const Py_ssize_t nbtouse = Py_MIN(bsize, asize);
/* Multiply the next slice of b by a. */
memcpy(bslice->ob_digit, b->ob_digit + nbdone,
@@ -3410,8 +3415,11 @@ long_mul(PyLongObject *a, PyLongObject *b)
z = k_mul(a, b);
/* Negate if exactly one of the inputs is negative. */
- if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z)
- NEGATE(z);
+ if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) {
+ _PyLong_Negate(&z);
+ if (z == NULL)
+ return NULL;
+ }
return (PyObject *)z;
}
@@ -3648,7 +3656,7 @@ long_true_divide(PyObject *v, PyObject *w)
goto underflow_or_zero;
/* Choose value for shift; see comments for step 1 above. */
- shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2;
+ shift = Py_MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2;
inexact = 0;
@@ -3719,7 +3727,7 @@ long_true_divide(PyObject *v, PyObject *w)
x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]);
/* The number of extra bits that have to be rounded away. */
- extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
+ extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
assert(extra_bits == 2 || extra_bits == 3);
/* Round by directly modifying the low digit of x. */
@@ -3858,7 +3866,9 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
Py_DECREF(c);
c = temp;
temp = NULL;
- NEGATE(c);
+ _PyLong_Negate(&c);
+ if (c == NULL)
+ goto Error;
}
/* if modulus == 1:
@@ -3958,10 +3968,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
goto Done;
Error:
- if (z != NULL) {
- Py_DECREF(z);
- z = NULL;
- }
+ Py_CLEAR(z);
/* fall through */
Done:
if (Py_SIZE(b) > FIVEARY_CUTOFF) {
@@ -4091,10 +4098,10 @@ long_lshift(PyObject *v, PyObject *w)
shiftby = PyLong_AsSsize_t((PyObject *)b);
if (shiftby == -1L && PyErr_Occurred())
- goto lshift_error;
+ return NULL;
if (shiftby < 0) {
PyErr_SetString(PyExc_ValueError, "negative shift count");
- goto lshift_error;
+ return NULL;
}
/* wordshift, remshift = divmod(shiftby, PyLong_SHIFT) */
wordshift = shiftby / PyLong_SHIFT;
@@ -4106,9 +4113,11 @@ long_lshift(PyObject *v, PyObject *w)
++newsize;
z = _PyLong_New(newsize);
if (z == NULL)
- goto lshift_error;
- if (Py_SIZE(a) < 0)
- NEGATE(z);
+ return NULL;
+ if (Py_SIZE(a) < 0) {
+ assert(Py_REFCNT(z) == 1);
+ Py_SIZE(z) = -Py_SIZE(z);
+ }
for (i = 0; i < wordshift; i++)
z->ob_digit[i] = 0;
accum = 0;
@@ -4122,7 +4131,6 @@ long_lshift(PyObject *v, PyObject *w)
else
assert(!accum);
z = long_normalize(z);
- lshift_error:
return (PyObject *) maybe_small_long(z);
}
@@ -4147,7 +4155,7 @@ v_complement(digit *z, digit *a, Py_ssize_t m)
static PyObject *
long_bitwise(PyLongObject *a,
- int op, /* '&', '|', '^' */
+ char op, /* '&', '|', '^' */
PyLongObject *b)
{
int nega, negb, negz;
@@ -4322,8 +4330,7 @@ static PyObject *
long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *obase = NULL, *x = NULL;
- long base;
- int overflow;
+ Py_ssize_t base;
static char *kwlist[] = {"x", "base", 0};
if (type != &PyLong_Type)
@@ -4342,10 +4349,10 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (obase == NULL)
return PyNumber_Long(x);
- base = PyLong_AsLongAndOverflow(obase, &overflow);
+ base = PyNumber_AsSsize_t(obase, NULL);
if (base == -1 && PyErr_Occurred())
return NULL;
- if (overflow || (base != 0 && base < 2) || base > 36) {
+ if ((base != 0 && base < 2) || base > 36) {
PyErr_SetString(PyExc_ValueError,
"int() base must be >= 2 and <= 36");
return NULL;
@@ -4426,7 +4433,7 @@ long__format__(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
- _PyUnicodeWriter_Init(&writer, 0);
+ _PyUnicodeWriter_Init(&writer);
ret = _PyLong_FormatAdvancedWriter(
&writer,
self,
@@ -5083,8 +5090,10 @@ _PyLong_Init(void)
}
#endif
/* initialize int_info */
- if (Int_InfoType.tp_name == 0)
- PyStructSequence_InitType(&Int_InfoType, &int_info_desc);
+ if (Int_InfoType.tp_name == NULL) {
+ if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0)
+ return 0;
+ }
return 1;
}
diff --git a/Objects/methodobject.c b/Objects/methodobject.c
index f0685dd..11c8b6e 100644
--- a/Objects/methodobject.c
+++ b/Objects/methodobject.c
@@ -13,6 +13,15 @@ static int numfree = 0;
#define PyCFunction_MAXFREELIST 256
#endif
+/* undefine macro trampoline to PyCFunction_NewEx */
+#undef PyCFunction_New
+
+PyObject *
+PyCFunction_New(PyMethodDef *ml, PyObject *self)
+{
+ return PyCFunction_NewEx(ml, self, NULL); /* same as PyCFunction_NewEx with a NULL module */
+}
+
PyObject *
PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module)
{
@@ -70,23 +79,34 @@ PyCFunction_GetFlags(PyObject *op)
PyObject *
PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw)
{
+#define CHECK_RESULT(res) assert(res != NULL || PyErr_Occurred())
+
PyCFunctionObject* f = (PyCFunctionObject*)func;
PyCFunction meth = PyCFunction_GET_FUNCTION(func);
PyObject *self = PyCFunction_GET_SELF(func);
+ PyObject *res;
Py_ssize_t size;
switch (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST)) {
case METH_VARARGS:
- if (kw == NULL || PyDict_Size(kw) == 0)
- return (*meth)(self, arg);
+ if (kw == NULL || PyDict_Size(kw) == 0) {
+ res = (*meth)(self, arg);
+ CHECK_RESULT(res);
+ return res;
+ }
break;
case METH_VARARGS | METH_KEYWORDS:
- return (*(PyCFunctionWithKeywords)meth)(self, arg, kw);
+ res = (*(PyCFunctionWithKeywords)meth)(self, arg, kw);
+ CHECK_RESULT(res);
+ return res;
case METH_NOARGS:
if (kw == NULL || PyDict_Size(kw) == 0) {
size = PyTuple_GET_SIZE(arg);
- if (size == 0)
- return (*meth)(self, NULL);
+ if (size == 0) {
+ res = (*meth)(self, NULL);
+ CHECK_RESULT(res);
+ return res;
+ }
PyErr_Format(PyExc_TypeError,
"%.200s() takes no arguments (%zd given)",
f->m_ml->ml_name, size);
@@ -96,8 +116,11 @@ PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw)
case METH_O:
if (kw == NULL || PyDict_Size(kw) == 0) {
size = PyTuple_GET_SIZE(arg);
- if (size == 1)
- return (*meth)(self, PyTuple_GET_ITEM(arg, 0));
+ if (size == 1) {
+ res = (*meth)(self, PyTuple_GET_ITEM(arg, 0));
+ CHECK_RESULT(res);
+ return res;
+ }
PyErr_Format(PyExc_TypeError,
"%.200s() takes exactly one argument (%zd given)",
f->m_ml->ml_name, size);
@@ -114,6 +137,8 @@ PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw)
PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
f->m_ml->ml_name);
return NULL;
+
+#undef CHECK_RESULT
}
/* Methods (the standard built-in methods, that is) */
@@ -346,17 +371,3 @@ _PyCFunction_DebugMallocStats(FILE *out)
"free PyCFunctionObject",
numfree, sizeof(PyCFunctionObject));
}
-
-/* PyCFunction_New() is now just a macro that calls PyCFunction_NewEx(),
- but it's part of the API so we need to keep a function around that
- existing C extensions can call.
-*/
-
-#undef PyCFunction_New
-PyAPI_FUNC(PyObject *) PyCFunction_New(PyMethodDef *, PyObject *);
-
-PyObject *
-PyCFunction_New(PyMethodDef *ml, PyObject *self)
-{
- return PyCFunction_NewEx(ml, self, NULL);
-}
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 2f2bd36..3ea3be8 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -11,6 +11,8 @@ typedef struct {
PyObject *md_dict;
struct PyModuleDef *md_def;
void *md_state;
+ PyObject *md_weaklist;
+ PyObject *md_name; /* for logging purposes after md_dict is cleared */
} PyModuleObject;
static PyMemberDef module_members[] = {
@@ -26,6 +28,33 @@ static PyTypeObject moduledef_type = {
};
+static int
+module_init_dict(PyModuleObject *mod, PyObject *md_dict,
+                 PyObject *name, PyObject *doc)
+{
+    /* Populate a module dict with the standard attributes and cache the
+       module name on mod.  Returns 0 on success, -1 on failure. */
+    if (md_dict == NULL)
+        return -1;
+
+    /* The first failure aborts; a NULL doc is stored as None. */
+    if (PyDict_SetItemString(md_dict, "__name__", name) != 0 ||
+        PyDict_SetItemString(md_dict, "__doc__",
+                             doc != NULL ? doc : Py_None) != 0 ||
+        PyDict_SetItemString(md_dict, "__package__", Py_None) != 0 ||
+        PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0)
+        return -1;
+
+    /* Only an exact str name is cached, replacing any earlier one. */
+    if (PyUnicode_CheckExact(name)) {
+        Py_INCREF(name);
+        Py_XDECREF(mod->md_name);
+        mod->md_name = name;
+    }
+    return 0;
+}
+
+
PyObject *
PyModule_NewObject(PyObject *name)
{
@@ -35,14 +64,10 @@ PyModule_NewObject(PyObject *name)
return NULL;
m->md_def = NULL;
m->md_state = NULL;
+ m->md_weaklist = NULL;
+ m->md_name = NULL;
m->md_dict = PyDict_New();
- if (m->md_dict == NULL)
- goto fail;
- if (PyDict_SetItemString(m->md_dict, "__name__", name) != 0)
- goto fail;
- if (PyDict_SetItemString(m->md_dict, "__doc__", Py_None) != 0)
- goto fail;
- if (PyDict_SetItemString(m->md_dict, "__package__", Py_None) != 0)
+ if (module_init_dict(m, m->md_dict, name, NULL) != 0)
goto fail;
PyObject_GC_Track(m);
return (PyObject *)m;
@@ -347,9 +372,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds)
return -1;
m->md_dict = dict;
}
- if (PyDict_SetItemString(dict, "__name__", name) < 0)
- return -1;
- if (PyDict_SetItemString(dict, "__doc__", doc) < 0)
+ if (module_init_dict(m, dict, name, doc) < 0)
return -1;
return 0;
}
@@ -358,12 +381,15 @@ static void
module_dealloc(PyModuleObject *m)
{
PyObject_GC_UnTrack(m);
+ if (Py_VerboseFlag && m->md_name) {
+ PySys_FormatStderr("# destroy %S\n", m->md_name);
+ }
+ if (m->md_weaklist != NULL)
+ PyObject_ClearWeakRefs((PyObject *) m);
if (m->md_def && m->md_def->m_free)
m->md_def->m_free(m);
- if (m->md_dict != NULL) {
- _PyModule_Clear((PyObject *)m);
- Py_DECREF(m->md_dict);
- }
+ Py_XDECREF(m->md_dict);
+ Py_XDECREF(m->md_name);
if (m->md_state != NULL)
PyMem_FREE(m->md_state);
Py_TYPE(m)->tp_free((PyObject *)m);
@@ -380,7 +406,7 @@ module_repr(PyModuleObject *m)
if (m->md_dict != NULL) {
loader = PyDict_GetItemString(m->md_dict, "__loader__");
}
- if (loader != NULL) {
+ if (loader != NULL && loader != Py_None) {
repr = PyObject_CallMethod(loader, "module_repr", "(O)",
(PyObject *)m, NULL);
if (repr == NULL) {
@@ -404,10 +430,10 @@ module_repr(PyModuleObject *m)
filename = PyModule_GetFilenameObject((PyObject *)m);
if (filename == NULL) {
PyErr_Clear();
- /* There's no m.__file__, so if there was an __loader__, use that in
+ /* There's no m.__file__, so if there was a __loader__, use that in
* the repr, otherwise, the only thing you can use is m.__name__
*/
- if (loader == NULL) {
+ if (loader == NULL || loader == Py_None) {
repr = PyUnicode_FromFormat("<module %R>", name);
}
else {
@@ -509,7 +535,7 @@ PyTypeObject PyModule_Type = {
(traverseproc)module_traverse, /* tp_traverse */
(inquiry)module_clear, /* tp_clear */
0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
+ offsetof(PyModuleObject, md_weaklist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
module_methods, /* tp_methods */
diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c
index ff278d3..8c51b07 100644
--- a/Objects/namespaceobject.c
+++ b/Objects/namespaceobject.c
@@ -44,7 +44,7 @@ namespace_init(_PyNamespaceObject *ns, PyObject *args, PyObject *kwds)
if (args != NULL) {
Py_ssize_t argcount = PyObject_Size(args);
if (argcount < 0)
- return argcount;
+ return -1;
else if (argcount > 0) {
PyErr_Format(PyExc_TypeError, "no positional arguments expected");
return -1;
@@ -66,16 +66,20 @@ namespace_dealloc(_PyNamespaceObject *ns)
static PyObject *
-namespace_repr(_PyNamespaceObject *ns)
+namespace_repr(PyObject *ns)
{
int i, loop_error = 0;
PyObject *pairs = NULL, *d = NULL, *keys = NULL, *keys_iter = NULL;
PyObject *key;
PyObject *separator, *pairsrepr, *repr = NULL;
+ const char * name;
+
+ name = (Py_TYPE(ns) == &_PyNamespace_Type) ? "namespace"
+ : ns->ob_type->tp_name;
- i = Py_ReprEnter((PyObject *)ns);
+ i = Py_ReprEnter(ns);
if (i != 0) {
- return i > 0 ? PyUnicode_FromString("namespace(...)") : NULL;
+ return i > 0 ? PyUnicode_FromFormat("%s(...)", name) : NULL;
}
pairs = PyList_New(0);
@@ -127,8 +131,7 @@ namespace_repr(_PyNamespaceObject *ns)
if (pairsrepr == NULL)
goto error;
- repr = PyUnicode_FromFormat("%s(%S)",
- ((PyObject *)ns)->ob_type->tp_name, pairsrepr);
+ repr = PyUnicode_FromFormat("%s(%S)", name, pairsrepr);
Py_DECREF(pairsrepr);
error:
@@ -136,7 +139,7 @@ error:
Py_XDECREF(d);
Py_XDECREF(keys);
Py_XDECREF(keys_iter);
- Py_ReprLeave((PyObject *)ns);
+ Py_ReprLeave(ns);
return repr;
}
@@ -158,14 +161,49 @@ namespace_clear(_PyNamespaceObject *ns)
}
+static PyObject *
+namespace_richcompare(PyObject *self, PyObject *other, int op)
+{
+    /* Compare by ns_dict.  Use the C-level type check: PyObject_IsInstance
+       can return -1 (truthy) or be fooled by __class__, making the casts unsafe. */
+    if (PyObject_TypeCheck(self, &_PyNamespace_Type) &&
+        PyObject_TypeCheck(other, &_PyNamespace_Type))
+        return PyObject_RichCompare(((_PyNamespaceObject *)self)->ns_dict,
+                                    ((_PyNamespaceObject *)other)->ns_dict, op);
+    Py_RETURN_NOTIMPLEMENTED;   /* either operand is not a namespace */
+}
+
+
+PyDoc_STRVAR(namespace_reduce__doc__, "Return state information for pickling");
+
+static PyObject *
+namespace_reduce(register _PyNamespaceObject *ns)
+{
+    PyObject *no_args = PyTuple_New(0), *state;
+
+    if (no_args == NULL)
+        return NULL;
+    /* Pickling state is the 3-tuple (type(ns), (), ns_dict). */
+    state = PyTuple_Pack(3, (PyObject *)Py_TYPE(ns), no_args, ns->ns_dict);
+    Py_DECREF(no_args);
+    return state;
+}
+
+
+static PyMethodDef namespace_methods[] = { /* installed as tp_methods of _PyNamespace_Type */
+ {"__reduce__", (PyCFunction)namespace_reduce, METH_NOARGS,
+ namespace_reduce__doc__},
+ {NULL, NULL} /* sentinel */
+};
+
+
PyDoc_STRVAR(namespace_doc,
"A simple attribute-based namespace.\n\
\n\
-namespace(**kwargs)");
+SimpleNamespace(**kwargs)");
PyTypeObject _PyNamespace_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "namespace", /* tp_name */
+ "types.SimpleNamespace", /* tp_name */
sizeof(_PyNamespaceObject), /* tp_size */
0, /* tp_itemsize */
(destructor)namespace_dealloc, /* tp_dealloc */
@@ -188,11 +226,11 @@ PyTypeObject _PyNamespace_Type = {
namespace_doc, /* tp_doc */
(traverseproc)namespace_traverse, /* tp_traverse */
(inquiry)namespace_clear, /* tp_clear */
- 0, /* tp_richcompare */
+ namespace_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
- 0, /* tp_methods */
+ namespace_methods, /* tp_methods */
namespace_members, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
diff --git a/Objects/object.c b/Objects/object.c
index 949e7dc..c83109d 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -255,6 +255,72 @@ _PyObject_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
return PyObject_INIT_VAR(op, tp, nitems);
}
+void
+PyObject_CallFinalizer(PyObject *self)
+{
+ PyTypeObject *tp = Py_TYPE(self);
+
+ /* The former could happen on heaptypes created from the C API, e.g.
+ PyType_FromSpec(). */
+ if (!PyType_HasFeature(tp, Py_TPFLAGS_HAVE_FINALIZE) ||
+ tp->tp_finalize == NULL)
+ return;
+ /* tp_finalize should only be called once. */
+ if (PyType_IS_GC(tp) && _PyGC_FINALIZED(self))
+ return;
+
+ tp->tp_finalize(self);
+ if (PyType_IS_GC(tp))
+ _PyGC_SET_FINALIZED(self, 1);
+}
+
+int
+PyObject_CallFinalizerFromDealloc(PyObject *self)
+{
+ Py_ssize_t refcnt;
+
+ /* Temporarily resurrect the object. */
+ if (self->ob_refcnt != 0) {
+ Py_FatalError("PyObject_CallFinalizerFromDealloc called on "
+ "object with a non-zero refcount");
+ }
+ self->ob_refcnt = 1;
+
+ PyObject_CallFinalizer(self);
+
+ /* Undo the temporary resurrection; can't use DECREF here, it would
+ * cause a recursive call.
+ */
+ assert(self->ob_refcnt > 0);
+ if (--self->ob_refcnt == 0)
+ return 0; /* this is the normal path out */
+
+ /* tp_finalize resurrected it! Make it look like the original Py_DECREF
+ * never happened.
+ */
+ refcnt = self->ob_refcnt;
+ _Py_NewReference(self);
+ self->ob_refcnt = refcnt;
+
+ if (PyType_IS_GC(Py_TYPE(self))) {
+ assert(_PyGC_REFS(self) != _PyGC_REFS_UNTRACKED);
+ }
+ /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
+ * we need to undo that. */
+ _Py_DEC_REFTOTAL;
+ /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
+ * chain, so no more to do there.
+ * If COUNT_ALLOCS, the original decref bumped tp_frees, and
+ * _Py_NewReference bumped tp_allocs: both of those need to be
+ * undone.
+ */
+#ifdef COUNT_ALLOCS
+ --Py_TYPE(self)->tp_frees;
+ --Py_TYPE(self)->tp_allocs;
+#endif
+ return -1;
+}
+
int
PyObject_Print(PyObject *op, FILE *fp, int flags)
{
@@ -377,6 +443,14 @@ PyObject_Repr(PyObject *v)
if (Py_TYPE(v)->tp_repr == NULL)
return PyUnicode_FromFormat("<%s object at %p>",
v->ob_type->tp_name, v);
+
+#ifdef Py_DEBUG
+ /* PyObject_Repr() must not be called with an exception set,
+ because it may clear it (directly or indirectly) and so the
+ caller loses its exception */
+ assert(!PyErr_Occurred());
+#endif
+
res = (*v->ob_type->tp_repr)(v);
if (res == NULL)
return NULL;
@@ -408,6 +482,7 @@ PyObject_Str(PyObject *v)
#endif
if (v == NULL)
return PyUnicode_FromString("<NULL>");
+
if (PyUnicode_CheckExact(v)) {
#ifndef Py_DEBUG
if (PyUnicode_READY(v) < 0)
@@ -419,6 +494,13 @@ PyObject_Str(PyObject *v)
if (Py_TYPE(v)->tp_str == NULL)
return PyObject_Repr(v);
+#ifdef Py_DEBUG
+ /* PyObject_Str() must not be called with an exception set,
+ because it may clear it (directly or indirectly) and so the
+ caller loses its exception */
+ assert(!PyErr_Occurred());
+#endif
+
/* It is possible for a type to have a tp_str representation that loops
infinitely. */
if (Py_EnterRecursiveCall(" while getting the str of an object"))
@@ -451,6 +533,9 @@ PyObject_ASCII(PyObject *v)
if (repr == NULL)
return NULL;
+ if (PyUnicode_IS_ASCII(repr))
+ return repr;
+
/* repr is guaranteed to be a PyUnicode object by PyObject_Repr */
ascii = _PyUnicode_AsASCIIString(repr, "backslashreplace");
Py_DECREF(repr);
@@ -1524,12 +1609,21 @@ notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Py_RETURN_NOTIMPLEMENTED;
}
+static void
+notimplemented_dealloc(PyObject* ignore)
+{
+ /* This should never get called, but we also don't want to SEGV if
+ * we accidentally decref NotImplemented out of existence.
+ */
+ Py_FatalError("deallocating NotImplemented");
+}
+
static PyTypeObject PyNotImplemented_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"NotImplementedType",
0,
0,
- none_dealloc, /*tp_dealloc*/ /*never called*/
+ notimplemented_dealloc, /*tp_dealloc*/ /*never called*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
@@ -1699,15 +1793,6 @@ _Py_ReadyTypes(void)
if (PyType_Ready(&PyMemberDescr_Type) < 0)
Py_FatalError("Can't initialize member descriptor type");
- if (PyType_Ready(&PyFilter_Type) < 0)
- Py_FatalError("Can't initialize filter type");
-
- if (PyType_Ready(&PyMap_Type) < 0)
- Py_FatalError("Can't initialize map type");
-
- if (PyType_Ready(&PyZip_Type) < 0)
- Py_FatalError("Can't initialize zip type");
-
if (PyType_Ready(&_PyNamespace_Type) < 0)
Py_FatalError("Can't initialize namespace type");
@@ -1856,26 +1941,6 @@ PyTypeObject *_PyCapsule_hack = &PyCapsule_Type;
Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size;
-/* Python's malloc wrappers (see pymem.h) */
-
-void *
-PyMem_Malloc(size_t nbytes)
-{
- return PyMem_MALLOC(nbytes);
-}
-
-void *
-PyMem_Realloc(void *p, size_t nbytes)
-{
- return PyMem_REALLOC(p, nbytes);
-}
-
-void
-PyMem_Free(void *p)
-{
- PyMem_FREE(p);
-}
-
void
_PyObject_DebugTypeStats(FILE *out)
{
@@ -1927,7 +1992,8 @@ Py_ReprEnter(PyObject *obj)
if (PyList_GET_ITEM(list, i) == obj)
return 1;
}
- PyList_Append(list, obj);
+ if (PyList_Append(list, obj) < 0)
+ return -1;
return 0;
}
@@ -1937,13 +2003,18 @@ Py_ReprLeave(PyObject *obj)
PyObject *dict;
PyObject *list;
Py_ssize_t i;
+ PyObject *error_type, *error_value, *error_traceback;
+
+ PyErr_Fetch(&error_type, &error_value, &error_traceback);
dict = PyThreadState_GetDict();
if (dict == NULL)
- return;
+ goto finally;
+
list = PyDict_GetItemString(dict, KEY);
if (list == NULL || !PyList_Check(list))
- return;
+ goto finally;
+
i = PyList_GET_SIZE(list);
/* Count backwards because we always expect obj to be list[-1] */
while (--i >= 0) {
@@ -1952,6 +2023,10 @@ Py_ReprLeave(PyObject *obj)
break;
}
}
+
+finally:
+ /* ignore exceptions because there is no way to report them. */
+ PyErr_Restore(error_type, error_value, error_traceback);
}
/* Trashcan support. */
@@ -1972,7 +2047,7 @@ void
_PyTrash_deposit_object(PyObject *op)
{
assert(PyObject_IS_GC(op));
- assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED);
+ assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
assert(op->ob_refcnt == 0);
_Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later;
_PyTrash_delete_later = op;
@@ -1984,7 +2059,7 @@ _PyTrash_thread_deposit_object(PyObject *op)
{
PyThreadState *tstate = PyThreadState_GET();
assert(PyObject_IS_GC(op));
- assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED);
+ assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
assert(op->ob_refcnt == 0);
_Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *) tstate->trash_delete_later;
tstate->trash_delete_later = op;
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 50fc7c1..f146222 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1,14 +1,354 @@
#include "Python.h"
+/* Python's malloc wrappers (see pymem.h) */
+
+#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */
+/* Forward declaration */
+static void* _PyMem_DebugMalloc(void *ctx, size_t size);
+static void _PyMem_DebugFree(void *ctx, void *p);
+static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
+
+static void _PyObject_DebugDumpAddress(const void *p);
+static void _PyMem_DebugCheckAddress(char api_id, const void *p);
+#endif
+
+#ifdef WITH_PYMALLOC
+
+#ifdef MS_WINDOWS
+# include <windows.h>
+#elif defined(HAVE_MMAP)
+# include <sys/mman.h>
+# ifdef MAP_ANONYMOUS
+# define ARENAS_USE_MMAP
+# endif
+#endif
+
+/* Forward declaration */
+static void* _PyObject_Malloc(void *ctx, size_t size);
+static void _PyObject_Free(void *ctx, void *p);
+static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
+#endif
+
+
+static void *
+_PyMem_RawMalloc(void *ctx, size_t size)
+{
+ /* PyMem_Malloc(0) means malloc(1). Some systems would return NULL
+ for malloc(0), which would be treated as an error. Some platforms would
+ return a pointer with no memory behind it, which would break pymalloc.
+ To solve these problems, allocate an extra byte. */
+ if (size == 0)
+ size = 1;
+ return malloc(size);
+}
+
+static void *
+_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
+{
+ if (size == 0)
+ size = 1;
+ return realloc(ptr, size);
+}
+
+static void
+_PyMem_RawFree(void *ctx, void *ptr)
+{
+ free(ptr);
+}
+
+
+#ifdef MS_WINDOWS
+static void *
+_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
+{
+ return VirtualAlloc(NULL, size,
+ MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+}
+
+static void
+_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
+{
+ VirtualFree(ptr, 0, MEM_RELEASE);
+}
+
+#elif defined(ARENAS_USE_MMAP)
+static void *
+_PyObject_ArenaMmap(void *ctx, size_t size)
+{
+ void *ptr;
+ ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (ptr == MAP_FAILED)
+ return NULL;
+ assert(ptr != NULL);
+ return ptr;
+}
+
+static void
+_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
+{
+ munmap(ptr, size);
+}
+
+#else
+static void *
+_PyObject_ArenaMalloc(void *ctx, size_t size)
+{
+ return malloc(size);
+}
+
+static void
+_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
+{
+ free(ptr);
+}
+#endif
+
+
+#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree
#ifdef WITH_PYMALLOC
+#define PYOBJECT_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free
+#else
+#define PYOBJECT_FUNCS PYRAW_FUNCS
+#endif
-#ifdef HAVE_MMAP
- #include <sys/mman.h>
- #ifdef MAP_ANONYMOUS
- #define ARENAS_USE_MMAP
- #endif
+#ifdef PYMALLOC_DEBUG
+typedef struct {
+ /* We tag each block with an API ID in order to detect API violations */
+ char api_id;
+ PyMemAllocator alloc;
+} debug_alloc_api_t;
+static struct {
+ debug_alloc_api_t raw;
+ debug_alloc_api_t mem;
+ debug_alloc_api_t obj;
+} _PyMem_Debug = {
+ {'r', {NULL, PYRAW_FUNCS}},
+ {'m', {NULL, PYRAW_FUNCS}},
+ {'o', {NULL, PYOBJECT_FUNCS}}
+ };
+
+#define PYDEBUG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
#endif
+static PyMemAllocator _PyMem_Raw = {
+#ifdef PYMALLOC_DEBUG
+ &_PyMem_Debug.raw, PYDEBUG_FUNCS
+#else
+ NULL, PYRAW_FUNCS
+#endif
+ };
+
+static PyMemAllocator _PyMem = {
+#ifdef PYMALLOC_DEBUG
+ &_PyMem_Debug.mem, PYDEBUG_FUNCS
+#else
+ NULL, PYRAW_FUNCS
+#endif
+ };
+
+static PyMemAllocator _PyObject = {
+#ifdef PYMALLOC_DEBUG
+ &_PyMem_Debug.obj, PYDEBUG_FUNCS
+#else
+ NULL, PYOBJECT_FUNCS
+#endif
+ };
+
+#undef PYRAW_FUNCS
+#undef PYOBJECT_FUNCS
+#undef PYDEBUG_FUNCS
+
+static PyObjectArenaAllocator _PyObject_Arena = {NULL,
+#ifdef MS_WINDOWS
+ _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
+#elif defined(ARENAS_USE_MMAP)
+ _PyObject_ArenaMmap, _PyObject_ArenaMunmap
+#else
+ _PyObject_ArenaMalloc, _PyObject_ArenaFree
+#endif
+ };
+
+void
+PyMem_SetupDebugHooks(void)
+{
+#ifdef PYMALLOC_DEBUG
+ PyMemAllocator alloc;
+
+ alloc.malloc = _PyMem_DebugMalloc;
+ alloc.realloc = _PyMem_DebugRealloc;
+ alloc.free = _PyMem_DebugFree;
+
+ if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) {
+ alloc.ctx = &_PyMem_Debug.raw;
+ PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
+ }
+
+ if (_PyMem.malloc != _PyMem_DebugMalloc) {
+ alloc.ctx = &_PyMem_Debug.mem;
+ PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
+ }
+
+ if (_PyObject.malloc != _PyMem_DebugMalloc) {
+ alloc.ctx = &_PyMem_Debug.obj;
+ PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
+ PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
+ }
+#endif
+}
+
+void
+PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator)
+{
+ switch(domain)
+ {
+ case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
+ case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
+ case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
+ default:
+ /* unknown domain */
+ allocator->ctx = NULL;
+ allocator->malloc = NULL;
+ allocator->realloc = NULL;
+ allocator->free = NULL;
+ }
+}
+
+void
+PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator)
+{
+ switch(domain)
+ {
+ case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
+ case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
+ case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
+ /* ignore unknown domain */
+ }
+
+}
+
+void
+PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
+{
+ *allocator = _PyObject_Arena;
+}
+
+void
+PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
+{
+ _PyObject_Arena = *allocator;
+}
+
+void *
+PyMem_RawMalloc(size_t size)
+{
+ /*
+ * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
+ * Most python internals blindly use a signed Py_ssize_t to track
+ * things without checking for overflows or negatives.
+ * As size_t is unsigned, checking for size < 0 is not required.
+ */
+ if (size > (size_t)PY_SSIZE_T_MAX)
+ return NULL;
+
+ return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
+}
+
+void*
+PyMem_RawRealloc(void *ptr, size_t new_size)
+{
+ /* see PyMem_RawMalloc() */
+ if (new_size > (size_t)PY_SSIZE_T_MAX)
+ return NULL;
+ return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
+}
+
+void PyMem_RawFree(void *ptr)
+{
+ _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
+}
+
+void *
+PyMem_Malloc(size_t size)
+{
+ /* see PyMem_RawMalloc() */
+ if (size > (size_t)PY_SSIZE_T_MAX)
+ return NULL;
+ return _PyMem.malloc(_PyMem.ctx, size);
+}
+
+void *
+PyMem_Realloc(void *ptr, size_t new_size)
+{
+ /* see PyMem_RawMalloc() */
+ if (new_size > (size_t)PY_SSIZE_T_MAX)
+ return NULL;
+ return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
+}
+
+void
+PyMem_Free(void *ptr)
+{
+ _PyMem.free(_PyMem.ctx, ptr);
+}
+
+char *
+_PyMem_RawStrdup(const char *str)
+{
+ size_t size;
+ char *copy;
+
+ size = strlen(str) + 1;
+ copy = PyMem_RawMalloc(size);
+ if (copy == NULL)
+ return NULL;
+ memcpy(copy, str, size);
+ return copy;
+}
+
+char *
+_PyMem_Strdup(const char *str)
+{
+ size_t size;
+ char *copy;
+
+ size = strlen(str) + 1;
+ copy = PyMem_Malloc(size);
+ if (copy == NULL)
+ return NULL;
+ memcpy(copy, str, size);
+ return copy;
+}
+
+void *
+PyObject_Malloc(size_t size)
+{
+ /* see PyMem_RawMalloc() */
+ if (size > (size_t)PY_SSIZE_T_MAX)
+ return NULL;
+ return _PyObject.malloc(_PyObject.ctx, size);
+}
+
+void *
+PyObject_Realloc(void *ptr, size_t new_size)
+{
+ /* see PyMem_RawMalloc() */
+ if (new_size > (size_t)PY_SSIZE_T_MAX)
+ return NULL;
+ return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
+}
+
+void
+PyObject_Free(void *ptr)
+{
+ _PyObject.free(_PyObject.ctx, ptr);
+}
+
+
+#ifdef WITH_PYMALLOC
+
#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>
@@ -525,6 +865,15 @@ static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;
+static Py_ssize_t _Py_AllocatedBlocks = 0;
+
+Py_ssize_t
+_Py_GetAllocatedBlocks(void)
+{
+ return _Py_AllocatedBlocks;
+}
+
+
/* Allocate a new arena. If we run out of memory, return NULL. Else
* allocate a new arena, and return the address of an arena_object
* describing the new arena. It's expected that the caller will set
@@ -536,7 +885,6 @@ new_arena(void)
struct arena_object* arenaobj;
uint excess; /* number of bytes above pool alignment */
void *address;
- int err;
#ifdef PYMALLOC_DEBUG
if (Py_GETENV("PYTHONMALLOCSTATS"))
@@ -558,7 +906,7 @@ new_arena(void)
return NULL; /* overflow */
#endif
nbytes = numarenas * sizeof(*arenas);
- arenaobj = (struct arena_object *)realloc(arenas, nbytes);
+ arenaobj = (struct arena_object *)PyMem_Realloc(arenas, nbytes);
if (arenaobj == NULL)
return NULL;
arenas = arenaobj;
@@ -589,15 +937,8 @@ new_arena(void)
arenaobj = unused_arena_objects;
unused_arena_objects = arenaobj->nextarena;
assert(arenaobj->address == 0);
-#ifdef ARENAS_USE_MMAP
- address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- err = (address == MAP_FAILED);
-#else
- address = malloc(ARENA_SIZE);
- err = (address == 0);
-#endif
- if (err) {
+ address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
+ if (address == NULL) {
/* The allocation failed: return NULL after putting the
* arenaobj back.
*/
@@ -760,15 +1101,16 @@ int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE;
* Unless the optimizer reorders everything, being too smart...
*/
-#undef PyObject_Malloc
-void *
-PyObject_Malloc(size_t nbytes)
+static void *
+_PyObject_Malloc(void *ctx, size_t nbytes)
{
block *bp;
poolp pool;
poolp next;
uint size;
+ _Py_AllocatedBlocks++;
+
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind == -1))
running_on_valgrind = RUNNING_ON_VALGRIND;
@@ -777,15 +1119,6 @@ PyObject_Malloc(size_t nbytes)
#endif
/*
- * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
- * Most python internals blindly use a signed Py_ssize_t to track
- * things without checking for overflows or negatives.
- * As size_t is unsigned, checking for nbytes < 0 is not required.
- */
- if (nbytes > PY_SSIZE_T_MAX)
- return NULL;
-
- /*
* This implicitly redirects malloc(0).
*/
if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
@@ -901,6 +1234,7 @@ PyObject_Malloc(size_t nbytes)
* and free list are already initialized.
*/
bp = pool->freeblock;
+ assert(bp != NULL);
pool->freeblock = *(block **)bp;
UNLOCK();
return (void *)bp;
@@ -956,16 +1290,18 @@ redirect:
* last chance to serve the request) or when the max memory limit
* has been reached.
*/
- if (nbytes == 0)
- nbytes = 1;
- return (void *)malloc(nbytes);
+ {
+ void *result = PyMem_Malloc(nbytes);
+ if (!result)
+ _Py_AllocatedBlocks--;
+ return result;
+ }
}
/* free */
-#undef PyObject_Free
-void
-PyObject_Free(void *p)
+static void
+_PyObject_Free(void *ctx, void *p)
{
poolp pool;
block *lastfree;
@@ -978,6 +1314,8 @@ PyObject_Free(void *p)
if (p == NULL) /* free(NULL) has no effect */
return;
+ _Py_AllocatedBlocks--;
+
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind > 0))
goto redirect;
@@ -1072,11 +1410,8 @@ PyObject_Free(void *p)
unused_arena_objects = ao;
/* Free the entire arena. */
-#ifdef ARENAS_USE_MMAP
- munmap((void *)ao->address, ARENA_SIZE);
-#else
- free((void *)ao->address);
-#endif
+ _PyObject_Arena.free(_PyObject_Arena.ctx,
+ (void *)ao->address, ARENA_SIZE);
ao->address = 0; /* mark unassociated */
--narenas_currently_allocated;
@@ -1185,7 +1520,7 @@ PyObject_Free(void *p)
redirect:
#endif
/* We didn't allocate this address. */
- free(p);
+ PyMem_Free(p);
}
/* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
@@ -1193,9 +1528,8 @@ redirect:
* return a non-NULL result.
*/
-#undef PyObject_Realloc
-void *
-PyObject_Realloc(void *p, size_t nbytes)
+static void *
+_PyObject_Realloc(void *ctx, void *p, size_t nbytes)
{
void *bp;
poolp pool;
@@ -1205,16 +1539,7 @@ PyObject_Realloc(void *p, size_t nbytes)
#endif
if (p == NULL)
- return PyObject_Malloc(nbytes);
-
- /*
- * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
- * Most python internals blindly use a signed Py_ssize_t to track
- * things without checking for overflows or negatives.
- * As size_t is unsigned, checking for nbytes < 0 is not required.
- */
- if (nbytes > PY_SSIZE_T_MAX)
- return NULL;
+ return _PyObject_Malloc(ctx, nbytes);
#ifdef WITH_VALGRIND
/* Treat running_on_valgrind == -1 the same as 0 */
@@ -1242,10 +1567,10 @@ PyObject_Realloc(void *p, size_t nbytes)
}
size = nbytes;
}
- bp = PyObject_Malloc(nbytes);
+ bp = _PyObject_Malloc(ctx, nbytes);
if (bp != NULL) {
memcpy(bp, p, size);
- PyObject_Free(p);
+ _PyObject_Free(ctx, p);
}
return bp;
}
@@ -1263,14 +1588,14 @@ PyObject_Realloc(void *p, size_t nbytes)
* at p. Instead we punt: let C continue to manage this block.
*/
if (nbytes)
- return realloc(p, nbytes);
+ return PyMem_Realloc(p, nbytes);
/* C doesn't define the result of realloc(p, 0) (it may or may not
* return NULL then), but Python's docs promise that nbytes==0 never
* returns NULL. We don't pass 0 to realloc(), to avoid that endcase
* to begin with. Even then, we can't be sure that realloc() won't
* return NULL.
*/
- bp = realloc(p, 1);
+ bp = PyMem_Realloc(p, 1);
return bp ? bp : p;
}
@@ -1280,23 +1605,12 @@ PyObject_Realloc(void *p, size_t nbytes)
/* pymalloc not enabled: Redirect the entry points to malloc. These will
* only be used by extensions that are compiled with pymalloc enabled. */
-void *
-PyObject_Malloc(size_t n)
-{
- return PyMem_MALLOC(n);
-}
-
-void *
-PyObject_Realloc(void *p, size_t n)
+Py_ssize_t
+_Py_GetAllocatedBlocks(void)
{
- return PyMem_REALLOC(p, n);
+ return 0;
}
-void
-PyObject_Free(void *p)
-{
- PyMem_FREE(p);
-}
#endif /* WITH_PYMALLOC */
#ifdef PYMALLOC_DEBUG
@@ -1316,10 +1630,6 @@ PyObject_Free(void *p)
#define DEADBYTE 0xDB /* dead (newly freed) memory */
#define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */
-/* We tag each block with an API ID in order to tag API violations */
-#define _PYMALLOC_MEM_ID 'm' /* the PyMem_Malloc() API */
-#define _PYMALLOC_OBJ_ID 'o' /* The PyObject_Malloc() API */
-
static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
/* serialno is always incremented via calling this routine. The point is
@@ -1402,58 +1712,18 @@ p[2*S: 2*S+n]
p[2*S+n: 2*S+n+S]
Copies of FORBIDDENBYTE. Used to catch over- writes and reads.
p[2*S+n+S: 2*S+n+2*S]
- A serial number, incremented by 1 on each call to _PyObject_DebugMalloc
- and _PyObject_DebugRealloc.
+ A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
+ and _PyMem_DebugRealloc.
This is a big-endian size_t.
If "bad memory" is detected later, the serial number gives an
excellent way to set a breakpoint on the next run, to capture the
instant at which this block was passed out.
*/
-/* debug replacements for the PyMem_* memory API */
-void *
-_PyMem_DebugMalloc(size_t nbytes)
-{
- return _PyObject_DebugMallocApi(_PYMALLOC_MEM_ID, nbytes);
-}
-void *
-_PyMem_DebugRealloc(void *p, size_t nbytes)
-{
- return _PyObject_DebugReallocApi(_PYMALLOC_MEM_ID, p, nbytes);
-}
-void
-_PyMem_DebugFree(void *p)
-{
- _PyObject_DebugFreeApi(_PYMALLOC_MEM_ID, p);
-}
-
-/* debug replacements for the PyObject_* memory API */
-void *
-_PyObject_DebugMalloc(size_t nbytes)
-{
- return _PyObject_DebugMallocApi(_PYMALLOC_OBJ_ID, nbytes);
-}
-void *
-_PyObject_DebugRealloc(void *p, size_t nbytes)
-{
- return _PyObject_DebugReallocApi(_PYMALLOC_OBJ_ID, p, nbytes);
-}
-void
-_PyObject_DebugFree(void *p)
-{
- _PyObject_DebugFreeApi(_PYMALLOC_OBJ_ID, p);
-}
-void
-_PyObject_DebugCheckAddress(const void *p)
-{
- _PyObject_DebugCheckAddressApi(_PYMALLOC_OBJ_ID, p);
-}
-
-
-/* generic debug memory api, with an "id" to identify the API in use */
-void *
-_PyObject_DebugMallocApi(char id, size_t nbytes)
+static void *
+_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
+ debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uchar *p; /* base address of malloc'ed block */
uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */
size_t total; /* nbytes + 4*SST */
@@ -1464,14 +1734,14 @@ _PyObject_DebugMallocApi(char id, size_t nbytes)
/* overflow: can't represent total as a size_t */
return NULL;
- p = (uchar *)PyObject_Malloc(total);
+ p = (uchar *)api->alloc.malloc(api->alloc.ctx, total);
if (p == NULL)
return NULL;
/* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
write_size_t(p, nbytes);
- p[SST] = (uchar)id;
- memset(p + SST + 1 , FORBIDDENBYTE, SST-1);
+ p[SST] = (uchar)api->api_id;
+ memset(p + SST + 1, FORBIDDENBYTE, SST-1);
if (nbytes > 0)
memset(p + 2*SST, CLEANBYTE, nbytes);
@@ -1489,35 +1759,37 @@ _PyObject_DebugMallocApi(char id, size_t nbytes)
Then fills the original bytes with DEADBYTE.
Then calls the underlying free.
*/
-void
-_PyObject_DebugFreeApi(char api, void *p)
+static void
+_PyMem_DebugFree(void *ctx, void *p)
{
+ debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */
size_t nbytes;
if (p == NULL)
return;
- _PyObject_DebugCheckAddressApi(api, p);
+ _PyMem_DebugCheckAddress(api->api_id, p);
nbytes = read_size_t(q);
nbytes += 4*SST;
if (nbytes > 0)
memset(q, DEADBYTE, nbytes);
- PyObject_Free(q);
+ api->alloc.free(api->alloc.ctx, q);
}
-void *
-_PyObject_DebugReallocApi(char api, void *p, size_t nbytes)
+static void *
+_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes)
{
- uchar *q = (uchar *)p;
+ debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
+ uchar *q = (uchar *)p, *oldq;
uchar *tail;
size_t total; /* nbytes + 4*SST */
size_t original_nbytes;
int i;
if (p == NULL)
- return _PyObject_DebugMallocApi(api, nbytes);
+ return _PyMem_DebugMalloc(ctx, nbytes);
- _PyObject_DebugCheckAddressApi(api, p);
+ _PyMem_DebugCheckAddress(api->api_id, p);
bumpserialno();
original_nbytes = read_size_t(q - 2*SST);
total = nbytes + 4*SST;
@@ -1525,24 +1797,26 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes)
/* overflow: can't represent total as a size_t */
return NULL;
- if (nbytes < original_nbytes) {
- /* shrinking: mark old extra memory dead */
- memset(q + nbytes, DEADBYTE, original_nbytes - nbytes + 2*SST);
- }
-
/* Resize and add decorations. We may get a new pointer here, in which
* case we didn't get the chance to mark the old memory with DEADBYTE,
* but we live with that.
*/
- q = (uchar *)PyObject_Realloc(q - 2*SST, total);
+ oldq = q;
+ q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
if (q == NULL)
return NULL;
+ if (q == oldq && nbytes < original_nbytes) {
+ /* shrinking: mark old extra memory dead */
+ memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
+ }
+
write_size_t(q, nbytes);
- assert(q[SST] == (uchar)api);
+ assert(q[SST] == (uchar)api->api_id);
for (i = 1; i < SST; ++i)
assert(q[SST + i] == FORBIDDENBYTE);
q += 2*SST;
+
tail = q + nbytes;
memset(tail, FORBIDDENBYTE, SST);
write_size_t(tail + SST, serialno);
@@ -1561,8 +1835,8 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes)
* and call Py_FatalError to kill the program.
* The API id, is also checked.
*/
- void
-_PyObject_DebugCheckAddressApi(char api, const void *p)
+static void
+_PyMem_DebugCheckAddress(char api, const void *p)
{
const uchar *q = (const uchar *)p;
char msgbuf[64];
@@ -1614,7 +1888,7 @@ error:
}
/* Display info to stderr about the memory block at p. */
-void
+static void
_PyObject_DebugDumpAddress(const void *p)
{
const uchar *q = (const uchar *)p;
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c
index 214b455..ba51fec 100644
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -318,195 +318,6 @@ range_item(rangeobject *r, Py_ssize_t i)
return res;
}
-/* Additional helpers, since the standard slice helpers
- * all clip to PY_SSIZE_T_MAX
- */
-
-/* Replace _PyEval_SliceIndex */
-static PyObject *
-compute_slice_element(PyObject *obj)
-{
- PyObject *result = NULL;
- if (obj != NULL) {
- if (PyIndex_Check(obj)) {
- result = PyNumber_Index(obj);
- }
- else {
- PyErr_SetString(PyExc_TypeError,
- "slice indices must be integers or "
- "None or have an __index__ method");
- }
- }
- return result;
-}
-
-/* Replace PySlice_GetIndicesEx
- * Result indicates whether or not the slice is empty
- * (-1 = error, 0 = empty slice, 1 = slice contains elements)
- */
-static int
-compute_slice_indices(rangeobject *r, PySliceObject *slice,
- PyObject **start, PyObject **stop, PyObject **step)
-{
- int cmp_result, has_elements;
- Py_ssize_t clamped_step = 0;
- PyObject *zero = NULL, *one = NULL, *neg_one = NULL, *candidate = NULL;
- PyObject *tmp_start = NULL, *tmp_stop = NULL, *tmp_step = NULL;
- zero = PyLong_FromLong(0);
- if (zero == NULL) goto Fail;
- one = PyLong_FromLong(1);
- if (one == NULL) goto Fail;
- neg_one = PyLong_FromLong(-1);
- if (neg_one == NULL) goto Fail;
-
- /* Calculate step value */
- if (slice->step == Py_None) {
- clamped_step = 1;
- tmp_step = one;
- Py_INCREF(tmp_step);
- } else {
- if (!_PyEval_SliceIndex(slice->step, &clamped_step)) goto Fail;
- if (clamped_step == 0) {
- PyErr_SetString(PyExc_ValueError,
- "slice step cannot be zero");
- goto Fail;
- }
- tmp_step = compute_slice_element(slice->step);
- if (tmp_step == NULL) goto Fail;
- }
-
- /* Calculate start value */
- if (slice->start == Py_None) {
- if (clamped_step < 0) {
- tmp_start = PyNumber_Subtract(r->length, one);
- if (tmp_start == NULL) goto Fail;
- } else {
- tmp_start = zero;
- Py_INCREF(tmp_start);
- }
- } else {
- candidate = compute_slice_element(slice->start);
- if (candidate == NULL) goto Fail;
- cmp_result = PyObject_RichCompareBool(candidate, zero, Py_LT);
- if (cmp_result == -1) goto Fail;
- if (cmp_result) {
- /* candidate < 0 */
- tmp_start = PyNumber_Add(r->length, candidate);
- if (tmp_start == NULL) goto Fail;
- Py_CLEAR(candidate);
- } else {
- /* candidate >= 0 */
- tmp_start = candidate;
- candidate = NULL;
- }
- cmp_result = PyObject_RichCompareBool(tmp_start, zero, Py_LT);
- if (cmp_result == -1) goto Fail;
- if (cmp_result) {
- /* tmp_start < 0 */
- Py_CLEAR(tmp_start);
- if (clamped_step < 0) {
- tmp_start = neg_one;
- } else {
- tmp_start = zero;
- }
- Py_INCREF(tmp_start);
- } else {
- /* tmp_start >= 0 */
- cmp_result = PyObject_RichCompareBool(tmp_start, r->length, Py_GE);
- if (cmp_result == -1) goto Fail;
- if (cmp_result) {
- /* tmp_start >= r->length */
- Py_CLEAR(tmp_start);
- if (clamped_step < 0) {
- tmp_start = PyNumber_Subtract(r->length, one);
- if (tmp_start == NULL) goto Fail;
- } else {
- tmp_start = r->length;
- Py_INCREF(tmp_start);
- }
- }
- }
- }
-
- /* Calculate stop value */
- if (slice->stop == Py_None) {
- if (clamped_step < 0) {
- tmp_stop = neg_one;
- } else {
- tmp_stop = r->length;
- }
- Py_INCREF(tmp_stop);
- } else {
- candidate = compute_slice_element(slice->stop);
- if (candidate == NULL) goto Fail;
- cmp_result = PyObject_RichCompareBool(candidate, zero, Py_LT);
- if (cmp_result == -1) goto Fail;
- if (cmp_result) {
- /* candidate < 0 */
- tmp_stop = PyNumber_Add(r->length, candidate);
- if (tmp_stop == NULL) goto Fail;
- Py_CLEAR(candidate);
- } else {
- /* candidate >= 0 */
- tmp_stop = candidate;
- candidate = NULL;
- }
- cmp_result = PyObject_RichCompareBool(tmp_stop, zero, Py_LT);
- if (cmp_result == -1) goto Fail;
- if (cmp_result) {
- /* tmp_stop < 0 */
- Py_CLEAR(tmp_stop);
- if (clamped_step < 0) {
- tmp_stop = neg_one;
- } else {
- tmp_stop = zero;
- }
- Py_INCREF(tmp_stop);
- } else {
- /* tmp_stop >= 0 */
- cmp_result = PyObject_RichCompareBool(tmp_stop, r->length, Py_GE);
- if (cmp_result == -1) goto Fail;
- if (cmp_result) {
- /* tmp_stop >= r->length */
- Py_CLEAR(tmp_stop);
- if (clamped_step < 0) {
- tmp_stop = PyNumber_Subtract(r->length, one);
- if (tmp_stop == NULL) goto Fail;
- } else {
- tmp_stop = r->length;
- Py_INCREF(tmp_stop);
- }
- }
- }
- }
-
- /* Check if the slice is empty or not */
- if (clamped_step < 0) {
- has_elements = PyObject_RichCompareBool(tmp_start, tmp_stop, Py_GT);
- } else {
- has_elements = PyObject_RichCompareBool(tmp_start, tmp_stop, Py_LT);
- }
- if (has_elements == -1) goto Fail;
-
- *start = tmp_start;
- *stop = tmp_stop;
- *step = tmp_step;
- Py_DECREF(neg_one);
- Py_DECREF(one);
- Py_DECREF(zero);
- return has_elements;
-
- Fail:
- Py_XDECREF(tmp_start);
- Py_XDECREF(tmp_stop);
- Py_XDECREF(tmp_step);
- Py_XDECREF(candidate);
- Py_XDECREF(neg_one);
- Py_XDECREF(one);
- Py_XDECREF(zero);
- return -1;
-}
-
static PyObject *
compute_slice(rangeobject *r, PyObject *_slice)
{
@@ -514,10 +325,11 @@ compute_slice(rangeobject *r, PyObject *_slice)
rangeobject *result;
PyObject *start = NULL, *stop = NULL, *step = NULL;
PyObject *substart = NULL, *substop = NULL, *substep = NULL;
- int has_elements;
+ int error;
- has_elements = compute_slice_indices(r, slice, &start, &stop, &step);
- if (has_elements == -1) return NULL;
+ error = _PySlice_GetLongIndices(slice, r->length, &start, &stop, &step);
+ if (error == -1)
+ return NULL;
substep = PyNumber_Multiply(r->step, step);
if (substep == NULL) goto fail;
@@ -527,13 +339,8 @@ compute_slice(rangeobject *r, PyObject *_slice)
if (substart == NULL) goto fail;
Py_CLEAR(start);
- if (has_elements) {
- substop = compute_item(r, stop);
- if (substop == NULL) goto fail;
- } else {
- substop = substart;
- Py_INCREF(substop);
- }
+ substop = compute_item(r, stop);
+ if (substop == NULL) goto fail;
Py_CLEAR(stop);
result = make_range_object(Py_TYPE(r), substart, substop, substep);
diff --git a/Objects/setobject.c b/Objects/setobject.c
index c484dce..ea5a24c 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -214,7 +214,6 @@ static int
set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
{
register setentry *entry;
- typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, Py_hash_t);
assert(so->lookup != NULL);
entry = so->lookup(so, key, hash);
diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c
index 1593335..52f1c89 100644
--- a/Objects/sliceobject.c
+++ b/Objects/sliceobject.c
@@ -299,23 +299,198 @@ static PyMemberDef slice_members[] = {
{0}
};
+/* Helper function to convert a slice argument to a PyLong, and raise TypeError
+ with a suitable message on failure. */
+
static PyObject*
-slice_indices(PySliceObject* self, PyObject* len)
+evaluate_slice_index(PyObject *v)
{
- Py_ssize_t ilen, start, stop, step, slicelength;
+ if (PyIndex_Check(v)) {
+ return PyNumber_Index(v);
+ }
+ else {
+ PyErr_SetString(PyExc_TypeError,
+ "slice indices must be integers or "
+ "None or have an __index__ method");
+ return NULL;
+ }
+}
- ilen = PyNumber_AsSsize_t(len, PyExc_OverflowError);
+/* Compute slice indices given a slice and length. Return -1 on failure. Used
+ by slice.indices and rangeobject slicing. Assumes that `length` is a
+ nonnegative instance of PyLong. */
- if (ilen == -1 && PyErr_Occurred()) {
- return NULL;
+int
+_PySlice_GetLongIndices(PySliceObject *self, PyObject *length,
+ PyObject **start_ptr, PyObject **stop_ptr,
+ PyObject **step_ptr)
+{
+ PyObject *start=NULL, *stop=NULL, *step=NULL;
+ PyObject *upper=NULL, *lower=NULL;
+ int step_is_negative, cmp_result;
+
+ /* Convert step to an integer; raise for zero step. */
+ if (self->step == Py_None) {
+ step = PyLong_FromLong(1L);
+ if (step == NULL)
+ goto error;
+ step_is_negative = 0;
+ }
+ else {
+ int step_sign;
+ step = evaluate_slice_index(self->step);
+ if (step == NULL)
+ goto error;
+ step_sign = _PyLong_Sign(step);
+ if (step_sign == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "slice step cannot be zero");
+ goto error;
+ }
+ step_is_negative = step_sign < 0;
+ }
+
+ /* Find lower and upper bounds for start and stop. */
+ if (step_is_negative) {
+ lower = PyLong_FromLong(-1L);
+ if (lower == NULL)
+ goto error;
+
+ upper = PyNumber_Add(length, lower);
+ if (upper == NULL)
+ goto error;
+ }
+ else {
+ lower = PyLong_FromLong(0L);
+ if (lower == NULL)
+ goto error;
+
+ upper = length;
+ Py_INCREF(upper);
}
- if (PySlice_GetIndicesEx((PyObject*)self, ilen, &start, &stop,
- &step, &slicelength) < 0) {
+ /* Compute start. */
+ if (self->start == Py_None) {
+ start = step_is_negative ? upper : lower;
+ Py_INCREF(start);
+ }
+ else {
+ start = evaluate_slice_index(self->start);
+ if (start == NULL)
+ goto error;
+
+ if (_PyLong_Sign(start) < 0) {
+ /* start += length */
+ PyObject *tmp = PyNumber_Add(start, length);
+ Py_DECREF(start);
+ start = tmp;
+ if (start == NULL)
+ goto error;
+
+ cmp_result = PyObject_RichCompareBool(start, lower, Py_LT);
+ if (cmp_result < 0)
+ goto error;
+ if (cmp_result) {
+ Py_INCREF(lower);
+ Py_DECREF(start);
+ start = lower;
+ }
+ }
+ else {
+ cmp_result = PyObject_RichCompareBool(start, upper, Py_GT);
+ if (cmp_result < 0)
+ goto error;
+ if (cmp_result) {
+ Py_INCREF(upper);
+ Py_DECREF(start);
+ start = upper;
+ }
+ }
+ }
+
+ /* Compute stop. */
+ if (self->stop == Py_None) {
+ stop = step_is_negative ? lower : upper;
+ Py_INCREF(stop);
+ }
+ else {
+ stop = evaluate_slice_index(self->stop);
+ if (stop == NULL)
+ goto error;
+
+ if (_PyLong_Sign(stop) < 0) {
+ /* stop += length */
+ PyObject *tmp = PyNumber_Add(stop, length);
+ Py_DECREF(stop);
+ stop = tmp;
+ if (stop == NULL)
+ goto error;
+
+ cmp_result = PyObject_RichCompareBool(stop, lower, Py_LT);
+ if (cmp_result < 0)
+ goto error;
+ if (cmp_result) {
+ Py_INCREF(lower);
+ Py_DECREF(stop);
+ stop = lower;
+ }
+ }
+ else {
+ cmp_result = PyObject_RichCompareBool(stop, upper, Py_GT);
+ if (cmp_result < 0)
+ goto error;
+ if (cmp_result) {
+ Py_INCREF(upper);
+ Py_DECREF(stop);
+ stop = upper;
+ }
+ }
+ }
+
+ *start_ptr = start;
+ *stop_ptr = stop;
+ *step_ptr = step;
+ Py_DECREF(upper);
+ Py_DECREF(lower);
+ return 0;
+
+ error:
+ *start_ptr = *stop_ptr = *step_ptr = NULL;
+ Py_XDECREF(start);
+ Py_XDECREF(stop);
+ Py_XDECREF(step);
+ Py_XDECREF(upper);
+ Py_XDECREF(lower);
+ return -1;
+}
+
+/* Implementation of slice.indices. */
+
+static PyObject*
+slice_indices(PySliceObject* self, PyObject* len)
+{
+ PyObject *start, *stop, *step;
+ PyObject *length;
+ int error;
+
+ /* Convert length to an integer if necessary; raise for negative length. */
+ length = PyNumber_Index(len);
+ if (length == NULL)
+ return NULL;
+
+ if (_PyLong_Sign(length) < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "length should not be negative");
+ Py_DECREF(length);
return NULL;
}
- return Py_BuildValue("(nnn)", start, stop, step);
+ error = _PySlice_GetLongIndices(self, length, &start, &stop, &step);
+ Py_DECREF(length);
+ if (error == -1)
+ return NULL;
+ else
+ return Py_BuildValue("(NNN)", start, stop, step);
}
PyDoc_STRVAR(slice_indices_doc,
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index f62813d..d0fc18d 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index f353367..f855003 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -47,7 +47,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
unsigned long value = *(unsigned long *) _s;
if (value & ASCII_CHAR_MASK)
break;
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
_p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
@@ -486,7 +486,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
const unsigned char *q = *inptr;
STRINGLIB_CHAR *p = dest + *outpos;
/* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
int ihi = !!native_ordering, ilo = !native_ordering;
#else
int ihi = !native_ordering, ilo = !!native_ordering;
@@ -517,7 +517,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
block = SWAB(block);
#endif
}
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
# if SIZEOF_LONG == 4
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
p[1] = (STRINGLIB_CHAR)(block >> 16);
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 55ac77d..cd7cac4 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -142,6 +142,8 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
mask = 0;
if (mode != FAST_RSEARCH) {
+ const STRINGLIB_CHAR *ss = s + m - 1;
+ const STRINGLIB_CHAR *pp = p + m - 1;
/* create compressed boyer-moore delta 1 table */
@@ -156,7 +158,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
- if (s[i+m-1] == p[m-1]) {
+ if (ss[i] == pp[0]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
@@ -172,13 +174,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, s[i+m]))
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, s[i+m]))
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
}
}
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
new file mode 100644
index 0000000..5568b31
--- /dev/null
+++ b/Objects/stringlib/join.h
@@ -0,0 +1,133 @@
+/* stringlib: bytes joining implementation */
+
+#if STRINGLIB_SIZEOF_CHAR != 1
+#error join.h only compatible with byte-wise strings
+#endif
+
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
+{
+ char *sepstr = STRINGLIB_STR(sep);
+ const Py_ssize_t seplen = STRINGLIB_LEN(sep);
+ PyObject *res = NULL;
+ char *p;
+ Py_ssize_t seqlen = 0;
+ Py_ssize_t sz = 0;
+ Py_ssize_t i, nbufs;
+ PyObject *seq, *item;
+ Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+ Py_buffer static_buffers[NB_STATIC_BUFFERS];
+
+ seq = PySequence_Fast(iterable, "can only join an iterable");
+ if (seq == NULL) {
+ return NULL;
+ }
+
+ seqlen = PySequence_Fast_GET_SIZE(seq);
+ if (seqlen == 0) {
+ Py_DECREF(seq);
+ return STRINGLIB_NEW(NULL, 0);
+ }
+#ifndef STRINGLIB_MUTABLE
+ if (seqlen == 1) {
+ item = PySequence_Fast_GET_ITEM(seq, 0);
+ if (STRINGLIB_CHECK_EXACT(item)) {
+ Py_INCREF(item);
+ Py_DECREF(seq);
+ return item;
+ }
+ }
+#endif
+ if (seqlen > NB_STATIC_BUFFERS) {
+ buffers = PyMem_NEW(Py_buffer, seqlen);
+ if (buffers == NULL) {
+ Py_DECREF(seq);
+ PyErr_NoMemory();
+ return NULL;
+ }
+ }
+ else {
+ buffers = static_buffers;
+ }
+
+ /* Here is the general case. Do a pre-pass to figure out the total
+ * amount of space we'll need (sz), and see whether all arguments are
+ * buffer-compatible.
+ */
+ for (i = 0, nbufs = 0; i < seqlen; i++) {
+ Py_ssize_t itemlen;
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ if (_getbuffer(item, &buffers[i]) < 0) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected bytes, bytearray, "
+ "or an object with the buffer interface, %.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ nbufs = i + 1; /* for error cleanup */
+ itemlen = buffers[i].len;
+ if (itemlen > PY_SSIZE_T_MAX - sz) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += itemlen;
+ if (i != 0) {
+ if (seplen > PY_SSIZE_T_MAX - sz) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += seplen;
+ }
+ if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "sequence changed size during iteration");
+ goto error;
+ }
+ }
+
+ /* Allocate result space. */
+ res = STRINGLIB_NEW(NULL, sz);
+ if (res == NULL)
+ goto error;
+
+ /* Catenate everything. */
+ p = STRINGLIB_STR(res);
+ if (!seplen) {
+ /* fast path */
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n = buffers[i].len;
+ char *q = buffers[i].buf;
+ Py_MEMCPY(p, q, n);
+ p += n;
+ }
+ goto done;
+ }
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n;
+ char *q;
+ if (i) {
+ Py_MEMCPY(p, sepstr, seplen);
+ p += seplen;
+ }
+ n = buffers[i].len;
+ q = buffers[i].buf;
+ Py_MEMCPY(p, q, n);
+ p += n;
+ }
+ goto done;
+
+error:
+ res = NULL;
+done:
+ Py_DECREF(seq);
+ for (i = 0; i < nbufs; i++)
+ PyBuffer_Release(&buffers[i]);
+ if (buffers != static_buffers)
+ PyMem_FREE(buffers);
+ return res;
+}
+
+#undef NB_STATIC_BUFFERS
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h
new file mode 100644
index 0000000..ef318ed
--- /dev/null
+++ b/Objects/stringlib/replace.h
@@ -0,0 +1,53 @@
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+ Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+ *s = u2;
+ while (--maxcount && ++s != end) {
+ /* Find the next character to be replaced.
+
+ If it occurs often, it is faster to scan for it using an inline
+ loop. If it occurs seldom, it is faster to scan for it using a
+ function call; the overhead of the function call is amortized
+ across the many characters that call covers. We start with an
+ inline loop and use a heuristic to determine whether to fall back
+ to a function call. */
+ if (*s != u1) {
+ int attempts = 10;
+ /* search for u1 in a dummy loop */
+ while (1) {
+ if (++s == end)
+ return;
+ if (*s == u1)
+ break;
+ if (!--attempts) {
+ /* if u1 was not found for attempts iterations,
+ use FASTSEARCH() or memchr() */
+#if STRINGLIB_SIZEOF_CHAR == 1
+ s++;
+ s = memchr(s, u1, end - s);
+ if (s == NULL)
+ return;
+#else
+ Py_ssize_t i;
+ STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+ s++;
+ i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+ if (i < 0)
+ return;
+ s += i;
+#endif
+ /* restart the dummy loop */
+ break;
+ }
+ }
+ }
+ *s = u2;
+ }
+}
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index 7bb91a7..ce27f3e 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -21,7 +21,6 @@
#define STRINGLIB_STR PyBytes_AS_STRING
#define STRINGLIB_LEN PyBytes_GET_SIZE
#define STRINGLIB_NEW PyBytes_FromStringAndSize
-#define STRINGLIB_RESIZE _PyBytes_Resize
#define STRINGLIB_CHECK PyBytes_Check
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
index e8c6fcb..ce1eb57 100644
--- a/Objects/stringlib/ucs1lib.h
+++ b/Objects/stringlib/ucs1lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS1
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
index 45e5729..f900cb6 100644
--- a/Objects/stringlib/ucs2lib.h
+++ b/Objects/stringlib/ucs2lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_2BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS2
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
index 647a27e..86a480f 100644
--- a/Objects/stringlib/ucs4lib.h
+++ b/Objects/stringlib/ucs4lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_4BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS4
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h
index 03117ec..f9d3f1d 100644
--- a/Objects/stringlib/undef.h
+++ b/Objects/stringlib/undef.h
@@ -6,7 +6,6 @@
#undef STRINGLIB_STR
#undef STRINGLIB_LEN
#undef STRINGLIB_NEW
-#undef STRINGLIB_RESIZE
#undef _Py_InsertThousandsGrouping
#undef STRINGLIB_IS_UNICODE
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index c1c2cf3..aec221a 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -543,7 +543,7 @@ done:
static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
- Py_UCS4 *conversion)
+ int *format_spec_needs_expanding, Py_UCS4 *conversion)
{
/* Note this function works if the field name is zero length,
which is good. Zero length field names are handled later, in
@@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
field_name->start = str->start;
while (str->start < str->end) {
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+ case '{':
+ PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
+ return 0;
+ case '[':
+ for (; str->start < str->end; str->start++)
+ if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
+ break;
+ continue;
+ case '}':
case ':':
case '!':
break;
@@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
break;
}
+ field_name->end = str->start - 1;
if (c == '!' || c == ':') {
+ Py_ssize_t count;
/* we have a format specifier and/or a conversion */
/* don't include the last character */
- field_name->end = str->start-1;
-
- /* the format specifier is the rest of the string */
- format_spec->str = str->str;
- format_spec->start = str->start;
- format_spec->end = str->end;
/* see if there's a conversion specifier */
if (c == '!') {
/* there must be another character present */
- if (format_spec->start >= format_spec->end) {
+ if (str->start >= str->end) {
PyErr_SetString(PyExc_ValueError,
- "end of format while looking for conversion "
+ "end of string while looking for conversion "
"specifier");
return 0;
}
- *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+ *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
- /* if there is another character, it must be a colon */
- if (format_spec->start < format_spec->end) {
- c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+ if (str->start < str->end) {
+ c = PyUnicode_READ_CHAR(str->str, str->start++);
+ if (c == '}')
+ return 1;
if (c != ':') {
PyErr_SetString(PyExc_ValueError,
- "expected ':' after format specifier");
+ "expected ':' after conversion specifier");
return 0;
}
}
}
+ format_spec->str = str->str;
+ format_spec->start = str->start;
+ count = 1;
+ while (str->start < str->end) {
+ switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+ case '{':
+ *format_spec_needs_expanding = 1;
+ count++;
+ break;
+ case '}':
+ count--;
+ if (count == 0) {
+ format_spec->end = str->start - 1;
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
+ return 0;
+ }
+ else if (c != '}') {
+ PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
+ return 0;
}
- else
- /* end of string, there's no format_spec or conversion */
- field_name->end = str->start;
return 1;
}
@@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
SubString *format_spec, Py_UCS4 *conversion,
int *format_spec_needs_expanding)
{
- int at_end, hit_format_spec;
+ int at_end;
Py_UCS4 c = 0;
Py_ssize_t start;
- int count;
Py_ssize_t len;
int markup_follows = 0;
@@ -713,50 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
if (!markup_follows)
return 2;
- /* this is markup, find the end of the string by counting nested
- braces. note that this prohibits escaped braces, so that
- format_specs cannot have braces in them. */
+ /* this is markup; parse the field */
*field_present = 1;
- count = 1;
-
- start = self->str.start;
-
- /* we know we can't have a zero length string, so don't worry
- about that case */
- hit_format_spec = 0;
- while (self->str.start < self->str.end) {
- switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
- case ':':
- hit_format_spec = 1;
- count = 1;
- break;
- case '{':
- /* the format spec needs to be recursively expanded.
- this is an optimization, and not strictly needed */
- if (hit_format_spec)
- *format_spec_needs_expanding = 1;
- count++;
- break;
- case '}':
- count--;
- if (count <= 0) {
- /* we're done. parse and get out */
- SubString s;
-
- SubString_init(&s, self->str.str, start, self->str.start - 1);
- if (parse_field(&s, field_name, format_spec, conversion) == 0)
- return 0;
-
- /* success */
- return 2;
- }
- break;
- }
- }
-
- /* end of string while searching for matching '}' */
- PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
- return 0;
+ if (!parse_field(&self->str, field_name, format_spec,
+ format_spec_needs_expanding, conversion))
+ return 0;
+ return 2;
}
@@ -875,25 +866,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
SubString literal;
SubString field_name;
SubString format_spec;
- Py_UCS4 conversion, maxchar;
- Py_ssize_t sublen;
- int err;
+ Py_UCS4 conversion;
MarkupIterator_init(&iter, input->str, input->start, input->end);
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
&field_name, &format_spec,
&conversion,
&format_spec_needs_expanding)) == 2) {
- sublen = literal.end - literal.start;
- if (sublen) {
- maxchar = _PyUnicode_FindMaxChar(literal.str,
- literal.start, literal.end);
- err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
- if (err == -1)
+ if (literal.end != literal.start) {
+ if (!field_present && iter.str.start == iter.str.end)
+ writer->overallocate = 0;
+ if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+ literal.start, literal.end) < 0)
return 0;
- _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
- literal.str, literal.start, sublen);
- writer->pos += sublen;
}
if (field_present) {
@@ -918,7 +903,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
_PyUnicodeWriter writer;
- Py_ssize_t minlen;
/* check the recursion level */
if (recursion_depth <= 0) {
@@ -927,8 +911,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL;
}
- minlen = PyUnicode_GET_LENGTH(input->str) + 100;
- _PyUnicodeWriter_Init(&writer, minlen);
+ _PyUnicodeWriter_Init(&writer);
+ writer.overallocate = 1;
+ writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index f16f21e..48d00ec 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -21,7 +21,6 @@
#define STRINGLIB_STR PyUnicode_AS_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
-#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/structseq.c b/Objects/structseq.c
index c3b9a72..664344b 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -11,17 +11,20 @@ static char unnamed_fields_key[] = "n_unnamed_fields";
/* Fields with this name have only a field index, not a field name.
They are only allowed for indices < n_visible_fields. */
char *PyStructSequence_UnnamedField = "unnamed field";
+_Py_IDENTIFIER(n_sequence_fields);
+_Py_IDENTIFIER(n_fields);
+_Py_IDENTIFIER(n_unnamed_fields);
#define VISIBLE_SIZE(op) Py_SIZE(op)
#define VISIBLE_SIZE_TP(tp) PyLong_AsLong( \
- PyDict_GetItemString((tp)->tp_dict, visible_length_key))
+ _PyDict_GetItemId((tp)->tp_dict, &PyId_n_sequence_fields))
#define REAL_SIZE_TP(tp) PyLong_AsLong( \
- PyDict_GetItemString((tp)->tp_dict, real_length_key))
+ _PyDict_GetItemId((tp)->tp_dict, &PyId_n_fields))
#define REAL_SIZE(op) REAL_SIZE_TP(Py_TYPE(op))
#define UNNAMED_FIELDS_TP(tp) PyLong_AsLong( \
- PyDict_GetItemString((tp)->tp_dict, unnamed_fields_key))
+ _PyDict_GetItemId((tp)->tp_dict, &PyId_n_unnamed_fields))
#define UNNAMED_FIELDS(op) UNNAMED_FIELDS_TP(Py_TYPE(op))
@@ -59,7 +62,7 @@ static void
structseq_dealloc(PyStructSequence *obj)
{
Py_ssize_t i, size;
-
+
size = REAL_SIZE(obj);
for (i = 0; i < size; ++i) {
Py_XDECREF(obj->ob_item[i]);
@@ -230,8 +233,8 @@ structseq_repr(PyStructSequence *obj)
static PyObject *
structseq_reduce(PyStructSequence* self)
{
- PyObject* tup;
- PyObject* dict;
+ PyObject* tup = NULL;
+ PyObject* dict = NULL;
PyObject* result;
Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields;
int i;
@@ -240,15 +243,12 @@ structseq_reduce(PyStructSequence* self)
n_visible_fields = VISIBLE_SIZE(self);
n_unnamed_fields = UNNAMED_FIELDS(self);
tup = PyTuple_New(n_visible_fields);
- if (!tup) {
- return NULL;
- }
+ if (!tup)
+ goto error;
dict = PyDict_New();
- if (!dict) {
- Py_DECREF(tup);
- return NULL;
- }
+ if (!dict)
+ goto error;
for (i = 0; i < n_visible_fields; i++) {
Py_INCREF(self->ob_item[i]);
@@ -257,8 +257,8 @@ structseq_reduce(PyStructSequence* self)
for (; i < n_fields; i++) {
char *n = Py_TYPE(self)->tp_members[i-n_unnamed_fields].name;
- PyDict_SetItemString(dict, n,
- self->ob_item[i]);
+ if (PyDict_SetItemString(dict, n, self->ob_item[i]) < 0)
+ goto error;
}
result = Py_BuildValue("(O(OO))", Py_TYPE(self), tup, dict);
@@ -267,6 +267,11 @@ structseq_reduce(PyStructSequence* self)
Py_DECREF(dict);
return result;
+
+error:
+ Py_XDECREF(tup);
+ Py_XDECREF(dict);
+ return NULL;
}
static PyMethodDef structseq_methods[] = {
@@ -315,12 +320,13 @@ static PyTypeObject _struct_sequence_template = {
structseq_new, /* tp_new */
};
-void
-PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
+int
+PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc)
{
PyObject *dict;
PyMemberDef* members;
int n_members, n_unnamed_members, i, k;
+ PyObject *v;
#ifdef Py_TRACE_REFS
/* if the type object was chained, unchain it first
@@ -342,8 +348,10 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
type->tp_doc = desc->doc;
members = PyMem_NEW(PyMemberDef, n_members-n_unnamed_members+1);
- if (members == NULL)
- return;
+ if (members == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
for (i = k = 0; i < n_members; ++i) {
if (desc->fields[i].name == PyStructSequence_UnnamedField)
@@ -361,30 +369,56 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
type->tp_members = members;
if (PyType_Ready(type) < 0)
- return;
+ return -1;
Py_INCREF(type);
dict = type->tp_dict;
#define SET_DICT_FROM_INT(key, value) \
do { \
- PyObject *v = PyLong_FromLong((long) value); \
- if (v != NULL) { \
- PyDict_SetItemString(dict, key, v); \
+ v = PyLong_FromLong((long) value); \
+ if (v == NULL) \
+ return -1; \
+ if (PyDict_SetItemString(dict, key, v) < 0) { \
Py_DECREF(v); \
+ return -1; \
} \
+ Py_DECREF(v); \
} while (0)
SET_DICT_FROM_INT(visible_length_key, desc->n_in_sequence);
SET_DICT_FROM_INT(real_length_key, n_members);
SET_DICT_FROM_INT(unnamed_fields_key, n_unnamed_members);
+
+ return 0;
+}
+
+void
+PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
+{
+ (void)PyStructSequence_InitType2(type, desc);
}
PyTypeObject*
PyStructSequence_NewType(PyStructSequence_Desc *desc)
{
- PyTypeObject *result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0);
- if (result != NULL) {
- PyStructSequence_InitType(result, desc);
+ PyTypeObject *result;
+
+ result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0);
+ if (result == NULL)
+ return NULL;
+ if (PyStructSequence_InitType2(result, desc) < 0) {
+ Py_DECREF(result);
+ return NULL;
}
return result;
}
+
+int _PyStructSequence_Init(void)
+{
+ if (_PyUnicode_FromId(&PyId_n_sequence_fields) == NULL
+ || _PyUnicode_FromId(&PyId_n_fields) == NULL
+ || _PyUnicode_FromId(&PyId_n_unnamed_fields) == NULL)
+ return -1;
+
+ return 0;
+}
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index ec3f91b..52896b7 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -322,6 +322,9 @@ error:
1082527, 1165049, 1082531, 1165057, 1247581, 1330103, 1082533,
1330111, 1412633, 1165069, 1247599, 1495177, 1577699
+
+ Tests have shown that caching the hash value is not worthwhile; see
+ issue #9685.
*/
static Py_hash_t
@@ -927,7 +930,7 @@ PyTuple_Fini(void)
typedef struct {
PyObject_HEAD
- long it_index;
+ Py_ssize_t it_index;
PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */
} tupleiterobject;
@@ -985,7 +988,7 @@ static PyObject *
tupleiter_reduce(tupleiterobject *it)
{
if (it->it_seq)
- return Py_BuildValue("N(O)l", _PyObject_GetBuiltin("iter"),
+ return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
@@ -994,7 +997,7 @@ tupleiter_reduce(tupleiterobject *it)
static PyObject *
tupleiter_setstate(tupleiterobject *it, PyObject *state)
{
- long index = PyLong_AsLong(state);
+ Py_ssize_t index = PyLong_AsSsize_t(state);
if (index == -1 && PyErr_Occurred())
return NULL;
if (it->it_seq != NULL) {
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index a55d977..3ff42da 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -736,6 +736,13 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
}
+#ifdef Py_DEBUG
+ /* type_call() must not be called with an exception set,
+ because it may clear it (directly or indirectly) and so the
+ caller loses its exception */
+ assert(!PyErr_Occurred());
+#endif
+
obj = type->tp_new(type, args, kwds);
if (obj != NULL) {
/* Ugly exception: when the call was type(something),
@@ -750,10 +757,12 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (!PyType_IsSubtype(Py_TYPE(obj), type))
return obj;
type = Py_TYPE(obj);
- if (type->tp_init != NULL &&
- type->tp_init(obj, args, kwds) < 0) {
- Py_DECREF(obj);
- obj = NULL;
+ if (type->tp_init != NULL) {
+ int res = type->tp_init(obj, args, kwds);
+ if (res < 0) {
+ Py_DECREF(obj);
+ obj = NULL;
+ }
}
}
return obj;
@@ -912,6 +921,7 @@ subtype_dealloc(PyObject *self)
PyTypeObject *type, *base;
destructor basedealloc;
PyThreadState *tstate = PyThreadState_GET();
+ int has_finalizer;
/* Extract the type; we expect it to be a heap type */
type = Py_TYPE(self);
@@ -927,6 +937,10 @@ subtype_dealloc(PyObject *self)
clear_slots(), or DECREF the dict, or clear weakrefs. */
/* Maybe call finalizer; exit early if resurrected */
+ if (type->tp_finalize) {
+ if (PyObject_CallFinalizerFromDealloc(self) < 0)
+ return;
+ }
if (type->tp_del) {
type->tp_del(self);
if (self->ob_refcnt > 0)
@@ -978,25 +992,36 @@ subtype_dealloc(PyObject *self)
assert(base);
}
- /* If we added a weaklist, we clear it. Do this *before* calling
- the finalizer (__del__), clearing slots, or clearing the instance
- dict. */
+ has_finalizer = type->tp_finalize || type->tp_del;
+
+ /* Maybe call finalizer; exit early if resurrected */
+ if (has_finalizer)
+ _PyObject_GC_TRACK(self);
+ if (type->tp_finalize) {
+ if (PyObject_CallFinalizerFromDealloc(self) < 0) {
+ /* Resurrected */
+ goto endlabel;
+ }
+ }
+ /* If we added a weaklist, we clear it. Do this *before* calling
+ tp_del, clearing slots, or clearing the instance dict. */
if (type->tp_weaklistoffset && !base->tp_weaklistoffset)
PyObject_ClearWeakRefs(self);
- /* Maybe call finalizer; exit early if resurrected */
if (type->tp_del) {
- _PyObject_GC_TRACK(self);
type->tp_del(self);
- if (self->ob_refcnt > 0)
- goto endlabel; /* resurrected */
- else
- _PyObject_GC_UNTRACK(self);
+ if (self->ob_refcnt > 0) {
+ /* Resurrected */
+ goto endlabel;
+ }
+ }
+ if (has_finalizer) {
+ _PyObject_GC_UNTRACK(self);
/* New weakrefs could be created during the finalizer call.
- If this occurs, clear them out without calling their
- finalizers since they might rely on part of the object
- being finalized that has already been destroyed. */
+ If this occurs, clear them out without calling their
+ finalizers since they might rely on part of the object
+ being finalized that has already been destroyed. */
if (type->tp_weaklistoffset && !base->tp_weaklistoffset) {
/* Modeled after GET_WEAKREFS_LISTPTR() */
PyWeakReference **list = (PyWeakReference **) \
@@ -1456,8 +1481,10 @@ pmerge(PyObject *acc, PyObject* to_merge) {
that is not included in acc.
*/
remain = (int *)PyMem_MALLOC(SIZEOF_INT*to_merge_size);
- if (remain == NULL)
+ if (remain == NULL) {
+ PyErr_NoMemory();
return -1;
+ }
for (i = 0; i < to_merge_size; i++)
remain[i] = 0;
@@ -1489,7 +1516,7 @@ pmerge(PyObject *acc, PyObject* to_merge) {
}
ok = PyList_Append(acc, candidate);
if (ok < 0) {
- PyMem_Free(remain);
+ PyMem_FREE(remain);
return -1;
}
for (j = 0; j < to_merge_size; j++) {
@@ -1949,7 +1976,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds)
return res;
}
-long
+unsigned long
PyType_GetFlags(PyTypeObject *type)
{
return type->tp_flags;
@@ -2220,7 +2247,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
/* Initialize tp_flags */
type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE |
- Py_TPFLAGS_BASETYPE;
+ Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_FINALIZE;
if (base->tp_flags & Py_TPFLAGS_HAVE_GC)
type->tp_flags |= Py_TPFLAGS_HAVE_GC;
@@ -2290,8 +2317,10 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
/* Silently truncate the docstring if it contains null bytes. */
len = strlen(doc_str);
tp_doc = (char *)PyObject_MALLOC(len + 1);
- if (tp_doc == NULL)
+ if (tp_doc == NULL) {
+ PyErr_NoMemory();
goto error;
+ }
memcpy(tp_doc, doc_str, len + 1);
type->tp_doc = tp_doc;
}
@@ -2411,7 +2440,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
char *s;
char *res_start = (char*)res;
PyType_Slot *slot;
-
+
/* Set the type name and qualname */
s = strrchr(spec->name, '.');
if (s == NULL)
@@ -2432,7 +2461,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
type->tp_name = spec->name;
if (!type->tp_name)
goto fail;
-
+
/* Adjust for empty tuple bases */
if (!bases) {
base = &PyBaseObject_Type;
@@ -2494,8 +2523,10 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
if (slot->slot == Py_tp_doc) {
size_t len = strlen(slot->pfunc)+1;
char *tp_doc = PyObject_MALLOC(len);
- if (tp_doc == NULL)
+ if (tp_doc == NULL) {
+ PyErr_NoMemory();
goto fail;
+ }
memcpy(tp_doc, slot->pfunc, len);
type->tp_doc = tp_doc;
}
@@ -2516,7 +2547,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
/* Set type.__module__ */
s = strrchr(spec->name, '.');
if (s != NULL)
- _PyDict_SetItemId(type->tp_dict, &PyId___module__,
+ _PyDict_SetItemId(type->tp_dict, &PyId___module__,
PyUnicode_FromStringAndSize(
spec->name, (Py_ssize_t)(s - spec->name)));
@@ -3323,7 +3354,7 @@ object_set_class(PyObject *self, PyObject *value, void *closure)
"__class__ assignment: only for heap types");
return -1;
}
- if (compatible_for_assignment(newto, oldto, "__class__")) {
+ if (compatible_for_assignment(oldto, newto, "__class__")) {
Py_INCREF(newto);
Py_TYPE(self) = newto;
Py_DECREF(oldto);
@@ -3669,16 +3700,9 @@ object_format(PyObject *self, PyObject *args)
/* Issue 7994: If we're converting to a string, we
should reject format specifications */
if (PyUnicode_GET_LENGTH(format_spec) > 0) {
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "object.__format__ with a non-empty format "
- "string is deprecated", 1) < 0) {
- goto done;
- }
- /* Eventually this will become an error:
- PyErr_Format(PyExc_TypeError,
+ PyErr_SetString(PyExc_TypeError,
"non-empty format string passed to object.__format__");
- goto done;
- */
+ goto done;
}
result = PyObject_Format(self_as_str, format_spec);
@@ -3833,7 +3857,7 @@ add_methods(PyTypeObject *type, PyMethodDef *meth)
descr = PyDescr_NewClassMethod(type, meth);
}
else if (meth->ml_flags & METH_STATIC) {
- PyObject *cfunc = PyCFunction_New(meth, (PyObject*)type);
+ PyObject *cfunc = PyCFunction_NewEx(meth, (PyObject*)type, NULL);
if (cfunc == NULL)
return -1;
descr = PyStaticMethod_New(cfunc);
@@ -4103,6 +4127,10 @@ inherit_slots(PyTypeObject *type, PyTypeObject *base)
COPYSLOT(tp_init);
COPYSLOT(tp_alloc);
COPYSLOT(tp_is_gc);
+ if ((type->tp_flags & Py_TPFLAGS_HAVE_FINALIZE) &&
+ (base->tp_flags & Py_TPFLAGS_HAVE_FINALIZE)) {
+ COPYSLOT(tp_finalize);
+ }
if ((type->tp_flags & Py_TPFLAGS_HAVE_GC) ==
(base->tp_flags & Py_TPFLAGS_HAVE_GC)) {
/* They agree about gc. */
@@ -4255,11 +4283,15 @@ PyType_Ready(PyTypeObject *type)
PyObject *doc = PyUnicode_FromString(type->tp_doc);
if (doc == NULL)
goto error;
- _PyDict_SetItemId(type->tp_dict, &PyId___doc__, doc);
+ if (_PyDict_SetItemId(type->tp_dict, &PyId___doc__, doc) < 0) {
+ Py_DECREF(doc);
+ goto error;
+ }
Py_DECREF(doc);
} else {
- _PyDict_SetItemId(type->tp_dict,
- &PyId___doc__, Py_None);
+ if (_PyDict_SetItemId(type->tp_dict,
+ &PyId___doc__, Py_None) < 0)
+ goto error;
}
}
@@ -4303,13 +4335,11 @@ PyType_Ready(PyTypeObject *type)
/* Warn for a type that implements tp_compare (now known as
tp_reserved) but not tp_richcompare. */
if (type->tp_reserved && !type->tp_richcompare) {
- int error;
- error = PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ PyErr_Format(PyExc_TypeError,
"Type %.100s defines tp_reserved (formerly tp_compare) "
"but not tp_richcompare. Comparisons may not behave as intended.",
type->tp_name);
- if (error == -1)
- goto error;
+ goto error;
}
/* All done -- set the ready flag */
@@ -4338,6 +4368,8 @@ add_subclass(PyTypeObject *base, PyTypeObject *type)
}
assert(PyList_Check(list));
newobj = PyWeakref_NewRef((PyObject *)type, NULL);
+ if (newobj == NULL)
+ return -1;
i = PyList_GET_SIZE(list);
while (--i >= 0) {
ref = PyList_GET_ITEM(list, i);
@@ -4725,6 +4757,18 @@ wrap_call(PyObject *self, PyObject *args, void *wrapped, PyObject *kwds)
}
static PyObject *
+wrap_del(PyObject *self, PyObject *args, void *wrapped)
+{
+ destructor func = (destructor)wrapped;
+
+ if (!check_num_args(args, 0))
+ return NULL;
+
+ (*func)(self);
+ Py_RETURN_NONE;
+}
+
+static PyObject *
wrap_richcmpfunc(PyObject *self, PyObject *args, void *wrapped, int op)
{
richcmpfunc func = (richcmpfunc)wrapped;
@@ -4903,7 +4947,7 @@ add_tp_new_wrapper(PyTypeObject *type)
if (_PyDict_GetItemId(type->tp_dict, &PyId___new__) != NULL)
return 0;
- func = PyCFunction_New(tp_new_methoddef, (PyObject *)type);
+ func = PyCFunction_NewEx(tp_new_methoddef, (PyObject *)type, NULL);
if (func == NULL)
return -1;
if (_PyDict_SetItemId(type->tp_dict, &PyId___new__, func)) {
@@ -5281,29 +5325,12 @@ slot_tp_str(PyObject *self)
_Py_IDENTIFIER(__str__);
func = lookup_method(self, &PyId___str__);
- if (func != NULL) {
+ if (func == NULL)
+ return NULL;
res = PyEval_CallObject(func, NULL);
Py_DECREF(func);
return res;
}
- else {
- /* PyObject *ress; */
- PyErr_Clear();
- res = slot_tp_repr(self);
- if (!res)
- return NULL;
- /* XXX this is non-sensical. Why should we return
- a bytes object from __str__. Is this code even
- used? - mvl */
- assert(0);
- return res;
- /*
- ress = _PyUnicode_AsDefaultEncodedString(res);
- Py_DECREF(res);
- return ress;
- */
- }
-}
static Py_hash_t
slot_tp_hash(PyObject *self)
@@ -5622,16 +5649,12 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
}
static void
-slot_tp_del(PyObject *self)
+slot_tp_finalize(PyObject *self)
{
_Py_IDENTIFIER(__del__);
PyObject *del, *res;
PyObject *error_type, *error_value, *error_traceback;
- /* Temporarily resurrect the object. */
- assert(self->ob_refcnt == 0);
- self->ob_refcnt = 1;
-
/* Save the current exception, if any. */
PyErr_Fetch(&error_type, &error_value, &error_traceback);
@@ -5648,37 +5671,6 @@ slot_tp_del(PyObject *self)
/* Restore the saved exception. */
PyErr_Restore(error_type, error_value, error_traceback);
-
- /* Undo the temporary resurrection; can't use DECREF here, it would
- * cause a recursive call.
- */
- assert(self->ob_refcnt > 0);
- if (--self->ob_refcnt == 0)
- return; /* this is the normal path out */
-
- /* __del__ resurrected it! Make it look like the original Py_DECREF
- * never happened.
- */
- {
- Py_ssize_t refcnt = self->ob_refcnt;
- _Py_NewReference(self);
- self->ob_refcnt = refcnt;
- }
- assert(!PyType_IS_GC(Py_TYPE(self)) ||
- _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED);
- /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
- * we need to undo that. */
- _Py_DEC_REFTOTAL;
- /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
- * chain, so no more to do there.
- * If COUNT_ALLOCS, the original decref bumped tp_frees, and
- * _Py_NewReference bumped tp_allocs: both of those need to be
- * undone.
- */
-#ifdef COUNT_ALLOCS
- --Py_TYPE(self)->tp_frees;
- --Py_TYPE(self)->tp_allocs;
-#endif
}
@@ -5787,7 +5779,7 @@ static slotdef slotdefs[] = {
"see help(type(x)) for signature",
PyWrapperFlag_KEYWORDS),
TPSLOT("__new__", tp_new, slot_tp_new, NULL, ""),
- TPSLOT("__del__", tp_del, slot_tp_del, NULL, ""),
+ TPSLOT("__del__", tp_finalize, slot_tp_finalize, (wrapperfunc)wrap_del, ""),
BINSLOT("__add__", nb_add, slot_nb_add,
"+"),
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 440d35a..2f437f6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -47,14 +47,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h>
#endif
-/* Endianness switches; defaults to little endian */
-
-#ifdef WORDS_BIGENDIAN
-# define BYTEORDER_IS_BIG_ENDIAN
-#else
-# define BYTEORDER_IS_LITTLE_ENDIAN
-#endif
-
/* --- Globals ------------------------------------------------------------
NOTE: In the interpreter's initialization phase, some globals are currently
@@ -130,16 +122,14 @@ extern "C" {
/* true if the Unicode object has an allocated UTF-8 memory block
(not shared with other data) */
#define _PyUnicode_HAS_UTF8_MEMORY(op) \
- (assert(_PyUnicode_CHECK(op)), \
- (!PyUnicode_IS_COMPACT_ASCII(op) \
+ ((!PyUnicode_IS_COMPACT_ASCII(op) \
&& _PyUnicode_UTF8(op) \
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
/* true if the Unicode object has an allocated wstr memory block
(not shared with other data) */
#define _PyUnicode_HAS_WSTR_MEMORY(op) \
- (assert(_PyUnicode_CHECK(op)), \
- (_PyUnicode_WSTR(op) && \
+ ((_PyUnicode_WSTR(op) && \
(!PyUnicode_IS_READY(op) || \
_PyUnicode_WSTR(op) != PyUnicode_DATA(op))))
@@ -199,6 +189,10 @@ static PyObject *unicode_empty = NULL;
return unicode_empty; \
} while (0)
+/* Forward declaration */
+Py_LOCAL_INLINE(int)
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
+
/* List of static strings. */
static _Py_Identifier *static_strings = NULL;
@@ -427,8 +421,6 @@ unicode_result_wchar(PyObject *unicode)
#ifndef Py_DEBUG
Py_ssize_t len;
- assert(Py_REFCNT(unicode) == 1);
-
len = _PyUnicode_WSTR_LENGTH(unicode);
if (len == 0) {
Py_DECREF(unicode);
@@ -445,10 +437,12 @@ unicode_result_wchar(PyObject *unicode)
}
if (_PyUnicode_Ready(unicode) < 0) {
- Py_XDECREF(unicode);
+ Py_DECREF(unicode);
return NULL;
}
#else
+ assert(Py_REFCNT(unicode) == 1);
+
/* don't make the result ready in debug mode to ensure that the caller
makes the string ready before using it */
assert(_PyUnicode_CheckConsistency(unicode, 1));
@@ -471,7 +465,9 @@ unicode_result_ready(PyObject *unicode)
}
if (length == 1) {
- Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
+ void *data = PyUnicode_DATA(unicode);
+ int kind = PyUnicode_KIND(unicode);
+ Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
if (ch < 256) {
PyObject *latin1_char = unicode_latin1[ch];
if (latin1_char != NULL) {
@@ -544,7 +540,6 @@ static OSVERSIONINFOEX winver;
static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
-#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM_LINEBREAK(ch) \
@@ -554,21 +549,40 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
Py_LOCAL_INLINE(BLOOM_MASK)
make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
{
+#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN) \
+ do { \
+ TYPE *data = (TYPE *)PTR; \
+ TYPE *end = data + LEN; \
+ Py_UCS4 ch; \
+ for (; data != end; data++) { \
+ ch = *data; \
+ MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \
+ } \
+ break; \
+ } while (0)
+
/* calculate simple bloom-style bitmask for a given unicode string */
BLOOM_MASK mask;
- Py_ssize_t i;
mask = 0;
- for (i = 0; i < len; i++)
- BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i));
-
+ switch (kind) {
+ case PyUnicode_1BYTE_KIND:
+ BLOOM_UPDATE(Py_UCS1, mask, ptr, len);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ BLOOM_UPDATE(Py_UCS2, mask, ptr, len);
+ break;
+ case PyUnicode_4BYTE_KIND:
+ BLOOM_UPDATE(Py_UCS4, mask, ptr, len);
+ break;
+ default:
+ assert(0);
+ }
return mask;
-}
-#define BLOOM_MEMBER(mask, chr, str) \
- (BLOOM(mask, chr) \
- && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0))
+#undef BLOOM_UPDATE
+}
/* Compilation of templated routines */
@@ -588,6 +602,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
@@ -598,6 +613,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
@@ -608,6 +624,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
@@ -654,6 +671,25 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
}
}
+#ifdef Py_DEBUG
+/* Fill the data of an Unicode string with invalid characters to detect bugs
+ earlier.
+
+ _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for
+ ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an
+ invalid character in Unicode 6.0. */
+static void
+unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length)
+{
+ int kind = PyUnicode_KIND(unicode);
+ Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
+ Py_ssize_t length = _PyUnicode_LENGTH(unicode);
+ if (length <= old_length)
+ return;
+ memset(data + old_length * kind, 0xff, (length - old_length) * kind);
+}
+#endif
+
static PyObject*
resize_compact(PyObject *unicode, Py_ssize_t length)
{
@@ -662,6 +698,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
Py_ssize_t new_size;
int share_wstr;
PyObject *new_unicode;
+#ifdef Py_DEBUG
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
+
assert(unicode_modifiable(unicode));
assert(PyUnicode_IS_READY(unicode));
assert(PyUnicode_IS_COMPACT(unicode));
@@ -701,6 +741,9 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
PyObject_DEL(_PyUnicode_WSTR(unicode));
_PyUnicode_WSTR(unicode) = NULL;
}
+#ifdef Py_DEBUG
+ unicode_fill_invalid(unicode, old_length);
+#endif
PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
length, 0);
assert(_PyUnicode_CheckConsistency(unicode, 0));
@@ -719,6 +762,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
Py_ssize_t char_size;
int share_wstr, share_utf8;
void *data;
+#ifdef Py_DEBUG
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
data = _PyUnicode_DATA_ANY(unicode);
char_size = PyUnicode_KIND(unicode);
@@ -754,6 +800,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
}
_PyUnicode_LENGTH(unicode) = length;
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
+#ifdef Py_DEBUG
+ unicode_fill_invalid(unicode, old_length);
+#endif
if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
assert(_PyUnicode_CheckConsistency(unicode, 0));
return 0;
@@ -806,8 +855,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
return NULL;
copy_length = _PyUnicode_WSTR_LENGTH(unicode);
copy_length = Py_MIN(copy_length, length);
- Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
- copy_length);
+ Py_MEMCPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
+ copy_length * sizeof(wchar_t));
return w;
}
}
@@ -1078,11 +1127,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
}
}
#ifdef Py_DEBUG
- /* Fill the data with invalid characters to detect bugs earlier.
- _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
- at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
- and U+FFFFFFFF is an invalid character in Unicode 6.0. */
- memset(data, 0xff, size * kind);
+ unicode_fill_invalid((PyObject*)unicode, 0);
#endif
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
return obj;
@@ -1644,38 +1689,6 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
return unicode_resize(p_unicode, length);
}
-static int
-unicode_widen(PyObject **p_unicode, Py_ssize_t length,
- unsigned int maxchar)
-{
- PyObject *result;
- assert(PyUnicode_IS_READY(*p_unicode));
- assert(length <= PyUnicode_GET_LENGTH(*p_unicode));
- if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode))
- return 0;
- result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode),
- maxchar);
- if (result == NULL)
- return -1;
- _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length);
- Py_DECREF(*p_unicode);
- *p_unicode = result;
- return 0;
-}
-
-static int
-unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
- Py_UCS4 ch)
-{
- assert(ch <= MAX_UNICODE);
- if (unicode_widen(p_unicode, *pos, ch) < 0)
- return -1;
- PyUnicode_WRITE(PyUnicode_KIND(*p_unicode),
- PyUnicode_DATA(*p_unicode),
- (*pos)++, ch);
- return 0;
-}
-
/* Copy a ASCII or latin1 char* string into a Python Unicode string.
WARNING: The function doesn't copy the terminating null character and
@@ -1692,6 +1705,14 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
switch (kind) {
case PyUnicode_1BYTE_KIND: {
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
+#ifdef Py_DEBUG
+ if (PyUnicode_IS_ASCII(unicode)) {
+ Py_UCS4 maxchar = ucs1lib_find_max_char(
+ (const Py_UCS1*)str,
+ (const Py_UCS1*)str + len);
+ assert(maxchar < 128);
+ }
+#endif
memcpy((char *) data + index, str, len);
break;
}
@@ -1937,13 +1958,17 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
assert(size > 0);
if (size == 1) {
Py_UCS4 ch = u[0];
+ int kind;
+ void *data;
if (ch < 256)
return get_latin1_char((unsigned char)ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
- PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ PyUnicode_WRITE(kind, data, 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
}
@@ -1973,13 +1998,17 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
assert(size > 0);
if (size == 1) {
Py_UCS4 ch = u[0];
+ int kind;
+ void *data;
if (ch < 256)
return get_latin1_char((unsigned char)ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
- PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ PyUnicode_WRITE(kind, data, 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
}
@@ -2278,16 +2307,9 @@ PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
static void
makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
- int zeropad, int width, int precision, char c)
+ char c)
{
*fmt++ = '%';
- if (width) {
- if (zeropad)
- *fmt++ = '0';
- fmt += sprintf(fmt, "%d", width);
- }
- if (precision)
- fmt += sprintf(fmt, ".%d", precision);
if (longflag)
*fmt++ = 'l';
else if (longlongflag) {
@@ -2312,44 +2334,139 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
*fmt = '\0';
}
-/* helper for PyUnicode_FromFormatV() */
+/* maximum number of characters required for output of %lld or %p.
+ We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
+ plus 1 for the sign. 53/22 is an upper bound for log10(256). */
+#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+
+static int
+unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
+ Py_ssize_t width, Py_ssize_t precision)
+{
+ Py_ssize_t length, fill, arglen;
+ Py_UCS4 maxchar;
+
+ if (PyUnicode_READY(str) == -1)
+ return -1;
+
+ length = PyUnicode_GET_LENGTH(str);
+ if ((precision == -1 || precision >= length)
+ && width <= length)
+ return _PyUnicodeWriter_WriteStr(writer, str);
+
+ if (precision != -1)
+ length = Py_MIN(precision, length);
+
+ arglen = Py_MAX(length, width);
+ if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+ maxchar = _PyUnicode_FindMaxChar(str, 0, length);
+ else
+ maxchar = writer->maxchar;
+
+ if (_PyUnicodeWriter_Prepare(writer, arglen, maxchar) == -1)
+ return -1;
+
+ if (width > length) {
+ fill = width - length;
+ if (PyUnicode_Fill(writer->buffer, writer->pos, fill, ' ') == -1)
+ return -1;
+ writer->pos += fill;
+ }
+
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ str, 0, length);
+ writer->pos += length;
+ return 0;
+}
+
+static int
+unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str,
+ Py_ssize_t width, Py_ssize_t precision)
+{
+ /* UTF-8 */
+ Py_ssize_t length;
+ PyObject *unicode;
+ int res;
+
+ length = strlen(str);
+ if (precision != -1)
+ length = Py_MIN(length, precision);
+ unicode = PyUnicode_DecodeUTF8Stateful(str, length, "replace", NULL);
+ if (unicode == NULL)
+ return -1;
+
+ res = unicode_fromformat_write_str(writer, unicode, width, -1);
+ Py_DECREF(unicode);
+ return res;
+}
static const char*
-parse_format_flags(const char *f,
- int *p_width, int *p_precision,
- int *p_longflag, int *p_longlongflag, int *p_size_tflag)
+unicode_fromformat_arg(_PyUnicodeWriter *writer,
+ const char *f, va_list *vargs)
{
- int width, precision, longflag, longlongflag, size_tflag;
+ const char *p;
+ Py_ssize_t len;
+ int zeropad;
+ Py_ssize_t width;
+ Py_ssize_t precision;
+ int longflag;
+ int longlongflag;
+ int size_tflag;
+ Py_ssize_t fill;
- /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+ p = f;
f++;
- width = 0;
- while (Py_ISDIGIT((unsigned)*f))
- width = (width*10) + *f++ - '0';
- precision = 0;
+ zeropad = 0;
+ if (*f == '0') {
+ zeropad = 1;
+ f++;
+ }
+
+ /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+ width = -1;
+ if (Py_ISDIGIT((unsigned)*f)) {
+ width = *f - '0';
+ f++;
+ while (Py_ISDIGIT((unsigned)*f)) {
+ if (width > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) {
+ PyErr_SetString(PyExc_ValueError,
+ "width too big");
+ return NULL;
+ }
+ width = (width * 10) + (*f - '0');
+ f++;
+ }
+ }
+ precision = -1;
if (*f == '.') {
f++;
- while (Py_ISDIGIT((unsigned)*f))
- precision = (precision*10) + *f++ - '0';
+ if (Py_ISDIGIT((unsigned)*f)) {
+ precision = (*f - '0');
+ f++;
+ while (Py_ISDIGIT((unsigned)*f)) {
+ if (precision > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) {
+ PyErr_SetString(PyExc_ValueError,
+ "precision too big");
+ return NULL;
+ }
+ precision = (precision * 10) + (*f - '0');
+ f++;
+ }
+ }
if (*f == '%') {
/* "%.3%s" => f points to "3" */
f--;
}
}
if (*f == '\0') {
- /* bogus format "%.1" => go backward, f points to "1" */
+ /* bogus format "%.123" => go backward, f points to "3" */
f--;
}
- if (p_width != NULL)
- *p_width = width;
- if (p_precision != NULL)
- *p_precision = precision;
/* Handle %ld, %lu, %lld and %llu. */
longflag = 0;
longlongflag = 0;
size_tflag = 0;
-
if (*f == 'l') {
if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') {
longflag = 1;
@@ -2368,499 +2485,289 @@ parse_format_flags(const char *f,
size_tflag = 1;
++f;
}
- if (p_longflag != NULL)
- *p_longflag = longflag;
- if (p_longlongflag != NULL)
- *p_longlongflag = longlongflag;
- if (p_size_tflag != NULL)
- *p_size_tflag = size_tflag;
- return f;
-}
-/* maximum number of characters required for output of %ld. 21 characters
- allows for 64-bit integers (in decimal) and an optional sign. */
-#define MAX_LONG_CHARS 21
-/* maximum number of characters required for output of %lld.
- We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
- plus 1 for the sign. 53/22 is an upper bound for log10(256). */
-#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+ if (f[1] == '\0')
+ writer->overallocate = 0;
-PyObject *
-PyUnicode_FromFormatV(const char *format, va_list vargs)
-{
- va_list count;
- Py_ssize_t callcount = 0;
- PyObject **callresults = NULL;
- PyObject **callresult = NULL;
- Py_ssize_t n = 0;
- int width = 0;
- int precision = 0;
- int zeropad;
- const char* f;
- PyObject *string;
- /* used by sprintf */
- char fmt[61]; /* should be enough for %0width.precisionlld */
- Py_UCS4 maxchar = 127; /* result is ASCII by default */
- Py_UCS4 argmaxchar;
- Py_ssize_t numbersize = 0;
- char *numberresults = NULL;
- char *numberresult = NULL;
- Py_ssize_t i;
- int kind;
- void *data;
+ switch (*f) {
+ case 'c':
+ {
+ int ordinal = va_arg(*vargs, int);
+ if (ordinal < 0 || ordinal > MAX_UNICODE) {
+ PyErr_SetString(PyExc_OverflowError,
+ "character argument not in range(0x110000)");
+ return NULL;
+ }
+ if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0)
+ return NULL;
+ break;
+ }
- Py_VA_COPY(count, vargs);
- /* step 1: count the number of %S/%R/%A/%s format specifications
- * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
- * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
- * result in an array)
- * also estimate a upper bound for all the number formats in the string,
- * numbers will be formatted in step 3 and be kept in a '\0'-separated
- * buffer before putting everything together. */
- for (f = format; *f; f++) {
- if (*f == '%') {
- int longlongflag;
- /* skip width or width.precision (eg. "1.2" of "%1.2f") */
- f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL);
- if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
- ++callcount;
+ case 'i':
+ case 'd':
+ case 'u':
+ case 'x':
+ {
+ /* used by sprintf */
+ char fmt[10]; /* should be enough for "%0lld\0" */
+ char buffer[MAX_LONG_LONG_CHARS];
+ Py_ssize_t arglen;
+
+ if (*f == 'u') {
+ makefmt(fmt, longflag, longlongflag, size_tflag, *f);
- else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') {
+ if (longflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, unsigned long));
#ifdef HAVE_LONG_LONG
- if (longlongflag) {
- if (width < MAX_LONG_LONG_CHARS)
- width = MAX_LONG_LONG_CHARS;
- }
- else
+ else if (longlongflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, unsigned PY_LONG_LONG));
#endif
- /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
- including sign. Decimal takes the most space. This
- isn't enough for octal. If a width is specified we
- need more (which we allocate later). */
- if (width < MAX_LONG_CHARS)
- width = MAX_LONG_CHARS;
-
- /* account for the size + '\0' to separate numbers
- inside of the numberresults buffer */
- numbersize += (width + 1);
- }
+ else if (size_tflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, size_t));
+ else
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, unsigned int));
+ }
+ else if (*f == 'x') {
+ makefmt(fmt, 0, 0, 0, 'x');
+ len = sprintf(buffer, fmt, va_arg(*vargs, int));
+ }
+ else {
+ makefmt(fmt, longflag, longlongflag, size_tflag, *f);
+
+ if (longflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, long));
+#ifdef HAVE_LONG_LONG
+ else if (longlongflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, PY_LONG_LONG));
+#endif
+ else if (size_tflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, Py_ssize_t));
+ else
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, int));
}
- else if ((unsigned char)*f > 127) {
- PyErr_Format(PyExc_ValueError,
- "PyUnicode_FromFormatV() expects an ASCII-encoded format "
- "string, got a non-ASCII byte: 0x%02x",
- (unsigned char)*f);
+ assert(len >= 0);
+
+ if (precision < len)
+ precision = len;
+
+ arglen = Py_MAX(precision, width);
+ assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127);
+ if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
return NULL;
+
+ if (width > precision) {
+ Py_UCS4 fillchar;
+ fill = width - precision;
+ fillchar = zeropad?'0':' ';
+ if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1)
+ return NULL;
+ writer->pos += fill;
}
+ if (precision > len) {
+ fill = precision - len;
+ if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1)
+ return NULL;
+ writer->pos += fill;
+ }
+
+ unicode_write_cstr(writer->buffer, writer->pos, buffer, len);
+ writer->pos += len;
+ break;
}
- /* step 2: allocate memory for the results of
- * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
- if (callcount) {
- callresults = PyObject_Malloc(sizeof(PyObject *) * callcount);
- if (!callresults) {
- PyErr_NoMemory();
+
+ case 'p':
+ {
+ char number[MAX_LONG_LONG_CHARS];
+
+ len = sprintf(number, "%p", va_arg(*vargs, void*));
+ assert(len >= 0);
+
+ /* %p is ill-defined: ensure leading 0x. */
+ if (number[1] == 'X')
+ number[1] = 'x';
+ else if (number[1] != 'x') {
+ memmove(number + 2, number,
+ strlen(number) + 1);
+ number[0] = '0';
+ number[1] = 'x';
+ len += 2;
+ }
+
+ assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127);
+ if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+ return NULL;
+ unicode_write_cstr(writer->buffer, writer->pos, number, len);
+ writer->pos += len;
+ break;
+ }
+
+ case 's':
+ {
+ /* UTF-8 */
+ const char *s = va_arg(*vargs, const char*);
+ if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0)
+ return NULL;
+ break;
+ }
+
+ case 'U':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ assert(obj && _PyUnicode_CHECK(obj));
+
+ if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
return NULL;
+ break;
+ }
+
+ case 'V':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ const char *str = va_arg(*vargs, const char *);
+ if (obj) {
+ assert(_PyUnicode_CHECK(obj));
+ if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
+ return NULL;
+ }
+ else {
+ assert(str != NULL);
+ if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0)
+ return NULL;
}
- callresult = callresults;
+ break;
}
- /* step 2.5: allocate memory for the results of formating numbers */
- if (numbersize) {
- numberresults = PyObject_Malloc(numbersize);
- if (!numberresults) {
- PyErr_NoMemory();
- goto fail;
+
+ case 'S':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ PyObject *str;
+ assert(obj);
+ str = PyObject_Str(obj);
+ if (!str)
+ return NULL;
+ if (unicode_fromformat_write_str(writer, str, width, precision) == -1) {
+ Py_DECREF(str);
+ return NULL;
}
- numberresult = numberresults;
+ Py_DECREF(str);
+ break;
}
- /* step 3: format numbers and figure out how large a buffer we need */
- for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p;
- int longflag;
- int longlongflag;
- int size_tflag;
- int numprinted;
+ case 'R':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ PyObject *repr;
+ assert(obj);
+ repr = PyObject_Repr(obj);
+ if (!repr)
+ return NULL;
+ if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) {
+ Py_DECREF(repr);
+ return NULL;
+ }
+ Py_DECREF(repr);
+ break;
+ }
- p = f;
- zeropad = (f[1] == '0');
- f = parse_format_flags(f, &width, &precision,
- &longflag, &longlongflag, &size_tflag);
- switch (*f) {
- case 'c':
- {
- int ordinal = va_arg(count, int);
- if (ordinal < 0 || ordinal > MAX_UNICODE) {
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000)");
- goto fail;
- }
- maxchar = Py_MAX(maxchar, (Py_UCS4)ordinal);
- n++;
- break;
- }
- case '%':
- n++;
- break;
- case 'i':
- case 'd':
- makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
- width, precision, *f);
- if (longflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, long));
-#ifdef HAVE_LONG_LONG
- else if (longlongflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, PY_LONG_LONG));
-#endif
- else if (size_tflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, Py_ssize_t));
- else
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, int));
- n += numprinted;
- /* advance by +1 to skip over the '\0' */
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 'u':
- makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
- width, precision, 'u');
- if (longflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, unsigned long));
-#ifdef HAVE_LONG_LONG
- else if (longlongflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, unsigned PY_LONG_LONG));
-#endif
- else if (size_tflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, size_t));
- else
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, unsigned int));
- n += numprinted;
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 'x':
- makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
- numprinted = sprintf(numberresult, fmt, va_arg(count, int));
- n += numprinted;
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 'p':
- numprinted = sprintf(numberresult, "%p", va_arg(count, void*));
- /* %p is ill-defined: ensure leading 0x. */
- if (numberresult[1] == 'X')
- numberresult[1] = 'x';
- else if (numberresult[1] != 'x') {
- memmove(numberresult + 2, numberresult,
- strlen(numberresult) + 1);
- numberresult[0] = '0';
- numberresult[1] = 'x';
- numprinted += 2;
- }
- n += numprinted;
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 's':
- {
- /* UTF-8 */
- const char *s = va_arg(count, const char*);
- PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL);
- if (!str)
- goto fail;
- /* since PyUnicode_DecodeUTF8 returns already flexible
- unicode objects, there is no need to call ready on them */
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(str);
- /* Remember the str and switch to the next slot */
- *callresult++ = str;
- break;
- }
- case 'U':
- {
- PyObject *obj = va_arg(count, PyObject *);
- assert(obj && _PyUnicode_CHECK(obj));
- if (PyUnicode_READY(obj) == -1)
- goto fail;
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(obj);
- break;
- }
- case 'V':
- {
- PyObject *obj = va_arg(count, PyObject *);
- const char *str = va_arg(count, const char *);
- PyObject *str_obj;
- assert(obj || str);
- assert(!obj || _PyUnicode_CHECK(obj));
- if (obj) {
- if (PyUnicode_READY(obj) == -1)
- goto fail;
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(obj);
- *callresult++ = NULL;
- }
- else {
- str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL);
- if (!str_obj)
- goto fail;
- if (PyUnicode_READY(str_obj) == -1) {
- Py_DECREF(str_obj);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(str_obj);
- *callresult++ = str_obj;
- }
- break;
- }
- case 'S':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *str;
- assert(obj);
- str = PyObject_Str(obj);
- if (!str)
- goto fail;
- if (PyUnicode_READY(str) == -1) {
- Py_DECREF(str);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(str);
- /* Remember the str and switch to the next slot */
- *callresult++ = str;
- break;
- }
- case 'R':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *repr;
- assert(obj);
- repr = PyObject_Repr(obj);
- if (!repr)
- goto fail;
- if (PyUnicode_READY(repr) == -1) {
- Py_DECREF(repr);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(repr);
- /* Remember the repr and switch to the next slot */
- *callresult++ = repr;
- break;
- }
- case 'A':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *ascii;
- assert(obj);
- ascii = PyObject_ASCII(obj);
- if (!ascii)
- goto fail;
- if (PyUnicode_READY(ascii) == -1) {
- Py_DECREF(ascii);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(ascii);
- /* Remember the repr and switch to the next slot */
- *callresult++ = ascii;
- break;
- }
- default:
- /* if we stumble upon an unknown
- formatting code, copy the rest of
- the format string to the output
- string. (we cannot just skip the
- code, since there's no way to know
- what's in the argument list) */
- n += strlen(p);
- goto expand;
- }
- } else
- n++;
- }
- expand:
- /* step 4: fill the buffer */
- /* Since we've analyzed how much space we need,
- we don't have to resize the string.
- There can be no errors beyond this point. */
- string = PyUnicode_New(n, maxchar);
- if (!string)
- goto fail;
- kind = PyUnicode_KIND(string);
- data = PyUnicode_DATA(string);
- callresult = callresults;
- numberresult = numberresults;
+ case 'A':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ PyObject *ascii;
+ assert(obj);
+ ascii = PyObject_ASCII(obj);
+ if (!ascii)
+ return NULL;
+ if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) {
+ Py_DECREF(ascii);
+ return NULL;
+ }
+ Py_DECREF(ascii);
+ break;
+ }
+
+ case '%':
+ if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+ return NULL;
+ break;
+
+ default:
+ /* if we stumble upon an unknown formatting code, copy the rest
+ of the format string to the output string. (we cannot just
+ skip the code, since there's no way to know what's in the
+ argument list) */
+ len = strlen(p);
+ if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1)
+ return NULL;
+ f = p+len;
+ return f;
+ }
+
+ f++;
+ return f;
+}
+
+/* Build a str object from a printf-style, ASCII-only format string and a
+ va_list. Runs of literal characters are copied into a _PyUnicodeWriter;
+ every '%' directive is delegated to unicode_fromformat_arg(), which
+ consumes its arguments from a private copy of the va_list.
+ Returns the object produced by _PyUnicodeWriter_Finish(), or NULL on
+ failure. Every failure exits through the fail label so that the writer
+ is deallocated and the va_list copy is ended. */
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+ va_list vargs2;
+ const char *f;
+ _PyUnicodeWriter writer;
- for (i = 0, f = format; *f; f++) {
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = strlen(format) + 100;
+ writer.overallocate = 1;
+
+ /* va_list may be an array (of 1 item) on some platforms (ex: AMD64).
+ Copy it to be able to pass a reference to a subfunction. */
+ Py_VA_COPY(vargs2, vargs);
+
+ for (f = format; *f; ) {
 if (*f == '%') {
- const char* p;
+ f = unicode_fromformat_arg(&writer, f, &vargs2);
+ if (f == NULL)
+ goto fail;
+ }
+ else {
+ const char *p;
+ Py_ssize_t len;
 p = f;
- f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
- /* checking for == because the last argument could be a empty
- string, which causes i to point to end, the assert at the end of
- the loop */
- assert(i <= PyUnicode_GET_LENGTH(string));
-
- switch (*f) {
- case 'c':
- {
- const int ordinal = va_arg(vargs, int);
- PyUnicode_WRITE(kind, data, i++, ordinal);
- break;
- }
- case 'i':
- case 'd':
- case 'u':
- case 'x':
- case 'p':
- {
- Py_ssize_t len;
- /* unused, since we already have the result */
- if (*f == 'p')
- (void) va_arg(vargs, void *);
- else
- (void) va_arg(vargs, int);
- /* extract the result from numberresults and append. */
- len = strlen(numberresult);
- unicode_write_cstr(string, i, numberresult, len);
- /* skip over the separating '\0' */
- i += len;
- numberresult += len;
- assert(*numberresult == '\0');
- numberresult++;
- assert(numberresult <= numberresults + numbersize);
- break;
- }
- case 's':
- {
- /* unused, since we already have the result */
- Py_ssize_t size;
- (void) va_arg(vargs, char *);
- size = PyUnicode_GET_LENGTH(*callresult);
- assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
- i += size;
- /* We're done with the unicode()/repr() => forget it */
- Py_DECREF(*callresult);
- /* switch to next unicode()/repr() result */
- ++callresult;
- break;
- }
- case 'U':
+ do
 {
- PyObject *obj = va_arg(vargs, PyObject *);
- Py_ssize_t size;
- assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
- size = PyUnicode_GET_LENGTH(obj);
- _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
- i += size;
- break;
- }
- case 'V':
- {
- Py_ssize_t size;
- PyObject *obj = va_arg(vargs, PyObject *);
- va_arg(vargs, const char *);
- if (obj) {
- size = PyUnicode_GET_LENGTH(obj);
- assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
- i += size;
- } else {
- size = PyUnicode_GET_LENGTH(*callresult);
- assert(PyUnicode_KIND(*callresult) <=
- PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
- i += size;
- Py_DECREF(*callresult);
+ if ((unsigned char)*p > 127) {
+ PyErr_Format(PyExc_ValueError,
+ "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+ "string, got a non-ASCII byte: 0x%02x",
+ (unsigned char)*p);
+ /* leave through fail: the writer may already own a buffer */
+ goto fail;
 }
- ++callresult;
- break;
- }
- case 'S':
- case 'R':
- case 'A':
- {
- Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
- /* unused, since we already have the result */
- (void) va_arg(vargs, PyObject *);
- assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
- i += size;
- /* We're done with the unicode()/repr() => forget it */
- Py_DECREF(*callresult);
- /* switch to next unicode()/repr() result */
- ++callresult;
- break;
- }
- case '%':
- PyUnicode_WRITE(kind, data, i++, '%');
- break;
- default:
- {
- Py_ssize_t len = strlen(p);
- unicode_write_cstr(string, i, p, len);
- i += len;
- assert(i == PyUnicode_GET_LENGTH(string));
- goto end;
- }
+ p++;
 }
- }
- else {
- assert(i < PyUnicode_GET_LENGTH(string));
- PyUnicode_WRITE(kind, data, i++, *f);
+ while (*p != '\0' && *p != '%');
+ len = p - f;
+
+ if (*p == '\0')
+ writer.overallocate = 0;
+ if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1)
+ goto fail;
+ unicode_write_cstr(writer.buffer, writer.pos, f, len);
+ writer.pos += len;
+
+ f = p;
 }
 }
- assert(i == PyUnicode_GET_LENGTH(string));
+ /* pair the Py_VA_COPY above with va_end before leaving */
+ va_end(vargs2);
+ return _PyUnicodeWriter_Finish(&writer);
- end:
- if (callresults)
- PyObject_Free(callresults);
- if (numberresults)
- PyObject_Free(numberresults);
- return unicode_result(string);
 fail:
- if (callresults) {
- PyObject **callresult2 = callresults;
- while (callresult2 < callresult) {
- Py_XDECREF(*callresult2);
- ++callresult2;
- }
- PyObject_Free(callresults);
- }
- if (numberresults)
- PyObject_Free(numberresults);
+ va_end(vargs2);
+ _PyUnicodeWriter_Dealloc(&writer);
 return NULL;
 }
@@ -2968,6 +2875,9 @@ PyObject *
PyUnicode_FromOrdinal(int ordinal)
{
PyObject *v;
+ void *data;
+ int kind;
+
if (ordinal < 0 || ordinal > MAX_UNICODE) {
PyErr_SetString(PyExc_ValueError,
"chr() arg not in range(0x110000)");
@@ -2980,7 +2890,9 @@ PyUnicode_FromOrdinal(int ordinal)
v = PyUnicode_New(1, ordinal);
if (v == NULL)
return NULL;
- PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal);
+ kind = PyUnicode_KIND(v);
+ data = PyUnicode_DATA(v);
+ PyUnicode_WRITE(kind, data, 0, ordinal);
assert(_PyUnicode_CheckConsistency(v, 1));
return v;
}
@@ -3060,8 +2972,8 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
1 on success. */
int
_Py_normalize_encoding(const char *encoding,
- char *lower,
- size_t lower_len)
+ char *lower,
+ size_t lower_len)
{
const char *e;
char *l;
@@ -3343,7 +3255,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
}
if (surrogateescape) {
- /* locale encoding with surrogateescape */
+ /* "surrogateescape" error handler */
char *str;
str = _Py_wchar2char(wstr, &error_pos);
@@ -3363,6 +3275,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
PyMem_Free(str);
}
else {
+ /* strict mode */
size_t len, len2;
len = wcstombs(NULL, wstr, 0);
@@ -3401,7 +3314,7 @@ encode_error:
wstr = _Py_char2wchar(errmsg, &errlen);
if (wstr != NULL) {
reason = PyUnicode_FromWideChar(wstr, errlen);
- PyMem_Free(wstr);
+ PyMem_RawFree(wstr);
} else
errmsg = NULL;
}
@@ -3608,8 +3521,8 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
return NULL;
}
- if (surrogateescape)
- {
+ if (surrogateescape) {
+ /* "surrogateescape" error handler */
wstr = _Py_char2wchar(str, &wlen);
if (wstr == NULL) {
if (wlen == (size_t)-1)
@@ -3620,9 +3533,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
}
unicode = PyUnicode_FromWideChar(wstr, wlen);
- PyMem_Free(wstr);
+ PyMem_RawFree(wstr);
}
else {
+ /* strict mode */
#ifndef HAVE_BROKEN_MBSTOWCS
wlen = mbstowcs(NULL, str, 0);
#else
@@ -3642,7 +3556,6 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
return PyErr_NoMemory();
}
- /* This shouldn't fail now */
wlen2 = mbstowcs(wstr, str, wlen+1);
if (wlen2 == (size_t)-1) {
if (wstr != smallbuf)
@@ -3668,7 +3581,7 @@ decode_error:
wstr = _Py_char2wchar(errmsg, &errlen);
if (wstr != NULL) {
reason = PyUnicode_FromWideChar(wstr, errlen);
- PyMem_Free(wstr);
+ PyMem_RawFree(wstr);
} else
errmsg = NULL;
}
@@ -3736,18 +3649,20 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
 int
-_PyUnicode_HasNULChars(PyObject* s)
+_PyUnicode_HasNULChars(PyObject* str) /* returns 1 if findchar() locates a '\0' in str, 0 if it reports -1, and -1 if PyUnicode_READY() fails */
 {
- static PyObject *nul = NULL;
+ Py_ssize_t pos; /* result of the findchar() scan for '\0' below */
- if (nul == NULL)
- nul = PyUnicode_FromStringAndSize("\0", 1);
- if (nul == NULL)
+ if (PyUnicode_READY(str) == -1) /* READY failed: report the error as -1 */
 return -1;
- return PyUnicode_Contains(s, nul);
+ pos = findchar(PyUnicode_DATA(str), PyUnicode_KIND(str),
+ PyUnicode_GET_LENGTH(str), '\0', 1); /* NOTE(review): the final argument 1 presumably selects a forward search -- confirm against findchar() */
+ if (pos == -1) /* -1 from findchar() is treated as "no NUL character" */
+ return 0;
+ else
+ return 1;
 }
-
int
PyUnicode_FSConverter(PyObject* arg, void* addr)
{
@@ -4019,6 +3934,9 @@ PyUnicode_GetLength(PyObject *unicode)
Py_UCS4
PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
{
+ void *data;
+ int kind;
+
if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) {
PyErr_BadArgument();
return (Py_UCS4)-1;
@@ -4027,7 +3945,9 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
PyErr_SetString(PyExc_IndexError, "string index out of range");
return (Py_UCS4)-1;
}
- return PyUnicode_READ_CHAR(unicode, index);
+ data = PyUnicode_DATA(unicode);
+ kind = PyUnicode_KIND(unicode);
+ return PyUnicode_READ(kind, data, index);
}
int
@@ -4086,6 +4006,7 @@ onError:
*exceptionObject = NULL;
}
+#ifdef HAVE_MBCS
/* error handling callback helper:
build arguments, call the callback and check the arguments,
if no exception occurred, copy the replacement to the output
@@ -4094,11 +4015,12 @@ onError:
*/
static int
-unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
- const char *encoding, const char *reason,
- const char **input, const char **inend, Py_ssize_t *startinpos,
- Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
- PyObject **output, Py_ssize_t *outpos)
+unicode_decode_call_errorhandler_wchar(
+ const char *errors, PyObject **errorHandler,
+ const char *encoding, const char *reason,
+ const char **input, const char **inend, Py_ssize_t *startinpos,
+ Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+ PyObject **output, Py_ssize_t *outpos)
{
static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
@@ -4109,12 +4031,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
Py_ssize_t requiredsize;
Py_ssize_t newpos;
PyObject *inputobj = NULL;
- int res = -1;
+ wchar_t *repwstr;
+ Py_ssize_t repwlen;
- if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
- outsize = PyUnicode_GET_LENGTH(*output);
- else
- outsize = _PyUnicode_WSTR_LENGTH(*output);
+ assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND);
+ outsize = _PyUnicode_WSTR_LENGTH(*output);
if (*errorHandler == NULL) {
*errorHandler = PyCodec_LookupError(errors);
@@ -4139,8 +4060,6 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
}
if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
goto onError;
- if (PyUnicode_READY(repunicode) == -1)
- goto onError;
/* Copy back the bytes variables, which might have been modified by the
callback */
@@ -4164,54 +4083,118 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
goto onError;
}
- if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
- /* need more space? (at least enough for what we
- have+the replacement+the rest of the string (starting
- at the new input position), so we won't have to check space
- when there are no errors in the rest of the string) */
- Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
- requiredsize = *outpos + replen + insize-newpos;
- if (requiredsize > outsize) {
- if (requiredsize<2*outsize)
- requiredsize = 2*outsize;
- if (unicode_resize(output, requiredsize) < 0)
- goto onError;
- }
- if (unicode_widen(output, *outpos,
- PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
+ repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
+ if (repwstr == NULL)
+ goto onError;
+ /* need more space? (at least enough for what we
+ have+the replacement+the rest of the string (starting
+ at the new input position), so we won't have to check space
+ when there are no errors in the rest of the string) */
+ requiredsize = *outpos + repwlen + insize-newpos;
+ if (requiredsize > outsize) {
+ if (requiredsize < 2*outsize)
+ requiredsize = 2*outsize;
+ if (unicode_resize(output, requiredsize) < 0)
goto onError;
- _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
- *outpos += replen;
}
- else {
- wchar_t *repwstr;
- Py_ssize_t repwlen;
- repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
- if (repwstr == NULL)
+ wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
+ *outpos += repwlen;
+
+ *endinpos = newpos;
+ *inptr = *input + newpos;
+
+ /* we made it! */
+ Py_XDECREF(restuple);
+ return 0;
+
+ onError:
+ Py_XDECREF(restuple);
+ return -1;
+}
+#endif /* HAVE_MBCS */
+
+/* Error handling callback helper for decoders that write into a
+ _PyUnicodeWriter: look up the error handler named by errors (cached in
+ *errorHandler), build the decode exception, call the handler and check
+ that it returned a (str, int) tuple. The replacement string is appended
+ to writer. *input and *inend are refreshed from the exception's object,
+ which the callback may have replaced; a negative new position counts from
+ the end of that input. *endinpos and *inptr are set to the new position.
+ Returns 0 on success and -1 on failure. */
+static int
+unicode_decode_call_errorhandler_writer(
+ const char *errors, PyObject **errorHandler,
+ const char *encoding, const char *reason,
+ const char **input, const char **inend, Py_ssize_t *startinpos,
+ Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+ _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */)
+{
+ static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
+
+ PyObject *restuple = NULL;
+ PyObject *repunicode = NULL;
+ Py_ssize_t insize;
+ Py_ssize_t newpos;
+ Py_ssize_t replen;
+ PyObject *inputobj = NULL;
+
+ if (*errorHandler == NULL) {
+ *errorHandler = PyCodec_LookupError(errors);
+ if (*errorHandler == NULL)
 goto onError;
- /* need more space? (at least enough for what we
- have+the replacement+the rest of the string (starting
- at the new input position), so we won't have to check space
- when there are no errors in the rest of the string) */
- requiredsize = *outpos + repwlen + insize-newpos;
- if (requiredsize > outsize) {
- if (requiredsize < 2*outsize)
- requiredsize = 2*outsize;
- if (unicode_resize(output, requiredsize) < 0)
- goto onError;
- }
- wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
- *outpos += repwlen;
 }
+
+ make_decode_exception(exceptionObject,
+ encoding,
+ *input, *inend - *input,
+ *startinpos, *endinpos,
+ reason);
+ if (*exceptionObject == NULL)
+ goto onError;
+
+ restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
+ if (restuple == NULL)
+ goto onError;
+ if (!PyTuple_Check(restuple)) {
+ PyErr_SetString(PyExc_TypeError, &argparse[4]);
+ goto onError;
+ }
+ if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+ goto onError;
+
+ /* Copy back the bytes variables, which might have been modified by the
+ callback */
+ inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
+ if (!inputobj)
+ goto onError;
+ if (!PyBytes_Check(inputobj)) {
+ PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
+ /* do not read a non-bytes object through the PyBytes_* macros below:
+ drop our reference and report the failure */
+ Py_DECREF(inputobj);
+ goto onError;
+ }
+ *input = PyBytes_AS_STRING(inputobj);
+ insize = PyBytes_GET_SIZE(inputobj);
+ *inend = *input + insize;
+ /* we can DECREF safely, as the exception has another reference,
+ so the object won't go away. */
+ Py_DECREF(inputobj);
+
+ if (newpos<0)
+ newpos = insize+newpos;
+ if (newpos<0 || newpos>insize) {
+ PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
+ goto onError;
+ }
+
+ if (PyUnicode_READY(repunicode) < 0)
+ goto onError;
+ replen = PyUnicode_GET_LENGTH(repunicode);
+ writer->min_length += replen;
+ if (replen > 1)
+ writer->overallocate = 1;
+ if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1)
+ goto onError;
+
 *endinpos = newpos;
 *inptr = *input + newpos;
 /* we made it! */
- res = 0;
+ Py_XDECREF(restuple);
+ return 0;
 onError:
 Py_XDECREF(restuple);
- return res;
+ return -1;
 }
/* --- UTF-7 Codec -------------------------------------------------------- */
@@ -4319,9 +4302,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
- Py_ssize_t outpos;
const char *e;
- PyObject *unicode;
+ _PyUnicodeWriter writer;
const char *errmsg = "";
int inShift = 0;
Py_ssize_t shiftOutStart;
@@ -4331,17 +4313,17 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
- /* Start off assuming it's all ASCII. Widen later as necessary. */
- unicode = PyUnicode_New(size, 127);
- if (!unicode)
- return NULL;
if (size == 0) {
if (consumed)
*consumed = 0;
- return unicode;
+ _Py_RETURN_UNICODE_EMPTY();
}
- shiftOutStart = outpos = 0;
+ /* Start off assuming it's all ASCII. Widen later as necessary. */
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = size;
+
+ shiftOutStart = 0;
e = s + size;
while (s < e) {
@@ -4363,13 +4345,13 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
/* expecting a second surrogate */
if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh);
- if (unicode_putchar(&unicode, &outpos, ch2) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch2) < 0)
goto onError;
surrogate = 0;
continue;
}
else {
- if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
goto onError;
surrogate = 0;
}
@@ -4379,7 +4361,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
surrogate = outCh;
}
else {
- if (unicode_putchar(&unicode, &outpos, outCh) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, outCh) < 0)
goto onError;
}
}
@@ -4388,7 +4370,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 0;
s++;
if (surrogate) {
- if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
goto onError;
surrogate = 0;
}
@@ -4409,7 +4391,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
if (ch != '-') {
/* '-' is absorbed; other terminating
characters are preserved */
- if (unicode_putchar(&unicode, &outpos, ch) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
goto onError;
}
}
@@ -4419,19 +4401,19 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
s++; /* consume '+' */
if (s < e && *s == '-') { /* '+-' encodes '+' */
s++;
- if (unicode_putchar(&unicode, &outpos, '+') < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
goto onError;
}
else { /* begin base64-encoded section */
inShift = 1;
- shiftOutStart = outpos;
+ shiftOutStart = writer.pos;
base64bits = 0;
}
}
else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
- if (unicode_putchar(&unicode, &outpos, ch) < 0)
- goto onError;
s++;
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+ goto onError;
}
else {
startinpos = s-starts;
@@ -4442,11 +4424,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
continue;
utf7Error:
endinpos = s-starts;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"utf7", errmsg,
&starts, &e, &startinpos, &endinpos, &exc, &s,
- &unicode, &outpos))
+ &writer))
goto onError;
}
@@ -4458,11 +4440,11 @@ utf7Error:
(base64bits >= 6) ||
(base64bits > 0 && base64buffer != 0)) {
endinpos = size;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"utf7", "unterminated shift sequence",
&starts, &e, &startinpos, &endinpos, &exc, &s,
- &unicode, &outpos))
+ &writer))
goto onError;
if (s < e)
goto restart;
@@ -4472,7 +4454,7 @@ utf7Error:
/* return state */
if (consumed) {
if (inShift) {
- outpos = shiftOutStart; /* back off output */
+ writer.pos = shiftOutStart; /* back off output */
*consumed = startinpos;
}
else {
@@ -4480,17 +4462,14 @@ utf7Error:
}
}
- if (unicode_resize(&unicode, outpos) < 0)
- goto onError;
-
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(unicode);
+ return _PyUnicodeWriter_Finish(&writer);
onError:
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- Py_DECREF(unicode);
+ _PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
@@ -4573,7 +4552,7 @@ encode_char:
/* code first surrogate */
base64bits += 16;
- base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
+ base64buffer = (base64buffer << 16) | Py_UNICODE_HIGH_SURROGATE(ch);
while (base64bits >= 6) {
*out++ = TO_BASE64(base64buffer >> (base64bits-6));
base64bits -= 6;
@@ -4725,10 +4704,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
const char *errors,
Py_ssize_t *consumed)
{
- PyObject *unicode;
+ _PyUnicodeWriter writer;
const char *starts = s;
const char *end = s + size;
- Py_ssize_t outpos;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
@@ -4749,29 +4727,26 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
return get_latin1_char((unsigned char)s[0]);
}
- unicode = PyUnicode_New(size, 127);
- if (!unicode)
- return NULL;
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = size;
+ if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+ goto onError;
- outpos = ascii_decode(s, end, PyUnicode_1BYTE_DATA(unicode));
- s += outpos;
+ writer.pos = ascii_decode(s, end, writer.data);
+ s += writer.pos;
while (s < end) {
Py_UCS4 ch;
- int kind = PyUnicode_KIND(unicode);
+ int kind = writer.kind;
if (kind == PyUnicode_1BYTE_KIND) {
- if (PyUnicode_IS_ASCII(unicode))
- ch = asciilib_utf8_decode(&s, end,
- PyUnicode_1BYTE_DATA(unicode), &outpos);
+ if (PyUnicode_IS_ASCII(writer.buffer))
+ ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos);
else
- ch = ucs1lib_utf8_decode(&s, end,
- PyUnicode_1BYTE_DATA(unicode), &outpos);
+ ch = ucs1lib_utf8_decode(&s, end, writer.data, &writer.pos);
} else if (kind == PyUnicode_2BYTE_KIND) {
- ch = ucs2lib_utf8_decode(&s, end,
- PyUnicode_2BYTE_DATA(unicode), &outpos);
+ ch = ucs2lib_utf8_decode(&s, end, writer.data, &writer.pos);
} else {
assert(kind == PyUnicode_4BYTE_KIND);
- ch = ucs4lib_utf8_decode(&s, end,
- PyUnicode_4BYTE_DATA(unicode), &outpos);
+ ch = ucs4lib_utf8_decode(&s, end, writer.data, &writer.pos);
}
switch (ch) {
@@ -4795,35 +4770,31 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
endinpos = startinpos + ch - 1;
break;
default:
- if (unicode_putchar(&unicode, &outpos, ch) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
goto onError;
continue;
}
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"utf-8", errmsg,
&starts, &end, &startinpos, &endinpos, &exc, &s,
- &unicode, &outpos))
+ &writer))
goto onError;
}
End:
- if (unicode_resize(&unicode, outpos) < 0)
- goto onError;
-
if (consumed)
*consumed = s - starts;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- assert(_PyUnicode_CheckConsistency(unicode, 1));
- return unicode;
+ return _PyUnicodeWriter_Finish(&writer);
onError:
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- Py_XDECREF(unicode);
+ _PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
@@ -4833,7 +4804,7 @@ onError:
used to decode the command line arguments on Mac OS X.
Return a pointer to a newly allocated wide character string (use
- PyMem_Free() to free the memory), or NULL on memory allocation error. */
+ PyMem_RawFree() to free the memory), or NULL on memory allocation error. */
wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -4846,7 +4817,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
character count */
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
return NULL;
- unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
+ unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t));
if (!unicode)
return NULL;
@@ -4969,17 +4940,10 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
- Py_ssize_t outpos;
- PyObject *unicode;
+ _PyUnicodeWriter writer;
const unsigned char *q, *e;
- int bo = 0; /* assume native ordering by default */
+ int le, bo = 0; /* assume native ordering by default */
const char *errmsg = "";
- /* Offsets from q for retrieving bytes in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- int iorder[] = {0, 1, 2, 3};
-#else
- int iorder[] = {3, 2, 1, 0};
-#endif
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
@@ -4993,107 +4957,106 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
byte order setting accordingly. In native mode, the leading BOM
mark is skipped, in all other modes, it is copied to the output
stream as-is (giving a ZWNBSP character). */
- if (bo == 0) {
- if (size >= 4) {
- const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
- (q[iorder[1]] << 8) | q[iorder[0]];
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- if (bom == 0x0000FEFF) {
- q += 4;
- bo = -1;
- }
- else if (bom == 0xFFFE0000) {
- q += 4;
- bo = 1;
- }
-#else
- if (bom == 0x0000FEFF) {
- q += 4;
- bo = 1;
- }
- else if (bom == 0xFFFE0000) {
- q += 4;
- bo = -1;
- }
-#endif
+ if (bo == 0 && size >= 4) {
+ Py_UCS4 bom = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+ if (bom == 0x0000FEFF) {
+ bo = -1;
+ q += 4;
+ }
+ else if (bom == 0xFFFE0000) {
+ bo = 1;
+ q += 4;
}
+ if (byteorder)
+ *byteorder = bo;
}
- if (bo == -1) {
- /* force LE */
- iorder[0] = 0;
- iorder[1] = 1;
- iorder[2] = 2;
- iorder[3] = 3;
- }
- else if (bo == 1) {
- /* force BE */
- iorder[0] = 3;
- iorder[1] = 2;
- iorder[2] = 1;
- iorder[3] = 0;
+ if (q == e) {
+ if (consumed)
+ *consumed = size;
+ _Py_RETURN_UNICODE_EMPTY();
}
- /* This might be one to much, because of a BOM */
- unicode = PyUnicode_New((size+3)/4, 127);
- if (!unicode)
- return NULL;
- if (size == 0)
- return unicode;
- outpos = 0;
+#ifdef WORDS_BIGENDIAN
+ le = bo < 0;
+#else
+ le = bo <= 0;
+#endif
- while (q < e) {
- Py_UCS4 ch;
- /* remaining bytes at the end? (size should be divisible by 4) */
- if (e-q<4) {
- if (consumed)
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = (e - q + 3) / 4;
+ if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+ goto onError;
+
+ while (1) {
+ Py_UCS4 ch = 0;
+ Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(writer.buffer);
+
+ if (e - q >= 4) {
+ enum PyUnicode_Kind kind = writer.kind;
+ void *data = writer.data;
+ const unsigned char *last = e - 4;
+ Py_ssize_t pos = writer.pos;
+ if (le) {
+ do {
+ ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+ if (ch > maxch)
+ break;
+ PyUnicode_WRITE(kind, data, pos++, ch);
+ q += 4;
+ } while (q <= last);
+ }
+ else {
+ do {
+ ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3];
+ if (ch > maxch)
+ break;
+ PyUnicode_WRITE(kind, data, pos++, ch);
+ q += 4;
+ } while (q <= last);
+ }
+ writer.pos = pos;
+ }
+
+ if (ch <= maxch) {
+ if (q == e || consumed)
break;
+ /* remaining bytes at the end? (size should be divisible by 4) */
errmsg = "truncated data";
- startinpos = ((const char *)q)-starts;
- endinpos = ((const char *)e)-starts;
- goto utf32Error;
- /* The remaining input chars are ignored if the callback
- chooses to skip the input */
+ startinpos = ((const char *)q) - starts;
+ endinpos = ((const char *)e) - starts;
}
- ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
- (q[iorder[1]] << 8) | q[iorder[0]];
-
- if (ch >= 0x110000)
- {
+ else {
+ if (ch < 0x110000) {
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+ goto onError;
+ q += 4;
+ continue;
+ }
errmsg = "codepoint not in range(0x110000)";
- startinpos = ((const char *)q)-starts;
- endinpos = startinpos+4;
- goto utf32Error;
+ startinpos = ((const char *)q) - starts;
+ endinpos = startinpos + 4;
}
- if (unicode_putchar(&unicode, &outpos, ch) < 0)
- goto onError;
- q += 4;
- continue;
- utf32Error:
- if (unicode_decode_call_errorhandler(
+
+ /* The remaining input chars are ignored if the callback
+ chooses to skip the input */
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"utf32", errmsg,
&starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
- &unicode, &outpos))
+ &writer))
goto onError;
}
- if (byteorder)
- *byteorder = bo;
-
if (consumed)
*consumed = (const char *)q-starts;
- /* Adjust length */
- if (unicode_resize(&unicode, outpos) < 0)
- goto onError;
-
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(unicode);
+ return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_DECREF(unicode);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
@@ -5111,7 +5074,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
unsigned char *p;
Py_ssize_t nsize, i;
/* Offsets from p for storing byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
int iorder[] = {0, 1, 2, 3};
#else
int iorder[] = {3, 2, 1, 0};
@@ -5214,8 +5177,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
- Py_ssize_t outpos;
- PyObject *unicode;
+ _PyUnicodeWriter writer;
const unsigned char *q, *e;
int bo = 0; /* assume native ordering by default */
int native_ordering;
@@ -5253,7 +5215,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
_Py_RETURN_UNICODE_EMPTY();
}
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
native_ordering = bo <= 0;
#else
native_ordering = bo >= 0;
@@ -5261,32 +5223,32 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
/* Note: size will always be longer than the resulting Unicode
character count */
- unicode = PyUnicode_New((e - q + 1) / 2, 127);
- if (!unicode)
- return NULL;
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = (e - q + 1) / 2;
+ if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+ goto onError;
- outpos = 0;
while (1) {
Py_UCS4 ch = 0;
if (e - q >= 2) {
- int kind = PyUnicode_KIND(unicode);
+ int kind = writer.kind;
if (kind == PyUnicode_1BYTE_KIND) {
- if (PyUnicode_IS_ASCII(unicode))
+ if (PyUnicode_IS_ASCII(writer.buffer))
ch = asciilib_utf16_decode(&q, e,
- PyUnicode_1BYTE_DATA(unicode), &outpos,
+ (Py_UCS1*)writer.data, &writer.pos,
native_ordering);
else
ch = ucs1lib_utf16_decode(&q, e,
- PyUnicode_1BYTE_DATA(unicode), &outpos,
+ (Py_UCS1*)writer.data, &writer.pos,
native_ordering);
} else if (kind == PyUnicode_2BYTE_KIND) {
ch = ucs2lib_utf16_decode(&q, e,
- PyUnicode_2BYTE_DATA(unicode), &outpos,
+ (Py_UCS2*)writer.data, &writer.pos,
native_ordering);
} else {
assert(kind == PyUnicode_4BYTE_KIND);
ch = ucs4lib_utf16_decode(&q, e,
- PyUnicode_4BYTE_DATA(unicode), &outpos,
+ (Py_UCS4*)writer.data, &writer.pos,
native_ordering);
}
}
@@ -5322,12 +5284,12 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
endinpos = startinpos + 2;
break;
default:
- if (unicode_putchar(&unicode, &outpos, ch) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
goto onError;
continue;
}
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors,
&errorHandler,
"utf16", errmsg,
@@ -5337,8 +5299,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
&endinpos,
&exc,
(const char **)&q,
- &unicode,
- &outpos))
+ &writer))
goto onError;
}
@@ -5346,16 +5307,12 @@ End:
if (consumed)
*consumed = (const char *)q-starts;
- /* Adjust length */
- if (unicode_resize(&unicode, outpos) < 0)
- goto onError;
-
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(unicode);
+ return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_DECREF(unicode);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
@@ -5373,7 +5330,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
unsigned short *out;
Py_ssize_t bytesize;
Py_ssize_t pairs;
-#ifdef WORDS_BIGENDIAN
+#if PY_BIG_ENDIAN
int native_ordering = byteorder >= 0;
#else
int native_ordering = byteorder <= 0;
@@ -5521,27 +5478,26 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
- PyObject *v;
+ _PyUnicodeWriter writer;
const char *end;
char* message;
Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
Py_ssize_t len;
- Py_ssize_t i;
len = length_of_escaped_ascii_string(s, size);
+ if (len == 0)
+ _Py_RETURN_UNICODE_EMPTY();
/* After length_of_escaped_ascii_string() there are two alternatives,
either the string is pure ASCII with named escapes like \n, etc.
and we determined it's exact size (common case)
or it contains \x, \u, ... escape sequences. then we create a
legacy wchar string and resize it at the end of this function. */
- if (len >= 0) {
- v = PyUnicode_New(len, 127);
- if (!v)
- goto onError;
- assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+ _PyUnicodeWriter_Init(&writer);
+ if (len > 0) {
+ writer.min_length = len;
}
else {
/* Escaped strings will always be longer than the resulting
@@ -5549,15 +5505,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
length after conversion to the true value.
(but if the error callback returns a long replacement string
we'll have to allocate more space) */
- v = PyUnicode_New(size, 127);
- if (!v)
- goto onError;
- len = size;
+ writer.min_length = size;
}
if (size == 0)
- return v;
- i = 0;
+ return _PyUnicodeWriter_Finish(&writer);
end = s + size;
while (s < end) {
@@ -5565,13 +5517,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
Py_UCS4 x;
int digits;
- /* The only case in which i == ascii_length is a backslash
- followed by a newline. */
- assert(i <= len);
-
/* Non-escape characters are interpreted as Unicode ordinals */
if (*s != '\\') {
- if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0)
+ x = (unsigned char)*s;
+ s++;
+ if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
goto onError;
continue;
}
@@ -5583,18 +5533,14 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
if (s > end)
c = '\0'; /* Invalid after \ */
- /* The only case in which i == ascii_length is a backslash
- followed by a newline. */
- assert(i < len || (i == len && c == '\n'));
-
switch (c) {
/* \x escapes */
-#define WRITECHAR(ch) \
- do { \
- if (unicode_putchar(&v, &i, ch) < 0) \
- goto onError; \
- }while(0)
+#define WRITECHAR(ch) \
+ do { \
+ if (_PyUnicodeWriter_WriteCharInline(&writer, (ch)) < 0) \
+ goto onError; \
+ } while(0)
case '\n': break;
case '\\': WRITECHAR('\\'); break;
@@ -5718,35 +5664,32 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
error:
endinpos = s-starts;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"unicodeescape", message,
&starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &i))
+ &writer))
goto onError;
- len = PyUnicode_GET_LENGTH(v);
continue;
}
#undef WRITECHAR
- if (unicode_resize(&v, i) < 0)
- goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(v);
+ return _PyUnicodeWriter_Finish(&writer);
ucnhashError:
PyErr_SetString(
PyExc_UnicodeError,
"\\N escapes not supported (can't load unicodedata module)"
);
- Py_XDECREF(v);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
onError:
- Py_XDECREF(v);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
@@ -5899,23 +5842,22 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
- Py_ssize_t outpos;
- PyObject *v;
+ _PyUnicodeWriter writer;
const char *end;
const char *bs;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
+ if (size == 0)
+ _Py_RETURN_UNICODE_EMPTY();
+
/* Escaped strings will always be longer than the resulting
Unicode string, so we start with size here and then reduce the
length after conversion to the true value. (But decoding error
handler might have to resize the string) */
- v = PyUnicode_New(size, 127);
- if (v == NULL)
- goto onError;
- if (size == 0)
- return v;
- outpos = 0;
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = size;
+
end = s + size;
while (s < end) {
unsigned char c;
@@ -5925,7 +5867,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
/* Non-escape characters are interpreted as Unicode ordinals */
if (*s != '\\') {
- if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
+ x = (unsigned char)*s++;
+ if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
goto onError;
continue;
}
@@ -5937,7 +5880,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
for (;s < end;) {
if (*s != '\\')
break;
- if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
+ x = (unsigned char)*s++;
+ if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
goto onError;
}
if (((s - bs) & 1) == 0 ||
@@ -5945,7 +5889,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
(*s != 'u' && *s != 'U')) {
continue;
}
- outpos--;
+ writer.pos--;
count = *s=='u' ? 4 : 8;
s++;
@@ -5954,11 +5898,11 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
c = (unsigned char)*s;
if (!Py_ISXDIGIT(c)) {
endinpos = s-starts;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"rawunicodeescape", "truncated \\uXXXX",
&starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos))
+ &writer))
goto onError;
goto nextByte;
}
@@ -5971,28 +5915,27 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
x += 10 + c - 'A';
}
if (x <= MAX_UNICODE) {
- if (unicode_putchar(&v, &outpos, x) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
goto onError;
- } else {
+ }
+ else {
endinpos = s-starts;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"rawunicodeescape", "\\Uxxxxxxxx out of range",
&starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos))
+ &writer))
goto onError;
}
nextByte:
;
}
- if (unicode_resize(&v, outpos) < 0)
- goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(v);
+ return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_XDECREF(v);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
@@ -6092,8 +6035,7 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
- Py_ssize_t outpos;
- PyObject *v;
+ _PyUnicodeWriter writer;
const char *end;
const char *reason;
PyObject *errorHandler = NULL;
@@ -6104,15 +6046,17 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
1))
return NULL;
- /* XXX overflow detection missing */
- v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
- if (v == NULL)
+ if (size == 0)
+ _Py_RETURN_UNICODE_EMPTY();
+
+ _PyUnicodeWriter_Init(&writer);
+ if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
+ PyErr_NoMemory();
goto onError;
- if (PyUnicode_GET_LENGTH(v) == 0)
- return v;
- outpos = 0;
- end = s + size;
+ }
+ writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
+ end = s + size;
while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch;
@@ -6154,28 +6098,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
}
#endif
- if (unicode_putchar(&v, &outpos, ch) < 0)
+ if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
goto onError;
continue;
error:
startinpos = s - starts;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"unicode_internal", reason,
&starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos))
+ &writer))
goto onError;
}
- if (unicode_resize(&v, outpos) < 0)
- goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(v);
+ return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_XDECREF(v);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
@@ -6539,7 +6481,7 @@ PyUnicode_DecodeASCII(const char *s,
const char *errors)
{
const char *starts = s;
- PyObject *unicode;
+ _PyUnicodeWriter writer;
int kind;
void *data;
Py_ssize_t startinpos;
@@ -6556,46 +6498,46 @@ PyUnicode_DecodeASCII(const char *s,
if (size == 1 && (unsigned char)s[0] < 128)
return get_latin1_char((unsigned char)s[0]);
- unicode = PyUnicode_New(size, 127);
- if (unicode == NULL)
- goto onError;
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = size;
+ if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) < 0)
+ return NULL;
e = s + size;
- data = PyUnicode_1BYTE_DATA(unicode);
+ data = writer.data;
outpos = ascii_decode(s, e, (Py_UCS1 *)data);
- if (outpos == size)
- return unicode;
+ writer.pos = outpos;
+ if (writer.pos == size)
+ return _PyUnicodeWriter_Finish(&writer);
- s += outpos;
- kind = PyUnicode_1BYTE_KIND;
+ s += writer.pos;
+ kind = writer.kind;
while (s < e) {
register unsigned char c = (unsigned char)*s;
if (c < 128) {
- PyUnicode_WRITE(kind, data, outpos++, c);
+ PyUnicode_WRITE(kind, data, writer.pos, c);
+ writer.pos++;
++s;
}
else {
startinpos = s-starts;
endinpos = startinpos + 1;
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"ascii", "ordinal not in range(128)",
&starts, &e, &startinpos, &endinpos, &exc, &s,
- &unicode, &outpos))
+ &writer))
goto onError;
- kind = PyUnicode_KIND(unicode);
- data = PyUnicode_DATA(unicode);
+ kind = writer.kind;
+ data = writer.data;
}
}
- if (unicode_resize(&unicode, outpos) < 0)
- goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- assert(_PyUnicode_CheckConsistency(unicode, 1));
- return unicode;
+ return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_XDECREF(unicode);
+ _PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
@@ -6627,7 +6569,7 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
return NULL;
/* Fast path: if it is an ASCII-only string, construct bytes object
directly. Else defer to above function to raise the exception. */
- if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
+ if (PyUnicode_IS_ASCII(unicode))
return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
PyUnicode_GET_LENGTH(unicode));
return unicode_encode_ucs1(unicode, errors, 128);
@@ -6705,8 +6647,8 @@ decode_code_page_flags(UINT code_page)
* Decode a byte string from a Windows code page into unicode object in strict
* mode.
*
- * Returns consumed size if succeed, returns -2 on decode error, or raise a
- * WindowsError and returns -1 on other error.
+ * Returns consumed size if succeed, returns -2 on decode error, or raise an
+ * OSError and returns -1 on other error.
*/
static int
decode_code_page_strict(UINT code_page,
@@ -6757,7 +6699,7 @@ error:
* Decode a byte string from a code page into unicode object with an error
* handler.
*
- * Returns consumed size if succeed, or raise a WindowsError or
+ * Returns consumed size if succeed, or raise an OSError or
* UnicodeDecodeError exception and returns -1 on error.
*/
static int
@@ -6776,7 +6718,8 @@ decode_code_page_errors(UINT code_page,
/* each step cannot decode more than 1 character, but a character can be
represented as a surrogate pair */
wchar_t buffer[2], *startout, *out;
- int insize, outsize;
+ int insize;
+ Py_ssize_t outsize;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *encoding_obj = NULL;
@@ -6856,7 +6799,7 @@ decode_code_page_errors(UINT code_page,
startinpos = in - startin;
endinpos = startinpos + 1;
outpos = out - PyUnicode_AS_UNICODE(*v);
- if (unicode_decode_call_errorhandler(
+ if (unicode_decode_call_errorhandler_wchar(
errors, &errorHandler,
encoding, reason,
&startin, &endin, &startinpos, &endinpos, &exc, &in,
@@ -7012,7 +6955,7 @@ encode_code_page_flags(UINT code_page, const char *errors)
* mode.
*
* Returns consumed characters if succeed, returns -2 on encode error, or raise
- * a WindowsError and returns -1 on other error.
+ * an OSError and returns -1 on other error.
*/
static int
encode_code_page_strict(UINT code_page, PyObject **outbytes,
@@ -7046,10 +6989,11 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
Py_DECREF(substring);
return -1;
}
+ assert(size <= INT_MAX);
/* First get the size of the result */
outsize = WideCharToMultiByte(code_page, flags,
- p, size,
+ p, (int)size,
NULL, 0,
NULL, pusedDefaultChar);
if (outsize <= 0)
@@ -7086,7 +7030,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
/* Do the conversion */
outsize = WideCharToMultiByte(code_page, flags,
- p, size,
+ p, (int)size,
out, outsize,
NULL, pusedDefaultChar);
Py_CLEAR(substring);
@@ -7108,7 +7052,7 @@ error:
* Encode a Unicode string to a Windows code page into a byte string using a
* error handler.
*
- * Returns consumed characters if succeed, or raise a WindowsError and returns
+ * Returns consumed characters if succeed, or raise an OSError and returns
* -1 on other error.
*/
static int
@@ -7194,9 +7138,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
charsize = 1;
}
else {
- ch -= 0x10000;
- chars[0] = 0xd800 + (ch >> 10);
- chars[1] = 0xdc00 + (ch & 0x3ff);
+ chars[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+ chars[1] = Py_UNICODE_LOW_SURROGATE(ch);
charsize = 2;
}
@@ -7389,220 +7332,258 @@ PyUnicode_AsMBCSString(PyObject *unicode)
/* --- Character Mapping Codec -------------------------------------------- */
-PyObject *
-PyUnicode_DecodeCharmap(const char *s,
- Py_ssize_t size,
- PyObject *mapping,
- const char *errors)
+static int
+charmap_decode_string(const char *s,
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors,
+ _PyUnicodeWriter *writer)
{
const char *starts = s;
- Py_ssize_t startinpos;
- Py_ssize_t endinpos;
- Py_ssize_t outpos;
const char *e;
- PyObject *v;
- Py_ssize_t extrachars = 0;
- PyObject *errorHandler = NULL;
- PyObject *exc = NULL;
+ Py_ssize_t startinpos, endinpos;
+ PyObject *errorHandler = NULL, *exc = NULL;
+ Py_ssize_t maplen;
+ enum PyUnicode_Kind mapkind;
+ void *mapdata;
+ Py_UCS4 x;
+ unsigned char ch;
+
+ if (PyUnicode_READY(mapping) == -1)
+ return -1;
- /* Default to Latin-1 */
- if (mapping == NULL)
- return PyUnicode_DecodeLatin1(s, size, errors);
+ maplen = PyUnicode_GET_LENGTH(mapping);
+ mapdata = PyUnicode_DATA(mapping);
+ mapkind = PyUnicode_KIND(mapping);
- v = PyUnicode_New(size, 127);
- if (v == NULL)
- goto onError;
- if (size == 0)
- return v;
- outpos = 0;
e = s + size;
- if (PyUnicode_CheckExact(mapping)) {
- Py_ssize_t maplen;
- enum PyUnicode_Kind mapkind;
- void *mapdata;
- Py_UCS4 x;
- if (PyUnicode_READY(mapping) == -1)
- return NULL;
+ if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) {
+ /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
+ * is disabled in encoding aliases, latin1 is preferred because
+ * its implementation is faster. */
+ Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata;
+ Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
+ Py_UCS4 maxchar = writer->maxchar;
- maplen = PyUnicode_GET_LENGTH(mapping);
- mapdata = PyUnicode_DATA(mapping);
- mapkind = PyUnicode_KIND(mapping);
+ assert (writer->kind == PyUnicode_1BYTE_KIND);
while (s < e) {
- unsigned char ch;
- if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
- enum PyUnicode_Kind outkind = PyUnicode_KIND(v);
- if (outkind == PyUnicode_1BYTE_KIND) {
- void *outdata = PyUnicode_DATA(v);
- Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(v);
- while (s < e) {
- unsigned char ch = *s;
- x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
- if (x > maxchar)
- goto Error;
- PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, outpos++, x);
- ++s;
- }
- break;
+ ch = *s;
+ x = mapdata_ucs1[ch];
+ if (x > maxchar) {
+ if (_PyUnicodeWriter_Prepare(writer, 1, 0xff) == -1)
+ goto onError;
+ maxchar = writer->maxchar;
+ outdata = (Py_UCS1 *)writer->data;
+ }
+ outdata[writer->pos] = x;
+ writer->pos++;
+ ++s;
+ }
+ return 0;
+ }
+
+ while (s < e) {
+ if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
+ enum PyUnicode_Kind outkind = writer->kind;
+ Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata;
+ if (outkind == PyUnicode_1BYTE_KIND) {
+ Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
+ Py_UCS4 maxchar = writer->maxchar;
+ while (s < e) {
+ ch = *s;
+ x = mapdata_ucs2[ch];
+ if (x > maxchar)
+ goto Error;
+ outdata[writer->pos] = x;
+ writer->pos++;
+ ++s;
}
- else if (outkind == PyUnicode_2BYTE_KIND) {
- void *outdata = PyUnicode_DATA(v);
- while (s < e) {
- unsigned char ch = *s;
- x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
- if (x == 0xFFFE)
- goto Error;
- PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, outpos++, x);
- ++s;
- }
- break;
+ break;
+ }
+ else if (outkind == PyUnicode_2BYTE_KIND) {
+ Py_UCS2 *outdata = (Py_UCS2 *)writer->data;
+ while (s < e) {
+ ch = *s;
+ x = mapdata_ucs2[ch];
+ if (x == 0xFFFE)
+ goto Error;
+ outdata[writer->pos] = x;
+ writer->pos++;
+ ++s;
}
+ break;
}
- ch = *s;
+ }
+ ch = *s;
- if (ch < maplen)
- x = PyUnicode_READ(mapkind, mapdata, ch);
- else
- x = 0xfffe; /* invalid value */
+ if (ch < maplen)
+ x = PyUnicode_READ(mapkind, mapdata, ch);
+ else
+ x = 0xfffe; /* invalid value */
Error:
- if (x == 0xfffe)
- {
- /* undefined mapping */
- startinpos = s-starts;
- endinpos = startinpos+1;
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "charmap", "character maps to <undefined>",
- &starts, &e, &startinpos, &endinpos, &exc, &s,
- &v, &outpos)) {
- goto onError;
- }
- continue;
+ if (x == 0xfffe)
+ {
+ /* undefined mapping */
+ startinpos = s-starts;
+ endinpos = startinpos+1;
+ if (unicode_decode_call_errorhandler_writer(
+ errors, &errorHandler,
+ "charmap", "character maps to <undefined>",
+ &starts, &e, &startinpos, &endinpos, &exc, &s,
+ writer)) {
+ goto onError;
}
+ continue;
+ }
+
+ if (_PyUnicodeWriter_WriteCharInline(writer, x) < 0)
+ goto onError;
+ ++s;
+ }
+ Py_XDECREF(errorHandler);
+ Py_XDECREF(exc);
+ return 0;
+
+onError:
+ Py_XDECREF(errorHandler);
+ Py_XDECREF(exc);
+ return -1;
+}
+
+static int
+charmap_decode_mapping(const char *s,
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors,
+ _PyUnicodeWriter *writer)
+{
+ const char *starts = s;
+ const char *e;
+ Py_ssize_t startinpos, endinpos;
+ PyObject *errorHandler = NULL, *exc = NULL;
+ unsigned char ch;
+ PyObject *key, *item = NULL;
+
+ e = s + size;
+
+ while (s < e) {
+ ch = *s;
- if (unicode_putchar(&v, &outpos, x) < 0)
+ /* Get mapping (char ordinal -> integer, Unicode char or None) */
+ key = PyLong_FromLong((long)ch);
+ if (key == NULL)
+ goto onError;
+
+ item = PyObject_GetItem(mapping, key);
+ Py_DECREF(key);
+ if (item == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+ /* No mapping found means: mapping is undefined. */
+ PyErr_Clear();
+ goto Undefined;
+ } else
goto onError;
- ++s;
}
- }
- else {
- while (s < e) {
- unsigned char ch = *s;
- PyObject *w, *x;
- /* Get mapping (char ordinal -> integer, Unicode char or None) */
- w = PyLong_FromLong((long)ch);
- if (w == NULL)
+ /* Apply mapping */
+ if (item == Py_None)
+ goto Undefined;
+ if (PyLong_Check(item)) {
+ long value = PyLong_AS_LONG(item);
+ if (value == 0xFFFE)
+ goto Undefined;
+ if (value < 0 || value > MAX_UNICODE) {
+ PyErr_Format(PyExc_TypeError,
+ "character mapping must be in range(0x%lx)",
+ (unsigned long)MAX_UNICODE + 1);
goto onError;
- x = PyObject_GetItem(mapping, w);
- Py_DECREF(w);
- if (x == NULL) {
- if (PyErr_ExceptionMatches(PyExc_LookupError)) {
- /* No mapping found means: mapping is undefined. */
- PyErr_Clear();
- goto Undefined;
- } else
- goto onError;
}
- /* Apply mapping */
- if (x == Py_None)
- goto Undefined;
- if (PyLong_Check(x)) {
- long value = PyLong_AS_LONG(x);
+ if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0)
+ goto onError;
+ }
+ else if (PyUnicode_Check(item)) {
+ if (PyUnicode_READY(item) == -1)
+ goto onError;
+ if (PyUnicode_GET_LENGTH(item) == 1) {
+ Py_UCS4 value = PyUnicode_READ_CHAR(item, 0);
if (value == 0xFFFE)
goto Undefined;
- if (value < 0 || value > MAX_UNICODE) {
- PyErr_Format(PyExc_TypeError,
- "character mapping must be in range(0x%lx)",
- (unsigned long)MAX_UNICODE + 1);
- Py_DECREF(x);
- goto onError;
- }
- if (unicode_putchar(&v, &outpos, value) < 0) {
- Py_DECREF(x);
+ if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0)
goto onError;
- }
- }
- else if (PyUnicode_Check(x)) {
- Py_ssize_t targetsize;
-
- if (PyUnicode_READY(x) == -1) {
- Py_DECREF(x);
- goto onError;
- }
- targetsize = PyUnicode_GET_LENGTH(x);
-
- if (targetsize == 1) {
- /* 1-1 mapping */
- Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);
- if (value == 0xFFFE)
- goto Undefined;
- if (unicode_putchar(&v, &outpos, value) < 0) {
- Py_DECREF(x);
- goto onError;
- }
- }
- else if (targetsize > 1) {
- /* 1-n mapping */
- if (targetsize > extrachars) {
- /* resize first */
- Py_ssize_t needed = (targetsize - extrachars) + \
- (targetsize << 2);
- extrachars += needed;
- /* XXX overflow detection missing */
- if (unicode_resize(&v,
- PyUnicode_GET_LENGTH(v) + needed) < 0)
- {
- Py_DECREF(x);
- goto onError;
- }
- }
- if (unicode_widen(&v, outpos,
- PyUnicode_MAX_CHAR_VALUE(x)) < 0) {
- Py_DECREF(x);
- goto onError;
- }
- PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize);
- outpos += targetsize;
- extrachars -= targetsize;
- }
- /* 1-0 mapping: skip the character */
}
else {
- /* wrong return value */
- PyErr_SetString(PyExc_TypeError,
- "character mapping must return integer, None or str");
- Py_DECREF(x);
- goto onError;
+ writer->overallocate = 1;
+ if (_PyUnicodeWriter_WriteStr(writer, item) == -1)
+ goto onError;
}
- Py_DECREF(x);
- ++s;
- continue;
+ }
+ else {
+ /* wrong return value */
+ PyErr_SetString(PyExc_TypeError,
+ "character mapping must return integer, None or str");
+ goto onError;
+ }
+ Py_CLEAR(item);
+ ++s;
+ continue;
+
Undefined:
- /* undefined mapping */
- Py_XDECREF(x);
- startinpos = s-starts;
- endinpos = startinpos+1;
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "charmap", "character maps to <undefined>",
- &starts, &e, &startinpos, &endinpos, &exc, &s,
- &v, &outpos)) {
- goto onError;
- }
+ /* undefined mapping */
+ Py_CLEAR(item);
+ startinpos = s-starts;
+ endinpos = startinpos+1;
+ if (unicode_decode_call_errorhandler_writer(
+ errors, &errorHandler,
+ "charmap", "character maps to <undefined>",
+ &starts, &e, &startinpos, &endinpos, &exc, &s,
+ writer)) {
+ goto onError;
}
}
- if (unicode_resize(&v, outpos) < 0)
- goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return unicode_result(v);
+ return 0;
- onError:
+onError:
+ Py_XDECREF(item);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- Py_XDECREF(v);
+ return -1;
+}
+
+PyObject *
+PyUnicode_DecodeCharmap(const char *s,
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors)
+{
+ _PyUnicodeWriter writer;
+
+ /* Default to Latin-1 */
+ if (mapping == NULL)
+ return PyUnicode_DecodeLatin1(s, size, errors);
+
+ if (size == 0)
+ _Py_RETURN_UNICODE_EMPTY();
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = size;
+ if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+ goto onError;
+
+ if (PyUnicode_CheckExact(mapping)) {
+ if (charmap_decode_string(s, size, mapping, errors, &writer) < 0)
+ goto onError;
+ }
+ else {
+ if (charmap_decode_mapping(s, size, mapping, errors, &writer) < 0)
+ goto onError;
+ }
+ return _PyUnicodeWriter_Finish(&writer);
+
+ onError:
+ _PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
@@ -8116,10 +8097,14 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
* -1=not initialized, 0=unknown, 1=strict, 2=replace,
* 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
+ void *data;
+ int kind;
if (PyUnicode_READY(unicode) == -1)
return NULL;
size = PyUnicode_GET_LENGTH(unicode);
+ data = PyUnicode_DATA(unicode);
+ kind = PyUnicode_KIND(unicode);
/* Default to Latin-1 */
if (mapping == NULL)
@@ -8134,7 +8119,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
return res;
while (inpos<size) {
- Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
+ Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
/* try to encode it */
charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
if (x==enc_EXCEPTION) /* error */
@@ -8220,19 +8205,6 @@ make_translate_exception(PyObject **exceptionObject,
}
}
-/* raises a UnicodeTranslateError */
-static void
-raise_translate_exception(PyObject **exceptionObject,
- PyObject *unicode,
- Py_ssize_t startpos, Py_ssize_t endpos,
- const char *reason)
-{
- make_translate_exception(exceptionObject,
- unicode, startpos, endpos, reason);
- if (*exceptionObject != NULL)
- PyCodec_StrictErrors(*exceptionObject);
-}
-
/* error handling callback helper:
build arguments, call the callback and check the arguments,
put the result into newpos and return the replacement string, which
@@ -8508,8 +8480,10 @@ _PyUnicode_TranslateCharmap(PyObject *input,
}
switch (known_errorHandler) {
case 1: /* strict */
- raise_translate_exception(&exc, input, collstart,
- collend, reason);
+ make_translate_exception(&exc,
+ input, collstart, collend, reason);
+ if (exc != NULL)
+ PyCodec_StrictErrors(exc);
goto onError;
case 2: /* replace */
/* No need to check for space, this is a 1:1 replacement */
@@ -9101,7 +9075,7 @@ tailmatch(PyObject *self,
if (PyUnicode_READY(self) == -1 ||
PyUnicode_READY(substring) == -1)
- return 0;
+ return -1;
if (PyUnicode_GET_LENGTH(substring) == 0)
return 1;
@@ -9139,7 +9113,6 @@ tailmatch(PyObject *self,
/* We do not need to compare 0 and len(substring)-1 because
the if statement above ensured already that they are equal
when we end up here. */
- /* TODO: honor direction and do a forward or backwards search */
for (i = 1; i < end_sub; ++i) {
if (PyUnicode_READ(kind_self, data_self, offset + i) !=
PyUnicode_READ(kind_sub, data_sub, i))
@@ -9601,41 +9574,49 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
sep_data = PyUnicode_1BYTE_DATA(sep);
}
#endif
- for (i = 0, res_offset = 0; i < seqlen; ++i) {
- Py_ssize_t itemlen;
- item = items[i];
- /* Copy item, and maybe the separator. */
- if (i && seplen != 0) {
- if (use_memcpy) {
+ if (use_memcpy) {
+ for (i = 0; i < seqlen; ++i) {
+ Py_ssize_t itemlen;
+ item = items[i];
+
+ /* Copy item, and maybe the separator. */
+ if (i && seplen != 0) {
Py_MEMCPY(res_data,
sep_data,
kind * seplen);
res_data += kind * seplen;
}
- else {
- _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
- res_offset += seplen;
- }
- }
- itemlen = PyUnicode_GET_LENGTH(item);
- if (itemlen != 0) {
- if (use_memcpy) {
+
+ itemlen = PyUnicode_GET_LENGTH(item);
+ if (itemlen != 0) {
Py_MEMCPY(res_data,
PyUnicode_DATA(item),
kind * itemlen);
res_data += kind * itemlen;
}
- else {
+ }
+ assert(res_data == PyUnicode_1BYTE_DATA(res)
+ + kind * PyUnicode_GET_LENGTH(res));
+ }
+ else {
+ for (i = 0, res_offset = 0; i < seqlen; ++i) {
+ Py_ssize_t itemlen;
+ item = items[i];
+
+ /* Copy item, and maybe the separator. */
+ if (i && seplen != 0) {
+ _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
+ res_offset += seplen;
+ }
+
+ itemlen = PyUnicode_GET_LENGTH(item);
+ if (itemlen != 0) {
_PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
res_offset += itemlen;
}
}
- }
- if (use_memcpy)
- assert(res_data == PyUnicode_1BYTE_DATA(res)
- + kind * PyUnicode_GET_LENGTH(res));
- else
assert(res_offset == PyUnicode_GET_LENGTH(res));
+ }
Py_DECREF(fseq);
Py_XDECREF(sep);
@@ -10027,6 +10008,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
return 0;
}
+static void
+replace_1char_inplace(PyObject *u, Py_ssize_t pos,
+ Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+ int kind = PyUnicode_KIND(u);
+ void *data = PyUnicode_DATA(u);
+ Py_ssize_t len = PyUnicode_GET_LENGTH(u);
+ if (kind == PyUnicode_1BYTE_KIND) {
+ ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos,
+ (Py_UCS1 *)data + len,
+ u1, u2, maxcount);
+ }
+ else if (kind == PyUnicode_2BYTE_KIND) {
+ ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos,
+ (Py_UCS2 *)data + len,
+ u1, u2, maxcount);
+ }
+ else {
+ assert(kind == PyUnicode_4BYTE_KIND);
+ ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos,
+ (Py_UCS4 *)data + len,
+ u1, u2, maxcount);
+ }
+}
+
static PyObject *
replace(PyObject *self, PyObject *str1,
PyObject *str2, Py_ssize_t maxcount)
@@ -10043,7 +10049,7 @@ replace(PyObject *self, PyObject *str1,
Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
int mayshrink;
- Py_UCS4 maxchar, maxchar_str2;
+ Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
@@ -10052,15 +10058,16 @@ replace(PyObject *self, PyObject *str1,
if (str1 == str2)
goto nothing;
- if (skind < kind1)
- /* substring too wide to be present */
- goto nothing;
maxchar = PyUnicode_MAX_CHAR_VALUE(self);
+ maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1);
+ if (maxchar < maxchar_str1)
+ /* substring too wide to be present */
+ goto nothing;
maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
/* Replacing str1 with str2 may cause a maxchar reduction in the
result string. */
- mayshrink = (maxchar_str2 < maxchar);
+ mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1);
maxchar = Py_MAX(maxchar, maxchar_str2);
if (len1 == len2) {
@@ -10070,35 +10077,19 @@ replace(PyObject *self, PyObject *str1,
if (len1 == 1) {
/* replace characters */
Py_UCS4 u1, u2;
- int rkind;
- Py_ssize_t index, pos;
- char *src;
+ Py_ssize_t pos;
- u1 = PyUnicode_READ_CHAR(str1, 0);
- pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
+ u1 = PyUnicode_READ(kind1, buf1, 0);
+ pos = findchar(sbuf, skind, slen, u1, 1);
if (pos < 0)
goto nothing;
- u2 = PyUnicode_READ_CHAR(str2, 0);
+ u2 = PyUnicode_READ(kind2, buf2, 0);
u = PyUnicode_New(slen, maxchar);
if (!u)
goto error;
- _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
- rkind = PyUnicode_KIND(u);
- PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
- index = 0;
- src = sbuf;
- while (--maxcount)
- {
- pos++;
- src += pos * PyUnicode_KIND(self);
- slen -= pos;
- index += pos;
- pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
- if (pos < 0)
- break;
- PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2);
- }
+ _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
+ replace_1char_inplace(u, pos, u1, u2, maxcount);
}
else {
int rkind = skind;
@@ -10410,9 +10401,28 @@ unicode_center(PyObject *self, PyObject *args)
static int
unicode_compare(PyObject *str1, PyObject *str2)
{
+#define COMPARE(TYPE1, TYPE2) \
+ do { \
+ TYPE1* p1 = (TYPE1 *)data1; \
+ TYPE2* p2 = (TYPE2 *)data2; \
+ TYPE1* end = p1 + len; \
+ Py_UCS4 c1, c2; \
+ for (; p1 != end; p1++, p2++) { \
+ c1 = *p1; \
+ c2 = *p2; \
+ if (c1 != c2) \
+ return (c1 < c2) ? -1 : 1; \
+ } \
+ } \
+ while (0)
+
int kind1, kind2;
void *data1, *data2;
- Py_ssize_t len1, len2, i;
+ Py_ssize_t len1, len2, len;
+
+ /* a string is equal to itself */
+ if (str1 == str2)
+ return 0;
kind1 = PyUnicode_KIND(str1);
kind2 = PyUnicode_KIND(str2);
@@ -10420,19 +10430,120 @@ unicode_compare(PyObject *str1, PyObject *str2)
data2 = PyUnicode_DATA(str2);
len1 = PyUnicode_GET_LENGTH(str1);
len2 = PyUnicode_GET_LENGTH(str2);
+ len = Py_MIN(len1, len2);
- for (i = 0; i < len1 && i < len2; ++i) {
- Py_UCS4 c1, c2;
- c1 = PyUnicode_READ(kind1, data1, i);
- c2 = PyUnicode_READ(kind2, data2, i);
-
- if (c1 != c2)
- return (c1 < c2) ? -1 : 1;
+ switch(kind1) {
+ case PyUnicode_1BYTE_KIND:
+ {
+ switch(kind2) {
+ case PyUnicode_1BYTE_KIND:
+ {
+ int cmp = memcmp(data1, data2, len);
+ /* normalize result of memcmp() into the range [-1; 1] */
+ if (cmp < 0)
+ return -1;
+ if (cmp > 0)
+ return 1;
+ break;
+ }
+ case PyUnicode_2BYTE_KIND:
+ COMPARE(Py_UCS1, Py_UCS2);
+ break;
+ case PyUnicode_4BYTE_KIND:
+ COMPARE(Py_UCS1, Py_UCS4);
+ break;
+ default:
+ assert(0);
+ }
+ break;
}
+ case PyUnicode_2BYTE_KIND:
+ {
+ switch(kind2) {
+ case PyUnicode_1BYTE_KIND:
+ COMPARE(Py_UCS2, Py_UCS1);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ {
+ COMPARE(Py_UCS2, Py_UCS2);
+ break;
+ }
+ case PyUnicode_4BYTE_KIND:
+ COMPARE(Py_UCS2, Py_UCS4);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ }
+ case PyUnicode_4BYTE_KIND:
+ {
+ switch(kind2) {
+ case PyUnicode_1BYTE_KIND:
+ COMPARE(Py_UCS4, Py_UCS1);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ COMPARE(Py_UCS4, Py_UCS2);
+ break;
+ case PyUnicode_4BYTE_KIND:
+ {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4
+ int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+ /* normalize result of wmemcmp() into the range [-1; 1] */
+ if (cmp < 0)
+ return -1;
+ if (cmp > 0)
+ return 1;
+#else
+ COMPARE(Py_UCS4, Py_UCS4);
+#endif
+ break;
+ }
+ default:
+ assert(0);
+ }
+ break;
+ }
+ default:
+ assert(0);
+ }
+
+ if (len1 == len2)
+ return 0;
+ if (len1 < len2)
+ return -1;
+ else
+ return 1;
+
+#undef COMPARE
+}
+
+static int
+unicode_compare_eq(PyObject *str1, PyObject *str2)
+{
+ int kind;
+ void *data1, *data2;
+ Py_ssize_t len;
+ int cmp;
+
+ /* a string is equal to itself */
+ if (str1 == str2)
+ return 1;
+
+ len = PyUnicode_GET_LENGTH(str1);
+ if (PyUnicode_GET_LENGTH(str2) != len)
+ return 0;
+ kind = PyUnicode_KIND(str1);
+ if (PyUnicode_KIND(str2) != kind)
+ return 0;
+ data1 = PyUnicode_DATA(str1);
+ data2 = PyUnicode_DATA(str2);
- return (len1 < len2) ? -1 : (len1 != len2);
+ cmp = memcmp(data1, data2, len * kind);
+ return (cmp == 0);
}
+
int
PyUnicode_Compare(PyObject *left, PyObject *right)
{
@@ -10483,36 +10594,27 @@ PyObject *
PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
{
int result;
+ PyObject *v;
- if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
- PyObject *v;
- if (PyUnicode_READY(left) == -1 ||
- PyUnicode_READY(right) == -1)
- return NULL;
- if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) ||
- PyUnicode_KIND(left) != PyUnicode_KIND(right)) {
- if (op == Py_EQ) {
- Py_INCREF(Py_False);
- return Py_False;
- }
- if (op == Py_NE) {
- Py_INCREF(Py_True);
- return Py_True;
- }
- }
- if (left == right)
- result = 0;
+ if (!PyUnicode_Check(left) || !PyUnicode_Check(right))
+ Py_RETURN_NOTIMPLEMENTED;
+
+ if (PyUnicode_READY(left) == -1 ||
+ PyUnicode_READY(right) == -1)
+ return NULL;
+
+ if (op == Py_EQ || op == Py_NE) {
+ result = unicode_compare_eq(left, right);
+ if (op == Py_EQ)
+ v = TEST_COND(result);
else
- result = unicode_compare(left, right);
+ v = TEST_COND(!result);
+ }
+ else {
+ result = unicode_compare(left, right);
/* Convert the return value to a Boolean */
switch (op) {
- case Py_EQ:
- v = TEST_COND(result == 0);
- break;
- case Py_NE:
- v = TEST_COND(result != 0);
- break;
case Py_LE:
v = TEST_COND(result <= 0);
break;
@@ -10529,18 +10631,16 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
PyErr_BadArgument();
return NULL;
}
- Py_INCREF(v);
- return v;
}
-
- Py_RETURN_NOTIMPLEMENTED;
+ Py_INCREF(v);
+ return v;
}
int
PyUnicode_Contains(PyObject *container, PyObject *element)
{
PyObject *str, *sub;
- int kind1, kind2, kind;
+ int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2;
int result;
@@ -10559,23 +10659,18 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
Py_DECREF(sub);
return -1;
}
- if (PyUnicode_READY(sub) == -1 || PyUnicode_READY(str) == -1) {
- Py_DECREF(sub);
- Py_DECREF(str);
- }
kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(sub);
- kind = kind1;
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(sub);
- if (kind2 != kind) {
- if (kind2 > kind) {
+ if (kind2 != kind1) {
+ if (kind2 > kind1) {
Py_DECREF(sub);
Py_DECREF(str);
return 0;
}
- buf2 = _PyUnicode_AsKind(sub, kind);
+ buf2 = _PyUnicode_AsKind(sub, kind1);
}
if (!buf2) {
Py_DECREF(sub);
@@ -10585,7 +10680,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
len1 = PyUnicode_GET_LENGTH(str);
len2 = PyUnicode_GET_LENGTH(sub);
- switch (kind) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1;
break;
@@ -10603,7 +10698,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
Py_DECREF(str);
Py_DECREF(sub);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return result;
@@ -10679,7 +10774,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
return;
}
left = *p_left;
- if (right == NULL || left == NULL || !PyUnicode_Check(left)) {
+ if (right == NULL || left == NULL
+ || !PyUnicode_Check(left) || !PyUnicode_Check(right)) {
if (!PyErr_Occurred())
PyErr_BadInternalCall();
goto error;
@@ -10719,15 +10815,9 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
&& !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
{
/* append inplace */
- if (unicode_resize(p_left, new_len) != 0) {
- /* XXX if _PyUnicode_Resize() fails, 'left' has been
- * deallocated so it cannot be put back into
- * 'variable'. The MemoryError is raised when there
- * is no value in 'variable', which might (very
- * remotely) be a cause of incompatibilities.
- */
+ if (unicode_resize(p_left, new_len) != 0)
goto error;
- }
+
/* copy 'right' into the newly allocated area of 'left' */
_PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
}
@@ -10783,8 +10873,10 @@ unicode_count(PyObject *self, PyObject *args)
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- if (kind2 > kind1)
+ if (kind2 > kind1) {
+ Py_DECREF(substring);
return PyLong_FromLong(0);
+ }
kind = kind1;
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
@@ -10962,10 +11054,14 @@ unicode_find(PyObject *self, PyObject *args)
&start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
+ if (PyUnicode_READY(self) == -1) {
+ Py_DECREF(substring);
return NULL;
- if (PyUnicode_READY(substring) == -1)
+ }
+ if (PyUnicode_READY(substring) == -1) {
+ Py_DECREF(substring);
return NULL;
+ }
result = any_find_slice(1, self, substring, start, end);
@@ -11088,10 +11184,14 @@ unicode_index(PyObject *self, PyObject *args)
&start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
+ if (PyUnicode_READY(self) == -1) {
+ Py_DECREF(substring);
return NULL;
- if (PyUnicode_READY(substring) == -1)
+ }
+ if (PyUnicode_READY(substring) == -1) {
+ Py_DECREF(substring);
return NULL;
+ }
result = any_find_slice(1, self, substring, start, end);
@@ -11621,6 +11721,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
int kind;
Py_ssize_t i, j, len;
BLOOM_MASK sepmask;
+ Py_ssize_t seplen;
if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
return NULL;
@@ -11628,24 +11729,35 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
len = PyUnicode_GET_LENGTH(self);
+ seplen = PyUnicode_GET_LENGTH(sepobj);
sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
PyUnicode_DATA(sepobj),
- PyUnicode_GET_LENGTH(sepobj));
+ seplen);
i = 0;
if (striptype != RIGHTSTRIP) {
- while (i < len &&
- BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) {
+ while (i < len) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ if (!BLOOM(sepmask, ch))
+ break;
+ if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+ break;
i++;
}
}
j = len;
if (striptype != LEFTSTRIP) {
- do {
+ j--;
+ while (j >= i) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+ if (!BLOOM(sepmask, ch))
+ break;
+ if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+ break;
j--;
- } while (j >= i &&
- BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj));
+ }
+
j++;
}
@@ -11692,30 +11804,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
static PyObject *
do_strip(PyObject *self, int striptype)
{
- int kind;
- void *data;
Py_ssize_t len, i, j;
if (PyUnicode_READY(self) == -1)
return NULL;
- kind = PyUnicode_KIND(self);
- data = PyUnicode_DATA(self);
len = PyUnicode_GET_LENGTH(self);
- i = 0;
- if (striptype != RIGHTSTRIP) {
- while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
- i++;
+ if (PyUnicode_IS_ASCII(self)) {
+ Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+
+ i = 0;
+ if (striptype != RIGHTSTRIP) {
+ while (i < len) {
+ Py_UCS1 ch = data[i];
+ if (!_Py_ascii_whitespace[ch])
+ break;
+ i++;
+ }
+ }
+
+ j = len;
+ if (striptype != LEFTSTRIP) {
+ j--;
+ while (j >= i) {
+ Py_UCS1 ch = data[j];
+ if (!_Py_ascii_whitespace[ch])
+ break;
+ j--;
+ }
+ j++;
}
}
+ else {
+ int kind = PyUnicode_KIND(self);
+ void *data = PyUnicode_DATA(self);
- j = len;
- if (striptype != LEFTSTRIP) {
- do {
+ i = 0;
+ if (striptype != RIGHTSTRIP) {
+ while (i < len) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ if (!Py_UNICODE_ISSPACE(ch))
+ break;
+ i++;
+ }
+ }
+
+ j = len;
+ if (striptype != LEFTSTRIP) {
j--;
- } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j)));
- j++;
+ while (j >= i) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+ if (!Py_UNICODE_ISSPACE(ch))
+ break;
+ j--;
+ }
+ j++;
+ }
}
return PyUnicode_Substring(self, i, j);
@@ -11939,7 +12084,7 @@ unicode_repr(PyObject *unicode)
Py_ssize_t isize;
Py_ssize_t osize, squote, dquote, i, o;
Py_UCS4 max, quote;
- int ikind, okind;
+ int ikind, okind, unchanged;
void *idata, *odata;
if (PyUnicode_READY(unicode) == -1)
@@ -11950,7 +12095,7 @@ unicode_repr(PyObject *unicode)
/* Compute length of output, quote characters, and
maximum character */
- osize = 2; /* quotes */
+ osize = 0;
max = 127;
squote = dquote = 0;
ikind = PyUnicode_KIND(unicode);
@@ -11981,7 +12126,9 @@ unicode_repr(PyObject *unicode)
}
quote = '\'';
+ unchanged = (osize == isize);
if (squote) {
+ unchanged = 0;
if (dquote)
/* Both squote and dquote present. Use squote,
and escape them */
@@ -11989,6 +12136,7 @@ unicode_repr(PyObject *unicode)
else
quote = '"';
}
+ osize += 2; /* quotes */
repr = PyUnicode_New(osize, max);
if (repr == NULL)
@@ -11998,82 +12146,88 @@ unicode_repr(PyObject *unicode)
PyUnicode_WRITE(okind, odata, 0, quote);
PyUnicode_WRITE(okind, odata, osize-1, quote);
+ if (unchanged) {
+ _PyUnicode_FastCopyCharacters(repr, 1,
+ unicode, 0,
+ isize);
+ }
+ else {
+ for (i = 0, o = 1; i < isize; i++) {
+ Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
- for (i = 0, o = 1; i < isize; i++) {
- Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
-
- /* Escape quotes and backslashes */
- if ((ch == quote) || (ch == '\\')) {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, ch);
- continue;
- }
+ /* Escape quotes and backslashes */
+ if ((ch == quote) || (ch == '\\')) {
+ PyUnicode_WRITE(okind, odata, o++, '\\');
+ PyUnicode_WRITE(okind, odata, o++, ch);
+ continue;
+ }
- /* Map special whitespace to '\t', \n', '\r' */
- if (ch == '\t') {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 't');
- }
- else if (ch == '\n') {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 'n');
- }
- else if (ch == '\r') {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 'r');
- }
+ /* Map special whitespace to '\t', '\n', '\r' */
+ if (ch == '\t') {
+ PyUnicode_WRITE(okind, odata, o++, '\\');
+ PyUnicode_WRITE(okind, odata, o++, 't');
+ }
+ else if (ch == '\n') {
+ PyUnicode_WRITE(okind, odata, o++, '\\');
+ PyUnicode_WRITE(okind, odata, o++, 'n');
+ }
+ else if (ch == '\r') {
+ PyUnicode_WRITE(okind, odata, o++, '\\');
+ PyUnicode_WRITE(okind, odata, o++, 'r');
+ }
- /* Map non-printable US ASCII to '\xhh' */
- else if (ch < ' ' || ch == 0x7F) {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 'x');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
- }
+ /* Map non-printable US ASCII to '\xhh' */
+ else if (ch < ' ' || ch == 0x7F) {
+ PyUnicode_WRITE(okind, odata, o++, '\\');
+ PyUnicode_WRITE(okind, odata, o++, 'x');
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+ }
- /* Copy ASCII characters as-is */
- else if (ch < 0x7F) {
- PyUnicode_WRITE(okind, odata, o++, ch);
- }
+ /* Copy ASCII characters as-is */
+ else if (ch < 0x7F) {
+ PyUnicode_WRITE(okind, odata, o++, ch);
+ }
- /* Non-ASCII characters */
- else {
- /* Map Unicode whitespace and control characters
- (categories Z* and C* except ASCII space)
- */
- if (!Py_UNICODE_ISPRINTABLE(ch)) {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- /* Map 8-bit characters to '\xhh' */
- if (ch <= 0xff) {
- PyUnicode_WRITE(okind, odata, o++, 'x');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
- }
- /* Map 16-bit characters to '\uxxxx' */
- else if (ch <= 0xffff) {
- PyUnicode_WRITE(okind, odata, o++, 'u');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+ /* Non-ASCII characters */
+ else {
+ /* Map Unicode whitespace and control characters
+ (categories Z* and C* except ASCII space)
+ */
+ if (!Py_UNICODE_ISPRINTABLE(ch)) {
+ PyUnicode_WRITE(okind, odata, o++, '\\');
+ /* Map 8-bit characters to '\xhh' */
+ if (ch <= 0xff) {
+ PyUnicode_WRITE(okind, odata, o++, 'x');
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+ }
+ /* Map 16-bit characters to '\uxxxx' */
+ else if (ch <= 0xffff) {
+ PyUnicode_WRITE(okind, odata, o++, 'u');
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+ }
+ /* Map 21-bit characters to '\U00xxxxxx' */
+ else {
+ PyUnicode_WRITE(okind, odata, o++, 'U');
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+ }
}
- /* Map 21-bit characters to '\U00xxxxxx' */
+ /* Copy characters as-is */
else {
- PyUnicode_WRITE(okind, odata, o++, 'U');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+ PyUnicode_WRITE(okind, odata, o++, ch);
}
}
- /* Copy characters as-is */
- else {
- PyUnicode_WRITE(okind, odata, o++, ch);
- }
}
}
/* Closing quote already added at the beginning */
@@ -12102,10 +12256,14 @@ unicode_rfind(PyObject *self, PyObject *args)
&start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
+ if (PyUnicode_READY(self) == -1) {
+ Py_DECREF(substring);
return NULL;
- if (PyUnicode_READY(substring) == -1)
+ }
+ if (PyUnicode_READY(substring) == -1) {
+ Py_DECREF(substring);
return NULL;
+ }
result = any_find_slice(-1, self, substring, start, end);
@@ -12134,10 +12292,14 @@ unicode_rindex(PyObject *self, PyObject *args)
&start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
+ if (PyUnicode_READY(self) == -1) {
+ Py_DECREF(substring);
return NULL;
- if (PyUnicode_READY(substring) == -1)
+ }
+ if (PyUnicode_READY(substring) == -1) {
+ Py_DECREF(substring);
return NULL;
+ }
result = any_find_slice(-1, self, substring, start, end);
@@ -12726,6 +12888,8 @@ unicode_startswith(PyObject *self,
return NULL;
result = tailmatch(self, substring, start, end, -1);
Py_DECREF(substring);
+ if (result == -1)
+ return NULL;
if (result) {
Py_RETURN_TRUE;
}
@@ -12742,6 +12906,8 @@ unicode_startswith(PyObject *self,
}
result = tailmatch(self, substring, start, end, -1);
Py_DECREF(substring);
+ if (result == -1)
+ return NULL;
return PyBool_FromLong(result);
}
@@ -12775,6 +12941,8 @@ unicode_endswith(PyObject *self,
return NULL;
result = tailmatch(self, substring, start, end, +1);
Py_DECREF(substring);
+ if (result == -1)
+ return NULL;
if (result) {
Py_RETURN_TRUE;
}
@@ -12790,27 +12958,35 @@ unicode_endswith(PyObject *self,
}
result = tailmatch(self, substring, start, end, +1);
Py_DECREF(substring);
+ if (result == -1)
+ return NULL;
return PyBool_FromLong(result);
}
Py_LOCAL_INLINE(void)
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
{
- writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+ if (!writer->readonly)
+ writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+ else {
+ /* Copy-on-write mode: set buffer size to 0 so
+ * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
+ * next write. */
+ writer->size = 0;
+ }
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
writer->data = PyUnicode_DATA(writer->buffer);
writer->kind = PyUnicode_KIND(writer->buffer);
}
void
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
{
memset(writer, 0, sizeof(*writer));
#ifdef Py_DEBUG
writer->kind = 5; /* invalid kind */
#endif
- writer->min_length = Py_MAX(min_length, 100);
- writer->overallocate = (min_length > 0);
+ writer->min_char = 127;
}
int
@@ -12828,29 +13004,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
}
newlen = writer->pos + length;
+ maxchar = Py_MAX(maxchar, writer->min_char);
+
if (writer->buffer == NULL) {
- if (writer->overallocate) {
+ assert(!writer->readonly);
+ if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) {
/* overallocate 25% to limit the number of resize */
- if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
- newlen += newlen / 4;
- if (newlen < writer->min_length)
- newlen = writer->min_length;
+ newlen += newlen / 4;
}
+ if (newlen < writer->min_length)
+ newlen = writer->min_length;
+
writer->buffer = PyUnicode_New(newlen, maxchar);
if (writer->buffer == NULL)
return -1;
- _PyUnicodeWriter_Update(writer);
- return 0;
}
-
- if (newlen > writer->size) {
- if (writer->overallocate) {
+ else if (newlen > writer->size) {
+ if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) {
/* overallocate 25% to limit the number of resize */
- if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
- newlen += newlen / 4;
- if (newlen < writer->min_length)
- newlen = writer->min_length;
+ newlen += newlen / 4;
}
+ if (newlen < writer->min_length)
+ newlen = writer->min_length;
if (maxchar > writer->maxchar || writer->readonly) {
/* resize + widen */
@@ -12868,7 +13043,6 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
return -1;
}
writer->buffer = newbuffer;
- _PyUnicodeWriter_Update(writer);
}
else if (maxchar > writer->maxchar) {
assert(!writer->readonly);
@@ -12879,12 +13053,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
writer->buffer, 0, writer->pos);
Py_DECREF(writer->buffer);
writer->buffer = newbuffer;
- _PyUnicodeWriter_Update(writer);
}
+ _PyUnicodeWriter_Update(writer);
+ return 0;
+}
+
+Py_LOCAL_INLINE(int)
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+ if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)
+ return -1;
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
+ writer->pos++;
return 0;
}
int
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+ return _PyUnicodeWriter_WriteCharInline(writer, ch);
+}
+
+int
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
{
Py_UCS4 maxchar;
@@ -12898,11 +13088,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
maxchar = PyUnicode_MAX_CHAR_VALUE(str);
if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
if (writer->buffer == NULL && !writer->overallocate) {
+ writer->readonly = 1;
Py_INCREF(str);
writer->buffer = str;
_PyUnicodeWriter_Update(writer);
- writer->readonly = 1;
- writer->size = 0;
writer->pos += len;
return 0;
}
@@ -12915,28 +13104,82 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
return 0;
}
+int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+ Py_ssize_t start, Py_ssize_t end)
+{
+ Py_UCS4 maxchar;
+ Py_ssize_t len;
+
+ if (PyUnicode_READY(str) == -1)
+ return -1;
+
+ assert(0 <= start);
+ assert(end <= PyUnicode_GET_LENGTH(str));
+ assert(start <= end);
+
+ if (end == 0)
+ return 0;
+
+ if (start == 0 && end == PyUnicode_GET_LENGTH(str))
+ return _PyUnicodeWriter_WriteStr(writer, str);
+
+ if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+ maxchar = _PyUnicode_FindMaxChar(str, start, end);
+ else
+ maxchar = writer->maxchar;
+ len = end - start;
+
+ if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
+ return -1;
+
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ str, start, len);
+ writer->pos += len;
+ return 0;
+}
+
+int
+_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
+{
+ Py_UCS4 maxchar;
+
+ maxchar = ucs1lib_find_max_char((Py_UCS1*)str, (Py_UCS1*)str + len);
+ if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
+ return -1;
+ unicode_write_cstr(writer->buffer, writer->pos, str, len);
+ writer->pos += len;
+ return 0;
+}
+
PyObject *
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
{
+ PyObject *str;
if (writer->pos == 0) {
- Py_XDECREF(writer->buffer);
+ Py_CLEAR(writer->buffer);
_Py_RETURN_UNICODE_EMPTY();
}
if (writer->readonly) {
- assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
- return writer->buffer;
+ str = writer->buffer;
+ writer->buffer = NULL;
+ assert(PyUnicode_GET_LENGTH(str) == writer->pos);
+ return str;
}
if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
PyObject *newbuffer;
newbuffer = resize_compact(writer->buffer, writer->pos);
if (newbuffer == NULL) {
Py_DECREF(writer->buffer);
+ writer->buffer = NULL;
return NULL;
}
writer->buffer = newbuffer;
}
- assert(_PyUnicode_CheckConsistency(writer->buffer, 1));
- return unicode_result_ready(writer->buffer);
+ str = writer->buffer;
+ writer->buffer = NULL;
+ assert(_PyUnicode_CheckConsistency(str, 1));
+ return unicode_result_ready(str);
}
void
@@ -12971,7 +13214,7 @@ unicode__format__(PyObject* self, PyObject* args)
if (PyUnicode_READY(self) == -1)
return NULL;
- _PyUnicodeWriter_Init(&writer, 0);
+ _PyUnicodeWriter_Init(&writer);
ret = _PyUnicode_FormatAdvancedWriter(&writer,
self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec));
@@ -13190,16 +13433,39 @@ static PyMappingMethods unicode_as_mapping = {
/* Helpers for PyUnicode_Format() */
+struct unicode_formatter_t {
+ PyObject *args;
+ int args_owned;
+ Py_ssize_t arglen, argidx;
+ PyObject *dict;
+
+ enum PyUnicode_Kind fmtkind;
+ Py_ssize_t fmtcnt, fmtpos;
+ void *fmtdata;
+ PyObject *fmtstr;
+
+ _PyUnicodeWriter writer;
+};
+
+struct unicode_format_arg_t {
+ Py_UCS4 ch;
+ int flags;
+ Py_ssize_t width;
+ int prec;
+ int sign;
+};
+
static PyObject *
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+unicode_format_getnextarg(struct unicode_formatter_t *ctx)
{
- Py_ssize_t argidx = *p_argidx;
- if (argidx < arglen) {
- (*p_argidx)++;
- if (arglen < 0)
- return args;
+ Py_ssize_t argidx = ctx->argidx;
+
+ if (argidx < ctx->arglen) {
+ ctx->argidx++;
+ if (ctx->arglen < 0)
+ return ctx->args;
else
- return PyTuple_GetItem(args, argidx);
+ return PyTuple_GetItem(ctx->args, argidx);
}
PyErr_SetString(PyExc_TypeError,
"not enough arguments for format string");
@@ -13208,23 +13474,34 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
+/* Format a float into the writer if the writer is not NULL, or into *p_output
+ otherwise.
+
+ Return 0 on success, raise an exception and return -1 on error. */
static int
-formatfloat(PyObject *v, int flags, int prec, int type,
- PyObject **p_output, _PyUnicodeWriter *writer)
+formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
+ PyObject **p_output,
+ _PyUnicodeWriter *writer)
{
char *p;
double x;
Py_ssize_t len;
+ int prec;
+ int dtoa_flags;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
return -1;
+ prec = arg->prec;
if (prec < 0)
prec = 6;
- p = PyOS_double_to_string(x, type, prec,
- (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+ if (arg->flags & F_ALT)
+ dtoa_flags = Py_DTSF_ALT;
+ else
+ dtoa_flags = 0;
+ p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
if (p == NULL)
return -1;
len = strlen(p);
@@ -13261,7 +13538,7 @@ formatfloat(PyObject *v, int flags, int prec, int type,
* produce a '-' sign, but can for Python's unbounded ints.
*/
static PyObject*
-formatlong(PyObject *val, int flags, int prec, int type)
+formatlong(PyObject *val, struct unicode_format_arg_t *arg)
{
PyObject *result = NULL;
char *buf;
@@ -13271,6 +13548,8 @@ formatlong(PyObject *val, int flags, int prec, int type)
Py_ssize_t llen;
int numdigits; /* len == numnondigits + numdigits */
int numnondigits = 0;
+ int prec = arg->prec;
+ int type = arg->ch;
/* Avoid exceeding SSIZE_T_MAX */
if (prec > INT_MAX-3) {
@@ -13282,7 +13561,10 @@ formatlong(PyObject *val, int flags, int prec, int type)
assert(PyLong_Check(val));
switch (type) {
+ default:
+ assert(!"'type' not in [diuoxX]");
case 'd':
+ case 'i':
case 'u':
/* Special-case boolean: we want 0/1 */
if (PyBool_Check(val))
@@ -13299,8 +13581,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
numnondigits = 2;
result = PyNumber_ToBase(val, 16);
break;
- default:
- assert(!"'type' not in [duoxX]");
}
if (!result)
return NULL;
@@ -13311,12 +13591,14 @@ formatlong(PyObject *val, int flags, int prec, int type)
/* To modify the string in-place, there can only be one reference. */
if (Py_REFCNT(result) != 1) {
+ Py_DECREF(result);
PyErr_BadInternalCall();
return NULL;
}
buf = PyUnicode_DATA(result);
llen = PyUnicode_GET_LENGTH(result);
if (llen > INT_MAX) {
+ Py_DECREF(result);
PyErr_SetString(PyExc_ValueError,
"string too large in _PyBytes_FormatLong");
return NULL;
@@ -13328,7 +13610,7 @@ formatlong(PyObject *val, int flags, int prec, int type)
assert(numdigits > 0);
/* Get rid of base marker unless F_ALT */
- if (((flags & F_ALT) == 0 &&
+ if (((arg->flags & F_ALT) == 0 &&
(type == 'o' || type == 'x' || type == 'X'))) {
assert(buf[sign] == '0');
assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@@ -13373,15 +13655,100 @@ formatlong(PyObject *val, int flags, int prec, int type)
if (buf[i] >= 'a' && buf[i] <= 'x')
buf[i] -= 'a'-'A';
}
- if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
+ if (!PyUnicode_Check(result)
+ || buf != PyUnicode_DATA(result)) {
PyObject *unicode;
unicode = _PyUnicode_FromASCII(buf, len);
Py_DECREF(result);
result = unicode;
}
+ else if (len != PyUnicode_GET_LENGTH(result)) {
+ if (PyUnicode_Resize(&result, len) < 0)
+ Py_CLEAR(result);
+ }
return result;
}
+/* Format an integer.
+ * Return 1 if the number has been formatted into the writer,
+ * 0 if the number has been formatted into *p_output,
+ * or raise an exception and return -1 on error. */
+static int
+mainformatlong(PyObject *v,
+ struct unicode_format_arg_t *arg,
+ PyObject **p_output,
+ _PyUnicodeWriter *writer)
+{
+ PyObject *iobj, *res;
+ char type = (char)arg->ch;
+
+ if (!PyNumber_Check(v))
+ goto wrongtype;
+
+ if (!PyLong_Check(v)) {
+ iobj = PyNumber_Long(v);
+ if (iobj == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError))
+ goto wrongtype;
+ return -1;
+ }
+ assert(PyLong_Check(iobj));
+ }
+ else {
+ iobj = v;
+ Py_INCREF(iobj);
+ }
+
+ if (PyLong_CheckExact(v)
+ && arg->width == -1 && arg->prec == -1
+ && !(arg->flags & (F_SIGN | F_BLANK))
+ && type != 'X')
+ {
+ /* Fast path */
+ int alternate = arg->flags & F_ALT;
+ int base;
+
+ switch(type)
+ {
+ default:
+ assert(0 && "'type' not in [diuoxX]");
+ case 'd':
+ case 'i':
+ case 'u':
+ base = 10;
+ break;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ }
+
+ if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
+ Py_DECREF(iobj);
+ return -1;
+ }
+ Py_DECREF(iobj);
+ return 1;
+ }
+
+ res = formatlong(iobj, arg);
+ Py_DECREF(iobj);
+ if (res == NULL)
+ return -1;
+ *p_output = res;
+ return 0;
+
+wrongtype:
+ PyErr_Format(PyExc_TypeError,
+ "%%%c format: a number is required, "
+ "not %.200s",
+ type, Py_TYPE(v)->tp_name);
+ return -1;
+}
+
static Py_UCS4
formatchar(PyObject *v)
{
@@ -13414,540 +13781,587 @@ formatchar(PyObject *v)
return (Py_UCS4) -1;
}
-PyObject *
-PyUnicode_Format(PyObject *format, PyObject *args)
-{
- Py_ssize_t fmtcnt, fmtpos, arglen, argidx;
- int args_owned = 0;
- PyObject *dict = NULL;
- PyObject *temp = NULL;
- PyObject *second = NULL;
- PyObject *uformat;
- void *fmt;
- enum PyUnicode_Kind kind, fmtkind;
- _PyUnicodeWriter writer;
- Py_ssize_t sublen;
- Py_UCS4 maxchar;
+/* Parse options of an argument: flags, width, precision.
+ Handle also "%(name)" syntax.
- if (format == NULL || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
- }
- uformat = PyUnicode_FromObject(format);
- if (uformat == NULL)
- return NULL;
- if (PyUnicode_READY(uformat) == -1) {
- Py_DECREF(uformat);
- return NULL;
- }
+ Return 0 on success: arg->ch is then the conversion character and
+ arg->flags, arg->width and arg->prec hold the parsed options.
+ Raise an exception and return -1 on error. */
+static int
+unicode_format_arg_parse(struct unicode_formatter_t *ctx,
+ struct unicode_format_arg_t *arg)
+{
+#define FORMAT_READ(ctx) \
+ PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
- fmt = PyUnicode_DATA(uformat);
- fmtkind = PyUnicode_KIND(uformat);
- fmtcnt = PyUnicode_GET_LENGTH(uformat);
- fmtpos = 0;
+ PyObject *v;
- _PyUnicodeWriter_Init(&writer, fmtcnt + 100);
+ if (arg->ch == '(') {
+ /* Get argument value from a dictionary. Example: "%(name)s". */
+ Py_ssize_t keystart;
+ Py_ssize_t keylen;
+ PyObject *key;
+ int pcount = 1;
- if (PyTuple_Check(args)) {
- arglen = PyTuple_Size(args);
- argidx = 0;
- }
- else {
- arglen = -1;
- argidx = -2;
+ if (ctx->dict == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "format requires a mapping");
+ return -1;
+ }
+ ++ctx->fmtpos;
+ --ctx->fmtcnt;
+ keystart = ctx->fmtpos;
+ /* Skip over balanced parentheses */
+ while (pcount > 0 && --ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ if (arg->ch == ')')
+ --pcount;
+ else if (arg->ch == '(')
+ ++pcount;
+ ctx->fmtpos++;
+ }
+ keylen = ctx->fmtpos - keystart - 1;
+ if (ctx->fmtcnt < 0 || pcount > 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format key");
+ return -1;
+ }
+ key = PyUnicode_Substring(ctx->fmtstr,
+ keystart, keystart + keylen);
+ if (key == NULL)
+ return -1;
+ if (ctx->args_owned) {
+ Py_DECREF(ctx->args);
+ ctx->args_owned = 0;
+ }
+ ctx->args = PyObject_GetItem(ctx->dict, key);
+ Py_DECREF(key);
+ if (ctx->args == NULL)
+ return -1;
+ ctx->args_owned = 1;
+ ctx->arglen = -1;
+ ctx->argidx = -2;
+ }
+
+ /* Parse flags. Example: "%+i" => flags=F_SIGN. */
+ while (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ switch (arg->ch) {
+ case '-': arg->flags |= F_LJUST; continue;
+ case '+': arg->flags |= F_SIGN; continue;
+ case ' ': arg->flags |= F_BLANK; continue;
+ case '#': arg->flags |= F_ALT; continue;
+ case '0': arg->flags |= F_ZERO; continue;
+ }
+ break;
}
- if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
- dict = args;
-
- while (--fmtcnt >= 0) {
- if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
- Py_ssize_t nonfmtpos;
- nonfmtpos = fmtpos++;
- while (fmtcnt >= 0 &&
- PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
- fmtpos++;
- fmtcnt--;
- }
- if (fmtcnt < 0)
- fmtpos--;
- sublen = fmtpos - nonfmtpos;
- maxchar = _PyUnicode_FindMaxChar(uformat,
- nonfmtpos, nonfmtpos + sublen);
- if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1)
- goto onError;
- _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
- uformat, nonfmtpos, sublen);
- writer.pos += sublen;
+ /* Parse width. Example: "%10s" => width=10 */
+ if (arg->ch == '*') {
+ v = unicode_format_getnextarg(ctx);
+ if (v == NULL)
+ return -1;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ return -1;
}
- else {
- /* Got a format specifier */
- int flags = 0;
- Py_ssize_t width = -1;
- int prec = -1;
- Py_UCS4 c = '\0';
- Py_UCS4 fill;
- int sign;
- Py_UCS4 signchar;
- int isnumok;
- PyObject *v = NULL;
- void *pbuf = NULL;
- Py_ssize_t pindex, len;
- Py_UCS4 bufmaxchar;
- Py_ssize_t buflen;
-
- fmtpos++;
- c = PyUnicode_READ(fmtkind, fmt, fmtpos);
- if (c == '(') {
- Py_ssize_t keystart;
- Py_ssize_t keylen;
- PyObject *key;
- int pcount = 1;
-
- if (dict == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "format requires a mapping");
- goto onError;
- }
- ++fmtpos;
- --fmtcnt;
- keystart = fmtpos;
- /* Skip over balanced parentheses */
- while (pcount > 0 && --fmtcnt >= 0) {
- c = PyUnicode_READ(fmtkind, fmt, fmtpos);
- if (c == ')')
- --pcount;
- else if (c == '(')
- ++pcount;
- fmtpos++;
- }
- keylen = fmtpos - keystart - 1;
- if (fmtcnt < 0 || pcount > 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format key");
- goto onError;
- }
- key = PyUnicode_Substring(uformat,
- keystart, keystart + keylen);
- if (key == NULL)
- goto onError;
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- args = PyObject_GetItem(dict, key);
- Py_DECREF(key);
- if (args == NULL) {
- goto onError;
- }
- args_owned = 1;
- arglen = -1;
- argidx = -2;
- }
- while (--fmtcnt >= 0) {
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
- switch (c) {
- case '-': flags |= F_LJUST; continue;
- case '+': flags |= F_SIGN; continue;
- case ' ': flags |= F_BLANK; continue;
- case '#': flags |= F_ALT; continue;
- case '0': flags |= F_ZERO; continue;
- }
+ arg->width = PyLong_AsSsize_t(v);
+ if (arg->width == -1 && PyErr_Occurred())
+ return -1;
+ if (arg->width < 0) {
+ arg->flags |= F_LJUST;
+ arg->width = -arg->width;
+ }
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ }
+ else if (arg->ch >= '0' && arg->ch <= '9') {
+ arg->width = arg->ch - '0';
+ while (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ if (arg->ch < '0' || arg->ch > '9')
break;
+ /* Since arg->ch is unsigned, the RHS would end up as unsigned,
+ mixing signed and unsigned comparison. Since arg->ch is between
+ '0' and '9', casting to int is safe. */
+ if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
+ PyErr_SetString(PyExc_ValueError,
+ "width too big");
+ return -1;
}
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- if (!PyLong_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto onError;
- }
- width = PyLong_AsSsize_t(v);
- if (width == -1 && PyErr_Occurred())
- goto onError;
- if (width < 0) {
- flags |= F_LJUST;
- width = -width;
- }
- if (--fmtcnt >= 0)
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
- }
- else if (c >= '0' && c <= '9') {
- width = c - '0';
- while (--fmtcnt >= 0) {
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
- if (c < '0' || c > '9')
- break;
- /* Since c is unsigned, the RHS would end up as unsigned,
- mixing signed and unsigned comparison. Since c is between
- '0' and '9', casting to int is safe. */
- if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
- PyErr_SetString(PyExc_ValueError,
- "width too big");
- goto onError;
- }
- width = width*10 + (c - '0');
- }
+ arg->width = arg->width*10 + (arg->ch - '0');
+ }
+ }
+
+ /* Parse precision. Example: "%.3f" => prec=3 */
+ if (arg->ch == '.') {
+ arg->prec = 0;
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ if (arg->ch == '*') {
+ v = unicode_format_getnextarg(ctx);
+ if (v == NULL)
+ return -1;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ return -1;
}
- if (c == '.') {
- prec = 0;
- if (--fmtcnt >= 0)
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- if (!PyLong_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto onError;
- }
- prec = _PyLong_AsInt(v);
- if (prec == -1 && PyErr_Occurred())
- goto onError;
- if (prec < 0)
- prec = 0;
- if (--fmtcnt >= 0)
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
- }
- else if (c >= '0' && c <= '9') {
- prec = c - '0';
- while (--fmtcnt >= 0) {
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
- if (c < '0' || c > '9')
- break;
- if (prec > (INT_MAX - ((int)c - '0')) / 10) {
- PyErr_SetString(PyExc_ValueError,
- "prec too big");
- goto onError;
- }
- prec = prec*10 + (c - '0');
- }
- }
- } /* prec */
- if (fmtcnt >= 0) {
- if (c == 'h' || c == 'l' || c == 'L') {
- if (--fmtcnt >= 0)
- c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
+ arg->prec = _PyLong_AsInt(v);
+ if (arg->prec == -1 && PyErr_Occurred())
+ return -1;
+ if (arg->prec < 0)
+ arg->prec = 0;
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ }
+ else if (arg->ch >= '0' && arg->ch <= '9') {
+ arg->prec = arg->ch - '0';
+ while (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ if (arg->ch < '0' || arg->ch > '9')
+ break;
+ if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
+ PyErr_SetString(PyExc_ValueError,
+ "precision too big");
+ return -1;
}
+ arg->prec = arg->prec*10 + (arg->ch - '0');
}
- if (fmtcnt < 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format");
- goto onError;
- }
- if (fmtcnt == 0)
- writer.overallocate = 0;
+ }
+ }
- if (c == '%') {
- if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
- goto onError;
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%');
- writer.pos += 1;
- continue;
+ /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
+ if (ctx->fmtcnt >= 0) {
+ if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
}
+ }
+ }
+ if (ctx->fmtcnt < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format");
+ return -1;
+ }
+ return 0;
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
+#undef FORMAT_READ
+}
- sign = 0;
- signchar = '\0';
- fill = ' ';
- switch (c) {
-
- case 's':
- case 'r':
- case 'a':
- if (PyLong_CheckExact(v) && width == -1 && prec == -1) {
- /* Fast path */
- if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- }
+/* Format one argument. Supported conversion specifiers:
- if (PyUnicode_CheckExact(v) && c == 's') {
- temp = v;
- Py_INCREF(temp);
- }
- else {
- if (c == 's')
- temp = PyObject_Str(v);
- else if (c == 'r')
- temp = PyObject_Repr(v);
- else
- temp = PyObject_ASCII(v);
- }
- break;
+ - "s", "r", "a": any type
+ - "i", "d", "u", "o", "x", "X": int
+ - "e", "E", "f", "F", "g", "G": float
+ - "c": int or str (1 character)
- case 'i':
- case 'd':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- if (PyLong_CheckExact(v)
- && width == -1 && prec == -1
- && !(flags & (F_SIGN | F_BLANK)))
- {
- /* Fast path */
- switch(c)
- {
- case 'd':
- case 'i':
- case 'u':
- if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- case 'x':
- if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- case 'o':
- if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- default:
- break;
- }
- }
+ When possible, the output is written directly into the Unicode writer
+ (ctx->writer). A string is created when padding is required.
- isnumok = 0;
- if (PyNumber_Check(v)) {
- PyObject *iobj=NULL;
+ Return 0 if the argument has been formatted into *p_str,
+ 1 if the argument has been written into ctx->writer,
+ -1 on error. */
+static int
+unicode_format_arg_format(struct unicode_formatter_t *ctx,
+ struct unicode_format_arg_t *arg,
+ PyObject **p_str)
+{
+ PyObject *v;
+ _PyUnicodeWriter *writer = &ctx->writer;
- if (PyLong_Check(v)) {
- iobj = v;
- Py_INCREF(iobj);
- }
- else {
- iobj = PyNumber_Long(v);
- }
- if (iobj!=NULL) {
- if (PyLong_Check(iobj)) {
- isnumok = 1;
- sign = 1;
- temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
- Py_DECREF(iobj);
- }
- else {
- Py_DECREF(iobj);
- }
- }
- }
- if (!isnumok) {
- PyErr_Format(PyExc_TypeError,
- "%%%c format: a number is required, "
- "not %.200s", (char)c, Py_TYPE(v)->tp_name);
- goto onError;
- }
- if (flags & F_ZERO)
- fill = '0';
- break;
+ if (ctx->fmtcnt == 0)
+ ctx->writer.overallocate = 0;
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- if (width == -1 && prec == -1
- && !(flags & (F_SIGN | F_BLANK)))
- {
- /* Fast path */
- if (formatfloat(v, flags, prec, c, NULL, &writer) == -1)
- goto onError;
- goto nextarg;
- }
+ if (arg->ch == '%') {
+ if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+ return -1;
+ return 1;
+ }
- sign = 1;
- if (flags & F_ZERO)
- fill = '0';
- if (formatfloat(v, flags, prec, c, &temp, NULL) == -1)
- temp = NULL;
- break;
+ v = unicode_format_getnextarg(ctx);
+ if (v == NULL)
+ return -1;
- case 'c':
- {
- Py_UCS4 ch = formatchar(v);
- if (ch == (Py_UCS4) -1)
- goto onError;
- if (width == -1 && prec == -1) {
- /* Fast path */
- if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1)
- goto onError;
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
- writer.pos += 1;
- goto nextarg;
- }
- temp = PyUnicode_FromOrdinal(ch);
- break;
- }
- default:
- PyErr_Format(PyExc_ValueError,
- "unsupported format character '%c' (0x%x) "
- "at index %zd",
- (31<=c && c<=126) ? (char)c : '?',
- (int)c,
- fmtpos - 1);
- goto onError;
- }
- if (temp == NULL)
- goto onError;
- assert (PyUnicode_Check(temp));
+ switch (arg->ch) {
+ case 's':
+ case 'r':
+ case 'a':
+ if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
+ /* Fast path */
+ if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
+ return -1;
+ return 1;
+ }
- if (width == -1 && prec == -1
- && !(flags & (F_SIGN | F_BLANK)))
- {
- /* Fast path */
- if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1)
- goto onError;
- goto nextarg;
- }
+ if (PyUnicode_CheckExact(v) && arg->ch == 's') {
+ *p_str = v;
+ Py_INCREF(*p_str);
+ }
+ else {
+ if (arg->ch == 's')
+ *p_str = PyObject_Str(v);
+ else if (arg->ch == 'r')
+ *p_str = PyObject_Repr(v);
+ else
+ *p_str = PyObject_ASCII(v);
+ }
+ break;
- if (PyUnicode_READY(temp) == -1) {
- Py_CLEAR(temp);
- goto onError;
- }
- kind = PyUnicode_KIND(temp);
- pbuf = PyUnicode_DATA(temp);
- len = PyUnicode_GET_LENGTH(temp);
+ case 'i':
+ case 'd':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ {
+ int ret = mainformatlong(v, arg, p_str, writer);
+ if (ret != 0)
+ return ret;
+ arg->sign = 1;
+ break;
+ }
- if (c == 's' || c == 'r' || c == 'a') {
- if (prec >= 0 && len > prec)
- len = prec;
- }
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ if (arg->width == -1 && arg->prec == -1
+ && !(arg->flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ if (formatfloat(v, arg, NULL, writer) == -1)
+ return -1;
+ return 1;
+ }
- /* pbuf is initialized here. */
- pindex = 0;
- if (sign) {
- Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
- if (ch == '-' || ch == '+') {
- signchar = ch;
- len--;
- pindex++;
- }
- else if (flags & F_SIGN)
- signchar = '+';
- else if (flags & F_BLANK)
- signchar = ' ';
- else
- sign = 0;
- }
- if (width < len)
- width = len;
-
- /* Compute the length and maximum character of the
- written characters */
- bufmaxchar = 127;
- if (!(flags & F_LJUST)) {
- if (sign) {
- if ((width-1) > len)
- bufmaxchar = Py_MAX(bufmaxchar, fill);
- }
- else {
- if (width > len)
- bufmaxchar = Py_MAX(bufmaxchar, fill);
- }
- }
- maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
- bufmaxchar = Py_MAX(bufmaxchar, maxchar);
+ arg->sign = 1;
+ if (formatfloat(v, arg, p_str, NULL) == -1)
+ return -1;
+ break;
- buflen = width;
- if (sign && len == width)
- buflen++;
+ case 'c':
+ {
+ Py_UCS4 ch = formatchar(v);
+ if (ch == (Py_UCS4) -1)
+ return -1;
+ if (arg->width == -1 && arg->prec == -1) {
+ /* Fast path */
+ if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
+ return -1;
+ return 1;
+ }
+ *p_str = PyUnicode_FromOrdinal(ch);
+ break;
+ }
- if (_PyUnicodeWriter_Prepare(&writer, buflen, bufmaxchar) == -1)
- goto onError;
+ default:
+ PyErr_Format(PyExc_ValueError,
+ "unsupported format character '%c' (0x%x) "
+ "at index %zd",
+ (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
+ (int)arg->ch,
+ ctx->fmtpos - 1);
+ return -1;
+ }
+ if (*p_str == NULL)
+ return -1;
+ assert (PyUnicode_Check(*p_str));
+ return 0;
+}
- /* Write characters */
- if (sign) {
- if (fill != ' ') {
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
- writer.pos += 1;
- }
- if (width > len)
- width--;
- }
- if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
- assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
- assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
- if (fill != ' ') {
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
- writer.pos += 2;
- pindex += 2;
- }
- width -= 2;
- if (width < 0)
- width = 0;
- len -= 2;
- }
- if (width > len && !(flags & F_LJUST)) {
- sublen = width - len;
- FILL(writer.kind, writer.data, fill, writer.pos, sublen);
- writer.pos += sublen;
- width = len;
- }
- if (fill == ' ') {
- if (sign) {
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
- writer.pos += 1;
- }
- if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
- assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
- assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
- PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
- writer.pos += 2;
- pindex += 2;
- }
- }
+static int
+unicode_format_arg_output(struct unicode_formatter_t *ctx,
+ struct unicode_format_arg_t *arg,
+ PyObject *str)
+{
+ Py_ssize_t len;
+ enum PyUnicode_Kind kind;
+ void *pbuf;
+ Py_ssize_t pindex;
+ Py_UCS4 signchar;
+ Py_ssize_t buflen;
+ Py_UCS4 maxchar;
+ Py_ssize_t sublen;
+ _PyUnicodeWriter *writer = &ctx->writer;
+ Py_UCS4 fill;
+
+ fill = ' ';
+ if (arg->sign && arg->flags & F_ZERO)
+ fill = '0';
- if (len) {
- _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
- temp, pindex, len);
- writer.pos += len;
+ if (PyUnicode_READY(str) == -1)
+ return -1;
+
+ len = PyUnicode_GET_LENGTH(str);
+ if ((arg->width == -1 || arg->width <= len)
+ && (arg->prec == -1 || arg->prec >= len)
+ && !(arg->flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
+ return -1;
+ return 0;
+ }
+
+ /* Truncate the string for "s", "r" and "a" formats
+ if the precision is set */
+ if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
+ if (arg->prec >= 0 && len > arg->prec)
+ len = arg->prec;
+ }
+
+ /* Adjust sign and width */
+ kind = PyUnicode_KIND(str);
+ pbuf = PyUnicode_DATA(str);
+ pindex = 0;
+ signchar = '\0';
+ if (arg->sign) {
+ Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
+ if (ch == '-' || ch == '+') {
+ signchar = ch;
+ len--;
+ pindex++;
+ }
+ else if (arg->flags & F_SIGN)
+ signchar = '+';
+ else if (arg->flags & F_BLANK)
+ signchar = ' ';
+ else
+ arg->sign = 0;
+ }
+ if (arg->width < len)
+ arg->width = len;
+
+ /* Prepare the writer */
+ maxchar = writer->maxchar;
+ if (!(arg->flags & F_LJUST)) {
+ if (arg->sign) {
+ if ((arg->width-1) > len)
+ maxchar = Py_MAX(maxchar, fill);
+ }
+ else {
+ if (arg->width > len)
+ maxchar = Py_MAX(maxchar, fill);
+ }
+ }
+ if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
+ Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
+ maxchar = Py_MAX(maxchar, strmaxchar);
+ }
+
+ buflen = arg->width;
+ if (arg->sign && len == arg->width)
+ buflen++;
+ if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
+ return -1;
+
+ /* Write the sign if needed */
+ if (arg->sign) {
+ if (fill != ' ') {
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+ writer->pos += 1;
+ }
+ if (arg->width > len)
+ arg->width--;
+ }
+
+ /* Write the numeric prefix for "x", "X" and "o" formats
+ if the alternate form is used.
+ For example, write "0x" for the "%#x" format. */
+ if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+ assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+ assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
+ if (fill != ' ') {
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+ writer->pos += 2;
+ pindex += 2;
+ }
+ arg->width -= 2;
+ if (arg->width < 0)
+ arg->width = 0;
+ len -= 2;
+ }
+
+ /* Pad left with the fill character if needed */
+ if (arg->width > len && !(arg->flags & F_LJUST)) {
+ sublen = arg->width - len;
+ FILL(writer->kind, writer->data, fill, writer->pos, sublen);
+ writer->pos += sublen;
+ arg->width = len;
+ }
+
+ /* If padding with spaces: write sign if needed and/or numeric prefix if
+ the alternate form is used */
+ if (fill == ' ') {
+ if (arg->sign) {
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+ writer->pos += 1;
+ }
+ if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+ assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+ assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+ writer->pos += 2;
+ pindex += 2;
+ }
+ }
+
+ /* Write characters */
+ if (len) {
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ str, pindex, len);
+ writer->pos += len;
+ }
+
+ /* Pad right with the fill character if needed */
+ if (arg->width > len) {
+ sublen = arg->width - len;
+ FILL(writer->kind, writer->data, ' ', writer->pos, sublen);
+ writer->pos += sublen;
+ }
+ return 0;
+}
+
+/* Helper of PyUnicode_Format(): format one arg.
+
+ Runs three stages for a single conversion specifier: parse the options
+ (unicode_format_arg_parse), convert the value
+ (unicode_format_arg_format) and, unless the value has already been
+ written into ctx->writer, write the resulting string
+ (unicode_format_arg_output).
+
+ Return 0 on success, raise an exception and return -1 on error. */
+static int
+unicode_format_arg(struct unicode_formatter_t *ctx)
+{
+ struct unicode_format_arg_t arg;
+ PyObject *str;
+ int ret;
+
+ arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos); /* PyUnicode_Format() has already stepped over the '%'. NOTE(review): if '%' is the last character this reads index == length; presumably relies on a terminating null character -- confirm */
+ arg.flags = 0;
+ arg.width = -1; /* -1: no width given */
+ arg.prec = -1; /* -1: no precision given */
+ arg.sign = 0;
+ str = NULL;
+
+ ret = unicode_format_arg_parse(ctx, &arg);
+ if (ret == -1)
+ return -1;
+
+ ret = unicode_format_arg_format(ctx, &arg, &str);
+ if (ret == -1)
+ return -1;
+
+ if (ret != 1) { /* 0: the value is in str and still has to be written */
+ ret = unicode_format_arg_output(ctx, &arg, str);
+ Py_DECREF(str); /* released whether or not the output succeeded */
+ if (ret == -1)
+ return -1;
+ }
+
+ if (ctx->dict && (ctx->argidx < ctx->arglen) && arg.ch != '%') { /* mapping argument not consumed by this specifier; presumably unicode_format_getnextarg() advances ctx->argidx -- not visible here */
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during string formatting");
+ return -1;
+ }
+ return 0;
+}
+
+PyObject *
+PyUnicode_Format(PyObject *format, PyObject *args)
+{
+ struct unicode_formatter_t ctx;
+
+ if (format == NULL || args == NULL) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ ctx.fmtstr = PyUnicode_FromObject(format);
+ if (ctx.fmtstr == NULL)
+ return NULL;
+ if (PyUnicode_READY(ctx.fmtstr) == -1) {
+ Py_DECREF(ctx.fmtstr);
+ return NULL;
+ }
+ ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
+ ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
+ ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
+ ctx.fmtpos = 0;
+
+ _PyUnicodeWriter_Init(&ctx.writer);
+ ctx.writer.min_length = ctx.fmtcnt + 100;
+ ctx.writer.overallocate = 1;
+
+ if (PyTuple_Check(args)) {
+ ctx.arglen = PyTuple_Size(args);
+ ctx.argidx = 0;
+ }
+ else {
+ ctx.arglen = -1;
+ ctx.argidx = -2;
+ }
+ ctx.args_owned = 0;
+ if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
+ ctx.dict = args;
+ else
+ ctx.dict = NULL;
+ ctx.args = args;
+
+ while (--ctx.fmtcnt >= 0) {
+ if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+ Py_ssize_t nonfmtpos;
+
+ nonfmtpos = ctx.fmtpos++;
+ while (ctx.fmtcnt >= 0 &&
+ PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+ ctx.fmtpos++;
+ ctx.fmtcnt--;
}
- if (width > len) {
- sublen = width - len;
- FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
- writer.pos += sublen;
+ if (ctx.fmtcnt < 0) {
+ ctx.fmtpos--;
+ ctx.writer.overallocate = 0;
}
-nextarg:
- if (dict && (argidx < arglen) && c != '%') {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
+ if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+ nonfmtpos, ctx.fmtpos) < 0)
goto onError;
- }
- Py_CLEAR(temp);
- } /* '%' */
- } /* until end */
- if (argidx < arglen && !dict) {
+ }
+ else {
+ ctx.fmtpos++;
+ if (unicode_format_arg(&ctx) == -1)
+ goto onError;
+ }
+ }
+
+ if (ctx.argidx < ctx.arglen && !ctx.dict) {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during string formatting");
goto onError;
}
- if (args_owned) {
- Py_DECREF(args);
+ if (ctx.args_owned) {
+ Py_DECREF(ctx.args);
}
- Py_DECREF(uformat);
- Py_XDECREF(temp);
- Py_XDECREF(second);
- return _PyUnicodeWriter_Finish(&writer);
+ Py_DECREF(ctx.fmtstr);
+ return _PyUnicodeWriter_Finish(&ctx.writer);
onError:
- Py_DECREF(uformat);
- Py_XDECREF(temp);
- Py_XDECREF(second);
- _PyUnicodeWriter_Dealloc(&writer);
- if (args_owned) {
- Py_DECREF(args);
+ Py_DECREF(ctx.fmtstr);
+ _PyUnicodeWriter_Dealloc(&ctx.writer);
+ if (ctx.args_owned) {
+ Py_DECREF(ctx.args);
}
return NULL;
}
@@ -14236,12 +14650,12 @@ PyUnicode_InternInPlace(PyObject **p)
t = PyDict_GetItem(interned, s);
Py_END_ALLOW_RECURSION
- if (t) {
- Py_INCREF(t);
- Py_DECREF(*p);
- *p = t;
- return;
- }
+ if (t) {
+ Py_INCREF(t);
+ Py_DECREF(*p);
+ *p = t;
+ return;
+ }
PyThreadState_GET()->recursion_critical = 1;
if (PyDict_SetItem(interned, s, s) < 0) {
diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
index 46a92bb..1009bb3 100644
--- a/Objects/unicodetype_db.h
+++ b/Objects/unicodetype_db.h
@@ -1919,7 +1919,7 @@ static unsigned short index2[] = {
246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246,
247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95,
95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 6, 6, 6, 6, 25, 6, 6, 6, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 113, 5, 5,
@@ -2593,10 +2593,10 @@ static unsigned short index2[] = {
0, 0, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
- 141, 141, 141, 141, 141, 141, 141, 141, 141, 252, 252, 141, 141, 141,
141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
- 141, 141, 141, 252, 252, 141, 141, 141, 141, 141, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
141, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
@@ -2925,6 +2925,9 @@ static unsigned short index2[] = {
double _PyUnicode_ToNumeric(Py_UCS4 ch)
{
switch (ch) {
+ case 0x12456:
+ case 0x12457:
+ return (double) -1.0;
case 0x0F33:
return (double) -1.0/2.0;
case 0x0030:
@@ -3427,6 +3430,8 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
return (double) 20000.0;
case 0x3251:
return (double) 21.0;
+ case 0x12432:
+ return (double) 216000.0;
case 0x3252:
return (double) 22.0;
case 0x3253:
@@ -3721,6 +3726,8 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
return (double) 42.0;
case 0x32B8:
return (double) 43.0;
+ case 0x12433:
+ return (double) 432000.0;
case 0x32B9:
return (double) 44.0;
case 0x32BA:
diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c
index b49dcee..c083f8f 100644
--- a/Objects/weakrefobject.c
+++ b/Objects/weakrefobject.c
@@ -338,6 +338,11 @@ weakref___init__(PyObject *self, PyObject *args, PyObject *kwargs)
}
+static PyMemberDef weakref_members[] = { /* member table used as tp_members of _PyWeakref_RefType */
+ {"__callback__", T_OBJECT, offsetof(PyWeakReference, wr_callback), READONLY}, /* read-only attribute backed by the wr_callback field */
+ {NULL} /* Sentinel */
+};
+
PyTypeObject
_PyWeakref_RefType = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
@@ -369,7 +374,7 @@ _PyWeakref_RefType = {
0, /*tp_iter*/
0, /*tp_iternext*/
0, /*tp_methods*/
- 0, /*tp_members*/
+ weakref_members, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/