diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 82 | ||||
-rw-r--r-- | Objects/bufferobject.c | 89 | ||||
-rw-r--r-- | Objects/cellobject.c | 2 | ||||
-rw-r--r-- | Objects/classobject.c | 11 | ||||
-rw-r--r-- | Objects/codeobject.c | 1 | ||||
-rw-r--r-- | Objects/complexobject.c | 14 | ||||
-rw-r--r-- | Objects/descrobject.c | 25 | ||||
-rw-r--r-- | Objects/dictnotes.txt | 2 | ||||
-rw-r--r-- | Objects/dictobject.c | 7 | ||||
-rw-r--r-- | Objects/exceptions.c | 35 | ||||
-rw-r--r-- | Objects/fileobject.c | 23 | ||||
-rw-r--r-- | Objects/frameobject.c | 15 | ||||
-rw-r--r-- | Objects/funcobject.c | 6 | ||||
-rw-r--r-- | Objects/listobject.c | 4 | ||||
-rw-r--r-- | Objects/listsort.txt | 2 | ||||
-rw-r--r-- | Objects/longobject.c | 8 | ||||
-rw-r--r-- | Objects/object.c | 30 | ||||
-rw-r--r-- | Objects/setobject.c | 52 | ||||
-rw-r--r-- | Objects/stringobject.c | 335 | ||||
-rw-r--r-- | Objects/typeobject.c | 53 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 295 | ||||
-rw-r--r-- | Objects/weakrefobject.c | 4 |
22 files changed, 700 insertions, 395 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index 91ba8c2..de88457 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -9,9 +9,9 @@ /* Shorthands to return certain errors */ static PyObject * -type_error(const char *msg) +type_error(const char *msg, PyObject *obj) { - PyErr_SetString(PyExc_TypeError, msg); + PyErr_Format(PyExc_TypeError, msg, obj->ob_type->tp_name); return NULL; } @@ -125,10 +125,11 @@ PyObject_GetItem(PyObject *o, PyObject *key) return PySequence_GetItem(o, key_value); } else if (o->ob_type->tp_as_sequence->sq_item) - return type_error("sequence index must be integer"); + return type_error("sequence index must " + "be integer, not '%.200s'", key); } - return type_error("unsubscriptable object"); + return type_error("'%.200s' object is unsubscriptable", o); } int @@ -153,12 +154,13 @@ PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value) return PySequence_SetItem(o, key_value, value); } else if (o->ob_type->tp_as_sequence->sq_ass_item) { - type_error("sequence index must be integer"); + type_error("sequence index must be " + "integer, not '%.200s'", key); return -1; } } - type_error("object does not support item assignment"); + type_error("'%.200s' object does not support item assignment", o); return -1; } @@ -184,12 +186,13 @@ PyObject_DelItem(PyObject *o, PyObject *key) return PySequence_DelItem(o, key_value); } else if (o->ob_type->tp_as_sequence->sq_ass_item) { - type_error("sequence index must be integer"); + type_error("sequence index must be " + "integer, not '%.200s'", key); return -1; } } - type_error("object does not support item deletion"); + type_error("'%.200s' object does not support item deletion", o); return -1; } @@ -408,7 +411,8 @@ static PyObject * binop_type_error(PyObject *v, PyObject *w, const char *op_name) { PyErr_Format(PyExc_TypeError, - "unsupported operand type(s) for %s: '%s' and '%s'", + "unsupported operand type(s) for %.100s: " + "'%.100s' and '%.100s'", op_name, v->ob_type->tp_name, w->ob_type->tp_name); @@ -514,14 +518,14 @@ ternary_op(PyObject *v, PyErr_Format( PyExc_TypeError, "unsupported operand type(s) for ** or pow(): " - "'%s' and '%s'", + "'%.100s' and '%.100s'", v->ob_type->tp_name, w->ob_type->tp_name); else PyErr_Format( PyExc_TypeError, "unsupported operand type(s) for pow(): " - "'%s', '%s', '%s'", + "'%.100s', '%.100s', '%.100s'", v->ob_type->tp_name, w->ob_type->tp_name, z->ob_type->tp_name); @@ -568,8 +572,8 @@ sequence_repeat(ssizeargfunc repeatfunc, PyObject *seq, PyObject *n) return NULL; } else { - return type_error( - "can't multiply sequence by non-int"); + return type_error("can't multiply sequence by " + "non-int of type '%.200s'", n); } return (*repeatfunc)(seq, count); } @@ -776,7 +780,7 @@ PyNumber_Negative(PyObject *o) if (m && m->nb_negative) return (*m->nb_negative)(o); - return type_error("bad operand type for unary -"); + return type_error("bad operand type for unary -: '%.200s'", o); } PyObject * @@ -790,7 +794,7 @@ PyNumber_Positive(PyObject *o) if (m && m->nb_positive) return (*m->nb_positive)(o); - return type_error("bad operand type for unary +"); + return type_error("bad operand type for unary +: '%.200s'", o); } PyObject * @@ -804,7 +808,7 @@ PyNumber_Invert(PyObject *o) if (m && m->nb_invert) return (*m->nb_invert)(o); - return type_error("bad operand type for unary ~"); + return type_error("bad operand type for unary ~: '%.200s'", o); } PyObject * @@ -818,7 +822,7 @@ PyNumber_Absolute(PyObject *o) if (m && m->nb_absolute) return m->nb_absolute(o); - return type_error("bad operand type for abs()"); + return type_error("bad operand type for abs(): '%.200s'", o); } /* Add a check for embedded NULL-bytes in the argument. */ @@ -898,7 +902,8 @@ PyNumber_Int(PyObject *o) if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) return int_from_string((char*)buffer, buffer_len); - return type_error("int() argument must be a string or a number"); + return type_error("int() argument must be a string or a " + "number, not '%.200s'", o); } /* Add a check for embedded NULL-bytes in the argument. */ @@ -960,7 +965,8 @@ PyNumber_Long(PyObject *o) if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) return long_from_string(buffer, buffer_len); - return type_error("long() argument must be a string or a number"); + return type_error("long() argument must be a string or a " + "number, not '%.200s'", o); } PyObject * @@ -1014,7 +1020,7 @@ PySequence_Size(PyObject *s) if (m && m->sq_length) return m->sq_length(s); - type_error("len() of unsized object"); + type_error("object of type '%.200s' has no len()", s); return -1; } @@ -1047,7 +1053,7 @@ PySequence_Concat(PyObject *s, PyObject *o) return result; Py_DECREF(result); } - return type_error("object can't be concatenated"); + return type_error("'%.200s' object can't be concatenated", s); } PyObject * @@ -1076,7 +1082,7 @@ PySequence_Repeat(PyObject *o, Py_ssize_t count) return result; Py_DECREF(result); } - return type_error("object can't be repeated"); + return type_error("'%.200s' object can't be repeated", o); } PyObject * @@ -1100,7 +1106,7 @@ PySequence_InPlaceConcat(PyObject *s, PyObject *o) return result; Py_DECREF(result); } - return type_error("object can't be concatenated"); + return type_error("'%.200s' object can't be concatenated", s); } PyObject * @@ -1129,7 +1135,7 @@ PySequence_InPlaceRepeat(PyObject *o, Py_ssize_t count) return result; Py_DECREF(result); } - return type_error("object can't be repeated"); + return type_error("'%.200s' object can't be repeated", o); } PyObject * @@ -1153,7 +1159,7 @@ PySequence_GetItem(PyObject *s, Py_ssize_t i) return m->sq_item(s, i); } - return type_error("unindexable object"); + return type_error("'%.200s' object is unindexable", s); } PyObject * @@ -1188,7 +1194,7 @@ PySequence_GetSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2) return res; } - return type_error("unsliceable object"); + return type_error("'%.200s' object is unsliceable", s); } int @@ -1214,7 +1220,7 @@ PySequence_SetItem(PyObject *s, Py_ssize_t i, PyObject *o) return m->sq_ass_item(s, i, o); } - type_error("object does not support item assignment"); + type_error("'%.200s' object does not support item assignment", s); return -1; } @@ -1241,7 +1247,7 @@ PySequence_DelItem(PyObject *s, Py_ssize_t i) return m->sq_ass_item(s, i, (PyObject *)NULL); } - type_error("object doesn't support item deletion"); + type_error("'%.200s' object doesn't support item deletion", s); return -1; } @@ -1280,7 +1286,7 @@ PySequence_SetSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2, PyObject *o) return res; } - type_error("object doesn't support slice assignment"); + type_error("'%.200s' object doesn't support slice assignment", s); return -1; } @@ -1309,7 +1315,7 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2) } return m->sq_ass_slice(s, i1, i2, (PyObject *)NULL); } - type_error("object doesn't support slice deletion"); + type_error("'%.200s' object doesn't support slice deletion", s); return -1; } @@ -1440,7 +1446,7 @@ PySequence_Fast(PyObject *v, const char *m) it = PyObject_GetIter(v); if (it == NULL) { if (PyErr_ExceptionMatches(PyExc_TypeError)) - return type_error(m); + PyErr_SetString(PyExc_TypeError, m); return NULL; } @@ -1470,7 +1476,7 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) it = PyObject_GetIter(seq); if (it == NULL) { - type_error("iterable argument required"); + type_error("argument of type '%.200s' is not iterable", seq); return -1; } @@ -1603,7 +1609,7 @@ PyMapping_Size(PyObject *o) if (m && m->mp_length) return m->mp_length(o); - type_error("len() of unsized object"); + type_error("object of type '%.200s' has no len()", o); return -1; } @@ -1701,7 +1707,7 @@ PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) "NULL result without error in PyObject_Call"); return result; } - PyErr_Format(PyExc_TypeError, "'%s' object is not callable", + PyErr_Format(PyExc_TypeError, "'%.200s' object is not callable", func->ob_type->tp_name); return NULL; } @@ -1790,7 +1796,7 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...) } if (!PyCallable_Check(func)) { - type_error("call of non-callable attribute"); + type_error("attribute of type '%.200s' is not callable", func); goto exit; } @@ -1829,7 +1835,7 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...) } if (!PyCallable_Check(func)) { - type_error("call of non-callable attribute"); + type_error("attribute of type '%.200s' is not callable", func); goto exit; } @@ -2170,9 +2176,7 @@ PyObject_GetIter(PyObject *o) if (f == NULL) { if (PySequence_Check(o)) return PySeqIter_New(o); - PyErr_SetString(PyExc_TypeError, - "iteration over non-sequence"); - return NULL; + return type_error("'%.200s' object is not iterable", o); } else { PyObject *res = (*f)(o); diff --git a/Objects/bufferobject.c b/Objects/bufferobject.c index d2597b9..3a0e3d5 100644 --- a/Objects/bufferobject.c +++ b/Objects/bufferobject.c @@ -15,8 +15,16 @@ typedef struct { } PyBufferObject; +enum buffer_t { + READ_BUFFER, + WRITE_BUFFER, + CHAR_BUFFER, + ANY_BUFFER, +}; + static int -get_buf(PyBufferObject *self, void **ptr, Py_ssize_t *size) +get_buf(PyBufferObject *self, void **ptr, Py_ssize_t *size, + enum buffer_t buffer_type) { if (self->b_base == NULL) { assert (ptr != NULL); @@ -25,17 +33,43 @@ get_buf(PyBufferObject *self, void **ptr, Py_ssize_t *size) } else { Py_ssize_t count, offset; - readbufferproc proc; + readbufferproc proc = 0; PyBufferProcs *bp = self->b_base->ob_type->tp_as_buffer; if ((*bp->bf_getsegcount)(self->b_base, NULL) != 1) { PyErr_SetString(PyExc_TypeError, "single-segment buffer object expected"); return 0; } - if (self->b_readonly) - proc = bp->bf_getreadbuffer; - else - proc = (readbufferproc)bp->bf_getwritebuffer; + if ((buffer_type == READ_BUFFER) || + ((buffer_type == ANY_BUFFER) && self->b_readonly)) + proc = bp->bf_getreadbuffer; + else if ((buffer_type == WRITE_BUFFER) || + (buffer_type == ANY_BUFFER)) + proc = (readbufferproc)bp->bf_getwritebuffer; + else if (buffer_type == CHAR_BUFFER) { + proc = (readbufferproc)bp->bf_getcharbuffer; + } + if (!proc) { + char *buffer_type_name; + switch (buffer_type) { + case READ_BUFFER: + buffer_type_name = "read"; + break; + case WRITE_BUFFER: + buffer_type_name = "write"; + break; + case CHAR_BUFFER: + buffer_type_name = "char"; + break; + default: + buffer_type_name = "no"; + break; + } + PyErr_Format(PyExc_TypeError, + "%s buffer type not available", + buffer_type_name); + return 0; + } if ((count = (*proc)(self->b_base, 0, ptr)) < 0) return 0; /* apply constraints to the start/end */ @@ -224,15 +258,15 @@ buffer_compare(PyBufferObject *self, PyBufferObject *other) Py_ssize_t len_self, len_other, min_len; int cmp; - if (!get_buf(self, &p1, &len_self)) + if (!get_buf(self, &p1, &len_self, ANY_BUFFER)) return -1; - if (!get_buf(other, &p2, &len_other)) + if (!get_buf(other, &p2, &len_other, ANY_BUFFER)) return -1; min_len = (len_self < len_other) ? len_self : len_other; if (min_len > 0) { cmp = memcmp(p1, p2, min_len); if (cmp != 0) - return cmp; + return cmp < 0 ? -1 : 1; } return (len_self < len_other) ? -1 : (len_self > len_other) ? 1 : 0; } @@ -284,7 +318,7 @@ buffer_hash(PyBufferObject *self) return -1; } - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return -1; p = (unsigned char *) ptr; len = size; @@ -303,7 +337,7 @@ buffer_str(PyBufferObject *self) { void *ptr; Py_ssize_t size; - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return NULL; return PyString_FromStringAndSize((const char *)ptr, size); } @@ -315,7 +349,7 @@ buffer_length(PyBufferObject *self) { void *ptr; Py_ssize_t size; - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return -1; return size; } @@ -344,7 +378,7 @@ buffer_concat(PyBufferObject *self, PyObject *other) return NULL; } - if (!get_buf(self, &ptr1, &size)) + if (!get_buf(self, &ptr1, &size, ANY_BUFFER)) return NULL; /* optimize special case */ @@ -380,7 +414,7 @@ buffer_repeat(PyBufferObject *self, Py_ssize_t count) if ( count < 0 ) count = 0; - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return NULL; ob = PyString_FromStringAndSize(NULL, size * count); if ( ob == NULL ) @@ -404,7 +438,7 @@ buffer_item(PyBufferObject *self, Py_ssize_t idx) { void *ptr; Py_ssize_t size; - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return NULL; if ( idx < 0 || idx >= size ) { PyErr_SetString(PyExc_IndexError, "buffer index out of range"); @@ -418,7 +452,7 @@ buffer_slice(PyBufferObject *self, Py_ssize_t left, Py_ssize_t right) { void *ptr; Py_ssize_t size; - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return NULL; if ( left < 0 ) left = 0; @@ -446,7 +480,7 @@ buffer_ass_item(PyBufferObject *self, Py_ssize_t idx, PyObject *other) return -1; } - if (!get_buf(self, &ptr1, &size)) + if (!get_buf(self, &ptr1, &size, ANY_BUFFER)) return -1; if (idx < 0 || idx >= size) { @@ -513,7 +547,7 @@ buffer_ass_slice(PyBufferObject *self, Py_ssize_t left, Py_ssize_t right, PyObje "single-segment buffer object expected"); return -1; } - if (!get_buf(self, &ptr1, &size)) + if (!get_buf(self, &ptr1, &size, ANY_BUFFER)) return -1; if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 ) return -1; @@ -552,7 +586,7 @@ buffer_getreadbuf(PyBufferObject *self, Py_ssize_t idx, void **pp) "accessing non-existent buffer segment"); return -1; } - if (!get_buf(self, pp, &size)) + if (!get_buf(self, pp, &size, READ_BUFFER)) return -1; return size; } @@ -560,12 +594,22 @@ buffer_getreadbuf(PyBufferObject *self, Py_ssize_t idx, void **pp) static Py_ssize_t buffer_getwritebuf(PyBufferObject *self, Py_ssize_t idx, void **pp) { + Py_ssize_t size; + if ( self->b_readonly ) { PyErr_SetString(PyExc_TypeError, "buffer is read-only"); return -1; } - return buffer_getreadbuf(self, idx, pp); + + if ( idx != 0 ) { + PyErr_SetString(PyExc_SystemError, + "accessing non-existent buffer segment"); + return -1; + } + if (!get_buf(self, pp, &size, WRITE_BUFFER)) + return -1; + return size; } static Py_ssize_t @@ -573,7 +617,7 @@ buffer_getsegcount(PyBufferObject *self, Py_ssize_t *lenp) { void *ptr; Py_ssize_t size; - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return -1; if (lenp) *lenp = size; @@ -590,13 +634,12 @@ buffer_getcharbuf(PyBufferObject *self, Py_ssize_t idx, const char **pp) "accessing non-existent buffer segment"); return -1; } - if (!get_buf(self, &ptr, &size)) + if (!get_buf(self, &ptr, &size, CHAR_BUFFER)) return -1; *pp = (const char *)ptr; return size; } - static PySequenceMethods buffer_as_sequence = { (lenfunc)buffer_length, /*sq_length*/ (binaryfunc)buffer_concat, /*sq_concat*/ diff --git a/Objects/cellobject.c b/Objects/cellobject.c index da48dea..65a29aa 100644 --- a/Objects/cellobject.c +++ b/Objects/cellobject.c @@ -8,6 +8,8 @@ PyCell_New(PyObject *obj) PyCellObject *op; op = (PyCellObject *)PyObject_GC_New(PyCellObject, &PyCell_Type); + if (op == NULL) + return NULL; op->ob_ref = obj; Py_XINCREF(obj); diff --git a/Objects/classobject.c b/Objects/classobject.c index fa8a3a9..2d80073 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -933,11 +933,9 @@ instance_hash(PyInstanceObject *inst) Py_DECREF(func); if (res == NULL) return -1; - if (PyInt_Check(res)) { - outcome = PyInt_AsLong(res); - if (outcome == -1) - outcome = -2; - } + if (PyInt_Check(res) || PyLong_Check(res)) + /* This already converts a -1 result to -2. */ + outcome = res->ob_type->tp_hash(res); else { PyErr_SetString(PyExc_TypeError, "__hash__() should return an int"); @@ -1367,10 +1365,13 @@ half_binop(PyObject *v, PyObject *w, char *opname, binaryfunc thisfunc, * argument */ result = generic_binary_op(v1, w, opname); } else { + if (Py_EnterRecursiveCall(" after coercion")) + return NULL; if (swapped) result = (thisfunc)(w, v1); else result = (thisfunc)(v1, w); + Py_LeaveRecursiveCall(); } Py_DECREF(coerced); return result; diff --git a/Objects/codeobject.c b/Objects/codeobject.c index a9bcb01..89871d6 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -556,6 +556,7 @@ PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds) the line increments here, treating them as byte increments gets confusing, to say the least. */ + bounds->ap_lower = 0; while (size > 0) { if (addr + *p > lasti) break; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index c6e3343..aa8fc81 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -274,16 +274,16 @@ complex_to_buf(char *buf, int bufsz, PyComplexObject *v, int precision) { char format[32]; if (v->cval.real == 0.) { - PyOS_snprintf(format, 32, "%%.%ig", precision); - PyOS_ascii_formatd(buf, bufsz, format, v->cval.imag); - strncat(buf, "j", bufsz); + PyOS_snprintf(format, sizeof(format), "%%.%ig", precision); + PyOS_ascii_formatd(buf, bufsz - 1, format, v->cval.imag); + strncat(buf, "j", 1); } else { char re[64], im[64]; /* Format imaginary part with sign, real part without */ - PyOS_snprintf(format, 32, "%%.%ig", precision); - PyOS_ascii_formatd(re, 64, format, v->cval.real); - PyOS_snprintf(format, 32, "%%+.%ig", precision); - PyOS_ascii_formatd(im, 64, format, v->cval.imag); + PyOS_snprintf(format, sizeof(format), "%%.%ig", precision); + PyOS_ascii_formatd(re, sizeof(re), format, v->cval.real); + PyOS_snprintf(format, sizeof(format), "%%+.%ig", precision); + PyOS_ascii_formatd(im, sizeof(im), format, v->cval.imag); PyOS_snprintf(buf, bufsz, "(%s%sj)", re, im); } } diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 606ef05..914b6d3 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -892,10 +892,12 @@ typedef struct { static void wrapper_dealloc(wrapperobject *wp) { - _PyObject_GC_UNTRACK(wp); + PyObject_GC_UnTrack(wp); + Py_TRASHCAN_SAFE_BEGIN(wp) Py_XDECREF(wp->descr); Py_XDECREF(wp->self); PyObject_GC_Del(wp); + Py_TRASHCAN_SAFE_END(wp) } static int @@ -1174,7 +1176,6 @@ static int property_init(PyObject *self, PyObject *args, PyObject *kwds) { PyObject *get = NULL, *set = NULL, *del = NULL, *doc = NULL; - PyObject *get_doc = NULL; static char *kwlist[] = {"fget", "fset", "fdel", "doc", 0}; propertyobject *gs = (propertyobject *)self; @@ -1189,20 +1190,22 @@ property_init(PyObject *self, PyObject *args, PyObject *kwds) if (del == Py_None) del = NULL; - /* if no docstring given and the getter has one, use that one */ - if ((doc == NULL || doc == Py_None) && get != NULL && - PyObject_HasAttrString(get, "__doc__")) { - if (!(get_doc = PyObject_GetAttrString(get, "__doc__"))) - return -1; - Py_DECREF(get_doc); /* it is INCREF'd again below */ - doc = get_doc; - } - Py_XINCREF(get); Py_XINCREF(set); Py_XINCREF(del); Py_XINCREF(doc); + /* if no docstring given and the getter has one, use that one */ + if ((doc == NULL || doc == Py_None) && get != NULL) { + PyObject *get_doc = PyObject_GetAttrString(get, "__doc__"); + if (get_doc != NULL) { + Py_XDECREF(doc); + doc = get_doc; /* get_doc already INCREF'd by GetAttr */ + } else { + PyErr_Clear(); + } + } + gs->prop_get = get; gs->prop_set = set; gs->prop_del = del; diff --git a/Objects/dictnotes.txt b/Objects/dictnotes.txt index cb46cb1..b0e59a7 100644 --- a/Objects/dictnotes.txt +++ b/Objects/dictnotes.txt @@ -243,7 +243,7 @@ kept just for iteration. Caching Lookups --------------- The idea is to exploit key access patterns by anticipating future lookups -based of previous lookups. +based on previous lookups. The simplest incarnation is to save the most recently accessed entry. This gives optimal performance for use cases where every get is followed diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f9e45fd..f3b6b7f 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -532,7 +532,7 @@ dictresize(dictobject *mp, Py_ssize_t minused) /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors * that may occur (originally dicts supported only string keys, and exceptions * weren't possible). So, while the original intent was that a NULL return - * meant the key wasn't present, it reality it can mean that, or that an error + * meant the key wasn't present, in reality it can mean that, or that an error * (suppressed) occurred while computing the key's hash, or that some error * (suppressed) occurred when comparing keys in the dict's internal probe * sequence. A nasty example of the latter is when a Python-coded comparison @@ -561,7 +561,7 @@ PyDict_GetItem(PyObject *op, PyObject *key) /* We can arrive here with a NULL tstate during initialization: try running "python -Wi" for an example related to string interning. Let's just hope that no exception occurs then... */ - tstate = PyThreadState_GET(); + tstate = _PyThreadState_Current; if (tstate != NULL && tstate->curexc_type != NULL) { /* preserve the existing exception */ PyObject *err_type, *err_value, *err_tb; @@ -599,6 +599,8 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) PyErr_BadInternalCall(); return -1; } + assert(key); + assert(value); mp = (dictobject *)op; if (PyString_CheckExact(key)) { hash = ((PyStringObject *)key)->ob_shash; @@ -647,6 +649,7 @@ PyDict_DelItem(PyObject *op, PyObject *key) PyErr_BadInternalCall(); return -1; } + assert(key); if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 369365b..be9627c 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -1967,6 +1967,29 @@ static PyMethodDef functions[] = { if (PyDict_SetItemString(bdict, # TYPE, PyExc_ ## TYPE)) \ Py_FatalError("Module dictionary insertion problem."); +#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) +/* crt variable checking in VisualStudio .NET 2005 */ +#include <crtdbg.h> + +static int prevCrtReportMode; +static _invalid_parameter_handler prevCrtHandler; + +/* Invalid parameter handler. Sets a ValueError exception */ +static void +InvalidParameterHandler( + const wchar_t * expression, + const wchar_t * function, + const wchar_t * file, + unsigned int line, + uintptr_t pReserved) +{ + /* Do nothing, allow execution to continue. Usually this + * means that the CRT will set errno to EINVAL + */ +} +#endif + + PyMODINIT_FUNC _PyExc_Init(void) { @@ -2096,6 +2119,13 @@ _PyExc_Init(void) Py_FatalError("Cannot pre-allocate MemoryError instance\n"); Py_DECREF(bltinmod); + +#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) + /* Set CRT argument error handler */ + prevCrtHandler = _set_invalid_parameter_handler(InvalidParameterHandler); + /* turn off assertions in debug mode */ + prevCrtReportMode = _CrtSetReportMode(_CRT_ASSERT, 0); +#endif } void @@ -2103,4 +2133,9 @@ _PyExc_Fini(void) { Py_XDECREF(PyExc_MemoryErrorInst); PyExc_MemoryErrorInst = NULL; +#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) + /* reset CRT error handling */ + _set_invalid_parameter_handler(prevCrtHandler); + _CrtSetReportMode(_CRT_ASSERT, prevCrtReportMode); +#endif } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index a4a43ed..fb8a542 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -4,9 +4,9 @@ #include "Python.h" #include "structmember.h" -#ifndef DONT_HAVE_SYS_TYPES_H +#ifdef HAVE_SYS_TYPES_H #include <sys/types.h> -#endif /* DONT_HAVE_SYS_TYPES_H */ +#endif /* HAVE_SYS_TYPES_H */ #ifdef MS_WINDOWS #define fileno _fileno @@ -103,6 +103,7 @@ static PyObject * fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, int (*close)(FILE *)) { + assert(name != NULL); assert(f != NULL); assert(PyFile_Check(f)); assert(f->f_fp == NULL); @@ -111,7 +112,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, Py_DECREF(f->f_mode); Py_DECREF(f->f_encoding); - Py_INCREF (name); + Py_INCREF(name); f->f_name = name; f->f_mode = PyString_FromString(mode); @@ -126,7 +127,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, Py_INCREF(Py_None); f->f_encoding = Py_None; - if (f->f_name == NULL || f->f_mode == NULL) + if (f->f_mode == NULL) return NULL; f->f_fp = fp; f = dircheck(f); @@ -241,13 +242,15 @@ open_the_file(PyFileObject *f, char *name, char *mode) } if (f->f_fp == NULL) { -#ifdef _MSC_VER +#if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__)) /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings, * across all Windows flavors. When it sets EINVAL varies * across Windows flavors, the exact conditions aren't * documented, and the answer lies in the OS's implementation * of Win32's CreateFile function (whose source is secret). * Seems the best we can do is map EINVAL to ENOENT. + * Starting with Visual Studio .NET 2005, EINVAL is correctly + * set by our CRT error handler (set in exceptions.c.) */ if (errno == 0) /* bad mode string */ errno = EINVAL; @@ -276,7 +279,9 @@ PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *)) PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type, NULL, NULL); if (f != NULL) { - PyObject *o_name = PyString_FromString(name); + PyObject *o_name = PyString_FromString(name); + if (o_name == NULL) + return NULL; if (fill_file_fields(f, fp, o_name, mode, close) == NULL) { Py_DECREF(f); f = NULL; @@ -409,11 +414,11 @@ file_repr(PyFileObject *f) if (PyUnicode_Check(f->f_name)) { #ifdef Py_USING_UNICODE PyObject *ret = NULL; - PyObject *name; - name = PyUnicode_AsUnicodeEscapeString(f->f_name); + PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name); + const char *name_str = name ? PyString_AsString(name) : "?"; ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>", f->f_fp == NULL ? "closed" : "open", - PyString_AsString(name), + name_str, PyString_AsString(f->f_mode), f); Py_XDECREF(name); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index fcb5e4e..3a073b6 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -20,7 +20,6 @@ static PyMemberDef frame_memberlist[] = { {"f_builtins", T_OBJECT, OFF(f_builtins),RO}, {"f_globals", T_OBJECT, OFF(f_globals), RO}, {"f_lasti", T_INT, OFF(f_lasti), RO}, - {"f_restricted",T_INT, OFF(f_restricted),RO}, {"f_exc_type", T_OBJECT, OFF(f_exc_type)}, {"f_exc_value", T_OBJECT, OFF(f_exc_value)}, {"f_exc_traceback", T_OBJECT, OFF(f_exc_traceback)}, @@ -341,11 +340,18 @@ frame_settrace(PyFrameObject *f, PyObject* v, void *closure) return 0; } +static PyObject * +frame_getrestricted(PyFrameObject *f, void *closure) +{ + return PyBool_FromLong(PyFrame_IsRestricted(f)); +} + static PyGetSetDef frame_getsetlist[] = { {"f_locals", (getter)frame_getlocals, NULL, NULL}, {"f_lineno", (getter)frame_getlineno, (setter)frame_setlineno, NULL}, {"f_trace", (getter)frame_gettrace, (setter)frame_settrace, NULL}, + {"f_restricted",(getter)frame_getrestricted,NULL, NULL}, {0} }; @@ -425,7 +431,7 @@ frame_dealloc(PyFrameObject *f) Py_CLEAR(f->f_exc_traceback); co = f->f_code; - if (co != NULL && co->co_zombieframe == NULL) + if (co->co_zombieframe == NULL) co->co_zombieframe = f; else if (numfree < MAXFREELIST) { ++numfree; @@ -435,7 +441,7 @@ frame_dealloc(PyFrameObject *f) else PyObject_GC_Del(f); - Py_XDECREF(co); + Py_DECREF(co); Py_TRASHCAN_SAFE_END(f) } @@ -636,6 +642,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, f->f_trace = NULL; f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL; } + f->f_stacktop = f->f_valuestack; f->f_builtins = builtins; Py_XINCREF(back); f->f_back = back; @@ -664,10 +671,8 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, f->f_lasti = -1; f->f_lineno = code->co_firstlineno; - f->f_restricted = (builtins != tstate->interp->builtins); f->f_iblock = 0; - f->f_stacktop = f->f_valuestack; _PyObject_GC_TRACK(f); return f; } diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 59cb519..1ba74c5 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -109,8 +109,8 @@ PyFunction_SetDefaults(PyObject *op, PyObject *defaults) } if (defaults == Py_None) defaults = NULL; - else if (PyTuple_Check(defaults)) { - Py_XINCREF(defaults); + else if (defaults && PyTuple_Check(defaults)) { + Py_INCREF(defaults); } else { PyErr_SetString(PyExc_SystemError, "non-tuple default args"); @@ -141,7 +141,7 @@ PyFunction_SetClosure(PyObject *op, PyObject *closure) if (closure == Py_None) closure = NULL; else if (PyTuple_Check(closure)) { - Py_XINCREF(closure); + Py_INCREF(closure); } else { PyErr_Format(PyExc_SystemError, diff --git a/Objects/listobject.c b/Objects/listobject.c index e128473..ea03a2a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -108,8 +108,10 @@ PyList_New(Py_ssize_t size) op->ob_item = NULL; else { op->ob_item = (PyObject **) PyMem_MALLOC(nbytes); - if (op->ob_item == NULL) + if (op->ob_item == NULL) { + Py_DECREF(op); return PyErr_NoMemory(); + } memset(op->ob_item, 0, nbytes); } op->ob_size = size; diff --git a/Objects/listsort.txt b/Objects/listsort.txt index 68e918d..9226984 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -494,7 +494,7 @@ and its followup(s). An earlier paper called the same strategy 467-474, Austin, Texas, 25-27 January 1993. and it probably dates back to an earlier paper by Bentley and Yao. The -McIlory paper in particular has good analysis of a mergesort that's +McIlroy paper in particular has good analysis of a mergesort that's probably strongly related to this one in its galloping strategy. diff --git a/Objects/longobject.c b/Objects/longobject.c index 45166fe..9d3685e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1203,7 +1203,7 @@ long_format(PyObject *aa, int base, int addL) register PyLongObject *a = (PyLongObject *)aa; PyStringObject *str; Py_ssize_t i; - const Py_ssize_t size_a = ABS(a->ob_size); + Py_ssize_t size_a; char *p; int bits; char sign = '\0'; @@ -1213,6 +1213,7 @@ long_format(PyObject *aa, int base, int addL) return NULL; } assert(base >= 2 && base <= 36); + size_a = ABS(a->ob_size); /* Compute a rough upper bound for the length of the string */ i = base; @@ -3133,9 +3134,8 @@ long_bitwise(PyLongObject *a, : MAX(size_a, size_b); z = _PyLong_New(size_z); if (z == NULL) { - Py_XDECREF(a); - Py_XDECREF(b); - Py_XDECREF(z); + Py_DECREF(a); + Py_DECREF(b); return NULL; } diff --git a/Objects/object.c b/Objects/object.c index 0af7989..ff13574 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1067,7 +1067,8 @@ PyObject_Hash(PyObject *v) return _Py_HashPointer(v); /* Use address as hash value */ } /* If there's a cmp but no hash defined, the object can't be hashed */ - PyErr_SetString(PyExc_TypeError, "unhashable type"); + PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'", + v->ob_type->tp_name); return -1; } @@ -1132,8 +1133,9 @@ PyObject_GetAttr(PyObject *v, PyObject *name) else #endif { - PyErr_SetString(PyExc_TypeError, - "attribute name must be string"); + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); return NULL; } } @@ -1178,8 +1180,9 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) else #endif { - PyErr_SetString(PyExc_TypeError, - "attribute name must be string"); + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); return -1; } } @@ -1274,8 +1277,9 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name) else #endif { - PyErr_SetString(PyExc_TypeError, - "attribute name must be string"); + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); return NULL; } } @@ -1395,8 +1399,9 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) else #endif { - PyErr_SetString(PyExc_TypeError, - "attribute name must be string"); + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); return -1; } } @@ -1445,7 +1450,7 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) if (descr == NULL) { PyErr_Format(PyExc_AttributeError, - "'%.50s' object has no attribute '%.400s'", + "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AS_STRING(name)); goto done; } @@ -1760,8 +1765,9 @@ PyObject_Dir(PyObject *arg) assert(result); if (!PyList_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "Expected keys() to be a list."); + PyErr_Format(PyExc_TypeError, + "Expected keys() to be a list, not '%.200s'", + result->ob_type->tp_name); goto error; } if (PyList_Sort(result) != 0) diff --git a/Objects/setobject.c b/Objects/setobject.c index 91682a2..55a2b85 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -62,7 +62,7 @@ set_lookkey(PySetObject *so, PyObject *key, register long hash) register Py_ssize_t i; register size_t perturb; register setentry *freeslot; - register unsigned int mask = so->mask; + register size_t mask = so->mask; setentry *table = so->table; register setentry *entry; register int cmp; @@ -140,7 +140,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) register Py_ssize_t i; register size_t perturb; register setentry *freeslot; - register unsigned int mask = so->mask; + register size_t mask = so->mask; setentry *table = so->table; register setentry *entry; @@ -221,11 +221,11 @@ keys again. When entries have been deleted, the new table may actually be smaller than the old one. */ static int -set_table_resize(PySetObject *so, int minused) +set_table_resize(PySetObject *so, Py_ssize_t minused) { - int newsize; + Py_ssize_t newsize; setentry *oldtable, *newtable, *entry; - int i; + Py_ssize_t i; int is_oldtable_malloced; setentry small_copy[PySet_MINSIZE]; @@ -314,7 +314,7 @@ set_table_resize(PySetObject *so, int minused) static int set_add_entry(register PySetObject *so, setentry *entry) { - register int n_used; + register Py_ssize_t n_used; assert(so->fill <= so->mask); /* at least one empty slot */ n_used = so->used; @@ -330,7 +330,7 @@ static int set_add_key(register PySetObject *so, PyObject *key) { register long hash; - register int n_used; + register Py_ssize_t n_used; if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) { @@ -403,10 +403,10 @@ set_clear_internal(PySetObject *so) { setentry *entry, *table; int table_is_malloced; - int fill; + Py_ssize_t fill; setentry small_copy[PySet_MINSIZE]; #ifdef Py_DEBUG - int i, n; + Py_ssize_t i, n; assert (PyAnySet_Check(so)); n = so->mask + 1; @@ -465,7 +465,7 @@ set_clear_internal(PySetObject *so) /* * Iterate over a set table. Use like so: * - * int pos; + * Py_ssize_t pos; * setentry *entry; * pos = 0; # important! pos should not otherwise be changed by you * while (set_next(yourset, &pos, &entry)) { @@ -479,7 +479,7 @@ static int set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { Py_ssize_t i; - int mask; + Py_ssize_t mask; register setentry *table; assert (PyAnySet_Check(so)); @@ -501,7 +501,7 @@ static void set_dealloc(PySetObject *so) { register setentry *entry; - int fill = so->fill; + Py_ssize_t fill = so->fill; PyObject_GC_UnTrack(so); Py_TRASHCAN_SAFE_BEGIN(so) if (so->weakreflist != NULL) @@ -570,7 +570,7 @@ static int set_merge(PySetObject *so, PyObject *otherset) { PySetObject *other; - register int i; + register Py_ssize_t i; register setentry *entry; assert (PyAnySet_Check(so)); @@ -637,7 +637,7 @@ set_contains_entry(PySetObject *so, setentry *entry) static PyObject * set_pop(PySetObject *so) { - register int i = 0; + register Py_ssize_t i = 0; register setentry *entry; PyObject *key; @@ -655,7 +655,7 @@ set_pop(PySetObject *so) */ entry = &so->table[0]; if (entry->key == NULL || entry->key == dummy) { - i = (int)entry->hash; + i = entry->hash; /* The hash field may be a real hash value, or it may be a * legit search finger, or it may be a once-legit search * finger that's out of bounds now because it wrapped around @@ -730,9 +730,9 @@ set_nohash(PyObject *self) typedef struct { PyObject_HEAD PySetObject *si_set; /* Set to NULL when iterator is exhausted */ - int si_used; - int si_pos; - long len; + Py_ssize_t si_used; + Py_ssize_t si_pos; + Py_ssize_t len; } setiterobject; static void @@ -745,7 +745,7 @@ setiter_dealloc(setiterobject *si) static PyObject * setiter_len(setiterobject *si) { - long len = 0; + Py_ssize_t len = 0; if (si->si_set != NULL && si->si_used == si->si_set->used) len = si->len; return PyInt_FromLong(len); @@ -761,7 +761,7 @@ static PyMethodDef setiter_methods[] = { static PyObject *setiter_iternext(setiterobject *si) { PyObject *key; - register int i, mask; + register Py_ssize_t i, mask; register setentry *entry; PySetObject *so = si->si_set; @@ -1007,7 +1007,7 @@ set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static void set_swap_bodies(PySetObject *a, PySetObject *b) { - int t; + Py_ssize_t t; setentry *u; setentry *(*f)(PySetObject *so, PyObject *key, long hash); setentry tab[PySet_MINSIZE]; @@ -1380,12 +1380,12 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) while (set_next(otherset, &pos, &entry)) { int rv = set_discard_entry(so, entry); if (rv == -1) { - Py_XDECREF(otherset); + Py_DECREF(otherset); return NULL; } if (rv == DISCARD_NOTFOUND) { if (set_add_entry(so, entry) == -1) { - Py_XDECREF(otherset); + Py_DECREF(otherset); return NULL; } } @@ -1795,7 +1795,7 @@ static PyNumberMethods set_as_number = { PyDoc_STRVAR(set_doc, "set(iterable) --> set object\n\ \n\ -Build an unordered collection."); +Build an unordered collection of unique elements."); PyTypeObject PySet_Type = { PyObject_HEAD_INIT(&PyType_Type) @@ -1889,7 +1889,7 @@ static PyNumberMethods frozenset_as_number = { PyDoc_STRVAR(frozenset_doc, "frozenset(iterable) --> frozenset object\n\ \n\ -Build an immutable unordered collection."); +Build an immutable unordered collection of unique elements."); PyTypeObject PyFrozenSet_Type = { PyObject_HEAD_INIT(&PyType_Type) @@ -2061,7 +2061,7 @@ _PySet_Update(PyObject *set, PyObject *iterable) static PyObject * test_c_api(PySetObject *so) { - int count; + Py_ssize_t count; char *s; Py_ssize_t i; PyObject *elem, *dup, *t, *f, *dup2; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 6a760a2..0819c98 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1490,7 +1490,6 @@ string_split(PyStringObject *self, PyObject *args) j = i+pos; SPLIT_ADD(s, i, j); i = j + n; - } #else i = j = 0; @@ -1586,7 +1585,7 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) return NULL; i = j = len-1; - + while (maxsplit-- > 0) { RSKIP_SPACE(s, i); if (i<0) break; @@ -1786,7 +1785,7 @@ string_join(PyStringObject *self, PyObject *orig) sz += seplen; if (sz < old_sz || sz > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, - "join() is too long for a Python string"); + "join() result is too long for a Python string"); Py_DECREF(seq); return NULL; } @@ -2462,11 +2461,11 @@ return_self(PyStringObject *self) } Py_LOCAL_INLINE(Py_ssize_t) -countchar(char *target, int target_len, char c, Py_ssize_t maxcount) +countchar(const char *target, int target_len, char c, Py_ssize_t maxcount) { Py_ssize_t count=0; - char *start=target; - char *end=target+target_len; + const char *start=target; + const char *end=target+target_len; while ( (start=findchar(start, end-start, c)) != NULL ) { count++; @@ -2478,8 +2477,8 @@ countchar(char *target, int target_len, char c, Py_ssize_t maxcount) } Py_LOCAL(Py_ssize_t) -findstring(char *target, Py_ssize_t target_len, - char *pattern, Py_ssize_t pattern_len, +findstring(const char *target, Py_ssize_t target_len, + const char *pattern, Py_ssize_t pattern_len, Py_ssize_t start, Py_ssize_t end, int direction) @@ -2516,8 +2515,8 @@ findstring(char *target, Py_ssize_t target_len, } Py_LOCAL_INLINE(Py_ssize_t) -countstring(char *target, Py_ssize_t target_len, - char *pattern, Py_ssize_t pattern_len, +countstring(const char *target, Py_ssize_t target_len, + const char *pattern, Py_ssize_t pattern_len, Py_ssize_t start, Py_ssize_t end, int direction, Py_ssize_t maxcount) @@ -2570,22 +2569,21 @@ countstring(char *target, Py_ssize_t target_len, /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ Py_LOCAL(PyStringObject *) replace_interleave(PyStringObject *self, - PyStringObject *to, + const char *to_s, Py_ssize_t to_len, Py_ssize_t maxcount) { - char *self_s, *to_s, *result_s; - Py_ssize_t self_len, to_len, result_len; + char *self_s, *result_s; + Py_ssize_t self_len, result_len; Py_ssize_t count, i, product; PyStringObject *result; self_len = PyString_GET_SIZE(self); - to_len = PyString_GET_SIZE(to); - + /* 1 at the end plus 1 after every character */ count = self_len+1; if (maxcount < count) count = maxcount; - + /* Check for overflow */ /* result_len = count * to_len + self_len; */ product = count * to_len; @@ -2606,8 +2604,6 @@ replace_interleave(PyStringObject *self, return NULL; self_s = PyString_AS_STRING(self); - to_s = PyString_AS_STRING(to); - to_len = PyString_GET_SIZE(to); result_s = PyString_AS_STRING(result); /* TODO: special case single character, which doesn't need memcpy */ @@ -2668,25 +2664,24 @@ replace_delete_single_character(PyStringObject *self, start = next+1; } Py_MEMCPY(result_s, start, end-start); - + return result; } /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ Py_LOCAL(PyStringObject *) -replace_delete_substring(PyStringObject *self, PyStringObject *from, +replace_delete_substring(PyStringObject *self, + const char *from_s, Py_ssize_t from_len, Py_ssize_t maxcount) { - char *self_s, *from_s, *result_s; + char *self_s, *result_s; char *start, *next, *end; - Py_ssize_t self_len, from_len, result_len; + Py_ssize_t self_len, result_len; Py_ssize_t count, offset; PyStringObject *result; self_len = PyString_GET_SIZE(self); self_s = PyString_AS_STRING(self); - from_len = PyString_GET_SIZE(from); - from_s = PyString_AS_STRING(from); count = countstring(self_s, self_len, from_s, from_len, @@ -2700,13 +2695,13 @@ replace_delete_substring(PyStringObject *self, PyStringObject *from, result_len = self_len - (count * from_len); assert (result_len>=0); - + if ( (result = (PyStringObject *) PyString_FromStringAndSize(NULL, result_len)) == NULL ) return NULL; - + result_s = PyString_AS_STRING(result); - + start = self_s; end = self_s + self_len; while (count-- > 0) { @@ -2716,9 +2711,9 @@ replace_delete_substring(PyStringObject *self, PyStringObject *from, if (offset == -1) break; next = start + offset; - + Py_MEMCPY(result_s, start, next-start); - + result_s += (next-start); start = next+from_len; } @@ -2735,31 +2730,31 @@ replace_single_character_in_place(PyStringObject *self, char *self_s, *result_s, *start, *end, *next; Py_ssize_t self_len; PyStringObject *result; - + /* The result string will be the same size */ self_s = PyString_AS_STRING(self); self_len = PyString_GET_SIZE(self); - + next = findchar(self_s, self_len, from_c); - + if (next == NULL) { /* No matches; return the original string */ return return_self(self); } - + /* Need to make a new string */ result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); if (result == NULL) return NULL; result_s = PyString_AS_STRING(result); Py_MEMCPY(result_s, self_s, self_len); - + /* change everything in-place, starting with this one */ start = result_s + (next-self_s); *start = to_c; start++; end = result_s + self_len; - + while (--maxcount > 0) { next = findchar(start, end-start, from_c); if (next == NULL) @@ -2767,40 +2762,35 @@ replace_single_character_in_place(PyStringObject *self, *next = to_c; start = next+1; } - + return result; } /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ Py_LOCAL(PyStringObject *) replace_substring_in_place(PyStringObject *self, - PyStringObject *from, - PyStringObject *to, + const char *from_s, Py_ssize_t from_len, + const char *to_s, Py_ssize_t to_len, Py_ssize_t maxcount) { char *result_s, *start, *end; - char *self_s, *from_s, *to_s; - Py_ssize_t self_len, from_len, offset; + char *self_s; + Py_ssize_t self_len, offset; PyStringObject *result; - + /* The result string will be the same size */ - + self_s = PyString_AS_STRING(self); self_len = PyString_GET_SIZE(self); - - from_s = PyString_AS_STRING(from); - from_len = PyString_GET_SIZE(from); - to_s = PyString_AS_STRING(to); - + offset = findstring(self_s, self_len, from_s, from_len, 0, self_len, FORWARD); - if (offset == -1) { /* No matches; return the original string */ return return_self(self); } - + /* Need to make a new string */ result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); if (result == NULL) @@ -2808,13 +2798,12 @@ replace_substring_in_place(PyStringObject *self, result_s = PyString_AS_STRING(result); Py_MEMCPY(result_s, self_s, self_len); - /* change everything in-place, starting with this one */ start = result_s + offset; Py_MEMCPY(start, to_s, from_len); start += from_len; end = result_s + self_len; - + while ( --maxcount > 0) { offset = findstring(start, end-start, from_s, from_len, @@ -2824,7 +2813,7 @@ replace_substring_in_place(PyStringObject *self, Py_MEMCPY(start+offset, to_s, from_len); start += offset+from_len; } - + return result; } @@ -2832,28 +2821,24 @@ replace_substring_in_place(PyStringObject *self, Py_LOCAL(PyStringObject *) replace_single_character(PyStringObject *self, char from_c, - PyStringObject *to, + const char *to_s, Py_ssize_t to_len, Py_ssize_t maxcount) { - char *self_s, *to_s, *result_s; + char *self_s, *result_s; char *start, *next, *end; - Py_ssize_t self_len, to_len, result_len; + Py_ssize_t self_len, result_len; Py_ssize_t count, product; PyStringObject *result; - + self_s = PyString_AS_STRING(self); self_len = PyString_GET_SIZE(self); - + count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { /* no matches, return unchanged */ return return_self(self); } - - to_s = PyString_AS_STRING(to); - to_len = PyString_GET_SIZE(to); - + /* use the difference between current and new, hence the "-1" */ /* result_len = self_len + count * (to_len-1) */ product = count * (to_len-1); @@ -2866,19 +2851,19 @@ replace_single_character(PyStringObject *self, PyErr_SetString(PyExc_OverflowError, "replace string is too long"); return NULL; } - + if ( (result = (PyStringObject *) PyString_FromStringAndSize(NULL, result_len)) == NULL) return NULL; result_s = PyString_AS_STRING(result); - + start = self_s; end = self_s + self_len; while (count-- > 0) { next = findchar(start, end-start, from_c); if (next == NULL) break; - + if (next == start) { /* replace with the 'to' */ Py_MEMCPY(result_s, to_s, to_len); @@ -2895,27 +2880,25 @@ replace_single_character(PyStringObject *self, } /* Copy the remainder of the remaining string */ Py_MEMCPY(result_s, start, end-start); - + return result; } /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ Py_LOCAL(PyStringObject *) replace_substring(PyStringObject *self, - PyStringObject *from, - PyStringObject *to, + const char *from_s, Py_ssize_t from_len, + const char *to_s, Py_ssize_t to_len, Py_ssize_t maxcount) { - char *self_s, *from_s, *to_s, *result_s; + char *self_s, *result_s; char *start, *next, *end; - Py_ssize_t self_len, from_len, to_len, result_len; + Py_ssize_t self_len, result_len; Py_ssize_t count, offset, product; PyStringObject *result; - + self_s = PyString_AS_STRING(self); self_len = PyString_GET_SIZE(self); - from_s = PyString_AS_STRING(from); - from_len = PyString_GET_SIZE(from); - + count = countstring(self_s, self_len, from_s, from_len, 0, self_len, FORWARD, maxcount); @@ -2923,10 +2906,7 @@ replace_substring(PyStringObject *self, /* no matches, return unchanged */ return return_self(self); } - - to_s = PyString_AS_STRING(to); - to_len = PyString_GET_SIZE(to); - + /* Check for overflow */ /* result_len = self_len + count * (to_len-from_len) */ product = count * (to_len-from_len); @@ -2939,12 +2919,12 @@ replace_substring(PyStringObject *self, PyErr_SetString(PyExc_OverflowError, "replace string is too long"); return NULL; } - + if ( (result = (PyStringObject *) PyString_FromStringAndSize(NULL, result_len)) == NULL) return NULL; result_s = PyString_AS_STRING(result); - + start = self_s; end = self_s + self_len; while (count-- > 0) { @@ -2970,29 +2950,24 @@ replace_substring(PyStringObject *self, } /* Copy the remainder of the remaining string */ Py_MEMCPY(result_s, start, end-start); - + return result; } Py_LOCAL(PyStringObject *) replace(PyStringObject *self, - PyStringObject *from, - PyStringObject *to, + const char *from_s, Py_ssize_t from_len, + const char *to_s, Py_ssize_t to_len, Py_ssize_t maxcount) { - Py_ssize_t from_len, to_len; - if (maxcount < 0) { maxcount = PY_SSIZE_T_MAX; } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { /* nothing to do; return the original string */ return return_self(self); } - - from_len = PyString_GET_SIZE(from); - to_len = PyString_GET_SIZE(to); - + if (maxcount == 0 || (from_len == 0 && to_len == 0)) { /* nothing to do; return the original string */ @@ -3000,12 +2975,12 @@ replace(PyStringObject *self, } /* Handle zero-length special cases */ - + if (from_len == 0) { /* insert the 'to' string everywhere. */ /* >>> "Python".replace("", ".") */ /* '.P.y.t.h.o.n.' */ - return replace_interleave(self, to, maxcount); + return replace_interleave(self, to_s, to_len, maxcount); } /* Except for "".replace("", "A") == "A" there is no way beyond this */ @@ -3019,9 +2994,9 @@ replace(PyStringObject *self, /* delete all occurances of 'from' string */ if (from_len == 1) { return replace_delete_single_character( - self, PyString_AS_STRING(from)[0], maxcount); + self, from_s[0], maxcount); } else { - return replace_delete_substring(self, from, maxcount); + return replace_delete_substring(self, from_s, from_len, maxcount); } } @@ -3031,22 +3006,22 @@ replace(PyStringObject *self, if (from_len == 1) { return replace_single_character_in_place( self, - PyString_AS_STRING(from)[0], - PyString_AS_STRING(to)[0], + from_s[0], + to_s[0], maxcount); } else { return replace_substring_in_place( - self, from, to, maxcount); + self, from_s, from_len, to_s, to_len, maxcount); } } /* Otherwise use the more generic algorithms */ if (from_len == 1) { - return replace_single_character(self, PyString_AS_STRING(from)[0], - to, maxcount); + return replace_single_character(self, from_s[0], + to_s, to_len, maxcount); } else { /* len('from')>=2, len('to')>=1 */ - return replace_substring(self, from, to, maxcount); + return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); } } @@ -3062,89 +3037,127 @@ string_replace(PyStringObject *self, PyObject *args) { Py_ssize_t count = -1; PyObject *from, *to; - const char *tmp_s; - Py_ssize_t tmp_len; + const char *from_s, *to_s; + Py_ssize_t from_len, to_len; if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) return NULL; if (PyString_Check(from)) { - /* Can this be made a '!check' after the Unicode check? */ + from_s = PyString_AS_STRING(from); + from_len = PyString_GET_SIZE(from); } #ifdef Py_USING_UNICODE if (PyUnicode_Check(from)) return PyUnicode_Replace((PyObject *)self, from, to, count); #endif - else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len)) + else if (PyObject_AsCharBuffer(from, &from_s, &from_len)) return NULL; if (PyString_Check(to)) { - /* Can this be made a '!check' after the Unicode check? */ + to_s = PyString_AS_STRING(to); + to_len = PyString_GET_SIZE(to); } #ifdef Py_USING_UNICODE else if (PyUnicode_Check(to)) return PyUnicode_Replace((PyObject *)self, from, to, count); #endif - else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len)) + else if (PyObject_AsCharBuffer(to, &to_s, &to_len)) return NULL; return (PyObject *)replace((PyStringObject *) self, - (PyStringObject *) from, - (PyStringObject *) to, count); + from_s, from_len, + to_s, to_len, count); } /** End DALKE **/ +/* Matches the end (direction >= 0) or start (direction < 0) of self + * against substr, using the start and end arguments. Returns + * -1 on error, 0 if not found and 1 if found. + */ +Py_LOCAL(int) +_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, + Py_ssize_t end, int direction) +{ + Py_ssize_t len = PyString_GET_SIZE(self); + Py_ssize_t slen; + const char* sub; + const char* str; + + if (PyString_Check(substr)) { + sub = PyString_AS_STRING(substr); + slen = PyString_GET_SIZE(substr); + } +#ifdef Py_USING_UNICODE + else if (PyUnicode_Check(substr)) + return PyUnicode_Tailmatch((PyObject *)self, + substr, start, end, direction); +#endif + else if (PyObject_AsCharBuffer(substr, &sub, &slen)) + return -1; + str = PyString_AS_STRING(self); + + string_adjust_indices(&start, &end, len); + + if (direction < 0) { + /* startswith */ + if (start+slen > len) + return 0; + } else { + /* endswith */ + if (end-start < slen || start > len) + return 0; + + if (end-slen > start) + start = end - slen; + } + if (end-start >= slen) + return ! memcmp(str+start, sub, slen); + return 0; +} + + PyDoc_STRVAR(startswith__doc__, "S.startswith(prefix[, start[, end]]) -> bool\n\ \n\ Return True if S starts with the specified prefix, False otherwise.\n\ With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position."); +With optional end, stop comparing S at that position.\n\ +prefix can also be a tuple of strings to try."); static PyObject * string_startswith(PyStringObject *self, PyObject *args) { - const char* str = PyString_AS_STRING(self); - Py_ssize_t len = PyString_GET_SIZE(self); - const char* prefix; - Py_ssize_t plen; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *subobj; + int result; if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - if (PyString_Check(subobj)) { - prefix = PyString_AS_STRING(subobj); - plen = PyString_GET_SIZE(subobj); - } -#ifdef Py_USING_UNICODE - else if (PyUnicode_Check(subobj)) { - Py_ssize_t rc; - rc = PyUnicode_Tailmatch((PyObject *)self, - subobj, start, end, -1); - if (rc == -1) - return NULL; - else - return PyBool_FromLong((long) rc); + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + result = _string_tailmatch(self, + PyTuple_GET_ITEM(subobj, i), + start, end, -1); + if (result == -1) + return NULL; + else if (result) { + Py_RETURN_TRUE; + } + } + Py_RETURN_FALSE; } -#endif - else if (PyObject_AsCharBuffer(subobj, &prefix, &plen)) + result = _string_tailmatch(self, subobj, start, end, -1); + if (result == -1) return NULL; - - string_adjust_indices(&start, &end, len); - - if (start+plen > len) - return PyBool_FromLong(0); - - if (end-start >= plen) - return PyBool_FromLong(!memcmp(str+start, prefix, plen)); else - return PyBool_FromLong(0); + return PyBool_FromLong(result); } @@ -3153,51 +3166,39 @@ PyDoc_STRVAR(endswith__doc__, \n\ Return True if S ends with the specified suffix, False otherwise.\n\ With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position."); +With optional end, stop comparing S at that position.\n\ +suffix can also be a tuple of strings to try."); static PyObject * string_endswith(PyStringObject *self, PyObject *args) { - const char* str = PyString_AS_STRING(self); - Py_ssize_t len = PyString_GET_SIZE(self); - const char* suffix; - Py_ssize_t slen; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *subobj; + int result; if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - if (PyString_Check(subobj)) { - suffix = PyString_AS_STRING(subobj); - slen = PyString_GET_SIZE(subobj); - } -#ifdef Py_USING_UNICODE - else if (PyUnicode_Check(subobj)) { - Py_ssize_t rc; - rc = PyUnicode_Tailmatch((PyObject *)self, - subobj, start, end, +1); - if (rc == -1) - return NULL; - else - return PyBool_FromLong((long) rc); + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + result = _string_tailmatch(self, + PyTuple_GET_ITEM(subobj, i), + start, end, +1); + if (result == -1) + return NULL; + else if (result) { + Py_RETURN_TRUE; + } + } + Py_RETURN_FALSE; } -#endif - else if (PyObject_AsCharBuffer(subobj, &suffix, &slen)) + result = _string_tailmatch(self, subobj, start, end, +1); + if (result == -1) return NULL; - - string_adjust_indices(&start, &end, len); - - if (end-start < slen || start > len) - return PyBool_FromLong(0); - - if (end-slen > start) - start = end - slen; - if (end-start >= slen) - return PyBool_FromLong(!memcmp(str+start, suffix, slen)); else - return PyBool_FromLong(0); + return PyBool_FromLong(result); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c8bc61d..1ec345f 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1466,8 +1466,9 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context) return -1; } if (value != NULL && !PyDict_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "__dict__ must be set to a dictionary"); + PyErr_Format(PyExc_TypeError, + "__dict__ must be set to a dictionary, " + "not a '%.200s'", value->ob_type->tp_name); return -1; } dict = *dictptr; @@ -1485,7 +1486,7 @@ subtype_getweakref(PyObject *obj, void *context) if (obj->ob_type->tp_weaklistoffset == 0) { PyErr_SetString(PyExc_AttributeError, - "This object has no __weaklist__"); + "This object has no __weakref__"); return NULL; } assert(obj->ob_type->tp_weaklistoffset > 0); @@ -1530,8 +1531,9 @@ valid_identifier(PyObject *s) Py_ssize_t i, n; if (!PyString_Check(s)) { - PyErr_SetString(PyExc_TypeError, - "__slots__ must be strings"); + PyErr_Format(PyExc_TypeError, + "__slots__ items must be strings, not '%.200s'", + s->ob_type->tp_name); return 0; } p = (unsigned char *) PyString_AS_STRING(s); @@ -2565,8 +2567,9 @@ reduce_2(PyObject *obj) args = PyObject_CallObject(getnewargs, NULL); Py_DECREF(getnewargs); if (args != NULL && !PyTuple_Check(args)) { - PyErr_SetString(PyExc_TypeError, - "__getnewargs__ should return a tuple"); + PyErr_Format(PyExc_TypeError, + "__getnewargs__ should return a tuple, " + "not '%.200s'", args->ob_type->tp_name); goto end; } } @@ -3206,6 +3209,8 @@ PyType_Ready(PyTypeObject *type) if (PyDict_GetItemString(type->tp_dict, "__doc__") == NULL) { if (type->tp_doc != NULL) { PyObject *doc = PyString_FromString(type->tp_doc); + if (doc == NULL) + goto error; PyDict_SetItemString(type->tp_dict, "__doc__", doc); Py_DECREF(doc); } else { @@ -4294,8 +4299,9 @@ slot_nb_index(PyObject *self) result = temp->ob_type->tp_as_number->nb_index(temp); } else { - PyErr_SetString(PyExc_TypeError, - "__index__ must return an int or a long"); + PyErr_Format(PyExc_TypeError, + "__index__ must return an int or a long, " + "not '%.200s'", temp->ob_type->tp_name); result = -1; } Py_DECREF(temp); @@ -4494,7 +4500,10 @@ slot_tp_hash(PyObject *self) Py_DECREF(func); if (res == NULL) return -1; - h = PyInt_AsLong(res); + if (PyLong_Check(res)) + h = PyLong_Type.tp_hash(res); + else + h = PyInt_AsLong(res); Py_DECREF(res); } else { @@ -4505,8 +4514,9 @@ slot_tp_hash(PyObject *self) func = lookup_method(self, "__cmp__", &cmp_str); } if (func != NULL) { + PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'", + self->ob_type->tp_name); Py_DECREF(func); - PyErr_SetString(PyExc_TypeError, "unhashable type"); return -1; } PyErr_Clear(); @@ -4526,7 +4536,18 @@ slot_tp_call(PyObject *self, PyObject *args, PyObject *kwds) if (meth == NULL) return NULL; + + /* PyObject_Call() will end up calling slot_tp_call() again if + the object returned for __call__ has __call__ itself defined + upon it. This can be an infinite recursion if you set + __call__ in a class to an instance of it. */ + if (Py_EnterRecursiveCall(" in __call__")) { + Py_DECREF(meth); + return NULL; + } res = PyObject_Call(meth, args, kwds); + Py_LeaveRecursiveCall(); + Py_DECREF(meth); return res; } @@ -4683,8 +4704,9 @@ slot_tp_iter(PyObject *self) PyErr_Clear(); func = lookup_method(self, "__getitem__", &getitem_str); if (func == NULL) { - PyErr_SetString(PyExc_TypeError, - "iteration over non-sequence"); + PyErr_Format(PyExc_TypeError, + "'%.200s' object is not iterable", + self->ob_type->tp_name); return NULL; } Py_DECREF(func); @@ -4757,8 +4779,9 @@ slot_tp_init(PyObject *self, PyObject *args, PyObject *kwds) if (res == NULL) return -1; if (res != Py_None) { - PyErr_SetString(PyExc_TypeError, - "__init__() should return None"); + PyErr_Format(PyExc_TypeError, + "__init__() should return None, not '%.200s'", + res->ob_type->tp_name); Py_DECREF(res); return -1; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c3ab2d8..9e35b61 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2820,65 +2820,199 @@ PyObject *PyUnicode_AsASCIIString(PyObject *unicode) /* --- MBCS codecs for Windows -------------------------------------------- */ -PyObject *PyUnicode_DecodeMBCS(const char *s, - Py_ssize_t size, - const char *errors) +#if SIZEOF_INT < SIZEOF_SSIZE_T +#define NEED_RETRY +#endif + +/* XXX This code is limited to "true" double-byte encodings, as + a) it assumes an incomplete character consists of a single byte, and + b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte + encodings, see IsDBCSLeadByteEx documentation. */ + +static int is_dbcs_lead_byte(const char *s, int offset) +{ + const char *curr = s + offset; + + if (IsDBCSLeadByte(*curr)) { + const char *prev = CharPrev(s, curr); + return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2); + } + return 0; +} + +/* + * Decode MBCS string into unicode object. If 'final' is set, converts + * trailing lead-byte too. Returns consumed size if succeed, -1 otherwise. + */ +static int decode_mbcs(PyUnicodeObject **v, + const char *s, /* MBCS string */ + int size, /* sizeof MBCS string */ + int final) { - PyUnicodeObject *v; Py_UNICODE *p; - DWORD usize; + Py_ssize_t n = 0; + int usize = 0; + + assert(size >= 0); + + /* Skip trailing lead-byte unless 'final' is set */ + if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1)) + --size; /* First get the size of the result */ - assert(size < INT_MAX); - usize = MultiByteToWideChar(CP_ACP, 0, s, (int)size, NULL, 0); - if (size > 0 && usize==0) - return PyErr_SetFromWindowsErrWithFilename(0, NULL); + if (size > 0) { + usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0); + if (usize == 0) { + PyErr_SetFromWindowsErrWithFilename(0, NULL); + return -1; + } + } - v = _PyUnicode_New(usize); - if (v == NULL) - return NULL; - if (usize == 0) - return (PyObject *)v; - p = PyUnicode_AS_UNICODE(v); - if (0 == MultiByteToWideChar(CP_ACP, 0, s, (int)size, p, usize)) { - Py_DECREF(v); - return PyErr_SetFromWindowsErrWithFilename(0, NULL); + if (*v == NULL) { + /* Create unicode object */ + *v = _PyUnicode_New(usize); + if (*v == NULL) + return -1; + } + else { + /* Extend unicode object */ + n = PyUnicode_GET_SIZE(*v); + if (_PyUnicode_Resize(v, n + usize) < 0) + return -1; + } + + /* Do the conversion */ + if (size > 0) { + p = PyUnicode_AS_UNICODE(*v) + n; + if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) { + PyErr_SetFromWindowsErrWithFilename(0, NULL); + return -1; + } + } + + return size; +} + +PyObject *PyUnicode_DecodeMBCSStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + PyUnicodeObject *v = NULL; + int done; + + if (consumed) + *consumed = 0; + +#ifdef NEED_RETRY + retry: + if (size > INT_MAX) + done = decode_mbcs(&v, s, INT_MAX, 0); + else +#endif + done = decode_mbcs(&v, s, (int)size, !consumed); + + if (done < 0) { + Py_XDECREF(v); + return NULL; + } + + if (consumed) + *consumed += done; + +#ifdef NEED_RETRY + if (size > INT_MAX) { + s += done; + size -= done; + goto retry; } +#endif return (PyObject *)v; } -PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, +PyObject *PyUnicode_DecodeMBCS(const char *s, Py_ssize_t size, const char *errors) { - PyObject *repr; - char *s; - DWORD mbcssize; + return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); +} + +/* + * Convert unicode into string object (MBCS). + * Returns 0 if succeed, -1 otherwise. + */ +static int encode_mbcs(PyObject **repr, + const Py_UNICODE *p, /* unicode */ + int size) /* size of unicode */ +{ + int mbcssize = 0; + Py_ssize_t n = 0; - /* If there are no characters, bail now! */ - if (size==0) - return PyString_FromString(""); + assert(size >= 0); /* First get the size of the result */ - assert(size<INT_MAX); - mbcssize = WideCharToMultiByte(CP_ACP, 0, p, (int)size, NULL, 0, NULL, NULL); - if (mbcssize==0) - return PyErr_SetFromWindowsErrWithFilename(0, NULL); + if (size > 0) { + mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL); + if (mbcssize == 0) { + PyErr_SetFromWindowsErrWithFilename(0, NULL); + return -1; + } + } - repr = PyString_FromStringAndSize(NULL, mbcssize); - if (repr == NULL) - return NULL; - if (mbcssize == 0) - return repr; + if (*repr == NULL) { + /* Create string object */ + *repr = PyString_FromStringAndSize(NULL, mbcssize); + if (*repr == NULL) + return -1; + } + else { + /* Extend string object */ + n = PyString_Size(*repr); + if (_PyString_Resize(repr, n + mbcssize) < 0) + return -1; + } /* Do the conversion */ - s = PyString_AS_STRING(repr); - assert(size < INT_MAX); - if (0 == WideCharToMultiByte(CP_ACP, 0, p, (int)size, s, mbcssize, NULL, NULL)) { - Py_DECREF(repr); - return PyErr_SetFromWindowsErrWithFilename(0, NULL); + if (size > 0) { + char *s = PyString_AS_STRING(*repr) + n; + if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) { + PyErr_SetFromWindowsErrWithFilename(0, NULL); + return -1; + } + } + + return 0; +} + +PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *repr = NULL; + int ret; + +#ifdef NEED_RETRY + retry: + if (size > INT_MAX) + ret = encode_mbcs(&repr, p, INT_MAX); + else +#endif + ret = encode_mbcs(&repr, p, (int)size); + + if (ret < 0) { + Py_XDECREF(repr); + return NULL; } + +#ifdef NEED_RETRY + if (size > INT_MAX) { + p += INT_MAX; + size -= INT_MAX; + goto retry; + } +#endif + return repr; } @@ -2893,6 +3027,8 @@ PyObject *PyUnicode_AsMBCSString(PyObject *unicode) NULL); } +#undef NEED_RETRY + #endif /* MS_WINDOWS */ /* --- Character Mapping Codec -------------------------------------------- */ @@ -4491,11 +4627,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) /* Make sure we have enough space for the separator and the item. */ itemlen = PyUnicode_GET_SIZE(item); new_res_used = res_used + itemlen; - if (new_res_used <= 0) + if (new_res_used < 0) goto Overflow; if (i < seqlen - 1) { new_res_used += seplen; - if (new_res_used <= 0) + if (new_res_used < 0) goto Overflow; } if (new_res_used > res_alloc) { @@ -4536,7 +4672,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) Overflow: PyErr_SetString(PyExc_OverflowError, - "join() is too long for a Python string"); + "join() result is too long for a Python string"); Py_DECREF(item); /* fall through */ @@ -6667,29 +6803,44 @@ PyDoc_STRVAR(startswith__doc__, \n\ Return True if S starts with the specified prefix, False otherwise.\n\ With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position."); +With optional end, stop comparing S at that position.\n\ +prefix can also be a tuple of strings to try."); static PyObject * unicode_startswith(PyUnicodeObject *self, PyObject *args) { + PyObject *subobj; PyUnicodeObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; + int result; - if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &substring, + if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + substring = (PyUnicodeObject *)PyUnicode_FromObject( + PyTuple_GET_ITEM(subobj, i)); + if (substring == NULL) + return NULL; + result = tailmatch(self, substring, start, end, -1); + Py_DECREF(substring); + if (result) { + Py_RETURN_TRUE; + } + } + /* nothing matched */ + Py_RETURN_FALSE; + } + substring = (PyUnicodeObject *)PyUnicode_FromObject(subobj); if (substring == NULL) - return NULL; - - result = PyBool_FromLong(tailmatch(self, substring, start, end, -1)); - + return NULL; + result = tailmatch(self, substring, start, end, -1); Py_DECREF(substring); - return result; + return PyBool_FromLong(result); } @@ -6698,29 +6849,44 @@ PyDoc_STRVAR(endswith__doc__, \n\ Return True if S ends with the specified suffix, False otherwise.\n\ With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position."); +With optional end, stop comparing S at that position.\n\ +suffix can also be a tuple of strings to try."); static PyObject * unicode_endswith(PyUnicodeObject *self, PyObject *args) { + PyObject *subobj; PyUnicodeObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; + int result; - if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &substring, - _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) + if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj, + _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + substring = (PyUnicodeObject *)PyUnicode_FromObject( + PyTuple_GET_ITEM(subobj, i)); + if (substring == NULL) + return NULL; + result = tailmatch(self, substring, start, end, +1); + Py_DECREF(substring); + if (result) { + Py_RETURN_TRUE; + } + } + Py_RETURN_FALSE; + } + substring = (PyUnicodeObject *)PyUnicode_FromObject(subobj); if (substring == NULL) - return NULL; - - result = PyBool_FromLong(tailmatch(self, substring, start, end, +1)); + return NULL; + result = tailmatch(self, substring, start, end, +1); Py_DECREF(substring); - return result; + return PyBool_FromLong(result); } @@ -7748,6 +7914,9 @@ void _PyUnicode_Init(void) unicode_freelist = NULL; unicode_freelist_size = 0; unicode_empty = _PyUnicode_New(0); + if (!unicode_empty) + return; + strcpy(unicode_default_encoding, "ascii"); for (i = 0; i < 256; i++) unicode_latin1[i] = NULL; @@ -7758,6 +7927,8 @@ void _PyUnicode_Init(void) bloom_linebreak = make_bloom_mask( linebreak, sizeof(linebreak) / sizeof(linebreak[0]) ); + + PyType_Ready(&EncodingMapType); } /* Finalize the Unicode implementation */ diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index c55dd4c..f814306 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -6,10 +6,10 @@ ((PyWeakReference **) PyObject_GET_WEAKREFS_LISTPTR(o)) -long +Py_ssize_t _PyWeakref_GetWeakrefCount(PyWeakReference *head) { - long count = 0; + Py_ssize_t count = 0; while (head != NULL) { ++count; |