diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/classobject.c | 27 | ||||
-rw-r--r-- | Objects/codeobject.c | 3 | ||||
-rw-r--r-- | Objects/exceptions.c | 2141 | ||||
-rw-r--r-- | Objects/fileobject.c | 92 | ||||
-rw-r--r-- | Objects/floatobject.c | 7 | ||||
-rw-r--r-- | Objects/frameobject.c | 220 | ||||
-rw-r--r-- | Objects/longobject.c | 269 | ||||
-rw-r--r-- | Objects/stringlib/README.txt | 34 | ||||
-rw-r--r-- | Objects/stringlib/count.h | 34 | ||||
-rw-r--r-- | Objects/stringlib/fastsearch.h | 104 | ||||
-rw-r--r-- | Objects/stringlib/find.h | 112 | ||||
-rw-r--r-- | Objects/stringlib/partition.h | 111 | ||||
-rw-r--r-- | Objects/stringobject.c | 1408 | ||||
-rw-r--r-- | Objects/typeobject.c | 10 | ||||
-rw-r--r-- | Objects/unicodectype.c | 333 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 772 | ||||
-rw-r--r-- | Objects/weakrefobject.c | 2 |
17 files changed, 4690 insertions, 989 deletions
diff --git a/Objects/classobject.c b/Objects/classobject.c index 594de11..9cfdf0e 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -81,12 +81,9 @@ PyClass_New(PyObject *bases, PyObject *dict, PyObject *name) if (!PyClass_Check(base)) { if (PyCallable_Check( (PyObject *) base->ob_type)) - return PyObject_CallFunction( + return PyObject_CallFunctionObjArgs( (PyObject *) base->ob_type, - "OOO", - name, - bases, - dict); + name, bases, dict, NULL); PyErr_SetString(PyExc_TypeError, "PyClass_New: base must be a class"); return NULL; @@ -320,7 +317,7 @@ class_setattr(PyClassObject *op, PyObject *name, PyObject *v) } sname = PyString_AsString(name); if (sname[0] == '_' && sname[1] == '_') { - int n = PyString_Size(name); + Py_ssize_t n = PyString_Size(name); if (sname[n-1] == '_' && sname[n-2] == '_') { char *err = NULL; if (strcmp(sname, "__dict__") == 0) @@ -380,7 +377,7 @@ class_str(PyClassObject *op) PyObject *mod = PyDict_GetItemString(op->cl_dict, "__module__"); PyObject *name = op->cl_name; PyObject *res; - int m, n; + Py_ssize_t m, n; if (name == NULL || !PyString_Check(name)) return class_repr(op); @@ -638,7 +635,7 @@ instance_dealloc(register PyInstanceObject *inst) PyObject_GC_Del(inst); } else { - int refcnt = inst->ob_refcnt; + Py_ssize_t refcnt = inst->ob_refcnt; /* __del__ resurrected it! Make it look like the original * Py_DECREF never happened. 
*/ @@ -778,7 +775,7 @@ instance_setattr(PyInstanceObject *inst, PyObject *name, PyObject *v) PyObject *func, *args, *res, *tmp; char *sname = PyString_AsString(name); if (sname[0] == '_' && sname[1] == '_') { - int n = PyString_Size(name); + Py_ssize_t n = PyString_Size(name); if (sname[n-1] == '_' && sname[n-2] == '_') { if (strcmp(sname, "__dict__") == 0) { if (PyEval_GetRestricted()) { @@ -1075,21 +1072,15 @@ static PyMappingMethods instance_as_mapping = { static PyObject * instance_item(PyInstanceObject *inst, Py_ssize_t i) { - PyObject *func, *arg, *res; + PyObject *func, *res; if (getitemstr == NULL) getitemstr = PyString_InternFromString("__getitem__"); func = instance_getattr(inst, getitemstr); if (func == NULL) return NULL; - arg = Py_BuildValue("(n)", i); - if (arg == NULL) { - Py_DECREF(func); - return NULL; - } - res = PyEval_CallObject(func, arg); + res = PyObject_CallFunction(func, "n", i); Py_DECREF(func); - Py_DECREF(arg); return res; } @@ -1263,7 +1254,7 @@ instance_contains(PyInstanceObject *inst, PyObject *member) */ PyErr_Clear(); return _PySequence_IterSearch((PyObject *)inst, member, - PY_ITERSEARCH_CONTAINS); + PY_ITERSEARCH_CONTAINS) > 0; } else return -1; diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 8ae2399..a9bcb01 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -102,6 +102,7 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags, co->co_firstlineno = firstlineno; Py_INCREF(lnotab); co->co_lnotab = lnotab; + co->co_zombieframe = NULL; } return co; } @@ -265,6 +266,8 @@ code_dealloc(PyCodeObject *co) Py_XDECREF(co->co_filename); Py_XDECREF(co->co_name); Py_XDECREF(co->co_lnotab); + if (co->co_zombieframe != NULL) + PyObject_GC_Del(co->co_zombieframe); PyObject_DEL(co); } diff --git a/Objects/exceptions.c b/Objects/exceptions.c new file mode 100644 index 0000000..6271372 --- /dev/null +++ b/Objects/exceptions.c @@ -0,0 +1,2141 @@ +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include 
"structmember.h" +#include "osdefs.h" + +#define MAKE_IT_NONE(x) (x) = Py_None; Py_INCREF(Py_None); +#define EXC_MODULE_NAME "exceptions." + +/* NOTE: If the exception class hierarchy changes, don't forget to update + * Lib/test/exception_hierarchy.txt + */ + +PyDoc_STRVAR(exceptions_doc, "Python's standard exception class hierarchy.\n\ +\n\ +Exceptions found here are defined both in the exceptions module and the\n\ +built-in namespace. It is recommended that user-defined exceptions\n\ +inherit from Exception. See the documentation for the exception\n\ +inheritance hierarchy.\n\ +"); + +/* + * BaseException + */ +static PyObject * +BaseException_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyBaseExceptionObject *self; + + self = (PyBaseExceptionObject *)type->tp_alloc(type, 0); + /* the dict is created on the fly in PyObject_GenericSetAttr */ + self->message = self->dict = NULL; + + self->args = PyTuple_New(0); + if (!self->args) { + Py_DECREF(self); + return NULL; + } + + self->message = PyString_FromString(""); + if (!self->message) { + Py_DECREF(self); + return NULL; + } + + return (PyObject *)self; +} + +static int +BaseException_init(PyBaseExceptionObject *self, PyObject *args, PyObject *kwds) +{ + Py_DECREF(self->args); + self->args = args; + Py_INCREF(self->args); + + if (PyTuple_GET_SIZE(self->args) == 1) { + Py_DECREF(self->message); + self->message = PyTuple_GET_ITEM(self->args, 0); + Py_INCREF(self->message); + } + return 0; +} + +int +BaseException_clear(PyBaseExceptionObject *self) +{ + Py_CLEAR(self->dict); + Py_CLEAR(self->args); + Py_CLEAR(self->message); + return 0; +} + +static void +BaseException_dealloc(PyBaseExceptionObject *self) +{ + BaseException_clear(self); + self->ob_type->tp_free((PyObject *)self); +} + +int +BaseException_traverse(PyBaseExceptionObject *self, visitproc visit, void *arg) +{ + if (self->dict) + Py_VISIT(self->dict); + Py_VISIT(self->args); + Py_VISIT(self->message); + return 0; +} + +static PyObject * 
+BaseException_str(PyBaseExceptionObject *self) +{ + PyObject *out; + + switch (PySequence_Length(self->args)) { + case 0: + out = PyString_FromString(""); + break; + case 1: + { + PyObject *tmp = PySequence_GetItem(self->args, 0); + if (tmp) { + out = PyObject_Str(tmp); + Py_DECREF(tmp); + } + else + out = NULL; + break; + } + case -1: + PyErr_Clear(); + /* Fall through */ + default: + out = PyObject_Str(self->args); + break; + } + + return out; +} + +static PyObject * +BaseException_repr(PyBaseExceptionObject *self) +{ + Py_ssize_t args_len; + PyObject *repr_suffix; + PyObject *repr; + char *name; + char *dot; + + args_len = PySequence_Length(self->args); + if (args_len < 0) { + return NULL; + } + + if (args_len == 0) { + repr_suffix = PyString_FromString("()"); + if (!repr_suffix) + return NULL; + } + else { + PyObject *args_repr = PyObject_Repr(self->args); + if (!args_repr) + return NULL; + repr_suffix = args_repr; + } + + name = (char *)self->ob_type->tp_name; + dot = strrchr(name, '.'); + if (dot != NULL) name = dot+1; + + repr = PyString_FromString(name); + if (!repr) { + Py_DECREF(repr_suffix); + return NULL; + } + + PyString_ConcatAndDel(&repr, repr_suffix); + return repr; +} + +/* Pickling support */ +static PyObject * +BaseException_reduce(PyBaseExceptionObject *self) +{ + return PyTuple_Pack(3, self->ob_type, self->args, self->dict); +} + + +#ifdef Py_USING_UNICODE +/* while this method generates fairly uninspired output, it a least + * guarantees that we can display exceptions that have unicode attributes + */ +static PyObject * +BaseException_unicode(PyBaseExceptionObject *self) +{ + if (PySequence_Length(self->args) == 0) + return PyUnicode_FromUnicode(NULL, 0); + if (PySequence_Length(self->args) == 1) { + PyObject *temp = PySequence_GetItem(self->args, 0); + PyObject *unicode_obj; + if (!temp) { + return NULL; + } + unicode_obj = PyObject_Unicode(temp); + Py_DECREF(temp); + return unicode_obj; + } + return PyObject_Unicode(self->args); +} +#endif 
/* Py_USING_UNICODE */ + +static PyMethodDef BaseException_methods[] = { + {"__reduce__", (PyCFunction)BaseException_reduce, METH_NOARGS }, +#ifdef Py_USING_UNICODE + {"__unicode__", (PyCFunction)BaseException_unicode, METH_NOARGS }, +#endif + {NULL, NULL, 0, NULL}, +}; + + + +static PyObject * +BaseException_getitem(PyBaseExceptionObject *self, Py_ssize_t index) +{ + return PySequence_GetItem(self->args, index); +} + +static PySequenceMethods BaseException_as_sequence = { + 0, /* sq_length; */ + 0, /* sq_concat; */ + 0, /* sq_repeat; */ + (ssizeargfunc)BaseException_getitem, /* sq_item; */ + 0, /* sq_slice; */ + 0, /* sq_ass_item; */ + 0, /* sq_ass_slice; */ + 0, /* sq_contains; */ + 0, /* sq_inplace_concat; */ + 0 /* sq_inplace_repeat; */ +}; + +static PyMemberDef BaseException_members[] = { + {"message", T_OBJECT, offsetof(PyBaseExceptionObject, message), 0, + PyDoc_STR("exception message")}, + {NULL} /* Sentinel */ +}; + + +static PyObject * +BaseException_get_dict(PyBaseExceptionObject *self) +{ + if (self->dict == NULL) { + self->dict = PyDict_New(); + if (!self->dict) + return NULL; + } + Py_INCREF(self->dict); + return self->dict; +} + +static int +BaseException_set_dict(PyBaseExceptionObject *self, PyObject *val) +{ + if (val == NULL) { + PyErr_SetString(PyExc_TypeError, "__dict__ may not be deleted"); + return -1; + } + if (!PyDict_Check(val)) { + PyErr_SetString(PyExc_TypeError, "__dict__ must be a dictionary"); + return -1; + } + Py_CLEAR(self->dict); + Py_INCREF(val); + self->dict = val; + return 0; +} + +static PyObject * +BaseException_get_args(PyBaseExceptionObject *self) +{ + if (self->args == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + Py_INCREF(self->args); + return self->args; +} + +static int +BaseException_set_args(PyBaseExceptionObject *self, PyObject *val) +{ + PyObject *seq; + if (val == NULL) { + PyErr_SetString(PyExc_TypeError, "args may not be deleted"); + return -1; + } + seq = PySequence_Tuple(val); + if (!seq) return -1; + 
self->args = seq; + return 0; +} + +static PyGetSetDef BaseException_getset[] = { + {"__dict__", (getter)BaseException_get_dict, (setter)BaseException_set_dict}, + {"args", (getter)BaseException_get_args, (setter)BaseException_set_args}, + {NULL}, +}; + + +static PyTypeObject _PyExc_BaseException = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + EXC_MODULE_NAME "BaseException", /*tp_name*/ + sizeof(PyBaseExceptionObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)BaseException_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /* tp_compare; */ + (reprfunc)BaseException_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + &BaseException_as_sequence, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + (reprfunc)BaseException_str, /*tp_str*/ + PyObject_GenericGetAttr, /*tp_getattro*/ + PyObject_GenericSetAttr, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + PyDoc_STR("Common base class for all exceptions"), /* tp_doc */ + (traverseproc)BaseException_traverse, /* tp_traverse */ + (inquiry)BaseException_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + BaseException_methods, /* tp_methods */ + BaseException_members, /* tp_members */ + BaseException_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(PyBaseExceptionObject, dict), /* tp_dictoffset */ + (initproc)BaseException_init, /* tp_init */ + 0, /* tp_alloc */ + BaseException_new, /* tp_new */ +}; +/* the CPython API expects exceptions to be (PyObject *) - both a hold-over +from the previous implmentation and also allowing Python objects to be used +in the API */ +PyObject *PyExc_BaseException = (PyObject *)&_PyExc_BaseException; + +/* note these macros omit the last semicolon so the macro invocation may + * include it and not look strange. 
+ */ +#define SimpleExtendsException(EXCBASE, EXCNAME, EXCDOC) \ +static PyTypeObject _PyExc_ ## EXCNAME = { \ + PyObject_HEAD_INIT(NULL) \ + 0, \ + EXC_MODULE_NAME # EXCNAME, \ + sizeof(PyBaseExceptionObject), \ + 0, (destructor)BaseException_dealloc, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, \ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \ + PyDoc_STR(EXCDOC), (traverseproc)BaseException_traverse, \ + (inquiry)BaseException_clear, 0, 0, 0, 0, 0, 0, 0, &_ ## EXCBASE, \ + 0, 0, 0, offsetof(PyBaseExceptionObject, dict), \ + (initproc)BaseException_init, 0, BaseException_new,\ +}; \ +PyObject *PyExc_ ## EXCNAME = (PyObject *)&_PyExc_ ## EXCNAME + +#define MiddlingExtendsException(EXCBASE, EXCNAME, EXCSTORE, EXCDOC) \ +static PyTypeObject _PyExc_ ## EXCNAME = { \ + PyObject_HEAD_INIT(NULL) \ + 0, \ + EXC_MODULE_NAME # EXCNAME, \ + sizeof(Py ## EXCSTORE ## Object), \ + 0, (destructor)BaseException_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, \ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \ + PyDoc_STR(EXCDOC), (traverseproc)BaseException_traverse, \ + (inquiry)BaseException_clear, 0, 0, 0, 0, 0, 0, 0, &_ ## EXCBASE, \ + 0, 0, 0, offsetof(Py ## EXCSTORE ## Object, dict), \ + (initproc)EXCSTORE ## _init, 0, EXCSTORE ## _new,\ +}; \ +PyObject *PyExc_ ## EXCNAME = (PyObject *)&_PyExc_ ## EXCNAME + +#define ComplexExtendsException(EXCBASE, EXCNAME, EXCSTORE, EXCDEALLOC, EXCMETHODS, EXCMEMBERS, EXCSTR, EXCDOC) \ +static PyTypeObject _PyExc_ ## EXCNAME = { \ + PyObject_HEAD_INIT(NULL) \ + 0, \ + EXC_MODULE_NAME # EXCNAME, \ + sizeof(Py ## EXCSTORE ## Object), 0, \ + (destructor)EXCSTORE ## _dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + (reprfunc)EXCSTR, 0, 0, 0, \ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \ + PyDoc_STR(EXCDOC), (traverseproc)EXCSTORE ## _traverse, \ + (inquiry)EXCSTORE ## _clear, 0, 0, 0, 0, EXCMETHODS, \ + EXCMEMBERS, 0, &_ ## EXCBASE, \ + 0, 0, 0, offsetof(Py ## EXCSTORE ## Object, 
dict), \ + (initproc)EXCSTORE ## _init, 0, EXCSTORE ## _new,\ +}; \ +PyObject *PyExc_ ## EXCNAME = (PyObject *)&_PyExc_ ## EXCNAME + + +/* + * Exception extends BaseException + */ +SimpleExtendsException(PyExc_BaseException, Exception, + "Common base class for all non-exit exceptions."); + + +/* + * StandardError extends Exception + */ +SimpleExtendsException(PyExc_Exception, StandardError, + "Base class for all standard Python exceptions that do not represent\n" + "interpreter exiting."); + + +/* + * TypeError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, TypeError, + "Inappropriate argument type."); + + +/* + * StopIteration extends Exception + */ +SimpleExtendsException(PyExc_Exception, StopIteration, + "Signal the end from iterator.next()."); + + +/* + * GeneratorExit extends Exception + */ +SimpleExtendsException(PyExc_Exception, GeneratorExit, + "Request that a generator exit."); + + +/* + * SystemExit extends BaseException + */ +static PyObject * +SystemExit_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PySystemExitObject *self; + + self = (PySystemExitObject *)BaseException_new(type, args, kwds); + if (!self) + return NULL; + + MAKE_IT_NONE(self->code); + + return (PyObject *)self; +} + +static int +SystemExit_init(PySystemExitObject *self, PyObject *args, PyObject *kwds) +{ + Py_ssize_t size = PyTuple_GET_SIZE(args); + + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + return -1; + + Py_DECREF(self->code); + if (size == 1) + self->code = PyTuple_GET_ITEM(args, 0); + else if (size > 1) + self->code = args; + Py_INCREF(self->code); + return 0; +} + +int +SystemExit_clear(PySystemExitObject *self) +{ + Py_CLEAR(self->code); + return BaseException_clear((PyBaseExceptionObject *)self); +} + +static void +SystemExit_dealloc(PySystemExitObject *self) +{ + SystemExit_clear(self); + self->ob_type->tp_free((PyObject *)self); +} + +int +SystemExit_traverse(PySystemExitObject *self, visitproc visit, 
void *arg) +{ + Py_VISIT(self->code); + return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); +} + +static PyMemberDef SystemExit_members[] = { + {"message", T_OBJECT, offsetof(PySystemExitObject, message), 0, + PyDoc_STR("exception message")}, + {"code", T_OBJECT, offsetof(PySystemExitObject, code), 0, + PyDoc_STR("exception code")}, + {NULL} /* Sentinel */ +}; + +ComplexExtendsException(PyExc_BaseException, SystemExit, SystemExit, + SystemExit_dealloc, 0, SystemExit_members, 0, + "Request to exit from the interpreter."); + +/* + * KeyboardInterrupt extends BaseException + */ +SimpleExtendsException(PyExc_BaseException, KeyboardInterrupt, + "Program interrupted by user."); + + +/* + * ImportError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, ImportError, + "Import can't find module, or can't find name in module."); + + +/* + * EnvironmentError extends StandardError + */ + +static PyObject * +EnvironmentError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEnvironmentErrorObject *self = NULL; + + self = (PyEnvironmentErrorObject *)BaseException_new(type, args, kwds); + if (!self) + return NULL; + + self->myerrno = Py_None; + Py_INCREF(Py_None); + self->strerror = Py_None; + Py_INCREF(Py_None); + self->filename = Py_None; + Py_INCREF(Py_None); + + return (PyObject *)self; +} + +/* Where a function has a single filename, such as open() or some + * of the os module functions, PyErr_SetFromErrnoWithFilename() is + * called, giving a third argument which is the filename. But, so + * that old code using in-place unpacking doesn't break, e.g.: + * + * except IOError, (errno, strerror): + * + * we hack args so that it only contains two items. This also + * means we need our own __str__() which prints out the filename + * when it was supplied. 
+ */ +static int +EnvironmentError_init(PyEnvironmentErrorObject *self, PyObject *args, + PyObject *kwds) +{ + PyObject *myerrno = NULL, *strerror = NULL, *filename = NULL; + PyObject *subslice = NULL; + + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + return -1; + + if (PyTuple_GET_SIZE(args) <= 1) { + return 0; + } + + if (!PyArg_UnpackTuple(args, "EnvironmentError", 2, 3, + &myerrno, &strerror, &filename)) { + return -1; + } + Py_DECREF(self->myerrno); /* replacing */ + self->myerrno = myerrno; + Py_INCREF(self->myerrno); + + Py_DECREF(self->strerror); /* replacing */ + self->strerror = strerror; + Py_INCREF(self->strerror); + + /* self->filename will remain Py_None otherwise */ + if (filename != NULL) { + Py_DECREF(self->filename); /* replacing */ + self->filename = filename; + Py_INCREF(self->filename); + + subslice = PyTuple_GetSlice(args, 0, 2); + if (!subslice) + return -1; + + Py_DECREF(self->args); /* replacing args */ + self->args = subslice; + } + return 0; +} + +int +EnvironmentError_clear(PyEnvironmentErrorObject *self) +{ + Py_CLEAR(self->myerrno); + Py_CLEAR(self->strerror); + Py_CLEAR(self->filename); + return BaseException_clear((PyBaseExceptionObject *)self); +} + +static void +EnvironmentError_dealloc(PyEnvironmentErrorObject *self) +{ + EnvironmentError_clear(self); + self->ob_type->tp_free((PyObject *)self); +} + +int +EnvironmentError_traverse(PyEnvironmentErrorObject *self, visitproc visit, + void *arg) +{ + Py_VISIT(self->myerrno); + Py_VISIT(self->strerror); + Py_VISIT(self->filename); + return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); +} + +static PyObject * +EnvironmentError_str(PyEnvironmentErrorObject *self) +{ + PyObject *rtnval = NULL; + + if (self->filename != Py_None) { + PyObject *fmt = PyString_FromString("[Errno %s] %s: %s"); + PyObject *repr = PyObject_Repr(self->filename); + PyObject *tuple = PyTuple_New(3); + + if (!fmt || !repr || !tuple) { + Py_XDECREF(fmt); + 
Py_XDECREF(repr); + Py_XDECREF(tuple); + return NULL; + } + Py_INCREF(self->myerrno); + PyTuple_SET_ITEM(tuple, 0, self->myerrno); + Py_INCREF(self->strerror); + PyTuple_SET_ITEM(tuple, 1, self->strerror); + Py_INCREF(repr); + PyTuple_SET_ITEM(tuple, 2, repr); + + rtnval = PyString_Format(fmt, tuple); + + Py_DECREF(fmt); + Py_DECREF(tuple); + } + else if (PyObject_IsTrue(self->myerrno) && + PyObject_IsTrue(self->strerror)) { + PyObject *fmt = PyString_FromString("[Errno %s] %s"); + PyObject *tuple = PyTuple_New(2); + + if (!fmt || !tuple) { + Py_XDECREF(fmt); + Py_XDECREF(tuple); + return NULL; + } + Py_INCREF(self->myerrno); + PyTuple_SET_ITEM(tuple, 0, self->myerrno); + Py_INCREF(self->strerror); + PyTuple_SET_ITEM(tuple, 1, self->strerror); + + rtnval = PyString_Format(fmt, tuple); + + Py_DECREF(fmt); + Py_DECREF(tuple); + } + else + rtnval = BaseException_str((PyBaseExceptionObject *)self); + + return rtnval; +} + +static PyMemberDef EnvironmentError_members[] = { + {"message", T_OBJECT, offsetof(PyEnvironmentErrorObject, message), 0, + PyDoc_STR("exception message")}, + {"errno", T_OBJECT, offsetof(PyEnvironmentErrorObject, myerrno), 0, + PyDoc_STR("exception errno")}, + {"strerror", T_OBJECT, offsetof(PyEnvironmentErrorObject, strerror), 0, + PyDoc_STR("exception strerror")}, + {"filename", T_OBJECT, offsetof(PyEnvironmentErrorObject, filename), 0, + PyDoc_STR("exception filename")}, + {NULL} /* Sentinel */ +}; + + +static PyObject * +EnvironmentError_reduce(PyEnvironmentErrorObject *self) +{ + PyObject *args = self->args; + PyObject *res = NULL, *tmp; + /* self->args is only the first two real arguments if there was a + * file name given to EnvironmentError. 
*/ + if (PyTuple_Check(args) && + PyTuple_GET_SIZE(args) == 2 && + self->filename != Py_None) { + + args = PyTuple_New(3); + if (!args) return NULL; + + tmp = PyTuple_GetItem(self->args, 0); + if (!tmp) goto finish; + Py_INCREF(tmp); + PyTuple_SET_ITEM(args, 0, tmp); + + tmp = PyTuple_GetItem(self->args, 1); + if (!tmp) goto finish; + Py_INCREF(tmp); + PyTuple_SET_ITEM(args, 1, tmp); + + Py_INCREF(self->filename); + PyTuple_SET_ITEM(args, 2, self->filename); + } else { + Py_INCREF(args); + } + res = PyTuple_Pack(3, self->ob_type, args, self->dict); + finish: + Py_DECREF(args); + return res; +} + + +static PyMethodDef EnvironmentError_methods[] = { + {"__reduce__", (PyCFunction)EnvironmentError_reduce, METH_NOARGS}, + {NULL} +}; + +ComplexExtendsException(PyExc_StandardError, EnvironmentError, + EnvironmentError, EnvironmentError_dealloc, + EnvironmentError_methods, EnvironmentError_members, + EnvironmentError_str, + "Base class for I/O related errors."); + + +/* + * IOError extends EnvironmentError + */ +MiddlingExtendsException(PyExc_EnvironmentError, IOError, + EnvironmentError, "I/O operation failed."); + + +/* + * OSError extends EnvironmentError + */ +MiddlingExtendsException(PyExc_EnvironmentError, OSError, + EnvironmentError, "OS system call failed."); + + +/* + * WindowsError extends OSError + */ +#ifdef MS_WINDOWS +#include "errmap.h" + +int +WindowsError_clear(PyWindowsErrorObject *self) +{ + Py_CLEAR(self->myerrno); + Py_CLEAR(self->strerror); + Py_CLEAR(self->filename); + Py_CLEAR(self->winerror); + return BaseException_clear((PyBaseExceptionObject *)self); +} + +static void +WindowsError_dealloc(PyWindowsErrorObject *self) +{ + WindowsError_clear(self); + self->ob_type->tp_free((PyObject *)self); +} + +int +WindowsError_traverse(PyWindowsErrorObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->myerrno); + Py_VISIT(self->strerror); + Py_VISIT(self->filename); + Py_VISIT(self->winerror); + return BaseException_traverse((PyBaseExceptionObject 
*)self, visit, arg); +} + +static PyObject * +WindowsError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *o_errcode = NULL; + long errcode; + PyWindowsErrorObject *self; + long posix_errno; + + self = (PyWindowsErrorObject *)EnvironmentError_new(type, args, kwds); + if (!self) + return NULL; + + if (self->myerrno == Py_None) { + self->winerror = self->myerrno; + Py_INCREF(self->winerror); + return (PyObject *)self; + } + + /* Set errno to the POSIX errno, and winerror to the Win32 + error code. */ + errcode = PyInt_AsLong(self->myerrno); + if (errcode == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + /* give a clearer error message */ + PyErr_SetString(PyExc_TypeError, "errno has to be an integer"); + goto failed; + } + posix_errno = winerror_to_errno(errcode); + + self->winerror = self->myerrno; + + o_errcode = PyInt_FromLong(posix_errno); + if (!o_errcode) + goto failed; + + self->myerrno = o_errcode; + + return (PyObject *)self; +failed: + /* Could not set errno. */ + Py_DECREF(self); + return NULL; +} + +static int +WindowsError_init(PyWindowsErrorObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *o_errcode = NULL; + long errcode; + long posix_errno; + + if (EnvironmentError_init((PyEnvironmentErrorObject *)self, args, kwds) + == -1) + return -1; + + if (self->myerrno == Py_None) { + Py_DECREF(self->winerror); + self->winerror = self->myerrno; + Py_INCREF(self->winerror); + return 0; + } + + /* Set errno to the POSIX errno, and winerror to the Win32 + error code. 
*/ + errcode = PyInt_AsLong(self->myerrno); + if (errcode == -1 && PyErr_Occurred()) + return -1; + posix_errno = winerror_to_errno(errcode); + + Py_DECREF(self->winerror); + self->winerror = self->myerrno; + + o_errcode = PyInt_FromLong(posix_errno); + if (!o_errcode) + return -1; + + self->myerrno = o_errcode; + + return 0; +} + + +static PyObject * +WindowsError_str(PyWindowsErrorObject *self) +{ + PyObject *repr = NULL; + PyObject *fmt = NULL; + PyObject *tuple = NULL; + PyObject *rtnval = NULL; + + if (self->filename != Py_None) { + fmt = PyString_FromString("[Error %s] %s: %s"); + repr = PyObject_Repr(self->filename); + if (!fmt || !repr) + goto finally; + + tuple = PyTuple_Pack(3, self->myerrno, self->strerror, repr); + if (!tuple) + goto finally; + + rtnval = PyString_Format(fmt, tuple); + Py_DECREF(tuple); + } + else if (PyObject_IsTrue(self->myerrno) && + PyObject_IsTrue(self->strerror)) { + fmt = PyString_FromString("[Error %s] %s"); + if (!fmt) + goto finally; + + tuple = PyTuple_Pack(2, self->myerrno, self->strerror); + if (!tuple) + goto finally; + + rtnval = PyString_Format(fmt, tuple); + Py_DECREF(tuple); + } + else + rtnval = EnvironmentError_str((PyEnvironmentErrorObject *)self); + + finally: + Py_XDECREF(repr); + Py_XDECREF(fmt); + Py_XDECREF(tuple); + return rtnval; +} + +static PyMemberDef WindowsError_members[] = { + {"message", T_OBJECT, offsetof(PyWindowsErrorObject, message), 0, + PyDoc_STR("exception message")}, + {"errno", T_OBJECT, offsetof(PyWindowsErrorObject, myerrno), 0, + PyDoc_STR("POSIX exception code")}, + {"strerror", T_OBJECT, offsetof(PyWindowsErrorObject, strerror), 0, + PyDoc_STR("exception strerror")}, + {"filename", T_OBJECT, offsetof(PyWindowsErrorObject, filename), 0, + PyDoc_STR("exception filename")}, + {"winerror", T_OBJECT, offsetof(PyWindowsErrorObject, winerror), 0, + PyDoc_STR("Win32 exception code")}, + {NULL} /* Sentinel */ +}; + +ComplexExtendsException(PyExc_OSError, WindowsError, WindowsError, + 
WindowsError_dealloc, 0, WindowsError_members, + WindowsError_str, "MS-Windows OS system call failed."); + +#endif /* MS_WINDOWS */ + + +/* + * VMSError extends OSError (I think) + */ +#ifdef __VMS +MiddlingExtendsException(PyExc_OSError, VMSError, EnvironmentError, + "OpenVMS OS system call failed."); +#endif + + +/* + * EOFError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, EOFError, + "Read beyond end of file."); + + +/* + * RuntimeError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, RuntimeError, + "Unspecified run-time error."); + + +/* + * NotImplementedError extends RuntimeError + */ +SimpleExtendsException(PyExc_RuntimeError, NotImplementedError, + "Method or function hasn't been implemented yet."); + +/* + * NameError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, NameError, + "Name not found globally."); + +/* + * UnboundLocalError extends NameError + */ +SimpleExtendsException(PyExc_NameError, UnboundLocalError, + "Local name referenced but not bound to a value."); + +/* + * AttributeError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, AttributeError, + "Attribute not found."); + + +/* + * SyntaxError extends StandardError + */ +static PyObject * +SyntaxError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PySyntaxErrorObject *self = NULL; + + self = (PySyntaxErrorObject *)BaseException_new(type, args, kwds); + if (!self) + return NULL; + + MAKE_IT_NONE(self->msg) + MAKE_IT_NONE(self->filename) + MAKE_IT_NONE(self->lineno) + MAKE_IT_NONE(self->offset) + MAKE_IT_NONE(self->text) + + /* this is always None - yes, I know it doesn't seem to be used + anywhere, but it was in the previous implementation */ + MAKE_IT_NONE(self->print_file_and_line) + + return (PyObject *)self; +} + +static int +SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *info = NULL; + Py_ssize_t lenargs = PyTuple_GET_SIZE(args); + + if 
(BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + return -1; + + if (lenargs >= 1) { + Py_DECREF(self->msg); + self->msg = PyTuple_GET_ITEM(args, 0); + Py_INCREF(self->msg); + } + if (lenargs == 2) { + info = PyTuple_GET_ITEM(args, 1); + info = PySequence_Tuple(info); + if (!info) return -1; + + Py_DECREF(self->filename); + self->filename = PyTuple_GET_ITEM(info, 0); + Py_INCREF(self->filename); + + Py_DECREF(self->lineno); + self->lineno = PyTuple_GET_ITEM(info, 1); + Py_INCREF(self->lineno); + + Py_DECREF(self->offset); + self->offset = PyTuple_GET_ITEM(info, 2); + Py_INCREF(self->offset); + + Py_DECREF(self->text); + self->text = PyTuple_GET_ITEM(info, 3); + Py_INCREF(self->text); + } + return 0; +} + +int +SyntaxError_clear(PySyntaxErrorObject *self) +{ + Py_CLEAR(self->msg); + Py_CLEAR(self->filename); + Py_CLEAR(self->lineno); + Py_CLEAR(self->offset); + Py_CLEAR(self->text); + Py_CLEAR(self->print_file_and_line); + return BaseException_clear((PyBaseExceptionObject *)self); +} + +static void +SyntaxError_dealloc(PySyntaxErrorObject *self) +{ + SyntaxError_clear(self); + self->ob_type->tp_free((PyObject *)self); +} + +int +SyntaxError_traverse(PySyntaxErrorObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->msg); + Py_VISIT(self->filename); + Py_VISIT(self->lineno); + Py_VISIT(self->offset); + Py_VISIT(self->text); + Py_VISIT(self->print_file_and_line); + return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); +} + +/* This is called "my_basename" instead of just "basename" to avoid name + conflicts with glibc; basename is already prototyped if _GNU_SOURCE is + defined, and Python does define that. 
*/ +static char * +my_basename(char *name) +{ + char *cp = name; + char *result = name; + + if (name == NULL) + return "???"; + while (*cp != '\0') { + if (*cp == SEP) + result = cp + 1; + ++cp; + } + return result; +} + + +static PyObject * +SyntaxError_str(PySyntaxErrorObject *self) +{ + PyObject *str; + PyObject *result; + + str = PyObject_Str(self->msg); + result = str; + + /* XXX -- do all the additional formatting with filename and + lineno here */ + + if (str != NULL && PyString_Check(str)) { + int have_filename = 0; + int have_lineno = 0; + char *buffer = NULL; + + have_filename = (self->filename != NULL) && + PyString_Check(self->filename); + have_lineno = (self->lineno != NULL) && PyInt_Check(self->lineno); + + if (have_filename || have_lineno) { + Py_ssize_t bufsize = PyString_GET_SIZE(str) + 64; + if (have_filename) + bufsize += PyString_GET_SIZE(self->filename); + + buffer = (char *)PyMem_MALLOC(bufsize); + if (buffer != NULL) { + if (have_filename && have_lineno) + PyOS_snprintf(buffer, bufsize, "%s (%s, line %ld)", + PyString_AS_STRING(str), + my_basename(PyString_AS_STRING(self->filename)), + PyInt_AsLong(self->lineno)); + else if (have_filename) + PyOS_snprintf(buffer, bufsize, "%s (%s)", + PyString_AS_STRING(str), + my_basename(PyString_AS_STRING(self->filename))); + else if (have_lineno) + PyOS_snprintf(buffer, bufsize, "%s (line %ld)", + PyString_AS_STRING(str), + PyInt_AsLong(self->lineno)); + + result = PyString_FromString(buffer); + PyMem_FREE(buffer); + + if (result == NULL) + result = str; + else + Py_DECREF(str); + } + } + } + return result; +} + +static PyMemberDef SyntaxError_members[] = { + {"message", T_OBJECT, offsetof(PySyntaxErrorObject, message), 0, + PyDoc_STR("exception message")}, + {"msg", T_OBJECT, offsetof(PySyntaxErrorObject, msg), 0, + PyDoc_STR("exception msg")}, + {"filename", T_OBJECT, offsetof(PySyntaxErrorObject, filename), 0, + PyDoc_STR("exception filename")}, + {"lineno", T_OBJECT, offsetof(PySyntaxErrorObject, 
lineno), 0, + PyDoc_STR("exception lineno")}, + {"offset", T_OBJECT, offsetof(PySyntaxErrorObject, offset), 0, + PyDoc_STR("exception offset")}, + {"text", T_OBJECT, offsetof(PySyntaxErrorObject, text), 0, + PyDoc_STR("exception text")}, + {"print_file_and_line", T_OBJECT, + offsetof(PySyntaxErrorObject, print_file_and_line), 0, + PyDoc_STR("exception print_file_and_line")}, + {NULL} /* Sentinel */ +}; + +ComplexExtendsException(PyExc_StandardError, SyntaxError, SyntaxError, + SyntaxError_dealloc, 0, SyntaxError_members, + SyntaxError_str, "Invalid syntax."); + + +/* + * IndentationError extends SyntaxError + */ +MiddlingExtendsException(PyExc_SyntaxError, IndentationError, SyntaxError, + "Improper indentation."); + + +/* + * TabError extends IndentationError + */ +MiddlingExtendsException(PyExc_IndentationError, TabError, SyntaxError, + "Improper mixture of spaces and tabs."); + + +/* + * LookupError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, LookupError, + "Base class for lookup errors."); + + +/* + * IndexError extends LookupError + */ +SimpleExtendsException(PyExc_LookupError, IndexError, + "Sequence index out of range."); + + +/* + * KeyError extends LookupError + */ +static PyObject * +KeyError_str(PyBaseExceptionObject *self) +{ + /* If args is a tuple of exactly one item, apply repr to args[0]. + This is done so that e.g. the exception raised by {}[''] prints + KeyError: '' + rather than the confusing + KeyError + alone. The downside is that if KeyError is raised with an explanatory + string, that string will be displayed in quotes. Too bad. + If args is anything else, use the default BaseException__str__(). 
+ */ + if (PyTuple_Check(self->args) && PyTuple_GET_SIZE(self->args) == 1) { + PyObject *key = PyTuple_GET_ITEM(self->args, 0); + return PyObject_Repr(key); + } + return BaseException_str(self); +} + +ComplexExtendsException(PyExc_LookupError, KeyError, BaseException, + 0, 0, 0, KeyError_str, "Mapping key not found."); + + +/* + * ValueError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, ValueError, + "Inappropriate argument value (of correct type)."); + +/* + * UnicodeError extends ValueError + */ + +SimpleExtendsException(PyExc_ValueError, UnicodeError, + "Unicode related error."); + +#ifdef Py_USING_UNICODE +static int +get_int(PyObject *attr, Py_ssize_t *value, const char *name) +{ + if (!attr) { + PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); + return -1; + } + + if (PyInt_Check(attr)) { + *value = PyInt_AS_LONG(attr); + } else if (PyLong_Check(attr)) { + *value = _PyLong_AsSsize_t(attr); + if (*value == -1 && PyErr_Occurred()) + return -1; + } else { + PyErr_Format(PyExc_TypeError, "%.200s attribute must be int", name); + return -1; + } + return 0; +} + +static int +set_ssize_t(PyObject **attr, Py_ssize_t value) +{ + PyObject *obj = PyInt_FromSsize_t(value); + if (!obj) + return -1; + Py_XDECREF(*attr); + *attr = obj; + return 0; +} + +static PyObject * +get_string(PyObject *attr, const char *name) +{ + if (!attr) { + PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); + return NULL; + } + + if (!PyString_Check(attr)) { + PyErr_Format(PyExc_TypeError, "%.200s attribute must be str", name); + return NULL; + } + Py_INCREF(attr); + return attr; +} + + +static int +set_string(PyObject **attr, const char *value) +{ + PyObject *obj = PyString_FromString(value); + if (!obj) + return -1; + Py_XDECREF(*attr); + *attr = obj; + return 0; +} + + +static PyObject * +get_unicode(PyObject *attr, const char *name) +{ + if (!attr) { + PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); + return NULL; + } + 
+ if (!PyUnicode_Check(attr)) { + PyErr_Format(PyExc_TypeError, + "%.200s attribute must be unicode", name); + return NULL; + } + Py_INCREF(attr); + return attr; +} + +PyObject * +PyUnicodeEncodeError_GetEncoding(PyObject *exc) +{ + return get_string(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); +} + +PyObject * +PyUnicodeDecodeError_GetEncoding(PyObject *exc) +{ + return get_string(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); +} + +PyObject * +PyUnicodeEncodeError_GetObject(PyObject *exc) +{ + return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); +} + +PyObject * +PyUnicodeDecodeError_GetObject(PyObject *exc) +{ + return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); +} + +PyObject * +PyUnicodeTranslateError_GetObject(PyObject *exc) +{ + return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); +} + +/* Store the exception's start index in *start, clamped against the length of + the unicode object; returns 0 on success, -1 with an exception set. */ +int +PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) +{ + if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) { + Py_ssize_t size; + PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, + "object"); + if (!obj) return -1; + size = PyUnicode_GET_SIZE(obj); + if (*start<0) + *start = 0; /*XXX check for values <0*/ + if (*start>=size) + *start = size-1; + /* get_unicode() returned a new reference; only the size was needed */ + Py_DECREF(obj); + return 0; + } + return -1; +} + + +/* Same as PyUnicodeEncodeError_GetStart, but the object is a byte string. */ +int +PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) +{ + if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) { + Py_ssize_t size; + PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, + "object"); + if (!obj) return -1; + size = PyString_GET_SIZE(obj); + if (*start<0) + *start = 0; + if (*start>=size) + *start = size-1; + /* get_string() returned a new reference; only the size was needed */ + Py_DECREF(obj); + return 0; + } + return -1; +} + + +int +PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) +{ + return PyUnicodeEncodeError_GetStart(exc, start); +} + + +int +PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) +{ + return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start); +} + + +int 
+PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) +{ + return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start); +} + + +int +PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) +{ + return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start); +} + + +int +PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +{ + if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) { + Py_ssize_t size; + PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, + "object"); + if (!obj) return -1; + size = PyUnicode_GET_SIZE(obj); + if (*end<1) + *end = 1; + if (*end>size) + *end = size; + return 0; + } + return -1; +} + + +int +PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +{ + if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) { + Py_ssize_t size; + PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, + "object"); + if (!obj) return -1; + size = PyString_GET_SIZE(obj); + if (*end<1) + *end = 1; + if (*end>size) + *end = size; + return 0; + } + return -1; +} + + +int +PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *start) +{ + return PyUnicodeEncodeError_GetEnd(exc, start); +} + + +int +PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end); +} + + +int +PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end); +} + + +int +PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end); +} + +PyObject * +PyUnicodeEncodeError_GetReason(PyObject *exc) +{ + return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason"); +} + + +PyObject * +PyUnicodeDecodeError_GetReason(PyObject *exc) +{ + return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason"); +} + + +PyObject * +PyUnicodeTranslateError_GetReason(PyObject *exc) +{ + return get_string(((PyUnicodeErrorObject 
*)exc)->reason, "reason"); +} + + +int +PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason) +{ + return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason); +} + + +int +PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason) +{ + return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason); +} + + +int +PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) +{ + return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason); +} + + +static PyObject * +UnicodeError_new(PyTypeObject *type, PyObject *args, PyObject *kwds, + PyTypeObject *objecttype) +{ + PyUnicodeErrorObject *self; + + self = (PyUnicodeErrorObject *)BaseException_new(type, args, kwds); + if (!self) + return NULL; + + MAKE_IT_NONE(self->encoding); + MAKE_IT_NONE(self->object); + MAKE_IT_NONE(self->start); + MAKE_IT_NONE(self->end); + MAKE_IT_NONE(self->reason); + + return (PyObject *)self; +} + +/* Shared __init__ body for UnicodeEncodeError and UnicodeDecodeError: parses + (encoding, object, start, end, reason) from args, requiring `object` to be + an instance of objecttype. Returns 0 on success; on failure returns -1 + with all five slots left NULL. */ +static int +UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds, + PyTypeObject *objecttype) +{ + /* Release whatever the slots currently hold (set by UnicodeError_new or + by an earlier __init__ call) before PyArg_ParseTuple overwrites them + with borrowed references; otherwise those references are leaked. */ + Py_CLEAR(self->encoding); + Py_CLEAR(self->object); + Py_CLEAR(self->start); + Py_CLEAR(self->end); + Py_CLEAR(self->reason); + + if (!PyArg_ParseTuple(args, "O!O!O!O!O!", + &PyString_Type, &self->encoding, + objecttype, &self->object, + &PyInt_Type, &self->start, + &PyInt_Type, &self->end, + &PyString_Type, &self->reason)) { + self->encoding = self->object = self->start = self->end = + self->reason = NULL; + return -1; + } + + /* "O!" stores borrowed references; take ownership of each one */ + Py_INCREF(self->encoding); + Py_INCREF(self->object); + Py_INCREF(self->start); + Py_INCREF(self->end); + Py_INCREF(self->reason); + + return 0; +} + +int +UnicodeError_clear(PyUnicodeErrorObject *self) +{ + Py_CLEAR(self->encoding); + Py_CLEAR(self->object); + Py_CLEAR(self->start); + Py_CLEAR(self->end); + Py_CLEAR(self->reason); + return BaseException_clear((PyBaseExceptionObject *)self); +} + +static void +UnicodeError_dealloc(PyUnicodeErrorObject *self) +{ + UnicodeError_clear(self); + self->ob_type->tp_free((PyObject *)self); +} + +int +UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg) +{ + 
Py_VISIT(self->encoding); + Py_VISIT(self->object); + Py_VISIT(self->start); + Py_VISIT(self->end); + Py_VISIT(self->reason); + return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); +} + +static PyMemberDef UnicodeError_members[] = { + {"message", T_OBJECT, offsetof(PyUnicodeErrorObject, message), 0, + PyDoc_STR("exception message")}, + {"encoding", T_OBJECT, offsetof(PyUnicodeErrorObject, encoding), 0, + PyDoc_STR("exception encoding")}, + {"object", T_OBJECT, offsetof(PyUnicodeErrorObject, object), 0, + PyDoc_STR("exception object")}, + {"start", T_OBJECT, offsetof(PyUnicodeErrorObject, start), 0, + PyDoc_STR("exception start")}, + {"end", T_OBJECT, offsetof(PyUnicodeErrorObject, end), 0, + PyDoc_STR("exception end")}, + {"reason", T_OBJECT, offsetof(PyUnicodeErrorObject, reason), 0, + PyDoc_STR("exception reason")}, + {NULL} /* Sentinel */ +}; + + +/* + * UnicodeEncodeError extends UnicodeError + */ +static PyObject * +UnicodeEncodeError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + return UnicodeError_new(type, args, kwds, &PyUnicode_Type); +} + +static int +UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + return -1; + return UnicodeError_init((PyUnicodeErrorObject *)self, args, + kwds, &PyUnicode_Type); +} + +static PyObject * +UnicodeEncodeError_str(PyObject *self) +{ + Py_ssize_t start; + Py_ssize_t end; + + if (PyUnicodeEncodeError_GetStart(self, &start)) + return NULL; + + if (PyUnicodeEncodeError_GetEnd(self, &end)) + return NULL; + + if (end==start+1) { + int badchar = (int)PyUnicode_AS_UNICODE(((PyUnicodeErrorObject *)self)->object)[start]; + char badchar_str[20]; + if (badchar <= 0xff) + PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar); + else if (badchar <= 0xffff) + PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar); + else + PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", 
badchar); + return PyString_FromFormat( + "'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s", + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), + badchar_str, + start, + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) + ); + } + return PyString_FromFormat( + "'%.400s' codec can't encode characters in position %zd-%zd: %.400s", + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), + start, + (end-1), + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) + ); +} + +static PyTypeObject _PyExc_UnicodeEncodeError = { + PyObject_HEAD_INIT(NULL) + 0, + "UnicodeEncodeError", + sizeof(PyUnicodeErrorObject), 0, + (destructor)UnicodeError_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + (reprfunc)UnicodeEncodeError_str, 0, 0, 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + PyDoc_STR("Unicode encoding error."), (traverseproc)BaseException_traverse, + (inquiry)BaseException_clear, 0, 0, 0, 0, 0, UnicodeError_members, + 0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict), + (initproc)UnicodeEncodeError_init, 0, UnicodeEncodeError_new, +}; +PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError; + +PyObject * +PyUnicodeEncodeError_Create( + const char *encoding, const Py_UNICODE *object, Py_ssize_t length, + Py_ssize_t start, Py_ssize_t end, const char *reason) +{ + return PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#nns", + encoding, object, length, start, end, reason); +} + + +/* + * UnicodeDecodeError extends UnicodeError + */ +static PyObject * +UnicodeDecodeError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + return UnicodeError_new(type, args, kwds, &PyString_Type); +} + +static int +UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + return -1; + return UnicodeError_init((PyUnicodeErrorObject *)self, args, + kwds, &PyString_Type); +} + 
+static PyObject * +UnicodeDecodeError_str(PyObject *self) +{ + Py_ssize_t start; + Py_ssize_t end; + + if (PyUnicodeDecodeError_GetStart(self, &start)) + return NULL; + + if (PyUnicodeDecodeError_GetEnd(self, &end)) + return NULL; + + if (end==start+1) { + /* FromFormat does not support %02x, so format that separately */ + char byte[4]; + PyOS_snprintf(byte, sizeof(byte), "%02x", + ((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff); + return PyString_FromFormat( + "'%.400s' codec can't decode byte 0x%s in position %zd: %.400s", + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), + byte, + start, + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) + ); + } + return PyString_FromFormat( + "'%.400s' codec can't decode bytes in position %zd-%zd: %.400s", + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), + start, + (end-1), + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) + ); +} + +static PyTypeObject _PyExc_UnicodeDecodeError = { + PyObject_HEAD_INIT(NULL) + 0, + EXC_MODULE_NAME "UnicodeDecodeError", + sizeof(PyUnicodeErrorObject), 0, + (destructor)UnicodeError_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + (reprfunc)UnicodeDecodeError_str, 0, 0, 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + PyDoc_STR("Unicode decoding error."), (traverseproc)BaseException_traverse, + (inquiry)BaseException_clear, 0, 0, 0, 0, 0, UnicodeError_members, + 0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict), + (initproc)UnicodeDecodeError_init, 0, UnicodeDecodeError_new, +}; +PyObject *PyExc_UnicodeDecodeError = (PyObject *)&_PyExc_UnicodeDecodeError; + +PyObject * +PyUnicodeDecodeError_Create( + const char *encoding, const char *object, Py_ssize_t length, + Py_ssize_t start, Py_ssize_t end, const char *reason) +{ + assert(length < INT_MAX); + assert(start < INT_MAX); + assert(end < INT_MAX); + return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns", + encoding, object, 
length, start, end, reason); +} + + +/* + * UnicodeTranslateError extends UnicodeError + */ +static PyObject * +UnicodeTranslateError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyUnicodeErrorObject *self = NULL; + + self = (PyUnicodeErrorObject *)BaseException_new(type, args, kwds); + if (!self) + return NULL; + + MAKE_IT_NONE(self->encoding); + MAKE_IT_NONE(self->object); + MAKE_IT_NONE(self->start); + MAKE_IT_NONE(self->end); + MAKE_IT_NONE(self->reason); + + return (PyObject *)self; +} + +static int +UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, + PyObject *kwds) +{ + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + return -1; + + Py_CLEAR(self->object); + Py_CLEAR(self->start); + Py_CLEAR(self->end); + Py_CLEAR(self->reason); + + if (!PyArg_ParseTuple(args, "O!O!O!O!", + &PyUnicode_Type, &self->object, + &PyInt_Type, &self->start, + &PyInt_Type, &self->end, + &PyString_Type, &self->reason)) { + self->object = self->start = self->end = self->reason = NULL; + return -1; + } + + Py_INCREF(self->object); + Py_INCREF(self->start); + Py_INCREF(self->end); + Py_INCREF(self->reason); + + return 0; +} + + +static PyObject * +UnicodeTranslateError_str(PyObject *self) +{ + Py_ssize_t start; + Py_ssize_t end; + + if (PyUnicodeTranslateError_GetStart(self, &start)) + return NULL; + + if (PyUnicodeTranslateError_GetEnd(self, &end)) + return NULL; + + if (end==start+1) { + int badchar = (int)PyUnicode_AS_UNICODE(((PyUnicodeErrorObject *)self)->object)[start]; + char badchar_str[20]; + if (badchar <= 0xff) + PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar); + else if (badchar <= 0xffff) + PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar); + else + PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar); + return PyString_FromFormat( + "can't translate character u'\\%s' in position %zd: %.400s", + badchar_str, + start, + PyString_AS_STRING(((PyUnicodeErrorObject 
*)self)->reason) + ); + } + return PyString_FromFormat( + "can't translate characters in position %zd-%zd: %.400s", + start, + (end-1), + PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) + ); +} + +static PyTypeObject _PyExc_UnicodeTranslateError = { + PyObject_HEAD_INIT(NULL) + 0, + EXC_MODULE_NAME "UnicodeTranslateError", + sizeof(PyUnicodeErrorObject), 0, + (destructor)UnicodeError_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + (reprfunc)UnicodeTranslateError_str, 0, 0, 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + PyDoc_STR("Unicode decoding error."), (traverseproc)UnicodeError_traverse, + (inquiry)UnicodeError_clear, 0, 0, 0, 0, 0, UnicodeError_members, + 0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict), + (initproc)UnicodeTranslateError_init, 0, UnicodeTranslateError_new, +}; +PyObject *PyExc_UnicodeTranslateError = (PyObject *)&_PyExc_UnicodeTranslateError; + +PyObject * +PyUnicodeTranslateError_Create( + const Py_UNICODE *object, Py_ssize_t length, + Py_ssize_t start, Py_ssize_t end, const char *reason) +{ + return PyObject_CallFunction(PyExc_UnicodeTranslateError, "u#nns", + object, length, start, end, reason); +} +#endif + + +/* + * AssertionError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, AssertionError, + "Assertion failed."); + + +/* + * ArithmeticError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, ArithmeticError, + "Base class for arithmetic errors."); + + +/* + * FloatingPointError extends ArithmeticError + */ +SimpleExtendsException(PyExc_ArithmeticError, FloatingPointError, + "Floating point operation failed."); + + +/* + * OverflowError extends ArithmeticError + */ +SimpleExtendsException(PyExc_ArithmeticError, OverflowError, + "Result too large to be represented."); + + +/* + * ZeroDivisionError extends ArithmeticError + */ +SimpleExtendsException(PyExc_ArithmeticError, ZeroDivisionError, + "Second argument to a division or modulo operation 
was zero."); + + +/* + * SystemError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, SystemError, + "Internal error in the Python interpreter.\n" + "\n" + "Please report this to the Python maintainer, along with the traceback,\n" + "the Python version, and the hardware/OS platform and version."); + + +/* + * ReferenceError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, ReferenceError, + "Weak ref proxy used after referent went away."); + + +/* + * MemoryError extends StandardError + */ +SimpleExtendsException(PyExc_StandardError, MemoryError, "Out of memory."); + + +/* Warning category docstrings */ + +/* + * Warning extends Exception + */ +SimpleExtendsException(PyExc_Exception, Warning, + "Base class for warning categories."); + + +/* + * UserWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, UserWarning, + "Base class for warnings generated by user code."); + + +/* + * DeprecationWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, DeprecationWarning, + "Base class for warnings about deprecated features."); + + +/* + * PendingDeprecationWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, PendingDeprecationWarning, + "Base class for warnings about features which will be deprecated\n" + "in the future."); + + +/* + * SyntaxWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, SyntaxWarning, + "Base class for warnings about dubious syntax."); + + +/* + * RuntimeWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, RuntimeWarning, + "Base class for warnings about dubious runtime behavior."); + + +/* + * FutureWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, FutureWarning, + "Base class for warnings about constructs that will change semantically\n" + "in the future."); + + +/* + * ImportWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, ImportWarning, + "Base class for warnings about probable mistakes in 
module imports"); + + +/* Pre-computed MemoryError instance. Best to create this as early as + * possible and not wait until a MemoryError is actually raised! + */ +PyObject *PyExc_MemoryErrorInst=NULL; + +/* module global functions */ +static PyMethodDef functions[] = { + /* Sentinel */ + {NULL, NULL} +}; + +#define PRE_INIT(TYPE) if (PyType_Ready(&_PyExc_ ## TYPE) < 0) \ + Py_FatalError("exceptions bootstrapping error."); + +#define POST_INIT(TYPE) Py_INCREF(PyExc_ ## TYPE); \ + PyModule_AddObject(m, # TYPE, PyExc_ ## TYPE); \ + if (PyDict_SetItemString(bdict, # TYPE, PyExc_ ## TYPE)) \ + Py_FatalError("Module dictionary insertion problem."); + +PyMODINIT_FUNC +_PyExc_Init(void) +{ + PyObject *m, *bltinmod, *bdict; + + PRE_INIT(BaseException) + PRE_INIT(Exception) + PRE_INIT(StandardError) + PRE_INIT(TypeError) + PRE_INIT(StopIteration) + PRE_INIT(GeneratorExit) + PRE_INIT(SystemExit) + PRE_INIT(KeyboardInterrupt) + PRE_INIT(ImportError) + PRE_INIT(EnvironmentError) + PRE_INIT(IOError) + PRE_INIT(OSError) +#ifdef MS_WINDOWS + PRE_INIT(WindowsError) +#endif +#ifdef __VMS + PRE_INIT(VMSError) +#endif + PRE_INIT(EOFError) + PRE_INIT(RuntimeError) + PRE_INIT(NotImplementedError) + PRE_INIT(NameError) + PRE_INIT(UnboundLocalError) + PRE_INIT(AttributeError) + PRE_INIT(SyntaxError) + PRE_INIT(IndentationError) + PRE_INIT(TabError) + PRE_INIT(LookupError) + PRE_INIT(IndexError) + PRE_INIT(KeyError) + PRE_INIT(ValueError) + PRE_INIT(UnicodeError) +#ifdef Py_USING_UNICODE + PRE_INIT(UnicodeEncodeError) + PRE_INIT(UnicodeDecodeError) + PRE_INIT(UnicodeTranslateError) +#endif + PRE_INIT(AssertionError) + PRE_INIT(ArithmeticError) + PRE_INIT(FloatingPointError) + PRE_INIT(OverflowError) + PRE_INIT(ZeroDivisionError) + PRE_INIT(SystemError) + PRE_INIT(ReferenceError) + PRE_INIT(MemoryError) + PRE_INIT(Warning) + PRE_INIT(UserWarning) + PRE_INIT(DeprecationWarning) + PRE_INIT(PendingDeprecationWarning) + PRE_INIT(SyntaxWarning) + PRE_INIT(RuntimeWarning) + 
PRE_INIT(FutureWarning) + PRE_INIT(ImportWarning) + + m = Py_InitModule4("exceptions", functions, exceptions_doc, + (PyObject *)NULL, PYTHON_API_VERSION); + if (m == NULL) return; + + bltinmod = PyImport_ImportModule("__builtin__"); + if (bltinmod == NULL) + Py_FatalError("exceptions bootstrapping error."); + bdict = PyModule_GetDict(bltinmod); + if (bdict == NULL) + Py_FatalError("exceptions bootstrapping error."); + + POST_INIT(BaseException) + POST_INIT(Exception) + POST_INIT(StandardError) + POST_INIT(TypeError) + POST_INIT(StopIteration) + POST_INIT(GeneratorExit) + POST_INIT(SystemExit) + POST_INIT(KeyboardInterrupt) + POST_INIT(ImportError) + POST_INIT(EnvironmentError) + POST_INIT(IOError) + POST_INIT(OSError) +#ifdef MS_WINDOWS + POST_INIT(WindowsError) +#endif +#ifdef __VMS + POST_INIT(VMSError) +#endif + POST_INIT(EOFError) + POST_INIT(RuntimeError) + POST_INIT(NotImplementedError) + POST_INIT(NameError) + POST_INIT(UnboundLocalError) + POST_INIT(AttributeError) + POST_INIT(SyntaxError) + POST_INIT(IndentationError) + POST_INIT(TabError) + POST_INIT(LookupError) + POST_INIT(IndexError) + POST_INIT(KeyError) + POST_INIT(ValueError) + POST_INIT(UnicodeError) +#ifdef Py_USING_UNICODE + POST_INIT(UnicodeEncodeError) + POST_INIT(UnicodeDecodeError) + POST_INIT(UnicodeTranslateError) +#endif + POST_INIT(AssertionError) + POST_INIT(ArithmeticError) + POST_INIT(FloatingPointError) + POST_INIT(OverflowError) + POST_INIT(ZeroDivisionError) + POST_INIT(SystemError) + POST_INIT(ReferenceError) + POST_INIT(MemoryError) + POST_INIT(Warning) + POST_INIT(UserWarning) + POST_INIT(DeprecationWarning) + POST_INIT(PendingDeprecationWarning) + POST_INIT(SyntaxWarning) + POST_INIT(RuntimeWarning) + POST_INIT(FutureWarning) + POST_INIT(ImportWarning) + + PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL); + if (!PyExc_MemoryErrorInst) + Py_FatalError("Cannot pre-allocate MemoryError instance\n"); + + Py_DECREF(bltinmod); +} + +void +_PyExc_Fini(void) +{ 
+ Py_XDECREF(PyExc_MemoryErrorInst); + PyExc_MemoryErrorInst = NULL; +} diff --git a/Objects/fileobject.c b/Objects/fileobject.c index ab2616d..997792a 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -136,46 +136,45 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, /* check for known incorrect mode strings - problem is, platforms are free to accept any mode characters they like and are supposed to ignore stuff they don't understand... write or append mode with - universal newline support is expressly forbidden by PEP 278. */ + universal newline support is expressly forbidden by PEP 278. + Additionally, remove the 'U' from the mode string as platforms + won't know what it is. */ /* zero return is kewl - one is un-kewl */ static int -check_the_mode(char *mode) +sanitize_the_mode(char *mode) { + char *upos; size_t len = strlen(mode); - switch (len) { - case 0: + if (!len) { PyErr_SetString(PyExc_ValueError, "empty mode string"); return 1; + } - /* reject wU, aU */ - case 2: - switch (mode[0]) { - case 'w': - case 'a': - if (mode[1] == 'U') { - PyErr_SetString(PyExc_ValueError, - "invalid mode string"); - return 1; - } - break; + upos = strchr(mode, 'U'); + if (upos) { + memmove(upos, upos+1, len-(upos-mode)); /* incl null char */ + + if (mode[0] == 'w' || mode[0] == 'a') { + PyErr_Format(PyExc_ValueError, "universal newline " + "mode can only be used with modes " + "starting with 'r'"); + return 1; } - break; - /* reject w+U, a+U, wU+, aU+ */ - case 3: - switch (mode[0]) { - case 'w': - case 'a': - if ((mode[1] == '+' && mode[2] == 'U') || - (mode[1] == 'U' && mode[2] == '+')) { - PyErr_SetString(PyExc_ValueError, - "invalid mode string"); - return 1; - } - break; + if (mode[0] != 'r') { + memmove(mode+1, mode, strlen(mode)+1); + mode[0] = 'r'; } - break; + + if (!strchr(mode, 'b')) { + memmove(mode+2, mode+1, strlen(mode)); + mode[1] = 'b'; + } + } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') { + 
PyErr_Format(PyExc_ValueError, "mode string must begin with " + "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode); + return 1; } return 0; @@ -184,6 +183,7 @@ check_the_mode(char *mode) static PyObject * open_the_file(PyFileObject *f, char *name, char *mode) { + char *newmode; assert(f != NULL); assert(PyFile_Check(f)); #ifdef MS_WINDOWS @@ -195,8 +195,18 @@ open_the_file(PyFileObject *f, char *name, char *mode) assert(mode != NULL); assert(f->f_fp == NULL); - if (check_the_mode(mode)) + /* probably need to replace 'U' by 'rb' */ + newmode = PyMem_MALLOC(strlen(mode) + 3); + if (!newmode) { + PyErr_NoMemory(); return NULL; + } + strcpy(newmode, mode); + + if (sanitize_the_mode(newmode)) { + f = NULL; + goto cleanup; + } /* rexec.py can't stop a user from getting the file() constructor -- all they have to do is get *any* file object f, and then do @@ -204,16 +214,15 @@ open_the_file(PyFileObject *f, char *name, char *mode) if (PyEval_GetRestricted()) { PyErr_SetString(PyExc_IOError, "file() constructor not accessible in restricted mode"); - return NULL; + f = NULL; + goto cleanup; } errno = 0; - if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0) - mode = "rb"; #ifdef MS_WINDOWS if (PyUnicode_Check(f->f_name)) { PyObject *wmode; - wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL); + wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL); if (f->f_name && wmode) { Py_BEGIN_ALLOW_THREADS /* PyUnicode_AS_UNICODE OK without thread @@ -227,7 +236,7 @@ open_the_file(PyFileObject *f, char *name, char *mode) #endif if (NULL == f->f_fp && NULL != name) { Py_BEGIN_ALLOW_THREADS - f->f_fp = fopen(name, mode); + f->f_fp = fopen(name, newmode); Py_END_ALLOW_THREADS } @@ -254,6 +263,10 @@ open_the_file(PyFileObject *f, char *name, char *mode) } if (f != NULL) f = dircheck(f); + +cleanup: + PyMem_FREE(newmode); + return (PyObject *)f; } @@ -1705,9 +1718,6 @@ PyDoc_STRVAR(close_doc, PyDoc_STRVAR(isatty_doc, "isatty() -> true or false. 
True if the file is connected to a tty device."); -PyDoc_STRVAR(context_doc, - "__context__() -> self."); - PyDoc_STRVAR(enter_doc, "__enter__() -> self."); @@ -1727,7 +1737,6 @@ static PyMethodDef file_methods[] = { {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc}, {"close", (PyCFunction)file_close, METH_NOARGS, close_doc}, {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc}, - {"__context__", (PyCFunction)file_self, METH_NOARGS, context_doc}, {"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc}, {"__exit__", (PyCFunction)file_close, METH_VARARGS, close_doc}, {NULL, NULL} /* sentinel */ @@ -2023,10 +2032,6 @@ PyDoc_STR( "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n" "\n" "'U' cannot be combined with 'w' or '+' mode.\n" -) -PyDoc_STR( -"\n" -"Note: open() is an alias for file()." ); PyTypeObject PyFile_Type = { @@ -2447,4 +2452,3 @@ Py_UniversalNewlineFread(char *buf, size_t n, #ifdef __cplusplus } #endif - diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 8708690..74f1315 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -384,7 +384,7 @@ float_richcompare(PyObject *v, PyObject *w, int op) if (PyFloat_Check(w)) j = PyFloat_AS_DOUBLE(w); - else if (Py_IS_INFINITY(i) || Py_IS_NAN(i)) { + else if (!Py_IS_FINITE(i)) { if (PyInt_Check(w) || PyLong_Check(w)) /* If i is an infinity, its magnitude exceeds any * finite integer, so it doesn't matter which int we @@ -783,10 +783,7 @@ float_pow(PyObject *v, PyObject *w, PyObject *z) * bug; we let that slide in math.pow() (which currently * reflects all platform accidents), but not for Python's **. */ - if (iv == -1.0 && !Py_IS_INFINITY(iw) && iw == iw) { - /* XXX the "iw == iw" was to weed out NaNs. This - * XXX doesn't actually work on all platforms. 
- */ + if (iv == -1.0 && Py_IS_FINITE(iw)) { /* Return 1 if iw is even, -1 if iw is odd; there's * no guarantee that any C integral type is big * enough to hold iw, so we have to check this diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 9aabc7a..fcb5e4e 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -350,13 +350,32 @@ static PyGetSetDef frame_getsetlist[] = { }; /* Stack frames are allocated and deallocated at a considerable rate. - In an attempt to improve the speed of function calls, we maintain a - separate free list of stack frames (just like integers are - allocated in a special way -- see intobject.c). When a stack frame - is on the free list, only the following members have a meaning: + In an attempt to improve the speed of function calls, we: + + 1. Hold a single "zombie" frame on each code object. This retains + the allocated and initialised frame object from an invocation of + the code object. The zombie is reanimated the next time we need a + frame object for that code object. Doing this saves the malloc/ + realloc required when using a free_list frame that isn't the + correct size. It also saves some field initialisation. + + In zombie mode, no field of PyFrameObject holds a reference, but + the following fields are still valid: + + * ob_type, ob_size, f_code, f_valuestack; + + * f_locals, f_trace, + f_exc_type, f_exc_value, f_exc_traceback are NULL; + + * f_localsplus does not require re-allocation and + the local variables in f_localsplus are NULL. + + 2. We also maintain a separate free list of stack frames (just like + integers are allocated in a special way -- see intobject.c). 
When + a stack frame is on the free list, only the following members have + a meaning: ob_type == &Frametype f_back next item on free list, or NULL - f_nlocals number of locals f_stacksize size of value stack ob_size size of localsplus Note that the value and block stacks are preserved -- this can save @@ -380,41 +399,43 @@ static int numfree = 0; /* number of frames currently in free_list */ static void frame_dealloc(PyFrameObject *f) { - int i, slots; - PyObject **fastlocals; - PyObject **p; + PyObject **p, **valuestack; + PyCodeObject *co; PyObject_GC_UnTrack(f); Py_TRASHCAN_SAFE_BEGIN(f) /* Kill all local variables */ - slots = f->f_nlocals + f->f_ncells + f->f_nfreevars; - fastlocals = f->f_localsplus; - for (i = slots; --i >= 0; ++fastlocals) { - Py_XDECREF(*fastlocals); - } + valuestack = f->f_valuestack; + for (p = f->f_localsplus; p < valuestack; p++) + Py_CLEAR(*p); /* Free stack */ if (f->f_stacktop != NULL) { - for (p = f->f_valuestack; p < f->f_stacktop; p++) + for (p = valuestack; p < f->f_stacktop; p++) Py_XDECREF(*p); } Py_XDECREF(f->f_back); - Py_DECREF(f->f_code); Py_DECREF(f->f_builtins); Py_DECREF(f->f_globals); - Py_XDECREF(f->f_locals); - Py_XDECREF(f->f_trace); - Py_XDECREF(f->f_exc_type); - Py_XDECREF(f->f_exc_value); - Py_XDECREF(f->f_exc_traceback); - if (numfree < MAXFREELIST) { + Py_CLEAR(f->f_locals); + Py_CLEAR(f->f_trace); + Py_CLEAR(f->f_exc_type); + Py_CLEAR(f->f_exc_value); + Py_CLEAR(f->f_exc_traceback); + + co = f->f_code; + if (co != NULL && co->co_zombieframe == NULL) + co->co_zombieframe = f; + else if (numfree < MAXFREELIST) { ++numfree; f->f_back = free_list; free_list = f; - } - else + } + else PyObject_GC_Del(f); + + Py_XDECREF(co); Py_TRASHCAN_SAFE_END(f) } @@ -435,7 +456,7 @@ frame_traverse(PyFrameObject *f, visitproc visit, void *arg) Py_VISIT(f->f_exc_traceback); /* locals */ - slots = f->f_nlocals + f->f_ncells + f->f_nfreevars; + slots = f->f_code->co_nlocals + PyTuple_GET_SIZE(f->f_code->co_cellvars) + 
PyTuple_GET_SIZE(f->f_code->co_freevars); fastlocals = f->f_localsplus; for (i = slots; --i >= 0; ++fastlocals) Py_VISIT(*fastlocals); @@ -468,7 +489,7 @@ frame_clear(PyFrameObject *f) Py_CLEAR(f->f_trace); /* locals */ - slots = f->f_nlocals + f->f_ncells + f->f_nfreevars; + slots = f->f_code->co_nlocals + PyTuple_GET_SIZE(f->f_code->co_cellvars) + PyTuple_GET_SIZE(f->f_code->co_freevars); fastlocals = f->f_localsplus; for (i = slots; --i >= 0; ++fastlocals) Py_CLEAR(*fastlocals); @@ -532,7 +553,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, PyFrameObject *back = tstate->frame; PyFrameObject *f; PyObject *builtins; - Py_ssize_t extras, ncells, nfrees, i; + Py_ssize_t i; #ifdef Py_DEBUG if (code == NULL || globals == NULL || !PyDict_Check(globals) || @@ -541,9 +562,6 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, return NULL; } #endif - ncells = PyTuple_GET_SIZE(code->co_cellvars); - nfrees = PyTuple_GET_SIZE(code->co_freevars); - extras = code->co_stacksize + code->co_nlocals + ncells + nfrees; if (back == NULL || back->f_globals != globals) { builtins = PyDict_GetItem(globals, builtin_object); if (builtins) { @@ -574,71 +592,82 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, assert(builtins != NULL && PyDict_Check(builtins)); Py_INCREF(builtins); } - if (free_list == NULL) { - f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras); - if (f == NULL) { - Py_DECREF(builtins); - return NULL; - } + if (code->co_zombieframe != NULL) { + f = code->co_zombieframe; + code->co_zombieframe = NULL; + _Py_NewReference((PyObject *)f); + assert(f->f_code == code); } - else { - assert(numfree > 0); - --numfree; - f = free_list; - free_list = free_list->f_back; - if (f->ob_size < extras) { - f = PyObject_GC_Resize(PyFrameObject, f, extras); - if (f == NULL) { - Py_DECREF(builtins); - return NULL; - } - } - _Py_NewReference((PyObject *)f); + else { + Py_ssize_t extras, ncells, nfrees; 
+ ncells = PyTuple_GET_SIZE(code->co_cellvars); + nfrees = PyTuple_GET_SIZE(code->co_freevars); + extras = code->co_stacksize + code->co_nlocals + ncells + + nfrees; + if (free_list == NULL) { + f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, + extras); + if (f == NULL) { + Py_DECREF(builtins); + return NULL; + } + } + else { + assert(numfree > 0); + --numfree; + f = free_list; + free_list = free_list->f_back; + if (f->ob_size < extras) { + f = PyObject_GC_Resize(PyFrameObject, f, extras); + if (f == NULL) { + Py_DECREF(builtins); + return NULL; + } + } + _Py_NewReference((PyObject *)f); + } + + f->f_code = code; + extras = code->co_nlocals + ncells + nfrees; + f->f_valuestack = f->f_localsplus + extras; + for (i=0; i<extras; i++) + f->f_localsplus[i] = NULL; + f->f_locals = NULL; + f->f_trace = NULL; + f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL; } f->f_builtins = builtins; Py_XINCREF(back); f->f_back = back; Py_INCREF(code); - f->f_code = code; Py_INCREF(globals); f->f_globals = globals; /* Most functions have CO_NEWLOCALS and CO_OPTIMIZED set. */ if ((code->co_flags & (CO_NEWLOCALS | CO_OPTIMIZED)) == (CO_NEWLOCALS | CO_OPTIMIZED)) - locals = NULL; /* PyFrame_FastToLocals() will set. 
*/ + ; /* f_locals = NULL; will be set by PyFrame_FastToLocals() */ else if (code->co_flags & CO_NEWLOCALS) { locals = PyDict_New(); if (locals == NULL) { Py_DECREF(f); return NULL; } + f->f_locals = locals; } else { if (locals == NULL) locals = globals; Py_INCREF(locals); + f->f_locals = locals; } - f->f_locals = locals; - f->f_trace = NULL; - f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL; f->f_tstate = tstate; f->f_lasti = -1; f->f_lineno = code->co_firstlineno; f->f_restricted = (builtins != tstate->interp->builtins); f->f_iblock = 0; - f->f_nlocals = code->co_nlocals; - f->f_stacksize = code->co_stacksize; - f->f_ncells = ncells; - f->f_nfreevars = nfrees; - - extras = f->f_nlocals + ncells + nfrees; - /* Tim said it's ok to replace memset */ - for (i=0; i<extras; i++) - f->f_localsplus[i] = NULL; - - f->f_valuestack = f->f_localsplus + extras; - f->f_stacktop = f->f_valuestack; + + f->f_stacktop = f->f_valuestack; _PyObject_GC_TRACK(f); return f; } @@ -725,7 +754,9 @@ PyFrame_FastToLocals(PyFrameObject *f) PyObject *locals, *map; PyObject **fast; PyObject *error_type, *error_value, *error_traceback; + PyCodeObject *co; Py_ssize_t j; + int ncells, nfreevars; if (f == NULL) return; locals = f->f_locals; @@ -736,27 +767,24 @@ PyFrame_FastToLocals(PyFrameObject *f) return; } } - map = f->f_code->co_varnames; + co = f->f_code; + map = co->co_varnames; if (!PyTuple_Check(map)) return; PyErr_Fetch(&error_type, &error_value, &error_traceback); fast = f->f_localsplus; j = PyTuple_GET_SIZE(map); - if (j > f->f_nlocals) - j = f->f_nlocals; - if (f->f_nlocals) + if (j > co->co_nlocals) + j = co->co_nlocals; + if (co->co_nlocals) map_to_dict(map, j, locals, fast, 0); - if (f->f_ncells || f->f_nfreevars) { - if (!(PyTuple_Check(f->f_code->co_cellvars) - && PyTuple_Check(f->f_code->co_freevars))) { - return; - } - map_to_dict(f->f_code->co_cellvars, - PyTuple_GET_SIZE(f->f_code->co_cellvars), - locals, fast + f->f_nlocals, 1); - 
map_to_dict(f->f_code->co_freevars, - PyTuple_GET_SIZE(f->f_code->co_freevars), - locals, fast + f->f_nlocals + f->f_ncells, 1); + ncells = PyTuple_GET_SIZE(co->co_cellvars); + nfreevars = PyTuple_GET_SIZE(co->co_freevars); + if (ncells || nfreevars) { + map_to_dict(co->co_cellvars, ncells, + locals, fast + co->co_nlocals, 1); + map_to_dict(co->co_freevars, nfreevars, + locals, fast + co->co_nlocals + ncells, 1); } PyErr_Restore(error_type, error_value, error_traceback); } @@ -768,11 +796,14 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) PyObject *locals, *map; PyObject **fast; PyObject *error_type, *error_value, *error_traceback; + PyCodeObject *co; Py_ssize_t j; + int ncells, nfreevars; if (f == NULL) return; locals = f->f_locals; - map = f->f_code->co_varnames; + co = f->f_code; + map = co->co_varnames; if (locals == NULL) return; if (!PyTuple_Check(map)) @@ -780,21 +811,18 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) PyErr_Fetch(&error_type, &error_value, &error_traceback); fast = f->f_localsplus; j = PyTuple_GET_SIZE(map); - if (j > f->f_nlocals) - j = f->f_nlocals; - if (f->f_nlocals) - dict_to_map(f->f_code->co_varnames, j, locals, fast, 0, clear); - if (f->f_ncells || f->f_nfreevars) { - if (!(PyTuple_Check(f->f_code->co_cellvars) - && PyTuple_Check(f->f_code->co_freevars))) - return; - dict_to_map(f->f_code->co_cellvars, - PyTuple_GET_SIZE(f->f_code->co_cellvars), - locals, fast + f->f_nlocals, 1, clear); - dict_to_map(f->f_code->co_freevars, - PyTuple_GET_SIZE(f->f_code->co_freevars), - locals, fast + f->f_nlocals + f->f_ncells, 1, - clear); + if (j > co->co_nlocals) + j = co->co_nlocals; + if (co->co_nlocals) + dict_to_map(co->co_varnames, j, locals, fast, 0, clear); + ncells = PyTuple_GET_SIZE(co->co_cellvars); + nfreevars = PyTuple_GET_SIZE(co->co_freevars); + if (ncells || nfreevars) { + dict_to_map(co->co_cellvars, ncells, + locals, fast + co->co_nlocals, 1, clear); + dict_to_map(co->co_freevars, nfreevars, + locals, fast + 
co->co_nlocals + ncells, 1, + clear); } PyErr_Restore(error_type, error_value, error_traceback); } diff --git a/Objects/longobject.c b/Objects/longobject.c index 3073923..cd02eb3 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -40,7 +40,7 @@ static PyObject *long_format(PyObject *aa, int base, int addL); #define SIGCHECK(PyTryBlock) \ if (--_Py_Ticker < 0) { \ _Py_Ticker = _Py_CheckInterval; \ - if (PyErr_CheckSignals()) { PyTryBlock; } \ + if (PyErr_CheckSignals()) PyTryBlock \ } /* Normalize (remove leading zeros from) a long int object. @@ -66,8 +66,7 @@ long_normalize(register PyLongObject *v) PyLongObject * _PyLong_New(Py_ssize_t size) { - if (size > INT_MAX) { - /* XXX: Fix this check when ob_size becomes ssize_t */ + if (size > PY_SSIZE_T_MAX) { PyErr_NoMemory(); return NULL; } @@ -278,9 +277,9 @@ _long_as_ssize_t(PyObject *vv) { overflow: PyErr_SetString(PyExc_OverflowError, "long int too large to convert to int"); - if (sign > 0) + if (sign > 0) return PY_SSIZE_T_MAX; - else + else return PY_SSIZE_T_MIN; } @@ -845,11 +844,36 @@ PyLong_AsVoidPtr(PyObject *vv) PyObject * PyLong_FromLongLong(PY_LONG_LONG ival) { - PY_LONG_LONG bytes = ival; - int one = 1; - return _PyLong_FromByteArray( - (unsigned char *)&bytes, - SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1); + PyLongObject *v; + unsigned PY_LONG_LONG t; /* unsigned so >> doesn't propagate sign bit */ + int ndigits = 0; + int negative = 0; + + if (ival < 0) { + ival = -ival; + negative = 1; + } + + /* Count the number of Python digits. + We used to pick 5 ("big enough for anything"), but that's a + waste of time and space given that 5*15 = 75 bits are rarely + needed. */ + t = (unsigned PY_LONG_LONG)ival; + while (t) { + ++ndigits; + t >>= SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + v->ob_size = negative ? 
-ndigits : ndigits; + t = (unsigned PY_LONG_LONG)ival; + while (t) { + *p++ = (digit)(t & MASK); + t >>= SHIFT; + } + } + return (PyObject *)v; } /* Create a new long int object from a C unsigned PY_LONG_LONG int. */ @@ -857,11 +881,26 @@ PyLong_FromLongLong(PY_LONG_LONG ival) PyObject * PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG ival) { - unsigned PY_LONG_LONG bytes = ival; - int one = 1; - return _PyLong_FromByteArray( - (unsigned char *)&bytes, - SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0); + PyLongObject *v; + unsigned PY_LONG_LONG t; + int ndigits = 0; + + /* Count the number of Python digits. */ + t = (unsigned PY_LONG_LONG)ival; + while (t) { + ++ndigits; + t >>= SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + v->ob_size = ndigits; + while (ival) { + *p++ = (digit)(ival & MASK); + ival >>= SHIFT; + } + } + return (PyObject *)v; } /* Create a new long int object from a C Py_ssize_t. */ @@ -1305,7 +1344,33 @@ long_format(PyObject *aa, int base, int addL) return (PyObject *)str; } -/* *str points to the first digit in a string of base base digits. base +/* Table of digit values for 8-bit string -> integer conversion. + * '0' maps to 0, ..., '9' maps to 9. + * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35. + * All other indices map to 37. + * Note that when converting a base B string, a char c is a legitimate + * base B digit iff _PyLong_DigitValue[Py_CHARMASK(c)] < B. 
+ */ +int _PyLong_DigitValue[256] = { + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 37, 37, 37, 37, 37, 37, + 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, + 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, +}; + +/* *str points to the first digit in a string of base `base` digits. base * is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first * non-digit (which may be *str!). A normalized long is returned. 
* The point to this routine is that it takes time linear in the number of @@ -1329,20 +1394,8 @@ long_from_binary_base(char **str, int base) n >>= 1; /* n <- total # of bits needed, while setting p to end-of-string */ n = 0; - for (;;) { - int k = -1; - char ch = *p; - - if (ch <= '9') - k = ch - '0'; - else if (ch >= 'a') - k = ch - 'a' + 10; - else if (ch >= 'A') - k = ch - 'A' + 10; - if (k < 0 || k >= base) - break; + while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base) ++p; - } *str = p; n = (p - start) * bits_per_char; if (n / bits_per_char != p - start) { @@ -1362,17 +1415,7 @@ long_from_binary_base(char **str, int base) bits_in_accum = 0; pdigit = z->ob_digit; while (--p >= start) { - int k; - char ch = *p; - - if (ch <= '9') - k = ch - '0'; - else if (ch >= 'a') - k = ch - 'a' + 10; - else { - assert(ch >= 'A'); - k = ch - 'A' + 10; - } + int k = _PyLong_DigitValue[Py_CHARMASK(*p)]; assert(k >= 0 && k < base); accum |= (twodigits)(k << bits_in_accum); bits_in_accum += bits_per_char; @@ -1428,33 +1471,140 @@ PyLong_FromString(char *str, char **pend, int base) } if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) str += 2; + start = str; if ((base & (base - 1)) == 0) z = long_from_binary_base(&str, base); else { - z = _PyLong_New(0); - for ( ; z != NULL; ++str) { - int k = -1; - PyLongObject *temp; - - if (*str <= '9') - k = *str - '0'; - else if (*str >= 'a') - k = *str - 'a' + 10; - else if (*str >= 'A') - k = *str - 'A' + 10; - if (k < 0 || k >= base) - break; - temp = muladd1(z, (digit)base, (digit)k); - Py_DECREF(z); - z = temp; +/*** +Binary bases can be converted in time linear in the number of digits, because +Python's representation base is binary. Other bases (including decimal!) use +the simple quadratic-time algorithm below, complicated by some speed tricks. + +First some math: the largest integer that can be expressed in N base-B digits +is B**N-1. 
Consequently, if we have an N-digit input in base B, the worst- +case number of Python digits needed to hold it is the smallest integer n s.t. + + BASE**n-1 >= B**N-1 [or, adding 1 to both sides] + BASE**n >= B**N [taking logs to base BASE] + n >= log(B**N)/log(BASE) = N * log(B)/log(BASE) + +The static array log_base_BASE[base] == log(base)/log(BASE) so we can compute +this quickly. A Python long with that much space is reserved near the start, +and the result is computed into it. + +The input string is actually treated as being in base base**i (i.e., i digits +are processed at a time), where two more static arrays hold: + + convwidth_base[base] = the largest integer i such that base**i <= BASE + convmultmax_base[base] = base ** convwidth_base[base] + +The first of these is the largest i such that i consecutive input digits +must fit in a single Python digit. The second is effectively the input +base we're really using. + +Viewing the input as a sequence <c0, c1, ..., c_n-1> of digits in base +convmultmax_base[base], the result is "simply" + + (((c0*B + c1)*B + c2)*B + c3)*B + ... ))) + c_n-1 + +where B = convmultmax_base[base]. +***/ + register twodigits c; /* current input character */ + Py_ssize_t size_z; + int i; + int convwidth; + twodigits convmultmax, convmult; + digit *pz, *pzstop; + char* scan; + + static double log_base_BASE[37] = {0.0e0,}; + static int convwidth_base[37] = {0,}; + static twodigits convmultmax_base[37] = {0,}; + + if (log_base_BASE[base] == 0.0) { + twodigits convmax = base; + int i = 1; + + log_base_BASE[base] = log((double)base) / + log((double)BASE); + for (;;) { + twodigits next = convmax * base; + if (next > BASE) + break; + convmax = next; + ++i; + } + convmultmax_base[base] = convmax; + assert(i > 0); + convwidth_base[base] = i; + } + + /* Find length of the string of numeric characters. 
*/ + scan = str; + while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base) + ++scan; + + /* Create a long object that can contain the largest possible + * integer with this base and length. Note that there's no + * need to initialize z->ob_digit -- no slot is read up before + * being stored into. + */ + size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1; + assert(size_z > 0); + z = _PyLong_New(size_z); + if (z == NULL) + return NULL; + z->ob_size = 0; + + /* `convwidth` consecutive input digits are treated as a single + * digit in base `convmultmax`. + */ + convwidth = convwidth_base[base]; + convmultmax = convmultmax_base[base]; + + /* Work ;-) */ + while (str < scan) { + /* grab up to convwidth digits from the input string */ + c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)]; + for (i = 1; i < convwidth && str != scan; ++i, ++str) { + c = (twodigits)(c * base + + _PyLong_DigitValue[Py_CHARMASK(*str)]); + assert(c < BASE); + } + + convmult = convmultmax; + /* Calculate the shift only if we couldn't get + * convwidth digits. + */ + if (i != convwidth) { + convmult = base; + for ( ; i > 1; --i) + convmult *= base; + } + + /* Multiply z by convmult, and add c. */ + pz = z->ob_digit; + pzstop = pz + z->ob_size; + for (; pz < pzstop; ++pz) { + c += (twodigits)*pz * convmult; + *pz = (digit)(c & MASK); + c >>= SHIFT; + } + /* carry off the current end? 
*/ + if (c) { + assert(c < BASE); + assert(z->ob_size < size_z); + *pz = (digit)c; + ++z->ob_size; + } } } if (z == NULL) return NULL; if (str == start) goto onError; - if (sign < 0 && z != NULL && z->ob_size != 0) + if (sign < 0) z->ob_size = -(z->ob_size); if (*str == 'L' || *str == 'l') str++; @@ -1580,9 +1730,10 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) assert(size_w == ABS(w->ob_size)); /* That's how d was calculated */ size_v = ABS(v->ob_size); - a = _PyLong_New(size_v - size_w + 1); + k = size_v - size_w; + a = _PyLong_New(k + 1); - for (j = size_v, k = a->ob_size-1; a != NULL && k >= 0; --j, --k) { + for (j = size_v; a != NULL && k >= 0; --j, --k) { digit vj = (j >= size_v) ? 0 : v->ob_digit[j]; twodigits q; stwodigits carry = 0; diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt new file mode 100644 index 0000000..82a8774 --- /dev/null +++ b/Objects/stringlib/README.txt @@ -0,0 +1,34 @@ +bits shared by the stringobject and unicodeobject implementations (and +possibly other modules, in a not too distant future). + +the stuff in here is included into relevant places; see the individual +source files for details. + +-------------------------------------------------------------------- +the following defines are used by the different modules: + +STRINGLIB_CHAR + + the type used to hold a character (char or Py_UNICODE) + +STRINGLIB_EMPTY + + a PyObject representing the empty string + +int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t) + + compares two strings. returns 0 if they match, and non-zero if not. 
+ +Py_ssize_t STRINGLIB_LEN(PyObject*) + + returns the length of the given string object (which must be of the + right type) + +PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t) + + creates a new string object + +STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) + + returns the pointer to the character data for the given string + object (which must be of the right type) diff --git a/Objects/stringlib/count.h b/Objects/stringlib/count.h new file mode 100644 index 0000000..0bd02b5 --- /dev/null +++ b/Objects/stringlib/count.h @@ -0,0 +1,34 @@ +/* stringlib: count implementation */ + +#ifndef STRINGLIB_COUNT_H +#define STRINGLIB_COUNT_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len) +{ + Py_ssize_t count; + + if (sub_len == 0) + return str_len + 1; + + count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT); + + if (count < 0) + count = 0; /* no match */ + + return count; +} + +#endif + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h new file mode 100644 index 0000000..8f79c36 --- /dev/null +++ b/Objects/stringlib/fastsearch.h @@ -0,0 +1,104 @@ +/* stringlib: fastsearch implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#define STRINGLIB_FASTSEARCH_H + +/* fast search/count implementation, based on a mix between boyer- + moore and horspool, with a few more bells and whistles on the top. + for some more background, see: http://effbot.org/stringlib */ + +/* note: fastsearch may access s[n], which isn't a problem when using + Python's ordinary string types, but may cause problems if you're + using this code in other contexts. 
also, the count mode returns -1 + if there cannot possibly be a match in the target string, and 0 if + it has actually checked for matches, but didn't find any. callers + beware! */ + +#define FAST_COUNT 0 +#define FAST_SEARCH 1 + +Py_LOCAL_INLINE(Py_ssize_t) +fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, + const STRINGLIB_CHAR* p, Py_ssize_t m, + int mode) +{ + long mask; + Py_ssize_t skip, count = 0; + Py_ssize_t i, j, mlast, w; + + w = n - m; + + if (w < 0) + return -1; + + /* look for special cases */ + if (m <= 1) { + if (m <= 0) + return -1; + /* use special case for 1-character strings */ + if (mode == FAST_COUNT) { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + count++; + return count; + } else { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + return i; + } + return -1; + } + + mlast = m - 1; + + /* create compressed boyer-moore delta 1 table */ + skip = mlast - 1; + /* process pattern[:-1] */ + for (mask = i = 0; i < mlast; i++) { + mask |= (1 << (p[i] & 0x1F)); + if (p[i] == p[mlast]) + skip = mlast - i - 1; + } + /* process pattern[-1] outside the loop */ + mask |= (1 << (p[mlast] & 0x1F)); + + for (i = 0; i <= w; i++) { + /* note: using mlast in the skip path slows things down on x86 */ + if (s[i+m-1] == p[m-1]) { + /* candidate match */ + for (j = 0; j < mlast; j++) + if (s[i+j] != p[j]) + break; + if (j == mlast) { + /* got a match! 
*/ + if (mode != FAST_COUNT) + return i; + count++; + i = i + mlast; + continue; + } + /* miss: check if next character is part of pattern */ + if (!(mask & (1 << (s[i+m] & 0x1F)))) + i = i + m; + else + i = i + skip; + } else { + /* skip: check if next character is part of pattern */ + if (!(mask & (1 << (s[i+m] & 0x1F)))) + i = i + m; + } + } + + if (mode != FAST_COUNT) + return -1; + return count; +} + +#endif + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h new file mode 100644 index 0000000..4cea2db --- /dev/null +++ b/Objects/stringlib/find.h @@ -0,0 +1,112 @@ +/* stringlib: find/index implementation */ + +#ifndef STRINGLIB_FIND_H +#define STRINGLIB_FIND_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t offset) +{ + Py_ssize_t pos; + + if (sub_len == 0) + return offset; + + pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH); + + if (pos >= 0) + pos += offset; + + return pos; +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t offset) +{ + Py_ssize_t pos; + + /* XXX - create reversefastsearch helper! 
*/ + if (sub_len == 0) + pos = str_len + offset; + else { + Py_ssize_t j; + pos = -1; + for (j = str_len - sub_len; j >= 0; --j) + if (STRINGLIB_CMP(str+j, sub, sub_len) == 0) { + pos = j + offset; + break; + } + } + + return pos; +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t start, Py_ssize_t end) +{ + if (start < 0) + start += str_len; + if (start < 0) + start = 0; + if (end > str_len) + end = str_len; + if (end < 0) + end += str_len; + if (end < 0) + end = 0; + + return stringlib_find( + str + start, end - start, + sub, sub_len, start + ); +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t start, Py_ssize_t end) +{ + if (start < 0) + start += str_len; + if (start < 0) + start = 0; + if (end > str_len) + end = str_len; + if (end < 0) + end += str_len; + if (end < 0) + end = 0; + + return stringlib_rfind(str + start, end - start, sub, sub_len, start); +} + +#ifdef STRINGLIB_STR + +Py_LOCAL_INLINE(int) +stringlib_contains_obj(PyObject* str, PyObject* sub) +{ + return stringlib_find( + STRINGLIB_STR(str), STRINGLIB_LEN(str), + STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0 + ) != -1; +} + +#endif /* STRINGLIB_STR */ + +#endif /* STRINGLIB_FIND_H */ + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h new file mode 100644 index 0000000..1486347 --- /dev/null +++ b/Objects/stringlib/partition.h @@ -0,0 +1,111 @@ +/* stringlib: partition implementation */ + +#ifndef STRINGLIB_PARTITION_H +#define STRINGLIB_PARTITION_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(PyObject*) +stringlib_partition( + PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t 
str_len, + PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len + ) +{ + PyObject* out; + Py_ssize_t pos; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH); + + if (pos < 0) { + Py_INCREF(str_obj); + PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); + return out; + } + + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + Py_INCREF(sep_obj); + PyTuple_SET_ITEM(out, 1, sep_obj); + pos += sep_len; + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + + return out; +} + +Py_LOCAL_INLINE(PyObject*) +stringlib_rpartition( + PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, + PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len + ) +{ + PyObject* out; + Py_ssize_t pos, j; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + /* XXX - create reversefastsearch helper! 
*/ + pos = -1; + for (j = str_len - sep_len; j >= 0; --j) + if (STRINGLIB_CMP(str+j, sep, sep_len) == 0) { + pos = j; + break; + } + + if (pos < 0) { + Py_INCREF(str_obj); + PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); + return out; + } + + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + Py_INCREF(sep_obj); + PyTuple_SET_ITEM(out, 1, sep_obj); + pos += sep_len; + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + + return out; +} + +#endif + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringobject.c b/Objects/stringobject.c index b34dcb2..110c38e 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1,6 +1,7 @@ /* String object implementation */ #define PY_SSIZE_T_CLEAN + #include "Python.h" #include <ctype.h> @@ -176,14 +177,11 @@ PyString_FromFormatV(const char *format, va_list vargs) while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) ; - /* skip the 'l' in %ld, since it doesn't change the - width. although only %d is supported (see - "expand" section below), others can be easily - added */ - if (*f == 'l' && *(f+1) == 'd') - ++f; - /* likewise for %zd */ - if (*f == 'z' && *(f+1) == 'd') + /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since + * they don't affect the amount of space we reserve. + */ + if ((*f == 'l' || *f == 'z') && + (f[1] == 'd' || f[1] == 'u')) ++f; switch (*f) { @@ -193,7 +191,7 @@ PyString_FromFormatV(const char *format, va_list vargs) case '%': n++; break; - case 'd': case 'i': case 'x': + case 'd': case 'u': case 'i': case 'x': (void) va_arg(count, int); /* 20 bytes is enough to hold a 64-bit integer. Decimal takes the most space. 
@@ -255,14 +253,14 @@ PyString_FromFormatV(const char *format, va_list vargs) } while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) f++; - /* handle the long flag, but only for %ld. others - can be added when necessary. */ - if (*f == 'l' && *(f+1) == 'd') { + /* handle the long flag, but only for %ld and %lu. + others can be added when necessary. */ + if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { longflag = 1; ++f; } /* handle the size_t flag. */ - if (*f == 'z' && *(f+1) == 'd') { + if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { size_tflag = 1; ++f; } @@ -275,10 +273,22 @@ PyString_FromFormatV(const char *format, va_list vargs) if (longflag) sprintf(s, "%ld", va_arg(vargs, long)); else if (size_tflag) + sprintf(s, "%" PY_FORMAT_SIZE_T "d", + va_arg(vargs, Py_ssize_t)); + else + sprintf(s, "%d", va_arg(vargs, int)); + s += strlen(s); + break; + case 'u': + if (longflag) + sprintf(s, "%lu", + va_arg(vargs, unsigned long)); + else if (size_tflag) sprintf(s, "%" PY_FORMAT_SIZE_T "u", va_arg(vargs, size_t)); else - sprintf(s, "%d", va_arg(vargs, int)); + sprintf(s, "%u", + va_arg(vargs, unsigned int)); s += strlen(s); break; case 'i': @@ -680,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s, return NULL; } +/* -------------------------------------------------------------------- */ +/* object api */ + static Py_ssize_t string_getsize(register PyObject *op) { @@ -754,8 +767,25 @@ PyString_AsStringAndSize(register PyObject *obj, return 0; } +/* -------------------------------------------------------------------- */ /* Methods */ +#define STRINGLIB_CHAR char + +#define STRINGLIB_CMP memcmp +#define STRINGLIB_LEN PyString_GET_SIZE +#define STRINGLIB_NEW PyString_FromStringAndSize +#define STRINGLIB_STR PyString_AS_STRING + +#define STRINGLIB_EMPTY nullstring + +#include "stringlib/fastsearch.h" + +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/partition.h" + + static int string_print(PyStringObject *op, FILE *fp, int flags) { 
@@ -900,7 +930,7 @@ string_length(PyStringObject *a) static PyObject * string_concat(register PyStringObject *a, register PyObject *bb) { - register size_t size; + register Py_ssize_t size; register PyStringObject *op; if (!PyString_Check(bb)) { #ifdef Py_USING_UNICODE @@ -924,7 +954,12 @@ string_concat(register PyStringObject *a, register PyObject *bb) return (PyObject *)a; } size = a->ob_size + b->ob_size; - /* XXX check overflow */ + if (size < 0) { + PyErr_SetString(PyExc_OverflowError, + "strings are too large to concat"); + return NULL; + } + /* Inline PyObject_NewVar */ op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); if (op == NULL) @@ -1017,65 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i, } static int -string_contains(PyObject *a, PyObject *el) +string_contains(PyObject *str_obj, PyObject *sub_obj) { - char *s = PyString_AS_STRING(a); - const char *sub = PyString_AS_STRING(el); - char *last; - Py_ssize_t len_sub = PyString_GET_SIZE(el); - Py_ssize_t shortsub; - char firstchar, lastchar; - - if (!PyString_CheckExact(el)) { + if (!PyString_CheckExact(sub_obj)) { #ifdef Py_USING_UNICODE - if (PyUnicode_Check(el)) - return PyUnicode_Contains(a, el); + if (PyUnicode_Check(sub_obj)) + return PyUnicode_Contains(str_obj, sub_obj); #endif - if (!PyString_Check(el)) { + if (!PyString_Check(sub_obj)) { PyErr_SetString(PyExc_TypeError, "'in <string>' requires string as left operand"); return -1; } } - if (len_sub == 0) - return 1; - /* last points to one char beyond the start of the rightmost - substring. When s<last, there is still room for a possible match - and s[0] through s[len_sub-1] will be in bounds. - shortsub is len_sub minus the last character which is checked - separately just before the memcmp(). That check helps prevent - false starts and saves the setup time for memcmp(). 
- */ - firstchar = sub[0]; - shortsub = len_sub - 1; - lastchar = sub[shortsub]; - last = s + PyString_GET_SIZE(a) - len_sub + 1; - while (s < last) { - s = (char *)memchr(s, firstchar, last-s); - if (s == NULL) - return 0; - assert(s < last); - if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0) - return 1; - s++; - } - return 0; + return stringlib_contains_obj(str_obj, sub_obj); } static PyObject * string_item(PyStringObject *a, register Py_ssize_t i) { + char pchar; PyObject *v; - char *pchar; if (i < 0 || i >= a->ob_size) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } - pchar = a->ob_sval + i; - v = (PyObject *)characters[*pchar & UCHAR_MAX]; + pchar = a->ob_sval[i]; + v = (PyObject *)characters[pchar & UCHAR_MAX]; if (v == NULL) - v = PyString_FromStringAndSize(pchar, 1); + v = PyString_FromStringAndSize(&pchar, 1); else { #ifdef COUNT_ALLOCS one_strings++; @@ -1151,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op) int _PyString_Eq(PyObject *o1, PyObject *o2) { - PyStringObject *a, *b; - a = (PyStringObject*)o1; - b = (PyStringObject*)o2; + PyStringObject *a = (PyStringObject*) o1; + PyStringObject *b = (PyStringObject*) o2; return a->ob_size == b->ob_size && *a->ob_sval == *b->ob_sval && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0; @@ -1308,6 +1313,27 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define STRIPNAME(i) (stripformat[i]+3) + +/* Don't call if length < 2 */ +#define Py_STRING_MATCH(target, offset, pattern, length) \ + (target[offset] == pattern[0] && \ + target[offset+length-1] == pattern[length-1] && \ + !memcmp(target+offset+1, pattern+1, length-2) ) + + +/* Overallocate the initial list to reduce the number of reallocs for small + split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three + resizes, to sizes 4, 8, then 16. 
Most observed string splits are for human + text (roughly 11 words per line) and field delimited data (usually 1-10 + fields). For large strings the split algorithms are bandwidth limited + so increasing the preallocation likely will not improve things.*/ + +#define MAX_PREALLOC 12 + +/* 5 splits gives 6 elements */ +#define PREALLOC_SIZE(maxsplit) \ + (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) + #define SPLIT_APPEND(data, left, right) \ str = PyString_FromStringAndSize((data) + (left), \ (right) - (left)); \ @@ -1320,74 +1346,90 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; else \ Py_DECREF(str); -#define SPLIT_INSERT(data, left, right) \ +#define SPLIT_ADD(data, left, right) { \ str = PyString_FromStringAndSize((data) + (left), \ (right) - (left)); \ if (str == NULL) \ goto onError; \ - if (PyList_Insert(list, 0, str)) { \ - Py_DECREF(str); \ - goto onError; \ + if (count < MAX_PREALLOC) { \ + PyList_SET_ITEM(list, count, str); \ + } else { \ + if (PyList_Append(list, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); \ } \ - else \ - Py_DECREF(str); + count++; } -static PyObject * +/* Always force the list to the expected size. 
*/ +#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count; + +#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; } +#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; } +#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; } +#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; } + +Py_LOCAL_INLINE(PyObject *) split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) { - Py_ssize_t i, j; + Py_ssize_t i, j, count=0; PyObject *str; - PyObject *list = PyList_New(0); + PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) return NULL; - for (i = j = 0; i < len; ) { - while (i < len && isspace(Py_CHARMASK(s[i]))) - i++; - j = i; - while (i < len && !isspace(Py_CHARMASK(s[i]))) - i++; - if (j < i) { - if (maxsplit-- <= 0) - break; - SPLIT_APPEND(s, j, i); - while (i < len && isspace(Py_CHARMASK(s[i]))) - i++; - j = i; - } + i = j = 0; + + while (maxsplit-- > 0) { + SKIP_SPACE(s, i, len); + if (i==len) break; + j = i; i++; + SKIP_NONSPACE(s, i, len); + SPLIT_ADD(s, j, i); } - if (j < len) { - SPLIT_APPEND(s, j, len); + + if (i < len) { + /* Only occurs when maxsplit was reached */ + /* Skip any remaining whitespace and copy to end of string */ + SKIP_SPACE(s, i, len); + if (i != len) + SPLIT_ADD(s, i, len); } + FIX_PREALLOC_SIZE(list); return list; onError: Py_DECREF(list); return NULL; } -static PyObject * +Py_LOCAL_INLINE(PyObject *) split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) { - register Py_ssize_t i, j; + register Py_ssize_t i, j, count=0; PyObject *str; - PyObject *list = PyList_New(0); + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); if (list == NULL) return NULL; - for (i = j = 0; i < len; ) { - if (s[i] == ch) { - if (maxcount-- <= 0) + i = j = 0; + while ((j < len) && (maxcount-- > 0)) { + for(; j<len; j++) { + /* I found that using memchr makes no difference */ + if 
(s[j] == ch) { + SPLIT_ADD(s, i, j); + i = j = j + 1; break; - SPLIT_APPEND(s, j, i); - i = j = i + 1; - } else - i++; + } + } } - if (j <= len) { - SPLIT_APPEND(s, j, len); + if (i <= len) { + SPLIT_ADD(s, i, len); } + FIX_PREALLOC_SIZE(list); return list; onError: @@ -1407,10 +1449,12 @@ static PyObject * string_split(PyStringObject *self, PyObject *args) { Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; - int err; - Py_ssize_t maxsplit = -1; + Py_ssize_t maxsplit = -1, count=0; const char *s = PyString_AS_STRING(self), *sub; - PyObject *list, *item, *subobj = Py_None; + PyObject *list, *str, *subobj = Py_None; +#ifdef USE_FAST + Py_ssize_t pos; +#endif if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) return NULL; @@ -1436,98 +1480,166 @@ string_split(PyStringObject *self, PyObject *args) else if (n == 1) return split_char(s, len, sub[0], maxsplit); - list = PyList_New(0); + list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) return NULL; +#ifdef USE_FAST i = j = 0; - while (i+n <= len) { - if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) { - if (maxsplit-- <= 0) + while (maxsplit-- > 0) { + pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH); + if (pos < 0) + break; + j = i+pos; + SPLIT_ADD(s, i, j); + i = j + n; + + } +#else + i = j = 0; + while ((j+n <= len) && (maxsplit-- > 0)) { + for (; j+n <= len; j++) { + if (Py_STRING_MATCH(s, j, sub, n)) { + SPLIT_ADD(s, i, j); + i = j = j + n; break; - item = PyString_FromStringAndSize(s+j, i-j); - if (item == NULL) - goto fail; - err = PyList_Append(list, item); - Py_DECREF(item); - if (err < 0) - goto fail; - i = j = i + n; + } } - else - i++; } - item = PyString_FromStringAndSize(s+j, len-j); - if (item == NULL) - goto fail; - err = PyList_Append(list, item); - Py_DECREF(item); - if (err < 0) - goto fail; - +#endif + SPLIT_ADD(s, i, len); + FIX_PREALLOC_SIZE(list); return list; - fail: + onError: Py_DECREF(list); return NULL; } +PyDoc_STRVAR(partition__doc__, +"S.partition(sep) -> (head, sep, 
tail)\n\ +\n\ +Searches for the separator sep in S, and returns the part before it,\n\ +the separator itself, and the part after it. If the separator is not\n\ +found, returns S and two empty strings."); + +static PyObject * +string_partition(PyStringObject *self, PyObject *sep_obj) +{ + const char *sep; + Py_ssize_t sep_len; + + if (PyString_Check(sep_obj)) { + sep = PyString_AS_STRING(sep_obj); + sep_len = PyString_GET_SIZE(sep_obj); + } +#ifdef Py_USING_UNICODE + else if (PyUnicode_Check(sep_obj)) + return PyUnicode_Partition((PyObject *) self, sep_obj); +#endif + else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) + return NULL; + + return stringlib_partition( + (PyObject*) self, + PyString_AS_STRING(self), PyString_GET_SIZE(self), + sep_obj, sep, sep_len + ); +} + +PyDoc_STRVAR(rpartition__doc__, +"S.rpartition(sep) -> (head, sep, tail)\n\ +\n\ +Searches for the separator sep in S, starting at the end of S, and returns\n\ +the part before it, the separator itself, and the part after it. 
If the\n\ +separator is not found, returns S and two empty strings."); + static PyObject * +string_rpartition(PyStringObject *self, PyObject *sep_obj) +{ + const char *sep; + Py_ssize_t sep_len; + + if (PyString_Check(sep_obj)) { + sep = PyString_AS_STRING(sep_obj); + sep_len = PyString_GET_SIZE(sep_obj); + } +#ifdef Py_USING_UNICODE + else if (PyUnicode_Check(sep_obj)) + return PyUnicode_Partition((PyObject *) self, sep_obj); +#endif + else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) + return NULL; + + return stringlib_rpartition( + (PyObject*) self, + PyString_AS_STRING(self), PyString_GET_SIZE(self), + sep_obj, sep, sep_len + ); +} + +Py_LOCAL_INLINE(PyObject *) rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) { - Py_ssize_t i, j; + Py_ssize_t i, j, count=0; PyObject *str; - PyObject *list = PyList_New(0); + PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) return NULL; - for (i = j = len - 1; i >= 0; ) { - while (i >= 0 && isspace(Py_CHARMASK(s[i]))) - i--; - j = i; - while (i >= 0 && !isspace(Py_CHARMASK(s[i]))) - i--; - if (j > i) { - if (maxsplit-- <= 0) - break; - SPLIT_INSERT(s, i + 1, j + 1); - while (i >= 0 && isspace(Py_CHARMASK(s[i]))) - i--; - j = i; - } - } - if (j >= 0) { - SPLIT_INSERT(s, 0, j + 1); - } + i = j = len-1; + + while (maxsplit-- > 0) { + RSKIP_SPACE(s, i); + if (i<0) break; + j = i; i--; + RSKIP_NONSPACE(s, i); + SPLIT_ADD(s, i + 1, j + 1); + } + if (i >= 0) { + /* Only occurs when maxsplit was reached */ + /* Skip any remaining whitespace and copy to beginning of string */ + RSKIP_SPACE(s, i); + if (i >= 0) + SPLIT_ADD(s, 0, i + 1); + + } + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; return list; onError: Py_DECREF(list); return NULL; } -static PyObject * +Py_LOCAL_INLINE(PyObject *) rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) { - register Py_ssize_t i, j; + register Py_ssize_t i, j, count=0; PyObject *str; - PyObject *list = 
PyList_New(0); + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); if (list == NULL) return NULL; - for (i = j = len - 1; i >= 0; ) { - if (s[i] == ch) { - if (maxcount-- <= 0) + i = j = len - 1; + while ((i >= 0) && (maxcount-- > 0)) { + for (; i >= 0; i--) { + if (s[i] == ch) { + SPLIT_ADD(s, i + 1, j + 1); + j = i = i - 1; break; - SPLIT_INSERT(s, i + 1, j + 1); - j = i = i - 1; - } else - i--; + } + } } if (j >= -1) { - SPLIT_INSERT(s, 0, j + 1); + SPLIT_ADD(s, 0, j + 1); } + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; return list; onError: @@ -1548,10 +1660,9 @@ static PyObject * string_rsplit(PyStringObject *self, PyObject *args) { Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; - int err; - Py_ssize_t maxsplit = -1; + Py_ssize_t maxsplit = -1, count=0; const char *s = PyString_AS_STRING(self), *sub; - PyObject *list, *item, *subobj = Py_None; + PyObject *list, *str, *subobj = Py_None; if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) return NULL; @@ -1577,40 +1688,30 @@ string_rsplit(PyStringObject *self, PyObject *args) else if (n == 1) return rsplit_char(s, len, sub[0], maxsplit); - list = PyList_New(0); + list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) return NULL; j = len; i = j - n; - while (i >= 0) { - if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) { - if (maxsplit-- <= 0) + + while ( (i >= 0) && (maxsplit-- > 0) ) { + for (; i>=0; i--) { + if (Py_STRING_MATCH(s, i, sub, n)) { + SPLIT_ADD(s, i + n, j); + j = i; + i -= n; break; - item = PyString_FromStringAndSize(s+i+n, j-i-n); - if (item == NULL) - goto fail; - err = PyList_Insert(list, 0, item); - Py_DECREF(item); - if (err < 0) - goto fail; - j = i; - i -= n; + } } - else - i--; } - item = PyString_FromStringAndSize(s, j); - if (item == NULL) - goto fail; - err = PyList_Insert(list, 0, item); - Py_DECREF(item); - if (err < 0) - goto fail; - + SPLIT_ADD(s, 0, j); + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; 
return list; - fail: +onError: Py_DECREF(list); return NULL; } @@ -1727,7 +1828,7 @@ _PyString_Join(PyObject *sep, PyObject *x) return string_join((PyStringObject *)sep, x); } -static void +Py_LOCAL_INLINE(void) string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) { if (*end > len) @@ -1742,50 +1843,38 @@ string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) *start = 0; } -static Py_ssize_t +Py_LOCAL_INLINE(Py_ssize_t) string_find_internal(PyStringObject *self, PyObject *args, int dir) { - const char *s = PyString_AS_STRING(self), *sub; - Py_ssize_t len = PyString_GET_SIZE(self); - Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX; PyObject *subobj; + const char *sub; + Py_ssize_t sub_len; + Py_ssize_t start=0, end=PY_SSIZE_T_MAX; /* XXX ssize_t i */ - if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", - &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last)) + if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj, + _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return -2; if (PyString_Check(subobj)) { sub = PyString_AS_STRING(subobj); - n = PyString_GET_SIZE(subobj); + sub_len = PyString_GET_SIZE(subobj); } #ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) - return PyUnicode_Find((PyObject *)self, subobj, i, last, dir); + return PyUnicode_Find( + (PyObject *)self, subobj, start, end, dir); #endif - else if (PyObject_AsCharBuffer(subobj, &sub, &n)) + else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) return -2; - string_adjust_indices(&i, &last, len); - - if (dir > 0) { - if (n == 0 && i <= last) - return (long)i; - last -= n; - for (; i <= last; ++i) - if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0) - return (long)i; - } - else { - Py_ssize_t j; - - if (n == 0 && i <= last) - return last; - for (j = last-n; j >= i; --j) - if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0) - return j; - } - - return -1; + if (dir > 0) + return stringlib_find_slice( + PyString_AS_STRING(self), 
PyString_GET_SIZE(self), + sub, sub_len, start, end); + else + return stringlib_rfind_slice( + PyString_AS_STRING(self), PyString_GET_SIZE(self), + sub, sub_len, start, end); } @@ -1867,7 +1956,7 @@ string_rindex(PyStringObject *self, PyObject *args) } -static PyObject * +Py_LOCAL_INLINE(PyObject *) do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) { char *s = PyString_AS_STRING(self); @@ -1900,7 +1989,7 @@ do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) } -static PyObject * +Py_LOCAL_INLINE(PyObject *) do_strip(PyStringObject *self, int striptype) { char *s = PyString_AS_STRING(self); @@ -1930,7 +2019,7 @@ do_strip(PyStringObject *self, int striptype) } -static PyObject * +Py_LOCAL_INLINE(PyObject *) do_argstrip(PyStringObject *self, int striptype, PyObject *args) { PyObject *sep = NULL; @@ -2024,57 +2113,68 @@ PyDoc_STRVAR(lower__doc__, \n\ Return a copy of the string S converted to lowercase."); +/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */ +#ifndef _tolower +#define _tolower tolower +#endif + static PyObject * string_lower(PyStringObject *self) { - char *s = PyString_AS_STRING(self), *s_new; + char *s; Py_ssize_t i, n = PyString_GET_SIZE(self); PyObject *newobj; newobj = PyString_FromStringAndSize(NULL, n); - if (newobj == NULL) + if (!newobj) return NULL; - s_new = PyString_AsString(newobj); + + s = PyString_AS_STRING(newobj); + + memcpy(s, PyString_AS_STRING(self), n); + for (i = 0; i < n; i++) { - int c = Py_CHARMASK(*s++); - if (isupper(c)) { - *s_new = tolower(c); - } else - *s_new = c; - s_new++; + int c = Py_CHARMASK(s[i]); + if (isupper(c)) + s[i] = _tolower(c); } + return newobj; } - PyDoc_STRVAR(upper__doc__, "S.upper() -> string\n\ \n\ Return a copy of the string S converted to uppercase."); +#ifndef _toupper +#define _toupper toupper +#endif + static PyObject * string_upper(PyStringObject *self) { - char *s = PyString_AS_STRING(self), *s_new; + char *s; Py_ssize_t i, n = 
PyString_GET_SIZE(self); PyObject *newobj; newobj = PyString_FromStringAndSize(NULL, n); - if (newobj == NULL) + if (!newobj) return NULL; - s_new = PyString_AsString(newobj); + + s = PyString_AS_STRING(newobj); + + memcpy(s, PyString_AS_STRING(self), n); + for (i = 0; i < n; i++) { - int c = Py_CHARMASK(*s++); - if (islower(c)) { - *s_new = toupper(c); - } else - *s_new = c; - s_new++; + int c = Py_CHARMASK(s[i]); + if (islower(c)) + s[i] = _toupper(c); } + return newobj; } - PyDoc_STRVAR(title__doc__, "S.title() -> string\n\ \n\ @@ -2150,62 +2250,44 @@ string_capitalize(PyStringObject *self) PyDoc_STRVAR(count__doc__, "S.count(sub[, start[, end]]) -> int\n\ \n\ -Return the number of occurrences of substring sub in string\n\ -S[start:end]. Optional arguments start and end are\n\ -interpreted as in slice notation."); +Return the number of non-overlapping occurrences of substring sub in\n\ +string S[start:end]. Optional arguments start and end are interpreted\n\ +as in slice notation."); static PyObject * string_count(PyStringObject *self, PyObject *args) { - const char *s = PyString_AS_STRING(self), *sub, *t; - Py_ssize_t len = PyString_GET_SIZE(self), n; - Py_ssize_t i = 0, last = PY_SSIZE_T_MAX; - Py_ssize_t m, r; - PyObject *subobj; + PyObject *sub_obj; + const char *str = PyString_AS_STRING(self), *sub; + Py_ssize_t sub_len; + Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; - if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj, - _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last)) + if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, + _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); - n = PyString_GET_SIZE(subobj); + if (PyString_Check(sub_obj)) { + sub = PyString_AS_STRING(sub_obj); + sub_len = PyString_GET_SIZE(sub_obj); } #ifdef Py_USING_UNICODE - else if (PyUnicode_Check(subobj)) { + else if (PyUnicode_Check(sub_obj)) { Py_ssize_t count; - count = PyUnicode_Count((PyObject 
*)self, subobj, i, last); + count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); if (count == -1) return NULL; else - return PyInt_FromLong((long) count); + return PyInt_FromSsize_t(count); } #endif - else if (PyObject_AsCharBuffer(subobj, &sub, &n)) + else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) return NULL; - string_adjust_indices(&i, &last, len); - - m = last + 1 - n; - if (n == 0) - return PyInt_FromSsize_t(m-i); + string_adjust_indices(&start, &end, PyString_GET_SIZE(self)); - r = 0; - while (i < m) { - if (!memcmp(s+i, sub, n)) { - r++; - i += n; - } else { - i++; - } - if (i >= m) - break; - t = (const char *)memchr(s+i, sub[0], m-i); - if (t == NULL) - break; - i = t - s; - } - return PyInt_FromSsize_t(r); + return PyInt_FromSsize_t( + stringlib_count(str + start, end - start, sub, sub_len) + ); } PyDoc_STRVAR(swapcase__doc__, @@ -2359,156 +2441,616 @@ string_translate(PyStringObject *self, PyObject *args) } -/* What follows is used for implementing replace(). Perry Stoll. */ +#define FORWARD 1 +#define REVERSE -1 -/* - mymemfind +/* find and count characters and substrings */ - strstr replacement for arbitrary blocks of memory. +#define findchar(target, target_len, c) \ + ((char *)memchr((const void *)(target), c, target_len)) - Locates the first occurrence in the memory pointed to by MEM of the - contents of memory pointed to by PAT. Returns the index into MEM if - found, or -1 if not found. If len of PAT is greater than length of - MEM, the function returns -1. -*/ -static Py_ssize_t -mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len) +/* String ops must return a string. 
*/ +/* If the object is subclass of string, create a copy */ +Py_LOCAL(PyStringObject *) +return_self(PyStringObject *self) { - register Py_ssize_t ii; + if (PyString_CheckExact(self)) { + Py_INCREF(self); + return self; + } + return (PyStringObject *)PyString_FromStringAndSize( + PyString_AS_STRING(self), + PyString_GET_SIZE(self)); +} - /* pattern can not occur in the last pat_len-1 chars */ - len -= pat_len; +Py_LOCAL_INLINE(Py_ssize_t) +countchar(char *target, int target_len, char c, Py_ssize_t maxcount) +{ + Py_ssize_t count=0; + char *start=target; + char *end=target+target_len; - for (ii = 0; ii <= len; ii++) { - if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) { - return ii; - } + while ( (start=findchar(start, end-start, c)) != NULL ) { + count++; + if (count >= maxcount) + break; + start += 1; + } + return count; +} + +Py_LOCAL(Py_ssize_t) +findstring(char *target, Py_ssize_t target_len, + char *pattern, Py_ssize_t pattern_len, + Py_ssize_t start, + Py_ssize_t end, + int direction) +{ + if (start < 0) { + start += target_len; + if (start < 0) + start = 0; + } + if (end > target_len) { + end = target_len; + } else if (end < 0) { + end += target_len; + if (end < 0) + end = 0; + } + + /* zero-length substrings always match at the first attempt */ + if (pattern_len == 0) + return (direction > 0) ? 
start : end; + + end -= pattern_len; + + if (direction < 0) { + for (; end >= start; end--) + if (Py_STRING_MATCH(target, end, pattern, pattern_len)) + return end; + } else { + for (; start <= end; start++) + if (Py_STRING_MATCH(target, start, pattern, pattern_len)) + return start; } return -1; } -/* - mymemcnt +Py_LOCAL_INLINE(Py_ssize_t) +countstring(char *target, Py_ssize_t target_len, + char *pattern, Py_ssize_t pattern_len, + Py_ssize_t start, + Py_ssize_t end, + int direction, Py_ssize_t maxcount) +{ + Py_ssize_t count=0; + + if (start < 0) { + start += target_len; + if (start < 0) + start = 0; + } + if (end > target_len) { + end = target_len; + } else if (end < 0) { + end += target_len; + if (end < 0) + end = 0; + } + + /* zero-length substrings match everywhere */ + if (pattern_len == 0 || maxcount == 0) { + if (target_len+1 < maxcount) + return target_len+1; + return maxcount; + } + + end -= pattern_len; + if (direction < 0) { + for (; (end >= start); end--) + if (Py_STRING_MATCH(target, end, pattern, pattern_len)) { + count++; + if (--maxcount <= 0) break; + end -= pattern_len-1; + } + } else { + for (; (start <= end); start++) + if (Py_STRING_MATCH(target, start, pattern, pattern_len)) { + count++; + if (--maxcount <= 0) + break; + start += pattern_len-1; + } + } + return count; +} - Return the number of distinct times PAT is found in MEM. - meaning mem=1111 and pat==11 returns 2. - mem=11111 and pat==11 also return 2. 
- */ -static Py_ssize_t -mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len) + +/* Algorithms for different cases of string replacement */ + +/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ +Py_LOCAL(PyStringObject *) +replace_interleave(PyStringObject *self, + PyStringObject *to, + Py_ssize_t maxcount) { - register Py_ssize_t offset = 0; - Py_ssize_t nfound = 0; + char *self_s, *to_s, *result_s; + Py_ssize_t self_len, to_len, result_len; + Py_ssize_t count, i, product; + PyStringObject *result; + + self_len = PyString_GET_SIZE(self); + to_len = PyString_GET_SIZE(to); + + /* 1 at the end plus 1 after every character */ + count = self_len+1; + if (maxcount < count) + count = maxcount; + + /* Check for overflow */ + /* result_len = count * to_len + self_len; */ + product = count * to_len; + if (product / to_len != count) { + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); + return NULL; + } + result_len = product + self_len; + if (result_len < 0) { + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); + return NULL; + } + + if (! 
(result = (PyStringObject *) + PyString_FromStringAndSize(NULL, result_len)) ) + return NULL; - while (len >= 0) { - offset = mymemfind(mem, len, pat, pat_len); - if (offset == -1) - break; - mem += offset + pat_len; - len -= offset + pat_len; - nfound++; + self_s = PyString_AS_STRING(self); + to_s = PyString_AS_STRING(to); + to_len = PyString_GET_SIZE(to); + result_s = PyString_AS_STRING(result); + + /* TODO: special case single character, which doesn't need memcpy */ + + /* Lay the first one down (guaranteed this will occur) */ + memcpy(result_s, to_s, to_len); + result_s += to_len; + count -= 1; + + for (i=0; i<count; i++) { + *result_s++ = *self_s++; + memcpy(result_s, to_s, to_len); + result_s += to_len; } - return nfound; + + /* Copy the rest of the original string */ + memcpy(result_s, self_s, self_len-i); + + return result; } -/* - mymemreplace +/* Special case for deleting a single character */ +/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ +Py_LOCAL(PyStringObject *) +replace_delete_single_character(PyStringObject *self, + char from_c, Py_ssize_t maxcount) +{ + char *self_s, *result_s; + char *start, *next, *end; + Py_ssize_t self_len, result_len; + Py_ssize_t count; + PyStringObject *result; - Return a string in which all occurrences of PAT in memory STR are - replaced with SUB. + self_len = PyString_GET_SIZE(self); + self_s = PyString_AS_STRING(self); - If length of PAT is less than length of STR or there are no occurrences - of PAT in STR, then the original string is returned. Otherwise, a new - string is allocated here and returned. + count = countchar(self_s, self_len, from_c, maxcount); + if (count == 0) { + return return_self(self); + } + + result_len = self_len - count; /* from_len == 1 */ + assert(result_len>=0); - on return, out_len is: - the length of output string, or - -1 if the input string is returned, or - unchanged if an error occurs (no memory). 
+ if ( (result = (PyStringObject *) + PyString_FromStringAndSize(NULL, result_len)) == NULL) + return NULL; + result_s = PyString_AS_STRING(result); - return value is: - the new string allocated locally, or - NULL if an error occurred. -*/ -static char * -mymemreplace(const char *str, Py_ssize_t len, /* input string */ - const char *pat, Py_ssize_t pat_len, /* pattern string to find */ - const char *sub, Py_ssize_t sub_len, /* substitution string */ - Py_ssize_t count, /* number of replacements */ - Py_ssize_t *out_len) + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + next = findchar(start, end-start, from_c); + if (next == NULL) + break; + memcpy(result_s, start, next-start); + result_s += (next-start); + start = next+1; + } + memcpy(result_s, start, end-start); + + return result; +} + +/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ + +Py_LOCAL(PyStringObject *) +replace_delete_substring(PyStringObject *self, PyStringObject *from, + Py_ssize_t maxcount) { + char *self_s, *from_s, *result_s; + char *start, *next, *end; + Py_ssize_t self_len, from_len, result_len; + Py_ssize_t count, offset; + PyStringObject *result; + + self_len = PyString_GET_SIZE(self); + self_s = PyString_AS_STRING(self); + from_len = PyString_GET_SIZE(from); + from_s = PyString_AS_STRING(from); + + count = countstring(self_s, self_len, + from_s, from_len, + 0, self_len, 1, + maxcount); + + if (count == 0) { + /* no matches */ + return return_self(self); + } + + result_len = self_len - (count * from_len); + assert (result_len>=0); + + if ( (result = (PyStringObject *) + PyString_FromStringAndSize(NULL, result_len)) == NULL ) + return NULL; + + result_s = PyString_AS_STRING(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + offset = findstring(start, end-start, + from_s, from_len, + 0, end-start, FORWARD); + if (offset == -1) + break; + next = start + offset; + + memcpy(result_s, start, next-start); + + result_s += (next-start); + start = 
next+from_len; + } + memcpy(result_s, start, end-start); + return result; +} + +/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ +Py_LOCAL(PyStringObject *) +replace_single_character_in_place(PyStringObject *self, + char from_c, char to_c, + Py_ssize_t maxcount) { - char *out_s; - char *new_s; - Py_ssize_t nfound, offset, new_len; - - if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len) - goto return_same; - - /* find length of output string */ - nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1; - if (count < 0) - count = PY_SSIZE_T_MAX; - else if (nfound > count) - nfound = count; - if (nfound == 0) - goto return_same; - - new_len = len + nfound*(sub_len - pat_len); - if (new_len == 0) { - /* Have to allocate something for the caller to free(). */ - out_s = (char *)PyMem_MALLOC(1); - if (out_s == NULL) - return NULL; - out_s[0] = '\0'; + char *self_s, *result_s, *start, *end, *next; + Py_ssize_t self_len; + PyStringObject *result; + + /* The result string will be the same size */ + self_s = PyString_AS_STRING(self); + self_len = PyString_GET_SIZE(self); + + next = findchar(self_s, self_len, from_c); + + if (next == NULL) { + /* No matches; return the original string */ + return return_self(self); + } + + /* Need to make a new string */ + result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); + if (result == NULL) + return NULL; + result_s = PyString_AS_STRING(result); + memcpy(result_s, self_s, self_len); + + /* change everything in-place, starting with this one */ + start = result_s + (next-self_s); + *start = to_c; + start++; + end = result_s + self_len; + + while (--maxcount > 0) { + next = findchar(start, end-start, from_c); + if (next == NULL) + break; + *next = to_c; + start = next+1; } - else { - assert(new_len > 0); - new_s = (char *)PyMem_MALLOC(new_len); - if (new_s == NULL) - return NULL; - out_s = new_s; + + return result; +} - if (pat_len > 0) { - for (; nfound > 0; --nfound) { - /* find index of 
next instance of pattern */ - offset = mymemfind(str, len, pat, pat_len); - if (offset == -1) - break; +/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ +Py_LOCAL(PyStringObject *) +replace_substring_in_place(PyStringObject *self, + PyStringObject *from, + PyStringObject *to, + Py_ssize_t maxcount) +{ + char *result_s, *start, *end; + char *self_s, *from_s, *to_s; + Py_ssize_t self_len, from_len, offset; + PyStringObject *result; + + /* The result string will be the same size */ + + self_s = PyString_AS_STRING(self); + self_len = PyString_GET_SIZE(self); + + from_s = PyString_AS_STRING(from); + from_len = PyString_GET_SIZE(from); + to_s = PyString_AS_STRING(to); + + offset = findstring(self_s, self_len, + from_s, from_len, + 0, self_len, FORWARD); + + if (offset == -1) { + /* No matches; return the original string */ + return return_self(self); + } + + /* Need to make a new string */ + result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); + if (result == NULL) + return NULL; + result_s = PyString_AS_STRING(result); + memcpy(result_s, self_s, self_len); + + + /* change everything in-place, starting with this one */ + start = result_s + offset; + memcpy(start, to_s, from_len); + start += from_len; + end = result_s + self_len; + + while ( --maxcount > 0) { + offset = findstring(start, end-start, + from_s, from_len, + 0, end-start, FORWARD); + if (offset==-1) + break; + memcpy(start+offset, to_s, from_len); + start += offset+from_len; + } + + return result; +} - /* copy non matching part of input string */ - memcpy(new_s, str, offset); - str += offset + pat_len; - len -= offset + pat_len; +/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ +Py_LOCAL(PyStringObject *) +replace_single_character(PyStringObject *self, + char from_c, + PyStringObject *to, + Py_ssize_t maxcount) +{ + char *self_s, *to_s, *result_s; + char *start, *next, *end; + Py_ssize_t self_len, to_len, result_len; + Py_ssize_t count, product; + PyStringObject *result; + + 
self_s = PyString_AS_STRING(self); + self_len = PyString_GET_SIZE(self); + + count = countchar(self_s, self_len, from_c, maxcount); + + if (count == 0) { + /* no matches, return unchanged */ + return return_self(self); + } + + to_s = PyString_AS_STRING(to); + to_len = PyString_GET_SIZE(to); + + /* use the difference between current and new, hence the "-1" */ + /* result_len = self_len + count * (to_len-1) */ + product = count * (to_len-1); + if (product / (to_len-1) != count) { + PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + return NULL; + } + result_len = self_len + product; + if (result_len < 0) { + PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + return NULL; + } + + if ( (result = (PyStringObject *) + PyString_FromStringAndSize(NULL, result_len)) == NULL) + return NULL; + result_s = PyString_AS_STRING(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + next = findchar(start, end-start, from_c); + if (next == NULL) + break; + + if (next == start) { + /* replace with the 'to' */ + memcpy(result_s, to_s, to_len); + result_s += to_len; + start += 1; + } else { + /* copy the unchanged old then the 'to' */ + memcpy(result_s, start, next-start); + result_s += (next-start); + memcpy(result_s, to_s, to_len); + result_s += to_len; + start = next+1; + } + } + /* Copy the remainder of the remaining string */ + memcpy(result_s, start, end-start); + + return result; +} - /* copy substitute into the output string */ - new_s += offset; - memcpy(new_s, sub, sub_len); - new_s += sub_len; - } - /* copy any remaining values into output string */ - if (len > 0) - memcpy(new_s, str, len); +/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ +Py_LOCAL(PyStringObject *) +replace_substring(PyStringObject *self, + PyStringObject *from, + PyStringObject *to, + Py_ssize_t maxcount) { + char *self_s, *from_s, *to_s, *result_s; + char *start, *next, *end; + Py_ssize_t self_len, from_len, to_len, result_len; + 
Py_ssize_t count, offset, product; + PyStringObject *result; + + self_s = PyString_AS_STRING(self); + self_len = PyString_GET_SIZE(self); + from_s = PyString_AS_STRING(from); + from_len = PyString_GET_SIZE(from); + + count = countstring(self_s, self_len, + from_s, from_len, + 0, self_len, FORWARD, maxcount); + if (count == 0) { + /* no matches, return unchanged */ + return return_self(self); + } + + to_s = PyString_AS_STRING(to); + to_len = PyString_GET_SIZE(to); + + /* Check for overflow */ + /* result_len = self_len + count * (to_len-from_len) */ + product = count * (to_len-from_len); + if (product / (to_len-from_len) != count) { + PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + return NULL; + } + result_len = self_len + product; + if (result_len < 0) { + PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + return NULL; + } + + if ( (result = (PyStringObject *) + PyString_FromStringAndSize(NULL, result_len)) == NULL) + return NULL; + result_s = PyString_AS_STRING(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + offset = findstring(start, end-start, + from_s, from_len, + 0, end-start, FORWARD); + if (offset == -1) + break; + next = start+offset; + if (next == start) { + /* replace with the 'to' */ + memcpy(result_s, to_s, to_len); + result_s += to_len; + start += from_len; + } else { + /* copy the unchanged old then the 'to' */ + memcpy(result_s, start, next-start); + result_s += (next-start); + memcpy(result_s, to_s, to_len); + result_s += to_len; + start = next+from_len; } - else { - for (;;++str, --len) { - memcpy(new_s, sub, sub_len); - new_s += sub_len; - if (--nfound <= 0) { - memcpy(new_s, str, len); - break; - } - *new_s++ = *str; - } + } + /* Copy the remainder of the remaining string */ + memcpy(result_s, start, end-start); + + return result; +} + + +Py_LOCAL(PyStringObject *) +replace(PyStringObject *self, + PyStringObject *from, + PyStringObject *to, + Py_ssize_t maxcount) +{ + 
Py_ssize_t from_len, to_len; + + if (maxcount < 0) { + maxcount = PY_SSIZE_T_MAX; + } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { + /* nothing to do; return the original string */ + return return_self(self); + } + + from_len = PyString_GET_SIZE(from); + to_len = PyString_GET_SIZE(to); + + if (maxcount == 0 || + (from_len == 0 && to_len == 0)) { + /* nothing to do; return the original string */ + return return_self(self); + } + + /* Handle zero-length special cases */ + + if (from_len == 0) { + /* insert the 'to' string everywhere. */ + /* >>> "Python".replace("", ".") */ + /* '.P.y.t.h.o.n.' */ + return replace_interleave(self, to, maxcount); + } + + /* Except for "".replace("", "A") == "A" there is no way beyond this */ + /* point for an empty self string to generate a non-empty string */ + /* Special case so the remaining code always gets a non-empty string */ + if (PyString_GET_SIZE(self) == 0) { + return return_self(self); + } + + if (to_len == 0) { + /* delete all occurances of 'from' string */ + if (from_len == 1) { + return replace_delete_single_character( + self, PyString_AS_STRING(from)[0], maxcount); + } else { + return replace_delete_substring(self, from, maxcount); } } - *out_len = new_len; - return out_s; - return_same: - *out_len = -1; - return (char *)str; /* cast away const */ -} + /* Handle special case where both strings have the same length */ + + if (from_len == to_len) { + if (from_len == 1) { + return replace_single_character_in_place( + self, + PyString_AS_STRING(from)[0], + PyString_AS_STRING(to)[0], + maxcount); + } else { + return replace_substring_in_place( + self, from, to, maxcount); + } + } + /* Otherwise use the more generic algorithms */ + if (from_len == 1) { + return replace_single_character(self, PyString_AS_STRING(from)[0], + to, maxcount); + } else { + /* len('from')>=2, len('to')>=1 */ + return replace_substring(self, from, to, maxcount); + } +} PyDoc_STRVAR(replace__doc__, "S.replace (old, new[, count]) -> 
string\n\ @@ -2520,66 +3062,42 @@ given, only the first count occurrences are replaced."); static PyObject * string_replace(PyStringObject *self, PyObject *args) { - const char *str = PyString_AS_STRING(self), *sub, *repl; - char *new_s; - const Py_ssize_t len = PyString_GET_SIZE(self); - Py_ssize_t sub_len, repl_len, out_len; Py_ssize_t count = -1; - PyObject *newobj; - PyObject *subobj, *replobj; + PyObject *from, *to; + const char *tmp_s; + Py_ssize_t tmp_len; - if (!PyArg_ParseTuple(args, "OO|n:replace", - &subobj, &replobj, &count)) + if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) return NULL; - if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); - sub_len = PyString_GET_SIZE(subobj); + if (PyString_Check(from)) { + /* Can this be made a '!check' after the Unicode check? */ } #ifdef Py_USING_UNICODE - else if (PyUnicode_Check(subobj)) + if (PyUnicode_Check(from)) return PyUnicode_Replace((PyObject *)self, - subobj, replobj, count); + from, to, count); #endif - else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) + else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len)) return NULL; - if (PyString_Check(replobj)) { - repl = PyString_AS_STRING(replobj); - repl_len = PyString_GET_SIZE(replobj); + if (PyString_Check(to)) { + /* Can this be made a '!check' after the Unicode check? 
*/ } #ifdef Py_USING_UNICODE - else if (PyUnicode_Check(replobj)) + else if (PyUnicode_Check(to)) return PyUnicode_Replace((PyObject *)self, - subobj, replobj, count); + from, to, count); #endif - else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len)) + else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len)) return NULL; - new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len); - if (new_s == NULL) { - PyErr_NoMemory(); - return NULL; - } - if (out_len == -1) { - if (PyString_CheckExact(self)) { - /* we're returning another reference to self */ - newobj = (PyObject*)self; - Py_INCREF(newobj); - } - else { - newobj = PyString_FromStringAndSize(str, len); - if (newobj == NULL) - return NULL; - } - } - else { - newobj = PyString_FromStringAndSize(new_s, out_len); - PyMem_FREE(new_s); - } - return newobj; + return (PyObject *)replace((PyStringObject *) self, + (PyStringObject *) from, + (PyStringObject *) to, count); } +/** End DALKE **/ PyDoc_STRVAR(startswith__doc__, "S.startswith(prefix[, start[, end]]) -> bool\n\ @@ -2820,7 +3338,7 @@ string_expandtabs(PyStringObject *self, PyObject *args) return u; } -static PyObject * +Py_LOCAL_INLINE(PyObject *) pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill) { PyObject *u; @@ -3237,6 +3755,14 @@ string_splitlines(PyStringObject *self, PyObject *args) data = PyString_AS_STRING(self); len = PyString_GET_SIZE(self); + /* This does not use the preallocated list because splitlines is + usually run with hundreds of newlines. The overhead of + switching between PyList_SET_ITEM and append causes about a + 2-3% slowdown for that common case. A smarter implementation + could move the if check out, so the SET_ITEMs are done first + and the appends only done when the prealloc buffer is full. 
+ That's too much work for little gain.*/ + list = PyList_New(0); if (!list) goto onError; @@ -3274,6 +3800,9 @@ string_splitlines(PyStringObject *self, PyObject *args) } #undef SPLIT_APPEND +#undef SPLIT_ADD +#undef MAX_PREALLOC +#undef PREALLOC_SIZE static PyObject * string_getnewargs(PyStringObject *v) @@ -3303,6 +3832,7 @@ string_methods[] = { {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__}, + {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, @@ -3310,6 +3840,8 @@ string_methods[] = { {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__}, {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__}, + {"rpartition", (PyCFunction)string_rpartition, METH_O, + rpartition__doc__}, {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__}, {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, @@ -3566,7 +4098,7 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize) /* Helpers for formatstring */ -static PyObject * +Py_LOCAL_INLINE(PyObject *) getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) { Py_ssize_t argidx = *p_argidx; @@ -3595,7 +4127,7 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) #define F_ALT (1<<3) #define F_ZERO (1<<4) -static int +Py_LOCAL_INLINE(int) formatfloat(char *buf, size_t buflen, int flags, int prec, int type, PyObject *v) { @@ -3782,7 +4314,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type, return result; } -static int +Py_LOCAL_INLINE(int) formatint(char *buf, size_t buflen, int flags, int prec, int type, PyObject *v) { @@ -3854,7 +4386,7 @@ formatint(char 
*buf, size_t buflen, int flags, return (int)strlen(buf); } -static int +Py_LOCAL_INLINE(int) formatchar(char *buf, size_t buflen, PyObject *v) { /* presume that the buffer is at least 2 characters long */ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 38820d4..03f2b07 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4636,10 +4636,10 @@ slot_tp_getattr_hook(PyObject *self, PyObject *name) (void *)PyObject_GenericGetAttr)) res = PyObject_GenericGetAttr(self, name); else - res = PyObject_CallFunction(getattribute, "OO", self, name); + res = PyObject_CallFunctionObjArgs(getattribute, self, name, NULL); if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Clear(); - res = PyObject_CallFunction(getattr, "OO", self, name); + res = PyObject_CallFunctionObjArgs(getattr, self, name, NULL); } return res; } @@ -4776,7 +4776,7 @@ slot_tp_descr_get(PyObject *self, PyObject *obj, PyObject *type) obj = Py_None; if (type == NULL) type = Py_None; - return PyObject_CallFunction(get, "OOO", self, obj, type); + return PyObject_CallFunctionObjArgs(get, self, obj, type, NULL); } static int @@ -5717,8 +5717,8 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type) if (su->ob_type != &PySuper_Type) /* If su is an instance of a (strict) subclass of super, call its type */ - return PyObject_CallFunction((PyObject *)su->ob_type, - "OO", su->type, obj); + return PyObject_CallFunctionObjArgs((PyObject *)su->ob_type, + su->type, obj, NULL); else { /* Inline the common case */ PyTypeObject *obj_type = supercheck(su->type, obj); diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c index b432399..73def09 100644 --- a/Objects/unicodectype.c +++ b/Objects/unicodectype.c @@ -140,20 +140,48 @@ int _PyUnicode_IsDigit(Py_UNICODE ch) double _PyUnicode_ToNumeric(Py_UNICODE ch) { switch (ch) { + case 0x0F33: + return (double) -1 / 2; + case 0x17F0: case 0x3007: +#ifdef Py_UNICODE_WIDE + case 0x1018A: +#endif return (double) 0; case 
0x09F4: + case 0x17F1: case 0x215F: case 0x2160: case 0x2170: case 0x3021: + case 0x3192: + case 0x3220: case 0x3280: +#ifdef Py_UNICODE_WIDE + case 0x10107: + case 0x10142: + case 0x10158: + case 0x10159: + case 0x1015A: + case 0x10320: + case 0x103D1: +#endif return (double) 1; case 0x00BD: + case 0x0F2A: + case 0x2CFD: +#ifdef Py_UNICODE_WIDE + case 0x10141: + case 0x10175: + case 0x10176: +#endif return (double) 1 / 2; case 0x2153: return (double) 1 / 3; case 0x00BC: +#ifdef Py_UNICODE_WIDE + case 0x10140: +#endif return (double) 1 / 4; case 0x2155: return (double) 1 / 5; @@ -168,92 +196,201 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) case 0x2469: case 0x247D: case 0x2491: + case 0x24FE: case 0x277F: case 0x2789: case 0x2793: case 0x3038: + case 0x3229: case 0x3289: +#ifdef Py_UNICODE_WIDE + case 0x10110: + case 0x10149: + case 0x10150: + case 0x10157: + case 0x10160: + case 0x10161: + case 0x10162: + case 0x10163: + case 0x10164: + case 0x10322: + case 0x103D3: + case 0x10A44: +#endif return (double) 10; case 0x0BF1: case 0x137B: case 0x216D: case 0x217D: +#ifdef Py_UNICODE_WIDE + case 0x10119: + case 0x1014B: + case 0x10152: + case 0x1016A: + case 0x103D5: + case 0x10A46: +#endif return (double) 100; case 0x0BF2: case 0x216F: case 0x217F: case 0x2180: +#ifdef Py_UNICODE_WIDE + case 0x10122: + case 0x1014D: + case 0x10154: + case 0x10171: + case 0x10A47: +#endif return (double) 1000; case 0x137C: case 0x2182: +#ifdef Py_UNICODE_WIDE + case 0x1012B: + case 0x10155: +#endif return (double) 10000; case 0x216A: case 0x217A: case 0x246A: case 0x247E: case 0x2492: + case 0x24EB: return (double) 11; + case 0x0F2F: + return (double) 11 / 2; case 0x216B: case 0x217B: case 0x246B: case 0x247F: case 0x2493: + case 0x24EC: return (double) 12; case 0x246C: case 0x2480: case 0x2494: + case 0x24ED: return (double) 13; + case 0x0F30: + return (double) 13 / 2; case 0x246D: case 0x2481: case 0x2495: + case 0x24EE: return (double) 14; case 0x246E: case 0x2482: case 0x2496: + 
case 0x24EF: return (double) 15; + case 0x0F31: + return (double) 15 / 2; case 0x09F9: case 0x246F: case 0x2483: case 0x2497: + case 0x24F0: return (double) 16; case 0x16EE: case 0x2470: case 0x2484: case 0x2498: + case 0x24F1: return (double) 17; + case 0x0F32: + return (double) 17 / 2; case 0x16EF: case 0x2471: case 0x2485: case 0x2499: + case 0x24F2: return (double) 18; case 0x16F0: case 0x2472: case 0x2486: case 0x249A: + case 0x24F3: return (double) 19; case 0x09F5: + case 0x17F2: case 0x2161: case 0x2171: case 0x3022: + case 0x3193: + case 0x3221: case 0x3281: +#ifdef Py_UNICODE_WIDE + case 0x10108: + case 0x1015B: + case 0x1015C: + case 0x1015D: + case 0x1015E: + case 0x103D2: +#endif return (double) 2; case 0x2154: +#ifdef Py_UNICODE_WIDE + case 0x10177: +#endif return (double) 2 / 3; case 0x2156: - return (double) 2 / 5; + return (double) 2 / 5; case 0x1373: case 0x2473: case 0x2487: case 0x249B: + case 0x24F4: case 0x3039: - return (double) 20; +#ifdef Py_UNICODE_WIDE + case 0x10111: + case 0x103D4: + case 0x10A45: +#endif + return (double) 20; +#ifdef Py_UNICODE_WIDE + case 0x1011A: + return (double) 200; + case 0x10123: + return (double) 2000; + case 0x1012C: + return (double) 20000; +#endif + case 0x3251: + return (double) 21; + case 0x3252: + return (double) 22; + case 0x3253: + return (double) 23; + case 0x3254: + return (double) 24; + case 0x3255: + return (double) 25; + case 0x3256: + return (double) 26; + case 0x3257: + return (double) 27; + case 0x3258: + return (double) 28; + case 0x3259: + return (double) 29; case 0x09F6: + case 0x17F3: case 0x2162: case 0x2172: case 0x3023: + case 0x3194: + case 0x3222: case 0x3282: +#ifdef Py_UNICODE_WIDE + case 0x10109: +#endif return (double) 3; + case 0x0F2B: + return (double) 3 / 2; case 0x00BE: +#ifdef Py_UNICODE_WIDE + case 0x10178: +#endif return (double) 3 / 4; case 0x2157: return (double) 3 / 5; @@ -261,22 +398,103 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) return (double) 3 / 8; case 0x1374: case 
0x303A: + case 0x325A: +#ifdef Py_UNICODE_WIDE + case 0x10112: + case 0x10165: +#endif return (double) 30; +#ifdef Py_UNICODE_WIDE + case 0x1011B: + case 0x1016B: + return (double) 300; + case 0x10124: + return (double) 3000; + case 0x1012D: + return (double) 30000; +#endif + case 0x325B: + return (double) 31; + case 0x325C: + return (double) 32; + case 0x325D: + return (double) 33; + case 0x325E: + return (double) 34; + case 0x325F: + return (double) 35; + case 0x32B1: + return (double) 36; + case 0x32B2: + return (double) 37; + case 0x32B3: + return (double) 38; + case 0x32B4: + return (double) 39; case 0x09F7: + case 0x17F4: case 0x2163: case 0x2173: case 0x3024: + case 0x3195: + case 0x3223: case 0x3283: +#ifdef Py_UNICODE_WIDE + case 0x1010A: +#endif return (double) 4; case 0x2158: return (double) 4 / 5; case 0x1375: - return (double) 40; + case 0x32B5: +#ifdef Py_UNICODE_WIDE + case 0x10113: +#endif + return (double) 40; +#ifdef Py_UNICODE_WIDE + case 0x1011C: + return (double) 400; + case 0x10125: + return (double) 4000; + case 0x1012E: + return (double) 40000; +#endif + case 0x32B6: + return (double) 41; + case 0x32B7: + return (double) 42; + case 0x32B8: + return (double) 43; + case 0x32B9: + return (double) 44; + case 0x32BA: + return (double) 45; + case 0x32BB: + return (double) 46; + case 0x32BC: + return (double) 47; + case 0x32BD: + return (double) 48; + case 0x32BE: + return (double) 49; + case 0x17F5: case 0x2164: case 0x2174: case 0x3025: + case 0x3224: case 0x3284: +#ifdef Py_UNICODE_WIDE + case 0x1010B: + case 0x10143: + case 0x10148: + case 0x1014F: + case 0x1015F: + case 0x10173: + case 0x10321: +#endif return (double) 5; + case 0x0F2C: + return (double) 5 / 2; case 0x215A: return (double) 5 / 6; case 0x215D: @@ -284,42 +502,147 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) case 0x1376: case 0x216C: case 0x217C: + case 0x32BF: +#ifdef Py_UNICODE_WIDE + case 0x10114: + case 0x10144: + case 0x1014A: + case 0x10151: + case 0x10166: + case 0x10167: 
+ case 0x10168: + case 0x10169: + case 0x10174: + case 0x10323: +#endif return (double) 50; case 0x216E: case 0x217E: +#ifdef Py_UNICODE_WIDE + case 0x1011D: + case 0x10145: + case 0x1014C: + case 0x10153: + case 0x1016C: + case 0x1016D: + case 0x1016E: + case 0x1016F: + case 0x10170: +#endif return (double) 500; case 0x2181: +#ifdef Py_UNICODE_WIDE + case 0x10126: + case 0x10146: + case 0x1014E: + case 0x10172: +#endif return (double) 5000; +#ifdef Py_UNICODE_WIDE + case 0x1012F: + case 0x10147: + case 0x10156: + return (double) 50000; +#endif + case 0x17F6: case 0x2165: case 0x2175: case 0x3026: + case 0x3225: case 0x3285: +#ifdef Py_UNICODE_WIDE + case 0x1010C: +#endif return (double) 6; case 0x1377: +#ifdef Py_UNICODE_WIDE + case 0x10115: +#endif return (double) 60; +#ifdef Py_UNICODE_WIDE + case 0x1011E: + return (double) 600; + case 0x10127: + return (double) 6000; + case 0x10130: + return (double) 60000; +#endif + case 0x17F7: case 0x2166: case 0x2176: case 0x3027: + case 0x3226: case 0x3286: +#ifdef Py_UNICODE_WIDE + case 0x1010D: +#endif return (double) 7; + case 0x0F2D: + return (double) 7 / 2; case 0x215E: return (double) 7 / 8; case 0x1378: +#ifdef Py_UNICODE_WIDE + case 0x10116: +#endif return (double) 70; +#ifdef Py_UNICODE_WIDE + case 0x1011F: + return (double) 700; + case 0x10128: + return (double) 7000; + case 0x10131: + return (double) 70000; +#endif + case 0x17F8: case 0x2167: case 0x2177: case 0x3028: + case 0x3227: case 0x3287: +#ifdef Py_UNICODE_WIDE + case 0x1010E: +#endif return (double) 8; case 0x1379: +#ifdef Py_UNICODE_WIDE + case 0x10117: +#endif return (double) 80; +#ifdef Py_UNICODE_WIDE + case 0x10120: + return (double) 800; + case 0x10129: + return (double) 8000; + case 0x10132: + return (double) 80000; +#endif + case 0x17F9: case 0x2168: case 0x2178: case 0x3029: + case 0x3228: case 0x3288: +#ifdef Py_UNICODE_WIDE + case 0x1010F: +#endif return (double) 9; + case 0x0F2E: + return (double) 9 / 2; case 0x137A: +#ifdef Py_UNICODE_WIDE 
+ case 0x10118: +#endif return (double) 90; +#ifdef Py_UNICODE_WIDE + case 0x10121: + case 0x1034A: + return (double) 900; + case 0x1012A: + return (double) 9000; + case 0x10133: + return (double) 90000; +#endif default: return (double) _PyUnicode_ToDigit(ch); } @@ -327,9 +650,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) int _PyUnicode_IsNumeric(Py_UNICODE ch) { - if (_PyUnicode_ToNumeric(ch) < 0.0) - return 0; - return 1; + return _PyUnicode_ToNumeric(ch) != -1.0; } #ifndef WANT_WCTYPE_FUNCTIONS diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e62c774..6cdb0fc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4,6 +4,9 @@ Unicode implementation based on original code by Fredrik Lundh, modified by Marc-Andre Lemburg <mal@lemburg.com> according to the Unicode Integration Proposal (see file Misc/unicode.txt). +Major speed upgrades to the method implementations at the Reykjavik +NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. + Copyright (c) Corporation for National Research Initiatives. -------------------------------------------------------------------- @@ -121,6 +124,51 @@ PyUnicode_GetMax(void) #endif } +/* --- Bloom Filters ----------------------------------------------------- */ + +/* stuff to implement simple "bloom filters" for Unicode characters. + to keep things simple, we use a single bitmask, using the least 5 + bits from each unicode characters as the bit index. 
*/ + +/* the linebreak mask is set up by Unicode_Init below */ + +#define BLOOM_MASK unsigned long + +static BLOOM_MASK bloom_linebreak; + +#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F)))) + +#define BLOOM_LINEBREAK(ch)\ + (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch))) + +Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) +{ + /* calculate simple bloom-style bitmask for a given unicode string */ + + long mask; + Py_ssize_t i; + + mask = 0; + for (i = 0; i < len; i++) + mask |= (1 << (ptr[i] & 0x1F)); + + return mask; +} + +Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen) +{ + Py_ssize_t i; + + for (i = 0; i < setlen; i++) + if (set[i] == chr) + return 1; + + return 0; +} + +#define BLOOM_MEMBER(mask, chr, set, setlen)\ + BLOOM(mask, chr) && unicode_member(chr, set, setlen) + /* --- Unicode Object ----------------------------------------------------- */ static @@ -136,6 +184,7 @@ int unicode_resize(register PyUnicodeObject *unicode, /* Resizing shared object (unicode_empty or single character objects) in-place is not allowed. Use PyUnicode_Resize() instead ! */ + if (unicode == unicode_empty || (unicode->length == 1 && unicode->str[0] < 256U && @@ -145,8 +194,11 @@ int unicode_resize(register PyUnicodeObject *unicode, return -1; } - /* We allocate one more byte to make sure the string is - Ux0000 terminated -- XXX is this needed ? */ + /* We allocate one more byte to make sure the string is Ux0000 terminated. + The overallocation is also used by fastsearch, which assumes that it's + safe to look at str[length] (without making any assumptions about what + it contains). 
*/ + oldstr = unicode->str; PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1); if (!unicode->str) { @@ -181,7 +233,7 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) { register PyUnicodeObject *unicode; - /* Optimization fo empty strings */ + /* Optimization for empty strings */ if (length == 0 && unicode_empty != NULL) { Py_INCREF(unicode_empty); return unicode_empty; @@ -1963,9 +2015,20 @@ onError: */ -static const Py_UNICODE *findchar(const Py_UNICODE *s, - Py_ssize_t size, - Py_UNICODE ch); +Py_LOCAL_INLINE(const Py_UNICODE *) findchar(const Py_UNICODE *s, + Py_ssize_t size, + Py_UNICODE ch) +{ + /* like wcschr, but doesn't stop at NULL characters */ + + while (size-- > 0) { + if (*s == ch) + return s; + s++; + } + + return NULL; +} static PyObject *unicodeescape_string(const Py_UNICODE *s, @@ -2313,7 +2376,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, end = s + size; while (s < end) { - *p = *(Py_UNICODE *)s; + memcpy(p, s, sizeof(Py_UNICODE)); /* We have to sanity check the raw data, otherwise doom looms for some malformed UCS-4 data. 
*/ if ( @@ -3791,124 +3854,104 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, /* --- Helpers ------------------------------------------------------------ */ -static -Py_ssize_t count(PyUnicodeObject *self, - Py_ssize_t start, - Py_ssize_t end, - PyUnicodeObject *substring) -{ - Py_ssize_t count = 0; +#define STRINGLIB_CHAR Py_UNICODE - if (start < 0) - start += self->length; - if (start < 0) - start = 0; - if (end > self->length) - end = self->length; - if (end < 0) - end += self->length; - if (end < 0) - end = 0; +#define STRINGLIB_LEN PyUnicode_GET_SIZE +#define STRINGLIB_NEW PyUnicode_FromUnicode +#define STRINGLIB_STR PyUnicode_AS_UNICODE - if (substring->length == 0) - return (end - start + 1); +Py_LOCAL_INLINE(int) +STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len) +{ + if (str[0] != other[0]) + return 1; + return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE)); +} - end -= substring->length; +#define STRINGLIB_EMPTY unicode_empty - while (start <= end) - if (Py_UNICODE_MATCH(self, start, substring)) { - count++; - start += substring->length; - } else - start++; +#include "stringlib/fastsearch.h" - return count; -} +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/partition.h" + +/* helper macro to fixup start/end slice values */ +#define FIX_START_END(obj) \ + if (start < 0) \ + start += (obj)->length; \ + if (start < 0) \ + start = 0; \ + if (end > (obj)->length) \ + end = (obj)->length; \ + if (end < 0) \ + end += (obj)->length; \ + if (end < 0) \ + end = 0; Py_ssize_t PyUnicode_Count(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end) { Py_ssize_t result; + PyUnicodeObject* str_obj; + PyUnicodeObject* sub_obj; - str = PyUnicode_FromObject(str); - if (str == NULL) + str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str); + if (!str_obj) return -1; - substr = PyUnicode_FromObject(substr); - if (substr == NULL) 
{ - Py_DECREF(str); + sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr); + if (!sub_obj) { + Py_DECREF(str_obj); return -1; } - result = count((PyUnicodeObject *)str, - start, end, - (PyUnicodeObject *)substr); - - Py_DECREF(str); - Py_DECREF(substr); - return result; -} - -static -Py_ssize_t findstring(PyUnicodeObject *self, - PyUnicodeObject *substring, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - if (start < 0) - start += self->length; - if (start < 0) - start = 0; - - if (end > self->length) - end = self->length; - if (end < 0) - end += self->length; - if (end < 0) - end = 0; + FIX_START_END(str_obj); - if (substring->length == 0) - return (direction > 0) ? start : end; - - end -= substring->length; + result = stringlib_count( + str_obj->str + start, end - start, sub_obj->str, sub_obj->length + ); - if (direction < 0) { - for (; end >= start; end--) - if (Py_UNICODE_MATCH(self, end, substring)) - return end; - } else { - for (; start <= end; start++) - if (Py_UNICODE_MATCH(self, start, substring)) - return start; - } + Py_DECREF(sub_obj); + Py_DECREF(str_obj); - return -1; + return result; } Py_ssize_t PyUnicode_Find(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end, - int direction) + PyObject *sub, + Py_ssize_t start, + Py_ssize_t end, + int direction) { Py_ssize_t result; str = PyUnicode_FromObject(str); - if (str == NULL) + if (!str) return -2; - substr = PyUnicode_FromObject(substr); - if (substr == NULL) { + sub = PyUnicode_FromObject(sub); + if (!sub) { Py_DECREF(str); return -2; } - result = findstring((PyUnicodeObject *)str, - (PyUnicodeObject *)substr, - start, end, direction); + if (direction > 0) + result = stringlib_find_slice( + PyUnicode_AS_UNICODE(str), PyUnicode_GET_SIZE(str), + PyUnicode_AS_UNICODE(sub), PyUnicode_GET_SIZE(sub), + start, end + ); + else + result = stringlib_rfind_slice( + PyUnicode_AS_UNICODE(str), PyUnicode_GET_SIZE(str), + PyUnicode_AS_UNICODE(sub), PyUnicode_GET_SIZE(sub), + 
start, end + ); + Py_DECREF(str); - Py_DECREF(substr); + Py_DECREF(sub); + return result; } @@ -3919,20 +3962,10 @@ int tailmatch(PyUnicodeObject *self, Py_ssize_t end, int direction) { - if (start < 0) - start += self->length; - if (start < 0) - start = 0; - if (substring->length == 0) return 1; - if (end > self->length) - end = self->length; - if (end < 0) - end += self->length; - if (end < 0) - end = 0; + FIX_START_END(self); end -= substring->length; if (end < start) @@ -3974,22 +4007,6 @@ Py_ssize_t PyUnicode_Tailmatch(PyObject *str, return result; } -static -const Py_UNICODE *findchar(const Py_UNICODE *s, - Py_ssize_t size, - Py_UNICODE ch) -{ - /* like wcschr, but doesn't stop at NULL characters */ - - while (size-- > 0) { - if (*s == ch) - return s; - s++; - } - - return NULL; -} - /* Apply fixfct filter to the Unicode object self and return a reference to the modified object */ @@ -4148,10 +4165,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) PyObject *internal_separator = NULL; const Py_UNICODE blank = ' '; const Py_UNICODE *sep = ␣ - size_t seplen = 1; + Py_ssize_t seplen = 1; PyUnicodeObject *res = NULL; /* the result */ - size_t res_alloc = 100; /* # allocated bytes for string in res */ - size_t res_used; /* # used bytes */ + Py_ssize_t res_alloc = 100; /* # allocated bytes for string in res */ + Py_ssize_t res_used; /* # used bytes */ Py_UNICODE *res_p; /* pointer to free byte in res's string area */ PyObject *fseq; /* PySequence_Fast(seq) */ Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */ @@ -4212,8 +4229,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) res_used = 0; for (i = 0; i < seqlen; ++i) { - size_t itemlen; - size_t new_res_used; + Py_ssize_t itemlen; + Py_ssize_t new_res_used; item = PySequence_Fast_GET_ITEM(fseq, i); /* Convert item to Unicode. */ @@ -4235,19 +4252,18 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) /* Make sure we have enough space for the separator and the item. 
*/ itemlen = PyUnicode_GET_SIZE(item); new_res_used = res_used + itemlen; - if (new_res_used < res_used || new_res_used > PY_SSIZE_T_MAX) + if (new_res_used <= 0) goto Overflow; if (i < seqlen - 1) { new_res_used += seplen; - if (new_res_used < res_used || new_res_used > PY_SSIZE_T_MAX) + if (new_res_used <= 0) goto Overflow; } if (new_res_used > res_alloc) { /* double allocated size until it's big enough */ do { - size_t oldsize = res_alloc; res_alloc += res_alloc; - if (res_alloc < oldsize || res_alloc > PY_SSIZE_T_MAX) + if (res_alloc <= 0) goto Overflow; } while (new_res_used > res_alloc); if (_PyUnicode_Resize(&res, res_alloc) < 0) { @@ -4333,17 +4349,6 @@ PyUnicodeObject *pad(PyUnicodeObject *self, else \ Py_DECREF(str); -#define SPLIT_INSERT(data, left, right) \ - str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \ - if (!str) \ - goto onError; \ - if (PyList_Insert(list, 0, str)) { \ - Py_DECREF(str); \ - goto onError; \ - } \ - else \ - Py_DECREF(str); - static PyObject *split_whitespace(PyUnicodeObject *self, PyObject *list, @@ -4404,7 +4409,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string, Py_ssize_t eol; /* Find a line and append it */ - while (i < len && !Py_UNICODE_ISLINEBREAK(data[i])) + while (i < len && !BLOOM_LINEBREAK(data[i])) i++; /* Skip the line break reading CRLF as one line break */ @@ -4515,15 +4520,17 @@ PyObject *rsplit_whitespace(PyUnicodeObject *self, if (j > i) { if (maxcount-- <= 0) break; - SPLIT_INSERT(self->str, i + 1, j + 1); + SPLIT_APPEND(self->str, i + 1, j + 1); while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i])) i--; j = i; } } if (j >= 0) { - SPLIT_INSERT(self->str, 0, j + 1); + SPLIT_APPEND(self->str, 0, j + 1); } + if (PyList_Reverse(list) < 0) + goto onError; return list; onError: @@ -4546,14 +4553,16 @@ PyObject *rsplit_char(PyUnicodeObject *self, if (self->str[i] == ch) { if (maxcount-- <= 0) break; - SPLIT_INSERT(self->str, i + 1, j + 1); + SPLIT_APPEND(self->str, i + 1, j + 1); j = i = i - 1; } else 
i--; } if (j >= -1) { - SPLIT_INSERT(self->str, 0, j + 1); + SPLIT_APPEND(self->str, 0, j + 1); } + if (PyList_Reverse(list) < 0) + goto onError; return list; onError: @@ -4577,15 +4586,17 @@ PyObject *rsplit_substring(PyUnicodeObject *self, if (Py_UNICODE_MATCH(self, i, substring)) { if (maxcount-- <= 0) break; - SPLIT_INSERT(self->str, i + sublen, j); + SPLIT_APPEND(self->str, i + sublen, j); j = i; i -= sublen; } else i--; } if (j >= 0) { - SPLIT_INSERT(self->str, 0, j); + SPLIT_APPEND(self->str, 0, j); } + if (PyList_Reverse(list) < 0) + goto onError; return list; onError: @@ -4594,7 +4605,6 @@ PyObject *rsplit_substring(PyUnicodeObject *self, } #undef SPLIT_APPEND -#undef SPLIT_INSERT static PyObject *split(PyUnicodeObject *self, @@ -4665,88 +4675,128 @@ PyObject *replace(PyUnicodeObject *self, if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (str1->length == 1 && str2->length == 1) { + if (str1->length == str2->length) { + /* same length */ Py_ssize_t i; - - /* replace characters */ - if (!findchar(self->str, self->length, str1->str[0]) && - PyUnicode_CheckExact(self)) { - /* nothing to replace, return original string */ - Py_INCREF(self); - u = self; + if (str1->length == 1) { + /* replace characters */ + Py_UNICODE u1, u2; + if (!findchar(self->str, self->length, str1->str[0])) + goto nothing; + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + if (!u) + return NULL; + Py_UNICODE_COPY(u->str, self->str, self->length); + u1 = str1->str[0]; + u2 = str2->str[0]; + for (i = 0; i < u->length; i++) + if (u->str[i] == u1) { + if (--maxcount < 0) + break; + u->str[i] = u2; + } } else { - Py_UNICODE u1 = str1->str[0]; - Py_UNICODE u2 = str2->str[0]; - - u = (PyUnicodeObject*) PyUnicode_FromUnicode( - NULL, - self->length + i = fastsearch( + self->str, self->length, str1->str, str1->length, FAST_SEARCH ); - if (u != NULL) { - Py_UNICODE_COPY(u->str, self->str, - self->length); - for (i = 0; i < u->length; i++) - if (u->str[i] == u1) { - if 
(--maxcount < 0) - break; - u->str[i] = u2; - } - } + if (i < 0) + goto nothing; + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + if (!u) + return NULL; + Py_UNICODE_COPY(u->str, self->str, self->length); + while (i <= self->length - str1->length) + if (Py_UNICODE_MATCH(self, i, str1)) { + if (--maxcount < 0) + break; + Py_UNICODE_COPY(u->str+i, str2->str, str2->length); + i += str1->length; + } else + i++; } - } else { - Py_ssize_t n, i; + + Py_ssize_t n, i, j, e; + Py_ssize_t product, new_size, delta; Py_UNICODE *p; /* replace strings */ - n = count(self, 0, self->length, str1); + n = stringlib_count(self->str, self->length, str1->str, str1->length); if (n > maxcount) n = maxcount; - if (n == 0) { - /* nothing to replace, return original string */ - if (PyUnicode_CheckExact(self)) { - Py_INCREF(self); - u = self; - } - else { - u = (PyUnicodeObject *) - PyUnicode_FromUnicode(self->str, self->length); - } + if (n == 0) + goto nothing; + /* new_size = self->length + n * (str2->length - str1->length)); */ + delta = (str2->length - str1->length); + if (delta == 0) { + new_size = self->length; } else { - u = _PyUnicode_New( - self->length + n * (str2->length - str1->length)); - if (u) { - i = 0; - p = u->str; - if (str1->length > 0) { - while (i <= self->length - str1->length) - if (Py_UNICODE_MATCH(self, i, str1)) { - /* replace string segment */ - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; - i += str1->length; - if (--n <= 0) { - /* copy remaining part */ - Py_UNICODE_COPY(p, self->str+i, self->length-i); - break; - } - } else - *p++ = self->str[i++]; - } else { - while (n > 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; - if (--n <= 0) - break; - *p++ = self->str[i++]; - } - Py_UNICODE_COPY(p, self->str+i, self->length-i); + product = n * (str2->length - str1->length); + if ((product / (str2->length - str1->length)) != n) { + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); + 
return NULL; + } + new_size = self->length + product; + if (new_size < 0) { + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); + return NULL; + } + } + u = _PyUnicode_New(new_size); + if (!u) + return NULL; + i = 0; + p = u->str; + e = self->length - str1->length; + if (str1->length > 0) { + while (n-- > 0) { + /* look for next match */ + j = i; + while (j <= e) { + if (Py_UNICODE_MATCH(self, j, str1)) + break; + j++; + } + if (j > i) { + if (j > e) + break; + /* copy unchanged part [i:j] */ + Py_UNICODE_COPY(p, self->str+i, j-i); + p += j - i; + } + /* copy substitution string */ + if (str2->length > 0) { + Py_UNICODE_COPY(p, str2->str, str2->length); + p += str2->length; } + i = j + str1->length; + } + if (i < self->length) + /* copy tail [i:] */ + Py_UNICODE_COPY(p, self->str+i, self->length-i); + } else { + /* interleave */ + while (n > 0) { + Py_UNICODE_COPY(p, str2->str, str2->length); + p += str2->length; + if (--n <= 0) + break; + *p++ = self->str[i++]; } + Py_UNICODE_COPY(p, self->str+i, self->length-i); } } - return (PyObject *) u; + +nothing: + /* nothing to replace; return original string (when possible) */ + if (PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return (PyObject *) self; + } + return PyUnicode_FromUnicode(self->str, self->length); } /* --- Unicode Object Methods --------------------------------------------- */ @@ -4983,54 +5033,29 @@ onError: int PyUnicode_Contains(PyObject *container, PyObject *element) { - PyUnicodeObject *u = NULL, *v = NULL; + PyObject *str, *sub; int result; - Py_ssize_t size; - register const Py_UNICODE *lhs, *end, *rhs; /* Coerce the two arguments */ - v = (PyUnicodeObject *)PyUnicode_FromObject(element); - if (v == NULL) { + sub = PyUnicode_FromObject(element); + if (!sub) { PyErr_SetString(PyExc_TypeError, "'in <string>' requires string as left operand"); - goto onError; + return -1; } - u = (PyUnicodeObject *)PyUnicode_FromObject(container); - if (u == NULL) - goto onError; - - size = 
PyUnicode_GET_SIZE(v); - rhs = PyUnicode_AS_UNICODE(v); - lhs = PyUnicode_AS_UNICODE(u); - result = 0; - if (size == 1) { - end = lhs + PyUnicode_GET_SIZE(u); - while (lhs < end) { - if (*lhs++ == *rhs) { - result = 1; - break; - } - } - } - else { - end = lhs + (PyUnicode_GET_SIZE(u) - size); - while (lhs <= end) { - if (memcmp(lhs++, rhs, size * sizeof(Py_UNICODE)) == 0) { - result = 1; - break; - } - } + str = PyUnicode_FromObject(container); + if (!str) { + Py_DECREF(sub); + return -1; } - Py_DECREF(u); - Py_DECREF(v); - return result; + result = stringlib_contains_obj(str, sub); -onError: - Py_XDECREF(u); - Py_XDECREF(v); - return -1; + Py_DECREF(str); + Py_DECREF(sub); + + return result; } /* Concat to string or Unicode object giving a new Unicode object. */ @@ -5078,8 +5103,8 @@ onError: PyDoc_STRVAR(count__doc__, "S.count(sub[, start[, end]]) -> int\n\ \n\ -Return the number of occurrences of substring sub in Unicode string\n\ -S[start:end]. Optional arguments start and end are\n\ +Return the number of non-overlapping occurrences of substring sub in\n\ +Unicode string S[start:end]. 
Optional arguments start and end are\n\ interpreted as in slice notation."); static PyObject * @@ -5095,24 +5120,19 @@ unicode_count(PyUnicodeObject *self, PyObject *args) return NULL; substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); + (PyObject *)substring); if (substring == NULL) return NULL; - if (start < 0) - start += self->length; - if (start < 0) - start = 0; - if (end > self->length) - end = self->length; - if (end < 0) - end += self->length; - if (end < 0) - end = 0; + FIX_START_END(self); - result = PyInt_FromLong((long) count(self, start, end, substring)); + result = PyInt_FromSsize_t( + stringlib_count(self->str + start, end - start, + substring->str, substring->length) + ); Py_DECREF(substring); + return result; } @@ -5262,23 +5282,27 @@ Return -1 on failure."); static PyObject * unicode_find(PyUnicodeObject *self, PyObject *args) { - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; + Py_ssize_t result; if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1)); + result = stringlib_find_slice( + PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), + PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), + start, end + ); Py_DECREF(substring); - return result; + + return PyInt_FromSsize_t(result); } static PyObject * @@ -5328,26 +5352,30 @@ static PyObject * unicode_index(PyUnicodeObject *self, PyObject *args) { Py_ssize_t result; - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; if (!PyArg_ParseTuple(args, "O|O&O&:index", &substring, _PyEval_SliceIndex, &start, 
_PyEval_SliceIndex, &end)) return NULL; - - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = findstring(self, substring, start, end, 1); + result = stringlib_find_slice( + PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), + PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), + start, end + ); Py_DECREF(substring); + if (result < 0) { PyErr_SetString(PyExc_ValueError, "substring not found"); return NULL; } + return PyInt_FromSsize_t(result); } @@ -5702,16 +5730,6 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define STRIPNAME(i) (stripformat[i]+3) -static const Py_UNICODE * -unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n) -{ - size_t i; - for (i = 0; i < n; ++i) - if (s[i] == c) - return s+i; - return NULL; -} - /* externally visible for str.strip(unicode) */ PyObject * _PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj) @@ -5722,27 +5740,29 @@ _PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj) Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj); Py_ssize_t i, j; + BLOOM_MASK sepmask = make_bloom_mask(sep, seplen); + i = 0; if (striptype != RIGHTSTRIP) { - while (i < len && unicode_memchr(sep, s[i], seplen)) { - i++; - } + while (i < len && BLOOM_MEMBER(sepmask, s[i], sep, seplen)) { + i++; + } } j = len; if (striptype != LEFTSTRIP) { - do { - j--; - } while (j >= i && unicode_memchr(sep, s[j], seplen)); - j++; + do { + j--; + } while (j >= i && BLOOM_MEMBER(sepmask, s[j], sep, seplen)); + j++; } if (i == 0 && j == len && PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*)self; + Py_INCREF(self); + return (PyObject*)self; } else - return PyUnicode_FromUnicode(s+i, j-i); + return PyUnicode_FromUnicode(s+i, j-i); } @@ -5898,9 +5918,19 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len) p = u->str; - while 
(len-- > 0) { - Py_UNICODE_COPY(p, str->str, str->length); - p += str->length; + if (str->length == 1 && len > 0) { + Py_UNICODE_FILL(p, str->str[0], len); + } else { + Py_ssize_t done = 0; /* number of characters copied this far */ + if (done < nchars) { + Py_UNICODE_COPY(p, str->str, str->length); + done = str->length; + } + while (done < nchars) { + int n = (done <= nchars-done) ? done : nchars-done; + Py_UNICODE_COPY(p+done, p, n); + done += n; + } } return (PyObject*) u; @@ -5993,23 +6023,27 @@ Return -1 on failure."); static PyObject * unicode_rfind(PyUnicodeObject *self, PyObject *args) { - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; + Py_ssize_t result; if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1)); + result = stringlib_rfind_slice( + PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), + PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), + start, end + ); Py_DECREF(substring); - return result; + + return PyInt_FromSsize_t(result); } PyDoc_STRVAR(rindex__doc__, @@ -6020,22 +6054,26 @@ Like S.rfind() but raise ValueError when the substring is not found."); static PyObject * unicode_rindex(PyUnicodeObject *self, PyObject *args) { - Py_ssize_t result; - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t result; if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + substring = 
PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = findstring(self, substring, start, end, -1); + result = stringlib_rfind_slice( + PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), + PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), + start, end + ); Py_DECREF(substring); + if (result < 0) { PyErr_SetString(PyExc_ValueError, "substring not found"); return NULL; @@ -6137,6 +6175,87 @@ unicode_split(PyUnicodeObject *self, PyObject *args) return PyUnicode_Split((PyObject *)self, substring, maxcount); } +PyObject * +PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) +{ + PyObject* str_obj; + PyObject* sep_obj; + PyObject* out; + + str_obj = PyUnicode_FromObject(str_in); + if (!str_obj) + return NULL; + sep_obj = PyUnicode_FromObject(sep_in); + if (!sep_obj) { + Py_DECREF(str_obj); + return NULL; + } + + out = stringlib_partition( + str_obj, PyUnicode_AS_UNICODE(str_obj), PyUnicode_GET_SIZE(str_obj), + sep_obj, PyUnicode_AS_UNICODE(sep_obj), PyUnicode_GET_SIZE(sep_obj) + ); + + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + + return out; +} + + +PyObject * +PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) +{ + PyObject* str_obj; + PyObject* sep_obj; + PyObject* out; + + str_obj = PyUnicode_FromObject(str_in); + if (!str_obj) + return NULL; + sep_obj = PyUnicode_FromObject(sep_in); + if (!sep_obj) { + Py_DECREF(str_obj); + return NULL; + } + + out = stringlib_rpartition( + str_obj, PyUnicode_AS_UNICODE(str_obj), PyUnicode_GET_SIZE(str_obj), + sep_obj, PyUnicode_AS_UNICODE(sep_obj), PyUnicode_GET_SIZE(sep_obj) + ); + + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + + return out; +} + +PyDoc_STRVAR(partition__doc__, +"S.partition(sep) -> (head, sep, tail)\n\ +\n\ +Searches for the separator sep in S, and returns the part before it,\n\ +the separator itself, and the part after it. 
If the separator is not\n\ +found, returns S and two empty strings."); + +static PyObject* +unicode_partition(PyUnicodeObject *self, PyObject *separator) +{ + return PyUnicode_Partition((PyObject *)self, separator); +} + +PyDoc_STRVAR(rpartition__doc__, +"S.rpartition(sep) -> (head, sep, tail)\n\ +\n\ +Searches for the separator sep in S, starting at the end of S, and returns\n\ +the part before it, the separator itself, and the part after it. If the\n\ +separator is not found, returns S and two empty strings."); + +static PyObject* +unicode_rpartition(PyUnicodeObject *self, PyObject *separator) +{ + return PyUnicode_RPartition((PyObject *)self, separator); +} + PyObject *PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) @@ -6390,6 +6509,7 @@ static PyMethodDef unicode_methods[] = { {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__}, {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, + {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, @@ -6400,6 +6520,7 @@ static PyMethodDef unicode_methods[] = { {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__}, + {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__}, {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__}, {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__}, {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, @@ -7375,6 +7496,18 @@ void _PyUnicode_Init(void) { int i; + /* XXX - move this array to unicodectype.c ? 
*/ + Py_UNICODE linebreak[] = { + 0x000A, /* LINE FEED */ + 0x000D, /* CARRIAGE RETURN */ + 0x001C, /* FILE SEPARATOR */ + 0x001D, /* GROUP SEPARATOR */ + 0x001E, /* RECORD SEPARATOR */ + 0x0085, /* NEXT LINE */ + 0x2028, /* LINE SEPARATOR */ + 0x2029, /* PARAGRAPH SEPARATOR */ + }; + /* Init the implementation */ unicode_freelist = NULL; unicode_freelist_size = 0; @@ -7384,6 +7517,11 @@ void _PyUnicode_Init(void) unicode_latin1[i] = NULL; if (PyType_Ready(&PyUnicode_Type) < 0) Py_FatalError("Can't initialize 'unicode'"); + + /* initialize the linebreak bloom filter */ + bloom_linebreak = make_bloom_mask( + linebreak, sizeof(linebreak) / sizeof(linebreak[0]) + ); } /* Finalize the Unicode implementation */ diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 3f2c261..9c2a626 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -847,7 +847,7 @@ PyWeakref_GetObject(PyObject *ref) static void handle_callback(PyWeakReference *ref, PyObject *callback) { - PyObject *cbresult = PyObject_CallFunction(callback, "O", ref); + PyObject *cbresult = PyObject_CallFunctionObjArgs(callback, ref, NULL); if (cbresult == NULL) PyErr_WriteUnraisable(callback); |