summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r--Objects/classobject.c27
-rw-r--r--Objects/codeobject.c3
-rw-r--r--Objects/exceptions.c2141
-rw-r--r--Objects/fileobject.c92
-rw-r--r--Objects/floatobject.c7
-rw-r--r--Objects/frameobject.c220
-rw-r--r--Objects/longobject.c269
-rw-r--r--Objects/stringlib/README.txt34
-rw-r--r--Objects/stringlib/count.h34
-rw-r--r--Objects/stringlib/fastsearch.h104
-rw-r--r--Objects/stringlib/find.h112
-rw-r--r--Objects/stringlib/partition.h111
-rw-r--r--Objects/stringobject.c1408
-rw-r--r--Objects/typeobject.c10
-rw-r--r--Objects/unicodectype.c333
-rw-r--r--Objects/unicodeobject.c772
-rw-r--r--Objects/weakrefobject.c2
17 files changed, 4690 insertions, 989 deletions
diff --git a/Objects/classobject.c b/Objects/classobject.c
index 594de11..9cfdf0e 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -81,12 +81,9 @@ PyClass_New(PyObject *bases, PyObject *dict, PyObject *name)
if (!PyClass_Check(base)) {
if (PyCallable_Check(
(PyObject *) base->ob_type))
- return PyObject_CallFunction(
+ return PyObject_CallFunctionObjArgs(
(PyObject *) base->ob_type,
- "OOO",
- name,
- bases,
- dict);
+ name, bases, dict, NULL);
PyErr_SetString(PyExc_TypeError,
"PyClass_New: base must be a class");
return NULL;
@@ -320,7 +317,7 @@ class_setattr(PyClassObject *op, PyObject *name, PyObject *v)
}
sname = PyString_AsString(name);
if (sname[0] == '_' && sname[1] == '_') {
- int n = PyString_Size(name);
+ Py_ssize_t n = PyString_Size(name);
if (sname[n-1] == '_' && sname[n-2] == '_') {
char *err = NULL;
if (strcmp(sname, "__dict__") == 0)
@@ -380,7 +377,7 @@ class_str(PyClassObject *op)
PyObject *mod = PyDict_GetItemString(op->cl_dict, "__module__");
PyObject *name = op->cl_name;
PyObject *res;
- int m, n;
+ Py_ssize_t m, n;
if (name == NULL || !PyString_Check(name))
return class_repr(op);
@@ -638,7 +635,7 @@ instance_dealloc(register PyInstanceObject *inst)
PyObject_GC_Del(inst);
}
else {
- int refcnt = inst->ob_refcnt;
+ Py_ssize_t refcnt = inst->ob_refcnt;
/* __del__ resurrected it! Make it look like the original
* Py_DECREF never happened.
*/
@@ -778,7 +775,7 @@ instance_setattr(PyInstanceObject *inst, PyObject *name, PyObject *v)
PyObject *func, *args, *res, *tmp;
char *sname = PyString_AsString(name);
if (sname[0] == '_' && sname[1] == '_') {
- int n = PyString_Size(name);
+ Py_ssize_t n = PyString_Size(name);
if (sname[n-1] == '_' && sname[n-2] == '_') {
if (strcmp(sname, "__dict__") == 0) {
if (PyEval_GetRestricted()) {
@@ -1075,21 +1072,15 @@ static PyMappingMethods instance_as_mapping = {
static PyObject *
instance_item(PyInstanceObject *inst, Py_ssize_t i)
{
- PyObject *func, *arg, *res;
+ PyObject *func, *res;
if (getitemstr == NULL)
getitemstr = PyString_InternFromString("__getitem__");
func = instance_getattr(inst, getitemstr);
if (func == NULL)
return NULL;
- arg = Py_BuildValue("(n)", i);
- if (arg == NULL) {
- Py_DECREF(func);
- return NULL;
- }
- res = PyEval_CallObject(func, arg);
+ res = PyObject_CallFunction(func, "n", i);
Py_DECREF(func);
- Py_DECREF(arg);
return res;
}
@@ -1263,7 +1254,7 @@ instance_contains(PyInstanceObject *inst, PyObject *member)
*/
PyErr_Clear();
return _PySequence_IterSearch((PyObject *)inst, member,
- PY_ITERSEARCH_CONTAINS);
+ PY_ITERSEARCH_CONTAINS) > 0;
}
else
return -1;
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 8ae2399..a9bcb01 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -102,6 +102,7 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags,
co->co_firstlineno = firstlineno;
Py_INCREF(lnotab);
co->co_lnotab = lnotab;
+ co->co_zombieframe = NULL;
}
return co;
}
@@ -265,6 +266,8 @@ code_dealloc(PyCodeObject *co)
Py_XDECREF(co->co_filename);
Py_XDECREF(co->co_name);
Py_XDECREF(co->co_lnotab);
+ if (co->co_zombieframe != NULL)
+ PyObject_GC_Del(co->co_zombieframe);
PyObject_DEL(co);
}
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
new file mode 100644
index 0000000..6271372
--- /dev/null
+++ b/Objects/exceptions.c
@@ -0,0 +1,2141 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "structmember.h"
+#include "osdefs.h"
+
+#define MAKE_IT_NONE(x) (x) = Py_None; Py_INCREF(Py_None);
+#define EXC_MODULE_NAME "exceptions."
+
+/* NOTE: If the exception class hierarchy changes, don't forget to update
+ * Lib/test/exception_hierarchy.txt
+ */
+
+PyDoc_STRVAR(exceptions_doc, "Python's standard exception class hierarchy.\n\
+\n\
+Exceptions found here are defined both in the exceptions module and the\n\
+built-in namespace. It is recommended that user-defined exceptions\n\
+inherit from Exception. See the documentation for the exception\n\
+inheritance hierarchy.\n\
+");
+
+/*
+ * BaseException
+ */
+/* tp_new for BaseException and its simple subclasses.
+ *
+ * Allocates the instance through type->tp_alloc and gives it an empty
+ * args tuple and an empty-string message.  args and kwds are not examined
+ * here.  Returns a new reference, or NULL if any allocation fails.
+ */
+static PyObject *
+BaseException_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyBaseExceptionObject *self;
+
+    self = (PyBaseExceptionObject *)type->tp_alloc(type, 0);
+    /* tp_alloc can fail; never touch the fields of a NULL instance. */
+    if (!self)
+        return NULL;
+
+    /* the dict is created on the fly in PyObject_GenericSetAttr */
+    self->message = self->dict = NULL;
+
+    self->args = PyTuple_New(0);
+    if (!self->args) {
+        Py_DECREF(self);
+        return NULL;
+    }
+
+    self->message = PyString_FromString("");
+    if (!self->message) {
+        Py_DECREF(self);
+        return NULL;
+    }
+
+    return (PyObject *)self;
+}
+
+/* tp_init for BaseException: remember the positional arguments in
+ * self->args and, when exactly one was given, also expose it as
+ * self->message.  kwds is not inspected.  Always returns 0.
+ */
+static int
+BaseException_init(PyBaseExceptionObject *self, PyObject *args, PyObject *kwds)
+{
+    PyObject *only_arg;
+
+    /* Swap the stored tuple for the caller's one. */
+    Py_DECREF(self->args);
+    Py_INCREF(args);
+    self->args = args;
+
+    if (PyTuple_GET_SIZE(args) != 1)
+        return 0;
+
+    /* A single argument doubles as the message. */
+    only_arg = PyTuple_GET_ITEM(args, 0);
+    Py_DECREF(self->message);
+    Py_INCREF(only_arg);
+    self->message = only_arg;
+    return 0;
+}
+
+/* tp_clear handler: drop the references held in dict, args and message,
+ * leaving each slot NULL.  Always returns 0.  Also called by the
+ * subclass clear functions and by BaseException_dealloc.
+ */
+int
+BaseException_clear(PyBaseExceptionObject *self)
+{
+ Py_CLEAR(self->dict);
+ Py_CLEAR(self->args);
+ Py_CLEAR(self->message);
+ return 0;
+}
+
+/* tp_dealloc for BaseException and the simple subclasses: release every
+ * owned reference and hand the memory back through tp_free.
+ */
+static void
+BaseException_dealloc(PyBaseExceptionObject *self)
+{
+    /* The type has Py_TPFLAGS_HAVE_GC: stop the collector from traversing
+     * the object before its fields are torn down. */
+    PyObject_GC_UnTrack(self);
+    BaseException_clear(self);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* tp_traverse handler: report dict, args and message to the garbage
+ * collector's visit callback.  Returns the first non-zero value produced
+ * by visit, or 0.
+ */
+int
+BaseException_traverse(PyBaseExceptionObject *self, visitproc visit, void *arg)
+{
+    /* Py_VISIT ignores NULL members, so the lazily created dict needs no
+     * separate guard. */
+    Py_VISIT(self->dict);
+    Py_VISIT(self->args);
+    Py_VISIT(self->message);
+    return 0;
+}
+
+/* tp_str for BaseException.
+ *
+ *   no arguments   -> ""
+ *   one argument   -> str(args[0])
+ *   anything else  -> str(args)
+ *
+ * A failure to measure args is swallowed and treated like the last case.
+ * Returns a new reference or NULL on error.
+ */
+static PyObject *
+BaseException_str(PyBaseExceptionObject *self)
+{
+    Py_ssize_t count = PySequence_Length(self->args);
+
+    if (count == 0)
+        return PyString_FromString("");
+
+    if (count == 1) {
+        PyObject *text;
+        PyObject *item = PySequence_GetItem(self->args, 0);
+
+        if (item == NULL)
+            return NULL;
+        text = PyObject_Str(item);
+        Py_DECREF(item);
+        return text;
+    }
+
+    /* Length lookup failed: forget the error and stringify args as is. */
+    if (count == -1)
+        PyErr_Clear();
+
+    return PyObject_Str(self->args);
+}
+
+/* tp_repr for BaseException: "<ClassName><repr of args>", where the class
+ * name is tp_name with everything up to the last '.' removed and an empty
+ * args tuple is rendered as "()".  Returns a new reference or NULL.
+ */
+static PyObject *
+BaseException_repr(PyBaseExceptionObject *self)
+{
+    Py_ssize_t nargs;
+    PyObject *tail;
+    PyObject *head;
+    const char *short_name;
+    const char *last_dot;
+
+    nargs = PySequence_Length(self->args);
+    if (nargs < 0)
+        return NULL;
+
+    /* Argument part of the repr. */
+    tail = (nargs == 0) ? PyString_FromString("()")
+                        : PyObject_Repr(self->args);
+    if (tail == NULL)
+        return NULL;
+
+    /* Unqualified type name. */
+    short_name = self->ob_type->tp_name;
+    last_dot = strrchr(short_name, '.');
+    if (last_dot != NULL)
+        short_name = last_dot + 1;
+
+    head = PyString_FromString(short_name);
+    if (head == NULL) {
+        Py_DECREF(tail);
+        return NULL;
+    }
+
+    /* Consumes tail; head becomes NULL if the concatenation fails. */
+    PyString_ConcatAndDel(&head, tail);
+    return head;
+}
+
+/* Pickling support */
+/* __reduce__: return (type, args, dict) when the instance has a __dict__,
+ * otherwise (type, args).  Returns a new tuple or NULL on error.
+ */
+static PyObject *
+BaseException_reduce(PyBaseExceptionObject *self)
+{
+    PyObject *args = self->args;
+    PyObject *result;
+
+    /* args is only NULL after tp_clear; pickle such an object as if it
+     * had been created without arguments. */
+    if (args == NULL) {
+        args = PyTuple_New(0);
+        if (args == NULL)
+            return NULL;
+    }
+    else
+        Py_INCREF(args);
+
+    /* The dict is created lazily, and PyTuple_Pack must never be handed
+     * a NULL item. */
+    if (self->dict != NULL)
+        result = PyTuple_Pack(3, self->ob_type, args, self->dict);
+    else
+        result = PyTuple_Pack(2, self->ob_type, args);
+
+    Py_DECREF(args);
+    return result;
+}
+
+
+#ifdef Py_USING_UNICODE
+/* While this method generates fairly uninspired output, it at least
+ * guarantees that we can display exceptions that have unicode attributes.
+ */
+/* __unicode__: empty unicode string for no arguments, unicode(args[0])
+ * for a single argument, unicode(args) otherwise.  Returns a new
+ * reference or NULL on error.
+ */
+static PyObject *
+BaseException_unicode(PyBaseExceptionObject *self)
+{
+    Py_ssize_t count = PySequence_Length(self->args);
+    PyObject *first;
+    PyObject *text;
+
+    if (count == 0)
+        return PyUnicode_FromUnicode(NULL, 0);
+
+    if (count != 1)
+        return PyObject_Unicode(self->args);
+
+    first = PySequence_GetItem(self->args, 0);
+    if (first == NULL)
+        return NULL;
+
+    text = PyObject_Unicode(first);
+    Py_DECREF(first);
+    return text;
+}
+#endif /* Py_USING_UNICODE */
+
+static PyMethodDef BaseException_methods[] = {
+ {"__reduce__", (PyCFunction)BaseException_reduce, METH_NOARGS },
+#ifdef Py_USING_UNICODE
+ {"__unicode__", (PyCFunction)BaseException_unicode, METH_NOARGS },
+#endif
+ {NULL, NULL, 0, NULL},
+};
+
+
+
+/* sq_item handler: exc[index] is forwarded to self->args[index].
+ * Returns whatever PySequence_GetItem returns (new reference or NULL).
+ */
+static PyObject *
+BaseException_getitem(PyBaseExceptionObject *self, Py_ssize_t index)
+{
+ return PySequence_GetItem(self->args, index);
+}
+
+static PySequenceMethods BaseException_as_sequence = {
+ 0, /* sq_length; */
+ 0, /* sq_concat; */
+ 0, /* sq_repeat; */
+ (ssizeargfunc)BaseException_getitem, /* sq_item; */
+ 0, /* sq_slice; */
+ 0, /* sq_ass_item; */
+ 0, /* sq_ass_slice; */
+ 0, /* sq_contains; */
+ 0, /* sq_inplace_concat; */
+ 0 /* sq_inplace_repeat; */
+};
+
+static PyMemberDef BaseException_members[] = {
+ {"message", T_OBJECT, offsetof(PyBaseExceptionObject, message), 0,
+ PyDoc_STR("exception message")},
+ {NULL} /* Sentinel */
+};
+
+
+/* Getter for __dict__: create the instance dictionary on first access
+ * and return a new reference to it, or NULL if it cannot be created.
+ */
+static PyObject *
+BaseException_get_dict(PyBaseExceptionObject *self)
+{
+    PyObject *d = self->dict;
+
+    if (d == NULL) {
+        d = PyDict_New();
+        if (d == NULL)
+            return NULL;
+        self->dict = d;
+    }
+
+    Py_INCREF(d);
+    return d;
+}
+
+/* Setter for __dict__: accepts only a dictionary and refuses deletion.
+ * Returns 0 on success, -1 with TypeError set otherwise.
+ */
+static int
+BaseException_set_dict(PyBaseExceptionObject *self, PyObject *val)
+{
+    const char *complaint = NULL;
+
+    if (val == NULL)
+        complaint = "__dict__ may not be deleted";
+    else if (!PyDict_Check(val))
+        complaint = "__dict__ must be a dictionary";
+
+    if (complaint != NULL) {
+        PyErr_SetString(PyExc_TypeError, complaint);
+        return -1;
+    }
+
+    Py_CLEAR(self->dict);
+    Py_INCREF(val);
+    self->dict = val;
+    return 0;
+}
+
+/* Getter for args: a new reference to the stored tuple, or to None when
+ * no tuple is stored.
+ */
+static PyObject *
+BaseException_get_args(PyBaseExceptionObject *self)
+{
+    PyObject *value = (self->args != NULL) ? self->args : Py_None;
+
+    Py_INCREF(value);
+    return value;
+}
+
+/* Setter for args: any sequence is accepted and stored as a tuple;
+ * deletion is refused.  Returns 0 on success, -1 with an exception set
+ * otherwise.
+ */
+static int
+BaseException_set_args(PyBaseExceptionObject *self, PyObject *val)
+{
+    PyObject *seq;
+    PyObject *old;
+
+    if (val == NULL) {
+        PyErr_SetString(PyExc_TypeError, "args may not be deleted");
+        return -1;
+    }
+
+    seq = PySequence_Tuple(val);
+    if (!seq)
+        return -1;
+
+    /* Install the new tuple first, then release the previous one so its
+     * reference is not leaked. */
+    old = self->args;
+    self->args = seq;
+    Py_XDECREF(old);
+    return 0;
+}
+
+static PyGetSetDef BaseException_getset[] = {
+ {"__dict__", (getter)BaseException_get_dict, (setter)BaseException_set_dict},
+ {"args", (getter)BaseException_get_args, (setter)BaseException_set_args},
+ {NULL},
+};
+
+
+static PyTypeObject _PyExc_BaseException = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ EXC_MODULE_NAME "BaseException", /*tp_name*/
+ sizeof(PyBaseExceptionObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor)BaseException_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /* tp_compare; */
+ (reprfunc)BaseException_repr, /*tp_repr*/
+ 0, /*tp_as_number*/
+ &BaseException_as_sequence, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash */
+ 0, /*tp_call*/
+ (reprfunc)BaseException_str, /*tp_str*/
+ PyObject_GenericGetAttr, /*tp_getattro*/
+ PyObject_GenericSetAttr, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ PyDoc_STR("Common base class for all exceptions"), /* tp_doc */
+ (traverseproc)BaseException_traverse, /* tp_traverse */
+ (inquiry)BaseException_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ BaseException_methods, /* tp_methods */
+ BaseException_members, /* tp_members */
+ BaseException_getset, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ offsetof(PyBaseExceptionObject, dict), /* tp_dictoffset */
+ (initproc)BaseException_init, /* tp_init */
+ 0, /* tp_alloc */
+ BaseException_new, /* tp_new */
+};
+/* The CPython API expects exceptions to be (PyObject *) - both a hold-over
+from the previous implementation and also allowing Python objects to be used
+in the API. */
+PyObject *PyExc_BaseException = (PyObject *)&_PyExc_BaseException;
+
+/* note these macros omit the last semicolon so the macro invocation may
+ * include it and not look strange.
+ */
+#define SimpleExtendsException(EXCBASE, EXCNAME, EXCDOC) \
+static PyTypeObject _PyExc_ ## EXCNAME = { \
+ PyObject_HEAD_INIT(NULL) \
+ 0, \
+ EXC_MODULE_NAME # EXCNAME, \
+ sizeof(PyBaseExceptionObject), \
+ 0, (destructor)BaseException_dealloc, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, \
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \
+ PyDoc_STR(EXCDOC), (traverseproc)BaseException_traverse, \
+ (inquiry)BaseException_clear, 0, 0, 0, 0, 0, 0, 0, &_ ## EXCBASE, \
+ 0, 0, 0, offsetof(PyBaseExceptionObject, dict), \
+ (initproc)BaseException_init, 0, BaseException_new,\
+}; \
+PyObject *PyExc_ ## EXCNAME = (PyObject *)&_PyExc_ ## EXCNAME
+
+#define MiddlingExtendsException(EXCBASE, EXCNAME, EXCSTORE, EXCDOC) \
+static PyTypeObject _PyExc_ ## EXCNAME = { \
+ PyObject_HEAD_INIT(NULL) \
+ 0, \
+ EXC_MODULE_NAME # EXCNAME, \
+ sizeof(Py ## EXCSTORE ## Object), \
+ 0, (destructor)BaseException_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, \
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \
+ PyDoc_STR(EXCDOC), (traverseproc)BaseException_traverse, \
+ (inquiry)BaseException_clear, 0, 0, 0, 0, 0, 0, 0, &_ ## EXCBASE, \
+ 0, 0, 0, offsetof(Py ## EXCSTORE ## Object, dict), \
+ (initproc)EXCSTORE ## _init, 0, EXCSTORE ## _new,\
+}; \
+PyObject *PyExc_ ## EXCNAME = (PyObject *)&_PyExc_ ## EXCNAME
+
+#define ComplexExtendsException(EXCBASE, EXCNAME, EXCSTORE, EXCDEALLOC, EXCMETHODS, EXCMEMBERS, EXCSTR, EXCDOC) \
+static PyTypeObject _PyExc_ ## EXCNAME = { \
+ PyObject_HEAD_INIT(NULL) \
+ 0, \
+ EXC_MODULE_NAME # EXCNAME, \
+ sizeof(Py ## EXCSTORE ## Object), 0, \
+ (destructor)EXCSTORE ## _dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ (reprfunc)EXCSTR, 0, 0, 0, \
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \
+ PyDoc_STR(EXCDOC), (traverseproc)EXCSTORE ## _traverse, \
+ (inquiry)EXCSTORE ## _clear, 0, 0, 0, 0, EXCMETHODS, \
+ EXCMEMBERS, 0, &_ ## EXCBASE, \
+ 0, 0, 0, offsetof(Py ## EXCSTORE ## Object, dict), \
+ (initproc)EXCSTORE ## _init, 0, EXCSTORE ## _new,\
+}; \
+PyObject *PyExc_ ## EXCNAME = (PyObject *)&_PyExc_ ## EXCNAME
+
+
+/*
+ * Exception extends BaseException
+ */
+SimpleExtendsException(PyExc_BaseException, Exception,
+ "Common base class for all non-exit exceptions.");
+
+
+/*
+ * StandardError extends Exception
+ */
+SimpleExtendsException(PyExc_Exception, StandardError,
+ "Base class for all standard Python exceptions that do not represent\n"
+ "interpreter exiting.");
+
+
+/*
+ * TypeError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, TypeError,
+ "Inappropriate argument type.");
+
+
+/*
+ * StopIteration extends Exception
+ */
+SimpleExtendsException(PyExc_Exception, StopIteration,
+ "Signal the end from iterator.next().");
+
+
+/*
+ * GeneratorExit extends Exception
+ */
+SimpleExtendsException(PyExc_Exception, GeneratorExit,
+ "Request that a generator exit.");
+
+
+/*
+ * SystemExit extends BaseException
+ */
+/* tp_new for SystemExit: build the base part with BaseException_new and
+ * start with code set to None.  Returns a new reference or NULL.
+ */
+static PyObject *
+SystemExit_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *obj = BaseException_new(type, args, kwds);
+
+    if (obj != NULL) {
+        Py_INCREF(Py_None);
+        ((PySystemExitObject *)obj)->code = Py_None;
+    }
+    return obj;
+}
+
+/* tp_init for SystemExit.
+ *
+ * After the base initialisation, code becomes the single argument when
+ * exactly one was passed, or the whole args tuple when several were.
+ * With no arguments code is left as it is.  Returns 0 on success, -1 if
+ * the base initialisation fails.
+ */
+static int
+SystemExit_init(PySystemExitObject *self, PyObject *args, PyObject *kwds)
+{
+    Py_ssize_t size = PyTuple_GET_SIZE(args);
+    PyObject *new_code;
+    PyObject *old_code;
+
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+        return -1;
+
+    /* Nothing to replace: avoid releasing and re-acquiring the same
+     * object, which could touch it after it has been freed. */
+    if (size == 0)
+        return 0;
+
+    new_code = (size == 1) ? PyTuple_GET_ITEM(args, 0) : args;
+
+    /* Take the new reference before giving up the old one. */
+    Py_INCREF(new_code);
+    old_code = self->code;
+    self->code = new_code;
+    Py_XDECREF(old_code);
+    return 0;
+}
+
+/* tp_clear handler: release code, then let BaseException_clear release
+ * the inherited fields.  Returns BaseException_clear's result.
+ */
+int
+SystemExit_clear(PySystemExitObject *self)
+{
+ Py_CLEAR(self->code);
+ return BaseException_clear((PyBaseExceptionObject *)self);
+}
+
+/* tp_dealloc for SystemExit: release every owned reference and free the
+ * object through tp_free.
+ */
+static void
+SystemExit_dealloc(PySystemExitObject *self)
+{
+    /* GC-enabled type: untrack before the fields become invalid. */
+    PyObject_GC_UnTrack(self);
+    SystemExit_clear(self);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* tp_traverse handler: visit code, then delegate the inherited fields to
+ * BaseException_traverse and return its result.
+ */
+int
+SystemExit_traverse(PySystemExitObject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->code);
+ return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
+}
+
+static PyMemberDef SystemExit_members[] = {
+ {"message", T_OBJECT, offsetof(PySystemExitObject, message), 0,
+ PyDoc_STR("exception message")},
+ {"code", T_OBJECT, offsetof(PySystemExitObject, code), 0,
+ PyDoc_STR("exception code")},
+ {NULL} /* Sentinel */
+};
+
+ComplexExtendsException(PyExc_BaseException, SystemExit, SystemExit,
+ SystemExit_dealloc, 0, SystemExit_members, 0,
+ "Request to exit from the interpreter.");
+
+/*
+ * KeyboardInterrupt extends BaseException
+ */
+SimpleExtendsException(PyExc_BaseException, KeyboardInterrupt,
+ "Program interrupted by user.");
+
+
+/*
+ * ImportError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, ImportError,
+ "Import can't find module, or can't find name in module.");
+
+
+/*
+ * EnvironmentError extends StandardError
+ */
+
+/* tp_new for EnvironmentError: build the base part with
+ * BaseException_new and start with errno, strerror and filename all set
+ * to None.  Returns a new reference or NULL.
+ */
+static PyObject *
+EnvironmentError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyEnvironmentErrorObject *self;
+
+    self = (PyEnvironmentErrorObject *)BaseException_new(type, args, kwds);
+    if (self == NULL)
+        return NULL;
+
+    Py_INCREF(Py_None);
+    self->myerrno = Py_None;
+
+    Py_INCREF(Py_None);
+    self->strerror = Py_None;
+
+    Py_INCREF(Py_None);
+    self->filename = Py_None;
+
+    return (PyObject *)self;
+}
+
+/* Where a function has a single filename, such as open() or some
+ * of the os module functions, PyErr_SetFromErrnoWithFilename() is
+ * called, giving a third argument which is the filename. But, so
+ * that old code using in-place unpacking doesn't break, e.g.:
+ *
+ * except IOError, (errno, strerror):
+ *
+ * we hack args so that it only contains two items. This also
+ * means we need our own __str__() which prints out the filename
+ * when it was supplied.
+ */
+/* tp_init for EnvironmentError.
+ *
+ * With two or three arguments they are stored as errno, strerror and
+ * (optionally) filename; when a filename is present, self->args is cut
+ * back to the first two items.  With fewer than two arguments only the
+ * base initialisation happens.  Returns 0 on success, -1 on error.
+ */
+static int
+EnvironmentError_init(PyEnvironmentErrorObject *self, PyObject *args,
+                      PyObject *kwds)
+{
+    PyObject *err_no = NULL;
+    PyObject *err_text = NULL;
+    PyObject *fname = NULL;
+    PyObject *first_two;
+
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+        return -1;
+
+    if (PyTuple_GET_SIZE(args) < 2)
+        return 0;
+
+    if (!PyArg_UnpackTuple(args, "EnvironmentError", 2, 3,
+                           &err_no, &err_text, &fname))
+        return -1;
+
+    /* Replace errno. */
+    Py_DECREF(self->myerrno);
+    Py_INCREF(err_no);
+    self->myerrno = err_no;
+
+    /* Replace strerror. */
+    Py_DECREF(self->strerror);
+    Py_INCREF(err_text);
+    self->strerror = err_text;
+
+    /* Without a third argument filename stays as it was. */
+    if (fname == NULL)
+        return 0;
+
+    Py_DECREF(self->filename);
+    Py_INCREF(fname);
+    self->filename = fname;
+
+    /* Hide the filename from args so two-item unpacking keeps working. */
+    first_two = PyTuple_GetSlice(args, 0, 2);
+    if (first_two == NULL)
+        return -1;
+
+    Py_DECREF(self->args);
+    self->args = first_two;
+    return 0;
+}
+
+/* tp_clear handler: release errno, strerror and filename, then let
+ * BaseException_clear release the inherited fields.
+ */
+int
+EnvironmentError_clear(PyEnvironmentErrorObject *self)
+{
+ Py_CLEAR(self->myerrno);
+ Py_CLEAR(self->strerror);
+ Py_CLEAR(self->filename);
+ return BaseException_clear((PyBaseExceptionObject *)self);
+}
+
+/* tp_dealloc for EnvironmentError: release every owned reference and
+ * free the object through tp_free.
+ */
+static void
+EnvironmentError_dealloc(PyEnvironmentErrorObject *self)
+{
+    /* GC-enabled type: untrack before the fields become invalid. */
+    PyObject_GC_UnTrack(self);
+    EnvironmentError_clear(self);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* tp_traverse handler: visit errno, strerror and filename, then delegate
+ * the inherited fields to BaseException_traverse.
+ */
+int
+EnvironmentError_traverse(PyEnvironmentErrorObject *self, visitproc visit,
+ void *arg)
+{
+ Py_VISIT(self->myerrno);
+ Py_VISIT(self->strerror);
+ Py_VISIT(self->filename);
+ return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
+}
+
+/* tp_str for EnvironmentError.
+ *
+ *   filename set            -> "[Errno E] S: repr(filename)"
+ *   errno and strerror true -> "[Errno E] S"
+ *   otherwise               -> BaseException_str()
+ *
+ * Returns a new reference or NULL on error.
+ */
+static PyObject *
+EnvironmentError_str(PyEnvironmentErrorObject *self)
+{
+    PyObject *fmt = NULL;
+    PyObject *repr = NULL;
+    PyObject *tuple = NULL;
+    PyObject *rtnval = NULL;
+
+    if (self->filename != Py_None) {
+        fmt = PyString_FromString("[Errno %s] %s: %s");
+        repr = PyObject_Repr(self->filename);
+        if (!fmt || !repr)
+            goto done;
+
+        /* PyTuple_Pack takes its own references, so repr stays owned by
+         * this function and is released exactly once below. */
+        tuple = PyTuple_Pack(3, self->myerrno, self->strerror, repr);
+        if (!tuple)
+            goto done;
+
+        rtnval = PyString_Format(fmt, tuple);
+    }
+    else if (PyObject_IsTrue(self->myerrno) &&
+             PyObject_IsTrue(self->strerror)) {
+        fmt = PyString_FromString("[Errno %s] %s");
+        if (!fmt)
+            goto done;
+
+        tuple = PyTuple_Pack(2, self->myerrno, self->strerror);
+        if (!tuple)
+            goto done;
+
+        rtnval = PyString_Format(fmt, tuple);
+    }
+    else
+        rtnval = BaseException_str((PyBaseExceptionObject *)self);
+
+ done:
+    /* Single clean-up point for every temporary. */
+    Py_XDECREF(repr);
+    Py_XDECREF(fmt);
+    Py_XDECREF(tuple);
+    return rtnval;
+}
+
+static PyMemberDef EnvironmentError_members[] = {
+ {"message", T_OBJECT, offsetof(PyEnvironmentErrorObject, message), 0,
+ PyDoc_STR("exception message")},
+ {"errno", T_OBJECT, offsetof(PyEnvironmentErrorObject, myerrno), 0,
+ PyDoc_STR("exception errno")},
+ {"strerror", T_OBJECT, offsetof(PyEnvironmentErrorObject, strerror), 0,
+ PyDoc_STR("exception strerror")},
+ {"filename", T_OBJECT, offsetof(PyEnvironmentErrorObject, filename), 0,
+ PyDoc_STR("exception filename")},
+ {NULL} /* Sentinel */
+};
+
+
+/* __reduce__ for EnvironmentError.
+ *
+ * When a filename was supplied, self->args holds only the first two
+ * constructor arguments, so the filename is appended again to rebuild the
+ * original three-item argument tuple.  Returns (type, args, dict) when
+ * the instance has a __dict__, otherwise (type, args); NULL on error.
+ */
+static PyObject *
+EnvironmentError_reduce(PyEnvironmentErrorObject *self)
+{
+    PyObject *args = self->args;
+    PyObject *res;
+
+    if (PyTuple_Check(args) &&
+        PyTuple_GET_SIZE(args) == 2 &&
+        self->filename != Py_None) {
+        /* Size was checked just above, so the unchecked item macros are
+         * safe; PyTuple_Pack takes its own references. */
+        args = PyTuple_Pack(3,
+                            PyTuple_GET_ITEM(self->args, 0),
+                            PyTuple_GET_ITEM(self->args, 1),
+                            self->filename);
+        if (!args)
+            return NULL;
+    }
+    else {
+        Py_INCREF(args);
+    }
+
+    /* The dict is created lazily, and PyTuple_Pack must never be handed
+     * a NULL item. */
+    if (self->dict != NULL)
+        res = PyTuple_Pack(3, self->ob_type, args, self->dict);
+    else
+        res = PyTuple_Pack(2, self->ob_type, args);
+
+    Py_DECREF(args);
+    return res;
+}
+
+
+static PyMethodDef EnvironmentError_methods[] = {
+ {"__reduce__", (PyCFunction)EnvironmentError_reduce, METH_NOARGS},
+ {NULL}
+};
+
+ComplexExtendsException(PyExc_StandardError, EnvironmentError,
+ EnvironmentError, EnvironmentError_dealloc,
+ EnvironmentError_methods, EnvironmentError_members,
+ EnvironmentError_str,
+ "Base class for I/O related errors.");
+
+
+/*
+ * IOError extends EnvironmentError
+ */
+MiddlingExtendsException(PyExc_EnvironmentError, IOError,
+ EnvironmentError, "I/O operation failed.");
+
+
+/*
+ * OSError extends EnvironmentError
+ */
+MiddlingExtendsException(PyExc_EnvironmentError, OSError,
+ EnvironmentError, "OS system call failed.");
+
+
+/*
+ * WindowsError extends OSError
+ */
+#ifdef MS_WINDOWS
+#include "errmap.h"
+
+/* tp_clear handler: release errno, strerror, filename and winerror, then
+ * let BaseException_clear release the inherited fields.
+ */
+int
+WindowsError_clear(PyWindowsErrorObject *self)
+{
+ Py_CLEAR(self->myerrno);
+ Py_CLEAR(self->strerror);
+ Py_CLEAR(self->filename);
+ Py_CLEAR(self->winerror);
+ return BaseException_clear((PyBaseExceptionObject *)self);
+}
+
+/* tp_dealloc for WindowsError: release every owned reference and free
+ * the object through tp_free.
+ */
+static void
+WindowsError_dealloc(PyWindowsErrorObject *self)
+{
+    /* GC-enabled type: untrack before the fields become invalid. */
+    PyObject_GC_UnTrack(self);
+    WindowsError_clear(self);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* tp_traverse handler: visit errno, strerror, filename and winerror, then
+ * delegate the inherited fields to BaseException_traverse.
+ */
+int
+WindowsError_traverse(PyWindowsErrorObject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->myerrno);
+ Py_VISIT(self->strerror);
+ Py_VISIT(self->filename);
+ Py_VISIT(self->winerror);
+ return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
+}
+
+/* tp_new for WindowsError.
+ *
+ * Builds the EnvironmentError part and fills in winerror.  If errno is
+ * None, winerror becomes None as well.  Otherwise the current errno value
+ * is treated as a Win32 error code: it is moved into winerror and errno
+ * is replaced by the value winerror_to_errno() returns for it.  Returns a
+ * new reference, or NULL with an exception set.
+ */
+static PyObject *
+WindowsError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ PyObject *o_errcode = NULL;
+ long errcode;
+ PyWindowsErrorObject *self;
+ long posix_errno;
+
+ self = (PyWindowsErrorObject *)EnvironmentError_new(type, args, kwds);
+ if (!self)
+ return NULL;
+
+ /* EnvironmentError_new stores None in myerrno, so this is the branch
+ taken for an object that has just been allocated. */
+ if (self->myerrno == Py_None) {
+ self->winerror = self->myerrno;
+ Py_INCREF(self->winerror);
+ return (PyObject *)self;
+ }
+
+ /* Set errno to the POSIX errno, and winerror to the Win32
+ error code. */
+ errcode = PyInt_AsLong(self->myerrno);
+ if (errcode == -1 && PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError))
+ /* give a clearer error message */
+ PyErr_SetString(PyExc_TypeError, "errno has to be an integer");
+ goto failed;
+ }
+ posix_errno = winerror_to_errno(errcode);
+
+ /* The reference held in myerrno is handed over to winerror; myerrno
+ receives the freshly created integer below. */
+ self->winerror = self->myerrno;
+
+ o_errcode = PyInt_FromLong(posix_errno);
+ if (!o_errcode)
+ goto failed;
+
+ self->myerrno = o_errcode;
+
+ return (PyObject *)self;
+failed:
+ /* Could not set errno. */
+ Py_DECREF(self);
+ return NULL;
+}
+
+/* tp_init for WindowsError.
+ *
+ * Runs EnvironmentError_init, then interprets the resulting errno as a
+ * Win32 error code: that value moves to winerror and errno is replaced by
+ * the value winerror_to_errno() returns for it.  If errno is None,
+ * winerror is set to None too.  Returns 0 on success, -1 on error.
+ */
+static int
+WindowsError_init(PyWindowsErrorObject *self, PyObject *args, PyObject *kwds)
+{
+    PyObject *o_errcode = NULL;
+    PyObject *old_winerror;
+    long errcode;
+    long posix_errno;
+
+    if (EnvironmentError_init((PyEnvironmentErrorObject *)self, args, kwds)
+            == -1)
+        return -1;
+
+    if (self->myerrno == Py_None) {
+        old_winerror = self->winerror;
+        Py_INCREF(self->myerrno);
+        self->winerror = self->myerrno;
+        Py_XDECREF(old_winerror);
+        return 0;
+    }
+
+    /* Set errno to the POSIX errno, and winerror to the Win32
+       error code. */
+    errcode = PyInt_AsLong(self->myerrno);
+    if (errcode == -1 && PyErr_Occurred())
+        return -1;
+    posix_errno = winerror_to_errno(errcode);
+
+    /* Create the replacement first: on failure the object is left
+     * untouched instead of having winerror and myerrno share a single
+     * reference. */
+    o_errcode = PyInt_FromLong(posix_errno);
+    if (!o_errcode)
+        return -1;
+
+    /* Hand the reference in myerrno over to winerror, install the new
+     * errno, and only then drop the previous winerror. */
+    old_winerror = self->winerror;
+    self->winerror = self->myerrno;
+    self->myerrno = o_errcode;
+    Py_XDECREF(old_winerror);
+
+    return 0;
+}
+
+
+/* tp_str for WindowsError.
+ *
+ *   filename set            -> "[Error E] S: repr(filename)"
+ *   errno and strerror true -> "[Error E] S"
+ *   otherwise               -> EnvironmentError_str()
+ *
+ * Returns a new reference or NULL on error.
+ */
+static PyObject *
+WindowsError_str(PyWindowsErrorObject *self)
+{
+    PyObject *repr = NULL;
+    PyObject *fmt = NULL;
+    PyObject *tuple = NULL;
+    PyObject *rtnval = NULL;
+
+    if (self->filename != Py_None) {
+        fmt = PyString_FromString("[Error %s] %s: %s");
+        repr = PyObject_Repr(self->filename);
+        if (!fmt || !repr)
+            goto finally;
+
+        tuple = PyTuple_Pack(3, self->myerrno, self->strerror, repr);
+        if (!tuple)
+            goto finally;
+
+        /* tuple is released once, at the finally label. */
+        rtnval = PyString_Format(fmt, tuple);
+    }
+    else if (PyObject_IsTrue(self->myerrno) &&
+             PyObject_IsTrue(self->strerror)) {
+        fmt = PyString_FromString("[Error %s] %s");
+        if (!fmt)
+            goto finally;
+
+        tuple = PyTuple_Pack(2, self->myerrno, self->strerror);
+        if (!tuple)
+            goto finally;
+
+        /* tuple is released once, at the finally label. */
+        rtnval = PyString_Format(fmt, tuple);
+    }
+    else
+        rtnval = EnvironmentError_str((PyEnvironmentErrorObject *)self);
+
+ finally:
+    /* Single clean-up point for every temporary. */
+    Py_XDECREF(repr);
+    Py_XDECREF(fmt);
+    Py_XDECREF(tuple);
+    return rtnval;
+}
+
+static PyMemberDef WindowsError_members[] = {
+ {"message", T_OBJECT, offsetof(PyWindowsErrorObject, message), 0,
+ PyDoc_STR("exception message")},
+ {"errno", T_OBJECT, offsetof(PyWindowsErrorObject, myerrno), 0,
+ PyDoc_STR("POSIX exception code")},
+ {"strerror", T_OBJECT, offsetof(PyWindowsErrorObject, strerror), 0,
+ PyDoc_STR("exception strerror")},
+ {"filename", T_OBJECT, offsetof(PyWindowsErrorObject, filename), 0,
+ PyDoc_STR("exception filename")},
+ {"winerror", T_OBJECT, offsetof(PyWindowsErrorObject, winerror), 0,
+ PyDoc_STR("Win32 exception code")},
+ {NULL} /* Sentinel */
+};
+
+ComplexExtendsException(PyExc_OSError, WindowsError, WindowsError,
+ WindowsError_dealloc, 0, WindowsError_members,
+ WindowsError_str, "MS-Windows OS system call failed.");
+
+#endif /* MS_WINDOWS */
+
+
+/*
+ * VMSError extends OSError (I think)
+ */
+#ifdef __VMS
+MiddlingExtendsException(PyExc_OSError, VMSError, EnvironmentError,
+ "OpenVMS OS system call failed.");
+#endif
+
+
+/*
+ * EOFError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, EOFError,
+ "Read beyond end of file.");
+
+
+/*
+ * RuntimeError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, RuntimeError,
+ "Unspecified run-time error.");
+
+
+/*
+ * NotImplementedError extends RuntimeError
+ */
+SimpleExtendsException(PyExc_RuntimeError, NotImplementedError,
+ "Method or function hasn't been implemented yet.");
+
+/*
+ * NameError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, NameError,
+ "Name not found globally.");
+
+/*
+ * UnboundLocalError extends NameError
+ */
+SimpleExtendsException(PyExc_NameError, UnboundLocalError,
+ "Local name referenced but not bound to a value.");
+
+/*
+ * AttributeError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, AttributeError,
+ "Attribute not found.");
+
+
+/*
+ * SyntaxError extends StandardError
+ */
+/* tp_new for SyntaxError: build the base part with BaseException_new and
+ * start with msg, filename, lineno, offset, text and print_file_and_line
+ * all set to None.  Returns a new reference or NULL.
+ */
+static PyObject *
+SyntaxError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PySyntaxErrorObject *self;
+
+    self = (PySyntaxErrorObject *)BaseException_new(type, args, kwds);
+    if (self == NULL)
+        return NULL;
+
+    Py_INCREF(Py_None);
+    self->msg = Py_None;
+
+    Py_INCREF(Py_None);
+    self->filename = Py_None;
+
+    Py_INCREF(Py_None);
+    self->lineno = Py_None;
+
+    Py_INCREF(Py_None);
+    self->offset = Py_None;
+
+    Py_INCREF(Py_None);
+    self->text = Py_None;
+
+    /* this is always None - yes, I know it doesn't seem to be used
+       anywhere, but it was in the previous implementation */
+    Py_INCREF(Py_None);
+    self->print_file_and_line = Py_None;
+
+    return (PyObject *)self;
+}
+
+/* tp_init for SyntaxError.
+ *
+ * The first argument, if any, becomes msg.  When exactly two arguments
+ * are given, the second must be a sequence whose first four items are
+ * (filename, lineno, offset, text).  Returns 0 on success, -1 with an
+ * exception set on error.
+ */
+static int
+SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds)
+{
+    PyObject *info = NULL;
+    PyObject *item;
+    PyObject *old;
+    Py_ssize_t lenargs = PyTuple_GET_SIZE(args);
+
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+        return -1;
+
+    if (lenargs >= 1) {
+        item = PyTuple_GET_ITEM(args, 0);
+        Py_INCREF(item);
+        old = self->msg;
+        self->msg = item;
+        Py_XDECREF(old);
+    }
+    if (lenargs == 2) {
+        info = PySequence_Tuple(PyTuple_GET_ITEM(args, 1));
+        if (!info)
+            return -1;
+
+        /* The unchecked item macros below read slots 0..3; refuse a
+         * shorter sequence instead of reading past its end. */
+        if (PyTuple_GET_SIZE(info) < 4) {
+            PyErr_SetString(PyExc_IndexError, "tuple index out of range");
+            Py_DECREF(info);
+            return -1;
+        }
+
+        item = PyTuple_GET_ITEM(info, 0);
+        Py_INCREF(item);
+        old = self->filename;
+        self->filename = item;
+        Py_XDECREF(old);
+
+        item = PyTuple_GET_ITEM(info, 1);
+        Py_INCREF(item);
+        old = self->lineno;
+        self->lineno = item;
+        Py_XDECREF(old);
+
+        item = PyTuple_GET_ITEM(info, 2);
+        Py_INCREF(item);
+        old = self->offset;
+        self->offset = item;
+        Py_XDECREF(old);
+
+        item = PyTuple_GET_ITEM(info, 3);
+        Py_INCREF(item);
+        old = self->text;
+        self->text = item;
+        Py_XDECREF(old);
+
+        /* PySequence_Tuple returned a new reference; release it now that
+         * the items are owned by self. */
+        Py_DECREF(info);
+    }
+    return 0;
+}
+
+/* tp_clear handler: release msg, filename, lineno, offset, text and
+ * print_file_and_line, then let BaseException_clear release the inherited
+ * fields.
+ */
+int
+SyntaxError_clear(PySyntaxErrorObject *self)
+{
+ Py_CLEAR(self->msg);
+ Py_CLEAR(self->filename);
+ Py_CLEAR(self->lineno);
+ Py_CLEAR(self->offset);
+ Py_CLEAR(self->text);
+ Py_CLEAR(self->print_file_and_line);
+ return BaseException_clear((PyBaseExceptionObject *)self);
+}
+
+/* tp_dealloc for SyntaxError: release every owned reference and free the
+ * object through tp_free.
+ */
+static void
+SyntaxError_dealloc(PySyntaxErrorObject *self)
+{
+    /* GC-enabled type: untrack before the fields become invalid. */
+    PyObject_GC_UnTrack(self);
+    SyntaxError_clear(self);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* tp_traverse handler: visit every SyntaxError-specific field, then
+ * delegate the inherited fields to BaseException_traverse.
+ */
+int
+SyntaxError_traverse(PySyntaxErrorObject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->msg);
+ Py_VISIT(self->filename);
+ Py_VISIT(self->lineno);
+ Py_VISIT(self->offset);
+ Py_VISIT(self->text);
+ Py_VISIT(self->print_file_and_line);
+ return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
+}
+
+/* Return the part of name that follows the last SEP character, the whole
+ * of name when it contains no SEP, or "???" when name is NULL.  The
+ * result points into name (or at the literal); nothing is allocated.
+ *
+ * Named "my_basename" rather than "basename" because glibc already
+ * prototypes basename when _GNU_SOURCE is defined, and Python does
+ * define that.
+ */
+static char *
+my_basename(char *name)
+{
+    char *last_sep;
+
+    if (name == NULL)
+        return "???";
+
+    last_sep = strrchr(name, SEP);
+    return (last_sep != NULL) ? last_sep + 1 : name;
+}
+
+
+/* tp_str for SyntaxError.
+ *
+ * Starts from str(msg).  When filename is a string and/or lineno is an
+ * int, the result is extended to one of
+ *     "msg (basename, line N)", "msg (basename)" or "msg (line N)".
+ * If the scratch buffer or the final string cannot be created, the plain
+ * str(msg) is returned instead.  Returns a new reference, or NULL when
+ * str(msg) itself fails.
+ */
+static PyObject *
+SyntaxError_str(PySyntaxErrorObject *self)
+{
+ PyObject *str;
+ PyObject *result;
+
+ str = PyObject_Str(self->msg);
+ result = str;
+
+ /* XXX -- do all the additional formatting with filename and
+ lineno here */
+
+ if (str != NULL && PyString_Check(str)) {
+ int have_filename = 0;
+ int have_lineno = 0;
+ char *buffer = NULL;
+
+ have_filename = (self->filename != NULL) &&
+ PyString_Check(self->filename);
+ have_lineno = (self->lineno != NULL) && PyInt_Check(self->lineno);
+
+ if (have_filename || have_lineno) {
+ /* message length plus 64 bytes of slack (presumably for the fixed
+ text and the line number), plus the full filename length even
+ though only its basename is printed */
+ Py_ssize_t bufsize = PyString_GET_SIZE(str) + 64;
+ if (have_filename)
+ bufsize += PyString_GET_SIZE(self->filename);
+
+ buffer = (char *)PyMem_MALLOC(bufsize);
+ if (buffer != NULL) {
+ if (have_filename && have_lineno)
+ PyOS_snprintf(buffer, bufsize, "%s (%s, line %ld)",
+ PyString_AS_STRING(str),
+ my_basename(PyString_AS_STRING(self->filename)),
+ PyInt_AsLong(self->lineno));
+ else if (have_filename)
+ PyOS_snprintf(buffer, bufsize, "%s (%s)",
+ PyString_AS_STRING(str),
+ my_basename(PyString_AS_STRING(self->filename)));
+ else if (have_lineno)
+ PyOS_snprintf(buffer, bufsize, "%s (line %ld)",
+ PyString_AS_STRING(str),
+ PyInt_AsLong(self->lineno));
+
+ result = PyString_FromString(buffer);
+ PyMem_FREE(buffer);
+
+ /* keep exactly one reference alive: the decorated string if it
+ could be built, otherwise the plain message */
+ if (result == NULL)
+ result = str;
+ else
+ Py_DECREF(str);
+ }
+ }
+ }
+ return result;
+}
+
+ /* Attribute table for SyntaxError: each entry exposes one object slot of
+    PySyntaxErrorObject as a T_OBJECT member with flags 0. */
+ static PyMemberDef SyntaxError_members[] = {
+     {"message", T_OBJECT,
+      offsetof(PySyntaxErrorObject, message), 0,
+      PyDoc_STR("exception message")},
+     {"msg", T_OBJECT,
+      offsetof(PySyntaxErrorObject, msg), 0,
+      PyDoc_STR("exception msg")},
+     {"filename", T_OBJECT,
+      offsetof(PySyntaxErrorObject, filename), 0,
+      PyDoc_STR("exception filename")},
+     {"lineno", T_OBJECT,
+      offsetof(PySyntaxErrorObject, lineno), 0,
+      PyDoc_STR("exception lineno")},
+     {"offset", T_OBJECT,
+      offsetof(PySyntaxErrorObject, offset), 0,
+      PyDoc_STR("exception offset")},
+     {"text", T_OBJECT,
+      offsetof(PySyntaxErrorObject, text), 0,
+      PyDoc_STR("exception text")},
+     {"print_file_and_line", T_OBJECT,
+      offsetof(PySyntaxErrorObject, print_file_and_line), 0,
+      PyDoc_STR("exception print_file_and_line")},
+     {NULL}  /* Sentinel */
+ };
+
+ComplexExtendsException(PyExc_StandardError, SyntaxError, SyntaxError,
+ SyntaxError_dealloc, 0, SyntaxError_members,
+ SyntaxError_str, "Invalid syntax.");
+
+
+/*
+ * IndentationError extends SyntaxError
+ */
+MiddlingExtendsException(PyExc_SyntaxError, IndentationError, SyntaxError,
+ "Improper indentation.");
+
+
+/*
+ * TabError extends IndentationError
+ */
+MiddlingExtendsException(PyExc_IndentationError, TabError, SyntaxError,
+ "Improper mixture of spaces and tabs.");
+
+
+/*
+ * LookupError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, LookupError,
+ "Base class for lookup errors.");
+
+
+/*
+ * IndexError extends LookupError
+ */
+SimpleExtendsException(PyExc_LookupError, IndexError,
+ "Sequence index out of range.");
+
+
+/*
+ * KeyError extends LookupError
+ */
+ /* str() for KeyError.
+
+    When args is a tuple holding exactly one item, return repr(args[0]), so
+    that the exception raised by {}[''] prints
+        KeyError: ''
+    rather than a bare, confusing
+        KeyError
+    The price is that an explanatory string passed to KeyError is shown in
+    quotes.  Any other shape of args falls back to BaseException_str(). */
+ static PyObject *
+ KeyError_str(PyBaseExceptionObject *self)
+ {
+     PyObject *args = self->args;
+
+     if (!PyTuple_Check(args) || PyTuple_GET_SIZE(args) != 1)
+         return BaseException_str(self);
+
+     return PyObject_Repr(PyTuple_GET_ITEM(args, 0));
+ }
+
+ComplexExtendsException(PyExc_LookupError, KeyError, BaseException,
+ 0, 0, 0, KeyError_str, "Mapping key not found.");
+
+
+/*
+ * ValueError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, ValueError,
+ "Inappropriate argument value (of correct type).");
+
+/*
+ * UnicodeError extends ValueError
+ */
+
+SimpleExtendsException(PyExc_ValueError, UnicodeError,
+ "Unicode related error.");
+
+#ifdef Py_USING_UNICODE
+ /* Store the integer value of `attr` into *value.
+
+    `attr` must be an int or a long; `name` is only used in error messages.
+    Returns 0 on success.  Returns -1 with an exception set when `attr` is
+    NULL, has another type, or is a long that _PyLong_AsSsize_t() rejects. */
+ static int
+ get_int(PyObject *attr, Py_ssize_t *value, const char *name)
+ {
+     if (attr == NULL) {
+         PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
+         return -1;
+     }
+
+     if (PyInt_Check(attr)) {
+         *value = PyInt_AS_LONG(attr);
+         return 0;
+     }
+
+     if (!PyLong_Check(attr)) {
+         PyErr_Format(PyExc_TypeError, "%.200s attribute must be int", name);
+         return -1;
+     }
+
+     *value = _PyLong_AsSsize_t(attr);
+     /* -1 is also a legitimate value, so consult the error indicator. */
+     if (*value == -1 && PyErr_Occurred())
+         return -1;
+     return 0;
+ }
+
+ /* Replace *attr with a new int object holding `value`.
+    Returns 0 on success; returns -1 and leaves *attr untouched when the
+    int object cannot be created. */
+ static int
+ set_ssize_t(PyObject **attr, Py_ssize_t value)
+ {
+     PyObject *boxed = PyInt_FromSsize_t(value);
+
+     if (boxed == NULL)
+         return -1;
+
+     Py_XDECREF(*attr);
+     *attr = boxed;
+     return 0;
+ }
+
+ /* Return a new reference to `attr` if it is a str object.
+    Otherwise set TypeError (mentioning `name`) and return NULL; this covers
+    both a NULL attribute and one of the wrong type. */
+ static PyObject *
+ get_string(PyObject *attr, const char *name)
+ {
+     if (attr == NULL) {
+         PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
+     }
+     else if (!PyString_Check(attr)) {
+         PyErr_Format(PyExc_TypeError, "%.200s attribute must be str", name);
+     }
+     else {
+         Py_INCREF(attr);
+         return attr;
+     }
+     return NULL;
+ }
+
+
+ /* Replace *attr with a new str object built from the C string `value`.
+    Returns 0 on success; returns -1 and leaves *attr untouched when the
+    str object cannot be created. */
+ static int
+ set_string(PyObject **attr, const char *value)
+ {
+     PyObject *boxed = PyString_FromString(value);
+
+     if (boxed == NULL)
+         return -1;
+
+     Py_XDECREF(*attr);
+     *attr = boxed;
+     return 0;
+ }
+
+
+ /* Return a new reference to `attr` if it is a unicode object.
+    Otherwise set TypeError (mentioning `name`) and return NULL; this covers
+    both a NULL attribute and one of the wrong type. */
+ static PyObject *
+ get_unicode(PyObject *attr, const char *name)
+ {
+     if (attr == NULL) {
+         PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
+     }
+     else if (!PyUnicode_Check(attr)) {
+         PyErr_Format(PyExc_TypeError,
+                      "%.200s attribute must be unicode", name);
+     }
+     else {
+         Py_INCREF(attr);
+         return attr;
+     }
+     return NULL;
+ }
+
+ /* Return a new reference to the `encoding` attribute, which must be a str;
+    NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeEncodeError_GetEncoding(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_string(uexc->encoding, "encoding");
+ }
+
+ /* Return a new reference to the `encoding` attribute, which must be a str;
+    NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeDecodeError_GetEncoding(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_string(uexc->encoding, "encoding");
+ }
+
+ /* Return a new reference to the `object` attribute, which must be a
+    unicode object; NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeEncodeError_GetObject(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_unicode(uexc->object, "object");
+ }
+
+ /* Return a new reference to the `object` attribute, which must be a str
+    for a decode error; NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeDecodeError_GetObject(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_string(uexc->object, "object");
+ }
+
+ /* Return a new reference to the `object` attribute, which must be a
+    unicode object; NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeTranslateError_GetObject(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_unicode(uexc->object, "object");
+ }
+
+ /* Fetch the `start` attribute into *start, clamped against the length of
+    the unicode `object` attribute: values below 0 become 0 and values at
+    or beyond the length become length-1.
+
+    Returns 0 on success, -1 with an exception set when `start` is not an
+    integer or `object` is not a unicode object. */
+ int
+ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+     PyObject *obj;
+     Py_ssize_t size;
+
+     if (get_int(uexc->start, start, "start") != 0)
+         return -1;
+
+     obj = get_unicode(uexc->object, "object");
+     if (obj == NULL)
+         return -1;
+     size = PyUnicode_GET_SIZE(obj);
+     /* get_unicode() handed us a new reference; only the size is needed,
+        so release it here instead of leaking it on every call. */
+     Py_DECREF(obj);
+
+     if (*start < 0)
+         *start = 0;
+     if (*start >= size)
+         *start = size - 1;
+     return 0;
+ }
+
+
+ /* Fetch the `start` attribute into *start, clamped against the length of
+    the str `object` attribute: values below 0 become 0 and values at or
+    beyond the length become length-1.
+
+    Returns 0 on success, -1 with an exception set when `start` is not an
+    integer or `object` is not a str. */
+ int
+ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+     PyObject *obj;
+     Py_ssize_t size;
+
+     if (get_int(uexc->start, start, "start") != 0)
+         return -1;
+
+     obj = get_string(uexc->object, "object");
+     if (obj == NULL)
+         return -1;
+     size = PyString_GET_SIZE(obj);
+     /* get_string() handed us a new reference; only the size is needed,
+        so release it here instead of leaking it on every call. */
+     Py_DECREF(obj);
+
+     if (*start < 0)
+         *start = 0;
+     if (*start >= size)
+         *start = size - 1;
+     return 0;
+ }
+
+
+ /* Translate errors keep a unicode `object`, exactly like encode errors,
+    so the encode-error accessor is reused unchanged. */
+ int
+ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
+ {
+     int status = PyUnicodeEncodeError_GetStart(exc, start);
+
+     return status;
+ }
+
+
+ /* Set the `start` attribute to a new int holding `start`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_ssize_t(&uexc->start, start);
+ }
+
+
+ /* Set the `start` attribute to a new int holding `start`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_ssize_t(&uexc->start, start);
+ }
+
+
+ /* Set the `start` attribute to a new int holding `start`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_ssize_t(&uexc->start, start);
+ }
+
+
+ /* Fetch the `end` attribute into *end, clamped against the length of the
+    unicode `object` attribute: values below 1 become 1 and values beyond
+    the length become the length.
+
+    Returns 0 on success, -1 with an exception set when `end` is not an
+    integer or `object` is not a unicode object. */
+ int
+ PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+     PyObject *obj;
+     Py_ssize_t size;
+
+     if (get_int(uexc->end, end, "end") != 0)
+         return -1;
+
+     obj = get_unicode(uexc->object, "object");
+     if (obj == NULL)
+         return -1;
+     size = PyUnicode_GET_SIZE(obj);
+     /* get_unicode() handed us a new reference; only the size is needed,
+        so release it here instead of leaking it on every call. */
+     Py_DECREF(obj);
+
+     if (*end < 1)
+         *end = 1;
+     if (*end > size)
+         *end = size;
+     return 0;
+ }
+
+
+ /* Fetch the `end` attribute into *end, clamped against the length of the
+    str `object` attribute: values below 1 become 1 and values beyond the
+    length become the length.
+
+    Returns 0 on success, -1 with an exception set when `end` is not an
+    integer or `object` is not a str. */
+ int
+ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+     PyObject *obj;
+     Py_ssize_t size;
+
+     if (get_int(uexc->end, end, "end") != 0)
+         return -1;
+
+     obj = get_string(uexc->object, "object");
+     if (obj == NULL)
+         return -1;
+     size = PyString_GET_SIZE(obj);
+     /* get_string() handed us a new reference; only the size is needed,
+        so release it here instead of leaking it on every call. */
+     Py_DECREF(obj);
+
+     if (*end < 1)
+         *end = 1;
+     if (*end > size)
+         *end = size;
+     return 0;
+ }
+
+
+ /* Translate errors keep a unicode `object`, exactly like encode errors,
+    so the encode-error accessor is reused unchanged.  Note that the out
+    parameter is named `start` here although it receives the END index. */
+ int
+ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *start)
+ {
+     int status = PyUnicodeEncodeError_GetEnd(exc, start);
+
+     return status;
+ }
+
+
+ /* Set the `end` attribute to a new int holding `end`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_ssize_t(&uexc->end, end);
+ }
+
+
+ /* Set the `end` attribute to a new int holding `end`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_ssize_t(&uexc->end, end);
+ }
+
+
+ /* Set the `end` attribute to a new int holding `end`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_ssize_t(&uexc->end, end);
+ }
+
+ /* Return a new reference to the `reason` attribute, which must be a str;
+    NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeEncodeError_GetReason(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_string(uexc->reason, "reason");
+ }
+
+
+ /* Return a new reference to the `reason` attribute, which must be a str;
+    NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeDecodeError_GetReason(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_string(uexc->reason, "reason");
+ }
+
+
+ /* Return a new reference to the `reason` attribute, which must be a str;
+    NULL with TypeError set otherwise. */
+ PyObject *
+ PyUnicodeTranslateError_GetReason(PyObject *exc)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return get_string(uexc->reason, "reason");
+ }
+
+
+ /* Set the `reason` attribute to a new str built from `reason`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_string(&uexc->reason, reason);
+ }
+
+
+ /* Set the `reason` attribute to a new str built from `reason`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_string(&uexc->reason, reason);
+ }
+
+
+ /* Set the `reason` attribute to a new str built from `reason`.
+    Returns 0 on success, -1 on failure. */
+ int
+ PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
+ {
+     PyUnicodeErrorObject *uexc = (PyUnicodeErrorObject *)exc;
+
+     return set_string(&uexc->reason, reason);
+ }
+
+
+static PyObject *
+UnicodeError_new(PyTypeObject *type, PyObject *args, PyObject *kwds,
+ PyTypeObject *objecttype)
+{
+ PyUnicodeErrorObject *self;
+
+ self = (PyUnicodeErrorObject *)BaseException_new(type, args, kwds);
+ if (!self)
+ return NULL;
+
+ MAKE_IT_NONE(self->encoding);
+ MAKE_IT_NONE(self->object);
+ MAKE_IT_NONE(self->start);
+ MAKE_IT_NONE(self->end);
+ MAKE_IT_NONE(self->reason);
+
+ return (PyObject *)self;
+}
+
+static int
+UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds,
+ PyTypeObject *objecttype)
+{
+ if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
+ &PyString_Type, &self->encoding,
+ objecttype, &self->object,
+ &PyInt_Type, &self->start,
+ &PyInt_Type, &self->end,
+ &PyString_Type, &self->reason)) {
+ self->encoding = self->object = self->start = self->end =
+ self->reason = NULL;
+ return -1;
+ }
+
+ Py_INCREF(self->encoding);
+ Py_INCREF(self->object);
+ Py_INCREF(self->start);
+ Py_INCREF(self->end);
+ Py_INCREF(self->reason);
+
+ return 0;
+}
+
+ /* Release the five UnicodeError-specific references, then let
+    BaseException_clear() release the inherited ones.  Returns whatever
+    BaseException_clear() returns. */
+ int
+ UnicodeError_clear(PyUnicodeErrorObject *self)
+ {
+     PyBaseExceptionObject *base = (PyBaseExceptionObject *)self;
+
+     Py_CLEAR(self->encoding);
+     Py_CLEAR(self->object);
+     Py_CLEAR(self->start);
+     Py_CLEAR(self->end);
+     Py_CLEAR(self->reason);
+
+     return BaseException_clear(base);
+ }
+
+ /* Deallocator shared by the UnicodeError subtypes: release every owned
+    reference through UnicodeError_clear(), then free via tp_free. */
+ static void
+ UnicodeError_dealloc(PyUnicodeErrorObject *self)
+ {
+     PyObject *op = (PyObject *)self;
+
+     UnicodeError_clear(self);
+     op->ob_type->tp_free(op);
+ }
+
+ /* GC traversal for the UnicodeError subtypes: visit the five extra
+    attributes, then delegate to BaseException_traverse().  Returns the
+    first non-zero value produced by `visit`, if any. */
+ int
+ UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg)
+ {
+     PyBaseExceptionObject *base = (PyBaseExceptionObject *)self;
+
+     Py_VISIT(self->encoding);
+     Py_VISIT(self->object);
+     Py_VISIT(self->start);
+     Py_VISIT(self->end);
+     Py_VISIT(self->reason);
+
+     return BaseException_traverse(base, visit, arg);
+ }
+
+ /* Attribute table shared by the UnicodeError subtypes: each entry exposes
+    one object slot of PyUnicodeErrorObject as a T_OBJECT member with
+    flags 0. */
+ static PyMemberDef UnicodeError_members[] = {
+     {"message", T_OBJECT,
+      offsetof(PyUnicodeErrorObject, message), 0,
+      PyDoc_STR("exception message")},
+     {"encoding", T_OBJECT,
+      offsetof(PyUnicodeErrorObject, encoding), 0,
+      PyDoc_STR("exception encoding")},
+     {"object", T_OBJECT,
+      offsetof(PyUnicodeErrorObject, object), 0,
+      PyDoc_STR("exception object")},
+     {"start", T_OBJECT,
+      offsetof(PyUnicodeErrorObject, start), 0,
+      PyDoc_STR("exception start")},
+     {"end", T_OBJECT,
+      offsetof(PyUnicodeErrorObject, end), 0,
+      PyDoc_STR("exception end")},
+     {"reason", T_OBJECT,
+      offsetof(PyUnicodeErrorObject, reason), 0,
+      PyDoc_STR("exception reason")},
+     {NULL}  /* Sentinel */
+ };
+
+
+/*
+ * UnicodeEncodeError extends UnicodeError
+ */
+ /* tp_new for UnicodeEncodeError: the shared allocator, with unicode as
+    the type associated with the `object` attribute. */
+ static PyObject *
+ UnicodeEncodeError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+ {
+     PyTypeObject *objecttype = &PyUnicode_Type;
+
+     return UnicodeError_new(type, args, kwds, objecttype);
+ }
+
+ /* tp_init for UnicodeEncodeError: run the BaseException initialiser, then
+    parse the five UnicodeError arguments, requiring `object` to be unicode.
+    Returns 0 on success, -1 on failure. */
+ static int
+ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
+ {
+     int status;
+
+     status = BaseException_init((PyBaseExceptionObject *)self, args, kwds);
+     if (status == -1)
+         return -1;
+
+     status = UnicodeError_init((PyUnicodeErrorObject *)self, args,
+                                kwds, &PyUnicode_Type);
+     return status;
+ }
+
+ /* str() for UnicodeEncodeError.
+
+    Produces "'<encoding>' codec can't encode character u'\xNN' in position
+    P: <reason>" when exactly one character is affected, and the
+    "characters in position P-Q" form otherwise.
+
+    Returns a new str, or NULL with an exception set when any of the
+    start/end/object/encoding/reason attributes is missing or has the wrong
+    type.  The attributes are writable from Python, so `encoding` and
+    `reason` are fetched through the type-checking accessors instead of
+    being passed blindly to PyString_AS_STRING(). */
+ static PyObject *
+ UnicodeEncodeError_str(PyObject *self)
+ {
+     PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+     PyObject *encoding = NULL;
+     PyObject *reason = NULL;
+     PyObject *result = NULL;
+     Py_ssize_t start;
+     Py_ssize_t end;
+
+     /* Both accessors also verify that `object` is a unicode object. */
+     if (PyUnicodeEncodeError_GetStart(self, &start))
+         return NULL;
+     if (PyUnicodeEncodeError_GetEnd(self, &end))
+         return NULL;
+
+     encoding = PyUnicodeEncodeError_GetEncoding(self);
+     if (encoding == NULL)
+         goto done;
+     reason = PyUnicodeEncodeError_GetReason(self);
+     if (reason == NULL)
+         goto done;
+
+     /* For an empty object the clamping yields start == -1 and end == 0;
+        the start >= 0 test keeps us from reading object[-1]. */
+     if (start >= 0 && end == start + 1) {
+         int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[start];
+         char badchar_str[20];
+
+         if (badchar <= 0xff)
+             PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar);
+         else if (badchar <= 0xffff)
+             PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar);
+         else
+             PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
+         result = PyString_FromFormat(
+             "'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s",
+             PyString_AS_STRING(encoding),
+             badchar_str,
+             start,
+             PyString_AS_STRING(reason)
+         );
+     }
+     else {
+         result = PyString_FromFormat(
+             "'%.400s' codec can't encode characters in position %zd-%zd: %.400s",
+             PyString_AS_STRING(encoding),
+             start,
+             (end-1),
+             PyString_AS_STRING(reason)
+         );
+     }
+
+ done:
+     Py_XDECREF(reason);
+     Py_XDECREF(encoding);
+     return result;
+ }
+
+ /* Type object for UnicodeEncodeError (base: UnicodeError).
+
+    The name carries EXC_MODULE_NAME like the sibling Decode/Translate
+    types, and tp_traverse/tp_clear are the UnicodeError versions so that
+    the garbage collector also sees the encoding/object/start/end/reason
+    references, not just the BaseException fields. */
+ static PyTypeObject _PyExc_UnicodeEncodeError = {
+     PyObject_HEAD_INIT(NULL)
+     0,
+     EXC_MODULE_NAME "UnicodeEncodeError",                 /* tp_name */
+     sizeof(PyUnicodeErrorObject), 0,
+     (destructor)UnicodeError_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+     (reprfunc)UnicodeEncodeError_str, 0, 0, 0,            /* tp_str */
+     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+     PyDoc_STR("Unicode encoding error."), (traverseproc)UnicodeError_traverse,
+     (inquiry)UnicodeError_clear, 0, 0, 0, 0, 0, UnicodeError_members,
+     0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
+     (initproc)UnicodeEncodeError_init, 0, UnicodeEncodeError_new,
+ };
+ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError;
+
+ /* Build a UnicodeEncodeError by calling the exception type with
+    (encoding, object[:length], start, end, reason).
+    Returns the result of PyObject_CallFunction(). */
+ PyObject *
+ PyUnicodeEncodeError_Create(
+     const char *encoding, const Py_UNICODE *object, Py_ssize_t length,
+     Py_ssize_t start, Py_ssize_t end, const char *reason)
+ {
+     PyObject *exc;
+
+     exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#nns",
+                                 encoding, object, length, start, end, reason);
+     return exc;
+ }
+
+
+/*
+ * UnicodeDecodeError extends UnicodeError
+ */
+ /* tp_new for UnicodeDecodeError: the shared allocator, with str as the
+    type associated with the `object` attribute. */
+ static PyObject *
+ UnicodeDecodeError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+ {
+     PyTypeObject *objecttype = &PyString_Type;
+
+     return UnicodeError_new(type, args, kwds, objecttype);
+ }
+
+ /* tp_init for UnicodeDecodeError: run the BaseException initialiser, then
+    parse the five UnicodeError arguments, requiring `object` to be a str.
+    Returns 0 on success, -1 on failure. */
+ static int
+ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
+ {
+     int status;
+
+     status = BaseException_init((PyBaseExceptionObject *)self, args, kwds);
+     if (status == -1)
+         return -1;
+
+     status = UnicodeError_init((PyUnicodeErrorObject *)self, args,
+                                kwds, &PyString_Type);
+     return status;
+ }
+
+ /* str() for UnicodeDecodeError.
+
+    Produces "'<encoding>' codec can't decode byte 0xNN in position P:
+    <reason>" when exactly one byte is affected, and the "bytes in position
+    P-Q" form otherwise.
+
+    Returns a new str, or NULL with an exception set when any of the
+    start/end/object/encoding/reason attributes is missing or has the wrong
+    type.  The attributes are writable from Python, so `encoding` and
+    `reason` are fetched through the type-checking accessors instead of
+    being passed blindly to PyString_AS_STRING(). */
+ static PyObject *
+ UnicodeDecodeError_str(PyObject *self)
+ {
+     PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+     PyObject *encoding = NULL;
+     PyObject *reason = NULL;
+     PyObject *result = NULL;
+     Py_ssize_t start;
+     Py_ssize_t end;
+
+     /* Both accessors also verify that `object` is a str. */
+     if (PyUnicodeDecodeError_GetStart(self, &start))
+         return NULL;
+     if (PyUnicodeDecodeError_GetEnd(self, &end))
+         return NULL;
+
+     encoding = PyUnicodeDecodeError_GetEncoding(self);
+     if (encoding == NULL)
+         goto done;
+     reason = PyUnicodeDecodeError_GetReason(self);
+     if (reason == NULL)
+         goto done;
+
+     /* For an empty object the clamping yields start == -1 and end == 0;
+        the start >= 0 test keeps us from reading object[-1]. */
+     if (start >= 0 && end == start + 1) {
+         /* FromFormat does not support %02x, so format that separately */
+         char byte[4];
+
+         PyOS_snprintf(byte, sizeof(byte), "%02x",
+                       ((int)PyString_AS_STRING(uself->object)[start])&0xff);
+         result = PyString_FromFormat(
+             "'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
+             PyString_AS_STRING(encoding),
+             byte,
+             start,
+             PyString_AS_STRING(reason)
+         );
+     }
+     else {
+         result = PyString_FromFormat(
+             "'%.400s' codec can't decode bytes in position %zd-%zd: %.400s",
+             PyString_AS_STRING(encoding),
+             start,
+             (end-1),
+             PyString_AS_STRING(reason)
+         );
+     }
+
+ done:
+     Py_XDECREF(reason);
+     Py_XDECREF(encoding);
+     return result;
+ }
+
+ /* Type object for UnicodeDecodeError (base: UnicodeError).
+
+    tp_traverse/tp_clear are the UnicodeError versions, as on the
+    UnicodeTranslateError type, so that the garbage collector also sees the
+    encoding/object/start/end/reason references, not just the BaseException
+    fields. */
+ static PyTypeObject _PyExc_UnicodeDecodeError = {
+     PyObject_HEAD_INIT(NULL)
+     0,
+     EXC_MODULE_NAME "UnicodeDecodeError",                 /* tp_name */
+     sizeof(PyUnicodeErrorObject), 0,
+     (destructor)UnicodeError_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+     (reprfunc)UnicodeDecodeError_str, 0, 0, 0,            /* tp_str */
+     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+     PyDoc_STR("Unicode decoding error."), (traverseproc)UnicodeError_traverse,
+     (inquiry)UnicodeError_clear, 0, 0, 0, 0, 0, UnicodeError_members,
+     0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
+     (initproc)UnicodeDecodeError_init, 0, UnicodeDecodeError_new,
+ };
+ PyObject *PyExc_UnicodeDecodeError = (PyObject *)&_PyExc_UnicodeDecodeError;
+
+ /* Build a UnicodeDecodeError by calling the exception type with
+    (encoding, object[:length], start, end, reason).  In debug builds
+    `length`, `start` and `end` are asserted to be below INT_MAX.
+    Returns the result of PyObject_CallFunction(). */
+ PyObject *
+ PyUnicodeDecodeError_Create(
+     const char *encoding, const char *object, Py_ssize_t length,
+     Py_ssize_t start, Py_ssize_t end, const char *reason)
+ {
+     PyObject *exc;
+
+     assert(length < INT_MAX);
+     assert(start < INT_MAX);
+     assert(end < INT_MAX);
+
+     exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
+                                 encoding, object, length, start, end, reason);
+     return exc;
+ }
+
+
+/*
+ * UnicodeTranslateError extends UnicodeError
+ */
+ /* tp_new for UnicodeTranslateError: build the instance with
+    BaseException_new() and pass each of the five UnicodeError fields to
+    MAKE_IT_NONE. */
+ static PyObject *
+ UnicodeTranslateError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+ {
+     PyObject *created = BaseException_new(type, args, kwds);
+     PyUnicodeErrorObject *self = (PyUnicodeErrorObject *)created;
+
+     if (created == NULL)
+         return NULL;
+
+     MAKE_IT_NONE(self->encoding);
+     MAKE_IT_NONE(self->object);
+     MAKE_IT_NONE(self->start);
+     MAKE_IT_NONE(self->end);
+     MAKE_IT_NONE(self->reason);
+
+     return created;
+ }
+
+ /* tp_init for UnicodeTranslateError.
+
+    Runs the BaseException initialiser, releases the current object/start/
+    end/reason references, then parses exactly four positional arguments
+        (object: unicode, start: int, end: int, reason: str)
+    and stores a new reference to each.  `encoding` is not touched.
+    Returns 0 on success; on a parse failure the four fields are reset to
+    NULL and -1 is returned. */
+ static int
+ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
+                            PyObject *kwds)
+ {
+     int parsed;
+
+     if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+         return -1;
+
+     Py_CLEAR(self->object);
+     Py_CLEAR(self->start);
+     Py_CLEAR(self->end);
+     Py_CLEAR(self->reason);
+
+     parsed = PyArg_ParseTuple(args, "O!O!O!O!",
+                               &PyUnicode_Type, &self->object,
+                               &PyInt_Type, &self->start,
+                               &PyInt_Type, &self->end,
+                               &PyString_Type, &self->reason);
+     if (!parsed) {
+         /* Some slots may already hold borrowed pointers; forget them. */
+         self->object = self->start = self->end = self->reason = NULL;
+         return -1;
+     }
+
+     /* The parsed pointers are borrowed from `args`; take ownership. */
+     Py_INCREF(self->object);
+     Py_INCREF(self->start);
+     Py_INCREF(self->end);
+     Py_INCREF(self->reason);
+
+     return 0;
+ }
+
+
+ /* str() for UnicodeTranslateError.
+
+    Produces "can't translate character u'\xNN' in position P: <reason>"
+    when exactly one character is affected, and the "characters in position
+    P-Q" form otherwise.
+
+    Returns a new str, or NULL with an exception set when any of the
+    start/end/object/reason attributes is missing or has the wrong type.
+    The attributes are writable from Python, so `reason` is fetched through
+    the type-checking accessor instead of being passed blindly to
+    PyString_AS_STRING(). */
+ static PyObject *
+ UnicodeTranslateError_str(PyObject *self)
+ {
+     PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+     PyObject *reason;
+     PyObject *result;
+     Py_ssize_t start;
+     Py_ssize_t end;
+
+     /* Both accessors also verify that `object` is a unicode object. */
+     if (PyUnicodeTranslateError_GetStart(self, &start))
+         return NULL;
+     if (PyUnicodeTranslateError_GetEnd(self, &end))
+         return NULL;
+
+     reason = PyUnicodeTranslateError_GetReason(self);
+     if (reason == NULL)
+         return NULL;
+
+     /* For an empty object the clamping yields start == -1 and end == 0;
+        the start >= 0 test keeps us from reading object[-1]. */
+     if (start >= 0 && end == start + 1) {
+         int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[start];
+         char badchar_str[20];
+
+         if (badchar <= 0xff)
+             PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar);
+         else if (badchar <= 0xffff)
+             PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar);
+         else
+             PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
+         result = PyString_FromFormat(
+             "can't translate character u'\\%s' in position %zd: %.400s",
+             badchar_str,
+             start,
+             PyString_AS_STRING(reason)
+         );
+     }
+     else {
+         result = PyString_FromFormat(
+             "can't translate characters in position %zd-%zd: %.400s",
+             start,
+             (end-1),
+             PyString_AS_STRING(reason)
+         );
+     }
+
+     Py_DECREF(reason);
+     return result;
+ }
+
+ /* Type object for UnicodeTranslateError (base: UnicodeError).
+    The docstring now says "translation"; it had been copied verbatim from
+    the decode error type. */
+ static PyTypeObject _PyExc_UnicodeTranslateError = {
+     PyObject_HEAD_INIT(NULL)
+     0,
+     EXC_MODULE_NAME "UnicodeTranslateError",              /* tp_name */
+     sizeof(PyUnicodeErrorObject), 0,
+     (destructor)UnicodeError_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+     (reprfunc)UnicodeTranslateError_str, 0, 0, 0,         /* tp_str */
+     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+     PyDoc_STR("Unicode translation error."), (traverseproc)UnicodeError_traverse,
+     (inquiry)UnicodeError_clear, 0, 0, 0, 0, 0, UnicodeError_members,
+     0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
+     (initproc)UnicodeTranslateError_init, 0, UnicodeTranslateError_new,
+ };
+ PyObject *PyExc_UnicodeTranslateError = (PyObject *)&_PyExc_UnicodeTranslateError;
+
+ /* Build a UnicodeTranslateError by calling the exception type with
+    (object[:length], start, end, reason).
+    Returns the result of PyObject_CallFunction(). */
+ PyObject *
+ PyUnicodeTranslateError_Create(
+     const Py_UNICODE *object, Py_ssize_t length,
+     Py_ssize_t start, Py_ssize_t end, const char *reason)
+ {
+     PyObject *exc;
+
+     exc = PyObject_CallFunction(PyExc_UnicodeTranslateError, "u#nns",
+                                 object, length, start, end, reason);
+     return exc;
+ }
+#endif
+
+
+/*
+ * AssertionError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, AssertionError,
+ "Assertion failed.");
+
+
+/*
+ * ArithmeticError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, ArithmeticError,
+ "Base class for arithmetic errors.");
+
+
+/*
+ * FloatingPointError extends ArithmeticError
+ */
+SimpleExtendsException(PyExc_ArithmeticError, FloatingPointError,
+ "Floating point operation failed.");
+
+
+/*
+ * OverflowError extends ArithmeticError
+ */
+SimpleExtendsException(PyExc_ArithmeticError, OverflowError,
+ "Result too large to be represented.");
+
+
+/*
+ * ZeroDivisionError extends ArithmeticError
+ */
+SimpleExtendsException(PyExc_ArithmeticError, ZeroDivisionError,
+ "Second argument to a division or modulo operation was zero.");
+
+
+/*
+ * SystemError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, SystemError,
+ "Internal error in the Python interpreter.\n"
+ "\n"
+ "Please report this to the Python maintainer, along with the traceback,\n"
+ "the Python version, and the hardware/OS platform and version.");
+
+
+/*
+ * ReferenceError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, ReferenceError,
+ "Weak ref proxy used after referent went away.");
+
+
+/*
+ * MemoryError extends StandardError
+ */
+SimpleExtendsException(PyExc_StandardError, MemoryError, "Out of memory.");
+
+
+/* Warning category docstrings */
+
+/*
+ * Warning extends Exception
+ */
+SimpleExtendsException(PyExc_Exception, Warning,
+ "Base class for warning categories.");
+
+
+/*
+ * UserWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, UserWarning,
+ "Base class for warnings generated by user code.");
+
+
+/*
+ * DeprecationWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, DeprecationWarning,
+ "Base class for warnings about deprecated features.");
+
+
+/*
+ * PendingDeprecationWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, PendingDeprecationWarning,
+ "Base class for warnings about features which will be deprecated\n"
+ "in the future.");
+
+
+/*
+ * SyntaxWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, SyntaxWarning,
+ "Base class for warnings about dubious syntax.");
+
+
+/*
+ * RuntimeWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, RuntimeWarning,
+ "Base class for warnings about dubious runtime behavior.");
+
+
+/*
+ * FutureWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, FutureWarning,
+ "Base class for warnings about constructs that will change semantically\n"
+ "in the future.");
+
+
+/*
+ * ImportWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, ImportWarning,
+ "Base class for warnings about probable mistakes in module imports");
+
+
+/* Pre-computed MemoryError instance. Best to create this as early as
+ * possible and not wait until a MemoryError is actually raised!
+ */
+PyObject *PyExc_MemoryErrorInst=NULL;
+
+/* module global functions */
+static PyMethodDef functions[] = {
+ /* Sentinel */
+ {NULL, NULL}
+};
+
+ /* Ready the static type object _PyExc_<TYPE>; abort the interpreter if
+    PyType_Ready() fails.  Expands to one compound statement, so it is used
+    without a trailing semicolon. */
+ #define PRE_INIT(TYPE) { \
+     if (PyType_Ready(&_PyExc_ ## TYPE) < 0) \
+         Py_FatalError("exceptions bootstrapping error."); \
+ }
+
+ /* Publish PyExc_<TYPE> under its own name in module `m` and in the
+    dictionary `bdict` (both must be in scope at the point of use); abort
+    the interpreter if the dictionary insertion fails.  Expands to one
+    compound statement, so it is used without a trailing semicolon. */
+ #define POST_INIT(TYPE) { \
+     Py_INCREF(PyExc_ ## TYPE); \
+     PyModule_AddObject(m, # TYPE, PyExc_ ## TYPE); \
+     if (PyDict_SetItemString(bdict, # TYPE, PyExc_ ## TYPE)) \
+         Py_FatalError("Module dictionary insertion problem."); \
+ }
+
+ PyMODINIT_FUNC
+ _PyExc_Init(void)
+ { /* Bootstrap the built-in exceptions: ready every exception type
+      (PRE_INIT), create the "exceptions" module, publish each type in
+      that module and in the __builtin__ dictionary (POST_INIT), and
+      pre-allocate the shared MemoryError instance.  Most failures are
+      fatal; a failure to create the module just returns. */
+ PyObject *m, *bltinmod, *bdict;
+
+ PRE_INIT(BaseException) /* each PRE_INIT calls PyType_Ready() on the type */
+ PRE_INIT(Exception)
+ PRE_INIT(StandardError)
+ PRE_INIT(TypeError)
+ PRE_INIT(StopIteration)
+ PRE_INIT(GeneratorExit)
+ PRE_INIT(SystemExit)
+ PRE_INIT(KeyboardInterrupt)
+ PRE_INIT(ImportError)
+ PRE_INIT(EnvironmentError)
+ PRE_INIT(IOError)
+ PRE_INIT(OSError)
+ #ifdef MS_WINDOWS
+ PRE_INIT(WindowsError)
+ #endif
+ #ifdef __VMS
+ PRE_INIT(VMSError)
+ #endif
+ PRE_INIT(EOFError)
+ PRE_INIT(RuntimeError)
+ PRE_INIT(NotImplementedError)
+ PRE_INIT(NameError)
+ PRE_INIT(UnboundLocalError)
+ PRE_INIT(AttributeError)
+ PRE_INIT(SyntaxError)
+ PRE_INIT(IndentationError)
+ PRE_INIT(TabError)
+ PRE_INIT(LookupError)
+ PRE_INIT(IndexError)
+ PRE_INIT(KeyError)
+ PRE_INIT(ValueError)
+ PRE_INIT(UnicodeError)
+ #ifdef Py_USING_UNICODE
+ PRE_INIT(UnicodeEncodeError)
+ PRE_INIT(UnicodeDecodeError)
+ PRE_INIT(UnicodeTranslateError)
+ #endif
+ PRE_INIT(AssertionError)
+ PRE_INIT(ArithmeticError)
+ PRE_INIT(FloatingPointError)
+ PRE_INIT(OverflowError)
+ PRE_INIT(ZeroDivisionError)
+ PRE_INIT(SystemError)
+ PRE_INIT(ReferenceError)
+ PRE_INIT(MemoryError)
+ PRE_INIT(Warning)
+ PRE_INIT(UserWarning)
+ PRE_INIT(DeprecationWarning)
+ PRE_INIT(PendingDeprecationWarning)
+ PRE_INIT(SyntaxWarning)
+ PRE_INIT(RuntimeWarning)
+ PRE_INIT(FutureWarning)
+ PRE_INIT(ImportWarning)
+
+ m = Py_InitModule4("exceptions", functions, exceptions_doc,
+ (PyObject *)NULL, PYTHON_API_VERSION);
+ if (m == NULL) return; /* module creation failed: give up quietly */
+
+ bltinmod = PyImport_ImportModule("__builtin__"); /* released at the end */
+ if (bltinmod == NULL)
+ Py_FatalError("exceptions bootstrapping error.");
+ bdict = PyModule_GetDict(bltinmod);
+ if (bdict == NULL)
+ Py_FatalError("exceptions bootstrapping error.");
+
+ POST_INIT(BaseException) /* each POST_INIT adds the type to m and bdict */
+ POST_INIT(Exception)
+ POST_INIT(StandardError)
+ POST_INIT(TypeError)
+ POST_INIT(StopIteration)
+ POST_INIT(GeneratorExit)
+ POST_INIT(SystemExit)
+ POST_INIT(KeyboardInterrupt)
+ POST_INIT(ImportError)
+ POST_INIT(EnvironmentError)
+ POST_INIT(IOError)
+ POST_INIT(OSError)
+ #ifdef MS_WINDOWS
+ POST_INIT(WindowsError)
+ #endif
+ #ifdef __VMS
+ POST_INIT(VMSError)
+ #endif
+ POST_INIT(EOFError)
+ POST_INIT(RuntimeError)
+ POST_INIT(NotImplementedError)
+ POST_INIT(NameError)
+ POST_INIT(UnboundLocalError)
+ POST_INIT(AttributeError)
+ POST_INIT(SyntaxError)
+ POST_INIT(IndentationError)
+ POST_INIT(TabError)
+ POST_INIT(LookupError)
+ POST_INIT(IndexError)
+ POST_INIT(KeyError)
+ POST_INIT(ValueError)
+ POST_INIT(UnicodeError)
+ #ifdef Py_USING_UNICODE
+ POST_INIT(UnicodeEncodeError)
+ POST_INIT(UnicodeDecodeError)
+ POST_INIT(UnicodeTranslateError)
+ #endif
+ POST_INIT(AssertionError)
+ POST_INIT(ArithmeticError)
+ POST_INIT(FloatingPointError)
+ POST_INIT(OverflowError)
+ POST_INIT(ZeroDivisionError)
+ POST_INIT(SystemError)
+ POST_INIT(ReferenceError)
+ POST_INIT(MemoryError)
+ POST_INIT(Warning)
+ POST_INIT(UserWarning)
+ POST_INIT(DeprecationWarning)
+ POST_INIT(PendingDeprecationWarning)
+ POST_INIT(SyntaxWarning)
+ POST_INIT(RuntimeWarning)
+ POST_INIT(FutureWarning)
+ POST_INIT(ImportWarning)
+
+ PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL); /* built up front, before memory can run short */
+ if (!PyExc_MemoryErrorInst)
+ Py_FatalError("Cannot pre-allocate MemoryError instance\n");
+
+ Py_DECREF(bltinmod);
+ }
+
+ /* Release the pre-allocated MemoryError instance and reset the global
+    pointer to NULL. */
+ void
+ _PyExc_Fini(void)
+ {
+     PyObject *inst = PyExc_MemoryErrorInst;
+
+     Py_XDECREF(inst);
+     PyExc_MemoryErrorInst = NULL;
+ }
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index ab2616d..997792a 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -136,46 +136,45 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
/* check for known incorrect mode strings - problem is, platforms are
free to accept any mode characters they like and are supposed to
ignore stuff they don't understand... write or append mode with
- universal newline support is expressly forbidden by PEP 278. */
+ universal newline support is expressly forbidden by PEP 278.
+ Additionally, remove the 'U' from the mode string as platforms
+ won't know what it is. */
/* zero return is kewl - one is un-kewl */
static int
-check_the_mode(char *mode)
+sanitize_the_mode(char *mode)
{
+ char *upos;
size_t len = strlen(mode);
- switch (len) {
- case 0:
+ if (!len) {
PyErr_SetString(PyExc_ValueError, "empty mode string");
return 1;
+ }
- /* reject wU, aU */
- case 2:
- switch (mode[0]) {
- case 'w':
- case 'a':
- if (mode[1] == 'U') {
- PyErr_SetString(PyExc_ValueError,
- "invalid mode string");
- return 1;
- }
- break;
+ upos = strchr(mode, 'U');
+ if (upos) {
+ memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
+
+ if (mode[0] == 'w' || mode[0] == 'a') {
+ PyErr_Format(PyExc_ValueError, "universal newline "
+ "mode can only be used with modes "
+ "starting with 'r'");
+ return 1;
}
- break;
- /* reject w+U, a+U, wU+, aU+ */
- case 3:
- switch (mode[0]) {
- case 'w':
- case 'a':
- if ((mode[1] == '+' && mode[2] == 'U') ||
- (mode[1] == 'U' && mode[2] == '+')) {
- PyErr_SetString(PyExc_ValueError,
- "invalid mode string");
- return 1;
- }
- break;
+ if (mode[0] != 'r') {
+ memmove(mode+1, mode, strlen(mode)+1);
+ mode[0] = 'r';
}
- break;
+
+ if (!strchr(mode, 'b')) {
+ memmove(mode+2, mode+1, strlen(mode));
+ mode[1] = 'b';
+ }
+ } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
+ PyErr_Format(PyExc_ValueError, "mode string must begin with "
+ "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
+ return 1;
}
return 0;
@@ -184,6 +183,7 @@ check_the_mode(char *mode)
static PyObject *
open_the_file(PyFileObject *f, char *name, char *mode)
{
+ char *newmode;
assert(f != NULL);
assert(PyFile_Check(f));
#ifdef MS_WINDOWS
@@ -195,8 +195,18 @@ open_the_file(PyFileObject *f, char *name, char *mode)
assert(mode != NULL);
assert(f->f_fp == NULL);
- if (check_the_mode(mode))
+ /* probably need to replace 'U' by 'rb' */
+ newmode = PyMem_MALLOC(strlen(mode) + 3);
+ if (!newmode) {
+ PyErr_NoMemory();
return NULL;
+ }
+ strcpy(newmode, mode);
+
+ if (sanitize_the_mode(newmode)) {
+ f = NULL;
+ goto cleanup;
+ }
/* rexec.py can't stop a user from getting the file() constructor --
all they have to do is get *any* file object f, and then do
@@ -204,16 +214,15 @@ open_the_file(PyFileObject *f, char *name, char *mode)
if (PyEval_GetRestricted()) {
PyErr_SetString(PyExc_IOError,
"file() constructor not accessible in restricted mode");
- return NULL;
+ f = NULL;
+ goto cleanup;
}
errno = 0;
- if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
- mode = "rb";
#ifdef MS_WINDOWS
if (PyUnicode_Check(f->f_name)) {
PyObject *wmode;
- wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
+ wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
if (f->f_name && wmode) {
Py_BEGIN_ALLOW_THREADS
/* PyUnicode_AS_UNICODE OK without thread
@@ -227,7 +236,7 @@ open_the_file(PyFileObject *f, char *name, char *mode)
#endif
if (NULL == f->f_fp && NULL != name) {
Py_BEGIN_ALLOW_THREADS
- f->f_fp = fopen(name, mode);
+ f->f_fp = fopen(name, newmode);
Py_END_ALLOW_THREADS
}
@@ -254,6 +263,10 @@ open_the_file(PyFileObject *f, char *name, char *mode)
}
if (f != NULL)
f = dircheck(f);
+
+cleanup:
+ PyMem_FREE(newmode);
+
return (PyObject *)f;
}
@@ -1705,9 +1718,6 @@ PyDoc_STRVAR(close_doc,
PyDoc_STRVAR(isatty_doc,
"isatty() -> true or false. True if the file is connected to a tty device.");
-PyDoc_STRVAR(context_doc,
- "__context__() -> self.");
-
PyDoc_STRVAR(enter_doc,
"__enter__() -> self.");
@@ -1727,7 +1737,6 @@ static PyMethodDef file_methods[] = {
{"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
{"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
{"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
- {"__context__", (PyCFunction)file_self, METH_NOARGS, context_doc},
{"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc},
{"__exit__", (PyCFunction)file_close, METH_VARARGS, close_doc},
{NULL, NULL} /* sentinel */
@@ -2023,10 +2032,6 @@ PyDoc_STR(
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
-)
-PyDoc_STR(
-"\n"
-"Note: open() is an alias for file()."
);
PyTypeObject PyFile_Type = {
@@ -2447,4 +2452,3 @@ Py_UniversalNewlineFread(char *buf, size_t n,
#ifdef __cplusplus
}
#endif
-
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 8708690..74f1315 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -384,7 +384,7 @@ float_richcompare(PyObject *v, PyObject *w, int op)
if (PyFloat_Check(w))
j = PyFloat_AS_DOUBLE(w);
- else if (Py_IS_INFINITY(i) || Py_IS_NAN(i)) {
+ else if (!Py_IS_FINITE(i)) {
if (PyInt_Check(w) || PyLong_Check(w))
/* If i is an infinity, its magnitude exceeds any
* finite integer, so it doesn't matter which int we
@@ -783,10 +783,7 @@ float_pow(PyObject *v, PyObject *w, PyObject *z)
* bug; we let that slide in math.pow() (which currently
* reflects all platform accidents), but not for Python's **.
*/
- if (iv == -1.0 && !Py_IS_INFINITY(iw) && iw == iw) {
- /* XXX the "iw == iw" was to weed out NaNs. This
- * XXX doesn't actually work on all platforms.
- */
+ if (iv == -1.0 && Py_IS_FINITE(iw)) {
/* Return 1 if iw is even, -1 if iw is odd; there's
* no guarantee that any C integral type is big
* enough to hold iw, so we have to check this
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 9aabc7a..fcb5e4e 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -350,13 +350,32 @@ static PyGetSetDef frame_getsetlist[] = {
};
/* Stack frames are allocated and deallocated at a considerable rate.
- In an attempt to improve the speed of function calls, we maintain a
- separate free list of stack frames (just like integers are
- allocated in a special way -- see intobject.c). When a stack frame
- is on the free list, only the following members have a meaning:
+ In an attempt to improve the speed of function calls, we:
+
+ 1. Hold a single "zombie" frame on each code object. This retains
+ the allocated and initialised frame object from an invocation of
+ the code object. The zombie is reanimated the next time we need a
+ frame object for that code object. Doing this saves the malloc/
+ realloc required when using a free_list frame that isn't the
+ correct size. It also saves some field initialisation.
+
+ In zombie mode, no field of PyFrameObject holds a reference, but
+ the following fields are still valid:
+
+ * ob_type, ob_size, f_code, f_valuestack;
+
+ * f_locals, f_trace,
+ f_exc_type, f_exc_value, f_exc_traceback are NULL;
+
+ * f_localsplus does not require re-allocation and
+ the local variables in f_localsplus are NULL.
+
+ 2. We also maintain a separate free list of stack frames (just like
+ integers are allocated in a special way -- see intobject.c). When
+ a stack frame is on the free list, only the following members have
+ a meaning:
ob_type == &Frametype
f_back next item on free list, or NULL
- f_nlocals number of locals
f_stacksize size of value stack
ob_size size of localsplus
Note that the value and block stacks are preserved -- this can save
@@ -380,41 +399,43 @@ static int numfree = 0; /* number of frames currently in free_list */
static void
frame_dealloc(PyFrameObject *f)
{
- int i, slots;
- PyObject **fastlocals;
- PyObject **p;
+ PyObject **p, **valuestack;
+ PyCodeObject *co;
PyObject_GC_UnTrack(f);
Py_TRASHCAN_SAFE_BEGIN(f)
/* Kill all local variables */
- slots = f->f_nlocals + f->f_ncells + f->f_nfreevars;
- fastlocals = f->f_localsplus;
- for (i = slots; --i >= 0; ++fastlocals) {
- Py_XDECREF(*fastlocals);
- }
+ valuestack = f->f_valuestack;
+ for (p = f->f_localsplus; p < valuestack; p++)
+ Py_CLEAR(*p);
/* Free stack */
if (f->f_stacktop != NULL) {
- for (p = f->f_valuestack; p < f->f_stacktop; p++)
+ for (p = valuestack; p < f->f_stacktop; p++)
Py_XDECREF(*p);
}
Py_XDECREF(f->f_back);
- Py_DECREF(f->f_code);
Py_DECREF(f->f_builtins);
Py_DECREF(f->f_globals);
- Py_XDECREF(f->f_locals);
- Py_XDECREF(f->f_trace);
- Py_XDECREF(f->f_exc_type);
- Py_XDECREF(f->f_exc_value);
- Py_XDECREF(f->f_exc_traceback);
- if (numfree < MAXFREELIST) {
+ Py_CLEAR(f->f_locals);
+ Py_CLEAR(f->f_trace);
+ Py_CLEAR(f->f_exc_type);
+ Py_CLEAR(f->f_exc_value);
+ Py_CLEAR(f->f_exc_traceback);
+
+ co = f->f_code;
+ if (co != NULL && co->co_zombieframe == NULL)
+ co->co_zombieframe = f;
+ else if (numfree < MAXFREELIST) {
++numfree;
f->f_back = free_list;
free_list = f;
- }
- else
+ }
+ else
PyObject_GC_Del(f);
+
+ Py_XDECREF(co);
Py_TRASHCAN_SAFE_END(f)
}
@@ -435,7 +456,7 @@ frame_traverse(PyFrameObject *f, visitproc visit, void *arg)
Py_VISIT(f->f_exc_traceback);
/* locals */
- slots = f->f_nlocals + f->f_ncells + f->f_nfreevars;
+ slots = f->f_code->co_nlocals + PyTuple_GET_SIZE(f->f_code->co_cellvars) + PyTuple_GET_SIZE(f->f_code->co_freevars);
fastlocals = f->f_localsplus;
for (i = slots; --i >= 0; ++fastlocals)
Py_VISIT(*fastlocals);
@@ -468,7 +489,7 @@ frame_clear(PyFrameObject *f)
Py_CLEAR(f->f_trace);
/* locals */
- slots = f->f_nlocals + f->f_ncells + f->f_nfreevars;
+ slots = f->f_code->co_nlocals + PyTuple_GET_SIZE(f->f_code->co_cellvars) + PyTuple_GET_SIZE(f->f_code->co_freevars);
fastlocals = f->f_localsplus;
for (i = slots; --i >= 0; ++fastlocals)
Py_CLEAR(*fastlocals);
@@ -532,7 +553,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
PyFrameObject *back = tstate->frame;
PyFrameObject *f;
PyObject *builtins;
- Py_ssize_t extras, ncells, nfrees, i;
+ Py_ssize_t i;
#ifdef Py_DEBUG
if (code == NULL || globals == NULL || !PyDict_Check(globals) ||
@@ -541,9 +562,6 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
return NULL;
}
#endif
- ncells = PyTuple_GET_SIZE(code->co_cellvars);
- nfrees = PyTuple_GET_SIZE(code->co_freevars);
- extras = code->co_stacksize + code->co_nlocals + ncells + nfrees;
if (back == NULL || back->f_globals != globals) {
builtins = PyDict_GetItem(globals, builtin_object);
if (builtins) {
@@ -574,71 +592,82 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
assert(builtins != NULL && PyDict_Check(builtins));
Py_INCREF(builtins);
}
- if (free_list == NULL) {
- f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras);
- if (f == NULL) {
- Py_DECREF(builtins);
- return NULL;
- }
+ if (code->co_zombieframe != NULL) {
+ f = code->co_zombieframe;
+ code->co_zombieframe = NULL;
+ _Py_NewReference((PyObject *)f);
+ assert(f->f_code == code);
}
- else {
- assert(numfree > 0);
- --numfree;
- f = free_list;
- free_list = free_list->f_back;
- if (f->ob_size < extras) {
- f = PyObject_GC_Resize(PyFrameObject, f, extras);
- if (f == NULL) {
- Py_DECREF(builtins);
- return NULL;
- }
- }
- _Py_NewReference((PyObject *)f);
+ else {
+ Py_ssize_t extras, ncells, nfrees;
+ ncells = PyTuple_GET_SIZE(code->co_cellvars);
+ nfrees = PyTuple_GET_SIZE(code->co_freevars);
+ extras = code->co_stacksize + code->co_nlocals + ncells +
+ nfrees;
+ if (free_list == NULL) {
+ f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type,
+ extras);
+ if (f == NULL) {
+ Py_DECREF(builtins);
+ return NULL;
+ }
+ }
+ else {
+ assert(numfree > 0);
+ --numfree;
+ f = free_list;
+ free_list = free_list->f_back;
+ if (f->ob_size < extras) {
+ f = PyObject_GC_Resize(PyFrameObject, f, extras);
+ if (f == NULL) {
+ Py_DECREF(builtins);
+ return NULL;
+ }
+ }
+ _Py_NewReference((PyObject *)f);
+ }
+
+ f->f_code = code;
+ extras = code->co_nlocals + ncells + nfrees;
+ f->f_valuestack = f->f_localsplus + extras;
+ for (i=0; i<extras; i++)
+ f->f_localsplus[i] = NULL;
+ f->f_locals = NULL;
+ f->f_trace = NULL;
+ f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
}
f->f_builtins = builtins;
Py_XINCREF(back);
f->f_back = back;
Py_INCREF(code);
- f->f_code = code;
Py_INCREF(globals);
f->f_globals = globals;
/* Most functions have CO_NEWLOCALS and CO_OPTIMIZED set. */
if ((code->co_flags & (CO_NEWLOCALS | CO_OPTIMIZED)) ==
(CO_NEWLOCALS | CO_OPTIMIZED))
- locals = NULL; /* PyFrame_FastToLocals() will set. */
+ ; /* f_locals = NULL; will be set by PyFrame_FastToLocals() */
else if (code->co_flags & CO_NEWLOCALS) {
locals = PyDict_New();
if (locals == NULL) {
Py_DECREF(f);
return NULL;
}
+ f->f_locals = locals;
}
else {
if (locals == NULL)
locals = globals;
Py_INCREF(locals);
+ f->f_locals = locals;
}
- f->f_locals = locals;
- f->f_trace = NULL;
- f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
f->f_tstate = tstate;
f->f_lasti = -1;
f->f_lineno = code->co_firstlineno;
f->f_restricted = (builtins != tstate->interp->builtins);
f->f_iblock = 0;
- f->f_nlocals = code->co_nlocals;
- f->f_stacksize = code->co_stacksize;
- f->f_ncells = ncells;
- f->f_nfreevars = nfrees;
-
- extras = f->f_nlocals + ncells + nfrees;
- /* Tim said it's ok to replace memset */
- for (i=0; i<extras; i++)
- f->f_localsplus[i] = NULL;
-
- f->f_valuestack = f->f_localsplus + extras;
- f->f_stacktop = f->f_valuestack;
+
+ f->f_stacktop = f->f_valuestack;
_PyObject_GC_TRACK(f);
return f;
}
@@ -725,7 +754,9 @@ PyFrame_FastToLocals(PyFrameObject *f)
PyObject *locals, *map;
PyObject **fast;
PyObject *error_type, *error_value, *error_traceback;
+ PyCodeObject *co;
Py_ssize_t j;
+ int ncells, nfreevars;
if (f == NULL)
return;
locals = f->f_locals;
@@ -736,27 +767,24 @@ PyFrame_FastToLocals(PyFrameObject *f)
return;
}
}
- map = f->f_code->co_varnames;
+ co = f->f_code;
+ map = co->co_varnames;
if (!PyTuple_Check(map))
return;
PyErr_Fetch(&error_type, &error_value, &error_traceback);
fast = f->f_localsplus;
j = PyTuple_GET_SIZE(map);
- if (j > f->f_nlocals)
- j = f->f_nlocals;
- if (f->f_nlocals)
+ if (j > co->co_nlocals)
+ j = co->co_nlocals;
+ if (co->co_nlocals)
map_to_dict(map, j, locals, fast, 0);
- if (f->f_ncells || f->f_nfreevars) {
- if (!(PyTuple_Check(f->f_code->co_cellvars)
- && PyTuple_Check(f->f_code->co_freevars))) {
- return;
- }
- map_to_dict(f->f_code->co_cellvars,
- PyTuple_GET_SIZE(f->f_code->co_cellvars),
- locals, fast + f->f_nlocals, 1);
- map_to_dict(f->f_code->co_freevars,
- PyTuple_GET_SIZE(f->f_code->co_freevars),
- locals, fast + f->f_nlocals + f->f_ncells, 1);
+ ncells = PyTuple_GET_SIZE(co->co_cellvars);
+ nfreevars = PyTuple_GET_SIZE(co->co_freevars);
+ if (ncells || nfreevars) {
+ map_to_dict(co->co_cellvars, ncells,
+ locals, fast + co->co_nlocals, 1);
+ map_to_dict(co->co_freevars, nfreevars,
+ locals, fast + co->co_nlocals + ncells, 1);
}
PyErr_Restore(error_type, error_value, error_traceback);
}
@@ -768,11 +796,14 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
PyObject *locals, *map;
PyObject **fast;
PyObject *error_type, *error_value, *error_traceback;
+ PyCodeObject *co;
Py_ssize_t j;
+ int ncells, nfreevars;
if (f == NULL)
return;
locals = f->f_locals;
- map = f->f_code->co_varnames;
+ co = f->f_code;
+ map = co->co_varnames;
if (locals == NULL)
return;
if (!PyTuple_Check(map))
@@ -780,21 +811,18 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
PyErr_Fetch(&error_type, &error_value, &error_traceback);
fast = f->f_localsplus;
j = PyTuple_GET_SIZE(map);
- if (j > f->f_nlocals)
- j = f->f_nlocals;
- if (f->f_nlocals)
- dict_to_map(f->f_code->co_varnames, j, locals, fast, 0, clear);
- if (f->f_ncells || f->f_nfreevars) {
- if (!(PyTuple_Check(f->f_code->co_cellvars)
- && PyTuple_Check(f->f_code->co_freevars)))
- return;
- dict_to_map(f->f_code->co_cellvars,
- PyTuple_GET_SIZE(f->f_code->co_cellvars),
- locals, fast + f->f_nlocals, 1, clear);
- dict_to_map(f->f_code->co_freevars,
- PyTuple_GET_SIZE(f->f_code->co_freevars),
- locals, fast + f->f_nlocals + f->f_ncells, 1,
- clear);
+ if (j > co->co_nlocals)
+ j = co->co_nlocals;
+ if (co->co_nlocals)
+ dict_to_map(co->co_varnames, j, locals, fast, 0, clear);
+ ncells = PyTuple_GET_SIZE(co->co_cellvars);
+ nfreevars = PyTuple_GET_SIZE(co->co_freevars);
+ if (ncells || nfreevars) {
+ dict_to_map(co->co_cellvars, ncells,
+ locals, fast + co->co_nlocals, 1, clear);
+ dict_to_map(co->co_freevars, nfreevars,
+ locals, fast + co->co_nlocals + ncells, 1,
+ clear);
}
PyErr_Restore(error_type, error_value, error_traceback);
}
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 3073923..cd02eb3 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -40,7 +40,7 @@ static PyObject *long_format(PyObject *aa, int base, int addL);
#define SIGCHECK(PyTryBlock) \
if (--_Py_Ticker < 0) { \
_Py_Ticker = _Py_CheckInterval; \
- if (PyErr_CheckSignals()) { PyTryBlock; } \
+ if (PyErr_CheckSignals()) PyTryBlock \
}
/* Normalize (remove leading zeros from) a long int object.
@@ -66,8 +66,7 @@ long_normalize(register PyLongObject *v)
PyLongObject *
_PyLong_New(Py_ssize_t size)
{
- if (size > INT_MAX) {
- /* XXX: Fix this check when ob_size becomes ssize_t */
+ if (size > PY_SSIZE_T_MAX) {
PyErr_NoMemory();
return NULL;
}
@@ -278,9 +277,9 @@ _long_as_ssize_t(PyObject *vv) {
overflow:
PyErr_SetString(PyExc_OverflowError,
"long int too large to convert to int");
- if (sign > 0)
+ if (sign > 0)
return PY_SSIZE_T_MAX;
- else
+ else
return PY_SSIZE_T_MIN;
}
@@ -845,11 +844,36 @@ PyLong_AsVoidPtr(PyObject *vv)
PyObject *
PyLong_FromLongLong(PY_LONG_LONG ival)
{
- PY_LONG_LONG bytes = ival;
- int one = 1;
- return _PyLong_FromByteArray(
- (unsigned char *)&bytes,
- SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1);
+ PyLongObject *v;
+ unsigned PY_LONG_LONG t; /* unsigned so >> doesn't propagate sign bit */
+ int ndigits = 0;
+ int negative = 0;
+
+ if (ival < 0) {
+ ival = -ival;
+ negative = 1;
+ }
+
+ /* Count the number of Python digits.
+ We used to pick 5 ("big enough for anything"), but that's a
+ waste of time and space given that 5*15 = 75 bits are rarely
+ needed. */
+ t = (unsigned PY_LONG_LONG)ival;
+ while (t) {
+ ++ndigits;
+ t >>= SHIFT;
+ }
+ v = _PyLong_New(ndigits);
+ if (v != NULL) {
+ digit *p = v->ob_digit;
+ v->ob_size = negative ? -ndigits : ndigits;
+ t = (unsigned PY_LONG_LONG)ival;
+ while (t) {
+ *p++ = (digit)(t & MASK);
+ t >>= SHIFT;
+ }
+ }
+ return (PyObject *)v;
}
/* Create a new long int object from a C unsigned PY_LONG_LONG int. */
@@ -857,11 +881,26 @@ PyLong_FromLongLong(PY_LONG_LONG ival)
PyObject *
PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG ival)
{
- unsigned PY_LONG_LONG bytes = ival;
- int one = 1;
- return _PyLong_FromByteArray(
- (unsigned char *)&bytes,
- SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0);
+ PyLongObject *v;
+ unsigned PY_LONG_LONG t;
+ int ndigits = 0;
+
+ /* Count the number of Python digits. */
+ t = (unsigned PY_LONG_LONG)ival;
+ while (t) {
+ ++ndigits;
+ t >>= SHIFT;
+ }
+ v = _PyLong_New(ndigits);
+ if (v != NULL) {
+ digit *p = v->ob_digit;
+ v->ob_size = ndigits;
+ while (ival) {
+ *p++ = (digit)(ival & MASK);
+ ival >>= SHIFT;
+ }
+ }
+ return (PyObject *)v;
}
/* Create a new long int object from a C Py_ssize_t. */
@@ -1305,7 +1344,33 @@ long_format(PyObject *aa, int base, int addL)
return (PyObject *)str;
}
-/* *str points to the first digit in a string of base base digits. base
+/* Table of digit values for 8-bit string -> integer conversion.
+ * '0' maps to 0, ..., '9' maps to 9.
+ * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35.
+ * All other indices map to 37.
+ * Note that when converting a base B string, a char c is a legitimate
+ * base B digit iff _PyLong_DigitValue[Py_CHARMASK(c)] < B.
+ */
+int _PyLong_DigitValue[256] = {
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 37, 37, 37, 37, 37, 37,
+ 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
+ 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+};
+
+/* *str points to the first digit in a string of base `base` digits. base
* is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first
* non-digit (which may be *str!). A normalized long is returned.
* The point to this routine is that it takes time linear in the number of
@@ -1329,20 +1394,8 @@ long_from_binary_base(char **str, int base)
n >>= 1;
/* n <- total # of bits needed, while setting p to end-of-string */
n = 0;
- for (;;) {
- int k = -1;
- char ch = *p;
-
- if (ch <= '9')
- k = ch - '0';
- else if (ch >= 'a')
- k = ch - 'a' + 10;
- else if (ch >= 'A')
- k = ch - 'A' + 10;
- if (k < 0 || k >= base)
- break;
+ while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
++p;
- }
*str = p;
n = (p - start) * bits_per_char;
if (n / bits_per_char != p - start) {
@@ -1362,17 +1415,7 @@ long_from_binary_base(char **str, int base)
bits_in_accum = 0;
pdigit = z->ob_digit;
while (--p >= start) {
- int k;
- char ch = *p;
-
- if (ch <= '9')
- k = ch - '0';
- else if (ch >= 'a')
- k = ch - 'a' + 10;
- else {
- assert(ch >= 'A');
- k = ch - 'A' + 10;
- }
+ int k = _PyLong_DigitValue[Py_CHARMASK(*p)];
assert(k >= 0 && k < base);
accum |= (twodigits)(k << bits_in_accum);
bits_in_accum += bits_per_char;
@@ -1428,33 +1471,140 @@ PyLong_FromString(char *str, char **pend, int base)
}
if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
str += 2;
+
start = str;
if ((base & (base - 1)) == 0)
z = long_from_binary_base(&str, base);
else {
- z = _PyLong_New(0);
- for ( ; z != NULL; ++str) {
- int k = -1;
- PyLongObject *temp;
-
- if (*str <= '9')
- k = *str - '0';
- else if (*str >= 'a')
- k = *str - 'a' + 10;
- else if (*str >= 'A')
- k = *str - 'A' + 10;
- if (k < 0 || k >= base)
- break;
- temp = muladd1(z, (digit)base, (digit)k);
- Py_DECREF(z);
- z = temp;
+/***
+Binary bases can be converted in time linear in the number of digits, because
+Python's representation base is binary. Other bases (including decimal!) use
+the simple quadratic-time algorithm below, complicated by some speed tricks.
+
+First some math: the largest integer that can be expressed in N base-B digits
+is B**N-1. Consequently, if we have an N-digit input in base B, the worst-
+case number of Python digits needed to hold it is the smallest integer n s.t.
+
+ BASE**n-1 >= B**N-1 [or, adding 1 to both sides]
+ BASE**n >= B**N [taking logs to base BASE]
+ n >= log(B**N)/log(BASE) = N * log(B)/log(BASE)
+
+The static array log_base_BASE[base] == log(base)/log(BASE) so we can compute
+this quickly. A Python long with that much space is reserved near the start,
+and the result is computed into it.
+
+The input string is actually treated as being in base base**i (i.e., i digits
+are processed at a time), where two more static arrays hold:
+
+ convwidth_base[base] = the largest integer i such that base**i <= BASE
+ convmultmax_base[base] = base ** convwidth_base[base]
+
+The first of these is the largest i such that i consecutive input digits
+must fit in a single Python digit. The second is effectively the input
+base we're really using.
+
+Viewing the input as a sequence <c0, c1, ..., c_n-1> of digits in base
+convmultmax_base[base], the result is "simply"
+
+ (((c0*B + c1)*B + c2)*B + c3)*B + ... ))) + c_n-1
+
+where B = convmultmax_base[base].
+***/
+ register twodigits c; /* current input character */
+ Py_ssize_t size_z;
+ int i;
+ int convwidth;
+ twodigits convmultmax, convmult;
+ digit *pz, *pzstop;
+ char* scan;
+
+ static double log_base_BASE[37] = {0.0e0,};
+ static int convwidth_base[37] = {0,};
+ static twodigits convmultmax_base[37] = {0,};
+
+ if (log_base_BASE[base] == 0.0) {
+ twodigits convmax = base;
+ int i = 1;
+
+ log_base_BASE[base] = log((double)base) /
+ log((double)BASE);
+ for (;;) {
+ twodigits next = convmax * base;
+ if (next > BASE)
+ break;
+ convmax = next;
+ ++i;
+ }
+ convmultmax_base[base] = convmax;
+ assert(i > 0);
+ convwidth_base[base] = i;
+ }
+
+ /* Find length of the string of numeric characters. */
+ scan = str;
+ while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base)
+ ++scan;
+
+ /* Create a long object that can contain the largest possible
+ * integer with this base and length. Note that there's no
+ * need to initialize z->ob_digit -- no slot is read before
+ * being stored into.
+ */
+ size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1;
+ assert(size_z > 0);
+ z = _PyLong_New(size_z);
+ if (z == NULL)
+ return NULL;
+ z->ob_size = 0;
+
+ /* `convwidth` consecutive input digits are treated as a single
+ * digit in base `convmultmax`.
+ */
+ convwidth = convwidth_base[base];
+ convmultmax = convmultmax_base[base];
+
+ /* Work ;-) */
+ while (str < scan) {
+ /* grab up to convwidth digits from the input string */
+ c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)];
+ for (i = 1; i < convwidth && str != scan; ++i, ++str) {
+ c = (twodigits)(c * base +
+ _PyLong_DigitValue[Py_CHARMASK(*str)]);
+ assert(c < BASE);
+ }
+
+ convmult = convmultmax;
+ /* Calculate the shift only if we couldn't get
+ * convwidth digits.
+ */
+ if (i != convwidth) {
+ convmult = base;
+ for ( ; i > 1; --i)
+ convmult *= base;
+ }
+
+ /* Multiply z by convmult, and add c. */
+ pz = z->ob_digit;
+ pzstop = pz + z->ob_size;
+ for (; pz < pzstop; ++pz) {
+ c += (twodigits)*pz * convmult;
+ *pz = (digit)(c & MASK);
+ c >>= SHIFT;
+ }
+ /* carry off the current end? */
+ if (c) {
+ assert(c < BASE);
+ assert(z->ob_size < size_z);
+ *pz = (digit)c;
+ ++z->ob_size;
+ }
}
}
if (z == NULL)
return NULL;
if (str == start)
goto onError;
- if (sign < 0 && z != NULL && z->ob_size != 0)
+ if (sign < 0)
z->ob_size = -(z->ob_size);
if (*str == 'L' || *str == 'l')
str++;
@@ -1580,9 +1730,10 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
assert(size_w == ABS(w->ob_size)); /* That's how d was calculated */
size_v = ABS(v->ob_size);
- a = _PyLong_New(size_v - size_w + 1);
+ k = size_v - size_w;
+ a = _PyLong_New(k + 1);
- for (j = size_v, k = a->ob_size-1; a != NULL && k >= 0; --j, --k) {
+ for (j = size_v; a != NULL && k >= 0; --j, --k) {
digit vj = (j >= size_v) ? 0 : v->ob_digit[j];
twodigits q;
stwodigits carry = 0;
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
new file mode 100644
index 0000000..82a8774
--- /dev/null
+++ b/Objects/stringlib/README.txt
@@ -0,0 +1,34 @@
+bits shared by the stringobject and unicodeobject implementations (and
+possibly other modules, in a not too distant future).
+
+the stuff in here is included into relevant places; see the individual
+source files for details.
+
+--------------------------------------------------------------------
+the following defines are used by the different modules:
+
+STRINGLIB_CHAR
+
+ the type used to hold a character (char or Py_UNICODE)
+
+STRINGLIB_EMPTY
+
+ a PyObject representing the empty string
+
+int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
+
+ compares two strings. returns 0 if they match, and non-zero if not.
+
+Py_ssize_t STRINGLIB_LEN(PyObject*)
+
+ returns the length of the given string object (which must be of the
+ right type)
+
+PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
+
+ creates a new string object
+
+STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
+
+ returns the pointer to the character data for the given string
+ object (which must be of the right type)
diff --git a/Objects/stringlib/count.h b/Objects/stringlib/count.h
new file mode 100644
index 0000000..0bd02b5
--- /dev/null
+++ b/Objects/stringlib/count.h
@@ -0,0 +1,34 @@
+/* stringlib: count implementation */
+
+#ifndef STRINGLIB_COUNT_H
+#define STRINGLIB_COUNT_H
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(Py_ssize_t)
+stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ const STRINGLIB_CHAR* sub, Py_ssize_t sub_len)
+{
+ Py_ssize_t count;
+
+ if (sub_len == 0)
+ return str_len + 1;
+
+ count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT);
+
+ if (count < 0)
+ count = 0; /* no match */
+
+ return count;
+}
+
+#endif
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
new file mode 100644
index 0000000..8f79c36
--- /dev/null
+++ b/Objects/stringlib/fastsearch.h
@@ -0,0 +1,104 @@
+/* stringlib: fastsearch implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#define STRINGLIB_FASTSEARCH_H
+
+/* fast search/count implementation, based on a mix between boyer-
+ moore and horspool, with a few more bells and whistles on the top.
+ for some more background, see: http://effbot.org/stringlib */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+ Python's ordinary string types, but may cause problems if you're
+ using this code in other contexts. also, the count mode returns -1
+ if there cannot possibly be a match in the target string, and 0 if
+ it has actually checked for matches, but didn't find any. callers
+ beware! */
+
+#define FAST_COUNT 0
+#define FAST_SEARCH 1
+
+Py_LOCAL_INLINE(Py_ssize_t)
+fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
+ const STRINGLIB_CHAR* p, Py_ssize_t m,
+ int mode)
+{
+ long mask;
+ Py_ssize_t skip, count = 0;
+ Py_ssize_t i, j, mlast, w;
+
+ w = n - m;
+
+ if (w < 0)
+ return -1;
+
+ /* look for special cases */
+ if (m <= 1) {
+ if (m <= 0)
+ return -1;
+ /* use special case for 1-character strings */
+ if (mode == FAST_COUNT) {
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0])
+ count++;
+ return count;
+ } else {
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0])
+ return i;
+ }
+ return -1;
+ }
+
+ mlast = m - 1;
+
+ /* create compressed boyer-moore delta 1 table */
+ skip = mlast - 1;
+ /* process pattern[:-1] */
+ for (mask = i = 0; i < mlast; i++) {
+ mask |= (1 << (p[i] & 0x1F));
+ if (p[i] == p[mlast])
+ skip = mlast - i - 1;
+ }
+ /* process pattern[-1] outside the loop */
+ mask |= (1 << (p[mlast] & 0x1F));
+
+ for (i = 0; i <= w; i++) {
+ /* note: using mlast in the skip path slows things down on x86 */
+ if (s[i+m-1] == p[m-1]) {
+ /* candidate match */
+ for (j = 0; j < mlast; j++)
+ if (s[i+j] != p[j])
+ break;
+ if (j == mlast) {
+ /* got a match! */
+ if (mode != FAST_COUNT)
+ return i;
+ count++;
+ i = i + mlast;
+ continue;
+ }
+ /* miss: check if next character is part of pattern */
+ if (!(mask & (1 << (s[i+m] & 0x1F))))
+ i = i + m;
+ else
+ i = i + skip;
+ } else {
+ /* skip: check if next character is part of pattern */
+ if (!(mask & (1 << (s[i+m] & 0x1F))))
+ i = i + m;
+ }
+ }
+
+ if (mode != FAST_COUNT)
+ return -1;
+ return count;
+}
+
+#endif
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
new file mode 100644
index 0000000..4cea2db
--- /dev/null
+++ b/Objects/stringlib/find.h
@@ -0,0 +1,112 @@
+/* stringlib: find/index implementation */
+
+#ifndef STRINGLIB_FIND_H
+#define STRINGLIB_FIND_H
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(Py_ssize_t)
+stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+ Py_ssize_t offset)
+{
+ Py_ssize_t pos;
+
+ if (sub_len == 0)
+ return offset;
+
+ pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH);
+
+ if (pos >= 0)
+ pos += offset;
+
+ return pos;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+ Py_ssize_t offset)
+{
+ Py_ssize_t pos;
+
+ /* XXX - create reversefastsearch helper! */
+ if (sub_len == 0)
+ pos = str_len + offset;
+ else {
+ Py_ssize_t j;
+ pos = -1;
+ for (j = str_len - sub_len; j >= 0; --j)
+ if (STRINGLIB_CMP(str+j, sub, sub_len) == 0) {
+ pos = j + offset;
+ break;
+ }
+ }
+
+ return pos;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+ Py_ssize_t start, Py_ssize_t end)
+{
+ if (start < 0)
+ start += str_len;
+ if (start < 0)
+ start = 0;
+ if (end > str_len)
+ end = str_len;
+ if (end < 0)
+ end += str_len;
+ if (end < 0)
+ end = 0;
+
+ return stringlib_find(
+ str + start, end - start,
+ sub, sub_len, start
+ );
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+ Py_ssize_t start, Py_ssize_t end)
+{
+ if (start < 0)
+ start += str_len;
+ if (start < 0)
+ start = 0;
+ if (end > str_len)
+ end = str_len;
+ if (end < 0)
+ end += str_len;
+ if (end < 0)
+ end = 0;
+
+ return stringlib_rfind(str + start, end - start, sub, sub_len, start);
+}
+
+#ifdef STRINGLIB_STR
+
+Py_LOCAL_INLINE(int)
+stringlib_contains_obj(PyObject* str, PyObject* sub)
+{
+ return stringlib_find(
+ STRINGLIB_STR(str), STRINGLIB_LEN(str),
+ STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
+ ) != -1;
+}
+
+#endif /* STRINGLIB_STR */
+
+#endif /* STRINGLIB_FIND_H */
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/
diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h
new file mode 100644
index 0000000..1486347
--- /dev/null
+++ b/Objects/stringlib/partition.h
@@ -0,0 +1,111 @@
+/* stringlib: partition implementation */
+
+#ifndef STRINGLIB_PARTITION_H
+#define STRINGLIB_PARTITION_H
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(PyObject*)
+stringlib_partition(
+ PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
+ )
+{
+ PyObject* out;
+ Py_ssize_t pos;
+
+ if (sep_len == 0) {
+ PyErr_SetString(PyExc_ValueError, "empty separator");
+ return NULL;
+ }
+
+ out = PyTuple_New(3);
+ if (!out)
+ return NULL;
+
+ pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH);
+
+ if (pos < 0) {
+ Py_INCREF(str_obj);
+ PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
+ Py_INCREF(STRINGLIB_EMPTY);
+ PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+ Py_INCREF(STRINGLIB_EMPTY);
+ PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
+ return out;
+ }
+
+ PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
+ Py_INCREF(sep_obj);
+ PyTuple_SET_ITEM(out, 1, sep_obj);
+ pos += sep_len;
+ PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
+
+ if (PyErr_Occurred()) {
+ Py_DECREF(out);
+ return NULL;
+ }
+
+ return out;
+}
+
+Py_LOCAL_INLINE(PyObject*)
+stringlib_rpartition(
+ PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+ PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
+ )
+{
+ PyObject* out;
+ Py_ssize_t pos, j;
+
+ if (sep_len == 0) {
+ PyErr_SetString(PyExc_ValueError, "empty separator");
+ return NULL;
+ }
+
+ out = PyTuple_New(3);
+ if (!out)
+ return NULL;
+
+ /* XXX - create reversefastsearch helper! */
+ pos = -1;
+ for (j = str_len - sep_len; j >= 0; --j)
+ if (STRINGLIB_CMP(str+j, sep, sep_len) == 0) {
+ pos = j;
+ break;
+ }
+
+ if (pos < 0) {
+ Py_INCREF(str_obj);
+ PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
+ Py_INCREF(STRINGLIB_EMPTY);
+ PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+ Py_INCREF(STRINGLIB_EMPTY);
+ PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
+ return out;
+ }
+
+ PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
+ Py_INCREF(sep_obj);
+ PyTuple_SET_ITEM(out, 1, sep_obj);
+ pos += sep_len;
+ PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
+
+ if (PyErr_Occurred()) {
+ Py_DECREF(out);
+ return NULL;
+ }
+
+ return out;
+}
+
+#endif
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index b34dcb2..110c38e 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1,6 +1,7 @@
/* String object implementation */
#define PY_SSIZE_T_CLEAN
+
#include "Python.h"
#include <ctype.h>
@@ -176,14 +177,11 @@ PyString_FromFormatV(const char *format, va_list vargs)
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
;
- /* skip the 'l' in %ld, since it doesn't change the
- width. although only %d is supported (see
- "expand" section below), others can be easily
- added */
- if (*f == 'l' && *(f+1) == 'd')
- ++f;
- /* likewise for %zd */
- if (*f == 'z' && *(f+1) == 'd')
+ /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
+ * they don't affect the amount of space we reserve.
+ */
+ if ((*f == 'l' || *f == 'z') &&
+ (f[1] == 'd' || f[1] == 'u'))
++f;
switch (*f) {
@@ -193,7 +191,7 @@ PyString_FromFormatV(const char *format, va_list vargs)
case '%':
n++;
break;
- case 'd': case 'i': case 'x':
+ case 'd': case 'u': case 'i': case 'x':
(void) va_arg(count, int);
/* 20 bytes is enough to hold a 64-bit
integer. Decimal takes the most space.
@@ -255,14 +253,14 @@ PyString_FromFormatV(const char *format, va_list vargs)
}
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
f++;
- /* handle the long flag, but only for %ld. others
- can be added when necessary. */
- if (*f == 'l' && *(f+1) == 'd') {
+ /* handle the long flag, but only for %ld and %lu.
+ others can be added when necessary. */
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
longflag = 1;
++f;
}
/* handle the size_t flag. */
- if (*f == 'z' && *(f+1) == 'd') {
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
size_tflag = 1;
++f;
}
@@ -275,10 +273,22 @@ PyString_FromFormatV(const char *format, va_list vargs)
if (longflag)
sprintf(s, "%ld", va_arg(vargs, long));
else if (size_tflag)
+ sprintf(s, "%" PY_FORMAT_SIZE_T "d",
+ va_arg(vargs, Py_ssize_t));
+ else
+ sprintf(s, "%d", va_arg(vargs, int));
+ s += strlen(s);
+ break;
+ case 'u':
+ if (longflag)
+ sprintf(s, "%lu",
+ va_arg(vargs, unsigned long));
+ else if (size_tflag)
sprintf(s, "%" PY_FORMAT_SIZE_T "u",
va_arg(vargs, size_t));
else
- sprintf(s, "%d", va_arg(vargs, int));
+ sprintf(s, "%u",
+ va_arg(vargs, unsigned int));
s += strlen(s);
break;
case 'i':
@@ -680,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s,
return NULL;
}
+/* -------------------------------------------------------------------- */
+/* object api */
+
static Py_ssize_t
string_getsize(register PyObject *op)
{
@@ -754,8 +767,25 @@ PyString_AsStringAndSize(register PyObject *obj,
return 0;
}
+/* -------------------------------------------------------------------- */
/* Methods */
+#define STRINGLIB_CHAR char
+
+#define STRINGLIB_CMP memcmp
+#define STRINGLIB_LEN PyString_GET_SIZE
+#define STRINGLIB_NEW PyString_FromStringAndSize
+#define STRINGLIB_STR PyString_AS_STRING
+
+#define STRINGLIB_EMPTY nullstring
+
+#include "stringlib/fastsearch.h"
+
+#include "stringlib/count.h"
+#include "stringlib/find.h"
+#include "stringlib/partition.h"
+
+
static int
string_print(PyStringObject *op, FILE *fp, int flags)
{
@@ -900,7 +930,7 @@ string_length(PyStringObject *a)
static PyObject *
string_concat(register PyStringObject *a, register PyObject *bb)
{
- register size_t size;
+ register Py_ssize_t size;
register PyStringObject *op;
if (!PyString_Check(bb)) {
#ifdef Py_USING_UNICODE
@@ -924,7 +954,12 @@ string_concat(register PyStringObject *a, register PyObject *bb)
return (PyObject *)a;
}
size = a->ob_size + b->ob_size;
- /* XXX check overflow */
+ if (size < 0) {
+ PyErr_SetString(PyExc_OverflowError,
+ "strings are too large to concat");
+ return NULL;
+ }
+
/* Inline PyObject_NewVar */
op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
if (op == NULL)
@@ -1017,65 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i,
}
+/* Implements `sub_obj in str_obj` for string objects.  A unicode sub_obj is
+   handed to PyUnicode_Contains(); an object that is neither a string nor
+   unicode sets TypeError and returns -1.  Otherwise the answer is whatever
+   stringlib_contains_obj() reports for the two strings. */
static int
-string_contains(PyObject *a, PyObject *el)
+string_contains(PyObject *str_obj, PyObject *sub_obj)
{
- char *s = PyString_AS_STRING(a);
- const char *sub = PyString_AS_STRING(el);
- char *last;
- Py_ssize_t len_sub = PyString_GET_SIZE(el);
- Py_ssize_t shortsub;
- char firstchar, lastchar;
-
- if (!PyString_CheckExact(el)) {
+ if (!PyString_CheckExact(sub_obj)) {
#ifdef Py_USING_UNICODE
- if (PyUnicode_Check(el))
- return PyUnicode_Contains(a, el);
+ if (PyUnicode_Check(sub_obj))
+ return PyUnicode_Contains(str_obj, sub_obj);
#endif
- if (!PyString_Check(el)) {
+ if (!PyString_Check(sub_obj)) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand");
return -1;
}
}
- if (len_sub == 0)
- return 1;
- /* last points to one char beyond the start of the rightmost
- substring. When s<last, there is still room for a possible match
- and s[0] through s[len_sub-1] will be in bounds.
- shortsub is len_sub minus the last character which is checked
- separately just before the memcmp(). That check helps prevent
- false starts and saves the setup time for memcmp().
- */
- firstchar = sub[0];
- shortsub = len_sub - 1;
- lastchar = sub[shortsub];
- last = s + PyString_GET_SIZE(a) - len_sub + 1;
- while (s < last) {
- s = (char *)memchr(s, firstchar, last-s);
- if (s == NULL)
- return 0;
- assert(s < last);
- if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
- return 1;
- s++;
- }
- return 0;
+ /* both operands are byte strings here; the shared search code decides */
+ return stringlib_contains_obj(str_obj, sub_obj);
}
static PyObject *
string_item(PyStringObject *a, register Py_ssize_t i)
{
+ char pchar;
PyObject *v;
- char *pchar;
if (i < 0 || i >= a->ob_size) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL;
}
- pchar = a->ob_sval + i;
- v = (PyObject *)characters[*pchar & UCHAR_MAX];
+ pchar = a->ob_sval[i];
+ v = (PyObject *)characters[pchar & UCHAR_MAX];
if (v == NULL)
- v = PyString_FromStringAndSize(pchar, 1);
+ v = PyString_FromStringAndSize(&pchar, 1);
else {
#ifdef COUNT_ALLOCS
one_strings++;
@@ -1151,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
int
_PyString_Eq(PyObject *o1, PyObject *o2)
{
- PyStringObject *a, *b;
- a = (PyStringObject*)o1;
- b = (PyStringObject*)o2;
+ PyStringObject *a = (PyStringObject*) o1;
+ PyStringObject *b = (PyStringObject*) o2;
return a->ob_size == b->ob_size
&& *a->ob_sval == *b->ob_sval
&& memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
@@ -1308,6 +1313,27 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
+
+/* Test whether the `length` bytes at target[offset...] equal pattern[0...].
+   The first and last bytes are compared inline so that most mismatches are
+   rejected without calling memcmp(); memcmp() then checks the bytes in
+   between.  Don't call if length < 2: the middle span is length-2 bytes
+   long, which would be negative.  Arguments are evaluated more than once,
+   so they must not have side effects. */
+#define Py_STRING_MATCH(target, offset, pattern, length)            \
+    ((target)[(offset)] == (pattern)[0] &&                          \
+     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
+     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
+
+
+/* Overallocate the initial list to reduce the number of reallocs for small
+ split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
+ resizes, to sizes 4, 8, then 16. Most observed string splits are for human
+ text (roughly 11 words per line) and field delimited data (usually 1-10
+ fields). For large strings the split algorithms are bandwidth limited
+ so increasing the preallocation likely will not improve things.*/
+
+/* Upper bound on the number of list slots the split functions reserve up
+   front. */
+#define MAX_PREALLOC 12
+
+/* Number of slots to preallocate for a split limited to `maxsplit` splits:
+   n splits give n+1 elements (5 splits gives 6 elements), but never more
+   than MAX_PREALLOC.  The argument is evaluated twice. */
+#define PREALLOC_SIZE(maxsplit) \
+    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
+
#define SPLIT_APPEND(data, left, right) \
str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \
@@ -1320,74 +1346,90 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
else \
Py_DECREF(str);
-#define SPLIT_INSERT(data, left, right) \
+/* Build a string from data[left:right] and store it as element number
+   `count` of `list`, then increment `count`.  While count < MAX_PREALLOC the
+   string is written straight into the list slot with PyList_SET_ITEM (no
+   DECREF follows, the slot keeps the reference); after that it is appended
+   and the local reference is dropped.  Jumps to the caller's onError label
+   on failure.  Relies on `str`, `list` and `count` being locals of the
+   function that uses the macro. */
+#define SPLIT_ADD(data, left, right) { \
str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
- if (PyList_Insert(list, 0, str)) { \
- Py_DECREF(str); \
- goto onError; \
+ if (count < MAX_PREALLOC) { \
+ PyList_SET_ITEM(list, count, str); \
+ } else { \
+ if (PyList_Append(list, str)) { \
+ Py_DECREF(str); \
+ goto onError; \
+ } \
+ else \
+ Py_DECREF(str); \
} \
- else \
- Py_DECREF(str);
+ count++; }
-static PyObject *
+/* Always force the list to the expected size: set the list's ob_size field
+   to `count`, the caller's local element counter.  The expansion already
+   ends in a semicolon. */
+#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;
+
+/* Advance index i (bounded by len) past whitespace / non-whitespace bytes
+   of s.  `i` must be a modifiable lvalue; arguments are evaluated on every
+   iteration, so they must not have side effects. */
+#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
+#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
+/* Same, but walking backwards; stops once i drops below 0. */
+#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
+#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
+
+/* Split s[0:len] on runs of whitespace, performing at most `maxsplit`
+   splits.  Returns a new list of strings, or NULL after releasing the list
+   if creating or storing an element fails (SPLIT_ADD jumps to onError). */
+Py_LOCAL_INLINE(PyObject *)
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
{
- Py_ssize_t i, j;
+ Py_ssize_t i, j, count=0;
PyObject *str;
- PyObject *list = PyList_New(0);
+ PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
- for (i = j = 0; i < len; ) {
- while (i < len && isspace(Py_CHARMASK(s[i])))
- i++;
- j = i;
- while (i < len && !isspace(Py_CHARMASK(s[i])))
- i++;
- if (j < i) {
- if (maxsplit-- <= 0)
- break;
- SPLIT_APPEND(s, j, i);
- while (i < len && isspace(Py_CHARMASK(s[i])))
- i++;
- j = i;
- }
+ i = j = 0;
+
+ /* each pass emits one word s[j:i]; stops early when only whitespace is left */
+ while (maxsplit-- > 0) {
+ SKIP_SPACE(s, i, len);
+ if (i==len) break;
+ j = i; i++;
+ SKIP_NONSPACE(s, i, len);
+ SPLIT_ADD(s, j, i);
}
- if (j < len) {
- SPLIT_APPEND(s, j, len);
+
+ if (i < len) {
+ /* Only occurs when maxsplit was reached */
+ /* Skip any remaining whitespace and copy to end of string */
+ SKIP_SPACE(s, i, len);
+ if (i != len)
+ SPLIT_ADD(s, i, len);
}
+ /* the list was created with preallocated slots; set its length to the number stored */
+ FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
- register Py_ssize_t i, j;
+ register Py_ssize_t i, j, count=0;
PyObject *str;
- PyObject *list = PyList_New(0);
+ PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
- for (i = j = 0; i < len; ) {
- if (s[i] == ch) {
- if (maxcount-- <= 0)
+ i = j = 0;
+ while ((j < len) && (maxcount-- > 0)) {
+ for(; j<len; j++) {
+ /* I found that using memchr makes no difference */
+ if (s[j] == ch) {
+ SPLIT_ADD(s, i, j);
+ i = j = j + 1;
break;
- SPLIT_APPEND(s, j, i);
- i = j = i + 1;
- } else
- i++;
+ }
+ }
}
- if (j <= len) {
- SPLIT_APPEND(s, j, len);
+ if (i <= len) {
+ SPLIT_ADD(s, i, len);
}
+ FIX_PREALLOC_SIZE(list);
return list;
onError:
@@ -1407,10 +1449,12 @@ static PyObject *
string_split(PyStringObject *self, PyObject *args)
{
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
- int err;
- Py_ssize_t maxsplit = -1;
+ Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub;
- PyObject *list, *item, *subobj = Py_None;
+ PyObject *list, *str, *subobj = Py_None;
+#ifdef USE_FAST
+ Py_ssize_t pos;
+#endif
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
return NULL;
@@ -1436,98 +1480,166 @@ string_split(PyStringObject *self, PyObject *args)
else if (n == 1)
return split_char(s, len, sub[0], maxsplit);
- list = PyList_New(0);
+ list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
+#ifdef USE_FAST
i = j = 0;
- while (i+n <= len) {
- if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
- if (maxsplit-- <= 0)
+ while (maxsplit-- > 0) {
+ pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
+ if (pos < 0)
+ break;
+ j = i+pos;
+ SPLIT_ADD(s, i, j);
+ i = j + n;
+
+ }
+#else
+ i = j = 0;
+ while ((j+n <= len) && (maxsplit-- > 0)) {
+ for (; j+n <= len; j++) {
+ if (Py_STRING_MATCH(s, j, sub, n)) {
+ SPLIT_ADD(s, i, j);
+ i = j = j + n;
break;
- item = PyString_FromStringAndSize(s+j, i-j);
- if (item == NULL)
- goto fail;
- err = PyList_Append(list, item);
- Py_DECREF(item);
- if (err < 0)
- goto fail;
- i = j = i + n;
+ }
}
- else
- i++;
}
- item = PyString_FromStringAndSize(s+j, len-j);
- if (item == NULL)
- goto fail;
- err = PyList_Append(list, item);
- Py_DECREF(item);
- if (err < 0)
- goto fail;
-
+#endif
+ SPLIT_ADD(s, i, len);
+ FIX_PREALLOC_SIZE(list);
return list;
- fail:
+ onError:
Py_DECREF(list);
return NULL;
}
+PyDoc_STRVAR(partition__doc__,
+"S.partition(sep) -> (head, sep, tail)\n\
+\n\
+Searches for the separator sep in S, and returns the part before it,\n\
+the separator itself, and the part after it. If the separator is not\n\
+found, returns S and two empty strings.");
+
+/* str.partition(sep).  A string separator is used directly, a unicode
+   separator hands the whole operation to PyUnicode_Partition(), and any
+   other object must expose a character buffer.  Returns the result of
+   stringlib_partition(), or NULL when the separator cannot be read. */
+static PyObject *
+string_partition(PyStringObject *self, PyObject *sep_obj)
+{
+    /* separator bytes and their length, filled in from sep_obj below */
+    const char *sep_s;
+    Py_ssize_t sep_n;
+
+    if (!PyString_Check(sep_obj)) {
+#ifdef Py_USING_UNICODE
+        /* unicode separator: let the unicode implementation do the work */
+        if (PyUnicode_Check(sep_obj))
+            return PyUnicode_Partition((PyObject *) self, sep_obj);
+#endif
+        /* anything else has to export a character buffer */
+        if (PyObject_AsCharBuffer(sep_obj, &sep_s, &sep_n) != 0)
+            return NULL;
+    }
+    else {
+        sep_s = PyString_AS_STRING(sep_obj);
+        sep_n = PyString_GET_SIZE(sep_obj);
+    }
+
+    return stringlib_partition((PyObject*) self,
+                               PyString_AS_STRING(self),
+                               PyString_GET_SIZE(self),
+                               sep_obj, sep_s, sep_n);
+}
+
+PyDoc_STRVAR(rpartition__doc__,
+"S.rpartition(sep) -> (head, sep, tail)\n\
+\n\
+Searches for the separator sep in S, starting at the end of S, and returns\n\
+the part before it, the separator itself, and the part after it. If the\n\
+separator is not found, returns S and two empty strings.");
+
+/* str.rpartition(sep): partition around the LAST occurrence of sep.  A
+   string separator is used directly, a unicode separator hands the whole
+   operation to the unicode implementation, and any other object must expose
+   a character buffer.  Returns the result of stringlib_rpartition(), or
+   NULL when the separator cannot be read. */
static PyObject *
+string_rpartition(PyStringObject *self, PyObject *sep_obj)
+{
+ const char *sep;
+ Py_ssize_t sep_len;
+
+ if (PyString_Check(sep_obj)) {
+ sep = PyString_AS_STRING(sep_obj);
+ sep_len = PyString_GET_SIZE(sep_obj);
+ }
+#ifdef Py_USING_UNICODE
+ else if (PyUnicode_Check(sep_obj))
+ /* must search from the right, exactly like the byte-string path
+ below; PyUnicode_Partition would split at the first occurrence */
+ return PyUnicode_RPartition((PyObject *) self, sep_obj);
+#endif
+ else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
+ return NULL;
+
+ return stringlib_rpartition(
+ (PyObject*) self,
+ PyString_AS_STRING(self), PyString_GET_SIZE(self),
+ sep_obj, sep, sep_len
+ );
+}
+
+/* Split s[0:len] on runs of whitespace working from the right, performing
+   at most `maxsplit` splits.  Elements are collected right-to-left and the
+   list is reversed before it is returned.  Returns a new list, or NULL
+   after releasing the list on failure. */
+Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
{
- Py_ssize_t i, j;
+ Py_ssize_t i, j, count=0;
PyObject *str;
- PyObject *list = PyList_New(0);
+ PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
- for (i = j = len - 1; i >= 0; ) {
- while (i >= 0 && isspace(Py_CHARMASK(s[i])))
- i--;
- j = i;
- while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
- i--;
- if (j > i) {
- if (maxsplit-- <= 0)
- break;
- SPLIT_INSERT(s, i + 1, j + 1);
- while (i >= 0 && isspace(Py_CHARMASK(s[i])))
- i--;
- j = i;
- }
- }
- if (j >= 0) {
- SPLIT_INSERT(s, 0, j + 1);
- }
+ i = j = len-1;
+
+ /* each pass emits one word s[i+1:j+1]; stops early when only whitespace is left */
+ while (maxsplit-- > 0) {
+ RSKIP_SPACE(s, i);
+ if (i<0) break;
+ j = i; i--;
+ RSKIP_NONSPACE(s, i);
+ SPLIT_ADD(s, i + 1, j + 1);
+ }
+ if (i >= 0) {
+ /* Only occurs when maxsplit was reached */
+ /* Skip any remaining whitespace and copy to beginning of string */
+ RSKIP_SPACE(s, i);
+ if (i >= 0)
+ SPLIT_ADD(s, 0, i + 1);
+
+ }
+ FIX_PREALLOC_SIZE(list);
+ /* words were stored last-to-first; put them in left-to-right order */
+ if (PyList_Reverse(list) < 0)
+ goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
- register Py_ssize_t i, j;
+ register Py_ssize_t i, j, count=0;
PyObject *str;
- PyObject *list = PyList_New(0);
+ PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
- for (i = j = len - 1; i >= 0; ) {
- if (s[i] == ch) {
- if (maxcount-- <= 0)
+ i = j = len - 1;
+ while ((i >= 0) && (maxcount-- > 0)) {
+ for (; i >= 0; i--) {
+ if (s[i] == ch) {
+ SPLIT_ADD(s, i + 1, j + 1);
+ j = i = i - 1;
break;
- SPLIT_INSERT(s, i + 1, j + 1);
- j = i = i - 1;
- } else
- i--;
+ }
+ }
}
if (j >= -1) {
- SPLIT_INSERT(s, 0, j + 1);
+ SPLIT_ADD(s, 0, j + 1);
}
+ FIX_PREALLOC_SIZE(list);
+ if (PyList_Reverse(list) < 0)
+ goto onError;
return list;
onError:
@@ -1548,10 +1660,9 @@ static PyObject *
string_rsplit(PyStringObject *self, PyObject *args)
{
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
- int err;
- Py_ssize_t maxsplit = -1;
+ Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub;
- PyObject *list, *item, *subobj = Py_None;
+ PyObject *list, *str, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
return NULL;
@@ -1577,40 +1688,30 @@ string_rsplit(PyStringObject *self, PyObject *args)
else if (n == 1)
return rsplit_char(s, len, sub[0], maxsplit);
- list = PyList_New(0);
+ list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
j = len;
i = j - n;
- while (i >= 0) {
- if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
- if (maxsplit-- <= 0)
+
+ while ( (i >= 0) && (maxsplit-- > 0) ) {
+ for (; i>=0; i--) {
+ if (Py_STRING_MATCH(s, i, sub, n)) {
+ SPLIT_ADD(s, i + n, j);
+ j = i;
+ i -= n;
break;
- item = PyString_FromStringAndSize(s+i+n, j-i-n);
- if (item == NULL)
- goto fail;
- err = PyList_Insert(list, 0, item);
- Py_DECREF(item);
- if (err < 0)
- goto fail;
- j = i;
- i -= n;
+ }
}
- else
- i--;
}
- item = PyString_FromStringAndSize(s, j);
- if (item == NULL)
- goto fail;
- err = PyList_Insert(list, 0, item);
- Py_DECREF(item);
- if (err < 0)
- goto fail;
-
+ SPLIT_ADD(s, 0, j);
+ FIX_PREALLOC_SIZE(list);
+ if (PyList_Reverse(list) < 0)
+ goto onError;
return list;
- fail:
+onError:
Py_DECREF(list);
return NULL;
}
@@ -1727,7 +1828,7 @@ _PyString_Join(PyObject *sep, PyObject *x)
return string_join((PyStringObject *)sep, x);
}
-static void
+Py_LOCAL_INLINE(void)
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
{
if (*end > len)
@@ -1742,50 +1843,38 @@ string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
*start = 0;
}
-static Py_ssize_t
+/* Shared worker for find/rfind/index/rindex.  Parses (sub[, start[, end]])
+   from args and searches forward when dir > 0, backward otherwise.  Returns
+   -2 when argument parsing or reading the substring's buffer fails;
+   otherwise returns what PyUnicode_Find() (unicode substring) or
+   stringlib_find_slice()/stringlib_rfind_slice() report. */
+Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject *self, PyObject *args, int dir)
{
- const char *s = PyString_AS_STRING(self), *sub;
- Py_ssize_t len = PyString_GET_SIZE(self);
- Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
PyObject *subobj;
+ const char *sub;
+ Py_ssize_t sub_len;
+ Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
/* XXX ssize_t i */
- if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
- &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
+ if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
+ _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return -2;
if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
+ sub_len = PyString_GET_SIZE(subobj);
}
#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj))
- return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
+ return PyUnicode_Find(
+ (PyObject *)self, subobj, start, end, dir);
#endif
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
return -2;
- string_adjust_indices(&i, &last, len);
-
- if (dir > 0) {
- if (n == 0 && i <= last)
- return (long)i;
- last -= n;
- for (; i <= last; ++i)
- if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
- return (long)i;
- }
- else {
- Py_ssize_t j;
-
- if (n == 0 && i <= last)
- return last;
- for (j = last-n; j >= i; --j)
- if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
- return j;
- }
-
- return -1;
+ /* start/end are passed through unadjusted; the slice helpers receive the raw values */
+ if (dir > 0)
+ return stringlib_find_slice(
+ PyString_AS_STRING(self), PyString_GET_SIZE(self),
+ sub, sub_len, start, end);
+ else
+ return stringlib_rfind_slice(
+ PyString_AS_STRING(self), PyString_GET_SIZE(self),
+ sub, sub_len, start, end);
}
@@ -1867,7 +1956,7 @@ string_rindex(PyStringObject *self, PyObject *args)
}
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
{
char *s = PyString_AS_STRING(self);
@@ -1900,7 +1989,7 @@ do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
}
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject *self, int striptype)
{
char *s = PyString_AS_STRING(self);
@@ -1930,7 +2019,7 @@ do_strip(PyStringObject *self, int striptype)
}
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{
PyObject *sep = NULL;
@@ -2024,57 +2113,68 @@ PyDoc_STRVAR(lower__doc__,
\n\
Return a copy of the string S converted to lowercase.");
+/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
+#ifndef _tolower
+#define _tolower tolower
+#endif
+
+/* str.lower(): allocate a new string of the same length, copy self into it
+   and convert every byte that isupper() reports as uppercase.  Returns the
+   new string, or NULL if the allocation fails. */
static PyObject *
string_lower(PyStringObject *self)
{
- char *s = PyString_AS_STRING(self), *s_new;
+ char *s;
Py_ssize_t i, n = PyString_GET_SIZE(self);
PyObject *newobj;
newobj = PyString_FromStringAndSize(NULL, n);
- if (newobj == NULL)
+ if (!newobj)
return NULL;
- s_new = PyString_AsString(newobj);
+
+ s = PyString_AS_STRING(newobj);
+
+ /* copy everything first, then patch only the bytes that change */
+ memcpy(s, PyString_AS_STRING(self), n);
+
for (i = 0; i < n; i++) {
- int c = Py_CHARMASK(*s++);
- if (isupper(c)) {
- *s_new = tolower(c);
- } else
- *s_new = c;
- s_new++;
+ int c = Py_CHARMASK(s[i]);
+ if (isupper(c))
+ s[i] = _tolower(c);
}
+
return newobj;
}
-
PyDoc_STRVAR(upper__doc__,
"S.upper() -> string\n\
\n\
Return a copy of the string S converted to uppercase.");
+#ifndef _toupper
+#define _toupper toupper
+#endif
+
+/* str.upper(): allocate a new string of the same length, copy self into it
+   and convert every byte that islower() reports as lowercase.  Returns the
+   new string, or NULL if the allocation fails. */
static PyObject *
string_upper(PyStringObject *self)
{
- char *s = PyString_AS_STRING(self), *s_new;
+ char *s;
Py_ssize_t i, n = PyString_GET_SIZE(self);
PyObject *newobj;
newobj = PyString_FromStringAndSize(NULL, n);
- if (newobj == NULL)
+ if (!newobj)
return NULL;
- s_new = PyString_AsString(newobj);
+
+ s = PyString_AS_STRING(newobj);
+
+ /* copy everything first, then patch only the bytes that change */
+ memcpy(s, PyString_AS_STRING(self), n);
+
for (i = 0; i < n; i++) {
- int c = Py_CHARMASK(*s++);
- if (islower(c)) {
- *s_new = toupper(c);
- } else
- *s_new = c;
- s_new++;
+ int c = Py_CHARMASK(s[i]);
+ if (islower(c))
+ s[i] = _toupper(c);
}
+
return newobj;
}
-
PyDoc_STRVAR(title__doc__,
"S.title() -> string\n\
\n\
@@ -2150,62 +2250,44 @@ string_capitalize(PyStringObject *self)
PyDoc_STRVAR(count__doc__,
"S.count(sub[, start[, end]]) -> int\n\
\n\
-Return the number of occurrences of substring sub in string\n\
-S[start:end]. Optional arguments start and end are\n\
-interpreted as in slice notation.");
+Return the number of non-overlapping occurrences of substring sub in\n\
+string S[start:end]. Optional arguments start and end are interpreted\n\
+as in slice notation.");
+/* str.count(sub[, start[, end]]).  Parses the arguments, reads the
+   substring from a string or any object with a character buffer (a unicode
+   substring is counted by PyUnicode_Count instead), clamps start/end with
+   string_adjust_indices() and counts inside S[start:end].  Returns a Python
+   int, or NULL with an exception set.  An empty slice (start > end) counts
+   as 0 and is never handed to the search code. */
static PyObject *
string_count(PyStringObject *self, PyObject *args)
{
- const char *s = PyString_AS_STRING(self), *sub, *t;
- Py_ssize_t len = PyString_GET_SIZE(self), n;
- Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
- Py_ssize_t m, r;
- PyObject *subobj;
+ PyObject *sub_obj;
+ const char *str = PyString_AS_STRING(self), *sub;
+ Py_ssize_t sub_len;
+ Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
- if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
- _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
+ if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
+ _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
+ if (PyString_Check(sub_obj)) {
+ sub = PyString_AS_STRING(sub_obj);
+ sub_len = PyString_GET_SIZE(sub_obj);
}
#ifdef Py_USING_UNICODE
- else if (PyUnicode_Check(subobj)) {
+ else if (PyUnicode_Check(sub_obj)) {
Py_ssize_t count;
- count = PyUnicode_Count((PyObject *)self, subobj, i, last);
+ count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
if (count == -1)
return NULL;
else
- return PyInt_FromLong((long) count);
+ return PyInt_FromSsize_t(count);
}
#endif
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL;
- string_adjust_indices(&i, &last, len);
-
- m = last + 1 - n;
- if (n == 0)
- return PyInt_FromSsize_t(m-i);
+ string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
- r = 0;
- while (i < m) {
- if (!memcmp(s+i, sub, n)) {
- r++;
- i += n;
- } else {
- i++;
- }
- if (i >= m)
- break;
- t = (const char *)memchr(s+i, sub[0], m-i);
- if (t == NULL)
- break;
- i = t - s;
- }
- return PyInt_FromSsize_t(r);
+ /* start can still lie beyond end here (e.g. a start past the end of
+ the string); str + start would then point outside the buffer and
+ the length would be negative, so answer 0 directly */
+ if (start > end)
+ return PyInt_FromLong(0);
+
+ return PyInt_FromSsize_t(
+ stringlib_count(str + start, end - start, sub, sub_len)
+ );
}
PyDoc_STRVAR(swapcase__doc__,
@@ -2359,156 +2441,616 @@ string_translate(PyStringObject *self, PyObject *args)
}
-/* What follows is used for implementing replace(). Perry Stoll. */
+#define FORWARD 1
+#define REVERSE -1
-/*
- mymemfind
+/* find and count characters and substrings */
- strstr replacement for arbitrary blocks of memory.
+#define findchar(target, target_len, c) \
+ ((char *)memchr((const void *)(target), c, target_len))
- Locates the first occurrence in the memory pointed to by MEM of the
- contents of memory pointed to by PAT. Returns the index into MEM if
- found, or -1 if not found. If len of PAT is greater than length of
- MEM, the function returns -1.
-*/
-static Py_ssize_t
-mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
+/* Hand back `self` as the result of a string operation that changed
+   nothing.  String ops must return a string: an exact string is returned
+   as-is with a new reference, while an instance of a subclass is copied
+   into a fresh plain string. */
+Py_LOCAL(PyStringObject *)
+return_self(PyStringObject *self)
{
- register Py_ssize_t ii;
+    if (!PyString_CheckExact(self)) {
+        /* subclass instance: build a plain-string copy of its contents */
+        return (PyStringObject *)PyString_FromStringAndSize(
+            PyString_AS_STRING(self),
+            PyString_GET_SIZE(self));
+    }
+    Py_INCREF(self);
+    return self;
+}
- /* pattern can not occur in the last pat_len-1 chars */
- len -= pat_len;
+/* Count occurrences of byte `c` in target[0:target_len], stopping as soon
+   as the running count reaches `maxcount`.  The limit is tested after each
+   hit, so a present byte is always counted at least once.  target_len is a
+   Py_ssize_t so that lengths beyond INT_MAX are not truncated. */
+Py_LOCAL_INLINE(Py_ssize_t)
+countchar(char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
+{
+ Py_ssize_t count=0;
+ char *start=target;
+ char *end=target+target_len;
- for (ii = 0; ii <= len; ii++) {
- if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
- return ii;
- }
+ /* findchar() is memchr(); resume one byte past each hit */
+ while ( (start=findchar(start, end-start, c)) != NULL ) {
+ count++;
+ if (count >= maxcount)
+ break;
+ start += 1;
+ }
+ return count;
+}
+
+/* Find `pattern` inside target[start:end] and return the index of the match
+   relative to `target`, or -1 if there is none.  direction > 0 searches
+   from the left, otherwise from the right.  Negative start/end count from
+   the end of target and are clamped to 0; end is clamped to target_len.
+   An empty pattern matches immediately.  Non-empty patterns are compared
+   with Py_STRING_MATCH, which must not be used for lengths below 2. */
+Py_LOCAL(Py_ssize_t)
+findstring(char *target, Py_ssize_t target_len,
+ char *pattern, Py_ssize_t pattern_len,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ int direction)
+{
+ if (start < 0) {
+ start += target_len;
+ if (start < 0)
+ start = 0;
+ }
+ if (end > target_len) {
+ end = target_len;
+ } else if (end < 0) {
+ end += target_len;
+ if (end < 0)
+ end = 0;
+ }
+
+ /* zero-length substrings always match at the first attempt */
+ if (pattern_len == 0)
+ return (direction > 0) ? start : end;
+
+ /* from here on `end` is the last index at which a match can start */
+ end -= pattern_len;
+
+ if (direction < 0) {
+ for (; end >= start; end--)
+ if (Py_STRING_MATCH(target, end, pattern, pattern_len))
+ return end;
+ } else {
+ for (; start <= end; start++)
+ if (Py_STRING_MATCH(target, start, pattern, pattern_len))
+ return start;
}
return -1;
}
-/*
- mymemcnt
+/* Count non-overlapping occurrences of `pattern` in target[start:end],
+   scanning from the left when direction > 0 and from the right otherwise,
+   and stopping once `maxcount` matches have been seen.  start/end are
+   normalised as in findstring().  For an empty pattern (or maxcount == 0)
+   the result is the smaller of target_len+1 and maxcount.  Non-empty
+   patterns are compared with Py_STRING_MATCH, which must not be used for
+   lengths below 2. */
+Py_LOCAL_INLINE(Py_ssize_t)
+countstring(char *target, Py_ssize_t target_len,
+ char *pattern, Py_ssize_t pattern_len,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ int direction, Py_ssize_t maxcount)
+{
+ Py_ssize_t count=0;
+
+ if (start < 0) {
+ start += target_len;
+ if (start < 0)
+ start = 0;
+ }
+ if (end > target_len) {
+ end = target_len;
+ } else if (end < 0) {
+ end += target_len;
+ if (end < 0)
+ end = 0;
+ }
+
+ /* zero-length substrings match everywhere */
+ if (pattern_len == 0 || maxcount == 0) {
+ if (target_len+1 < maxcount)
+ return target_len+1;
+ return maxcount;
+ }
+
+ /* from here on `end` is the last index at which a match can start */
+ end -= pattern_len;
+ if (direction < 0) {
+ for (; (end >= start); end--)
+ if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
+ count++;
+ if (--maxcount <= 0) break;
+ /* with the loop's end-- this steps over the whole match */
+ end -= pattern_len-1;
+ }
+ } else {
+ for (; (start <= end); start++)
+ if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
+ count++;
+ if (--maxcount <= 0)
+ break;
+ /* with the loop's start++ this steps over the whole match */
+ start += pattern_len-1;
+ }
+ }
+ return count;
+}
- Return the number of distinct times PAT is found in MEM.
- meaning mem=1111 and pat==11 returns 2.
- mem=11111 and pat==11 also return 2.
- */
-static Py_ssize_t
-mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
+
+/* Algorithms for different cases of string replacement */
+
+/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
+/* Replacing the empty string: put a copy of `to` in front of self and after
+   each of its characters, using at most maxcount copies in total; whatever
+   is left of self is copied unchanged.  Returns a new string, or NULL with
+   OverflowError / a memory error set. */
+Py_LOCAL(PyStringObject *)
+replace_interleave(PyStringObject *self,
+ PyStringObject *to,
+ Py_ssize_t maxcount)
{
- register Py_ssize_t offset = 0;
- Py_ssize_t nfound = 0;
+ char *self_s, *to_s, *result_s;
+ Py_ssize_t self_len, to_len, result_len;
+ Py_ssize_t count, i;
+ PyStringObject *result;
+
+ self_len = PyString_GET_SIZE(self);
+ to_len = PyString_GET_SIZE(to);
+
+ /* 1 at the end plus 1 after every character */
+ count = self_len+1;
+ if (maxcount < count)
+ count = maxcount;
+
+ /* Check for overflow */
+ /* result_len = count * to_len + self_len; */
+ /* Test against the limit by division before multiplying: signed
+ overflow is undefined, so it cannot be detected after the fact. */
+ assert(count > 0);
+ if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
+ return NULL;
+ }
+ result_len = count * to_len + self_len;
+
+ if (! (result = (PyStringObject *)
+ PyString_FromStringAndSize(NULL, result_len)) )
+ return NULL;
- while (len >= 0) {
- offset = mymemfind(mem, len, pat, pat_len);
- if (offset == -1)
- break;
- mem += offset + pat_len;
- len -= offset + pat_len;
- nfound++;
+ self_s = PyString_AS_STRING(self);
+ to_s = PyString_AS_STRING(to);
+ result_s = PyString_AS_STRING(result);
+
+ /* TODO: special case single character, which doesn't need memcpy */
+
+ /* Lay the first one down (guaranteed this will occur) */
+ memcpy(result_s, to_s, to_len);
+ result_s += to_len;
+ count -= 1;
+
+ /* one original character followed by one copy of `to` per pass */
+ for (i=0; i<count; i++) {
+ *result_s++ = *self_s++;
+ memcpy(result_s, to_s, to_len);
+ result_s += to_len;
}
- return nfound;
+
+ /* Copy the rest of the original string */
+ memcpy(result_s, self_s, self_len-i);
+
+ return result;
}
-/*
- mymemreplace
+/* Special case for deleting a single character */
+/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
+/* Returns a new string with up to maxcount occurrences of from_c removed,
+   the result of return_self() when from_c does not occur, or NULL if the
+   result string cannot be allocated. */
+Py_LOCAL(PyStringObject *)
+replace_delete_single_character(PyStringObject *self,
+ char from_c, Py_ssize_t maxcount)
+{
+ char *self_s, *result_s;
+ char *start, *next, *end;
+ Py_ssize_t self_len, result_len;
+ Py_ssize_t count;
+ PyStringObject *result;
- Return a string in which all occurrences of PAT in memory STR are
- replaced with SUB.
+ self_len = PyString_GET_SIZE(self);
+ self_s = PyString_AS_STRING(self);
- If length of PAT is less than length of STR or there are no occurrences
- of PAT in STR, then the original string is returned. Otherwise, a new
- string is allocated here and returned.
+ /* first pass: how many bytes will be dropped (bounded by maxcount) */
+ count = countchar(self_s, self_len, from_c, maxcount);
+ if (count == 0) {
+ return return_self(self);
+ }
+
+ result_len = self_len - count; /* from_len == 1 */
+ assert(result_len>=0);
- on return, out_len is:
- the length of output string, or
- -1 if the input string is returned, or
- unchanged if an error occurs (no memory).
+ if ( (result = (PyStringObject *)
+ PyString_FromStringAndSize(NULL, result_len)) == NULL)
+ return NULL;
+ result_s = PyString_AS_STRING(result);
- return value is:
- the new string allocated locally, or
- NULL if an error occurred.
-*/
-static char *
-mymemreplace(const char *str, Py_ssize_t len, /* input string */
- const char *pat, Py_ssize_t pat_len, /* pattern string to find */
- const char *sub, Py_ssize_t sub_len, /* substitution string */
- Py_ssize_t count, /* number of replacements */
- Py_ssize_t *out_len)
+ start = self_s;
+ end = self_s + self_len;
+ /* second pass: copy the stretch before each hit, then skip the hit */
+ while (count-- > 0) {
+ next = findchar(start, end-start, from_c);
+ if (next == NULL)
+ break;
+ memcpy(result_s, start, next-start);
+ result_s += (next-start);
+ start = next+1;
+ }
+ /* tail after the last removed byte */
+ memcpy(result_s, start, end-start);
+
+ return result;
+}
+
+/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
+
+/* Build a copy of self with up to maxcount occurrences of `from` cut out.
+   When `from` does not occur the result of return_self() is returned;
+   NULL is returned if the result string cannot be allocated. */
+Py_LOCAL(PyStringObject *)
+replace_delete_substring(PyStringObject *self, PyStringObject *from,
+                         Py_ssize_t maxcount) {
+    char *src = PyString_AS_STRING(self);
+    Py_ssize_t src_len = PyString_GET_SIZE(self);
+    char *pat = PyString_AS_STRING(from);
+    Py_ssize_t pat_len = PyString_GET_SIZE(from);
+    char *src_end = src + src_len;
+    char *cursor, *out;
+    Py_ssize_t hits, where;
+    PyStringObject *result;
+
+    /* first pass: number of occurrences to remove, bounded by maxcount */
+    hits = countstring(src, src_len, pat, pat_len, 0, src_len, 1, maxcount);
+    if (hits == 0) {
+        /* no matches */
+        return return_self(self);
+    }
+
+    assert (src_len - (hits * pat_len) >= 0);
+    result = (PyStringObject *)
+        PyString_FromStringAndSize(NULL, src_len - (hits * pat_len));
+    if (result == NULL)
+        return NULL;
+    out = PyString_AS_STRING(result);
+
+    /* second pass: copy the text in front of each match, then step over it */
+    for (cursor = src; hits > 0; hits--) {
+        where = findstring(cursor, src_end-cursor,
+                           pat, pat_len,
+                           0, src_end-cursor, FORWARD);
+        if (where == -1)
+            break;
+        memcpy(out, cursor, where);
+        out += where;
+        cursor += where + pat_len;
+    }
+    /* whatever follows the last removed match */
+    memcpy(out, cursor, src_end-cursor);
+    return result;
+}
+
+/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
+/* Returns a same-length copy of self in which up to maxcount occurrences of
+   from_c are overwritten with to_c, the result of return_self() when from_c
+   does not occur, or NULL if the copy cannot be allocated.  "In place"
+   refers to patching the copy; self is never modified. */
+Py_LOCAL(PyStringObject *)
+replace_single_character_in_place(PyStringObject *self,
+ char from_c, char to_c,
+ Py_ssize_t maxcount)
{
- char *out_s;
- char *new_s;
- Py_ssize_t nfound, offset, new_len;
-
- if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
- goto return_same;
-
- /* find length of output string */
- nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
- if (count < 0)
- count = PY_SSIZE_T_MAX;
- else if (nfound > count)
- nfound = count;
- if (nfound == 0)
- goto return_same;
-
- new_len = len + nfound*(sub_len - pat_len);
- if (new_len == 0) {
- /* Have to allocate something for the caller to free(). */
- out_s = (char *)PyMem_MALLOC(1);
- if (out_s == NULL)
- return NULL;
- out_s[0] = '\0';
+ char *self_s, *result_s, *start, *end, *next;
+ Py_ssize_t self_len;
+ PyStringObject *result;
+
+ /* The result string will be the same size */
+ self_s = PyString_AS_STRING(self);
+ self_len = PyString_GET_SIZE(self);
+
+ next = findchar(self_s, self_len, from_c);
+
+ if (next == NULL) {
+ /* No matches; return the original string */
+ return return_self(self);
+ }
+
+ /* Need to make a new string */
+ result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
+ if (result == NULL)
+ return NULL;
+ result_s = PyString_AS_STRING(result);
+ memcpy(result_s, self_s, self_len);
+
+ /* change everything in-place, starting with this one */
+ start = result_s + (next-self_s);
+ *start = to_c;
+ start++;
+ end = result_s + self_len;
+
+ /* the first replacement is already done, hence the pre-decrement */
+ while (--maxcount > 0) {
+ next = findchar(start, end-start, from_c);
+ if (next == NULL)
+ break;
+ *next = to_c;
+ start = next+1;
}
- else {
- assert(new_len > 0);
- new_s = (char *)PyMem_MALLOC(new_len);
- if (new_s == NULL)
- return NULL;
- out_s = new_s;
+
+ return result;
+}
- if (pat_len > 0) {
- for (; nfound > 0; --nfound) {
- /* find index of next instance of pattern */
- offset = mymemfind(str, len, pat, pat_len);
- if (offset == -1)
- break;
+/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
+Py_LOCAL(PyStringObject *)
+replace_substring_in_place(PyStringObject *self,
+ PyStringObject *from,
+ PyStringObject *to,
+ Py_ssize_t maxcount)
+{
+ char *result_s, *start, *end;
+ char *self_s, *from_s, *to_s;
+ Py_ssize_t self_len, from_len, offset;
+ PyStringObject *result;
+
+ /* The result string will be the same size */
+
+ self_s = PyString_AS_STRING(self);
+ self_len = PyString_GET_SIZE(self);
+
+ from_s = PyString_AS_STRING(from);
+ from_len = PyString_GET_SIZE(from);
+ to_s = PyString_AS_STRING(to);
+
+ offset = findstring(self_s, self_len,
+ from_s, from_len,
+ 0, self_len, FORWARD);
+
+ if (offset == -1) {
+ /* No matches; return the original string */
+ return return_self(self);
+ }
+
+ /* Need to make a new string */
+ result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
+ if (result == NULL)
+ return NULL;
+ result_s = PyString_AS_STRING(result);
+ memcpy(result_s, self_s, self_len);
+
+
+ /* change everything in-place, starting with this one */
+ start = result_s + offset;
+ memcpy(start, to_s, from_len);
+ start += from_len;
+ end = result_s + self_len;
+
+ while ( --maxcount > 0) {
+ offset = findstring(start, end-start,
+ from_s, from_len,
+ 0, end-start, FORWARD);
+ if (offset==-1)
+ break;
+ memcpy(start+offset, to_s, from_len);
+ start += offset+from_len;
+ }
+
+ return result;
+}
- /* copy non matching part of input string */
- memcpy(new_s, str, offset);
- str += offset + pat_len;
- len -= offset + pat_len;
+/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
+Py_LOCAL(PyStringObject *)
+replace_single_character(PyStringObject *self,
+ char from_c,
+ PyStringObject *to,
+ Py_ssize_t maxcount)
+{
+ char *self_s, *to_s, *result_s;
+ char *start, *next, *end;
+ Py_ssize_t self_len, to_len, result_len;
+ Py_ssize_t count, product;
+ PyStringObject *result;
+
+ self_s = PyString_AS_STRING(self);
+ self_len = PyString_GET_SIZE(self);
+
+ count = countchar(self_s, self_len, from_c, maxcount);
+
+ if (count == 0) {
+ /* no matches, return unchanged */
+ return return_self(self);
+ }
+
+ to_s = PyString_AS_STRING(to);
+ to_len = PyString_GET_SIZE(to);
+
+ /* use the difference between current and new, hence the "-1" */
+ /* result_len = self_len + count * (to_len-1) */
+ product = count * (to_len-1);
+ if (product / (to_len-1) != count) {
+ PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ return NULL;
+ }
+ result_len = self_len + product;
+ if (result_len < 0) {
+ PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ return NULL;
+ }
+
+ if ( (result = (PyStringObject *)
+ PyString_FromStringAndSize(NULL, result_len)) == NULL)
+ return NULL;
+ result_s = PyString_AS_STRING(result);
+
+ start = self_s;
+ end = self_s + self_len;
+ while (count-- > 0) {
+ next = findchar(start, end-start, from_c);
+ if (next == NULL)
+ break;
+
+ if (next == start) {
+ /* replace with the 'to' */
+ memcpy(result_s, to_s, to_len);
+ result_s += to_len;
+ start += 1;
+ } else {
+ /* copy the unchanged old then the 'to' */
+ memcpy(result_s, start, next-start);
+ result_s += (next-start);
+ memcpy(result_s, to_s, to_len);
+ result_s += to_len;
+ start = next+1;
+ }
+ }
+ /* Copy the remainder of the string */
+ memcpy(result_s, start, end-start);
+
+ return result;
+}
- /* copy substitute into the output string */
- new_s += offset;
- memcpy(new_s, sub, sub_len);
- new_s += sub_len;
- }
- /* copy any remaining values into output string */
- if (len > 0)
- memcpy(new_s, str, len);
+/* len(self)>=1, len(from)>=2, len(to)>=1, len(to)!=len(from), maxcount>=1 */
+Py_LOCAL(PyStringObject *)
+replace_substring(PyStringObject *self,
+ PyStringObject *from,
+ PyStringObject *to,
+ Py_ssize_t maxcount) {
+ char *self_s, *from_s, *to_s, *result_s;
+ char *start, *next, *end;
+ Py_ssize_t self_len, from_len, to_len, result_len;
+ Py_ssize_t count, offset, product;
+ PyStringObject *result;
+
+ self_s = PyString_AS_STRING(self);
+ self_len = PyString_GET_SIZE(self);
+ from_s = PyString_AS_STRING(from);
+ from_len = PyString_GET_SIZE(from);
+
+ count = countstring(self_s, self_len,
+ from_s, from_len,
+ 0, self_len, FORWARD, maxcount);
+ if (count == 0) {
+ /* no matches, return unchanged */
+ return return_self(self);
+ }
+
+ to_s = PyString_AS_STRING(to);
+ to_len = PyString_GET_SIZE(to);
+
+ /* Check for overflow */
+ /* result_len = self_len + count * (to_len-from_len) */
+ product = count * (to_len-from_len);
+ if (product / (to_len-from_len) != count) {
+ PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ return NULL;
+ }
+ result_len = self_len + product;
+ if (result_len < 0) {
+ PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ return NULL;
+ }
+
+ if ( (result = (PyStringObject *)
+ PyString_FromStringAndSize(NULL, result_len)) == NULL)
+ return NULL;
+ result_s = PyString_AS_STRING(result);
+
+ start = self_s;
+ end = self_s + self_len;
+ while (count-- > 0) {
+ offset = findstring(start, end-start,
+ from_s, from_len,
+ 0, end-start, FORWARD);
+ if (offset == -1)
+ break;
+ next = start+offset;
+ if (next == start) {
+ /* replace with the 'to' */
+ memcpy(result_s, to_s, to_len);
+ result_s += to_len;
+ start += from_len;
+ } else {
+ /* copy the unchanged old then the 'to' */
+ memcpy(result_s, start, next-start);
+ result_s += (next-start);
+ memcpy(result_s, to_s, to_len);
+ result_s += to_len;
+ start = next+from_len;
}
- else {
- for (;;++str, --len) {
- memcpy(new_s, sub, sub_len);
- new_s += sub_len;
- if (--nfound <= 0) {
- memcpy(new_s, str, len);
- break;
- }
- *new_s++ = *str;
- }
+ }
+ /* Copy the remainder of the string */
+ memcpy(result_s, start, end-start);
+
+ return result;
+}
+
+
+Py_LOCAL(PyStringObject *)
+replace(PyStringObject *self,
+ PyStringObject *from,
+ PyStringObject *to,
+ Py_ssize_t maxcount)
+{
+ Py_ssize_t from_len, to_len;
+
+ if (maxcount < 0) {
+ maxcount = PY_SSIZE_T_MAX;
+ } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
+ /* nothing to do; return the original string */
+ return return_self(self);
+ }
+
+ from_len = PyString_GET_SIZE(from);
+ to_len = PyString_GET_SIZE(to);
+
+ if (maxcount == 0 ||
+ (from_len == 0 && to_len == 0)) {
+ /* nothing to do; return the original string */
+ return return_self(self);
+ }
+
+ /* Handle zero-length special cases */
+
+ if (from_len == 0) {
+ /* insert the 'to' string everywhere. */
+ /* >>> "Python".replace("", ".") */
+ /* '.P.y.t.h.o.n.' */
+ return replace_interleave(self, to, maxcount);
+ }
+
+ /* Except for "".replace("", "A") == "A" there is no way beyond this */
+ /* point for an empty self string to generate a non-empty string */
+ /* Special case so the remaining code always gets a non-empty string */
+ if (PyString_GET_SIZE(self) == 0) {
+ return return_self(self);
+ }
+
+ if (to_len == 0) {
+ /* delete all occurrences of 'from' string */
+ if (from_len == 1) {
+ return replace_delete_single_character(
+ self, PyString_AS_STRING(from)[0], maxcount);
+ } else {
+ return replace_delete_substring(self, from, maxcount);
}
}
- *out_len = new_len;
- return out_s;
- return_same:
- *out_len = -1;
- return (char *)str; /* cast away const */
-}
+ /* Handle special case where both strings have the same length */
+
+ if (from_len == to_len) {
+ if (from_len == 1) {
+ return replace_single_character_in_place(
+ self,
+ PyString_AS_STRING(from)[0],
+ PyString_AS_STRING(to)[0],
+ maxcount);
+ } else {
+ return replace_substring_in_place(
+ self, from, to, maxcount);
+ }
+ }
+ /* Otherwise use the more generic algorithms */
+ if (from_len == 1) {
+ return replace_single_character(self, PyString_AS_STRING(from)[0],
+ to, maxcount);
+ } else {
+ /* len('from')>=2, len('to')>=1 */
+ return replace_substring(self, from, to, maxcount);
+ }
+}
PyDoc_STRVAR(replace__doc__,
"S.replace (old, new[, count]) -> string\n\
@@ -2520,66 +3062,42 @@ given, only the first count occurrences are replaced.");
static PyObject *
string_replace(PyStringObject *self, PyObject *args)
{
- const char *str = PyString_AS_STRING(self), *sub, *repl;
- char *new_s;
- const Py_ssize_t len = PyString_GET_SIZE(self);
- Py_ssize_t sub_len, repl_len, out_len;
Py_ssize_t count = -1;
- PyObject *newobj;
- PyObject *subobj, *replobj;
+ PyObject *from, *to;
+ const char *tmp_s;
+ Py_ssize_t tmp_len;
- if (!PyArg_ParseTuple(args, "OO|n:replace",
- &subobj, &replobj, &count))
+ if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
return NULL;
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- sub_len = PyString_GET_SIZE(subobj);
+ if (PyString_Check(from)) {
+ /* Can this be made a '!check' after the Unicode check? */
}
#ifdef Py_USING_UNICODE
- else if (PyUnicode_Check(subobj))
+ if (PyUnicode_Check(from))
return PyUnicode_Replace((PyObject *)self,
- subobj, replobj, count);
+ from, to, count);
#endif
- else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
+ else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
return NULL;
- if (PyString_Check(replobj)) {
- repl = PyString_AS_STRING(replobj);
- repl_len = PyString_GET_SIZE(replobj);
+ if (PyString_Check(to)) {
+ /* Can this be made a '!check' after the Unicode check? */
}
#ifdef Py_USING_UNICODE
- else if (PyUnicode_Check(replobj))
+ else if (PyUnicode_Check(to))
return PyUnicode_Replace((PyObject *)self,
- subobj, replobj, count);
+ from, to, count);
#endif
- else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
+ else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
return NULL;
- new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
- if (new_s == NULL) {
- PyErr_NoMemory();
- return NULL;
- }
- if (out_len == -1) {
- if (PyString_CheckExact(self)) {
- /* we're returning another reference to self */
- newobj = (PyObject*)self;
- Py_INCREF(newobj);
- }
- else {
- newobj = PyString_FromStringAndSize(str, len);
- if (newobj == NULL)
- return NULL;
- }
- }
- else {
- newobj = PyString_FromStringAndSize(new_s, out_len);
- PyMem_FREE(new_s);
- }
- return newobj;
+ return (PyObject *)replace((PyStringObject *) self,
+ (PyStringObject *) from,
+ (PyStringObject *) to, count);
}
+/** End DALKE **/
PyDoc_STRVAR(startswith__doc__,
"S.startswith(prefix[, start[, end]]) -> bool\n\
@@ -2820,7 +3338,7 @@ string_expandtabs(PyStringObject *self, PyObject *args)
return u;
}
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
PyObject *u;
@@ -3237,6 +3755,14 @@ string_splitlines(PyStringObject *self, PyObject *args)
data = PyString_AS_STRING(self);
len = PyString_GET_SIZE(self);
+ /* This does not use the preallocated list because splitlines is
+ usually run with hundreds of newlines. The overhead of
+ switching between PyList_SET_ITEM and append causes about a
+ 2-3% slowdown for that common case. A smarter implementation
+ could move the if check out, so the SET_ITEMs are done first
+ and the appends only done when the prealloc buffer is full.
+ That's too much work for little gain.*/
+
list = PyList_New(0);
if (!list)
goto onError;
@@ -3274,6 +3800,9 @@ string_splitlines(PyStringObject *self, PyObject *args)
}
#undef SPLIT_APPEND
+#undef SPLIT_ADD
+#undef MAX_PREALLOC
+#undef PREALLOC_SIZE
static PyObject *
string_getnewargs(PyStringObject *v)
@@ -3303,6 +3832,7 @@ string_methods[] = {
{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
endswith__doc__},
+ {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
@@ -3310,6 +3840,8 @@ string_methods[] = {
{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
+ {"rpartition", (PyCFunction)string_rpartition, METH_O,
+ rpartition__doc__},
{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
startswith__doc__},
{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
@@ -3566,7 +4098,7 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
/* Helpers for formatstring */
-static PyObject *
+Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
Py_ssize_t argidx = *p_argidx;
@@ -3595,7 +4127,7 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
-static int
+Py_LOCAL_INLINE(int)
formatfloat(char *buf, size_t buflen, int flags,
int prec, int type, PyObject *v)
{
@@ -3782,7 +4314,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
return result;
}
-static int
+Py_LOCAL_INLINE(int)
formatint(char *buf, size_t buflen, int flags,
int prec, int type, PyObject *v)
{
@@ -3854,7 +4386,7 @@ formatint(char *buf, size_t buflen, int flags,
return (int)strlen(buf);
}
-static int
+Py_LOCAL_INLINE(int)
formatchar(char *buf, size_t buflen, PyObject *v)
{
/* presume that the buffer is at least 2 characters long */
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 38820d4..03f2b07 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -4636,10 +4636,10 @@ slot_tp_getattr_hook(PyObject *self, PyObject *name)
(void *)PyObject_GenericGetAttr))
res = PyObject_GenericGetAttr(self, name);
else
- res = PyObject_CallFunction(getattribute, "OO", self, name);
+ res = PyObject_CallFunctionObjArgs(getattribute, self, name, NULL);
if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
PyErr_Clear();
- res = PyObject_CallFunction(getattr, "OO", self, name);
+ res = PyObject_CallFunctionObjArgs(getattr, self, name, NULL);
}
return res;
}
@@ -4776,7 +4776,7 @@ slot_tp_descr_get(PyObject *self, PyObject *obj, PyObject *type)
obj = Py_None;
if (type == NULL)
type = Py_None;
- return PyObject_CallFunction(get, "OOO", self, obj, type);
+ return PyObject_CallFunctionObjArgs(get, self, obj, type, NULL);
}
static int
@@ -5717,8 +5717,8 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type)
if (su->ob_type != &PySuper_Type)
/* If su is an instance of a (strict) subclass of super,
call its type */
- return PyObject_CallFunction((PyObject *)su->ob_type,
- "OO", su->type, obj);
+ return PyObject_CallFunctionObjArgs((PyObject *)su->ob_type,
+ su->type, obj, NULL);
else {
/* Inline the common case */
PyTypeObject *obj_type = supercheck(su->type, obj);
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index b432399..73def09 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -140,20 +140,48 @@ int _PyUnicode_IsDigit(Py_UNICODE ch)
double _PyUnicode_ToNumeric(Py_UNICODE ch)
{
switch (ch) {
+ case 0x0F33:
+ return (double) -1 / 2;
+ case 0x17F0:
case 0x3007:
+#ifdef Py_UNICODE_WIDE
+ case 0x1018A:
+#endif
return (double) 0;
case 0x09F4:
+ case 0x17F1:
case 0x215F:
case 0x2160:
case 0x2170:
case 0x3021:
+ case 0x3192:
+ case 0x3220:
case 0x3280:
+#ifdef Py_UNICODE_WIDE
+ case 0x10107:
+ case 0x10142:
+ case 0x10158:
+ case 0x10159:
+ case 0x1015A:
+ case 0x10320:
+ case 0x103D1:
+#endif
return (double) 1;
case 0x00BD:
+ case 0x0F2A:
+ case 0x2CFD:
+#ifdef Py_UNICODE_WIDE
+ case 0x10141:
+ case 0x10175:
+ case 0x10176:
+#endif
return (double) 1 / 2;
case 0x2153:
return (double) 1 / 3;
case 0x00BC:
+#ifdef Py_UNICODE_WIDE
+ case 0x10140:
+#endif
return (double) 1 / 4;
case 0x2155:
return (double) 1 / 5;
@@ -168,92 +196,201 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x2469:
case 0x247D:
case 0x2491:
+ case 0x24FE:
case 0x277F:
case 0x2789:
case 0x2793:
case 0x3038:
+ case 0x3229:
case 0x3289:
+#ifdef Py_UNICODE_WIDE
+ case 0x10110:
+ case 0x10149:
+ case 0x10150:
+ case 0x10157:
+ case 0x10160:
+ case 0x10161:
+ case 0x10162:
+ case 0x10163:
+ case 0x10164:
+ case 0x10322:
+ case 0x103D3:
+ case 0x10A44:
+#endif
return (double) 10;
case 0x0BF1:
case 0x137B:
case 0x216D:
case 0x217D:
+#ifdef Py_UNICODE_WIDE
+ case 0x10119:
+ case 0x1014B:
+ case 0x10152:
+ case 0x1016A:
+ case 0x103D5:
+ case 0x10A46:
+#endif
return (double) 100;
case 0x0BF2:
case 0x216F:
case 0x217F:
case 0x2180:
+#ifdef Py_UNICODE_WIDE
+ case 0x10122:
+ case 0x1014D:
+ case 0x10154:
+ case 0x10171:
+ case 0x10A47:
+#endif
return (double) 1000;
case 0x137C:
case 0x2182:
+#ifdef Py_UNICODE_WIDE
+ case 0x1012B:
+ case 0x10155:
+#endif
return (double) 10000;
case 0x216A:
case 0x217A:
case 0x246A:
case 0x247E:
case 0x2492:
+ case 0x24EB:
return (double) 11;
+ case 0x0F2F:
+ return (double) 11 / 2;
case 0x216B:
case 0x217B:
case 0x246B:
case 0x247F:
case 0x2493:
+ case 0x24EC:
return (double) 12;
case 0x246C:
case 0x2480:
case 0x2494:
+ case 0x24ED:
return (double) 13;
+ case 0x0F30:
+ return (double) 13 / 2;
case 0x246D:
case 0x2481:
case 0x2495:
+ case 0x24EE:
return (double) 14;
case 0x246E:
case 0x2482:
case 0x2496:
+ case 0x24EF:
return (double) 15;
+ case 0x0F31:
+ return (double) 15 / 2;
case 0x09F9:
case 0x246F:
case 0x2483:
case 0x2497:
+ case 0x24F0:
return (double) 16;
case 0x16EE:
case 0x2470:
case 0x2484:
case 0x2498:
+ case 0x24F1:
return (double) 17;
+ case 0x0F32:
+ return (double) 17 / 2;
case 0x16EF:
case 0x2471:
case 0x2485:
case 0x2499:
+ case 0x24F2:
return (double) 18;
case 0x16F0:
case 0x2472:
case 0x2486:
case 0x249A:
+ case 0x24F3:
return (double) 19;
case 0x09F5:
+ case 0x17F2:
case 0x2161:
case 0x2171:
case 0x3022:
+ case 0x3193:
+ case 0x3221:
case 0x3281:
+#ifdef Py_UNICODE_WIDE
+ case 0x10108:
+ case 0x1015B:
+ case 0x1015C:
+ case 0x1015D:
+ case 0x1015E:
+ case 0x103D2:
+#endif
return (double) 2;
case 0x2154:
+#ifdef Py_UNICODE_WIDE
+ case 0x10177:
+#endif
return (double) 2 / 3;
case 0x2156:
- return (double) 2 / 5;
+ return (double) 2 / 5;
case 0x1373:
case 0x2473:
case 0x2487:
case 0x249B:
+ case 0x24F4:
case 0x3039:
- return (double) 20;
+#ifdef Py_UNICODE_WIDE
+ case 0x10111:
+ case 0x103D4:
+ case 0x10A45:
+#endif
+ return (double) 20;
+#ifdef Py_UNICODE_WIDE
+ case 0x1011A:
+ return (double) 200;
+ case 0x10123:
+ return (double) 2000;
+ case 0x1012C:
+ return (double) 20000;
+#endif
+ case 0x3251:
+ return (double) 21;
+ case 0x3252:
+ return (double) 22;
+ case 0x3253:
+ return (double) 23;
+ case 0x3254:
+ return (double) 24;
+ case 0x3255:
+ return (double) 25;
+ case 0x3256:
+ return (double) 26;
+ case 0x3257:
+ return (double) 27;
+ case 0x3258:
+ return (double) 28;
+ case 0x3259:
+ return (double) 29;
case 0x09F6:
+ case 0x17F3:
case 0x2162:
case 0x2172:
case 0x3023:
+ case 0x3194:
+ case 0x3222:
case 0x3282:
+#ifdef Py_UNICODE_WIDE
+ case 0x10109:
+#endif
return (double) 3;
+ case 0x0F2B:
+ return (double) 3 / 2;
case 0x00BE:
+#ifdef Py_UNICODE_WIDE
+ case 0x10178:
+#endif
return (double) 3 / 4;
case 0x2157:
return (double) 3 / 5;
@@ -261,22 +398,103 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
return (double) 3 / 8;
case 0x1374:
case 0x303A:
+ case 0x325A:
+#ifdef Py_UNICODE_WIDE
+ case 0x10112:
+ case 0x10165:
+#endif
return (double) 30;
+#ifdef Py_UNICODE_WIDE
+ case 0x1011B:
+ case 0x1016B:
+ return (double) 300;
+ case 0x10124:
+ return (double) 3000;
+ case 0x1012D:
+ return (double) 30000;
+#endif
+ case 0x325B:
+ return (double) 31;
+ case 0x325C:
+ return (double) 32;
+ case 0x325D:
+ return (double) 33;
+ case 0x325E:
+ return (double) 34;
+ case 0x325F:
+ return (double) 35;
+ case 0x32B1:
+ return (double) 36;
+ case 0x32B2:
+ return (double) 37;
+ case 0x32B3:
+ return (double) 38;
+ case 0x32B4:
+ return (double) 39;
case 0x09F7:
+ case 0x17F4:
case 0x2163:
case 0x2173:
case 0x3024:
+ case 0x3195:
+ case 0x3223:
case 0x3283:
+#ifdef Py_UNICODE_WIDE
+ case 0x1010A:
+#endif
return (double) 4;
case 0x2158:
return (double) 4 / 5;
case 0x1375:
- return (double) 40;
+ case 0x32B5:
+#ifdef Py_UNICODE_WIDE
+ case 0x10113:
+#endif
+ return (double) 40;
+#ifdef Py_UNICODE_WIDE
+ case 0x1011C:
+ return (double) 400;
+ case 0x10125:
+ return (double) 4000;
+ case 0x1012E:
+ return (double) 40000;
+#endif
+ case 0x32B6:
+ return (double) 41;
+ case 0x32B7:
+ return (double) 42;
+ case 0x32B8:
+ return (double) 43;
+ case 0x32B9:
+ return (double) 44;
+ case 0x32BA:
+ return (double) 45;
+ case 0x32BB:
+ return (double) 46;
+ case 0x32BC:
+ return (double) 47;
+ case 0x32BD:
+ return (double) 48;
+ case 0x32BE:
+ return (double) 49;
+ case 0x17F5:
case 0x2164:
case 0x2174:
case 0x3025:
+ case 0x3224:
case 0x3284:
+#ifdef Py_UNICODE_WIDE
+ case 0x1010B:
+ case 0x10143:
+ case 0x10148:
+ case 0x1014F:
+ case 0x1015F:
+ case 0x10173:
+ case 0x10321:
+#endif
return (double) 5;
+ case 0x0F2C:
+ return (double) 5 / 2;
case 0x215A:
return (double) 5 / 6;
case 0x215D:
@@ -284,42 +502,147 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1376:
case 0x216C:
case 0x217C:
+ case 0x32BF:
+#ifdef Py_UNICODE_WIDE
+ case 0x10114:
+ case 0x10144:
+ case 0x1014A:
+ case 0x10151:
+ case 0x10166:
+ case 0x10167:
+ case 0x10168:
+ case 0x10169:
+ case 0x10174:
+ case 0x10323:
+#endif
return (double) 50;
case 0x216E:
case 0x217E:
+#ifdef Py_UNICODE_WIDE
+ case 0x1011D:
+ case 0x10145:
+ case 0x1014C:
+ case 0x10153:
+ case 0x1016C:
+ case 0x1016D:
+ case 0x1016E:
+ case 0x1016F:
+ case 0x10170:
+#endif
return (double) 500;
case 0x2181:
+#ifdef Py_UNICODE_WIDE
+ case 0x10126:
+ case 0x10146:
+ case 0x1014E:
+ case 0x10172:
+#endif
return (double) 5000;
+#ifdef Py_UNICODE_WIDE
+ case 0x1012F:
+ case 0x10147:
+ case 0x10156:
+ return (double) 50000;
+#endif
+ case 0x17F6:
case 0x2165:
case 0x2175:
case 0x3026:
+ case 0x3225:
case 0x3285:
+#ifdef Py_UNICODE_WIDE
+ case 0x1010C:
+#endif
return (double) 6;
case 0x1377:
+#ifdef Py_UNICODE_WIDE
+ case 0x10115:
+#endif
return (double) 60;
+#ifdef Py_UNICODE_WIDE
+ case 0x1011E:
+ return (double) 600;
+ case 0x10127:
+ return (double) 6000;
+ case 0x10130:
+ return (double) 60000;
+#endif
+ case 0x17F7:
case 0x2166:
case 0x2176:
case 0x3027:
+ case 0x3226:
case 0x3286:
+#ifdef Py_UNICODE_WIDE
+ case 0x1010D:
+#endif
return (double) 7;
+ case 0x0F2D:
+ return (double) 7 / 2;
case 0x215E:
return (double) 7 / 8;
case 0x1378:
+#ifdef Py_UNICODE_WIDE
+ case 0x10116:
+#endif
return (double) 70;
+#ifdef Py_UNICODE_WIDE
+ case 0x1011F:
+ return (double) 700;
+ case 0x10128:
+ return (double) 7000;
+ case 0x10131:
+ return (double) 70000;
+#endif
+ case 0x17F8:
case 0x2167:
case 0x2177:
case 0x3028:
+ case 0x3227:
case 0x3287:
+#ifdef Py_UNICODE_WIDE
+ case 0x1010E:
+#endif
return (double) 8;
case 0x1379:
+#ifdef Py_UNICODE_WIDE
+ case 0x10117:
+#endif
return (double) 80;
+#ifdef Py_UNICODE_WIDE
+ case 0x10120:
+ return (double) 800;
+ case 0x10129:
+ return (double) 8000;
+ case 0x10132:
+ return (double) 80000;
+#endif
+ case 0x17F9:
case 0x2168:
case 0x2178:
case 0x3029:
+ case 0x3228:
case 0x3288:
+#ifdef Py_UNICODE_WIDE
+ case 0x1010F:
+#endif
return (double) 9;
+ case 0x0F2E:
+ return (double) 9 / 2;
case 0x137A:
+#ifdef Py_UNICODE_WIDE
+ case 0x10118:
+#endif
return (double) 90;
+#ifdef Py_UNICODE_WIDE
+ case 0x10121:
+ case 0x1034A:
+ return (double) 900;
+ case 0x1012A:
+ return (double) 9000;
+ case 0x10133:
+ return (double) 90000;
+#endif
default:
return (double) _PyUnicode_ToDigit(ch);
}
@@ -327,9 +650,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
int _PyUnicode_IsNumeric(Py_UNICODE ch)
{
- if (_PyUnicode_ToNumeric(ch) < 0.0)
- return 0;
- return 1;
+ return _PyUnicode_ToNumeric(ch) != -1.0;
}
#ifndef WANT_WCTYPE_FUNCTIONS
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e62c774..6cdb0fc 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4,6 +4,9 @@ Unicode implementation based on original code by Fredrik Lundh,
modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
Unicode Integration Proposal (see file Misc/unicode.txt).
+Major speed upgrades to the method implementations at the Reykjavik
+NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
+
Copyright (c) Corporation for National Research Initiatives.
--------------------------------------------------------------------
@@ -121,6 +124,51 @@ PyUnicode_GetMax(void)
#endif
}
+/* --- Bloom Filters ----------------------------------------------------- */
+
+/* stuff to implement simple "bloom filters" for Unicode characters.
+ to keep things simple, we use a single bitmask, using the least 5
+ bits from each unicode character as the bit index. */
+
+/* the linebreak mask is set up by Unicode_Init below */
+
+#define BLOOM_MASK unsigned long
+
+static BLOOM_MASK bloom_linebreak;
+
+#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))
+
+#define BLOOM_LINEBREAK(ch)\
+ (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch)))
+
+Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
+{
+ /* calculate simple bloom-style bitmask for a given unicode string */
+
+ long mask;
+ Py_ssize_t i;
+
+ mask = 0;
+ for (i = 0; i < len; i++)
+ mask |= (1 << (ptr[i] & 0x1F));
+
+ return mask;
+}
+
+Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
+{
+ Py_ssize_t i;
+
+ for (i = 0; i < setlen; i++)
+ if (set[i] == chr)
+ return 1;
+
+ return 0;
+}
+
+#define BLOOM_MEMBER(mask, chr, set, setlen)\
+ BLOOM(mask, chr) && unicode_member(chr, set, setlen)
+
/* --- Unicode Object ----------------------------------------------------- */
static
@@ -136,6 +184,7 @@ int unicode_resize(register PyUnicodeObject *unicode,
/* Resizing shared object (unicode_empty or single character
objects) in-place is not allowed. Use PyUnicode_Resize()
instead ! */
+
if (unicode == unicode_empty ||
(unicode->length == 1 &&
unicode->str[0] < 256U &&
@@ -145,8 +194,11 @@ int unicode_resize(register PyUnicodeObject *unicode,
return -1;
}
- /* We allocate one more byte to make sure the string is
- Ux0000 terminated -- XXX is this needed ? */
+ /* We allocate one more character to make sure the string is Ux0000 terminated.
+ The overallocation is also used by fastsearch, which assumes that it's
+ safe to look at str[length] (without making any assumptions about what
+ it contains). */
+
oldstr = unicode->str;
PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1);
if (!unicode->str) {
@@ -181,7 +233,7 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
{
register PyUnicodeObject *unicode;
- /* Optimization fo empty strings */
+ /* Optimization for empty strings */
if (length == 0 && unicode_empty != NULL) {
Py_INCREF(unicode_empty);
return unicode_empty;
@@ -1963,9 +2015,20 @@ onError:
*/
-static const Py_UNICODE *findchar(const Py_UNICODE *s,
- Py_ssize_t size,
- Py_UNICODE ch);
+Py_LOCAL_INLINE(const Py_UNICODE *) findchar(const Py_UNICODE *s,
+ Py_ssize_t size,
+ Py_UNICODE ch)
+{
+ /* like wcschr, but doesn't stop at NULL characters */
+
+ while (size-- > 0) {
+ if (*s == ch)
+ return s;
+ s++;
+ }
+
+ return NULL;
+}
static
PyObject *unicodeescape_string(const Py_UNICODE *s,
@@ -2313,7 +2376,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
end = s + size;
while (s < end) {
- *p = *(Py_UNICODE *)s;
+ memcpy(p, s, sizeof(Py_UNICODE));
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
if (
@@ -3791,124 +3854,104 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */
-static
-Py_ssize_t count(PyUnicodeObject *self,
- Py_ssize_t start,
- Py_ssize_t end,
- PyUnicodeObject *substring)
-{
- Py_ssize_t count = 0;
+#define STRINGLIB_CHAR Py_UNICODE
- if (start < 0)
- start += self->length;
- if (start < 0)
- start = 0;
- if (end > self->length)
- end = self->length;
- if (end < 0)
- end += self->length;
- if (end < 0)
- end = 0;
+#define STRINGLIB_LEN PyUnicode_GET_SIZE
+#define STRINGLIB_NEW PyUnicode_FromUnicode
+#define STRINGLIB_STR PyUnicode_AS_UNICODE
- if (substring->length == 0)
- return (end - start + 1);
+Py_LOCAL_INLINE(int)
+STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
+{
+ if (str[0] != other[0])
+ return 1;
+ return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
+}
- end -= substring->length;
+#define STRINGLIB_EMPTY unicode_empty
- while (start <= end)
- if (Py_UNICODE_MATCH(self, start, substring)) {
- count++;
- start += substring->length;
- } else
- start++;
+#include "stringlib/fastsearch.h"
- return count;
-}
+#include "stringlib/count.h"
+#include "stringlib/find.h"
+#include "stringlib/partition.h"
+
+/* helper macro to fixup start/end slice values */
+#define FIX_START_END(obj) \
+ if (start < 0) \
+ start += (obj)->length; \
+ if (start < 0) \
+ start = 0; \
+ if (end > (obj)->length) \
+ end = (obj)->length; \
+ if (end < 0) \
+ end += (obj)->length; \
+ if (end < 0) \
+ end = 0;
Py_ssize_t PyUnicode_Count(PyObject *str,
- PyObject *substr,
- Py_ssize_t start,
- Py_ssize_t end)
+ PyObject *substr,
+ Py_ssize_t start,
+ Py_ssize_t end)
{
Py_ssize_t result;
+ PyUnicodeObject* str_obj;
+ PyUnicodeObject* sub_obj;
- str = PyUnicode_FromObject(str);
- if (str == NULL)
+ str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
+ if (!str_obj)
return -1;
- substr = PyUnicode_FromObject(substr);
- if (substr == NULL) {
- Py_DECREF(str);
+ sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr);
+ if (!sub_obj) {
+ Py_DECREF(str_obj);
return -1;
}
- result = count((PyUnicodeObject *)str,
- start, end,
- (PyUnicodeObject *)substr);
-
- Py_DECREF(str);
- Py_DECREF(substr);
- return result;
-}
-
-static
-Py_ssize_t findstring(PyUnicodeObject *self,
- PyUnicodeObject *substring,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction)
-{
- if (start < 0)
- start += self->length;
- if (start < 0)
- start = 0;
-
- if (end > self->length)
- end = self->length;
- if (end < 0)
- end += self->length;
- if (end < 0)
- end = 0;
+ FIX_START_END(str_obj);
- if (substring->length == 0)
- return (direction > 0) ? start : end;
-
- end -= substring->length;
+ result = stringlib_count(
+ str_obj->str + start, end - start, sub_obj->str, sub_obj->length
+ );
- if (direction < 0) {
- for (; end >= start; end--)
- if (Py_UNICODE_MATCH(self, end, substring))
- return end;
- } else {
- for (; start <= end; start++)
- if (Py_UNICODE_MATCH(self, start, substring))
- return start;
- }
+ Py_DECREF(sub_obj);
+ Py_DECREF(str_obj);
- return -1;
+ return result;
}
Py_ssize_t PyUnicode_Find(PyObject *str,
- PyObject *substr,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction)
+ PyObject *sub,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ int direction)
{
Py_ssize_t result;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (!str)
return -2;
- substr = PyUnicode_FromObject(substr);
- if (substr == NULL) {
+ sub = PyUnicode_FromObject(sub);
+ if (!sub) {
Py_DECREF(str);
return -2;
}
- result = findstring((PyUnicodeObject *)str,
- (PyUnicodeObject *)substr,
- start, end, direction);
+ if (direction > 0)
+ result = stringlib_find_slice(
+ PyUnicode_AS_UNICODE(str), PyUnicode_GET_SIZE(str),
+ PyUnicode_AS_UNICODE(sub), PyUnicode_GET_SIZE(sub),
+ start, end
+ );
+ else
+ result = stringlib_rfind_slice(
+ PyUnicode_AS_UNICODE(str), PyUnicode_GET_SIZE(str),
+ PyUnicode_AS_UNICODE(sub), PyUnicode_GET_SIZE(sub),
+ start, end
+ );
+
Py_DECREF(str);
- Py_DECREF(substr);
+ Py_DECREF(sub);
+
return result;
}
@@ -3919,20 +3962,10 @@ int tailmatch(PyUnicodeObject *self,
Py_ssize_t end,
int direction)
{
- if (start < 0)
- start += self->length;
- if (start < 0)
- start = 0;
-
if (substring->length == 0)
return 1;
- if (end > self->length)
- end = self->length;
- if (end < 0)
- end += self->length;
- if (end < 0)
- end = 0;
+ FIX_START_END(self);
end -= substring->length;
if (end < start)
@@ -3974,22 +4007,6 @@ Py_ssize_t PyUnicode_Tailmatch(PyObject *str,
return result;
}
-static
-const Py_UNICODE *findchar(const Py_UNICODE *s,
- Py_ssize_t size,
- Py_UNICODE ch)
-{
- /* like wcschr, but doesn't stop at NULL characters */
-
- while (size-- > 0) {
- if (*s == ch)
- return s;
- s++;
- }
-
- return NULL;
-}
-
/* Apply fixfct filter to the Unicode object self and return a
reference to the modified object */
@@ -4148,10 +4165,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
PyObject *internal_separator = NULL;
const Py_UNICODE blank = ' ';
const Py_UNICODE *sep = &blank;
- size_t seplen = 1;
+ Py_ssize_t seplen = 1;
PyUnicodeObject *res = NULL; /* the result */
- size_t res_alloc = 100; /* # allocated bytes for string in res */
- size_t res_used; /* # used bytes */
+ Py_ssize_t res_alloc = 100; /* # allocated bytes for string in res */
+ Py_ssize_t res_used; /* # used bytes */
Py_UNICODE *res_p; /* pointer to free byte in res's string area */
PyObject *fseq; /* PySequence_Fast(seq) */
Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
@@ -4212,8 +4229,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
res_used = 0;
for (i = 0; i < seqlen; ++i) {
- size_t itemlen;
- size_t new_res_used;
+ Py_ssize_t itemlen;
+ Py_ssize_t new_res_used;
item = PySequence_Fast_GET_ITEM(fseq, i);
/* Convert item to Unicode. */
@@ -4235,19 +4252,18 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
/* Make sure we have enough space for the separator and the item. */
itemlen = PyUnicode_GET_SIZE(item);
new_res_used = res_used + itemlen;
- if (new_res_used < res_used || new_res_used > PY_SSIZE_T_MAX)
+ if (new_res_used <= 0)
goto Overflow;
if (i < seqlen - 1) {
new_res_used += seplen;
- if (new_res_used < res_used || new_res_used > PY_SSIZE_T_MAX)
+ if (new_res_used <= 0)
goto Overflow;
}
if (new_res_used > res_alloc) {
/* double allocated size until it's big enough */
do {
- size_t oldsize = res_alloc;
res_alloc += res_alloc;
- if (res_alloc < oldsize || res_alloc > PY_SSIZE_T_MAX)
+ if (res_alloc <= 0)
goto Overflow;
} while (new_res_used > res_alloc);
if (_PyUnicode_Resize(&res, res_alloc) < 0) {
@@ -4333,17 +4349,6 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
else \
Py_DECREF(str);
-#define SPLIT_INSERT(data, left, right) \
- str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \
- if (!str) \
- goto onError; \
- if (PyList_Insert(list, 0, str)) { \
- Py_DECREF(str); \
- goto onError; \
- } \
- else \
- Py_DECREF(str);
-
static
PyObject *split_whitespace(PyUnicodeObject *self,
PyObject *list,
@@ -4404,7 +4409,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string,
Py_ssize_t eol;
/* Find a line and append it */
- while (i < len && !Py_UNICODE_ISLINEBREAK(data[i]))
+ while (i < len && !BLOOM_LINEBREAK(data[i]))
i++;
/* Skip the line break reading CRLF as one line break */
@@ -4515,15 +4520,17 @@ PyObject *rsplit_whitespace(PyUnicodeObject *self,
if (j > i) {
if (maxcount-- <= 0)
break;
- SPLIT_INSERT(self->str, i + 1, j + 1);
+ SPLIT_APPEND(self->str, i + 1, j + 1);
while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
i--;
j = i;
}
}
if (j >= 0) {
- SPLIT_INSERT(self->str, 0, j + 1);
+ SPLIT_APPEND(self->str, 0, j + 1);
}
+ if (PyList_Reverse(list) < 0)
+ goto onError;
return list;
onError:
@@ -4546,14 +4553,16 @@ PyObject *rsplit_char(PyUnicodeObject *self,
if (self->str[i] == ch) {
if (maxcount-- <= 0)
break;
- SPLIT_INSERT(self->str, i + 1, j + 1);
+ SPLIT_APPEND(self->str, i + 1, j + 1);
j = i = i - 1;
} else
i--;
}
if (j >= -1) {
- SPLIT_INSERT(self->str, 0, j + 1);
+ SPLIT_APPEND(self->str, 0, j + 1);
}
+ if (PyList_Reverse(list) < 0)
+ goto onError;
return list;
onError:
@@ -4577,15 +4586,17 @@ PyObject *rsplit_substring(PyUnicodeObject *self,
if (Py_UNICODE_MATCH(self, i, substring)) {
if (maxcount-- <= 0)
break;
- SPLIT_INSERT(self->str, i + sublen, j);
+ SPLIT_APPEND(self->str, i + sublen, j);
j = i;
i -= sublen;
} else
i--;
}
if (j >= 0) {
- SPLIT_INSERT(self->str, 0, j);
+ SPLIT_APPEND(self->str, 0, j);
}
+ if (PyList_Reverse(list) < 0)
+ goto onError;
return list;
onError:
@@ -4594,7 +4605,6 @@ PyObject *rsplit_substring(PyUnicodeObject *self,
}
#undef SPLIT_APPEND
-#undef SPLIT_INSERT
static
PyObject *split(PyUnicodeObject *self,
@@ -4665,88 +4675,128 @@ PyObject *replace(PyUnicodeObject *self,
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
- if (str1->length == 1 && str2->length == 1) {
+ if (str1->length == str2->length) {
+ /* same length */
Py_ssize_t i;
-
- /* replace characters */
- if (!findchar(self->str, self->length, str1->str[0]) &&
- PyUnicode_CheckExact(self)) {
- /* nothing to replace, return original string */
- Py_INCREF(self);
- u = self;
+ if (str1->length == 1) {
+ /* replace characters */
+ Py_UNICODE u1, u2;
+ if (!findchar(self->str, self->length, str1->str[0]))
+ goto nothing;
+ u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+ if (!u)
+ return NULL;
+ Py_UNICODE_COPY(u->str, self->str, self->length);
+ u1 = str1->str[0];
+ u2 = str2->str[0];
+ for (i = 0; i < u->length; i++)
+ if (u->str[i] == u1) {
+ if (--maxcount < 0)
+ break;
+ u->str[i] = u2;
+ }
} else {
- Py_UNICODE u1 = str1->str[0];
- Py_UNICODE u2 = str2->str[0];
-
- u = (PyUnicodeObject*) PyUnicode_FromUnicode(
- NULL,
- self->length
+ i = fastsearch(
+ self->str, self->length, str1->str, str1->length, FAST_SEARCH
);
- if (u != NULL) {
- Py_UNICODE_COPY(u->str, self->str,
- self->length);
- for (i = 0; i < u->length; i++)
- if (u->str[i] == u1) {
- if (--maxcount < 0)
- break;
- u->str[i] = u2;
- }
- }
+ if (i < 0)
+ goto nothing;
+ u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+ if (!u)
+ return NULL;
+ Py_UNICODE_COPY(u->str, self->str, self->length);
+ while (i <= self->length - str1->length)
+ if (Py_UNICODE_MATCH(self, i, str1)) {
+ if (--maxcount < 0)
+ break;
+ Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
+ i += str1->length;
+ } else
+ i++;
}
-
} else {
- Py_ssize_t n, i;
+
+ Py_ssize_t n, i, j, e;
+ Py_ssize_t product, new_size, delta;
Py_UNICODE *p;
/* replace strings */
- n = count(self, 0, self->length, str1);
+ n = stringlib_count(self->str, self->length, str1->str, str1->length);
if (n > maxcount)
n = maxcount;
- if (n == 0) {
- /* nothing to replace, return original string */
- if (PyUnicode_CheckExact(self)) {
- Py_INCREF(self);
- u = self;
- }
- else {
- u = (PyUnicodeObject *)
- PyUnicode_FromUnicode(self->str, self->length);
- }
+ if (n == 0)
+ goto nothing;
+ /* new_size = self->length + n * (str2->length - str1->length)); */
+ delta = (str2->length - str1->length);
+ if (delta == 0) {
+ new_size = self->length;
} else {
- u = _PyUnicode_New(
- self->length + n * (str2->length - str1->length));
- if (u) {
- i = 0;
- p = u->str;
- if (str1->length > 0) {
- while (i <= self->length - str1->length)
- if (Py_UNICODE_MATCH(self, i, str1)) {
- /* replace string segment */
- Py_UNICODE_COPY(p, str2->str, str2->length);
- p += str2->length;
- i += str1->length;
- if (--n <= 0) {
- /* copy remaining part */
- Py_UNICODE_COPY(p, self->str+i, self->length-i);
- break;
- }
- } else
- *p++ = self->str[i++];
- } else {
- while (n > 0) {
- Py_UNICODE_COPY(p, str2->str, str2->length);
- p += str2->length;
- if (--n <= 0)
- break;
- *p++ = self->str[i++];
- }
- Py_UNICODE_COPY(p, self->str+i, self->length-i);
+ product = n * (str2->length - str1->length);
+ if ((product / (str2->length - str1->length)) != n) {
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
+ return NULL;
+ }
+ new_size = self->length + product;
+ if (new_size < 0) {
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
+ return NULL;
+ }
+ }
+ u = _PyUnicode_New(new_size);
+ if (!u)
+ return NULL;
+ i = 0;
+ p = u->str;
+ e = self->length - str1->length;
+ if (str1->length > 0) {
+ while (n-- > 0) {
+ /* look for next match */
+ j = i;
+ while (j <= e) {
+ if (Py_UNICODE_MATCH(self, j, str1))
+ break;
+ j++;
+ }
+ if (j > i) {
+ if (j > e)
+ break;
+ /* copy unchanged part [i:j] */
+ Py_UNICODE_COPY(p, self->str+i, j-i);
+ p += j - i;
+ }
+ /* copy substitution string */
+ if (str2->length > 0) {
+ Py_UNICODE_COPY(p, str2->str, str2->length);
+ p += str2->length;
}
+ i = j + str1->length;
+ }
+ if (i < self->length)
+ /* copy tail [i:] */
+ Py_UNICODE_COPY(p, self->str+i, self->length-i);
+ } else {
+ /* interleave */
+ while (n > 0) {
+ Py_UNICODE_COPY(p, str2->str, str2->length);
+ p += str2->length;
+ if (--n <= 0)
+ break;
+ *p++ = self->str[i++];
}
+ Py_UNICODE_COPY(p, self->str+i, self->length-i);
}
}
-
return (PyObject *) u;
+
+nothing:
+ /* nothing to replace; return original string (when possible) */
+ if (PyUnicode_CheckExact(self)) {
+ Py_INCREF(self);
+ return (PyObject *) self;
+ }
+ return PyUnicode_FromUnicode(self->str, self->length);
}
/* --- Unicode Object Methods --------------------------------------------- */
@@ -4983,54 +5033,29 @@ onError:
int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
- PyUnicodeObject *u = NULL, *v = NULL;
+ PyObject *str, *sub;
int result;
- Py_ssize_t size;
- register const Py_UNICODE *lhs, *end, *rhs;
/* Coerce the two arguments */
- v = (PyUnicodeObject *)PyUnicode_FromObject(element);
- if (v == NULL) {
+ sub = PyUnicode_FromObject(element);
+ if (!sub) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand");
- goto onError;
+ return -1;
}
- u = (PyUnicodeObject *)PyUnicode_FromObject(container);
- if (u == NULL)
- goto onError;
-
- size = PyUnicode_GET_SIZE(v);
- rhs = PyUnicode_AS_UNICODE(v);
- lhs = PyUnicode_AS_UNICODE(u);
- result = 0;
- if (size == 1) {
- end = lhs + PyUnicode_GET_SIZE(u);
- while (lhs < end) {
- if (*lhs++ == *rhs) {
- result = 1;
- break;
- }
- }
- }
- else {
- end = lhs + (PyUnicode_GET_SIZE(u) - size);
- while (lhs <= end) {
- if (memcmp(lhs++, rhs, size * sizeof(Py_UNICODE)) == 0) {
- result = 1;
- break;
- }
- }
+ str = PyUnicode_FromObject(container);
+ if (!str) {
+ Py_DECREF(sub);
+ return -1;
}
- Py_DECREF(u);
- Py_DECREF(v);
- return result;
+ result = stringlib_contains_obj(str, sub);
-onError:
- Py_XDECREF(u);
- Py_XDECREF(v);
- return -1;
+ Py_DECREF(str);
+ Py_DECREF(sub);
+
+ return result;
}
/* Concat to string or Unicode object giving a new Unicode object. */
@@ -5078,8 +5103,8 @@ onError:
PyDoc_STRVAR(count__doc__,
"S.count(sub[, start[, end]]) -> int\n\
\n\
-Return the number of occurrences of substring sub in Unicode string\n\
-S[start:end]. Optional arguments start and end are\n\
+Return the number of non-overlapping occurrences of substring sub in\n\
+Unicode string S[start:end]. Optional arguments start and end are\n\
interpreted as in slice notation.");
static PyObject *
@@ -5095,24 +5120,19 @@ unicode_count(PyUnicodeObject *self, PyObject *args)
return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
+ (PyObject *)substring);
if (substring == NULL)
return NULL;
- if (start < 0)
- start += self->length;
- if (start < 0)
- start = 0;
- if (end > self->length)
- end = self->length;
- if (end < 0)
- end += self->length;
- if (end < 0)
- end = 0;
+ FIX_START_END(self);
- result = PyInt_FromLong((long) count(self, start, end, substring));
+ result = PyInt_FromSsize_t(
+ stringlib_count(self->str + start, end - start,
+ substring->str, substring->length)
+ );
Py_DECREF(substring);
+
return result;
}
@@ -5262,23 +5282,27 @@ Return -1 on failure.");
static PyObject *
unicode_find(PyUnicodeObject *self, PyObject *args)
{
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *result;
+ Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1));
+ result = stringlib_find_slice(
+ PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
+ PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring),
+ start, end
+ );
Py_DECREF(substring);
- return result;
+
+ return PyInt_FromSsize_t(result);
}
static PyObject *
@@ -5328,26 +5352,30 @@ static PyObject *
unicode_index(PyUnicodeObject *self, PyObject *args)
{
Py_ssize_t result;
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
if (!PyArg_ParseTuple(args, "O|O&O&:index", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
-
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = findstring(self, substring, start, end, 1);
+ result = stringlib_find_slice(
+ PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
+ PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring),
+ start, end
+ );
Py_DECREF(substring);
+
if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL;
}
+
return PyInt_FromSsize_t(result);
}
@@ -5702,16 +5730,6 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
-static const Py_UNICODE *
-unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n)
-{
- size_t i;
- for (i = 0; i < n; ++i)
- if (s[i] == c)
- return s+i;
- return NULL;
-}
-
/* externally visible for str.strip(unicode) */
PyObject *
_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
@@ -5722,27 +5740,29 @@ _PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj);
Py_ssize_t i, j;
+ BLOOM_MASK sepmask = make_bloom_mask(sep, seplen);
+
i = 0;
if (striptype != RIGHTSTRIP) {
- while (i < len && unicode_memchr(sep, s[i], seplen)) {
- i++;
- }
+ while (i < len && BLOOM_MEMBER(sepmask, s[i], sep, seplen)) {
+ i++;
+ }
}
j = len;
if (striptype != LEFTSTRIP) {
- do {
- j--;
- } while (j >= i && unicode_memchr(sep, s[j], seplen));
- j++;
+ do {
+ j--;
+ } while (j >= i && BLOOM_MEMBER(sepmask, s[j], sep, seplen));
+ j++;
}
if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
- Py_INCREF(self);
- return (PyObject*)self;
+ Py_INCREF(self);
+ return (PyObject*)self;
}
else
- return PyUnicode_FromUnicode(s+i, j-i);
+ return PyUnicode_FromUnicode(s+i, j-i);
}
@@ -5898,9 +5918,19 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
p = u->str;
- while (len-- > 0) {
- Py_UNICODE_COPY(p, str->str, str->length);
- p += str->length;
+ if (str->length == 1 && len > 0) {
+ Py_UNICODE_FILL(p, str->str[0], len);
+ } else {
+ Py_ssize_t done = 0; /* number of characters copied this far */
+ if (done < nchars) {
+ Py_UNICODE_COPY(p, str->str, str->length);
+ done = str->length;
+ }
+ while (done < nchars) {
+ int n = (done <= nchars-done) ? done : nchars-done;
+ Py_UNICODE_COPY(p+done, p, n);
+ done += n;
+ }
}
return (PyObject*) u;
@@ -5993,23 +6023,27 @@ Return -1 on failure.");
static PyObject *
unicode_rfind(PyUnicodeObject *self, PyObject *args)
{
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *result;
+ Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1));
+ result = stringlib_rfind_slice(
+ PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
+ PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring),
+ start, end
+ );
Py_DECREF(substring);
- return result;
+
+ return PyInt_FromSsize_t(result);
}
PyDoc_STRVAR(rindex__doc__,
@@ -6020,22 +6054,26 @@ Like S.rfind() but raise ValueError when the substring is not found.");
static PyObject *
unicode_rindex(PyUnicodeObject *self, PyObject *args)
{
- Py_ssize_t result;
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
+ Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = findstring(self, substring, start, end, -1);
+ result = stringlib_rfind_slice(
+ PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
+ PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring),
+ start, end
+ );
Py_DECREF(substring);
+
if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL;
@@ -6137,6 +6175,87 @@ unicode_split(PyUnicodeObject *self, PyObject *args)
return PyUnicode_Split((PyObject *)self, substring, maxcount);
}
+PyObject *
+PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
+{
+ PyObject* str_obj;
+ PyObject* sep_obj;
+ PyObject* out;
+
+ str_obj = PyUnicode_FromObject(str_in);
+ if (!str_obj)
+ return NULL;
+ sep_obj = PyUnicode_FromObject(sep_in);
+ if (!sep_obj) {
+ Py_DECREF(str_obj);
+ return NULL;
+ }
+
+ out = stringlib_partition(
+ str_obj, PyUnicode_AS_UNICODE(str_obj), PyUnicode_GET_SIZE(str_obj),
+ sep_obj, PyUnicode_AS_UNICODE(sep_obj), PyUnicode_GET_SIZE(sep_obj)
+ );
+
+ Py_DECREF(sep_obj);
+ Py_DECREF(str_obj);
+
+ return out;
+}
+
+
+PyObject *
+PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
+{
+ PyObject* str_obj;
+ PyObject* sep_obj;
+ PyObject* out;
+
+ str_obj = PyUnicode_FromObject(str_in);
+ if (!str_obj)
+ return NULL;
+ sep_obj = PyUnicode_FromObject(sep_in);
+ if (!sep_obj) {
+ Py_DECREF(str_obj);
+ return NULL;
+ }
+
+ out = stringlib_rpartition(
+ str_obj, PyUnicode_AS_UNICODE(str_obj), PyUnicode_GET_SIZE(str_obj),
+ sep_obj, PyUnicode_AS_UNICODE(sep_obj), PyUnicode_GET_SIZE(sep_obj)
+ );
+
+ Py_DECREF(sep_obj);
+ Py_DECREF(str_obj);
+
+ return out;
+}
+
+PyDoc_STRVAR(partition__doc__,
+"S.partition(sep) -> (head, sep, tail)\n\
+\n\
+Searches for the separator sep in S, and returns the part before it,\n\
+the separator itself, and the part after it. If the separator is not\n\
+found, returns S and two empty strings.");
+
+static PyObject*
+unicode_partition(PyUnicodeObject *self, PyObject *separator)
+{
+ return PyUnicode_Partition((PyObject *)self, separator);
+}
+
+PyDoc_STRVAR(rpartition__doc__,
+"S.rpartition(sep) -> (head, sep, tail)\n\
+\n\
+Searches for the separator sep in S, starting at the end of S, and returns\n\
+the part before it, the separator itself, and the part after it. If the\n\
+separator is not found, returns S and two empty strings.");
+
+static PyObject*
+unicode_rpartition(PyUnicodeObject *self, PyObject *separator)
+{
+ return PyUnicode_RPartition((PyObject *)self, separator);
+}
+
PyObject *PyUnicode_RSplit(PyObject *s,
PyObject *sep,
Py_ssize_t maxsplit)
@@ -6390,6 +6509,7 @@ static PyMethodDef unicode_methods[] = {
{"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
{"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
{"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
+ {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
@@ -6400,6 +6520,7 @@ static PyMethodDef unicode_methods[] = {
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
{"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
{"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
+ {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
{"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
{"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
{"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
@@ -7375,6 +7496,18 @@ void _PyUnicode_Init(void)
{
int i;
+ /* XXX - move this array to unicodectype.c ? */
+ Py_UNICODE linebreak[] = {
+ 0x000A, /* LINE FEED */
+ 0x000D, /* CARRIAGE RETURN */
+ 0x001C, /* FILE SEPARATOR */
+ 0x001D, /* GROUP SEPARATOR */
+ 0x001E, /* RECORD SEPARATOR */
+ 0x0085, /* NEXT LINE */
+ 0x2028, /* LINE SEPARATOR */
+ 0x2029, /* PARAGRAPH SEPARATOR */
+ };
+
/* Init the implementation */
unicode_freelist = NULL;
unicode_freelist_size = 0;
@@ -7384,6 +7517,11 @@ void _PyUnicode_Init(void)
unicode_latin1[i] = NULL;
if (PyType_Ready(&PyUnicode_Type) < 0)
Py_FatalError("Can't initialize 'unicode'");
+
+ /* initialize the linebreak bloom filter */
+ bloom_linebreak = make_bloom_mask(
+ linebreak, sizeof(linebreak) / sizeof(linebreak[0])
+ );
}
/* Finalize the Unicode implementation */
diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c
index 3f2c261..9c2a626 100644
--- a/Objects/weakrefobject.c
+++ b/Objects/weakrefobject.c
@@ -847,7 +847,7 @@ PyWeakref_GetObject(PyObject *ref)
static void
handle_callback(PyWeakReference *ref, PyObject *callback)
{
- PyObject *cbresult = PyObject_CallFunction(callback, "O", ref);
+ PyObject *cbresult = PyObject_CallFunctionObjArgs(callback, ref, NULL);
if (cbresult == NULL)
PyErr_WriteUnraisable(callback);