diff options
-rw-r--r-- | Doc/ext/ext.tex | 391 |
1 files changed, 385 insertions, 6 deletions
diff --git a/Doc/ext/ext.tex b/Doc/ext/ext.tex index 0ffec46..9b45172 100644 --- a/Doc/ext/ext.tex +++ b/Doc/ext/ext.tex @@ -1750,6 +1750,7 @@ implementation of CObjects (files \file{Include/cobject.h} and \chapter{Defining New Types \label{defining-new-types}} \sectionauthor{Michael Hudson}{mwh21@cam.ac.uk} +\sectionauthor{Dave Kuhlman}{dkuhlman@rexx.com} As mentioned in the last chapter, Python allows the writer of an extension module to define new types that can be manipulated from @@ -2065,6 +2066,7 @@ move to that directory and fire up Python --- you should be able to That wasn't so hard, was it? + \section{Type Methods \label{dnt-type-methods}} @@ -2111,18 +2113,42 @@ typedef struct _typeobject { char *tp_doc; /* Documentation string */ + /* Assigned meaning in release 2.0 */ /* call function for all accessible objects */ traverseproc tp_traverse; /* delete references to contained objects */ inquiry tp_clear; + /* Assigned meaning in release 2.1 */ /* rich comparisons */ richcmpfunc tp_richcompare; /* weak reference enabler */ long tp_weaklistoffset; + /* Added in release 2.2 */ + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + struct PyMethodDef *tp_methods; + struct memberlist *tp_members; + struct getsetlist *tp_getset; + struct _typeobject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + long tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + destructor tp_free; /* Low-level free-memory routine */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_defined; + } PyTypeObject; \end{verbatim} @@ -2130,8 +2156,14 @@ Now that's a \emph{lot} of methods. Don't worry too much though - if you have a type you want to define, the chances are very good that you will only implement a handful of these. 
-As you probably expect by now, I'm going to go over this line-by-line, -saying a word about each field as we get to it. +As you probably expect by now, we're going to go over this and give +more information about the various handlers. We won't go in the order +they are defined in the structure, because there is a lot of +historical baggage that impacts the ordering of the fields; be sure +your type initialization keeps the fields in the right order! It's +often easiest to find an example that includes all the fields you need +(even if they're initialized to \code{0}) and then change the values +to suit your new type. \begin{verbatim} char *tp_name; /* For printing */ @@ -2151,21 +2183,368 @@ for variable length structures (think: strings, lists) which is where the \cdata{tp_itemsize} field comes in. This will be dealt with later. -Now we come to the basic type methods - the ones most extension types +\begin{verbatim} + char *tp_doc; +\end{verbatim} + +Here you can put a string (or its address) that you want returned when +the Python script references \code{obj.__doc__} to retrieve the +docstring. + +Now we come to the basic type methods---the ones most extension types will implement. + +\subsection{Finalization and De-allocation} + \begin{verbatim} destructor tp_dealloc; +\end{verbatim} + +This function is called when the reference count of the instance of +your type is reduced to zero and the Python interpreter wants to +reclaim it. If your type has memory to free or other clean-up to +perform, put it here. The object itself needs to be freed here as +well. 
Here is an example of this function: + +\begin{verbatim} +static void +newdatatype_dealloc(newdatatypeobject * obj) +{ + free(obj->obj_UnderlyingDatatypePtr); + PyObject_DEL(obj); +} +\end{verbatim} + + +\subsection{Object Representation} + +In Python, there are three ways to generate a textual representation +of an object: the \function{repr()}\bifuncindex{repr} function (or +equivalent backtick syntax), the \function{str()}\bifuncindex{str} +function, and the \keyword{print} statement. For most objects, the +\keyword{print} statement is equivalent to the \function{str()} +function, but it is possible to special-case printing to a +\ctype{FILE*} if necessary; this should only be done if efficiency is +identified as a problem and profiling suggests that creating a +temporary string object to be written to a file is too expensive. + +These handlers are all optional, and most types at most need to +implement the \member{tp_str} and \member{tp_repr} handlers. + +\begin{verbatim} + reprfunc tp_repr; + reprfunc tp_str; printfunc tp_print; +\end{verbatim} + +The \member{tp_repr} handler should return a string object containing +a representation of the instance for which it is called. Here is a +simple example: + +\begin{verbatim} +static PyObject * +newdatatype_repr(newdatatypeobject * obj) +{ + char buf[4096]; + sprintf(buf, "Repr-ified_newdatatype{{size:%d}}", + obj->obj_UnderlyingDatatypePtr->size); + return PyString_FromString(buf); +} +\end{verbatim} + +If no \member{tp_repr} handler is specified, the interpreter will +supply a representation that uses the type's \member{tp_name} and a +uniquely-identifying value for the object. + +The \member{tp_str} handler is to \function{str()} what the +\member{tp_repr} handler described above is to \function{repr()}; that +is, it is called when Python code calls \function{str()} on an +instance of your object. 
Its implementation is very similar to the +\member{tp_repr} function, but the resulting string is intended for +human consumption. If \member{tp_str} is not specified, the +\member{tp_repr} handler is used instead. + +Here is a simple example: + +\begin{verbatim} +static PyObject * +newdatatype_str(newdatatypeobject * obj) +{ + PyObject *pyString; + char buf[4096]; + sprintf(buf, "Stringified_newdatatype{{size:%d}}", + obj->obj_UnderlyingDatatypePtr->size + ); + pyString = PyString_FromString(buf); + return pyString; +} +\end{verbatim} + +The print function will be called whenever Python needs to "print" an +instance of the type. For example, if 'node' is an instance of type +TreeNode, then the print function is called when Python code calls: + +\begin{verbatim} +print node +\end{verbatim} + +There is a flags argument and one flag, \constant{Py_PRINT_RAW}, and +it suggests that you print without string quotes and possibly without +interpreting escape sequences. + +The print function receives a file object as an argument. You will +likely want to write to that file object. + +Here is a sample print function: + +\begin{verbatim} +static int +newdatatype_print(newdatatypeobject *obj, FILE *fp, int flags) +{ + if (flags & Py_PRINT_RAW) { + fprintf(fp, "<{newdatatype object--size: %d}>", + obj->obj_UnderlyingDatatypePtr->size); + } + else { + fprintf(fp, "\"<{newdatatype object--size: %d}>\"", + obj->obj_UnderlyingDatatypePtr->size); + } + return 0; +} +\end{verbatim} + + +\subsection{Attribute Management Functions} + +\begin{verbatim} getattrfunc tp_getattr; setattrfunc tp_setattr; +\end{verbatim} + +The \member{tp_getattr} handler is called when the object requires an +attribute look-up. It is called in the same situations where the +\method{__getattr__()} method of a class would be called. 
+ +A likely way to handle this is (1) to implement a set of functions +(such as \cfunction{newdatatype_getSize()} and +\cfunction{newdatatype_setSize()} in the example below), (2) provide a +method table listing these functions, and (3) provide a getattr +function that returns the result of a lookup in that table. + +Here is an example: + +\begin{verbatim} +static PyMethodDef newdatatype_methods[] = { + {"getSize", (PyCFunction)newdatatype_getSize, METH_VARARGS}, + {"setSize", (PyCFunction)newdatatype_setSize, METH_VARARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyObject * +newdatatype_getattr(newdatatypeobject *obj, char *name) +{ + return Py_FindMethod(newdatatype_methods, (PyObject *)obj, name); +} +\end{verbatim} + +The \member{tp_setattr} handler is called when the +\method{__setattr__()} or \method{__delattr__()} method of a class +instance would be called. When an attribute should be deleted, the +third parameter will be \NULL. Here is an example that simply raises +an exception; if this were really all you wanted, the +\member{tp_setattr} handler should be set to \NULL. + +\begin{verbatim} +static int +newdatatype_setattr(newdatatypeobject *obj, char *name, PyObject *v) +{ + char buf[1024]; + sprintf(buf, "Set attribute not supported for attribute %s", name); + PyErr_SetString(PyExc_RuntimeError, buf); + return -1; +} +\end{verbatim} + + +\subsection{Object Comparison} + +\begin{verbatim} cmpfunc tp_compare; - reprfunc tp_repr; \end{verbatim} +The \member{tp_compare} handler is called when comparisons are needed +and the object does not implement the specific rich comparison method +which matches the requested comparison. (It is always used if defined +and the \cfunction{PyObject_Compare()} or \cfunction{PyObject_Cmp()} +functions are used, or if \function{cmp()} is used from Python.) +It is analogous to the \method{__cmp__()} method. 
This function +should return a negative integer if \var{obj1} is less than +\var{obj2}, \code{0} if they are equal, and a positive integer if +\var{obj1} is greater than +\var{obj2}. + +Here is a sample implementation: + +\begin{verbatim} +static int +newdatatype_compare(newdatatypeobject * obj1, newdatatypeobject * obj2) +{ + long result; + + if (obj1->obj_UnderlyingDatatypePtr->size < + obj2->obj_UnderlyingDatatypePtr->size) { + result = -1; + } + else if (obj1->obj_UnderlyingDatatypePtr->size > + obj2->obj_UnderlyingDatatypePtr->size) { + result = 1; + } + else { + result = 0; + } + return result; +} +\end{verbatim} + + +\subsection{Abstract Protocol Support} + +\begin{verbatim} + tp_as_number; + tp_as_sequence; + tp_as_mapping; +\end{verbatim} + +If you wish your object to be able to act like a number, a sequence, +or a mapping object, then you place the address of a structure that +implements the C type \ctype{PyNumberMethods}, +\ctype{PySequenceMethods}, or \ctype{PyMappingMethods}, respectively. +It is up to you to fill in this structure with appropriate values. You +can find examples of the use of each of these in the \file{Objects} +directory of the Python source distribution. + + +\begin{verbatim} + hashfunc tp_hash; +\end{verbatim} + +This function, if you choose to provide it, should return a hash +number for an instance of your datatype. Here is a moderately +pointless example: + +\begin{verbatim} +static long +newdatatype_hash(newdatatypeobject *obj) +{ + long result; + result = obj->obj_UnderlyingDatatypePtr->size; + result = result * 3; + return result; +} +\end{verbatim} + +\begin{verbatim} + ternaryfunc tp_call; +\end{verbatim} + +This function is called when an instance of your datatype is "called", +for example, if \code{obj1} is an instance of your datatype and the Python +script contains \code{obj1('hello')}, the \member{tp_call} handler is +invoked. 
+ +This function takes three arguments: + +\begin{enumerate} + \item + \var{arg1} is the instance of the datatype which is the subject of + the call. If the call is \code{obj1('hello')}, then \var{arg1} is + \code{obj1}. + + \item + \var{arg2} is a tuple containing the arguments to the call. You + can use \cfunction{PyArg_ParseTuple()} to extract the arguments. + + \item + \var{arg3} is a dictionary of keyword arguments that were passed. + If this is non-\NULL{} and you support keyword arguments, use + \cfunction{PyArg_ParseTupleAndKeywords()} to extract the + arguments. If you do not want to support keyword arguments and + this is non-\NULL, raise a \exception{TypeError} with a message + saying that keyword arguments are not supported. +\end{enumerate} + +Here is a desultory example of the implementation of the call function. + +\begin{verbatim} +/* Implement the call function. + * obj1 is the instance receiving the call. + * obj2 is a tuple containing the arguments to the call, in this + * case 3 strings. + */ +static PyObject * +newdatatype_call(newdatatypeobject *obj, PyObject *args, PyObject *other) +{ + PyObject *result; + char *arg1; + char *arg2; + char *arg3; + char buf[4096]; + if (!PyArg_ParseTuple(args, "sss:call", &arg1, &arg2, &arg3)) { + return NULL; + } + sprintf(buf, + "Returning -- value: [%d] arg1: [%s] arg2: [%s] arg3: [%s]\n", + obj->obj_UnderlyingDatatypePtr->size, + arg1, arg2, arg3); + printf(buf); + return PyString_FromString(buf); +} +\end{verbatim} + + +\subsection{More Suggestions} + +Remember that you can omit most of these functions, in which case you +provide \code{0} as a value. + +In the \file{Objects} directory of the Python source distribution, +there is a file \file{xxobject.c}, which is intended to be used as a +template for the implementation of new types. One useful strategy +for implementing a new type is to copy and rename this file, then +read the instructions at the top of it. 
+ +There are type definitions for each of the functions you must +provide. They are in \file{object.h} in the Python include +directory that comes with the source distribution of Python. + +In order to learn how to implement any specific method for your new +datatype, do the following: Download and unpack the Python source +distribution. Go to the \file{Objects} directory, then search the +C source files for \code{tp_} plus the function you want (for +example, \code{tp_print} or \code{tp_compare}). You will find +examples of the function you want to implement. + +When you need to verify that the type of an object is indeed the +object you are implementing and if you use xxobject.c as a starting +template for your implementation, then there is a macro defined for +this purpose. The macro definition will look something like this: + +\begin{verbatim} +#define is_newdatatypeobject(v) ((v)->ob_type == &Newdatatypetype) +\end{verbatim} + +And, a sample of its use might be something like the following: + +\begin{verbatim} + if (!is_newdatatypeobject(objp1) { + PyErr_SetString(PyExc_TypeError, "arg #1 not a newdatatype"); + return NULL; + } +\end{verbatim} -%\section{Attributes \& Methods -% \label{dnt-attrs-and-meths}} +%For a reasonably extensive example, from which most of the snippets +%above were taken, see \file{newdatatype.c} and \file{newdatatype.h}. \chapter{Building C and \Cpp{} Extensions on \UNIX{} |