summaryrefslogtreecommitdiffstats
path: root/Modules/pyexpat.c
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>2002-06-27 19:40:48 (GMT)
committerFred Drake <fdrake@acm.org>2002-06-27 19:40:48 (GMT)
commitb91a36b2305c348ac4c27c823d75c11ea37e603c (patch)
treef63b8f3a122899f52e9f5a79490ab1bc13a5a84d /Modules/pyexpat.c
parentd805fefff4263534bb8c79b2ef266a8a5b245e8a (diff)
downloadcpython-b91a36b2305c348ac4c27c823d75c11ea37e603c.zip
cpython-b91a36b2305c348ac4c27c823d75c11ea37e603c.tar.gz
cpython-b91a36b2305c348ac4c27c823d75c11ea37e603c.tar.bz2
Integrate the changes from PyXML's version of pyexpat.c revisions
1.47, 1.48, 1.49 (name interning support).
Diffstat (limited to 'Modules/pyexpat.c')
-rw-r--r--Modules/pyexpat.c155
1 files changed, 109 insertions, 46 deletions
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index ae8bf1b..7c70f1b 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -60,6 +60,7 @@ typedef struct {
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
+ PyObject *intern; /* Dictionary to intern strings */
PyObject **handlers;
} xmlparseobject;
@@ -123,7 +124,7 @@ set_error(xmlparseobject *self)
Returns None if str is a null pointer. */
static PyObject *
-conv_string_to_unicode(XML_Char *str)
+conv_string_to_unicode(const XML_Char *str)
{
/* XXX currently this code assumes that XML_Char is 8-bit,
and hence in UTF-8. */
@@ -132,8 +133,7 @@ conv_string_to_unicode(XML_Char *str)
Py_INCREF(Py_None);
return Py_None;
}
- return PyUnicode_DecodeUTF8((const char *)str,
- strlen((const char *)str),
+ return PyUnicode_DecodeUTF8(str, strlen(str),
"strict");
}
@@ -155,7 +155,7 @@ conv_string_len_to_unicode(const XML_Char *str, int len)
Returns None if str is a null pointer. */
static PyObject *
-conv_string_to_utf8(XML_Char *str)
+conv_string_to_utf8(const XML_Char *str)
{
/* XXX currently this code assumes that XML_Char is 8-bit,
and hence in UTF-8. */
@@ -164,7 +164,7 @@ conv_string_to_utf8(XML_Char *str)
Py_INCREF(Py_None);
return Py_None;
}
- return PyString_FromString((const char *)str);
+ return PyString_FromString(str);
}
static PyObject *
@@ -275,6 +275,25 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
? conv_string_to_unicode : conv_string_to_utf8)
#endif
+static PyObject*
+string_intern(xmlparseobject *self, const char* str)
+{
+ PyObject *result = STRING_CONV_FUNC(str);
+ PyObject *value;
+ if (!self->intern)
+ return result;
+ value = PyDict_GetItem(self->intern, result);
+ if (!value) {
+ if (PyDict_SetItem(self->intern, result, result) == 0)
+ return result;
+ else
+ return NULL;
+ }
+ Py_INCREF(value);
+ Py_DECREF(result);
+ return value;
+}
+
static void
my_StartElementHandler(void *userData,
const XML_Char *name, const XML_Char **atts)
@@ -307,7 +326,7 @@ my_StartElementHandler(void *userData,
return;
}
for (i = 0; i < max; i += 2) {
- PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]);
+ PyObject *n = string_intern(self, (XML_Char *) atts[i]);
PyObject *v;
if (n == NULL) {
flag_error(self);
@@ -336,7 +355,7 @@ my_StartElementHandler(void *userData,
Py_DECREF(v);
}
}
- args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container);
+ args = Py_BuildValue("(NN)", string_intern(self, name), container);
if (args == NULL) {
Py_DECREF(container);
return;
@@ -394,13 +413,13 @@ my_##NAME##Handler PARAMS {\
VOID_HANDLER(EndElement,
(void *userData, const XML_Char *name),
- ("(O&)", STRING_CONV_FUNC, name))
+ ("(N)", string_intern(self, name)))
VOID_HANDLER(ProcessingInstruction,
(void *userData,
const XML_Char *target,
const XML_Char *data),
- ("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data))
+ ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
#ifndef Py_USING_UNICODE
VOID_HANDLER(CharacterData,
@@ -421,10 +440,10 @@ VOID_HANDLER(UnparsedEntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
- ("(O&O&O&O&O&)",
- STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base,
- STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId,
- STRING_CONV_FUNC,notationName))
+ ("(NNNNN)",
+ string_intern(self, entityName), string_intern(self, base),
+ string_intern(self, systemId), string_intern(self, publicId),
+ string_intern(self, notationName)))
#ifndef Py_USING_UNICODE
VOID_HANDLER(EntityDecl,
@@ -437,11 +456,12 @@ VOID_HANDLER(EntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
- ("O&iNO&O&O&O&",
- STRING_CONV_FUNC,entityName, is_parameter_entity,
+ ("NiNNNNN",
+ string_intern(self, entityName), is_parameter_entity,
conv_string_len_to_utf8(value, value_length),
- STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
- STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
+ string_intern(self, base), string_intern(self, systemId),
+ string_intern(self, publicId),
+ string_intern(self, notationName)))
#else
VOID_HANDLER(EntityDecl,
(void *userData,
@@ -453,13 +473,14 @@ VOID_HANDLER(EntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
- ("O&iNO&O&O&O&",
- STRING_CONV_FUNC,entityName, is_parameter_entity,
+ ("NiNNNNN",
+ string_intern(self, entityName), is_parameter_entity,
(self->returns_unicode
? conv_string_len_to_unicode(value, value_length)
: conv_string_len_to_utf8(value, value_length)),
- STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
- STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
+ string_intern(self, base), string_intern(self, systemId),
+ string_intern(self, publicId),
+ string_intern(self, notationName)))
#endif
VOID_HANDLER(XmlDecl,
@@ -473,7 +494,7 @@ VOID_HANDLER(XmlDecl,
static PyObject *
conv_content_model(XML_Content * const model,
- PyObject *(*conv_string)(XML_Char *))
+ PyObject *(*conv_string)(const XML_Char *))
{
PyObject *result = NULL;
PyObject *children = PyTuple_New(model->numchildren);
@@ -514,8 +535,8 @@ VOID_HANDLER(ElementDecl,
(void *userData,
const XML_Char *name,
XML_Content *model),
- ("O&O&",
- STRING_CONV_FUNC,name,
+ ("NO&",
+ string_intern(self, name),
(self->returns_unicode ? conv_content_model_unicode
: conv_content_model_utf8),model))
#else
@@ -523,8 +544,8 @@ VOID_HANDLER(ElementDecl,
(void *userData,
const XML_Char *name,
XML_Content *model),
- ("O&O&",
- STRING_CONV_FUNC,name, conv_content_model_utf8,model))
+ ("NO&",
+ string_intern(self, name), conv_content_model_utf8,model))
#endif
VOID_HANDLER(AttlistDecl,
@@ -534,8 +555,8 @@ VOID_HANDLER(AttlistDecl,
const XML_Char *att_type,
const XML_Char *dflt,
int isrequired),
- ("(O&O&O&O&i)",
- STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname,
+ ("(NNO&O&i)",
+ string_intern(self, elname), string_intern(self, attname),
STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
isrequired))
@@ -545,24 +566,25 @@ VOID_HANDLER(NotationDecl,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId),
- ("(O&O&O&O&)",
- STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base,
- STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
+ ("(NNNN)",
+ string_intern(self, notationName), string_intern(self, base),
+ string_intern(self, systemId), string_intern(self, publicId)))
VOID_HANDLER(StartNamespaceDecl,
(void *userData,
const XML_Char *prefix,
const XML_Char *uri),
- ("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri))
+ ("(NN)",
+ string_intern(self, prefix), string_intern(self, uri)))
VOID_HANDLER(EndNamespaceDecl,
(void *userData,
const XML_Char *prefix),
- ("(O&)", STRING_CONV_FUNC,prefix))
+ ("(N)", string_intern(self, prefix)))
VOID_HANDLER(Comment,
- (void *userData, const XML_Char *prefix),
- ("(O&)", STRING_CONV_FUNC,prefix))
+ (void *userData, const XML_Char *data),
+ ("(O&)", STRING_CONV_FUNC,data))
VOID_HANDLER(StartCdataSection,
(void *userData),
@@ -605,9 +627,9 @@ RC_HANDLER(int, ExternalEntityRef,
const XML_Char *systemId,
const XML_Char *publicId),
int rc=0;,
- ("(O&O&O&O&)",
- STRING_CONV_FUNC,context, STRING_CONV_FUNC,base,
- STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId),
+ ("(O&NNN)",
+ STRING_CONV_FUNC,context, string_intern(self, base),
+ string_intern(self, systemId), string_intern(self, publicId)),
rc = PyInt_AsLong(rv);, rc,
XML_GetUserData(parser))
@@ -617,8 +639,8 @@ VOID_HANDLER(StartDoctypeDecl,
(void *userData, const XML_Char *doctypeName,
const XML_Char *sysid, const XML_Char *pubid,
int has_internal_subset),
- ("(O&O&O&i)", STRING_CONV_FUNC,doctypeName,
- STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid,
+ ("(NNNi)", string_intern(self, doctypeName),
+ string_intern(self, sysid), string_intern(self, pubid),
has_internal_subset))
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
@@ -856,6 +878,8 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
new_parser->handlers = 0;
+ new_parser->intern = self->intern;
+ Py_XINCREF(new_parser->intern);
#ifdef Py_TPFLAGS_HAVE_GC
PyObject_GC_Track(new_parser);
#else
@@ -988,7 +1012,7 @@ XML_Encoding * info)
#endif
static PyObject *
-newxmlparseobject(char *encoding, char *namespace_separator)
+newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
{
int i;
xmlparseobject *self;
@@ -1022,6 +1046,8 @@ newxmlparseobject(char *encoding, char *namespace_separator)
else {
self->itself = XML_ParserCreate(encoding);
}
+ self->intern = intern;
+ Py_XINCREF(self->intern);
#ifdef Py_TPFLAGS_HAVE_GC
PyObject_GC_Track(self);
#else
@@ -1074,6 +1100,7 @@ xmlparse_dealloc(xmlparseobject *self)
}
free(self->handlers);
}
+ Py_XDECREF(self->intern);
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
/* Code for versions before 1.6 */
free(self);
@@ -1118,6 +1145,16 @@ xmlparse_getattr(xmlparseobject *self, char *name)
return PyInt_FromLong((long) self->returns_unicode);
if (strcmp(name, "specified_attributes") == 0)
return PyInt_FromLong((long) self->specified_attributes);
+ if (strcmp(name, "intern") == 0) {
+ if (self->intern == NULL) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ else {
+ Py_INCREF(self->intern);
+ return self->intern;
+ }
+ }
handlernum = handlername2int(name);
@@ -1138,6 +1175,7 @@ xmlparse_getattr(xmlparseobject *self, char *name)
PyList_Append(rc, PyString_FromString("ordered_attributes"));
PyList_Append(rc, PyString_FromString("returns_unicode"));
PyList_Append(rc, PyString_FromString("specified_attributes"));
+ PyList_Append(rc, PyString_FromString("intern"));
return rc;
}
@@ -1221,6 +1259,8 @@ static int
xmlparse_clear(xmlparseobject *op)
{
clear_handlers(op, 0);
+ Py_XDECREF(op->intern);
+ op->intern = 0;
return 0;
}
#endif
@@ -1275,10 +1315,14 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
{
char *encoding = NULL;
char *namespace_separator = NULL;
- static char *kwlist[] = {"encoding", "namespace_separator", NULL};
-
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist,
- &encoding, &namespace_separator))
+ PyObject *intern = NULL;
+ PyObject *result;
+ int intern_decref = 0;
+ static char *kwlist[] = {"encoding", "namespace_separator",
+ "intern", NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
+ &encoding, &namespace_separator, &intern))
return NULL;
if (namespace_separator != NULL
&& strlen(namespace_separator) > 1) {
@@ -1287,7 +1331,26 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
" character, omitted, or None");
return NULL;
}
- return newxmlparseobject(encoding, namespace_separator);
+ /* Explicitly passing None means no interning is desired.
+ Not passing anything means that a new dictionary is used. */
+ if (intern == Py_None)
+ intern = NULL;
+ else if (intern == NULL) {
+ intern = PyDict_New();
+ if (!intern)
+ return NULL;
+ intern_decref = 1;
+ }
+ else if (!PyDict_Check(intern)) {
+ PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
+ return NULL;
+ }
+
+ result = newxmlparseobject(encoding, namespace_separator, intern);
+ if (intern_decref) {
+ Py_DECREF(intern);
+ }
+ return result;
}
PyDoc_STRVAR(pyexpat_ErrorString__doc__,