summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2007-07-23 17:42:32 (GMT)
committer Guido van Rossum <guido@python.org> 2007-07-23 17:42:32 (GMT)
commit 4ca947183154a7cfc7a6ccbb2e5c856a16a5dce3 (patch)
tree 2d0aa37ade9702ac5af2725414d6ab24b0125171 /Modules
parent 9e473c28e4eb65e86fc11a5717cc6e7e1febd898 (diff)
download cpython-4ca947183154a7cfc7a6ccbb2e5c856a16a5dce3.zip
cpython-4ca947183154a7cfc7a6ccbb2e5c856a16a5dce3.tar.gz
cpython-4ca947183154a7cfc7a6ccbb2e5c856a16a5dce3.tar.bz2
SF patch# 1759016 by Joe Gregorio, who writes:
1. Removed the "returns_unicode" attribute, the associated code in the module that supported that attribute, and all tests associated with it.
2. Parsed data is now returned as unicode strings.
3. Changed input tests to use io.BytesIO instead of StringIO, to reflect the byte-processing nature of expat.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/pyexpat.c 94
1 file changed, 17 insertions, 77 deletions
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 13c1d27..8638b2e 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -62,8 +62,6 @@ typedef struct {
PyObject_HEAD
XML_Parser itself;
- int returns_unicode; /* True if Unicode strings are returned;
- if false, UTF-8 strings are returned */
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
@@ -185,35 +183,6 @@ conv_string_len_to_unicode(const XML_Char *str, int len)
return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
}
-/* Convert a string of XML_Chars into an 8-bit Python string.
- Returns None if str is a null pointer. */
-
-static PyObject *
-conv_string_to_utf8(const XML_Char *str)
-{
- /* XXX currently this code assumes that XML_Char is 8-bit,
- and hence in UTF-8. */
- /* UTF-8 from Expat, UTF-8 desired */
- if (str == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return PyString_FromString(str);
-}
-
-static PyObject *
-conv_string_len_to_utf8(const XML_Char *str, int len)
-{
- /* XXX currently this code assumes that XML_Char is 8-bit,
- and hence in UTF-8. */
- /* UTF-8 from Expat, UTF-8 desired */
- if (str == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return PyString_FromStringAndSize((const char *)str, len);
-}
-
/* Callback routines */
static void clear_handlers(xmlparseobject *self, int initial);
@@ -411,14 +380,10 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
return res;
}
-/* Python 2.0 and later versions, when built with Unicode support */
-#define STRING_CONV_FUNC (self->returns_unicode \
- ? conv_string_to_unicode : conv_string_to_utf8)
-
static PyObject*
string_intern(xmlparseobject *self, const char* str)
{
- PyObject *result = STRING_CONV_FUNC(str);
+ PyObject *result = conv_string_to_unicode(str);
PyObject *value;
/* result can be NULL if the unicode conversion failed. */
if (!result)
@@ -449,9 +414,7 @@ call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
args = PyTuple_New(1);
if (args == NULL)
return -1;
- temp = (self->returns_unicode
- ? conv_string_len_to_unicode(buffer, len)
- : conv_string_len_to_utf8(buffer, len));
+ temp = (conv_string_len_to_unicode(buffer, len));
if (temp == NULL) {
Py_DECREF(args);
flag_error(self);
@@ -556,7 +519,7 @@ my_StartElementHandler(void *userData,
Py_DECREF(container);
return;
}
- v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
+ v = conv_string_to_unicode((XML_Char *) atts[i+1]);
if (v == NULL) {
flag_error(self);
Py_DECREF(container);
@@ -645,7 +608,7 @@ VOID_HANDLER(ProcessingInstruction,
(void *userData,
const XML_Char *target,
const XML_Char *data),
- ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
+ ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
VOID_HANDLER(UnparsedEntityDecl,
(void *userData,
@@ -671,9 +634,7 @@ VOID_HANDLER(EntityDecl,
const XML_Char *notationName),
("NiNNNNN",
string_intern(self, entityName), is_parameter_entity,
- (self->returns_unicode
- ? conv_string_len_to_unicode(value, value_length)
- : conv_string_len_to_utf8(value, value_length)),
+ (conv_string_len_to_unicode(value, value_length)),
string_intern(self, base), string_intern(self, systemId),
string_intern(self, publicId),
string_intern(self, notationName)))
@@ -684,7 +645,7 @@ VOID_HANDLER(XmlDecl,
const XML_Char *encoding,
int standalone),
("(O&O&i)",
- STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
+ conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
standalone))
static PyObject *
@@ -727,10 +688,7 @@ my_ElementDeclHandler(void *userData,
if (flush_character_buffer(self) < 0)
goto finally;
- modelobj = conv_content_model(model,
- (self->returns_unicode
- ? conv_string_to_unicode
- : conv_string_to_utf8));
+ modelobj = conv_content_model(model, (conv_string_to_unicode));
if (modelobj == NULL) {
flag_error(self);
goto finally;
@@ -772,7 +730,7 @@ VOID_HANDLER(AttlistDecl,
int isrequired),
("(NNO&O&i)",
string_intern(self, elname), string_intern(self, attname),
- STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
+ conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
isrequired))
#if XML_COMBINED_VERSION >= 19504
@@ -808,7 +766,7 @@ VOID_HANDLER(EndNamespaceDecl,
VOID_HANDLER(Comment,
(void *userData, const XML_Char *data),
- ("(O&)", STRING_CONV_FUNC,data))
+ ("(O&)", conv_string_to_unicode ,data))
VOID_HANDLER(StartCdataSection,
(void *userData),
@@ -820,15 +778,11 @@ VOID_HANDLER(EndCdataSection,
VOID_HANDLER(Default,
(void *userData, const XML_Char *s, int len),
- ("(N)", (self->returns_unicode
- ? conv_string_len_to_unicode(s,len)
- : conv_string_len_to_utf8(s,len))))
+ ("(N)", (conv_string_len_to_unicode(s,len))))
VOID_HANDLER(DefaultHandlerExpand,
(void *userData, const XML_Char *s, int len),
- ("(N)", (self->returns_unicode
- ? conv_string_len_to_unicode(s,len)
- : conv_string_len_to_utf8(s,len))))
+ ("(N)", (conv_string_len_to_unicode(s,len))))
INT_HANDLER(NotStandalone,
(void *userData),
@@ -842,7 +796,7 @@ RC_HANDLER(int, ExternalEntityRef,
const XML_Char *publicId),
int rc=0;,
("(O&NNN)",
- STRING_CONV_FUNC,context, string_intern(self, base),
+ conv_string_to_unicode ,context, string_intern(self, base),
string_intern(self, systemId), string_intern(self, publicId)),
rc = PyInt_AsLong(rv);, rc,
XML_GetUserData(parser))
@@ -924,13 +878,13 @@ readinst(char *buf, int buf_size, PyObject *meth)
goto finally;
/* XXX what to do if it returns a Unicode string? */
- if (!PyString_Check(str)) {
+ if (!PyBytes_Check(str)) {
PyErr_Format(PyExc_TypeError,
- "read() did not return a string object (type=%.400s)",
+ "read() did not return a bytes object (type=%.400s)",
Py_Type(str)->tp_name);
goto finally;
}
- len = PyString_GET_SIZE(str);
+ len = PyBytes_GET_SIZE(str);
if (len > buf_size) {
PyErr_Format(PyExc_ValueError,
"read() returned too much data: "
@@ -938,7 +892,7 @@ readinst(char *buf, int buf_size, PyObject *meth)
buf_size, len);
goto finally;
}
- memcpy(buf, PyString_AsString(str), len);
+ memcpy(buf, PyBytes_AsString(str), len);
finally:
Py_XDECREF(arg);
Py_XDECREF(str);
@@ -1044,7 +998,7 @@ xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
= XML_GetInputContext(self->itself, &offset, &size);
if (buffer != NULL)
- return PyString_FromStringAndSize(buffer + offset,
+ return PyBytes_FromStringAndSize(buffer + offset,
size - offset);
else
Py_RETURN_NONE;
@@ -1098,7 +1052,6 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
}
else
new_parser->buffer = NULL;
- new_parser->returns_unicode = self->returns_unicode;
new_parser->ordered_attributes = self->ordered_attributes;
new_parser->specified_attributes = self->specified_attributes;
new_parser->in_callback = 0;
@@ -1283,8 +1236,6 @@ newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
if (self == NULL)
return NULL;
- self->returns_unicode = 1;
-
self->buffer = NULL;
self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
self->buffer_used = 0;
@@ -1436,8 +1387,6 @@ xmlparse_getattr(xmlparseobject *self, char *name)
return get_pybool(self->ns_prefixes);
if (strcmp(name, "ordered_attributes") == 0)
return get_pybool(self->ordered_attributes);
- if (strcmp(name, "returns_unicode") == 0)
- return get_pybool((long) self->returns_unicode);
if (strcmp(name, "specified_attributes") == 0)
return get_pybool((long) self->specified_attributes);
if (strcmp(name, "intern") == 0) {
@@ -1482,7 +1431,6 @@ xmlparse_getattr(xmlparseobject *self, char *name)
APPEND(rc, "buffer_used");
APPEND(rc, "namespace_prefixes");
APPEND(rc, "ordered_attributes");
- APPEND(rc, "returns_unicode");
APPEND(rc, "specified_attributes");
APPEND(rc, "intern");
@@ -1570,14 +1518,6 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
self->ordered_attributes = 0;
return 0;
}
- if (strcmp(name, "returns_unicode") == 0) {
- if (PyObject_IsTrue(v)) {
- self->returns_unicode = 1;
- }
- else
- self->returns_unicode = 0;
- return 0;
- }
if (strcmp(name, "specified_attributes") == 0) {
if (PyObject_IsTrue(v))
self->specified_attributes = 1;