diff options
author | Andrew McNamara <andrewm@object-craft.com.au> | 2005-01-07 04:42:45 (GMT) |
---|---|---|
committer | Andrew McNamara <andrewm@object-craft.com.au> | 2005-01-07 04:42:45 (GMT) |
commit | 1196cf185ca4e5a74edc65f4b1e46cb60ff645a3 (patch) | |
tree | 678a7acd5ad8e24ffe2755ec432ad775e0e225fd /Modules/_csv.c | |
parent | 72b83c86a9c992bc7635dbeaa08bfa95ec033236 (diff) | |
download | cpython-1196cf185ca4e5a74edc65f4b1e46cb60ff645a3.zip cpython-1196cf185ca4e5a74edc65f4b1e46cb60ff645a3.tar.gz cpython-1196cf185ca4e5a74edc65f4b1e46cb60ff645a3.tar.bz2 |
Improved the implementation of the internal "dialect" type. The new
implementation features better error reporting, and better compliance
with the PEP.
Diffstat (limited to 'Modules/_csv.c')
-rw-r--r-- | Modules/_csv.c | 426 |
1 files changed, 239 insertions, 187 deletions
diff --git a/Modules/_csv.c b/Modules/_csv.c index 4c3a3ad..c47e9d6 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -73,7 +73,7 @@ typedef struct { char escapechar; /* escape character */ int skipinitialspace; /* ignore spaces following delimiter? */ PyObject *lineterminator; /* string to write between records */ - QuoteStyle quoting; /* style of quoting to write */ + int quoting; /* style of quoting to write */ int strict; /* raise exception on bad CSV */ } DialectObj; @@ -130,17 +130,6 @@ get_dialect_from_registry(PyObject * name_obj) return dialect_obj; } -static int -check_delattr(PyObject *v) -{ - if (v == NULL) { - PyErr_SetString(PyExc_TypeError, - "Cannot delete attribute"); - return -1; - } - return 0; -} - static PyObject * get_string(PyObject *str) { @@ -148,25 +137,6 @@ get_string(PyObject *str) return str; } -static int -set_string(PyObject **str, PyObject *v) -{ - if (check_delattr(v) < 0) - return -1; - if (!PyString_Check(v) -#ifdef Py_USING_UNICODE -&& !PyUnicode_Check(v) -#endif -) { - PyErr_BadArgument(); - return -1; - } - Py_XDECREF(*str); - Py_INCREF(v); - *str = v; - return 0; -} - static PyObject * get_nullchar_as_None(char c) { @@ -178,48 +148,22 @@ get_nullchar_as_None(char c) return PyString_FromStringAndSize((char*)&c, 1); } -static int -set_None_as_nullchar(char * addr, PyObject *v) -{ - if (check_delattr(v) < 0) - return -1; - if (v == Py_None) - *addr = '\0'; - else if (!PyString_Check(v) || PyString_Size(v) != 1) { - PyErr_BadArgument(); - return -1; - } - else { - char *s = PyString_AsString(v); - if (s == NULL) - return -1; - *addr = s[0]; - } - return 0; -} - static PyObject * Dialect_get_lineterminator(DialectObj *self) { return get_string(self->lineterminator); } -static int -Dialect_set_lineterminator(DialectObj *self, PyObject *value) -{ - return set_string(&self->lineterminator, value); -} - static PyObject * Dialect_get_escapechar(DialectObj *self) { return get_nullchar_as_None(self->escapechar); } -static int -Dialect_set_escapechar(DialectObj *self, PyObject *value) +static PyObject * +Dialect_get_quotechar(DialectObj *self) { - return set_None_as_nullchar(&self->escapechar, value); + return get_nullchar_as_None(self->quotechar); } static PyObject * @@ -229,51 +173,109 @@ Dialect_get_quoting(DialectObj *self) } static int -Dialect_set_quoting(DialectObj *self, PyObject *v) +_set_bool(const char *name, int *target, PyObject *src, int dflt) +{ + if (src == NULL) + *target = dflt; + else + *target = PyObject_IsTrue(src); + return 0; +} + +static int +_set_int(const char *name, int *target, PyObject *src, int dflt) +{ + if (src == NULL) + *target = dflt; + else { + if (!PyInt_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be an integer", name); + return -1; + } + *target = PyInt_AsLong(src); + } + return 0; +} + +static int +_set_char(const char *name, char *target, PyObject *src, char dflt) +{ + if (src == NULL) + *target = dflt; + else { + if (src == Py_None) + *target = '\0'; + else if (!PyString_Check(src) || PyString_Size(src) != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be an 1-character string", + name); + return -1; + } + else { + char *s = PyString_AsString(src); + if (s == NULL) + return -1; + *target = s[0]; + } + } + return 0; +} + +static int +_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) +{ + if (src == NULL) + *target = PyString_FromString(dflt); + else { + if (src == Py_None) + *target = NULL; + else if (!PyString_Check(src) +#ifdef Py_USING_UNICODE + && !PyUnicode_Check(src) +#endif + ) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be an string", name); + return -1; + } else { + Py_XDECREF(*target); + Py_INCREF(src); + *target = src; + } + } + return 0; +} + +static int +dialect_check_quoting(int quoting) { - int quoting; StyleDesc *qs = quote_styles; - if (check_delattr(v) < 0) - return -1; - if (!PyInt_Check(v)) { - PyErr_BadArgument(); - return -1; - } - quoting = PyInt_AsLong(v); for (qs = quote_styles; qs->name; qs++) { - if (qs->style == quoting) { - self->quoting = quoting; + if (qs->style == quoting) return 0; - } } - PyErr_BadArgument(); + PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); return -1; } -static struct PyMethodDef Dialect_methods[] = { - { NULL, NULL } -}; - #define D_OFF(x) offsetof(DialectObj, x) static struct PyMemberDef Dialect_memberlist[] = { - { "quotechar", T_CHAR, D_OFF(quotechar) }, - { "delimiter", T_CHAR, D_OFF(delimiter) }, - { "skipinitialspace", T_INT, D_OFF(skipinitialspace) }, - { "doublequote", T_INT, D_OFF(doublequote) }, - { "strict", T_INT, D_OFF(strict) }, + { "delimiter", T_CHAR, D_OFF(delimiter), READONLY }, + { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY }, + { "doublequote", T_INT, D_OFF(doublequote), READONLY }, + { "strict", T_INT, D_OFF(strict), READONLY }, { NULL } }; static PyGetSetDef Dialect_getsetlist[] = { - { "escapechar", (getter)Dialect_get_escapechar, - (setter)Dialect_set_escapechar }, - { "lineterminator", (getter)Dialect_get_lineterminator, - (setter)Dialect_set_lineterminator }, - { "quoting", (getter)Dialect_get_quoting, - (setter)Dialect_set_quoting }, - {NULL}, + { "escapechar", (getter)Dialect_get_escapechar}, + { "lineterminator", (getter)Dialect_get_lineterminator}, + { "quotechar", (getter)Dialect_get_quotechar}, + { "quoting", (getter)Dialect_get_quoting}, + {NULL}, }; static void @@ -283,107 +285,155 @@ Dialect_dealloc(DialectObj *self) self->ob_type->tp_free((PyObject *)self); } +/* + * Return a new reference to a dialect instance + * + * If given a string, looks up the name in our dialect registry + * If given a class, instantiate (which runs python validity checks) + * If given an instance, return a new reference to the instance + */ +static PyObject * +dialect_instantiate(PyObject *dialect) +{ + Py_INCREF(dialect); + /* If dialect is a string, look it up in our registry */ + if (PyString_Check(dialect) +#ifdef Py_USING_UNICODE + || PyUnicode_Check(dialect) +#endif + ) { + PyObject * new_dia; + new_dia = get_dialect_from_registry(dialect); + Py_DECREF(dialect); + return new_dia; + } + /* A class rather than an instance? Instantiate */ + if (PyObject_TypeCheck(dialect, &PyClass_Type)) { + PyObject * new_dia; + new_dia = PyObject_CallFunction(dialect, ""); + Py_DECREF(dialect); + return new_dia; + } + /* Make sure we finally have an instance */ + if (!PyInstance_Check(dialect)) { + PyErr_SetString(PyExc_TypeError, "dialect must be an instance"); + Py_DECREF(dialect); + return NULL; + } + return dialect; +} + +static char *dialect_kws[] = { + "dialect", + "delimiter", + "doublequote", + "escapechar", + "lineterminator", + "quotechar", + "quoting", + "skipinitialspace", + "strict", + NULL +}; + static int dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs) { - PyObject *dialect = NULL, *name_obj, *value_obj; - - self->quotechar = '"'; - self->delimiter = ','; - self->escapechar = '\0'; - self->skipinitialspace = 0; - Py_XDECREF(self->lineterminator); - self->lineterminator = PyString_FromString("\r\n"); - if (self->lineterminator == NULL) + int ret = -1; + PyObject *dialect = NULL; + PyObject *delimiter = NULL; + PyObject *doublequote = NULL; + PyObject *escapechar = NULL; + PyObject *lineterminator = NULL; + PyObject *quotechar = NULL; + PyObject *quoting = NULL; + PyObject *skipinitialspace = NULL; + PyObject *strict = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|OOOOOOOOO", dialect_kws, + &dialect, + &delimiter, + &doublequote, + &escapechar, + &lineterminator, + "echar, + "ing, + &skipinitialspace, + &strict)) return -1; - self->quoting = QUOTE_MINIMAL; - self->doublequote = 1; - self->strict = 0; - if (!PyArg_UnpackTuple(args, "", 0, 1, &dialect)) - return -1; - Py_XINCREF(dialect); - if (kwargs != NULL) { - PyObject * key = PyString_FromString("dialect"); - PyObject * d; - - d = PyDict_GetItem(kwargs, key); - if (d) { - Py_INCREF(d); - Py_XDECREF(dialect); - PyDict_DelItem(kwargs, key); - dialect = d; - } - Py_DECREF(key); - } - if (dialect != NULL) { - int i; - PyObject * dir_list; + Py_XINCREF(delimiter); + Py_XINCREF(doublequote); + Py_XINCREF(escapechar); + Py_XINCREF(lineterminator); + Py_XINCREF(quotechar); + Py_XINCREF(quoting); + Py_XINCREF(skipinitialspace); + Py_XINCREF(strict); + if (dialect != NULL) { + dialect = dialect_instantiate(dialect); + if (dialect == NULL) + goto err; +#define DIALECT_GETATTR(v, n) \ + if (v == NULL) \ + v = PyObject_GetAttrString(dialect, n) + + DIALECT_GETATTR(delimiter, "delimiter"); + DIALECT_GETATTR(doublequote, "doublequote"); + DIALECT_GETATTR(escapechar, "escapechar"); + DIALECT_GETATTR(lineterminator, "lineterminator"); + DIALECT_GETATTR(quotechar, "quotechar"); + DIALECT_GETATTR(quoting, "quoting"); + DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); + DIALECT_GETATTR(strict, "strict"); + PyErr_Clear(); + Py_DECREF(dialect); + } - /* If dialect is a string, look it up in our registry */ - if (PyString_Check(dialect) -#ifdef Py_USING_UNICODE - || PyUnicode_Check(dialect) -#endif - ) { - PyObject * new_dia; - new_dia = get_dialect_from_registry(dialect); - Py_DECREF(dialect); - if (new_dia == NULL) - return -1; - dialect = new_dia; - } - /* A class rather than an instance? Instantiate */ - if (PyObject_TypeCheck(dialect, &PyClass_Type)) { - PyObject * new_dia; - new_dia = PyObject_CallFunction(dialect, ""); - Py_DECREF(dialect); - if (new_dia == NULL) - return -1; - dialect = new_dia; - } - /* Make sure we finally have an instance */ - if (!PyInstance_Check(dialect) || - (dir_list = PyObject_Dir(dialect)) == NULL) { - PyErr_SetString(PyExc_TypeError, - "dialect must be an instance"); - Py_DECREF(dialect); - return -1; - } - /* And extract the attributes */ - for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) { - char *s; - name_obj = PyList_GET_ITEM(dir_list, i); - s = PyString_AsString(name_obj); - if (s == NULL) - return -1; - if (s[0] == '_') - continue; - value_obj = PyObject_GetAttr(dialect, name_obj); - if (value_obj) { - if (PyObject_SetAttr((PyObject *)self, - name_obj, value_obj)) { - Py_DECREF(value_obj); - Py_DECREF(dir_list); - Py_DECREF(dialect); - return -1; - } - Py_DECREF(value_obj); - } - } - Py_DECREF(dir_list); - Py_DECREF(dialect); - } - if (kwargs != NULL) { - int pos = 0; + /* check types and convert to C values */ +#define DIASET(meth, name, target, src, dflt) \ + if (meth(name, target, src, dflt)) \ + goto err + DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); + DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); + DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); + DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); + DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); + DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); + DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); + DIASET(_set_bool, "strict", &self->strict, strict, 0); + + /* validate options */ + if (dialect_check_quoting(self->quoting)) + goto err; + if (self->delimiter == 0) { + PyErr_SetString(PyExc_TypeError, "delimiter must be set"); + goto err; + } + if (quotechar == Py_None && self->quoting != QUOTE_NONE) + self->quoting = QUOTE_NONE; + if (self->quoting != QUOTE_NONE && self->quotechar == 0) { + PyErr_SetString(PyExc_TypeError, + "quotechar must be set if quoting enabled"); + goto err; + } + if (self->lineterminator == 0) { + PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); + goto err; + } - while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) { - if (PyObject_SetAttr((PyObject *)self, - name_obj, value_obj)) - return -1; - } - } - return 0; + ret = 0; +err: + Py_XDECREF(delimiter); + Py_XDECREF(doublequote); + Py_XDECREF(escapechar); + Py_XDECREF(lineterminator); + Py_XDECREF(quotechar); + Py_XDECREF(quoting); + Py_XDECREF(skipinitialspace); + Py_XDECREF(strict); + return ret; } static PyObject * @@ -433,7 +483,7 @@ static PyTypeObject Dialect_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - Dialect_methods, /* tp_methods */ + 0, /* tp_methods */ Dialect_memberlist, /* tp_members */ Dialect_getsetlist, /* tp_getset */ 0, /* tp_base */ @@ -509,7 +559,8 @@ parse_process_char(ReaderObj *self, char c) parse_save_field(self); self->state = START_RECORD; } - else if (c == dialect->quotechar) { + else if (c == dialect->quotechar && + dialect->quoting != QUOTE_NONE) { /* start quoted field */ self->state = IN_QUOTED_FIELD; } @@ -572,7 +623,8 @@ parse_process_char(ReaderObj *self, char c) /* Possible escape character */ self->state = ESCAPE_IN_QUOTED_FIELD; } - else if (c == dialect->quotechar) { + else if (c == dialect->quotechar && + dialect->quoting != QUOTE_NONE) { if (dialect->doublequote) { /* doublequote; " represented by "" */ self->state = QUOTE_IN_QUOTED_FIELD; @@ -1332,7 +1384,7 @@ csv_register_dialect(PyObject *module, PyObject *args) return NULL; } Py_INCREF(dialect_obj); - /* A class rather than an instance? Instanciate */ + /* A class rather than an instance? Instantiate */ if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) { PyObject * new_dia; new_dia = PyObject_CallFunction(dialect_obj, ""); |