summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrew McNamara <andrewm@object-craft.com.au> 2005-01-12 07:44:42 (GMT)
committer Andrew McNamara <andrewm@object-craft.com.au> 2005-01-12 07:44:42 (GMT)
commit c89f284df8903de66be6c60eda7d9187b38956f8 (patch)
tree da725f672dd0f5f90cf3ed11f28472fe7434e7e7
parent 31d8896ee21c28ee55ae4ba2764e11fba88c9fd4 (diff)
downloadcpython-c89f284df8903de66be6c60eda7d9187b38956f8.zip
cpython-c89f284df8903de66be6c60eda7d9187b38956f8.tar.gz
cpython-c89f284df8903de66be6c60eda7d9187b38956f8.tar.bz2
When using QUOTE_NONNUMERIC, we now test for "numericness" with
PyNumber_Check, rather than trying to convert to a float. Reimplemented writer - now raises exceptions when it sees a quotechar but neither doublequote nor escapechar is set. Doublequote results are now more consistent (e.g., a single quote character should generate """", rather than "", which is ambiguous).
-rw-r--r-- Lib/test/test_csv.py 30
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_csv.c 140
3 files changed, 81 insertions, 92 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index be1147d..a3c0843 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase):
(bigstring, bigstring))
def test_write_quoting(self):
- self._write_test(['a','1','p,q'], 'a,1,"p,q"')
+ self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
self.assertRaises(csv.Error,
self._write_test,
- ['a','1','p,q'], 'a,1,"p,q"',
+ ['a',1,'p,q'], 'a,1,p,q',
quoting = csv.QUOTE_NONE)
- self._write_test(['a','1','p,q'], 'a,1,"p,q"',
+ self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
quoting = csv.QUOTE_MINIMAL)
- self._write_test(['a','1','p,q'], '"a",1,"p,q"',
+ self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
quoting = csv.QUOTE_NONNUMERIC)
- self._write_test(['a','1','p,q'], '"a","1","p,q"',
+ self._write_test(['a',1,'p,q'], '"a","1","p,q"',
quoting = csv.QUOTE_ALL)
def test_write_escape(self):
- self._write_test(['a','1','p,q'], 'a,1,"p,q"',
+ self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
escapechar='\\')
-# FAILED - needs to be fixed [am]:
-# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
-# escapechar='\\', doublequote = 0)
- self._write_test(['a','1','p,q'], 'a,1,p\\,q',
+ self.assertRaises(csv.Error,
+ self._write_test,
+ ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
+ escapechar=None, doublequote=False)
+ self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
+ escapechar='\\', doublequote = False)
+ self._write_test(['"'], '""""',
+ escapechar='\\', quoting = csv.QUOTE_MINIMAL)
+ self._write_test(['"'], '\\"',
+ escapechar='\\', quoting = csv.QUOTE_MINIMAL,
+ doublequote = False)
+ self._write_test(['"'], '\\"',
+ escapechar='\\', quoting = csv.QUOTE_NONE)
+ self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
escapechar='\\', quoting = csv.QUOTE_NONE)
def test_writerows(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 4bb11e4..02f54bd 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -45,6 +45,9 @@ Library
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
dictates.
+ the parser now removes the escapechar prefix from escaped characters.
+ + QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
+ to cast to float.
+ + writer doublequote handling improved.
+ Dialect classes passed to the module are no longer instantiated by
the module before being parsed (the former validation scheme required
this, but the mechanism was unreliable).
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 03b291f..30b7eca 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
{
DialectObj *dialect = self->dialect;
int i, rec_len;
+ char *lineterm;
+
+#define ADDCH(c) \
+ do {\
+ if (copy_phase) \
+ self->rec[rec_len] = c;\
+ rec_len++;\
+ } while(0)
+
+ lineterm = PyString_AsString(dialect->lineterminator);
+ if (lineterm == NULL)
+ return -1;
rec_len = self->rec_len;
- /* If this is not the first field we need a field separator.
- */
- if (self->num_fields > 0) {
- if (copy_phase)
- self->rec[rec_len] = dialect->delimiter;
- rec_len++;
- }
- /* Handle preceding quote.
- */
- switch (dialect->quoting) {
- case QUOTE_ALL:
- *quoted = 1;
- if (copy_phase)
- self->rec[rec_len] = dialect->quotechar;
- rec_len++;
- break;
- case QUOTE_MINIMAL:
- case QUOTE_NONNUMERIC:
- /* We only know about quoted in the copy phase.
- */
- if (copy_phase && *quoted) {
- self->rec[rec_len] = dialect->quotechar;
- rec_len++;
- }
- break;
- case QUOTE_NONE:
- break;
- }
- /* Copy/count field data.
- */
+ /* If this is not the first field we need a field separator */
+ if (self->num_fields > 0)
+ ADDCH(dialect->delimiter);
+
+ /* Handle preceding quote */
+ if (copy_phase && *quoted)
+ ADDCH(dialect->quotechar);
+
+ /* Copy/count field data */
for (i = 0;; i++) {
char c = field[i];
+ int want_escape = 0;
if (c == '\0')
break;
- /* If in doublequote mode we escape quote chars with a
- * quote.
- */
- if (dialect->quoting != QUOTE_NONE &&
- c == dialect->quotechar && dialect->doublequote) {
- if (copy_phase)
- self->rec[rec_len] = dialect->quotechar;
- *quoted = 1;
- rec_len++;
- }
- /* Some special characters need to be escaped. If we have a
- * quote character switch to quoted field instead of escaping
- * individual characters.
- */
- if (!*quoted
- && (c == dialect->delimiter ||
- c == dialect->escapechar ||
- c == '\n' || c == '\r')) {
- if (dialect->quoting != QUOTE_NONE)
- *quoted = 1;
- else if (dialect->escapechar) {
- if (copy_phase)
- self->rec[rec_len] = dialect->escapechar;
- rec_len++;
- }
+ if (c == dialect->delimiter ||
+ c == dialect->escapechar ||
+ c == dialect->quotechar ||
+ strchr(lineterm, c)) {
+ if (dialect->quoting == QUOTE_NONE)
+ want_escape = 1;
else {
- PyErr_Format(error_obj,
- "delimiter must be quoted or escaped");
- return -1;
+ if (c == dialect->quotechar) {
+ if (dialect->doublequote)
+ ADDCH(dialect->quotechar);
+ else
+ want_escape = 1;
+ }
+ if (!want_escape)
+ *quoted = 1;
+ }
+ if (want_escape) {
+ if (!dialect->escapechar) {
+ PyErr_Format(error_obj,
+ "need to escape, but no escapechar set");
+ return -1;
+ }
+ ADDCH(dialect->escapechar);
}
}
/* Copy field character into record buffer.
*/
- if (copy_phase)
- self->rec[rec_len] = c;
- rec_len++;
+ ADDCH(c);
}
/* If field is empty check if it needs to be quoted.
@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
*quoted = 1;
}
- /* Handle final quote character on field.
- */
if (*quoted) {
if (copy_phase)
- self->rec[rec_len] = dialect->quotechar;
+ ADDCH(dialect->quotechar);
else
- /* Didn't know about leading quote until we found it
- * necessary in field data - compensate for it now.
- */
- rec_len++;
- rec_len++;
+ rec_len += 2;
}
-
return rec_len;
+#undef ADDCH
}
static int
@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
if (field == NULL)
return NULL;
- quoted = 0;
- if (dialect->quoting == QUOTE_NONNUMERIC) {
- PyObject *num;
-
- num = PyNumber_Float(field);
- if (num == NULL) {
- quoted = 1;
- PyErr_Clear();
- }
- else {
- Py_DECREF(num);
- }
+ switch (dialect->quoting) {
+ case QUOTE_NONNUMERIC:
+ quoted = !PyNumber_Check(field);
+ break;
+ case QUOTE_ALL:
+ quoted = 1;
+ break;
+ default:
+ quoted = 0;
+ break;
}
if (PyString_Check(field)) {