From c89f284df8903de66be6c60eda7d9187b38956f8 Mon Sep 17 00:00:00 2001 From: Andrew McNamara Date: Wed, 12 Jan 2005 07:44:42 +0000 Subject: When using QUOTE_NONNUMERIC, we now test for "numericness" with PyNumber_Check, rather than trying to convert to a float. Reimplemented writer - now raises exceptions when it sees a quotechar but neither doublequote nor escapechar is set. Doublequote results are now more consistent (e.g., single quote should generate """", rather than "", which is ambiguous). --- Lib/test/test_csv.py | 30 +++++++---- Misc/NEWS | 3 ++ Modules/_csv.c | 140 +++++++++++++++++++++------------------------------ 3 files changed, 81 insertions(+), 92 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index be1147d..a3c0843 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase): (bigstring, bigstring)) def test_write_quoting(self): - self._write_test(['a','1','p,q'], 'a,1,"p,q"') + self._write_test(['a',1,'p,q'], 'a,1,"p,q"') self.assertRaises(csv.Error, self._write_test, - ['a','1','p,q'], 'a,1,"p,q"', + ['a',1,'p,q'], 'a,1,p,q', quoting = csv.QUOTE_NONE) - self._write_test(['a','1','p,q'], 'a,1,"p,q"', + self._write_test(['a',1,'p,q'], 'a,1,"p,q"', quoting = csv.QUOTE_MINIMAL) - self._write_test(['a','1','p,q'], '"a",1,"p,q"', + self._write_test(['a',1,'p,q'], '"a",1,"p,q"', quoting = csv.QUOTE_NONNUMERIC) - self._write_test(['a','1','p,q'], '"a","1","p,q"', + self._write_test(['a',1,'p,q'], '"a","1","p,q"', quoting = csv.QUOTE_ALL) def test_write_escape(self): - self._write_test(['a','1','p,q'], 'a,1,"p,q"', + self._write_test(['a',1,'p,q'], 'a,1,"p,q"', escapechar='\\') -# FAILED - needs to be fixed [am]: -# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"', -# escapechar='\\', doublequote = 0) - self._write_test(['a','1','p,q'], 'a,1,p\\,q', + self.assertRaises(csv.Error, + self._write_test, + ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""', + escapechar=None, doublequote=False) + 
self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""', + escapechar='\\', doublequote = False) + self._write_test(['"'], '""""', + escapechar='\\', quoting = csv.QUOTE_MINIMAL) + self._write_test(['"'], '\\"', + escapechar='\\', quoting = csv.QUOTE_MINIMAL, + doublequote = False) + self._write_test(['"'], '\\"', + escapechar='\\', quoting = csv.QUOTE_NONE) + self._write_test(['a',1,'p,q'], 'a,1,p\\,q', escapechar='\\', quoting = csv.QUOTE_NONE) def test_writerows(self): diff --git a/Misc/NEWS b/Misc/NEWS index 4bb11e4..02f54bd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -45,6 +45,9 @@ Library + quotechar=None and quoting=QUOTE_NONE now work the way PEP 305 dictates. + the parser now removes the escapechar prefix from escaped characters. + + QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting + to cast to float. + + writer doublequote handling improved. + Dialect classes passed to the module are no longer instantiated by the module before being parsed (the former validation scheme required this, but the mechanism was unreliable). diff --git a/Modules/_csv.c b/Modules/_csv.c index 03b291f..30b7eca 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty, { DialectObj *dialect = self->dialect; int i, rec_len; + char *lineterm; + +#define ADDCH(c) \ + do {\ + if (copy_phase) \ + self->rec[rec_len] = c;\ + rec_len++;\ + } while(0) + + lineterm = PyString_AsString(dialect->lineterminator); + if (lineterm == NULL) + return -1; rec_len = self->rec_len; - /* If this is not the first field we need a field separator. - */ - if (self->num_fields > 0) { - if (copy_phase) - self->rec[rec_len] = dialect->delimiter; - rec_len++; - } - /* Handle preceding quote. 
- */ - switch (dialect->quoting) { - case QUOTE_ALL: - *quoted = 1; - if (copy_phase) - self->rec[rec_len] = dialect->quotechar; - rec_len++; - break; - case QUOTE_MINIMAL: - case QUOTE_NONNUMERIC: - /* We only know about quoted in the copy phase. - */ - if (copy_phase && *quoted) { - self->rec[rec_len] = dialect->quotechar; - rec_len++; - } - break; - case QUOTE_NONE: - break; - } - /* Copy/count field data. - */ + /* If this is not the first field we need a field separator */ + if (self->num_fields > 0) + ADDCH(dialect->delimiter); + + /* Handle preceding quote */ + if (copy_phase && *quoted) + ADDCH(dialect->quotechar); + + /* Copy/count field data */ for (i = 0;; i++) { char c = field[i]; + int want_escape = 0; if (c == '\0') break; - /* If in doublequote mode we escape quote chars with a - * quote. - */ - if (dialect->quoting != QUOTE_NONE && - c == dialect->quotechar && dialect->doublequote) { - if (copy_phase) - self->rec[rec_len] = dialect->quotechar; - *quoted = 1; - rec_len++; - } - /* Some special characters need to be escaped. If we have a - * quote character switch to quoted field instead of escaping - * individual characters. 
- */ - if (!*quoted - && (c == dialect->delimiter || - c == dialect->escapechar || - c == '\n' || c == '\r')) { - if (dialect->quoting != QUOTE_NONE) - *quoted = 1; - else if (dialect->escapechar) { - if (copy_phase) - self->rec[rec_len] = dialect->escapechar; - rec_len++; - } + if (c == dialect->delimiter || + c == dialect->escapechar || + c == dialect->quotechar || + strchr(lineterm, c)) { + if (dialect->quoting == QUOTE_NONE) + want_escape = 1; else { - PyErr_Format(error_obj, - "delimiter must be quoted or escaped"); - return -1; + if (c == dialect->quotechar) { + if (dialect->doublequote) + ADDCH(dialect->quotechar); + else + want_escape = 1; + } + if (!want_escape) + *quoted = 1; + } + if (want_escape) { + if (!dialect->escapechar) { + PyErr_Format(error_obj, + "need to escape, but no escapechar set"); + return -1; + } + ADDCH(dialect->escapechar); } } /* Copy field character into record buffer. */ - if (copy_phase) - self->rec[rec_len] = c; - rec_len++; + ADDCH(c); } /* If field is empty check if it needs to be quoted. @@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty, *quoted = 1; } - /* Handle final quote character on field. - */ if (*quoted) { if (copy_phase) - self->rec[rec_len] = dialect->quotechar; + ADDCH(dialect->quotechar); else - /* Didn't know about leading quote until we found it - * necessary in field data - compensate for it now. 
- */ - rec_len++; - rec_len++; + rec_len += 2; } - return rec_len; +#undef ADDCH } static int @@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq) if (field == NULL) return NULL; - quoted = 0; - if (dialect->quoting == QUOTE_NONNUMERIC) { - PyObject *num; - - num = PyNumber_Float(field); - if (num == NULL) { - quoted = 1; - PyErr_Clear(); - } - else { - Py_DECREF(num); - } + switch (dialect->quoting) { + case QUOTE_NONNUMERIC: + quoted = !PyNumber_Check(field); + break; + case QUOTE_ALL: + quoted = 1; + break; + default: + quoted = 0; + break; } if (PyString_Check(field)) { -- cgit v0.12