summaryrefslogtreecommitdiffstats
path: root/Modules/_csv.c
diff options
context:
space:
mode:
authorAndrew McNamara <andrewm@object-craft.com.au>2005-01-12 07:44:42 (GMT)
committerAndrew McNamara <andrewm@object-craft.com.au>2005-01-12 07:44:42 (GMT)
commitc89f284df8903de66be6c60eda7d9187b38956f8 (patch)
treeda725f672dd0f5f90cf3ed11f28472fe7434e7e7 /Modules/_csv.c
parent31d8896ee21c28ee55ae4ba2764e11fba88c9fd4 (diff)
downloadcpython-c89f284df8903de66be6c60eda7d9187b38956f8.zip
cpython-c89f284df8903de66be6c60eda7d9187b38956f8.tar.gz
cpython-c89f284df8903de66be6c60eda7d9187b38956f8.tar.bz2
When using QUOTE_NONNUMERIC, we now test for "numericness" with
PyNumber_Check, rather than trying to convert to a float. Reimplemented writer - it now raises an exception when it sees a quotechar but neither doublequote nor escapechar is set. Doublequote results are now more consistent (e.g., a field containing a single quote character should generate """", rather than "", which is ambiguous).
Diffstat (limited to 'Modules/_csv.c')
-rw-r--r--Modules/_csv.c140
1 files changed, 58 insertions, 82 deletions
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 03b291f..30b7eca 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
{
DialectObj *dialect = self->dialect;
int i, rec_len;
+ char *lineterm;
+
+#define ADDCH(c) \
+ do {\
+ if (copy_phase) \
+ self->rec[rec_len] = c;\
+ rec_len++;\
+ } while(0)
+
+ lineterm = PyString_AsString(dialect->lineterminator);
+ if (lineterm == NULL)
+ return -1;
rec_len = self->rec_len;
- /* If this is not the first field we need a field separator.
- */
- if (self->num_fields > 0) {
- if (copy_phase)
- self->rec[rec_len] = dialect->delimiter;
- rec_len++;
- }
- /* Handle preceding quote.
- */
- switch (dialect->quoting) {
- case QUOTE_ALL:
- *quoted = 1;
- if (copy_phase)
- self->rec[rec_len] = dialect->quotechar;
- rec_len++;
- break;
- case QUOTE_MINIMAL:
- case QUOTE_NONNUMERIC:
- /* We only know about quoted in the copy phase.
- */
- if (copy_phase && *quoted) {
- self->rec[rec_len] = dialect->quotechar;
- rec_len++;
- }
- break;
- case QUOTE_NONE:
- break;
- }
- /* Copy/count field data.
- */
+ /* If this is not the first field we need a field separator */
+ if (self->num_fields > 0)
+ ADDCH(dialect->delimiter);
+
+ /* Handle preceding quote */
+ if (copy_phase && *quoted)
+ ADDCH(dialect->quotechar);
+
+ /* Copy/count field data */
for (i = 0;; i++) {
char c = field[i];
+ int want_escape = 0;
if (c == '\0')
break;
- /* If in doublequote mode we escape quote chars with a
- * quote.
- */
- if (dialect->quoting != QUOTE_NONE &&
- c == dialect->quotechar && dialect->doublequote) {
- if (copy_phase)
- self->rec[rec_len] = dialect->quotechar;
- *quoted = 1;
- rec_len++;
- }
- /* Some special characters need to be escaped. If we have a
- * quote character switch to quoted field instead of escaping
- * individual characters.
- */
- if (!*quoted
- && (c == dialect->delimiter ||
- c == dialect->escapechar ||
- c == '\n' || c == '\r')) {
- if (dialect->quoting != QUOTE_NONE)
- *quoted = 1;
- else if (dialect->escapechar) {
- if (copy_phase)
- self->rec[rec_len] = dialect->escapechar;
- rec_len++;
- }
+ if (c == dialect->delimiter ||
+ c == dialect->escapechar ||
+ c == dialect->quotechar ||
+ strchr(lineterm, c)) {
+ if (dialect->quoting == QUOTE_NONE)
+ want_escape = 1;
else {
- PyErr_Format(error_obj,
- "delimiter must be quoted or escaped");
- return -1;
+ if (c == dialect->quotechar) {
+ if (dialect->doublequote)
+ ADDCH(dialect->quotechar);
+ else
+ want_escape = 1;
+ }
+ if (!want_escape)
+ *quoted = 1;
+ }
+ if (want_escape) {
+ if (!dialect->escapechar) {
+ PyErr_Format(error_obj,
+ "need to escape, but no escapechar set");
+ return -1;
+ }
+ ADDCH(dialect->escapechar);
}
}
/* Copy field character into record buffer.
*/
- if (copy_phase)
- self->rec[rec_len] = c;
- rec_len++;
+ ADDCH(c);
}
/* If field is empty check if it needs to be quoted.
@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
*quoted = 1;
}
- /* Handle final quote character on field.
- */
if (*quoted) {
if (copy_phase)
- self->rec[rec_len] = dialect->quotechar;
+ ADDCH(dialect->quotechar);
else
- /* Didn't know about leading quote until we found it
- * necessary in field data - compensate for it now.
- */
- rec_len++;
- rec_len++;
+ rec_len += 2;
}
-
return rec_len;
+#undef ADDCH
}
static int
@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
if (field == NULL)
return NULL;
- quoted = 0;
- if (dialect->quoting == QUOTE_NONNUMERIC) {
- PyObject *num;
-
- num = PyNumber_Float(field);
- if (num == NULL) {
- quoted = 1;
- PyErr_Clear();
- }
- else {
- Py_DECREF(num);
- }
+ switch (dialect->quoting) {
+ case QUOTE_NONNUMERIC:
+ quoted = !PyNumber_Check(field);
+ break;
+ case QUOTE_ALL:
+ quoted = 1;
+ break;
+ default:
+ quoted = 0;
+ break;
}
if (PyString_Check(field)) {