diff options
author | R David Murray <rdmurray@bitdance.com> | 2013-03-20 02:41:47 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2013-03-20 02:41:47 (GMT) |
commit | c7c42efb16612f66af266073937cffa99ad49a33 (patch) | |
tree | 2653077e3c508f41ac849282d53d801c1779f76a | |
parent | 04cbe0c35b20c8379baf55cc5e152f88449e5202 (diff) | |
download | cpython-c7c42efb16612f66af266073937cffa99ad49a33.zip cpython-c7c42efb16612f66af266073937cffa99ad49a33.tar.gz cpython-c7c42efb16612f66af266073937cffa99ad49a33.tar.bz2 |
#15927: Fix csv.reader parsing of escaped \r\n with quoting off.
This fix means that such values are correctly roundtripped, since
csv.writer already does the correct escaping.
Patch by Michael Johnson.
-rw-r--r-- | Lib/test/test_csv.py | 9 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/_csv.c | 13 |
4 files changed, 25 insertions, 1 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 3dc3836..974d73d 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase): for i, row in enumerate(csv.reader(fileobj)): self.assertEqual(row, rows[i]) + def test_roundtrip_escaped_unquoted_newlines(self): + with TemporaryFile("w+", newline='') as fileobj: + writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\") + rows = [['a\nb','b'],['c','x\r\nd']] + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")): + self.assertEqual(row,rows[i]) + class TestDialectRegistry(unittest.TestCase): def test_registry_badargs(self): self.assertRaises(TypeError, csv.list_dialects, None) @@ -591,6 +591,7 @@ Orjan Johansen Fredrik Johansson Gregory K. Johnson Kent Johnson +Michael Johnson Simon Johnston Matt Joiner Thomas Jollans @@ -289,6 +289,9 @@ Core and Builtins Library ------- +- Issue #15927: CSV now correctly parses escaped newlines and carriage + returns when parsing with quoting turned off. + - Issue #17467: add readline and readlines support to mock_open in unittest.mock. 
diff --git a/Modules/_csv.c b/Modules/_csv.c index 48a5cf8..39f9d7d 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule; typedef enum { START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, - EAT_CRNL + EAT_CRNL,AFTER_ESCAPED_CRNL } ParserState; typedef enum { @@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) break; case ESCAPED_CHAR: + if (c == '\n' | c=='\r') { + if (parse_add_char(self, c) < 0) + return -1; + self->state = AFTER_ESCAPED_CRNL; + break; + } if (c == '\0') c = '\n'; if (parse_add_char(self, c) < 0) @@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) self->state = IN_FIELD; break; + case AFTER_ESCAPED_CRNL: + if (c == '\0') + break; + /*fallthru*/ + case IN_FIELD: /* in unquoted field */ if (c == '\n' || c == '\r' || c == '\0') { |