summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2013-03-20 02:41:47 (GMT)
committerR David Murray <rdmurray@bitdance.com>2013-03-20 02:41:47 (GMT)
commitc7c42efb16612f66af266073937cffa99ad49a33 (patch)
tree2653077e3c508f41ac849282d53d801c1779f76a
parent04cbe0c35b20c8379baf55cc5e152f88449e5202 (diff)
downloadcpython-c7c42efb16612f66af266073937cffa99ad49a33.zip
cpython-c7c42efb16612f66af266073937cffa99ad49a33.tar.gz
cpython-c7c42efb16612f66af266073937cffa99ad49a33.tar.bz2
#15927: Fix csv.reader parsing of escaped \r\n with quoting off.
This fix means that such values are correctly roundtripped, since csv.writer already does the correct escaping. Patch by Michael Johnson.
-rw-r--r--Lib/test/test_csv.py9
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS3
-rw-r--r--Modules/_csv.c13
4 files changed, 25 insertions, 1 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 3dc3836..974d73d 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase):
for i, row in enumerate(csv.reader(fileobj)):
self.assertEqual(row, rows[i])
+ def test_roundtrip_escaped_unquoted_newlines(self):
+ with TemporaryFile("w+", newline='') as fileobj:
+ writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
+ rows = [['a\nb','b'],['c','x\r\nd']]
+ writer.writerows(rows)
+ fileobj.seek(0)
+ for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
+ self.assertEqual(row,rows[i])
+
class TestDialectRegistry(unittest.TestCase):
def test_registry_badargs(self):
self.assertRaises(TypeError, csv.list_dialects, None)
diff --git a/Misc/ACKS b/Misc/ACKS
index 976fc56..600e0bf 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -591,6 +591,7 @@ Orjan Johansen
Fredrik Johansson
Gregory K. Johnson
Kent Johnson
+Michael Johnson
Simon Johnston
Matt Joiner
Thomas Jollans
diff --git a/Misc/NEWS b/Misc/NEWS
index d3e98b3..792b62f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -289,6 +289,9 @@ Core and Builtins
Library
-------
+- Issue #15927: csv now correctly parses escaped newlines and carriage
+ returns when parsing with quoting turned off.
+
- Issue #17467: add readline and readlines support to mock_open in
unittest.mock.
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 48a5cf8..39f9d7d 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule;
typedef enum {
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
- EAT_CRNL
+ EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
typedef enum {
@@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
break;
case ESCAPED_CHAR:
+ if (c == '\n' | c=='\r') {
+ if (parse_add_char(self, c) < 0)
+ return -1;
+ self->state = AFTER_ESCAPED_CRNL;
+ break;
+ }
if (c == '\0')
c = '\n';
if (parse_add_char(self, c) < 0)
@@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
self->state = IN_FIELD;
break;
+ case AFTER_ESCAPED_CRNL:
+ if (c == '\0')
+ break;
+ /*fallthru*/
+
case IN_FIELD:
/* in unquoted field */
if (c == '\n' || c == '\r' || c == '\0') {