summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2013-06-29 22:42:24 (GMT)
committerR David Murray <rdmurray@bitdance.com>2013-06-29 22:42:24 (GMT)
commit634e076bbe08de3b5b4dfc2a1e10a20b64114ac1 (patch)
tree4f1ff4125ddc40af6ed8a17baa033d57937693f3
parent06beaba785b78cc89986b447ef2bb7be4f7a7696 (diff)
parent925a3225708c40c8cd58a7475c5a6d3bd89db01d (diff)
downloadcpython-634e076bbe08de3b5b4dfc2a1e10a20b64114ac1.zip
cpython-634e076bbe08de3b5b4dfc2a1e10a20b64114ac1.tar.gz
cpython-634e076bbe08de3b5b4dfc2a1e10a20b64114ac1.tar.bz2
Merge #18155: Regex-escape delimiter, in case it is a regex special char.
-rw-r--r--Lib/csv.py5
-rw-r--r--Lib/test/test_csv.py42
-rw-r--r--Misc/NEWS6
3 files changed, 47 insertions, 6 deletions
diff --git a/Lib/csv.py b/Lib/csv.py
index 8dfc77e..da3bc44 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -264,8 +264,9 @@ class Sniffer:
# if we see an extra quote between delimiters, we've got a
# double quoted format
- dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
- {'delim':delim, 'quote':quotechar}, re.MULTILINE)
+ dq_regexp = re.compile(
+ r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+ {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 974d73d..559e51f 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -805,7 +805,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""
- header = '''\
+ header1 = '''\
"venue","city","state","date","performers"
'''
sample3 = '''\
@@ -824,10 +824,35 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
sample6 = "a|b|c\r\nd|e|f\r\n"
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
+# Issue 18155: Use a delimiter that is a special char to regex:
+
+ header2 = '''\
+"venue"+"city"+"state"+"date"+"performers"
+'''
+ sample8 = """\
+Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
+Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
+Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
+Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
+"""
+ sample9 = """\
+'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
+'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
+'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
+'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
+"""
+
def test_has_header(self):
sniffer = csv.Sniffer()
self.assertEqual(sniffer.has_header(self.sample1), False)
- self.assertEqual(sniffer.has_header(self.header+self.sample1), True)
+ self.assertEqual(sniffer.has_header(self.header1 + self.sample1),
+ True)
+
+ def test_has_header_regex_special_delimiter(self):
+ sniffer = csv.Sniffer()
+ self.assertEqual(sniffer.has_header(self.sample8), False)
+ self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
+ True)
def test_sniff(self):
sniffer = csv.Sniffer()
@@ -861,13 +886,24 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
dialect = sniffer.sniff(self.sample7)
self.assertEqual(dialect.delimiter, "|")
self.assertEqual(dialect.quotechar, "'")
+ dialect = sniffer.sniff(self.sample8)
+ self.assertEqual(dialect.delimiter, '+')
+ dialect = sniffer.sniff(self.sample9)
+ self.assertEqual(dialect.delimiter, '+')
+ self.assertEqual(dialect.quotechar, "'")
def test_doublequote(self):
sniffer = csv.Sniffer()
- dialect = sniffer.sniff(self.header)
+ dialect = sniffer.sniff(self.header1)
+ self.assertFalse(dialect.doublequote)
+ dialect = sniffer.sniff(self.header2)
self.assertFalse(dialect.doublequote)
dialect = sniffer.sniff(self.sample2)
self.assertTrue(dialect.doublequote)
+ dialect = sniffer.sniff(self.sample8)
+ self.assertFalse(dialect.doublequote)
+ dialect = sniffer.sniff(self.sample9)
+ self.assertTrue(dialect.doublequote)
if not hasattr(sys, "gettotalrefcount"):
if support.verbose: print("*** skipping leakage tests ***")
diff --git a/Misc/NEWS b/Misc/NEWS
index d02de27..e829e72 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -135,6 +135,10 @@ Core and Builtins
Library
-------
+- Issue #18155: The csv module now correctly handles csv files that use
+ a delimiter character that has a special meaning in regexes, instead of
+ throwing an exception.
+
- Issue #14360: encode_quopri can now be successfully used as an encoder
when constructing a MIMEApplication object.
@@ -156,7 +160,7 @@ Library
lists all loaded CA certificates and cert_store_stats() returns amount of
loaded X.509 certs, X.509 CA certs and CRLs.
-- Issue #18167: cgi.FieldStorage no more fails to handle multipart/form-data
+- Issue #18167: cgi.FieldStorage no longer fails to handle multipart/form-data
when \r\n appears at end of 65535 bytes without other newlines.
- Issue #18076: Introduce importlib.util.decode_source().