diff options
author | R David Murray <rdmurray@bitdance.com> | 2013-06-29 22:43:59 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2013-06-29 22:43:59 (GMT) |
commit | 24dc75365e4b6903e3c77a9e480451fb3e31b55b (patch) | |
tree | 3424504216b56a95da1e061478f05b9d34682d0e | |
parent | 1d14246b7750e0954962c71eb30bd3243032df13 (diff) | |
download | cpython-24dc75365e4b6903e3c77a9e480451fb3e31b55b.zip cpython-24dc75365e4b6903e3c77a9e480451fb3e31b55b.tar.gz cpython-24dc75365e4b6903e3c77a9e480451fb3e31b55b.tar.bz2 |
#18155: Regex-escape delimiter, in case it is a regex special char.
Patch by Vajrasky Kok, with slight modification to the tests by me.
-rw-r--r-- | Lib/csv.py | 5 | ||||
-rw-r--r-- | Lib/test/test_csv.py | 42 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 6 |
4 files changed, 48 insertions, 6 deletions
@@ -261,8 +261,9 @@ class Sniffer: # if we see an extra quote between delimiters, we've got a # double quoted format - dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ - {'delim':delim, 'quote':quotechar}, re.MULTILINE) + dq_regexp = re.compile( + r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ + {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 53ca5ab..3f82665 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -914,7 +914,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' 'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back' """ - header = '''\ + header1 = '''\ "venue","city","state","date","performers" ''' sample3 = '''\ @@ -933,10 +933,35 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back sample6 = "a|b|c\r\nd|e|f\r\n" sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n" +# Issue 18155: Use a delimiter that is a special char to regex: + + header2 = '''\ +"venue"+"city"+"state"+"date"+"performers" +''' + sample8 = """\ +Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes +Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence +Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow +Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back +""" + sample9 = """\ +'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes' +'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence' +'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow' +'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back' +""" + def test_has_header(self): sniffer = csv.Sniffer() self.assertEqual(sniffer.has_header(self.sample1), False) - self.assertEqual(sniffer.has_header(self.header+self.sample1), True) + self.assertEqual(sniffer.has_header(self.header1 + self.sample1), + True) + + def test_has_header_regex_special_delimiter(self): + sniffer = csv.Sniffer() + self.assertEqual(sniffer.has_header(self.sample8), False) + self.assertEqual(sniffer.has_header(self.header2 + self.sample8), + True) def test_sniff(self): sniffer = csv.Sniffer() @@ -970,13 +995,24 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back dialect = sniffer.sniff(self.sample7) self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.quotechar, "'") + dialect = sniffer.sniff(self.sample8) + self.assertEqual(dialect.delimiter, '+') + dialect = sniffer.sniff(self.sample9) + self.assertEqual(dialect.delimiter, '+') + self.assertEqual(dialect.quotechar, "'") def test_doublequote(self): sniffer = csv.Sniffer() - dialect = sniffer.sniff(self.header) + dialect = sniffer.sniff(self.header1) + self.assertFalse(dialect.doublequote) + dialect = sniffer.sniff(self.header2) self.assertFalse(dialect.doublequote) dialect = sniffer.sniff(self.sample2) self.assertTrue(dialect.doublequote) + dialect = sniffer.sniff(self.sample8) + self.assertFalse(dialect.doublequote) + dialect = sniffer.sniff(self.sample9) + self.assertTrue(dialect.doublequote) if not hasattr(sys, "gettotalrefcount"): if test_support.verbose: print "*** skipping leakage tests ***" @@ -545,6 +545,7 @@ Jeff Knupp Greg Kochanski Damon Kohler Marko Kohtala +Vajrasky Kok Guido Kollerie Peter A. Koren Joseph Koshy @@ -24,11 +24,15 @@ Core and Builtins Library ------- +- Issue #18155: The csv module now correctly handles csv files that use + a delimiter character that has a special meaning in regexes, instead of + throwing an exception. + - Issue #18135: ssl.SSLSocket.write() now raises an OverflowError if the input string in longer than 2 gigabytes. The ssl module does not support partial write. -- Issue #18167: cgi.FieldStorage no more fails to handle multipart/form-data +- Issue #18167: cgi.FieldStorage no longer fails to handle multipart/form-data when \r\n appears at end of 65535 bytes without other newlines. - Issue #17403: urllib.parse.robotparser normalizes the urls before adding to |