diff options
author | Skip Montanaro <skip@pobox.com> | 2009-09-28 02:12:27 (GMT) |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2009-09-28 02:12:27 (GMT) |
commit | b4fd4d37a1b01620cd1bf66d61dcd481b13db411 (patch) | |
tree | c0f50b69aca5e3764e859522142799c923cc36ab | |
parent | 17565e5b7be31a85165e4fa8b2324e27831fd742 (diff) | |
download | cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.zip cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.tar.gz cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.tar.bz2 |
Patch from Thomas Barr so that csv.Sniffer will set the doublequote property of the sniffed dialect.
Closes issue 6606.
-rw-r--r-- | Lib/csv.py | 22 | ||||
-rw-r--r-- | Lib/test/test_csv.py | 9 |
2 files changed, 25 insertions, 6 deletions
@@ -170,7 +170,7 @@ class Sniffer: Returns a dialect (or None) corresponding to the sample """ - quotechar, delimiter, skipinitialspace = \ + quotechar, doublequote, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) if not delimiter: delimiter, skipinitialspace = self._guess_delimiter(sample, @@ -184,8 +184,8 @@ class Sniffer: lineterminator = '\r\n' quoting = QUOTE_MINIMAL # escapechar = '' - doublequote = False + dialect.doublequote = doublequote dialect.delimiter = delimiter # _csv.reader won't accept a quotechar of '' dialect.quotechar = quotechar or '"' @@ -217,8 +217,8 @@ class Sniffer: break if not matches: - return ('', None, 0) # (quotechar, delimiter, skipinitialspace) - + # (quotechar, doublequote, delimiter, skipinitialspace) + return ('', False, None, 0) quotes = {} delims = {} spaces = 0 @@ -255,7 +255,19 @@ class Sniffer: delim = '' skipinitialspace = 0 - return (quotechar, delim, skipinitialspace) + # if we see an extra quote between delimiters, we've got a + # double quoted format + dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ + {'delim':delim, 'quote':quotechar}, re.MULTILINE) + + + + if dq_regexp.search(data): + doublequote = True + else: + doublequote = False + + return (quotechar, doublequote, delim, skipinitialspace) def _guess_delimiter(self, data, delimiters): diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index aeb68bb..cad59ac 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -891,7 +891,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back 'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes' 'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence' 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' -'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back' +'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back' """ 
header = '''\ "venue","city","state","date","performers" @@ -950,6 +950,13 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.quotechar, "'") + def test_doublequote(self): + sniffer = csv.Sniffer() + dialect = sniffer.sniff(self.header) + self.assertFalse(dialect.doublequote) + dialect = sniffer.sniff(self.sample2) + self.assertTrue(dialect.doublequote) + if not hasattr(sys, "gettotalrefcount"): if test_support.verbose: print "*** skipping leakage tests ***" else: |