summary | refs | log | tree | commit | diff | stats
path: root/Lib/csv.py
diff options
context:
space:
mode:
author: Skip Montanaro <skip@pobox.com>, 2009-09-28 02:12:27 (GMT)
committer: Skip Montanaro <skip@pobox.com>, 2009-09-28 02:12:27 (GMT)
commit: b4fd4d37a1b01620cd1bf66d61dcd481b13db411 (patch)
tree: c0f50b69aca5e3764e859522142799c923cc36ab /Lib/csv.py
parent: 17565e5b7be31a85165e4fa8b2324e27831fd742 (diff)
download: cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.zip
cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.tar.gz
cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.tar.bz2
Patch from Thomas Barr so that csv.Sniffer will set doublequote property.
Closes issue 6606.
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- Lib/csv.py 22
1 files changed, 17 insertions, 5 deletions
diff --git a/Lib/csv.py b/Lib/csv.py
index ff51a86..3db5dac 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -170,7 +170,7 @@ class Sniffer:
Returns a dialect (or None) corresponding to the sample
"""
- quotechar, delimiter, skipinitialspace = \
+ quotechar, doublequote, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters)
if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample,
@@ -184,8 +184,8 @@ class Sniffer:
lineterminator = '\r\n'
quoting = QUOTE_MINIMAL
# escapechar = ''
- doublequote = False
+ dialect.doublequote = doublequote
dialect.delimiter = delimiter
# _csv.reader won't accept a quotechar of ''
dialect.quotechar = quotechar or '"'
@@ -217,8 +217,8 @@ class Sniffer:
break
if not matches:
- return ('', None, 0) # (quotechar, delimiter, skipinitialspace)
-
+ # (quotechar, doublequote, delimiter, skipinitialspace)
+ return ('', False, None, 0)
quotes = {}
delims = {}
spaces = 0
@@ -255,7 +255,19 @@ class Sniffer:
delim = ''
skipinitialspace = 0
- return (quotechar, delim, skipinitialspace)
+ # if we see an extra quote between delimiters, we've got a
+ # double quoted format
+ dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+ {'delim':delim, 'quote':quotechar}, re.MULTILINE)
+
+
+
+ if dq_regexp.search(data):
+ doublequote = True
+ else:
+ doublequote = False
+
+ return (quotechar, doublequote, delim, skipinitialspace)
def _guess_delimiter(self, data, delimiters):