diff options
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- | Lib/csv.py | 22 |
1 files changed, 17 insertions, 5 deletions
@@ -165,7 +165,7 @@ class Sniffer: Returns a dialect (or None) corresponding to the sample """ - quotechar, delimiter, skipinitialspace = \ + quotechar, doublequote, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) if not delimiter: delimiter, skipinitialspace = self._guess_delimiter(sample, @@ -179,8 +179,8 @@ class Sniffer: lineterminator = '\r\n' quoting = QUOTE_MINIMAL # escapechar = '' - doublequote = False + dialect.doublequote = doublequote dialect.delimiter = delimiter # _csv.reader won't accept a quotechar of '' dialect.quotechar = quotechar or '"' @@ -212,8 +212,8 @@ class Sniffer: break if not matches: - return ('', None, 0) # (quotechar, delimiter, skipinitialspace) - + # (quotechar, doublequote, delimiter, skipinitialspace) + return ('', False, None, 0) quotes = {} delims = {} spaces = 0 @@ -248,7 +248,19 @@ class Sniffer: delim = '' skipinitialspace = 0 - return (quotechar, delim, skipinitialspace) + # if we see an extra quote between delimiters, we've got a + # double quoted format + dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ + {'delim':delim, 'quote':quotechar}, re.MULTILINE) + + + + if dq_regexp.search(data): + doublequote = True + else: + doublequote = False + + return (quotechar, doublequote, delim, skipinitialspace) def _guess_delimiter(self, data, delimiters): |