diff options
author | Skip Montanaro <skip@pobox.com> | 2005-12-30 05:09:48 (GMT) |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2005-12-30 05:09:48 (GMT) |
commit | 39b29be8a6639212402303adab4df29976d8fc7c (patch) | |
tree | 39ec5c719125381f6898668ba36f1090334359d0 /Lib/csv.py | |
parent | 0174dddc65af50900324afca3c5d2400858b75f0 (diff) | |
download | cpython-39b29be8a6639212402303adab4df29976d8fc7c.zip cpython-39b29be8a6639212402303adab4df29976d8fc7c.tar.gz cpython-39b29be8a6639212402303adab4df29976d8fc7c.tar.bz2 |
Fix a delimiter detection problem in sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter. It now returns '|', but not because I
understood better what the code was supposed to do. Would someone who
understands the idea behind _guess_delimiter() (see its doc string) look to
see if my fallback choice is better than before or if it's just serendipity
that I picked the proper delimiter?
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- | Lib/csv.py | 13 |
1 files changed, 10 insertions, 3 deletions
@@ -152,10 +152,13 @@ class Sniffer: quotechar, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) - if delimiter is None: + if not delimiter: delimiter, skipinitialspace = self._guess_delimiter(sample, delimiters) + if not delimiter: + raise Error, "Could not determine delimiter" + class dialect(Dialect): _name = "sniffed" lineterminator = '\r\n' @@ -329,8 +332,12 @@ class Sniffer: data[0].count("%c " % d)) return (d, skipinitialspace) - # finally, just return the first damn character in the list - delim = delims.keys()[0] + # nothing else indicates a preference, pick the character that + # dominates(?) + items = [(v,k) for (k,v) in delims.items()] + items.sort() + delim = items[-1][1] + skipinitialspace = (data[0].count(delim) == data[0].count("%c " % delim)) return (delim, skipinitialspace) |