summary | refs | log | tree | commit | diff | stats
path: root/Lib/csv.py
diff options
context:
space:
mode:
author: Skip Montanaro <skip@pobox.com> 2005-12-30 05:09:48 (GMT)
committer: Skip Montanaro <skip@pobox.com> 2005-12-30 05:09:48 (GMT)
commit: 39b29be8a6639212402303adab4df29976d8fc7c (patch)
tree: 39ec5c719125381f6898668ba36f1090334359d0 /Lib/csv.py
parent: 0174dddc65af50900324afca3c5d2400858b75f0 (diff)
download: cpython-39b29be8a6639212402303adab4df29976d8fc7c.zip
cpython-39b29be8a6639212402303adab4df29976d8fc7c.tar.gz
cpython-39b29be8a6639212402303adab4df29976d8fc7c.tar.bz2
Fix a delimiter detection problem in the sniffer. Sniffing "a|b|c\r\n" was returning 'a' as the delimiter. It now returns '|', but not because I understood better what the code was supposed to do.
Would someone who understands the idea behind _guess_delimiter() (see its docstring) check whether my fallback choice is actually better than before, or whether it is just serendipity that I picked the proper delimiter?
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- Lib/csv.py 13
1 file changed, 10 insertions, 3 deletions
diff --git a/Lib/csv.py b/Lib/csv.py
index 7516380..f213854 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -152,10 +152,13 @@ class Sniffer:
quotechar, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters)
- if delimiter is None:
+ if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample,
delimiters)
+ if not delimiter:
+ raise Error, "Could not determine delimiter"
+
class dialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
@@ -329,8 +332,12 @@ class Sniffer:
data[0].count("%c " % d))
return (d, skipinitialspace)
- # finally, just return the first damn character in the list
- delim = delims.keys()[0]
+ # nothing else indicates a preference, pick the character that
+ # dominates(?)
+ items = [(v,k) for (k,v) in delims.items()]
+ items.sort()
+ delim = items[-1][1]
+
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)