diff options
author | Skip Montanaro <skip@pobox.com> | 2005-03-18 16:56:37 (GMT) |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2005-03-18 16:56:37 (GMT) |
commit | 5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e (patch) | |
tree | 9821663eadf5b43a8113251a2ed7f2f67e272b97 /Doc | |
parent | 09515af5e2c23d9a363a8ef872d81c685fe29a2b (diff) | |
download | cpython-5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e.zip cpython-5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e.tar.gz cpython-5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e.tar.bz2 |
add UnicodeReader and UnicodeWriter example classes
Diffstat (limited to 'Doc')
-rw-r--r-- | Doc/lib/libcsv.tex | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/Doc/lib/libcsv.tex b/Doc/lib/libcsv.tex
index 0788ec1..2816203 100644
--- a/Doc/lib/libcsv.tex
+++ b/Doc/lib/libcsv.tex
@@ -424,3 +424,38 @@ import csv
 print csv.reader(['one,two,three'])[0]
 \end{verbatim}
 
+The \module{csv} module doesn't directly support reading and writing
+Unicode, but it is 8-bit clean save for some problems with \ASCII{} NUL
+characters, so you can write classes that handle the encoding and decoding
+for you as long as you avoid encodings like utf-16 that use NULs.
+
+\begin{verbatim}
+import csv
+
+class UnicodeReader:
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        self.reader = csv.reader(f, dialect=dialect, **kwds)
+        self.encoding = encoding
+
+    def next(self):
+        row = self.reader.next()
+        return [unicode(s, self.encoding) for s in row]
+
+    def __iter__(self):
+        return self
+
+class UnicodeWriter:
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        self.writer = csv.writer(f, dialect=dialect, **kwds)
+        self.encoding = encoding
+
+    def writerow(self, row):
+        self.writer.writerow([s.encode("utf-8") for s in row])
+
+    def writerows(self, rows):
+        for row in rows:
+            self.writerow(row)
+\end{verbatim}
+
+They should work just like the \class{csv.reader} and \class{csv.writer}
+classes but add an \var{encoding} parameter.