summaryrefslogtreecommitdiffstats
path: root/Doc
diff options
context:
space:
mode:
authorSkip Montanaro <skip@pobox.com>2005-03-18 16:56:37 (GMT)
committerSkip Montanaro <skip@pobox.com>2005-03-18 16:56:37 (GMT)
commit5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e (patch)
tree9821663eadf5b43a8113251a2ed7f2f67e272b97 /Doc
parent09515af5e2c23d9a363a8ef872d81c685fe29a2b (diff)
downloadcpython-5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e.zip
cpython-5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e.tar.gz
cpython-5011c3f7fcda2c6ee7cc970e61e363a4ec1f092e.tar.bz2
add UnicodeReader and UnicodeWriter example classes
Diffstat (limited to 'Doc')
-rw-r--r--Doc/lib/libcsv.tex35
1 files changed, 35 insertions, 0 deletions
diff --git a/Doc/lib/libcsv.tex b/Doc/lib/libcsv.tex
index 0788ec1..2816203 100644
--- a/Doc/lib/libcsv.tex
+++ b/Doc/lib/libcsv.tex
@@ -424,3 +424,38 @@ import csv
print csv.reader(['one,two,three'])[0]
\end{verbatim}
+The \module{csv} module doesn't directly support reading and writing
+Unicode, but it is 8-bit clean apart from some problems with \ASCII{} NUL
+characters. You can therefore write wrapper classes that handle the encoding
+and decoding for you, as long as you avoid encodings such as UTF-16 that
+use NULs.
+
+\begin{verbatim}
+import csv
+
+class UnicodeReader:
+    """
+    Wrap csv.reader so that every field of each row is returned as a
+    unicode string decoded from the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # Encoding used by next() to decode each field.
+        self.encoding = encoding
+        # dialect and any extra keyword arguments go straight to csv.reader.
+        self.reader = csv.reader(f, dialect=dialect, **kwds)
+
+    def __iter__(self):
+        # The object is its own iterator; rows are produced by next().
+        return self
+
+    def next(self):
+        """Return the next row as a list of unicode strings."""
+        decoded = []
+        for field in self.reader.next():
+            decoded.append(unicode(field, self.encoding))
+        return decoded
+
+class UnicodeWriter:
+    """
+    Wrap csv.writer so that rows of unicode strings are encoded with the
+    given encoding before being written.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # dialect and any extra keyword arguments go straight to csv.writer.
+        self.writer = csv.writer(f, dialect=dialect, **kwds)
+        # Encoding applied to every field in writerow().
+        self.encoding = encoding
+
+    def writerow(self, row):
+        """Encode each field of row and write it as one record."""
+        # Use the encoding chosen at construction time rather than a
+        # hard-coded "utf-8", so the encoding parameter takes effect.
+        self.writer.writerow([s.encode(self.encoding) for s in row])
+
+    def writerows(self, rows):
+        """Write every row in rows, one record per row."""
+        # Go through writerow() so each row gets the same encoding step.
+        for row in rows:
+            self.writerow(row)
+\end{verbatim}
+
+They should work just like the \class{csv.reader} and \class{csv.writer}
+classes but add an \var{encoding} parameter.