Change the example classes UnicodeReader and UnicodeWriter so

that they work with all encodings. For UnicodeReader the real input stream is wrapped in a line iterator that reencodes the input to UTF-8. For UnicodeWriter the UTF-8 encoded output is written to a queue, from where it is reencoded to the target encoding and written to the real output stream.

Change the example classes UnicodeReader and UnicodeWriter so
that they work with all encodings. For UnicodeReader the real input stream is wrapped in a line iterator that reencodes the input to UTF-8. For UnicodeWriter the UTF-8 encoded output is written to a queue, from where it is reencoded to the target encoding and written to the real output stream.
f7bc5f94 · Walter Dörwald · f4d8f390 · f7bc5f94
Kaydet (Commit) f7bc5f94 authored Nis 04, 2006 tarafından Walter Dörwald
Hide whitespace changes
Inline Side-by-side

Showing with 35 additions and 11 deletions

libcsv.tex Doc/lib/libcsv.tex +35 -11

No files found.
--- a/Doc/lib/libcsv.tex
+++ b/Doc/lib/libcsv.tex
@@ -456,44 +456,68 @@ def utf_8_encoder(unicode_csv_data):
        yield line.encode('utf-8')
 \end{verbatim}

-The classes below work just like the \class{csv.reader} and
-\class{csv.writer} classes, but they add an \var{encoding} parameter
-to allow for encoded files:
+For all other encodings the following \class{UnicodeReader} and
+\class{UnicodeWriter} classes can be used. They take an additional
+\var{encoding} parameter in their constructor and make sure that the data
+passes the real reader or writer encoded as UTF-8:

 \begin{verbatim}
-import csv
+import csv, codecs, cStringIO

-class UnicodeReader:
+class UTF8Recoder:
+    """
+    Iterator that reads an encoded stream and reencodes the input to UTF-8
+    """
+    def __init__(self, f, encoding):
+        self.reader = codecs.getreader(encoding)(f)

+    def __iter__(self):
+        return self
+
+    def next(self):
+        return self.reader.next().encode("utf-8")
+
+class UnicodeReader:
    """
    A CSV reader which will iterate over lines in the CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)
-        self.encoding = encoding

    def next(self):
        row = self.reader.next()
-        return [unicode(s, self.encoding) for s in row]
+        return [unicode(s, "utf-8") for s in row]

    def __iter__(self):
        return self

 class UnicodeWriter:
-
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
-        self.writer = csv.writer(f, dialect=dialect, **kwds)
-        self.encoding = encoding
+        # Redirect output to a queue
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
+        self.stream = f
+        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
-        self.writer.writerow([s.encode(self.encoding) for s in row])
+        self.writer.writerow([s.encode("utf-8") for s in row])
+        # Fetch UTF-8 output from the queue ...
+        data = self.queue.getvalue()
+        data = data.decode("utf-8")
+        # ... and reencode it into the target encoding
+        data = self.encoder.encode(data)
+        # write to the target stream
+        self.stream.write(data)
+        # empty queue
+        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows: