Commit f7bc5f94, authored by Walter Dörwald

Change the example classes UnicodeReader and UnicodeWriter so

that they work with all encodings. For UnicodeReader the real
input stream is wrapped in a line iterator that reencodes the
input to UTF-8. For UnicodeWriter the UTF-8 encoded output is
written to a queue, from where it is reencoded to the target
encoding and written to the real output stream.
üst f4d8f390
......@@ -456,44 +456,68 @@ def utf_8_encoder(unicode_csv_data):
yield line.encode('utf-8')
\end{verbatim}
The classes below work just like the \class{csv.reader} and
\class{csv.writer} classes, but they add an \var{encoding} parameter
to allow for encoded files:
For all other encodings the following \class{UnicodeReader} and
\class{UnicodeWriter} classes can be used. They take an additional
\var{encoding} parameter in their constructor and make sure that the data
passes through the real reader or writer encoded as UTF-8:
\begin{verbatim}
import csv
import csv, codecs, cStringIO
class UnicodeReader:
class UTF8Recoder:
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)
self.encoding = encoding
def next(self):
row = self.reader.next()
return [unicode(s, self.encoding) for s in row]
return [unicode(s, "utf-8") for s in row]
def __iter__(self):
return self
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
self.writer = csv.writer(f, dialect=dialect, **kwds)
self.encoding = encoding
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode(self.encoding) for s in row])
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment