Kaydet (Commit) f7bc5f94, authored by Walter Dörwald

Change the example classes UnicodeReader and UnicodeWriter so
that they work with all encodings. For UnicodeReader the real
input stream is wrapped in a line iterator that reencodes the
input to UTF-8. For UnicodeWriter the UTF-8 encoded output is
written to a queue, from where it is reencoded to the target
encoding and written to the real output stream.
üst f4d8f390
...@@ -456,44 +456,68 @@ def utf_8_encoder(unicode_csv_data): ...@@ -456,44 +456,68 @@ def utf_8_encoder(unicode_csv_data):
yield line.encode('utf-8') yield line.encode('utf-8')
\end{verbatim} \end{verbatim}
The classes below work just like the \class{csv.reader} and For all other encodings the following \class{UnicodeReader} and
\class{csv.writer} classes, but they add an \var{encoding} parameter \class{UnicodeWriter} classes can be used. They take an additional
to allow for encoded files: \var{encoding} parameter in their constructor and make sure that the data
passes the real reader or writer encoded as UTF-8:
\begin{verbatim} \begin{verbatim}
import csv import csv, codecs, cStringIO
class UnicodeReader: class UTF8Recoder:
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
""" """
A CSV reader which will iterate over lines in the CSV file "f", A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding. which is encoded in the given encoding.
""" """
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds) self.reader = csv.reader(f, dialect=dialect, **kwds)
self.encoding = encoding
def next(self): def next(self):
row = self.reader.next() row = self.reader.next()
return [unicode(s, self.encoding) for s in row] return [unicode(s, "utf-8") for s in row]
def __iter__(self): def __iter__(self):
return self return self
class UnicodeWriter: class UnicodeWriter:
""" """
A CSV writer which will write rows to CSV file "f", A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding. which is encoded in the given encoding.
""" """
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
self.writer = csv.writer(f, dialect=dialect, **kwds) # Redirect output to a queue
self.encoding = encoding self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row): def writerow(self, row):
self.writer.writerow([s.encode(self.encoding) for s in row]) self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows): def writerows(self, rows):
for row in rows: for row in rows:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment