Commit e2b07055, authored by Neal Norwitz

Revert r61508: it caused test_mailbox to fail on all platforms.

Parent: ada8c3b0
......@@ -1180,14 +1180,14 @@ class TextIOWrapper(TextIOBase):
self._encoder = None
self._decoder = None
self._decoded_text = "" # buffer for text produced by decoder
self._decoded_text_offset = 0 # offset to text returned by read()
self._snapshot = None # info for reconstructing decoder state
self._seekable = self._telling = self.buffer.seekable()
# A word about _snapshot. This attribute is either None, or a tuple
# (decoder_state, next_input) where decoder_state is the second
# (integer) item of the decoder state, and next_input is the chunk
# of bytes that comes after the snapshot point in the input.
# (decoder_state, input_chunk, decoded_chars) where decoder_state is
# the second (integer) item of the decoder state, input_chunk is the
# chunk of bytes that was read, and decoded_chars is the number of
# characters rendered by the decoder after feeding it those bytes.
# We use this to reconstruct intermediate decoder states in tell().
# Naming convention:
......@@ -1271,10 +1271,10 @@ class TextIOWrapper(TextIOBase):
"""
Read and decode the next chunk of data from the BufferedReader.
The return value is True unless EOF was reached. The decoded string
is placed in self._decoded_text (replacing its previous value).
(The entire input chunk is sent to the decoder, though some of it
may remain buffered in the decoder, yet to be converted.)
Return a tuple of two elements: all the bytes that were read, and
the decoded string produced by the decoder. (The entire input
chunk is sent to the decoder, but some of it may remain buffered
in the decoder, yet to be converted.)
"""
if self._decoder is None:
......@@ -1283,9 +1283,8 @@ class TextIOWrapper(TextIOBase):
# No one should call tell(), so don't bother taking a snapshot.
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
eof = not input_chunk
self._decoded_text = self._decoder.decode(input_chunk, eof)
self._decoded_text_offset = 0
return not eof
decoded = self._decoder.decode(input_chunk, eof)
return (input_chunk, decoded)
# The cookie returned by tell() cannot include the contents of
# the decoder's buffer, so we need to snapshot a point in the
......@@ -1299,15 +1298,16 @@ class TextIOWrapper(TextIOBase):
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
eof = not input_chunk
self._decoded_text = self._decoder.decode(input_chunk, eof)
self._decoded_text_offset = 0
decoded = self._decoder.decode(input_chunk, eof)
# At the snapshot point, len(dec_buffer) bytes ago, the next input
# to be passed to the decoder is dec_buffer + input_chunk.
self._snapshot = (dec_flags, dec_buffer + input_chunk)
return not eof
# At the snapshot point len(dec_buffer) bytes ago, the next input
# to be passed to the decoder is dec_buffer + input_chunk. Save
# len(decoded) so that later, tell() can figure out how much
# decoded data has been used up by TextIOWrapper.read().
self._snapshot = (dec_flags, dec_buffer + input_chunk, len(decoded))
return (input_chunk, decoded)
def _pack_cookie(self, position, dec_flags=0,
def _encode_tell_cookie(self, position, dec_flags=0,
feed_bytes=0, need_eof=0, skip_chars=0):
# The meaning of a tell() cookie is: seek to position, set the
# decoder flags to dec_flags, read feed_bytes bytes, feed them
......@@ -1317,7 +1317,7 @@ class TextIOWrapper(TextIOBase):
return (position | (dec_flags<<64) | (feed_bytes<<128) |
(skip_chars<<192) | bool(need_eof)<<256)
def _unpack_cookie(self, bigint):
def _decode_tell_cookie(self, bigint):
rest, position = divmod(bigint, 1<<64)
rest, dec_flags = divmod(rest, 1<<64)
rest, feed_bytes = divmod(rest, 1<<64)
......@@ -1339,14 +1339,14 @@ class TextIOWrapper(TextIOBase):
return position
# Skip backward to the snapshot point (see _read_chunk).
dec_flags, next_input = self._snapshot
dec_flags, next_input, decoded_chars = self._snapshot
position -= len(next_input)
# How many decoded characters have been returned since the snapshot?
skip_chars = self._decoded_text_offset
# How many decoded characters have been consumed since the snapshot?
skip_chars = decoded_chars - len(self._decoded_text)
if skip_chars == 0:
# We haven't moved from the snapshot point.
return self._pack_cookie(position, dec_flags)
return self._encode_tell_cookie(position, dec_flags)
# Walk the decoder forward, one byte at a time, to find the minimum
# input necessary to give us the decoded characters we need to skip.
......@@ -1373,8 +1373,8 @@ class TextIOWrapper(TextIOBase):
if decoded_chars >= skip_chars:
break
else:
# We didn't get enough decoded data; signal EOF to get more.
decoded = decoder.decode(b"", final=True)
# We didn't get enough decoded data; send EOF to get more.
decoded = decoder.decode(b"", True)
decoded_chars += len(decoded)
need_eof = 1
if decoded_chars < skip_chars:
......@@ -1385,7 +1385,7 @@ class TextIOWrapper(TextIOBase):
position += safe_fed_bytes
fed_bytes -= safe_fed_bytes
skip_chars -= safe_decoded_chars
return self._pack_cookie(
return self._encode_tell_cookie(
position, dec_flags, fed_bytes, need_eof, skip_chars)
finally:
decoder.setstate(saved_state)
......@@ -1405,7 +1405,8 @@ class TextIOWrapper(TextIOBase):
raise IOError("can't do nonzero end-relative seeks")
self.flush()
position = self.buffer.seek(0, 2)
self._clear_decoded_text()
self._decoded_text = ""
self._snapshot = None
if self._decoder:
self._decoder.reset()
return position
......@@ -1418,70 +1419,48 @@ class TextIOWrapper(TextIOBase):
# Seek back to the snapshot point.
position, dec_flags, feed_bytes, need_eof, skip_chars = \
self._unpack_cookie(cookie)
self._decode_tell_cookie(cookie)
self.buffer.seek(position)
self._clear_decoded_text()
self._decoded_text = ""
self._snapshot = None
if self._decoder or dec_flags or feed_bytes or need_eof:
# Restore the decoder flags to their values from the snapshot.
self._decoder = self._decoder or self._get_decoder()
self._decoder.setstate((b"", dec_flags))
self._snapshot = (dec_flags, b'')
if feed_bytes or need_eof:
# Feed feed_bytes bytes to the decoder.
input_chunk = self.buffer.read(feed_bytes)
self._decoded_text = self._decoder.decode(input_chunk, need_eof)
if len(self._decoded_text) < skip_chars:
decoded = self._decoder.decode(input_chunk, need_eof)
if len(decoded) < skip_chars:
raise IOError("can't restore logical file position")
# Skip skip_chars of the decoded characters.
self._decoded_text_offset = skip_chars
self._decoded_text = decoded[skip_chars:]
# Restore the snapshot.
self._snapshot = (dec_flags, input_chunk)
self._snapshot = (dec_flags, input_chunk, len(decoded))
return cookie
def _clear_decoded_text(self):
"""Reset the _decoded_text buffer."""
self._decoded_text = ''
self._decoded_text_offset = 0
self._snapshot = None
def _emit_decoded_text(self, n=None):
"""Advance into the _decoded_text buffer."""
offset = self._decoded_text_offset
if n is None:
text = self._decoded_text[offset:]
else:
text = self._decoded_text[offset:offset + n]
self._decoded_text_offset += len(text)
return text
def _unemit_decoded_text(self, n):
"""Rewind the _decoded_text buffer."""
if self._decoded_text_offset < n:
raise AssertionError("unemit out of bounds")
self._decoded_text_offset -= n
def read(self, n=None):
if n is None:
n = -1
decoder = self._decoder or self._get_decoder()
result = self._decoded_text
if n < 0:
# Read everything.
result = (self._emit_decoded_text() +
decoder.decode(self.buffer.read(), final=True))
self._clear_decoded_text()
result += decoder.decode(self.buffer.read(), True)
self._decoded_text = ""
self._snapshot = None
return result
else:
# Keep reading chunks until we have n characters to return.
eof = False
result = self._emit_decoded_text(n)
while len(result) < n and not eof:
eof = not self._read_chunk()
result += self._emit_decoded_text(n - len(result))
return result
while len(result) < n:
input_chunk, decoded = self._read_chunk()
result += decoded
if not input_chunk:
break
self._decoded_text = result[n:]
return result[:n]
def __next__(self):
self._telling = False
......@@ -1495,20 +1474,21 @@ class TextIOWrapper(TextIOBase):
def readline(self, limit=None):
if limit is None:
limit = -1
if limit >= 0:
# XXX Hack to support limit argument, for backwards compatibility
line = self.readline()
if len(line) <= limit:
return line
line, self._decoded_text = \
line[:limit], line[limit:] + self._decoded_text
return line
# Grab all the decoded text (we will rewind any extra bits later).
line = self._emit_decoded_text()
line = self._decoded_text
start = 0
decoder = self._decoder or self._get_decoder()
pos = endpos = None
while True:
if limit >= 0 and len(line) >= limit:
# Length limit has been reached.
endpos = limit
break
if self._readtranslate:
# Newlines are already translated, only search for \n
pos = line.find('\n', start)
......@@ -1558,18 +1538,20 @@ class TextIOWrapper(TextIOBase):
# No line ending seen yet - get more data
more_line = ''
while self._read_chunk():
if self._decoded_text:
while True:
readahead, pending = self._read_chunk()
more_line = pending
if more_line or not readahead:
break
if self._decoded_text:
line += self._emit_decoded_text()
if more_line:
line += more_line
else:
# end of file
self._clear_decoded_text()
self._decoded_text = ''
self._snapshot = None
return line
# Rewind _decoded_text to just after the line ending we found.
self._unemit_decoded_text(len(line) - endpos)
self._decoded_text = line[endpos:]
return line[:endpos]
@property
......
......@@ -590,9 +590,7 @@ class StatefulIncrementalDecoderTest(unittest.TestCase):
# I=0, O=3
(b'i.o3.x.xyz.toolong.', False, 'x--.xyz.too.'),
# I=6, O=3
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.'),
# I=5, O=8 with newlines
(b'i.o8.i5.abc\ndef\nghy\nz', True, 'abc\nd---.ef\ngh---.y\nz-----.')
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.')
]
def testDecoder(self):
......@@ -892,8 +890,8 @@ class TextIOWrapperTest(unittest.TestCase):
return codecs.CodecInfo(
name='test_decoder', encode=None, decode=None,
incrementalencoder=None,
incrementaldecoder=StatefulIncrementalDecoder,
streamreader=None, streamwriter=None)
streamreader=None, streamwriter=None,
incrementaldecoder=StatefulIncrementalDecoder)
def testSeekAndTellWithData(data, min_pos=0):
"""Tell/seek to various points within a data stream and ensure
......@@ -905,42 +903,16 @@ class TextIOWrapperTest(unittest.TestCase):
decoded = f.read()
f.close()
# Use read() to move to various positions in the input;
# then tell, read some more data, and seek back.
for i in range(min_pos, len(decoded) + 1): # to read before tell
for j in [1, 5, len(decoded)]: # to read after tell
for i in range(min_pos, len(decoded) + 1): # seek positions
for j in [1, 5, len(decoded) - i]: # read lengths
f = io.open(test_support.TESTFN, encoding='test_decoder')
self.assertEquals(f.read(i), decoded[:i])
cookie = f.tell()
self.assertEquals(f.read(j), decoded[i:i + j])
f.seek(cookie)
self.assertEquals(f.tell(), cookie)
self.assertEquals(f.read(), decoded[i:])
f.close()
lines = len(decoded.split('\n'))
# Use readline() to move to various positions in the input;
# then tell, read some more data, and seek back.
for limit in [-1, 4, 128]: # 'limit' argument for readline()
for j in [1, 5, len(decoded)]: # to read after tell()
f = io.open(test_support.TESTFN, encoding='test_decoder')
text = ''
for k in range(lines): # repeatedly call readline()
line = f.readline(limit=limit)
if limit >= 0:
self.assert_(len(line) <= limit)
text += line
i = len(text)
self.assertEquals(text, decoded[:i])
cookie = f.tell()
self.assertEquals(f.read(j), decoded[i:i + j])
f.seek(cookie)
self.assertEquals(f.tell(), cookie)
self.assertEquals(f.read(), decoded[i:])
f.seek(cookie)
f.close()
# Register a special incremental decoder for testing.
codecs.register(lookupTestDecoder)
self.codecEnabled = 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment