Unverified commit c1b75b5f, authored by Miss Islington (bot), committed by GitHub

bpo-34010: Fix tarfile read performance regression (GH-8020)


During buffered read, use a list followed by join instead of extending a bytes object.
This is how it was done before, but it was changed in commit b506dc32.
(cherry picked from commit 12a08c47)
Co-authored-by: hajoscher <hajoscher@gmail.com>
üst 2cbd1bb1
...@@ -532,7 +532,7 @@ class _Stream: ...@@ -532,7 +532,7 @@ class _Stream:
if not buf: if not buf:
break break
t.append(buf) t.append(buf)
buf = "".join(t) buf = b"".join(t)
else: else:
buf = self._read(size) buf = self._read(size)
self.pos += len(buf) self.pos += len(buf)
...@@ -545,6 +545,7 @@ class _Stream: ...@@ -545,6 +545,7 @@ class _Stream:
return self.__read(size) return self.__read(size)
c = len(self.dbuf) c = len(self.dbuf)
t = [self.dbuf]
while c < size: while c < size:
buf = self.__read(self.bufsize) buf = self.__read(self.bufsize)
if not buf: if not buf:
...@@ -553,26 +554,27 @@ class _Stream: ...@@ -553,26 +554,27 @@ class _Stream:
buf = self.cmp.decompress(buf) buf = self.cmp.decompress(buf)
except self.exception: except self.exception:
raise ReadError("invalid compressed data") raise ReadError("invalid compressed data")
self.dbuf += buf t.append(buf)
c += len(buf) c += len(buf)
buf = self.dbuf[:size] t = b"".join(t)
self.dbuf = self.dbuf[size:] self.dbuf = t[size:]
return buf return t[:size]
def __read(self, size): def __read(self, size):
"""Return size bytes from stream. If internal buffer is empty, """Return size bytes from stream. If internal buffer is empty,
read another block from the stream. read another block from the stream.
""" """
c = len(self.buf) c = len(self.buf)
t = [self.buf]
while c < size: while c < size:
buf = self.fileobj.read(self.bufsize) buf = self.fileobj.read(self.bufsize)
if not buf: if not buf:
break break
self.buf += buf t.append(buf)
c += len(buf) c += len(buf)
buf = self.buf[:size] t = b"".join(t)
self.buf = self.buf[size:] self.buf = t[size:]
return buf return t[:size]
# class _Stream # class _Stream
class _StreamProxy(object): class _StreamProxy(object):
......
Fixed a performance regression for reading streams with tarfile. The
buffered read should use a list, instead of appending to a bytes object.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment