Kaydet (Commit) 06e52252 authored tarafından Serhiy Storchaka's avatar Serhiy Storchaka Kaydeden (comit) GitHub

bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550)

üst d4edfc9a
...@@ -509,65 +509,63 @@ class ZipInfo (object): ...@@ -509,65 +509,63 @@ class ZipInfo (object):
return self.filename[-1] == '/' return self.filename[-1] == '/'
class _ZipDecrypter: # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
"""Class to handle decryption of files stored within a ZIP archive. # internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().
_crctable = None
def _gen_crc(crc):
for j in range(8):
if crc & 1:
crc = (crc >> 1) ^ 0xEDB88320
else:
crc >>= 1
return crc
# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
# zd = _ZipDecrypter(mypwd)
# plain_bytes = zd(cypher_bytes)
def _ZipDecrypter(pwd):
key0 = 305419896
key1 = 591751049
key2 = 878082192
global _crctable
if _crctable is None:
_crctable = list(map(_gen_crc, range(256)))
crctable = _crctable
def crc32(ch, crc):
"""Compute the CRC32 primitive on one byte."""
return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
ZIP supports a password-based form of encryption. Even though known def update_keys(c):
plaintext attacks have been found against it, it is still useful nonlocal key0, key1, key2
to be able to get data out of such a file. key0 = crc32(c, key0)
key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
key2 = crc32(key1 >> 24, key2)
Usage: for p in pwd:
zd = _ZipDecrypter(mypwd) update_keys(p)
plain_char = zd(cypher_char)
plain_text = map(zd, cypher_text)
"""
def _GenerateCRCTable(): def decrypter(data):
"""Generate a CRC-32 table. """Decrypt a bytes object."""
result = bytearray()
append = result.append
for c in data:
k = key2 | 2
c ^= ((k * (k^1)) >> 8) & 0xFF
update_keys(c)
append(c)
return bytes(result)
ZIP encryption uses the CRC32 one-byte primitive for scrambling some return decrypter
internal keys. We noticed that a direct implementation is faster than
relying on binascii.crc32().
"""
poly = 0xedb88320
table = [0] * 256
for i in range(256):
crc = i
for j in range(8):
if crc & 1:
crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
else:
crc = ((crc >> 1) & 0x7FFFFFFF)
table[i] = crc
return table
crctable = None
def _crc32(self, ch, crc):
"""Compute the CRC32 primitive on one byte."""
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
def __init__(self, pwd):
    """Set up the three keys and feed the password *pwd* into them.

    The CRC table is generated once, on first construction, and stored
    on the class.  Every item obtained by iterating *pwd* is then passed
    to ``_UpdateKeys``.
    """
    cls = _ZipDecrypter
    if cls.crctable is None:
        cls.crctable = cls._GenerateCRCTable()
    # Fixed starting values of the key state.
    self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
    for item in pwd:
        self._UpdateKeys(item)
def _UpdateKeys(self, c):
self.key0 = self._crc32(c, self.key0)
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
self.key1 = (self.key1 * 134775813 + 1) & 4294967295
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
def __call__(self, c):
"""Decrypt a single character."""
assert isinstance(c, int)
k = self.key2 | 2
c = c ^ (((k * (k^1)) >> 8) & 255)
self._UpdateKeys(c)
return c
class LZMACompressor: class LZMACompressor:
...@@ -953,7 +951,7 @@ class ZipExtFile(io.BufferedIOBase): ...@@ -953,7 +951,7 @@ class ZipExtFile(io.BufferedIOBase):
raise EOFError raise EOFError
if self._decrypter is not None: if self._decrypter is not None:
data = bytes(map(self._decrypter, data)) data = self._decrypter(data)
return data return data
def close(self): def close(self):
...@@ -1411,7 +1409,7 @@ class ZipFile: ...@@ -1411,7 +1409,7 @@ class ZipFile:
# or the MSB of the file time depending on the header type # or the MSB of the file time depending on the header type
# and is used to check the correctness of the password. # and is used to check the correctness of the password.
header = zef_file.read(12) header = zef_file.read(12)
h = list(map(zd, header[0:12])) h = zd(header[0:12])
if zinfo.flag_bits & 0x8: if zinfo.flag_bits & 0x8:
# compare against the file type from extended local headers # compare against the file type from extended local headers
check_byte = (zinfo._raw_time >> 8) & 0xff check_byte = (zinfo._raw_time >> 8) & 0xff
......
...@@ -301,6 +301,8 @@ Extension Modules ...@@ -301,6 +301,8 @@ Extension Modules
Library Library
------- -------
- bpo-10030: Sped up reading encrypted ZIP files by 2 times.
- bpo-29204: Element.getiterator() and the html parameter of XMLParser() were - bpo-29204: Element.getiterator() and the html parameter of XMLParser() were
deprecated only in the documentation (since Python 3.2 and 3.4 respectively). deprecated only in the documentation (since Python 3.2 and 3.4 respectively).
Now using them emits a deprecation warning. Now using them emits a deprecation warning.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment