Unverified commit 76618061, authored by Łukasz Langa, committed by GitHub

[lib2to3] Make grammar pickling faster (#6491)

* Now uses pickle protocol 4

* No longer wraps the grammar's `__dict__` in ordered dictionaries, because
dictionaries in Python 3.6+ preserve insertion order by default

This still produces deterministic pickles (that hash the same with MD5).
Tested with different PYTHONHASHSEED values.
parent 2bea9476
...@@ -86,21 +86,9 @@ class Grammar(object): ...@@ -86,21 +86,9 @@ class Grammar(object):
self.start = 256 self.start = 256
def dump(self, filename): def dump(self, filename):
"""Dump the grammar tables to a pickle file. """Dump the grammar tables to a pickle file."""
dump() recursively changes all dict to OrderedDict, so the pickled file
is not exactly the same as what was passed in to dump(). load() uses the
pickled file to create the tables, but only changes OrderedDict to dict
at the top level; it does not recursively change OrderedDict to dict.
So, the loaded tables are different from the original tables that were
passed to load() in that some of the OrderedDict (from the pickled file)
are not changed back to dict. For parsing, this has no effect on
performance because OrderedDict uses dict's __getitem__ with nothing in
between.
"""
with open(filename, "wb") as f: with open(filename, "wb") as f:
d = _make_deterministic(self.__dict__) pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
pickle.dump(d, f, 2)
def load(self, filename): def load(self, filename):
"""Load the grammar tables from a pickle file.""" """Load the grammar tables from a pickle file."""
...@@ -141,17 +129,6 @@ class Grammar(object): ...@@ -141,17 +129,6 @@ class Grammar(object):
print("start", self.start) print("start", self.start)
def _make_deterministic(top):
if isinstance(top, dict):
return collections.OrderedDict(
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
if isinstance(top, list):
return [_make_deterministic(e) for e in top]
if isinstance(top, tuple):
return tuple(_make_deterministic(e) for e in top)
return top
# Map from operator to number (since tokenize doesn't do this) # Map from operator to number (since tokenize doesn't do this)
opmap_raw = """ opmap_raw = """
......
lib2to3 now uses pickle protocol 4 for pre-computed grammars.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment