diff options
author | Łukasz Langa <lukasz@langa.pl> | 2018-04-17 00:33:59 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-17 00:33:59 (GMT) |
commit | 76618061b92e23a53b78b7ec3a173cb0e7749b4a (patch) | |
tree | bf9566760688e23d116ad8ae0d241c7091950463 | |
parent | 2bea9476286ad8e2e87bf0415ff743b1487c5018 (diff) | |
download | cpython-76618061b92e23a53b78b7ec3a173cb0e7749b4a.zip cpython-76618061b92e23a53b78b7ec3a173cb0e7749b4a.tar.gz cpython-76618061b92e23a53b78b7ec3a173cb0e7749b4a.tar.bz2 |
[lib2to3] Make grammar pickling faster (#6491)
* Now uses pickle protocol 4
* Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore as
dictionaries in Python 3.6+ are ordered by default
This still produces deterministic pickles (that hash the same with MD5).
Tested with different PYTHONHASHSEED values.
-rw-r--r-- | Lib/lib2to3/pgen2/grammar.py | 27 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst | 1 |
2 files changed, 3 insertions, 25 deletions
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py index 088c58b..c00cb22 100644 --- a/Lib/lib2to3/pgen2/grammar.py +++ b/Lib/lib2to3/pgen2/grammar.py @@ -86,21 +86,9 @@ class Grammar(object): self.start = 256 def dump(self, filename): - """Dump the grammar tables to a pickle file. - - dump() recursively changes all dict to OrderedDict, so the pickled file - is not exactly the same as what was passed in to dump(). load() uses the - pickled file to create the tables, but only changes OrderedDict to dict - at the top level; it does not recursively change OrderedDict to dict. - So, the loaded tables are different from the original tables that were - passed to load() in that some of the OrderedDict (from the pickled file) - are not changed back to dict. For parsing, this has no effect on - performance because OrderedDict uses dict's __getitem__ with nothing in - between. - """ + """Dump the grammar tables to a pickle file.""" with open(filename, "wb") as f: - d = _make_deterministic(self.__dict__) - pickle.dump(d, f, 2) + pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) def load(self, filename): """Load the grammar tables from a pickle file.""" @@ -141,17 +129,6 @@ class Grammar(object): print("start", self.start) -def _make_deterministic(top): - if isinstance(top, dict): - return collections.OrderedDict( - sorted(((k, _make_deterministic(v)) for k, v in top.items()))) - if isinstance(top, list): - return [_make_deterministic(e) for e in top] - if isinstance(top, tuple): - return tuple(_make_deterministic(e) for e in top) - return top - - # Map from operator to number (since tokenize doesn't do this) opmap_raw = """ diff --git a/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst b/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst new file mode 100644 index 0000000..8116e3b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst @@ -0,0 +1 @@ +lib2to3 now uses pickle protocol 4 for 
pre-computed grammars. |