summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Łukasz Langa <lukasz@langa.pl> 2018-04-17 00:33:59 (GMT)
committer GitHub <noreply@github.com> 2018-04-17 00:33:59 (GMT)
commit 76618061b92e23a53b78b7ec3a173cb0e7749b4a (patch)
tree bf9566760688e23d116ad8ae0d241c7091950463
parent 2bea9476286ad8e2e87bf0415ff743b1487c5018 (diff)
downloadcpython-76618061b92e23a53b78b7ec3a173cb0e7749b4a.zip
cpython-76618061b92e23a53b78b7ec3a173cb0e7749b4a.tar.gz
cpython-76618061b92e23a53b78b7ec3a173cb0e7749b4a.tar.bz2
[lib2to3] Make grammar pickling faster (#6491)
* Now uses pickle protocol 4
* Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore, as dictionaries in Python 3.6+ are ordered by default

This still produces deterministic pickles (that hash the same with MD5). Tested with different PYTHONHASHSEED values.
-rw-r--r-- Lib/lib2to3/pgen2/grammar.py 27
-rw-r--r-- Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst 1
2 files changed, 3 insertions, 25 deletions
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index 088c58b..c00cb22 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -86,21 +86,9 @@ class Grammar(object):
self.start = 256
def dump(self, filename):
- """Dump the grammar tables to a pickle file.
-
- dump() recursively changes all dict to OrderedDict, so the pickled file
- is not exactly the same as what was passed in to dump(). load() uses the
- pickled file to create the tables, but only changes OrderedDict to dict
- at the top level; it does not recursively change OrderedDict to dict.
- So, the loaded tables are different from the original tables that were
- passed to load() in that some of the OrderedDict (from the pickled file)
- are not changed back to dict. For parsing, this has no effect on
- performance because OrderedDict uses dict's __getitem__ with nothing in
- between.
- """
+ """Dump the grammar tables to a pickle file."""
with open(filename, "wb") as f:
- d = _make_deterministic(self.__dict__)
- pickle.dump(d, f, 2)
+ pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@@ -141,17 +129,6 @@ class Grammar(object):
print("start", self.start)
-def _make_deterministic(top):
- if isinstance(top, dict):
- return collections.OrderedDict(
- sorted(((k, _make_deterministic(v)) for k, v in top.items())))
- if isinstance(top, list):
- return [_make_deterministic(e) for e in top]
- if isinstance(top, tuple):
- return tuple(_make_deterministic(e) for e in top)
- return top
-
-
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
diff --git a/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst b/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst
new file mode 100644
index 0000000..8116e3b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst
@@ -0,0 +1 @@
+lib2to3 now uses pickle protocol 4 for pre-computed grammars.