-rw-r--r--   Lib/lib2to3/pgen2/driver.py        15
-rw-r--r--   Lib/lib2to3/pgen2/grammar.py       28
-rw-r--r--   Lib/lib2to3/pgen2/pgen.py           8
-rw-r--r--   Lib/lib2to3/tests/support.py        6
-rw-r--r--   Lib/lib2to3/tests/test_parser.py   72
-rw-r--r--   Misc/NEWS                           4
6 files changed, 115 insertions, 18 deletions
diff --git a/Lib/lib2to3/pgen2/driver.py b/Lib/lib2to3/pgen2/driver.py
index 3ccc69d..a27b9cb 100644
--- a/Lib/lib2to3/pgen2/driver.py
+++ b/Lib/lib2to3/pgen2/driver.py
@@ -106,16 +106,19 @@ class Driver(object):
return self.parse_tokens(tokens, debug)
+def _generate_pickle_name(gt):
+ head, tail = os.path.splitext(gt)
+ if tail == ".txt":
+ tail = ""
+ return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
- if gp is None:
- head, tail = os.path.splitext(gt)
- if tail == ".txt":
- tail = ""
- gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+ gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
@@ -124,7 +127,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
try:
g.dump(gp)
except OSError as e:
- logger.info("Writing failed:"+str(e))
+ logger.info("Writing failed: %s", e)
else:
g = grammar.Grammar()
g.load(gp)
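For reference, here is a minimal standalone sketch of what the new
_generate_pickle_name() helper returns. The version string embedded in the
name depends on the running interpreter; the 3.6.0 output shown below is
only illustrative.

    # Standalone copy of the helper, for illustration only; the printed
    # output assumes sys.version_info == (3, 6, 0, 'final', 0).
    import os
    import sys

    def _generate_pickle_name(gt):
        head, tail = os.path.splitext(gt)
        if tail == ".txt":
            tail = ""
        return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"

    print(_generate_pickle_name("Grammar.txt"))         # Grammar3.6.0.final.0.pickle
    print(_generate_pickle_name("PatternGrammar.txt"))  # PatternGrammar3.6.0.final.0.pickle

Embedding the interpreter version in the cache name keeps pickles generated
by different Python versions from colliding.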
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index b4481d1..52cdbc0 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -13,6 +13,7 @@ fallback token code OP, but the parser needs the actual token code.
"""
# Python imports
+import collections
import pickle
# Local imports
@@ -85,9 +86,21 @@ class Grammar(object):
self.start = 256
def dump(self, filename):
- """Dump the grammar tables to a pickle file."""
+ """Dump the grammar tables to a pickle file.
+
+ dump() recursively changes all dict to OrderedDict, so the pickled file
+ is not exactly the same as what was passed in to dump(). load() uses the
+ pickled file to create the tables, but only changes OrderedDict to dict
+ at the top level; it does not recursively change OrderedDict to dict.
+ So, the loaded tables are different from the original tables that were
+ passed to dump() in that some of the OrderedDicts (from the pickled file)
+ are not changed back to dicts. For parsing, this has no effect on
+ performance because OrderedDict uses dict's __getitem__ with nothing in
+ between.
+ """
with open(filename, "wb") as f:
- pickle.dump(self.__dict__, f, 2)
+ d = _make_deterministic(self.__dict__)
+ pickle.dump(d, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@@ -124,6 +137,17 @@ class Grammar(object):
print("start", self.start)
+def _make_deterministic(top):
+ if isinstance(top, dict):
+ return collections.OrderedDict(
+ sorted(((k, _make_deterministic(v)) for k, v in top.items())))
+ if isinstance(top, list):
+ return [_make_deterministic(e) for e in top]
+ if isinstance(top, tuple):
+ return tuple(_make_deterministic(e) for e in top)
+ return top
+
+
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
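Why the OrderedDict conversion matters: pickling self.__dict__ directly
serializes each dict in iteration order, which for string keys can vary from
process to process under hash randomization, so two runs could write
different bytes for identical tables. _make_deterministic() rewrites every
mapping as an OrderedDict with sorted keys, making the pickle a pure
function of the table contents. A minimal sketch, assuming this patch is
applied (the sample data is made up):

    import pickle
    from lib2to3.pgen2.grammar import _make_deterministic

    # Same contents, different insertion order:
    a = {"x": 1, "y": {"m": 2, "n": 3}}
    b = {"y": {"n": 3, "m": 2}, "x": 1}

    # After normalization, both structures pickle to identical bytes.
    assert pickle.dumps(_make_deterministic(a), 2) == \
           pickle.dumps(_make_deterministic(b), 2)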
diff --git a/Lib/lib2to3/pgen2/pgen.py b/Lib/lib2to3/pgen2/pgen.py
index 2c51eef..b0cbd16 100644
--- a/Lib/lib2to3/pgen2/pgen.py
+++ b/Lib/lib2to3/pgen2/pgen.py
@@ -39,7 +39,7 @@ class ParserGenerator(object):
states = []
for state in dfa:
arcs = []
- for label, next in state.arcs.items():
+ for label, next in sorted(state.arcs.items()):
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@ class ParserGenerator(object):
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
- for label in rawfirst:
+ for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
@@ -192,7 +192,7 @@ class ParserGenerator(object):
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
- for label, nfaset in arcs.items():
+ for label, nfaset in sorted(arcs.items()):
for st in states:
if st.nfaset == nfaset:
break
@@ -222,7 +222,7 @@ class ParserGenerator(object):
print("Dump of DFA for", name)
for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "")
- for label, next in state.arcs.items():
+ for label, next in sorted(state.arcs.items()):
print(" %s -> %d" % (label, dfa.index(next)))
def simplify_dfa(self, dfa):
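The sorted() calls address the same nondeterminism on the generation side:
pgen numbers labels and states in whatever order it walks these dicts and
sets, so an unsorted walk bakes the current hash seed into the emitted
tables. A toy illustration (not lib2to3 code; set iteration order for
strings genuinely varies with PYTHONHASHSEED):

    # Assign numbers to labels in a reproducible order.
    labels = {"NAME", "if", "while"}    # iteration order varies per process
    numbered = {}
    for label in sorted(labels):        # sorted() pins the order
        numbered[label] = len(numbered)
    assert numbered == {"NAME": 0, "if": 1, "while": 2}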
diff --git a/Lib/lib2to3/tests/support.py b/Lib/lib2to3/tests/support.py
index 6f2d214..0897177 100644
--- a/Lib/lib2to3/tests/support.py
+++ b/Lib/lib2to3/tests/support.py
@@ -11,13 +11,13 @@ from textwrap import dedent
# Local imports
from lib2to3 import pytree, refactor
-from lib2to3.pgen2 import driver
+from lib2to3.pgen2 import driver as pgen2_driver
test_dir = os.path.dirname(__file__)
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
-grammar = driver.load_grammar(grammar_path)
-driver = driver.Driver(grammar, convert=pytree.convert)
+grammar = pgen2_driver.load_grammar(grammar_path)
+driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
def parse_string(string):
return driver.parse_string(reformat(string), debug=True)
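The import rename in support.py is not purely cosmetic: the old code rebound
the name driver from the pgen2 module to a Driver instance on the very next
line, leaving the module unreachable afterwards. In miniature (the grammar
path here is assumed for illustration):

    from lib2to3 import pytree
    from lib2to3.pgen2 import driver

    grammar = driver.load_grammar("Grammar.txt")              # `driver` is the module
    driver = driver.Driver(grammar, convert=pytree.convert)   # now it is an instance
    # driver.load_grammar(...) would now raise AttributeError: a Driver
    # instance has no load_grammar attribute.

Aliasing the module as pgen2_driver keeps both names usable, which the new
tests in test_parser.py depend on.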
diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py
index b533c01..0b2ca8b 100644
--- a/Lib/lib2to3/tests/test_parser.py
+++ b/Lib/lib2to3/tests/test_parser.py
@@ -6,8 +6,6 @@ parts of the grammar we've changed, we also make sure we can parse the
test_grammar.py files from both Python 2 and Python 3.
"""
-from __future__ import with_statement
-
# Testing imports
from . import support
from .support import driver, test_dir
@@ -15,12 +13,15 @@ from test.support import verbose
# Python imports
import os
+import shutil
+import subprocess
import sys
+import tempfile
import unittest
import warnings
-import subprocess
# Local imports
+from lib2to3.pgen2 import driver as pgen2_driver
from lib2to3.pgen2 import tokenize
from ..pgen2.parse import ParseError
from lib2to3.pygram import python_symbols as syms
@@ -35,6 +36,71 @@ class TestDriver(support.TestCase):
self.assertEqual(t.children[1].children[0].type, syms.print_stmt)
+class TestPgen2Caching(support.TestCase):
+ def test_load_grammar_from_txt_file(self):
+ pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)
+
+ def test_load_grammar_from_pickle(self):
+ # Make a copy of the grammar file in a temp directory we are
+ # guaranteed to be able to write to.
+ tmpdir = tempfile.mkdtemp()
+ try:
+ grammar_copy = os.path.join(
+ tmpdir, os.path.basename(support.grammar_path))
+ shutil.copy(support.grammar_path, grammar_copy)
+ pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+
+ pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+ self.assertTrue(os.path.exists(pickle_name))
+
+ os.unlink(grammar_copy) # Only the pickle remains...
+ pgen2_driver.load_grammar(grammar_copy, save=False, force=False)
+ finally:
+ shutil.rmtree(tmpdir)
+
+ @unittest.skipIf(sys.executable is None, 'sys.executable required')
+ def test_load_grammar_from_subprocess(self):
+ tmpdir = tempfile.mkdtemp()
+ tmpsubdir = os.path.join(tmpdir, 'subdir')
+ try:
+ os.mkdir(tmpsubdir)
+ grammar_base = os.path.basename(support.grammar_path)
+ grammar_copy = os.path.join(tmpdir, grammar_base)
+ grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
+ shutil.copy(support.grammar_path, grammar_copy)
+ shutil.copy(support.grammar_path, grammar_sub_copy)
+ pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+ pickle_sub_name = pgen2_driver._generate_pickle_name(
+ grammar_sub_copy)
+ self.assertNotEqual(pickle_name, pickle_sub_name)
+
+ # Generate a pickle file from this process.
+ pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+ self.assertTrue(os.path.exists(pickle_name))
+
+ # Generate a new pickle file in a subprocess with a most likely
+ # different hash randomization seed.
+ sub_env = dict(os.environ)
+ sub_env['PYTHONHASHSEED'] = 'random'
+ subprocess.check_call(
+ [sys.executable, '-c', """
+from lib2to3.pgen2 import driver as pgen2_driver
+pgen2_driver.load_grammar(%r, save=True, force=True)
+ """ % (grammar_sub_copy,)],
+ env=sub_env)
+ self.assertTrue(os.path.exists(pickle_sub_name))
+
+ with open(pickle_name, 'rb') as pickle_f_1, \
+ open(pickle_sub_name, 'rb') as pickle_f_2:
+ self.assertEqual(
+ pickle_f_1.read(), pickle_f_2.read(),
+ msg='Grammar caches generated using different hash seeds'
+ ' were not identical.')
+ finally:
+ shutil.rmtree(tmpdir)
+
+
class GrammarTest(support.TestCase):
def validate(self, code):
support.parse_string(code)
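To exercise just the new caching tests outside the full regression suite,
one option (among others) is a small unittest driver; this assumes lib2to3
and its test package are importable, as in a CPython checkout:

    import unittest
    from lib2to3.tests import test_parser

    suite = unittest.defaultTestLoader.loadTestsFromTestCase(
        test_parser.TestPgen2Caching)
    unittest.TextTestRunner(verbosity=2).run(suite)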
diff --git a/Misc/NEWS b/Misc/NEWS
index 9398b8d..e4991c9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -67,6 +67,10 @@ Core and Builtins
Library
-------
+- lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
+ between runs given the same Grammar.txt input regardless of the hash
+ randomization setting.
+
- Issue #27570: Avoid zero-length memcpy() etc calls with null source
pointers in the "ctypes" and "array" modules.