diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2019-09-27 14:53:34 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-27 14:53:34 (GMT) |
commit | 6693f730e0eb77d9453f73a3da33b78a97e996ee (patch) | |
tree | 767d8adedbf3db4c89a1b74d030d1b563e4199fb | |
parent | 90558158093c0ad893102158fd3c2dd9f864e82e (diff) | |
download | cpython-6693f730e0eb77d9453f73a3da33b78a97e996ee.zip cpython-6693f730e0eb77d9453f73a3da33b78a97e996ee.tar.gz cpython-6693f730e0eb77d9453f73a3da33b78a97e996ee.tar.bz2 |
bpo-38187: Fix a refleak in Tools/c-analyzer. (gh-16304)
The "Slot" helper (descriptor) is leaking references due to its caching mechanism. The change includes a partial fix to Slot, but also adds Variable.storage to replace the problematic use of Slot.
https://bugs.python.org/issue38187
13 files changed, 208 insertions, 89 deletions
diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py index e69de29..bc502ef 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py index 93100e0..215023d 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py @@ -15,9 +15,6 @@ class FromFileTests(unittest.TestCase): _return_read_tsv = () - def tearDown(self): - Variable._isglobal.instances.clear() - @property def calls(self): try: diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py index e69de29..bc502ef 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py index b29f966..8288992 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py @@ -64,7 +64,9 @@ class StaticsFromBinaryTests(_Base): **self.kwargs)) self.assertEqual(found, [ + info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'), + info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'), info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), ]) @@ -299,7 +301,7 @@ class StaticsTest(_Base): info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'), info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), - info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'), + info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'), # skipped info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'), @@ -318,6 +320,7 @@ class StaticsTest(_Base): self.assertEqual(found, [ info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), + info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'), info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py index e69de29..bc502ef 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py index 1dfe5d0..d1a966c 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py @@ -4,7 +4,7 @@ import unittest from ..util import PseudoStr, StrProxy, Object from .. import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common.info import ID + from c_analyzer_common.info import ID, UNKNOWN from c_parser.info import ( normalize_vartype, Variable, ) @@ -31,38 +31,47 @@ class VariableTests(unittest.TestCase): VALID_ARGS = ( ('x/y/z/spam.c', 'func', 'eggs'), + 'static', 'int', ) VALID_KWARGS = dict(zip(Variable._fields, VALID_ARGS)) VALID_EXPECTED = VALID_ARGS def test_init_typical_global(self): - static = Variable( - id=ID( - filename='x/y/z/spam.c', - funcname=None, - name='eggs', - ), - vartype='int', - ) + for storage in ('static', 'extern', 'implicit'): + with self.subTest(storage): + static = Variable( + id=ID( + filename='x/y/z/spam.c', + funcname=None, + name='eggs', + ), + storage=storage, + vartype='int', + ) - self.assertEqual(static, ( - ('x/y/z/spam.c', None, 'eggs'), - 'int', - )) + self.assertEqual(static, ( + ('x/y/z/spam.c', None, 'eggs'), + storage, + 'int', + )) def test_init_typical_local(self): - static = Variable( - id=ID( - filename='x/y/z/spam.c', - funcname='func', - name='eggs', - ), - vartype='int', - ) + for storage in ('static', 'local'): + with self.subTest(storage): + static = Variable( + id=ID( + filename='x/y/z/spam.c', + funcname='func', + name='eggs', + ), + storage=storage, + vartype='int', + ) self.assertEqual(static, ( ('x/y/z/spam.c', 'func', 'eggs'), + storage, 'int', )) @@ -71,12 +80,14 @@ class VariableTests(unittest.TestCase): with self.subTest(repr(value)): static = Variable( id=value, + storage=value, vartype=value, ) self.assertEqual(static, ( None, None, + None, )) def test_init_all_coerced(self): @@ -89,34 +100,42 @@ class VariableTests(unittest.TestCase): PseudoStr('func'), PseudoStr('spam'), ), + storage=PseudoStr('static'), vartype=PseudoStr('int'), ), (id, + 'static', 'int', )), ('non-str 1', dict( id=id, + storage=Object(), vartype=Object(), ), (id, '<object>', + '<object>', )), ('non-str 2', dict( id=id, + storage=StrProxy('static'), vartype=StrProxy('variable'), ), (id, + 'static', 'variable', )), ('non-str', dict( id=id, - vartype=('a', 'b', 'c'), + storage=('a', 'b', 'c'), + vartype=('x', 'y', 'z'), ), (id, "('a', 'b', 'c')", + "('x', 'y', 'z')", )), ] for summary, kwargs, expected in tests: @@ -134,36 +153,43 @@ class VariableTests(unittest.TestCase): def test_iterable(self): static = Variable(**self.VALID_KWARGS) - id, vartype = static + id, storage, vartype = static - values = (id, vartype) + values = (id, storage, vartype) for value, expected in zip(values, self.VALID_EXPECTED): self.assertEqual(value, expected) def test_fields(self): - static = Variable(('a', 'b', 'z'), 'x') + static = Variable(('a', 'b', 'z'), 'x', 'y') self.assertEqual(static.id, ('a', 'b', 'z')) - self.assertEqual(static.vartype, 'x') + self.assertEqual(static.storage, 'x') + self.assertEqual(static.vartype, 'y') def test___getattr__(self): - static = Variable(('a', 'b', 'z'), 'x') + static = Variable(('a', 'b', 'z'), 'x', 'y') self.assertEqual(static.filename, 'a') self.assertEqual(static.funcname, 'b') self.assertEqual(static.name, 'z') def test_validate_typical(self): - static = Variable( - id=ID( - filename='x/y/z/spam.c', - funcname='func', - name='eggs', - ), - vartype='int', - ) + validstorage = ('static', 'extern', 'implicit', 'local') + self.assertEqual(set(validstorage), set(Variable.STORAGE)) + + for storage in validstorage: + with self.subTest(storage): + static = Variable( + id=ID( + filename='x/y/z/spam.c', + funcname='func', + name='eggs', + ), + storage=storage, + vartype='int', + ) - static.validate() # This does not fail. + static.validate() # This does not fail. def test_validate_missing_field(self): for field in Variable._fields: @@ -173,6 +199,13 @@ class VariableTests(unittest.TestCase): with self.assertRaises(TypeError): static.validate() + for field in ('storage', 'vartype'): + with self.subTest(field): + static = Variable(**self.VALID_KWARGS) + static = static._replace(**{field: UNKNOWN}) + + with self.assertRaises(TypeError): + static.validate() def test_validate_bad_field(self): badch = tuple(c for c in string.punctuation + string.digits) @@ -185,6 +218,7 @@ class VariableTests(unittest.TestCase): ) + badch tests = [ ('id', ()), # Any non-empty str is okay. + ('storage', ('external', 'global') + notnames), ('vartype', ()), # Any non-empty str is okay. ] seen = set() @@ -199,6 +233,8 @@ class VariableTests(unittest.TestCase): static.validate() for field, invalid in tests: + if field == 'id': + continue valid = seen - set(invalid) for value in valid: with self.subTest(f'{field}={value!r}'): diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py index e69de29..bc502ef 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py +++ b/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Tools/c-analyzer/c_analyzer_common/_generate.py b/Tools/c-analyzer/c_analyzer_common/_generate.py index 1629aa6..9b2fc9e 100644 --- a/Tools/c-analyzer/c_analyzer_common/_generate.py +++ b/Tools/c-analyzer/c_analyzer_common/_generate.py @@ -262,7 +262,7 @@ def _known(symbol): raise if symbol.name not in decl: decl = decl + symbol.name - return Variable(varid, decl) + return Variable(varid, 'static', decl) def known_row(varid, decl): @@ -291,7 +291,7 @@ def known_rows(symbols, *, except KeyError: found = _find_match(symbol, cache, filenames) if found is None: - found = Variable(symbol.id, UNKNOWN) + found = Variable(symbol.id, UNKNOWN, UNKNOWN) yield _as_known(found.id, found.vartype) else: raise NotImplementedError # XXX incorporate KNOWN diff --git a/Tools/c-analyzer/c_analyzer_common/known.py b/Tools/c-analyzer/c_analyzer_common/known.py index a0c6dfa..dec1e1d 100644 --- a/Tools/c-analyzer/c_analyzer_common/known.py +++ b/Tools/c-analyzer/c_analyzer_common/known.py @@ -34,34 +34,41 @@ def from_file(infile, *, id = ID(filename, funcname, name) if kind == 'variable': values = known['variables'] - value = Variable(id, declaration) - value._isglobal = _is_global(declaration) or id.funcname is None + if funcname: + storage = _get_storage(declaration) or 'local' + else: + storage = _get_storage(declaration) or 'implicit' + value = Variable(id, storage, declaration) else: raise ValueError(f'unsupported kind in row {row}') - if value.name == 'id' and declaration == UNKNOWN: - # None of these are variables. - declaration = 'int id'; - else: - value.validate() + value.validate() +# if value.name == 'id' and declaration == UNKNOWN: +# # None of these are variables. +# declaration = 'int id'; +# else: +# value.validate() values[id] = value return known -def _is_global(vartype): +def _get_storage(decl): # statics - if vartype.startswith('static '): - return True - if vartype.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): - return True - if vartype.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')): - return True - if vartype.startswith('PyDoc_VAR('): - return True - if vartype.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): - return True - if vartype.startswith('WRAP_METHOD('): - return True + if decl.startswith('static '): + return 'static' + if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): + return 'static' + if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')): + return 'static' + if decl.startswith('PyDoc_VAR('): + return 'static' + if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): + return 'static' + if decl.startswith('WRAP_METHOD('): + return 'static' # public extern - if vartype.startswith('PyAPI_DATA('): - return True - return False + if decl.startswith('extern '): + return 'extern' + if decl.startswith('PyAPI_DATA('): + return 'extern' + # implicit or local + return None diff --git a/Tools/c-analyzer/c_analyzer_common/util.py b/Tools/c-analyzer/c_analyzer_common/util.py index 511c54f..43d0bb6 100644 --- a/Tools/c-analyzer/c_analyzer_common/util.py +++ b/Tools/c-analyzer/c_analyzer_common/util.py @@ -82,6 +82,13 @@ class Slot: self.default = default self.readonly = readonly + # The instance cache is not inherently tied to the normal + # lifetime of the instances. So must do something in order to + # avoid keeping the instances alive by holding a reference here. + # Ideally we would use weakref.WeakValueDictionary to do this. + # However, most builtin types do not support weakrefs. So + # instead we monkey-patch __del__ on the attached class to clear + # the instance. self.instances = {} self.name = None @@ -89,6 +96,12 @@ class Slot: if self.name is not None: raise TypeError('already used') self.name = name + try: + slotnames = cls.__slot_names__ + except AttributeError: + slotnames = cls.__slot_names__ = [] + slotnames.append(name) + self._ensure___del__(cls, slotnames) def __get__(self, obj, cls): if obj is None: # called on the class @@ -115,7 +128,23 @@ class Slot: def __delete__(self, obj): if self.readonly: raise AttributeError(f'{self.name} is readonly') - self.instances[id(obj)] = self.default + self.instances[id(obj)] = self.default # XXX refleak? + + def _ensure___del__(self, cls, slotnames): # See the comment in __init__(). + try: + old___del__ = cls.__del__ + except AttributeError: + old___del__ = (lambda s: None) + else: + if getattr(old___del__, '_slotted', False): + return + + def __del__(_self): + for name in slotnames: + delattr(_self, name) + old___del__(_self) + __del__._slotted = True + cls.__del__ = __del__ def set(self, obj, value): """Update the cached value for an object. diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py index d7368b4..a4e32d7 100644 --- a/Tools/c-analyzer/c_parser/info.py +++ b/Tools/c-analyzer/c_parser/info.py @@ -1,4 +1,5 @@ from collections import namedtuple +import re from c_analyzer_common import info, util from c_analyzer_common.util import classonly, _NTBase @@ -15,28 +16,53 @@ def normalize_vartype(vartype): return str(vartype) +def extract_storage(decl, *, isfunc=False): + """Return (storage, vartype) based on the given declaration. + + The default storage is "implicit" or "local". + """ + if decl == info.UNKNOWN: + return decl, decl + if decl.startswith('static '): + return 'static', decl + #return 'static', decl.partition(' ')[2].strip() + elif decl.startswith('extern '): + return 'extern', decl + #return 'extern', decl.partition(' ')[2].strip() + elif re.match('.*\b(static|extern)\b', decl): + raise NotImplementedError + elif isfunc: + return 'local', decl + else: + return 'implicit', decl + + class Variable(_NTBase, - namedtuple('Variable', 'id vartype')): + namedtuple('Variable', 'id storage vartype')): """Information about a single variable declaration.""" __slots__ = () - _isglobal = util.Slot() - def __del__(self): - del self._isglobal + STORAGE = ( + 'static', + 'extern', + 'implicit', + 'local', + ) @classonly - def from_parts(cls, filename, funcname, name, vartype, isglobal=False): + def from_parts(cls, filename, funcname, name, decl, storage=None): + if storage is None: + storage, decl = extract_storage(decl, isfunc=funcname) id = info.ID(filename, funcname, name) - self = cls(id, vartype) - if isglobal: - self._isglobal = True + self = cls(id, storage, decl) return self - def __new__(cls, id, vartype): + def __new__(cls, id, storage, vartype): self = super().__new__( cls, id=info.ID.from_raw(id), + storage=str(storage) if storage else None, vartype=normalize_vartype(vartype) if vartype else None, ) return self @@ -63,18 +89,17 @@ class Variable(_NTBase, """Fail if the object is invalid (i.e. init with bad data).""" self._validate_id() + if self.storage is None or self.storage == info.UNKNOWN: + raise TypeError('missing storage') + elif self.storage not in self.STORAGE: + raise ValueError(f'unsupported storage {self.storage:r}') + if self.vartype is None or self.vartype == info.UNKNOWN: raise TypeError('missing vartype') @property def isglobal(self): - try: - return self._isglobal - except AttributeError: - # XXX Include extern variables. - # XXX Ignore functions. - self._isglobal = ('static' in self.vartype.split()) - return self._isglobal + return self.storage != 'local' @property def isconst(self): diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_parser/naive.py index e0370cc..160f96c 100644 --- a/Tools/c-analyzer/c_parser/naive.py +++ b/Tools/c-analyzer/c_parser/naive.py @@ -163,7 +163,7 @@ def find_variables(varids, filenames=None, *, srcfiles = [varid.filename] else: if not filenames: - yield Variable(varid, UNKNOWN) + yield Variable(varid, UNKNOWN, UNKNOWN) continue srcfiles = filenames for filename in srcfiles: @@ -177,4 +177,4 @@ def find_variables(varids, filenames=None, *, used.add(found) break else: - yield Variable(varid, UNKNOWN) + yield Variable(varid, UNKNOWN, UNKNOWN) diff --git a/Tools/c-analyzer/c_symbols/resolve.py b/Tools/c-analyzer/c_symbols/resolve.py index dc876ae..56210ce 100644 --- a/Tools/c-analyzer/c_symbols/resolve.py +++ b/Tools/c-analyzer/c_symbols/resolve.py @@ -68,14 +68,11 @@ def find_in_source(symbol, dirnames, *, if symbol.funcname and symbol.funcname != UNKNOWN: raise NotImplementedError - (filename, funcname, vartype + (filename, funcname, decl ) = _find_symbol(symbol.name, filenames, _perfilecache) if filename == UNKNOWN: return None - return info.Variable( - id=(filename, funcname, symbol.name), - vartype=vartype, - ) + return info.Variable.from_parts(filename, funcname, symbol.name, decl) def get_resolver(knownvars=None, dirnames=None, *, @@ -144,6 +141,7 @@ def symbols_to_variables(symbols, *, #raise NotImplementedError(symbol) resolved = info.Variable( id=symbol.id, + storage=UNKNOWN, vartype=UNKNOWN, ) yield resolved |