summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2017-05-05 07:46:47 (GMT)
committerGitHub <noreply@github.com>2017-05-05 07:46:47 (GMT)
commitef9c0e732fc50aefbdd7c5a80e04e14b31684e66 (patch)
treea59653d14c060ab0037012529ccc678ee8049b96 /Lib
parent7186cc29be352bed6f1110873283d073fd0643e4 (diff)
downloadcpython-ef9c0e732fc50aefbdd7c5a80e04e14b31684e66.zip
cpython-ef9c0e732fc50aefbdd7c5a80e04e14b31684e66.tar.gz
cpython-ef9c0e732fc50aefbdd7c5a80e04e14b31684e66.tar.bz2
bpo-30264: ExpatParser closes the source on error (#1451)
ExpatParser.parse() of xml.sax.xmlreader now always closes the source: close the file object or the urllib object if source is a string (not an open file-like object). The change fixes a ResourceWarning on parsing error. Add test_parse_close_source() unit test.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/test_sax.py24
-rw-r--r--Lib/xml/sax/expatreader.py33
2 files changed, 40 insertions, 17 deletions
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 2411895..2eb6290 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -4,6 +4,7 @@
from xml.sax import make_parser, ContentHandler, \
SAXException, SAXReaderNotAvailable, SAXParseException
import unittest
+from unittest import mock
try:
make_parser()
except SAXReaderNotAvailable:
@@ -175,12 +176,8 @@ class ParseTest(unittest.TestCase):
with self.assertRaises(SAXException):
self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
make_xml_file(self.data, 'iso-8859-1', None)
- with support.check_warnings(('unclosed file', ResourceWarning)):
- # XXX Failed parser leaks an opened file.
- with self.assertRaises(SAXException):
- self.check_parse(TESTFN)
- # Collect leaked file.
- gc.collect()
+ with self.assertRaises(SAXException):
+ self.check_parse(TESTFN)
with open(TESTFN, 'rb') as f:
with self.assertRaises(SAXException):
self.check_parse(f)
@@ -194,6 +191,21 @@ class ParseTest(unittest.TestCase):
input.setEncoding('iso-8859-1')
self.check_parse(input)
+ def test_parse_close_source(self):
+ builtin_open = open
+ fileobj = None
+
+ def mock_open(*args):
+ nonlocal fileobj
+ fileobj = builtin_open(*args)
+ return fileobj
+
+ with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
+ make_xml_file(self.data, 'iso-8859-1', None)
+ with self.assertRaises(SAXException):
+ self.check_parse(TESTFN)
+ self.assertTrue(fileobj.closed)
+
def check_parseString(self, s):
from xml.sax import parseString
result = StringIO()
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 98b5ca9..421358f 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -105,9 +105,16 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
source = saxutils.prepare_input_source(source)
self._source = source
- self.reset()
- self._cont_handler.setDocumentLocator(ExpatLocator(self))
- xmlreader.IncrementalParser.parse(self, source)
+ try:
+ self.reset()
+ self._cont_handler.setDocumentLocator(ExpatLocator(self))
+ xmlreader.IncrementalParser.parse(self, source)
+ except:
+ # bpo-30264: Close the source on error to not leak resources:
+ # xml.sax.parse() doesn't give access to the underlying parser
+ # to the caller
+ self._close_source()
+ raise
def prepareParser(self, source):
if source.getSystemId() is not None:
@@ -213,6 +220,17 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
# FIXME: when to invoke error()?
self._err_handler.fatalError(exc)
+ def _close_source(self):
+ source = self._source
+ try:
+ file = source.getCharacterStream()
+ if file is not None:
+ file.close()
+ finally:
+ file = source.getByteStream()
+ if file is not None:
+ file.close()
+
def close(self):
if (self._entity_stack or self._parser is None or
isinstance(self._parser, _ClosedParser)):
@@ -232,14 +250,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
parser.ErrorLineNumber = self._parser.ErrorLineNumber
self._parser = parser
- try:
- file = self._source.getCharacterStream()
- if file is not None:
- file.close()
- finally:
- file = self._source.getByteStream()
- if file is not None:
- file.close()
+ self._close_source()
def _reset_cont_handler(self):
self._parser.ProcessingInstructionHandler = \