summary | refs | log | tree | commit | diff | stats
path: root/Lib/email/feedparser.py
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-08-12 10:59:11 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-08-12 10:59:11 (GMT)
commit: 320a1c0ff715b6c04034722393efe510973430ee (patch)
tree: 6f9d2e40433afd332a4a9e4c26f2136394ffc8ac /Lib/email/feedparser.py
parent: 6f2017076293f0e1ea807260434053a58be6667b (diff)
download: cpython-320a1c0ff715b6c04034722393efe510973430ee.zip
cpython-320a1c0ff715b6c04034722393efe510973430ee.tar.gz
cpython-320a1c0ff715b6c04034722393efe510973430ee.tar.bz2
Issue #21448: Fixed FeedParser feed() to avoid O(N**2) behavior when parsing a long line.
Original patch by Raymond Hettinger.
Diffstat (limited to 'Lib/email/feedparser.py')
-rw-r--r-- Lib/email/feedparser.py 26
1 file changed, 18 insertions, 8 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index 6cf9b91..0c3b572 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
- # The last partial line pushed into this object.
- self._partial = ''
+ # Chunks of the last partial line pushed into this object.
+ self._partial = []
# The list of full, pushed lines, in reverse order
self._lines = []
# The stack of false-EOF checking predicates.
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
def close(self):
# Don't forget any trailing partial line.
- self._lines.append(self._partial)
- self._partial = ''
+ self.pushlines(''.join(self._partial).splitlines(True))
+ self._partial = []
self._closed = True
def readline(self):
@@ -96,16 +96,26 @@ class BufferedSubFile(object):
def push(self, data):
"""Push some new data into this object."""
- # Handle any previous leftovers
- data, self._partial = self._partial + data, ''
# Crack into lines, but preserve the linesep characters on the end of each
parts = data.splitlines(True)
+
+ if not parts or not parts[0].endswith(('\n', '\r')):
+ # No new complete lines, so just accumulate partials
+ self._partial += parts
+ return
+
+ if self._partial:
+ # If there are previous leftovers, complete them now
+ self._partial.append(parts[0])
+ parts[0:1] = ''.join(self._partial).splitlines(True)
+ del self._partial[:]
+
# If the last element of the list does not end in a newline, then treat
# it as a partial line. We only check for '\n' here because a line
# ending with '\r' might be a line that was split in the middle of a
# '\r\n' sequence (see bugs 1555570 and 1721862).
- if parts and not parts[-1].endswith('\n'):
- self._partial = parts.pop()
+ if not parts[-1].endswith('\n'):
+ self._partial = [parts.pop()]
self.pushlines(parts)
def pushlines(self, lines):