diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-08-12 10:59:11 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-08-12 10:59:11 (GMT) |
commit | 320a1c0ff715b6c04034722393efe510973430ee (patch) | |
tree | 6f9d2e40433afd332a4a9e4c26f2136394ffc8ac /Lib/email/feedparser.py | |
parent | 6f2017076293f0e1ea807260434053a58be6667b (diff) | |
download | cpython-320a1c0ff715b6c04034722393efe510973430ee.zip cpython-320a1c0ff715b6c04034722393efe510973430ee.tar.gz cpython-320a1c0ff715b6c04034722393efe510973430ee.tar.bz2 |
Issue #21448: Fixed FeedParser feed() to avoid O(N**2) behavior when parsing long line.
Original patch by Raymond Hettinger.
Diffstat (limited to 'Lib/email/feedparser.py')
-rw-r--r-- | Lib/email/feedparser.py | 26 |
1 files changed, 18 insertions, 8 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index 6cf9b91..0c3b572 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
     simple abstraction -- it parses until EOF closes the current message.
     """
     def __init__(self):
-        # The last partial line pushed into this object.
-        self._partial = ''
+        # Chunks of the last partial line pushed into this object.
+        self._partial = []
         # The list of full, pushed lines, in reverse order
         self._lines = []
         # The stack of false-EOF checking predicates.
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
 
     def close(self):
         # Don't forget any trailing partial line.
-        self._lines.append(self._partial)
-        self._partial = ''
+        self.pushlines(''.join(self._partial).splitlines(True))
+        self._partial = []
         self._closed = True
 
     def readline(self):
@@ -96,16 +96,26 @@ class BufferedSubFile(object):
 
     def push(self, data):
         """Push some new data into this object."""
-        # Handle any previous leftovers
-        data, self._partial = self._partial + data, ''
         # Crack into lines, but preserve the linesep characters on the end of each
         parts = data.splitlines(True)
+
+        if not parts or not parts[0].endswith(('\n', '\r')):
+            # No new complete lines, so just accumulate partials
+            self._partial += parts
+            return
+
+        if self._partial:
+            # If there are previous leftovers, complete them now
+            self._partial.append(parts[0])
+            parts[0:1] = ''.join(self._partial).splitlines(True)
+            del self._partial[:]
+
         # If the last element of the list does not end in a newline, then treat
         # it as a partial line. We only check for '\n' here because a line
         # ending with '\r' might be a line that was split in the middle of a
         # '\r\n' sequence (see bugs 1555570 and 1721862).
-        if parts and not parts[-1].endswith('\n'):
-            self._partial = parts.pop()
+        if not parts[-1].endswith('\n'):
+            self._partial = [parts.pop()]
         self.pushlines(parts)
 
     def pushlines(self, lines):