summary | refs | log | tree | commit | diff | stats
path: root/Lib/email/parser.py
diff options
context:
space:
mode:
author: R. David Murray <rdmurray@bitdance.com> 2010-10-08 15:55:28 (GMT)
committer: R. David Murray <rdmurray@bitdance.com> 2010-10-08 15:55:28 (GMT)
commit: 96fd54eaec700cc50e5960f45ee79bc25c2c48c5 (patch)
tree: 4e4fc3f48d8957b6b0fccc372410e8374ce4fb70 /Lib/email/parser.py
parent: 59fdd6736bbf1ba14083a4bb777abaefc364f876 (diff)
download: cpython-96fd54eaec700cc50e5960f45ee79bc25c2c48c5.zip
cpython-96fd54eaec700cc50e5960f45ee79bc25c2c48c5.tar.gz
cpython-96fd54eaec700cc50e5960f45ee79bc25c2c48c5.tar.bz2
#4661: add bytes parsing and generation to email (email version bump to 5.1.0)
The work on this is not 100% complete, but everything needed for real-world testing of the code is present. The only remaining major to-do item is to (hopefully!) improve the handling of non-ASCII bytes in headers that are converted to unicode: they should be RFC 2047 encoded rather than replaced with '?'s.
Diffstat (limited to 'Lib/email/parser.py')
-rw-r--r--Lib/email/parser.py46
1 files changed, 45 insertions, 1 deletions
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 06014e2..b83e0f7 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -7,7 +7,7 @@
__all__ = ['Parser', 'HeaderParser']
import warnings
-from io import StringIO
+from io import StringIO, TextIOWrapper
from email.feedparser import FeedParser
from email.message import Message
@@ -89,3 +89,47 @@ class HeaderParser(Parser):
def parsestr(self, text, headersonly=True):
return Parser.parsestr(self, text, True)
+
+
class BytesParser:
    """Parser of binary RFC 2822 and MIME email messages.

    Thin bytes front end for Parser: the binary input is decoded as ASCII
    with the 'surrogateescape' error handler (so non-ASCII bytes survive
    as lone surrogates instead of raising) and the resulting text is handed
    to a wrapped Parser instance, available as the `parser' attribute.
    """

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to serialize
        the message again.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are passed unchanged to Parser.
        In particular, _class is the class to instantiate for new message
        objects when they must be created.  This class must have a
        constructor that can take zero arguments.  The default is whatever
        Parser uses.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        The file object passed in is left open; closing it remains the
        caller's responsibility.
        """
        wrapper = TextIOWrapper(fp, encoding='ascii',
                                errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            # Without this, the wrapper would close the caller's file as
            # soon as it is garbage collected.  Detaching severs the link
            # so the underlying binary file stays open and usable.
            wrapper.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)