diff options
author | R. David Murray <rdmurray@bitdance.com> | 2010-10-08 15:55:28 (GMT) |
---|---|---|
committer | R. David Murray <rdmurray@bitdance.com> | 2010-10-08 15:55:28 (GMT) |
commit | 96fd54eaec700cc50e5960f45ee79bc25c2c48c5 (patch) | |
tree | 4e4fc3f48d8957b6b0fccc372410e8374ce4fb70 /Lib/email/parser.py | |
parent | 59fdd6736bbf1ba14083a4bb777abaefc364f876 (diff) | |
download | cpython-96fd54eaec700cc50e5960f45ee79bc25c2c48c5.zip cpython-96fd54eaec700cc50e5960f45ee79bc25c2c48c5.tar.gz cpython-96fd54eaec700cc50e5960f45ee79bc25c2c48c5.tar.bz2 |
#4661: add bytes parsing and generation to email (email version bump to 5.1.0)
The work on this is not 100% complete, but everything is present to
allow real-world testing of the code. The only remaining major todo
item is to (hopefully!) enhance the handling of non-ASCII bytes in headers
converted to unicode by RFC2047 encoding them rather than replacing them with
'?'s.
Diffstat (limited to 'Lib/email/parser.py')
-rw-r--r-- | Lib/email/parser.py | 46 |
1 files changed, 45 insertions, 1 deletions
```diff
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 06014e2..b83e0f7 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -7,7 +7,7 @@
 __all__ = ['Parser', 'HeaderParser']
 
 import warnings
-from io import StringIO
+from io import StringIO, TextIOWrapper
 
 from email.feedparser import FeedParser
 from email.message import Message
@@ -89,3 +89,47 @@ class HeaderParser(Parser):
 
     def parsestr(self, text, headersonly=True):
         return Parser.parsestr(self, text, True)
+
+
+class BytesParser:
+
+    def __init__(self, *args, **kw):
+        """Parser of binary RFC 2822 and MIME email messages.
+
+        Creates an in-memory object tree representing the email message, which
+        can then be manipulated and turned over to a Generator to return the
+        textual representation of the message.
+
+        The input must be formatted as a block of RFC 2822 headers and header
+        continuation lines, optionally preceeded by a `Unix-from' header.  The
+        header block is terminated either by the end of the input or by a
+        blank line.
+
+        _class is the class to instantiate for new message objects when they
+        must be created.  This class must have a constructor that can take
+        zero arguments.  Default is Message.Message.
+        """
+        self.parser = Parser(*args, **kw)
+
+    def parse(self, fp, headersonly=False):
+        """Create a message structure from the data in a binary file.
+
+        Reads all the data from the file and returns the root of the message
+        structure.  Optional headersonly is a flag specifying whether to stop
+        parsing after reading the headers or not.  The default is False,
+        meaning it parses the entire contents of the file.
+        """
+        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+        return self.parser.parse(fp, headersonly)
+
+
+    def parsebytes(self, text, headersonly=False):
+        """Create a message structure from a byte string.
+
+        Returns the root of the message structure.  Optional headersonly is a
+        flag specifying whether to stop parsing after reading the headers or
+        not.  The default is False, meaning it parses the entire contents of
+        the file.
+        """
+        text = text.decode('ASCII', errors='surrogateescape')
+        return self.parser.parsestr(text, headersonly)
```