diff options
Diffstat (limited to 'Lib/email/message.py')
-rw-r--r-- | Lib/email/message.py | 86 |
1 files changed, 51 insertions, 35 deletions
diff --git a/Lib/email/message.py b/Lib/email/message.py index f1ffcdb..3feab52 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -10,14 +10,14 @@ import re import uu import base64 import binascii -import warnings from io import BytesIO, StringIO # Intrapackage imports from email import utils from email import errors -from email import header +from email._policybase import compat32 from email import charset as _charset +from email._encoded_words import decode_b Charset = _charset.Charset SEMISPACE = '; ' @@ -26,24 +26,6 @@ SEMISPACE = '; ' # existence of which force quoting of the parameter value. tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') -# How to figure out if we are processing strings that come from a byte -# source with undecodable characters. -_has_surrogates = re.compile( - '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search - - -# Helper functions -def _sanitize_header(name, value): - # If the header value contains surrogates, return a Header using - # the unknown-8bit charset to encode the bytes as encoded words. - if not isinstance(value, str): - # Assume it is already a header object - return value - if _has_surrogates(value): - return header.Header(value, charset=_charset.UNKNOWN8BIT, - header_name=name) - else: - return value def _splitparam(param): # Split header parameters. BAW: this may be too simple. It isn't @@ -136,7 +118,8 @@ class Message: you must use the explicit API to set or get all the headers. Not all of the mapping methods are implemented. """ - def __init__(self): + def __init__(self, policy=compat32): + self.policy = policy self._headers = [] self._unixfrom = None self._payload = None @@ -246,7 +229,7 @@ class Message: cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. if isinstance(payload, str): - if _has_surrogates(payload): + if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: @@ -267,11 +250,12 @@ class Message: if cte == 'quoted-printable': return utils._qdecode(bpayload) elif cte == 'base64': - try: - return base64.b64decode(bpayload) - except binascii.Error: - # Incorrect padding - return bpayload + # XXX: this is a bit of a hack; decode_b should probably be factored + # out somewhere, but I haven't figured out where yet. + value, defects = decode_b(b''.join(bpayload.splitlines())) + for defect in defects: + self.policy.handle_defect(self, defect) + return value elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() @@ -362,7 +346,17 @@ class Message: Note: this does not overwrite an existing header with the same field name. Use __delitem__() first to delete any existing headers. """ - self._headers.append((name, val)) + max_count = self.policy.header_max_count(name) + if max_count: + lname = name.lower() + found = 0 + for k, v in self._headers: + if k.lower() == lname: + found += 1 + if found >= max_count: + raise ValueError("There may be at most {} {} headers " + "in a message".format(max_count, name)) + self._headers.append(self.policy.header_store_parse(name, val)) def __delitem__(self, name): """Delete all occurrences of a header, if present. @@ -401,7 +395,8 @@ class Message: Any fields deleted and re-inserted are always appended to the header list. """ - return [_sanitize_header(k, v) for k, v in self._headers] + return [self.policy.header_fetch_parse(k, v) + for k, v in self._headers] def items(self): """Get all the message's header fields and values. @@ -411,7 +406,8 @@ class Message: Any fields deleted and re-inserted are always appended to the header list. """ - return [(k, _sanitize_header(k, v)) for k, v in self._headers] + return [(k, self.policy.header_fetch_parse(k, v)) + for k, v in self._headers] def get(self, name, failobj=None): """Get a header value. @@ -422,10 +418,29 @@ class Message: name = name.lower() for k, v in self._headers: if k.lower() == name: - return _sanitize_header(k, v) + return self.policy.header_fetch_parse(k, v) return failobj # + # "Internal" methods (public API, but only intended for use by a parser + # or generator, not normal application code. + # + + def set_raw(self, name, value): + """Store name and value in the model without modification. + + This is an "internal" API, intended only for use by a parser. + """ + self._headers.append((name, value)) + + def raw_items(self): + """Return the (name, value) header pairs without modification. + + This is an "internal" API, intended only for use by a generator. + """ + return iter(self._headers.copy()) + + # # Additional useful stuff # @@ -442,7 +457,7 @@ class Message: name = name.lower() for k, v in self._headers: if k.lower() == name: - values.append(_sanitize_header(k, v)) + values.append(self.policy.header_fetch_parse(k, v)) if not values: return failobj return values @@ -475,7 +490,7 @@ class Message: parts.append(_formatparam(k.replace('_', '-'), v)) if _value is not None: parts.insert(0, _value) - self._headers.append((_name, SEMISPACE.join(parts))) + self[_name] = SEMISPACE.join(parts) def replace_header(self, _name, _value): """Replace a header. @@ -487,7 +502,7 @@ class Message: _name = _name.lower() for i, (k, v) in zip(range(len(self._headers)), self._headers): if k.lower() == _name: - self._headers[i] = (k, _value) + self._headers[i] = self.policy.header_store_parse(k, _value) break else: raise KeyError(_name) @@ -803,7 +818,8 @@ class Message: parts.append(k) else: parts.append('%s=%s' % (k, v)) - newheaders.append((h, SEMISPACE.join(parts))) + val = SEMISPACE.join(parts) + newheaders.append(self.policy.header_store_parse(h, val)) else: newheaders.append((h, v)) |