#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface methods to Policy that allow for Policy to control the parsing and folding of headers in such a way that we can construct a backward compatibility policy that is 100% compatible with the 3.2 API, while allowing a new policy to implement the email6 API. (2) it adds that backward compatibility policy and refactors the test suite so that the only differences between the 3.2 test_email.py file and the 3.3 test_email.py file is some small changes in test framework and the addition of tests for bugs fixed that apply to the 3.2 API. There are some additional teaks, such as moving just the code needed for the compatibility policy into _policybase, so that the library code can import only _policybase. That way the new code that will be added for email6 will only get imported when a non-compatibility policy is imported.
author: R David Murray <rdmurray@bitdance.com> 2012-05-25 19:01:48 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2012-05-25 19:01:48 (GMT)
commit: c27e52265b7ff4aa57dc357c289cce8c9dd0fec3 (patch)
tree: b2a25260b0aa89d0a4db3c0d2f91c8cb5e68d51a /Lib/email/_policybase.py
parent: 9242c1378f77214f5b9b90149861cb13ca986fb0 (diff)
download: cpython-c27e52265b7ff4aa57dc357c289cce8c9dd0fec3.zip
cpython-c27e52265b7ff4aa57dc357c289cce8c9dd0fec3.tar.gz
cpython-c27e52265b7ff4aa57dc357c289cce8c9dd0fec3.tar.bz2
1 files changed, 338 insertions, 0 deletions
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
new file mode 100644
index 0000000..05736d0
--- /dev/null
+++ b/Lib/email/_policybase.py
@@ -0,0 +1,338 @@
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+
+import abc
+from email import header
+from email import charset as _charset
+from email.utils import _has_surrogates
+
+__all__ = [
+    'Policy',
+    'Compat32',
+    'compat32',
+    ]
+
+
+class _PolicyBase:
+
+    """Policy Object basic framework.
+
+    This class is useless unless subclassed.  A subclass should define
+    class attributes with defaults for any values that are to be
+    managed by the Policy object.  The constructor will then allow
+    non-default values to be set for these attributes at instance
+    creation time.  The instance will be callable, taking these same
+    attributes keyword arguments, and returning a new instance
+    identical to the called instance except for those values changed
+    by the keyword arguments.  Instances may be added, yielding new
+    instances with any non-default values from the right hand
+    operand overriding those in the left hand operand.  That is,
+
+        A + B == A(<non-default values of B>)
+
+    The repr of an instance can be used to reconstruct the object
+    if and only if the repr of the values can be used to reconstruct
+    those values.
+
+    """
+
+    def __init__(self, **kw):
+        """Create new Policy, possibly overriding some defaults.
+
+        See class docstring for a list of overridable attributes.
+
+        """
+        for name, value in kw.items():
+            if hasattr(self, name):
+                super(_PolicyBase,self).__setattr__(name, value)
+            else:
+                raise TypeError(
+                    "{!r} is an invalid keyword argument for {}".format(
+                        name, self.__class__.__name__))
+
+    def __repr__(self):
+        args = [ "{}={!r}".format(name, value)
+                 for name, value in self.__dict__.items() ]
+        return "{}({})".format(self.__class__.__name__, ', '.join(args))
+
+    def clone(self, **kw):
+        """Return a new instance with specified attributes changed.
+
+        The new instance has the same attribute values as the current object,
+        except for the changes passed in as keyword arguments.
+
+        """
+        for attr, value in self.__dict__.items():
+            if attr not in kw:
+                kw[attr] = value
+        return self.__class__(**kw)
+
+    def __setattr__(self, name, value):
+        if hasattr(self, name):
+            msg = "{!r} object attribute {!r} is read-only"
+        else:
+            msg = "{!r} object has no attribute {!r}"
+        raise AttributeError(msg.format(self.__class__.__name__, name))
+
+    def __add__(self, other):
+        """Non-default values from right operand override those from left.
+
+        The object returned is a new instance of the subclass.
+
+        """
+        return self.clone(**other.__dict__)
+
+
+# Conceptually this isn't a subclass of ABCMeta, but since we want Policy to
+# use ABCMeta as a metaclass *and* we want it to use this one as well, we have
+# to make this one a subclas of ABCMeta.
+class _DocstringExtenderMetaclass(abc.ABCMeta):
+
+    def __new__(meta, classname, bases, classdict):
+        if classdict.get('__doc__') and classdict['__doc__'].startswith('+'):
+            classdict['__doc__'] = meta._append_doc(bases[0].__doc__,
+                                                    classdict['__doc__'])
+        for name, attr in classdict.items():
+            if attr.__doc__ and attr.__doc__.startswith('+'):
+                for cls in (cls for base in bases for cls in base.mro()):
+                    doc = getattr(getattr(cls, name), '__doc__')
+                    if doc:
+                        attr.__doc__ = meta._append_doc(doc, attr.__doc__)
+                        break
+        return super().__new__(meta, classname, bases, classdict)
+
+    @staticmethod
+    def _append_doc(doc, added_doc):
+        added_doc = added_doc.split('\n', 1)[1]
+        return doc + '\n' + added_doc
+
+
+class Policy(_PolicyBase, metaclass=_DocstringExtenderMetaclass):
+
+    r"""Controls for how messages are interpreted and formatted.
+
+    Most of the classes and many of the methods in the email package accept
+    Policy objects as parameters.  A Policy object contains a set of values and
+    functions that control how input is interpreted and how output is rendered.
+    For example, the parameter 'raise_on_defect' controls whether or not an RFC
+    violation results in an error being raised or not, while 'max_line_length'
+    controls the maximum length of output lines when a Message is serialized.
+
+    Any valid attribute may be overridden when a Policy is created by passing
+    it as a keyword argument to the constructor.  Policy objects are immutable,
+    but a new Policy object can be created with only certain values changed by
+    calling the Policy instance with keyword arguments.  Policy objects can
+    also be added, producing a new Policy object in which the non-default
+    attributes set in the right hand operand overwrite those specified in the
+    left operand.
+
+    Settable attributes:
+
+    raise_on_defect     -- If true, then defects should be raised as errors.
+                           Default: False.
+
+    linesep             -- string containing the value to use as separation
+                           between output lines.  Default '\n'.
+
+    cte_type            -- Type of allowed content transfer encodings
+
+                           7bit  -- ASCII only
+                           8bit  -- Content-Transfer-Encoding: 8bit is allowed
+
+                           Default: 8bit.  Also controls the disposition of
+                           (RFC invalid) binary data in headers; see the
+                           documentation of the binary_fold method.
+
+    max_line_length     -- maximum length of lines, excluding 'linesep',
+                           during serialization.  None or 0 means no line
+                           wrapping is done.  Default is 78.
+
+    """
+
+    raise_on_defect = False
+    linesep = '\n'
+    cte_type = '8bit'
+    max_line_length = 78
+
+    def handle_defect(self, obj, defect):
+        """Based on policy, either raise defect or call register_defect.
+
+            handle_defect(obj, defect)
+
+        defect should be a Defect subclass, but in any case must be an
+        Exception subclass.  obj is the object on which the defect should be
+        registered if it is not raised.  If the raise_on_defect is True, the
+        defect is raised as an error, otherwise the object and the defect are
+        passed to register_defect.
+
+        This method is intended to be called by parsers that discover defects.
+        The email package parsers always call it with Defect instances.
+
+        """
+        if self.raise_on_defect:
+            raise defect
+        self.register_defect(obj, defect)
+
+    def register_defect(self, obj, defect):
+        """Record 'defect' on 'obj'.
+
+        Called by handle_defect if raise_on_defect is False.  This method is
+        part of the Policy API so that Policy subclasses can implement custom
+        defect handling.  The default implementation calls the append method of
+        the defects attribute of obj.  The objects used by the email package by
+        default that get passed to this method will always have a defects
+        attribute with an append method.
+
+        """
+        obj.defects.append(defect)
+
+    @abc.abstractmethod
+    def header_source_parse(self, sourcelines):
+        """Given a list of linesep terminated strings constituting the lines of
+        a single header, return the (name, value) tuple that should be stored
+        in the model.  The input lines should retain their terminating linesep
+        characters.  The lines passed in by the email package may contain
+        surrogateescaped binary data.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def header_store_parse(self, name, value):
+        """Given the header name and the value provided by the application
+        program, return the (name, value) that should be stored in the model.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def header_fetch_parse(self, name, value):
+        """Given the header name and the value from the model, return the value
+        to be returned to the application program that is requesting that
+        header.  The value passed in by the email package may contain
+        surrogateescaped binary data if the lines were parsed by a BytesParser.
+        The returned value should not contain any surrogateescaped data.
+
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def fold(self, name, value):
+        """Given the header name and the value from the model, return a string
+        containing linesep characters that implement the folding of the header
+        according to the policy controls.  The value passed in by the email
+        package may contain surrogateescaped binary data if the lines were
+        parsed by a BytesParser.  The returned value should not contain any
+        surrogateescaped data.
+
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def fold_binary(self, name, value):
+        """Given the header name and the value from the model, return binary
+        data containing linesep characters that implement the folding of the
+        header according to the policy controls.  The value passed in by the
+        email package may contain surrogateescaped binary data.
+
+        """
+        raise NotImplementedError
+
+
+class Compat32(Policy):
+
+    """+
+    This particular policy is the backward compatibility Policy.  It
+    replicates the behavior of the email package version 5.1.
+    """
+
+    def _sanitize_header(self, name, value):
+        # If the header value contains surrogates, return a Header using
+        # the unknown-8bit charset to encode the bytes as encoded words.
+        if not isinstance(value, str):
+            # Assume it is already a header object
+            return value
+        if _has_surrogates(value):
+            return header.Header(value, charset=_charset.UNKNOWN8BIT,
+                                 header_name=name)
+        else:
+            return value
+
+    def header_source_parse(self, sourcelines):
+        """+
+        The name is parsed as everything up to the ':' and returned unmodified.
+        The value is determined by stripping leading whitespace off the
+        remainder of the first line, joining all subsequent lines together, and
+        stripping any trailing carriage return or linefeed characters.
+
+        """
+        name, value = sourcelines[0].split(':', 1)
+        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        return (name, value.rstrip('\r\n'))
+
+    def header_store_parse(self, name, value):
+        """+
+        The name and value are returned unmodified.
+        """
+        return (name, value)
+
+    def header_fetch_parse(self, name, value):
+        """+
+        If the value contains binary data, it is converted into a Header object
+        using the unknown-8bit charset.  Otherwise it is returned unmodified.
+        """
+        return self._sanitize_header(name, value)
+
+    def fold(self, name, value):
+        """+
+        Headers are folded using the Header folding algorithm, which preserves
+        existing line breaks in the value, and wraps each resulting line to the
+        max_line_length.  Non-ASCII binary data are CTE encoded using the
+        unknown-8bit charset.
+
+        """
+        return self._fold(name, value, sanitize=True)
+
+    def fold_binary(self, name, value):
+        """+
+        Headers are folded using the Header folding algorithm, which preserves
+        existing line breaks in the value, and wraps each resulting line to the
+        max_line_length.  If cte_type is 7bit, non-ascii binary data is CTE
+        encoded using the unknown-8bit charset.  Otherwise the original source
+        header is used, with its existing line breaks and/or binary data.
+
+        """
+        folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
+        return folded.encode('ascii', 'surrogateescape')
+
+    def _fold(self, name, value, sanitize):
+        parts = []
+        parts.append('%s: ' % name)
+        if isinstance(value, str):
+            if _has_surrogates(value):
+                if sanitize:
+                    h = header.Header(value,
+                                      charset=_charset.UNKNOWN8BIT,
+                                      header_name=name)
+                else:
+                    # If we have raw 8bit data in a byte string, we have no idea
+                    # what the encoding is.  There is no safe way to split this
+                    # string.  If it's ascii-subset, then we could do a normal
+                    # ascii split, but if it's multibyte then we could break the
+                    # string.  There's no way to know so the least harm seems to
+                    # be to not split the string and risk it being too long.
+                    parts.append(value)
+                    h = None
+            else:
+                h = header.Header(value, header_name=name)
+        else:
+            # Assume it is a Header-like object.
+            h = value
+        if h is not None:
+            parts.append(h.encode(linesep=self.linesep,
+                                  maxlinelen=self.max_line_length))
+        parts.append(self.linesep)
+        return ''.join(parts)
+
+
+compat32 = Compat32()
author	R David Murray <rdmurray@bitdance.com>	2012-05-25 19:01:48 (GMT)
committer	R David Murray <rdmurray@bitdance.com>	2012-05-25 19:01:48 (GMT)
commit	c27e52265b7ff4aa57dc357c289cce8c9dd0fec3 (patch)
tree	b2a25260b0aa89d0a4db3c0d2f91c8cb5e68d51a /Lib/email/_policybase.py
parent	9242c1378f77214f5b9b90149861cb13ca986fb0 (diff)
download	cpython-c27e52265b7ff4aa57dc357c289cce8c9dd0fec3.zip cpython-c27e52265b7ff4aa57dc357c289cce8c9dd0fec3.tar.gz cpython-c27e52265b7ff4aa57dc357c289cce8c9dd0fec3.tar.bz2