Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- | Lib/email/_header_value_parser.py | 729 |
1 file changed, 286 insertions, 443 deletions
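This change replaces the old incremental _Folded accumulator and the per-token-list _fold()/cte_encode() methods with a single _refold_parse_tree() pass, driven by new token attributes (syntactic_break, ew_combine_allowed, as_ew_allowed). The new folder is reached through the normal policy API; a minimal sketch of how to exercise it (the subject text and the 40-character limit are invented for illustration):

    from email import policy

    # A small max_line_length forces folding, and the non-ASCII word forces an
    # RFC 2047 encoded word; both paths run through the new _refold_parse_tree().
    narrow = policy.default.clone(max_line_length=40)
    print(narrow.fold('Subject', 'Status update für the header folding rewrite'))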
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index b4737c8..b34c58b 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -97,96 +97,14 @@ def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' # -# Accumulator for header folding -# - -class _Folded: - - def __init__(self, maxlen, policy): - self.maxlen = maxlen - self.policy = policy - self.lastlen = 0 - self.stickyspace = None - self.firstline = True - self.done = [] - self.current = [] - - def newline(self): - self.done.extend(self.current) - self.done.append(self.policy.linesep) - self.current.clear() - self.lastlen = 0 - - def finalize(self): - if self.current: - self.newline() - - def __str__(self): - return ''.join(self.done) - - def append(self, stoken): - self.current.append(stoken) - - def append_if_fits(self, token, stoken=None): - if stoken is None: - stoken = str(token) - l = len(stoken) - if self.stickyspace is not None: - stickyspace_len = len(self.stickyspace) - if self.lastlen + stickyspace_len + l <= self.maxlen: - self.current.append(self.stickyspace) - self.lastlen += stickyspace_len - self.current.append(stoken) - self.lastlen += l - self.stickyspace = None - self.firstline = False - return True - if token.has_fws: - ws = token.pop_leading_fws() - if ws is not None: - self.stickyspace += str(ws) - stickyspace_len += len(ws) - token._fold(self) - return True - if stickyspace_len and l + 1 <= self.maxlen: - margin = self.maxlen - l - if 0 < margin < stickyspace_len: - trim = stickyspace_len - margin - self.current.append(self.stickyspace[:trim]) - self.stickyspace = self.stickyspace[trim:] - stickyspace_len = trim - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.lastlen = l + stickyspace_len - self.stickyspace = None - self.firstline = False - return True - if not self.firstline: - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.stickyspace = None - self.firstline = False - return True - if self.lastlen + l <= self.maxlen: - self.current.append(stoken) - self.lastlen += l - return True - if l < self.maxlen: - self.newline() - self.current.append(stoken) - self.lastlen = l - return True - return False - -# # TokenList and its subclasses # class TokenList(list): token_type = None + syntactic_break = True + ew_combine_allowed = True def __init__(self, *args, **kw): super().__init__(*args, **kw) @@ -207,84 +125,13 @@ class TokenList(list): def all_defects(self): return sum((x.all_defects for x in self), self.defects) - # - # Folding API - # - # parts(): - # - # return a list of objects that constitute the "higher level syntactic - # objects" specified by the RFC as the best places to fold a header line. - # The returned objects must include leading folding white space, even if - # this means mutating the underlying parse tree of the object. Each object - # is only responsible for returning *its* parts, and should not drill down - # to any lower level except as required to meet the leading folding white - # space constraint. - # - # _fold(folded): - # - # folded: the result accumulator. This is an instance of _Folded. - # (XXX: I haven't finished factoring this out yet, the folding code - # pretty much uses this as a state object.) When the folded.current - # contains as much text as will fit, the _fold method should call - # folded.newline. - # folded.lastlen: the current length of the test stored in folded.current. 
- # folded.maxlen: The maximum number of characters that may appear on a - # folded line. Differs from the policy setting in that "no limit" is - # represented by +inf, which means it can be used in the trivially - # logical fashion in comparisons. - # - # Currently no subclasses implement parts, and I think this will remain - # true. A subclass only needs to implement _fold when the generic version - # isn't sufficient. _fold will need to be implemented primarily when it is - # possible for encoded words to appear in the specialized token-list, since - # there is no generic algorithm that can know where exactly the encoded - # words are allowed. A _fold implementation is responsible for filling - # lines in the same general way that the top level _fold does. It may, and - # should, call the _fold method of sub-objects in a similar fashion to that - # of the top level _fold. - # - # XXX: I'm hoping it will be possible to factor the existing code further - # to reduce redundancy and make the logic clearer. - - @property - def parts(self): - klass = self.__class__ - this = [] - for token in self: - if token.startswith_fws(): - if this: - yield this[0] if len(this)==1 else klass(this) - this.clear() - end_ws = token.pop_trailing_ws() - this.append(token) - if end_ws: - yield klass(this) - this = [end_ws] - if this: - yield this[0] if len(this)==1 else klass(this) - def startswith_fws(self): return self[0].startswith_fws() - def pop_leading_fws(self): - if self[0].token_type == 'fws': - return self.pop(0) - return self[0].pop_leading_fws() - - def pop_trailing_ws(self): - if self[-1].token_type == 'cfws': - return self.pop(-1) - return self[-1].pop_trailing_ws() - @property - def has_fws(self): - for part in self: - if part.has_fws: - return True - return False - - def has_leading_comment(self): - return self[0].has_leading_comment() + def as_ew_allowed(self): + """True if all top level tokens of this part may be RFC2047 encoded.""" + return all(part.as_ew_allowed for part in self) @property def comments(self): @@ -294,69 +141,13 @@ class TokenList(list): return comments def fold(self, *, policy): - # max_line_length 0/None means no limit, ie: infinitely long. - maxlen = policy.max_line_length or float("+inf") - folded = _Folded(maxlen, policy) - self._fold(folded) - folded.finalize() - return str(folded) - - def as_encoded_word(self, charset): - # This works only for things returned by 'parts', which include - # the leading fws, if any, that should be used. - res = [] - ws = self.pop_leading_fws() - if ws: - res.append(ws) - trailer = self.pop(-1) if self[-1].token_type=='fws' else '' - res.append(_ew.encode(str(self), charset)) - res.append(trailer) - return ''.join(res) - - def cte_encode(self, charset, policy): - res = [] - for part in self: - res.append(part.cte_encode(charset, policy)) - return ''.join(res) - - def _fold(self, folded): - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - # XXX: this should be a policy setting when utf8 is False. - charset = 'utf-8' - tstr = part.cte_encode(charset, folded.policy) - tlen = len(tstr) - if folded.append_if_fits(part, tstr): - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. 
- ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # There are no fold points in this one; it is too long for a single - # line and can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() + return _refold_parse_tree(self, policy=policy) def pprint(self, indent=''): - print('\n'.join(self._pp(indent=''))) + print(self.ppstr(indent=indent)) def ppstr(self, indent=''): - return '\n'.join(self._pp(indent='')) + return '\n'.join(self._pp(indent=indent)) def _pp(self, indent=''): yield '{}{}/{}('.format( @@ -391,173 +182,11 @@ class UnstructuredTokenList(TokenList): token_type = 'unstructured' - def _fold(self, folded): - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - is_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None: - # We've already done an EW, combine this one with it - # if there's room. - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - oldlastlen = sum(len(x) for x in folded.current[:last_ew]) - schunk = str(chunk) - lchunk = len(schunk) - if oldlastlen + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = oldlastlen + lchunk - continue - tstr = part.as_encoded_word(charset) - is_ew = True - if folded.append_if_fits(part, tstr): - if is_ew: - last_ew = len(folded.current) - 1 - continue - if is_ew or last_ew: - # It's too big to fit on the line, but since we've - # got encoded words we can use encoded word folding. - part._fold_as_ew(folded) - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # It can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - last_ew = None - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - if last_ew is None: - res.append(part.cte_encode(charset, policy)) - last_ew = len(res) - else: - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res.append(tl.as_encoded_word(charset)) - return ''.join(res) - class Phrase(TokenList): token_type = 'phrase' - def _fold(self, folded): - # As with Unstructured, we can have pure ASCII with or without - # surrogateescape encoded bytes, or we could have unicode. But this - # case is more complicated, since we have to deal with the various - # sub-token types and how they can be composed in the face of - # unicode-that-needs-CTE-encoding, and the fact that if a token a - # comment that becomes a barrier across which we can't compose encoded - # words. 
- last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - has_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None and not part.has_leading_comment(): - # We've already done an EW, let's see if we can combine - # this one with it. The last_ew logic ensures that all we - # have at this point is atoms, no comments or quoted - # strings. So we can treat the text between the last - # encoded word and the content of this token as - # unstructured text, and things will work correctly. But - # we have to strip off any trailing comment on this token - # first, and if it is a quoted string we have to pull out - # the content (we're encoding it, so it no longer needs to - # be quoted). - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - schunk = str(chunk) - lchunk = len(schunk) - if last_ew + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = sum(len(x) for x in folded.current) - continue - tstr = part.as_encoded_word(charset) - tlen = len(tstr) - has_ew = True - if folded.append_if_fits(part, tstr): - if has_ew and not part.comments: - last_ew = len(folded.current) - 1 - elif part.comments or part.token_type == 'quoted-string': - # If a comment is involved we can't combine EWs. And if a - # quoted string is involved, it's not worth the effort to - # try to combine them. 
- last_ew = None - continue - part._fold(folded) - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - is_ew = False - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - is_ew = True - if last_ew is None: - if not part.comments: - last_ew = len(res) - res.append(part.cte_encode(charset, policy)) - elif not part.has_leading_comment(): - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res[last_ew:] = [tl.as_encoded_word(charset)] - if part.comments or (not is_ew and part.token_type == 'quoted-string'): - last_ew = None - return ''.join(res) - class Word(TokenList): token_type = 'word' @@ -567,9 +196,6 @@ class CFWSList(WhiteSpaceTokenList): token_type = 'cfws' - def has_leading_comment(self): - return bool(self.comments) - class Atom(TokenList): @@ -579,6 +205,7 @@ class Atom(TokenList): class Token(TokenList): token_type = 'token' + encode_as_ew = False class EncodedWord(TokenList): @@ -588,13 +215,6 @@ class EncodedWord(TokenList): charset = None lang = None - @property - def encoded(self): - if self.cte is not None: - return self.cte - _ew.encode(str(self), self.charset) - - class QuotedString(TokenList): @@ -865,6 +485,7 @@ class InvalidMailbox(TokenList): class Domain(TokenList): token_type = 'domain' + as_ew_allowed = False @property def domain(self): @@ -879,11 +500,13 @@ class DotAtom(TokenList): class DotAtomText(TokenList): token_type = 'dot-atom-text' + as_ew_allowed = True class AddrSpec(TokenList): token_type = 'addr-spec' + as_ew_allowed = False @property def local_part(self): @@ -916,11 +539,13 @@ class AddrSpec(TokenList): class ObsLocalPart(TokenList): token_type = 'obs-local-part' + as_ew_allowed = False class DisplayName(Phrase): token_type = 'display-name' + ew_combine_allowed = False @property def display_name(self): @@ -960,6 +585,7 @@ class DisplayName(Phrase): class LocalPart(TokenList): token_type = 'local-part' + as_ew_allowed = False @property def value(self): @@ -995,6 +621,7 @@ class LocalPart(TokenList): class DomainLiteral(TokenList): token_type = 'domain-literal' + as_ew_allowed = False @property def domain(self): @@ -1081,6 +708,7 @@ class Value(TokenList): class MimeParameters(TokenList): token_type = 'mime-parameters' + syntactic_break = False @property def params(self): @@ -1165,6 +793,10 @@ class MimeParameters(TokenList): class ParameterizedHeaderValue(TokenList): + # Set this false so that the value doesn't wind up on a new line even + # if it and the parameters would fit there but not on the first line. + syntactic_break = False + @property def params(self): for token in reversed(self): @@ -1172,18 +804,11 @@ class ParameterizedHeaderValue(TokenList): return token.params return {} - @property - def parts(self): - if self and self[-1].token_type == 'mime-parameters': - # We don't want to start a new line if all of the params don't fit - # after the value, so unwrap the parameter list. 
- return TokenList(self[:-1] + self[-1]) - return TokenList(self).parts - class ContentType(ParameterizedHeaderValue): token_type = 'content-type' + as_ew_allowed = False maintype = 'text' subtype = 'plain' @@ -1191,40 +816,27 @@ class ContentType(ParameterizedHeaderValue): class ContentDisposition(ParameterizedHeaderValue): token_type = 'content-disposition' + as_ew_allowed = False content_disposition = None class ContentTransferEncoding(TokenList): token_type = 'content-transfer-encoding' + as_ew_allowed = False cte = '7bit' class HeaderLabel(TokenList): token_type = 'header-label' + as_ew_allowed = False class Header(TokenList): token_type = 'header' - def _fold(self, folded): - folded.append(str(self.pop(0))) - folded.lastlen = len(folded.current[0]) - # The first line of the header is different from all others: we don't - # want to start a new object on a new line if it has any fold points in - # it that would allow part of it to be on the first header line. - # Further, if the first fold point would fit on the new line, we want - # to do that, but if it doesn't we want to put it on the first line. - # Folded supports this via the stickyspace attribute. If this - # attribute is not None, it does the special handling. - folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' - rest = self.pop(0) - if self: - raise ValueError("Malformed Header token list") - rest._fold(folded) - # # Terminal classes and instances @@ -1232,6 +844,10 @@ class Header(TokenList): class Terminal(str): + as_ew_allowed = True + ew_combine_allowed = True + syntactic_break = True + def __new__(cls, value, token_type): self = super().__new__(cls, value) self.token_type = token_type @@ -1241,6 +857,9 @@ class Terminal(str): def __repr__(self): return "{}({})".format(self.__class__.__name__, super().__repr__()) + def pprint(self): + print(self.__class__.__name__ + '/' + self.token_type) + @property def all_defects(self): return list(self.defects) @@ -1254,29 +873,14 @@ class Terminal(str): '' if not self.defects else ' {}'.format(self.defects), )] - def cte_encode(self, charset, policy): - value = str(self) - try: - value.encode('us-ascii') - return value - except UnicodeEncodeError: - return _ew.encode(value, charset) - def pop_trailing_ws(self): # This terminates the recursion. return None - def pop_leading_fws(self): - # This terminates the recursion. 
-        return None
-
     @property
     def comments(self):
         return []

-    def has_leading_comment(self):
-        return False
-
     def __getnewargs__(self):
         return(str(self), self.token_type)
@@ -1290,8 +894,6 @@ class WhiteSpaceTerminal(Terminal):
     def startswith_fws(self):
         return True

-    has_fws = True
-

 class ValueTerminal(Terminal):
@@ -1302,11 +904,6 @@
     def startswith_fws(self):
         return False

-    has_fws = False
-
-    def as_encoded_word(self, charset):
-        return _ew.encode(str(self), charset)
-

 class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
@@ -1314,15 +911,9 @@
     def value(self):
         return ''

-    @property
-    def encoded(self):
-        return self[:]
-
     def __str__(self):
         return ''

-    has_fws = True
-

 # XXX these need to become classes and used as instances so
 # that a program can't change them in a parse tree and screw
@@ -2751,7 +2342,7 @@ def get_parameter(value):
     if value[0] != "'":
         raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                       "delimiter, but found {!r}".format(value))
-    appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
+    appendto.append(ValueTerminal("'", 'RFC2231-delimiter'))
     value = value[1:]
     if value and value[0] != "'":
         token, value = get_attrtext(value)
@@ -2760,7 +2351,7 @@
     if not value or value[0] != "'":
         raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                       "delimiter, but found {}".format(value))
-    appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
+    appendto.append(ValueTerminal("'", 'RFC2231-delimiter'))
     value = value[1:]
     if remainder is not None:
         # Treat the rest of value as bare quoted string content.
@@ -2965,3 +2556,255 @@ def parse_content_transfer_encoding_header(value):
     token, value = get_phrase(value)
     cte_header.append(token)
     return cte_header
+
+
+#
+# Header folding
+#
+# Header folding is complex, with lots of rules and corner cases.  The
+# following code does its best to obey the rules and handle the corner
+# cases, but you can be sure there are few bugs:)
+#
+# This folder generally canonicalizes as it goes, preferring the stringified
+# version of each token.  The tokens contain information that supports the
+# folder, including which tokens can be encoded in which ways.
+#
+# Folded text is accumulated in a simple list of strings ('lines'), each
+# one of which should be less than policy.max_line_length ('maxlen').
+#
+
+def _steal_trailing_WSP_if_exists(lines):
+    wsp = ''
+    if lines and lines[-1] and lines[-1][-1] in WSP:
+        wsp = lines[-1][-1]
+        lines[-1] = lines[-1][:-1]
+    return wsp
+
+def _refold_parse_tree(parse_tree, *, policy):
+    """Return string of contents of parse_tree folded according to RFC rules.
+
+    """
+    # max_line_length 0/None means no limit, ie: infinitely long.
+    maxlen = policy.max_line_length or float("+inf")
+    encoding = 'utf-8' if policy.utf8 else 'us-ascii'
+    lines = ['']
+    last_ew = None
+    wrap_as_ew_blocked = 0
+    want_encoding = False
+    end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked')
+    parts = list(parse_tree)
+    while parts:
+        part = parts.pop(0)
+        if part is end_ew_not_allowed:
+            wrap_as_ew_blocked -= 1
+            continue
+        tstr = str(part)
+        try:
+            tstr.encode(encoding)
+            charset = encoding
+        except UnicodeEncodeError:
+            if any(isinstance(x, errors.UndecodableBytesDefect)
+                   for x in part.all_defects):
+                charset = 'unknown-8bit'
+            else:
+                # If policy.utf8 is false this should really be taken from a
+                # 'charset' property on the policy.
+                charset = 'utf-8'
+            want_encoding = True
+        if part.token_type == 'mime-parameters':
+            # Mime parameter folding (using RFC2231) is extra special.
+            _fold_mime_parameters(part, lines, maxlen, encoding)
+            continue
+        if want_encoding and not wrap_as_ew_blocked:
+            if not part.as_ew_allowed:
+                want_encoding = False
+                last_ew = None
+                if part.syntactic_break:
+                    encoded_part = part.fold(policy=policy)[:-1] # strip nl
+                    if policy.linesep not in encoded_part:
+                        # It fits on a single line
+                        if len(encoded_part) > maxlen - len(lines[-1]):
+                            # But not on this one, so start a new one.
+                            newline = _steal_trailing_WSP_if_exists(lines)
+                            # XXX what if encoded_part has no leading FWS?
+                            lines.append(newline)
+                        lines[-1] += encoded_part
+                        continue
+                # Either this is not a major syntactic break, so we don't
+                # want it on a line by itself even if it fits, or it
+                # doesn't fit on a line by itself.  Either way, fall through
+                # to unpacking the subparts and wrapping them.
+            if not hasattr(part, 'encode'):
+                # It's not a Terminal, do each piece individually.
+                parts = list(part) + parts
+            else:
+                # It's a terminal, wrap it as an encoded word, possibly
+                # combining it with previously encoded words if allowed.
+                last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
+                                      part.ew_combine_allowed, charset)
+            want_encoding = False
+            continue
+        if len(tstr) <= maxlen - len(lines[-1]):
+            lines[-1] += tstr
+            continue
+        # This part is too long to fit.  The RFC wants us to break at
+        # "major syntactic breaks", so unless we don't consider this
+        # to be one, check if it will fit on the next line by itself.
+        if (part.syntactic_break and
+                len(tstr) + 1 <= maxlen):
+            newline = _steal_trailing_WSP_if_exists(lines)
+            if newline or part.startswith_fws():
+                lines.append(newline + tstr)
+                continue
+        if not hasattr(part, 'encode'):
+            # It's not a terminal, try folding the subparts.
+            newparts = list(part)
+            if not part.as_ew_allowed:
+                wrap_as_ew_blocked += 1
+                newparts.append(end_ew_not_allowed)
+            parts = newparts + parts
+            continue
+        if part.as_ew_allowed and not wrap_as_ew_blocked:
+            # It doesn't need CTE encoding, but encode it anyway so we can
+            # wrap it.
+            parts.insert(0, part)
+            want_encoding = True
+            continue
+        # We can't figure out how to wrap it, so give up.
+        newline = _steal_trailing_WSP_if_exists(lines)
+        if newline or part.startswith_fws():
+            lines.append(newline + tstr)
+        else:
+            # We can't fold it onto the next line either...
+            lines[-1] += tstr
+    return policy.linesep.join(lines) + policy.linesep
+
+def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
+    """Fold string to_encode into lines as encoded word, combining if allowed.
+    Return the new value for last_ew, or None if ew_combine_allowed is False.
+
+    If there is already an encoded word in the last line of lines (indicated by
+    a non-None value for last_ew) and ew_combine_allowed is true, decode the
+    existing ew, combine it with to_encode, and re-encode.  Otherwise, encode
+    to_encode.  In either case, split to_encode as necessary so that the
+    encoded segments fit within maxlen.
+
+    """
+    if last_ew is not None and ew_combine_allowed:
+        to_encode = str(
+            get_unstructured(lines[-1][last_ew:] + to_encode))
+        lines[-1] = lines[-1][:last_ew]
+    if to_encode[0] in WSP:
+        # We're joining this to non-encoded text, so don't encode
+        # the leading blank.
+        leading_wsp = to_encode[0]
+        to_encode = to_encode[1:]
+        if (len(lines[-1]) == maxlen):
+            lines.append(_steal_trailing_WSP_if_exists(lines))
+        lines[-1] += leading_wsp
+    trailing_wsp = ''
+    if to_encode[-1] in WSP:
+        # Likewise for the trailing space.
+        trailing_wsp = to_encode[-1]
+        to_encode = to_encode[:-1]
+    new_last_ew = len(lines[-1]) if last_ew is None else last_ew
+    while to_encode:
+        remaining_space = maxlen - len(lines[-1])
+        # The RFC2047 chrome takes up 7 characters plus the length
+        # of the charset name.
+        encode_as = 'utf-8' if charset == 'us-ascii' else charset
+        text_space = remaining_space - len(encode_as) - 7
+        if text_space <= 0:
+            lines.append(' ')
+            # XXX We'll get an infinite loop here if maxlen is <= 7
+            continue
+        first_part = to_encode[:text_space]
+        ew = _ew.encode(first_part, charset=encode_as)
+        excess = len(ew) - remaining_space
+        if excess > 0:
+            # encode always chooses the shortest encoding, so this
+            # is guaranteed to fit at this point.
+            first_part = first_part[:-excess]
+            ew = _ew.encode(first_part)
+        lines[-1] += ew
+        to_encode = to_encode[len(first_part):]
+        if to_encode:
+            lines.append(' ')
+            new_last_ew = len(lines[-1])
+    lines[-1] += trailing_wsp
+    return new_last_ew if ew_combine_allowed else None
+
+def _fold_mime_parameters(part, lines, maxlen, encoding):
+    """Fold TokenList 'part' into the 'lines' list as mime parameters.
+
+    Using the decoded list of parameters and values, format them according to
+    the RFC rules, including using RFC2231 encoding if the value cannot be
+    expressed in 'encoding' and/or the parameter+value is too long to fit
+    within 'maxlen'.
+
+    """
+    # Special case for RFC2231 encoding: start from decoded values and use
+    # RFC2231 encoding iff needed.
+    #
+    # Note that the 1 and 2s being added to the length calculations are
+    # accounting for the possibly-needed spaces and semicolons we'll be adding.
+    #
+    for name, value in part.params:
+        # XXX What if this ';' puts us over maxlen the first time through the
+        # loop?  We should split the header value onto a newline in that case,
+        # but to do that we need to recognize the need earlier or reparse the
+        # header, so I'm going to ignore that bug for now.  It'll only put us
+        # one character over.
+        if not lines[-1].rstrip().endswith(';'):
+            lines[-1] += ';'
+        charset = encoding
+        error_handler = 'strict'
+        try:
+            value.encode(encoding)
+            encoding_required = False
+        except UnicodeEncodeError:
+            encoding_required = True
+            if utils._has_surrogates(value):
+                charset = 'unknown-8bit'
+                error_handler = 'surrogateescape'
+            else:
+                charset = 'utf-8'
+        if encoding_required:
+            encoded_value = urllib.parse.quote(
+                value, safe='', errors=error_handler)
+            tstr = "{}*={}''{}".format(name, charset, encoded_value)
+        else:
+            tstr = '{}={}'.format(name, quote_string(value))
+        if len(lines[-1]) + len(tstr) + 1 < maxlen:
+            lines[-1] = lines[-1] + ' ' + tstr
+            continue
+        elif len(tstr) + 2 <= maxlen:
+            lines.append(' ' + tstr)
+            continue
+        # We need multiple sections.  We are allowed to mix encoded and
+        # non-encoded sections, but we aren't going to.  We'll encode them all.
+        section = 0
+        extra_chrome = charset + "''"
+        while value:
+            chrome_len = len(name) + len(str(section)) + 3 + len(extra_chrome)
+            if maxlen <= chrome_len + 3:
+                # We need room for the leading blank, the trailing semicolon,
+                # and at least one character of the value.  If we don't
+                # have that, we'd be stuck, so in that case fall back to
+                # the RFC standard width.
+                maxlen = 78
+            splitpoint = maxchars = maxlen - chrome_len - 2
+            while True:
+                partial = value[:splitpoint]
+                encoded_value = urllib.parse.quote(
+                    partial, safe='', errors=error_handler)
+                if len(encoded_value) <= maxchars:
+                    break
+                splitpoint -= 1
+            lines.append(" {}*{}*={}{}".format(
+                name, section, extra_chrome, encoded_value))
+            extra_chrome = ''
+            section += 1
+            value = value[splitpoint:]
+            if value:
+                lines[-1] += ';'
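Content-Type and Content-Disposition parameters are handled separately by _fold_mime_parameters(), which re-encodes a parameter with RFC 2231 when its value is non-ASCII or too long for a single line. A minimal sketch of that path through the public policy API (the filename and the 40-character limit are invented for illustration; the exact split points depend on the policy settings):

    from email import policy

    # A long, non-ASCII filename is percent-encoded and split into numbered
    # RFC 2231 sections (filename*0*=, filename*1*=, ...) when folded.
    narrow = policy.default.clone(max_line_length=40)
    print(narrow.fold('Content-Disposition',
                      'attachment; filename="übersicht der änderungen am header-parser.txt"'))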