diff options
Diffstat (limited to 'Lib/email/utils.py')
-rw-r--r-- | Lib/email/utils.py | 61 |
1 files changed, 41 insertions, 20 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 250eb19..26ebb0e 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -25,6 +25,7 @@ import time import base64 import random import socket +import urllib import warnings from cStringIO import StringIO @@ -45,6 +46,7 @@ COMMASPACE = ', ' EMPTYSTRING = '' UEMPTYSTRING = u'' CRLF = '\r\n' +TICK = "'" specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[][\\()"]') @@ -230,12 +232,14 @@ def unquote(str): # RFC2231-related functions - parameter encoding and decoding def decode_rfc2231(s): """Decode string according to RFC 2231""" - import urllib - parts = s.split("'", 2) - if len(parts) == 1: - return None, None, urllib.unquote(s) - charset, language, s = parts - return charset, language, urllib.unquote(s) + parts = s.split(TICK, 2) + if len(parts) <= 2: + return None, None, s + if len(parts) > 3: + charset, language = parts[:2] + s = TICK.join(parts[2:]) + return charset, language, s + return parts def encode_rfc2231(s, charset=None, language=None): @@ -259,37 +263,54 @@ rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$') def decode_params(params): """Decode parameters list according to RFC 2231. - params is a sequence of 2-tuples containing (content type, string value). + params is a sequence of 2-tuples containing (param name, string value). """ + # Copy params so we don't mess with the original + params = params[:] new_params = [] - # maps parameter's name to a list of continuations + # Map parameter's name to a list of continuations. The values are a + # 3-tuple of the continuation number, the string value, and a flag + # specifying whether a particular segment is %-encoded. rfc2231_params = {} - # params is a sequence of 2-tuples containing (content_type, string value) - name, value = params[0] + name, value = params.pop(0) new_params.append((name, value)) - # Cycle through each of the rest of the parameters. - for name, value in params[1:]: + while params: + name, value = params.pop(0) + if name.endswith('*'): + encoded = True + else: + encoded = False value = unquote(value) mo = rfc2231_continuation.match(name) if mo: name, num = mo.group('name', 'num') if num is not None: num = int(num) - rfc2231_param1 = rfc2231_params.setdefault(name, []) - rfc2231_param1.append((num, value)) + rfc2231_params.setdefault(name, []).append((num, value, encoded)) else: new_params.append((name, '"%s"' % quote(value))) if rfc2231_params: for name, continuations in rfc2231_params.items(): value = [] + extended = False # Sort by number continuations.sort() - # And now append all values in num order - for num, continuation in continuations: - value.append(continuation) - charset, language, value = decode_rfc2231(EMPTYSTRING.join(value)) - new_params.append( - (name, (charset, language, '"%s"' % quote(value)))) + # And now append all values in numerical order, converting + # %-encodings for the encoded segments. If any of the + # continuation names ends in a *, then the entire string, after + # decoding segments and concatenating, must have the charset and + # language specifiers at the beginning of the string. + for num, s, encoded in continuations: + if encoded: + s = urllib.unquote(s) + extended = True + value.append(s) + value = quote(EMPTYSTRING.join(value)) + if extended: + charset, language, value = decode_rfc2231(value) + new_params.append((name, (charset, language, '"%s"' % value))) + else: + new_params.append((name, '"%s"' % value)) return new_params def collapse_rfc2231_value(value, errors='replace', |