summary | refs | log | tree | commit | diff | stats
path: root/Lib/email/utils.py
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com>, 2012-05-25 22:42:14 (GMT)
committer: R David Murray <rdmurray@bitdance.com>, 2012-05-25 22:42:14 (GMT)
commit0b6f6c82b51b7071d88f48abb3192bf3dc2a2d24 (patch)
treed6bd5f56722b8fff6db8bdf39b47b1c4a87a3d42 /Lib/email/utils.py
parent0fa2edd08f7b2b028f61a22fab9a648d58699c0b (diff)
downloadcpython-0b6f6c82b51b7071d88f48abb3192bf3dc2a2d24.zip
cpython-0b6f6c82b51b7071d88f48abb3192bf3dc2a2d24.tar.gz
cpython-0b6f6c82b51b7071d88f48abb3192bf3dc2a2d24.tar.bz2
#12586: add provisional email policy with new header parsing and folding.
When the new policies are used (and only when the new policies are explicitly used) headers turn into objects that have attributes based on their parsed values, and can be set using objects that encapsulate the values, as well as set directly from unicode strings. The folding algorithm then takes care of encoding unicode where needed, and folding according to the highest level syntactic objects. With this patch only date and time headers are parsed as anything other than unstructured, but that is all the helper methods in the existing API handle. I do plan to add more parsers, and complete the set specified in the RFC before the package becomes stable.
Diffstat (limited to 'Lib/email/utils.py')
-rw-r--r-- Lib/email/utils.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index b82d5c5..b7e1bb9 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -62,6 +62,13 @@ escapesre = re.compile(r'[\\"]')
_has_surrogates = re.compile(
'([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+# How to deal with a string containing bytes before handing it to the
+# application through the 'normal' interface.
+def _sanitize(string):
+ # Turn any escaped bytes into unicode 'unknown' char.
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ return original_bytes.decode('ascii', 'replace')
+
# Helpers