__unicode__(): When converting to a unicode string, we need to preserve spaces at the boundaries between encoded and unencoded words. RFC 2047 is ambiguous here, but most people expect the space to be preserved. Really closes SF bug #640110.
author: Barry Warsaw <barry@python.org> 2003-03-06 16:10:30 (GMT)
committer: Barry Warsaw <barry@python.org> 2003-03-06 16:10:30 (GMT)
commit: 484880534160893c610e5e3a723cf8a3d9f8f116 (patch)
tree: 6efbda060c129331f77c7791e4749c6a71fa16b1 /Lib/email/Header.py
parent: e05dcce686692d0078792a1a542d889bd2ef91f5 (diff)
download: cpython-484880534160893c610e5e3a723cf8a3d9f8f116.zip
cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.gz
cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.bz2
1 file changed, 20 insertions, 3 deletions
diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index a6a1b07..47a5508 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
 CRLF = '\r\n'
 NL = '\n'
 SPACE = ' '
+USPACE = u' '
 SPACE8 = ' ' * 8
 EMPTYSTRING = ''
+UEMPTYSTRING = u''
 
 MAXLINELEN = 76
 
@@ -204,9 +206,24 @@ class Header:
 
     def __unicode__(self):
         """Helper for the built-in unicode function."""
-        # charset item is a Charset instance so we need to stringify it.
-        uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
-        return u''.join(uchunks)
+        uchunks = []
+        lastcs = None
+        for s, charset in self._chunks:
+            # We must preserve spaces between encoded and non-encoded word
+            # boundaries, which means for us we need to add a space when we go
+            # from a charset to None/us-ascii, or from None/us-ascii to a
+            # charset.  Only do this for the second and subsequent chunks.
+            nextcs = charset
+            if uchunks:
+                if lastcs is not None:
+                    if nextcs is None or nextcs == 'us-ascii':
+                        uchunks.append(USPACE)
+                        nextcs = None
+                elif nextcs is not None and nextcs <> 'us-ascii':
+                    uchunks.append(USPACE)
+            lastcs = nextcs
+            uchunks.append(unicode(s, str(charset)))
+        return UEMPTYSTRING.join(uchunks)
 
     # Rich comparison operators for equality only.  BAW: does it make sense to
     # have or explicitly disable <, <=, >, >= operators?
author	Barry Warsaw <barry@python.org>	2003-03-06 16:10:30 (GMT)
committer	Barry Warsaw <barry@python.org>	2003-03-06 16:10:30 (GMT)
commit	484880534160893c610e5e3a723cf8a3d9f8f116 (patch)
tree	6efbda060c129331f77c7791e4749c6a71fa16b1 /Lib/email/Header.py
parent	e05dcce686692d0078792a1a542d889bd2ef91f5 (diff)
download	cpython-484880534160893c610e5e3a723cf8a3d9f8f116.zip cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.gz cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.bz2