summary | refs | log | tree | commit | diff | stats
path: root/Lib/email
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org>, 2003-03-06 16:10:30 (GMT)
committer: Barry Warsaw <barry@python.org>, 2003-03-06 16:10:30 (GMT)
commit: 484880534160893c610e5e3a723cf8a3d9f8f116 (patch)
tree: 6efbda060c129331f77c7791e4749c6a71fa16b1 /Lib/email
parent: e05dcce686692d0078792a1a542d889bd2ef91f5 (diff)
download: cpython-484880534160893c610e5e3a723cf8a3d9f8f116.zip
cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.gz
cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.bz2
__unicode__(): When converting to a unicode string, we need to preserve spaces at the boundaries between encoded and unencoded words. RFC 2047 is ambiguous here, but most people expect the space to be preserved. Really closes SF bug #640110.
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/Header.py 23
1 file changed, 20 insertions, 3 deletions
diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index a6a1b07..47a5508 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
CRLF = '\r\n'
NL = '\n'
SPACE = ' '
+USPACE = u' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
+UEMPTYSTRING = u''
MAXLINELEN = 76
@@ -204,9 +206,24 @@ class Header:
def __unicode__(self):
"""Helper for the built-in unicode function."""
- # charset item is a Charset instance so we need to stringify it.
- uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
- return u''.join(uchunks)
+ uchunks = []
+ lastcs = None
+ for s, charset in self._chunks:
+ # We must preserve spaces between encoded and non-encoded word
+ # boundaries, which means for us we need to add a space when we go
+ # from a charset to None/us-ascii, or from None/us-ascii to a
+ # charset. Only do this for the second and subsequent chunks.
+ nextcs = charset
+ if uchunks:
+ if lastcs is not None:
+ if nextcs is None or nextcs == 'us-ascii':
+ uchunks.append(USPACE)
+ nextcs = None
+ elif nextcs is not None and nextcs <> 'us-ascii':
+ uchunks.append(USPACE)
+ lastcs = nextcs
+ uchunks.append(unicode(s, str(charset)))
+ return UEMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
# have or explicitly disable <, <=, >, >= operators?