diff options
author | Barry Warsaw <barry@python.org> | 2003-03-06 16:10:30 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2003-03-06 16:10:30 (GMT) |
commit | 484880534160893c610e5e3a723cf8a3d9f8f116 (patch) | |
tree | 6efbda060c129331f77c7791e4749c6a71fa16b1 | |
parent | e05dcce686692d0078792a1a542d889bd2ef91f5 (diff) | |
download | cpython-484880534160893c610e5e3a723cf8a3d9f8f116.zip cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.gz cpython-484880534160893c610e5e3a723cf8a3d9f8f116.tar.bz2 |
__unicode__(): When converting to a unicode string, we need to
preserve spaces in the encoded/unencoded word boundaries. RFC 2047 is
ambiguous here, but most people expect the space to be preserved.
Really closes SF bug # 640110.
-rw-r--r-- | Lib/email/Header.py | 23 |
1 file changed, 20 insertions, 3 deletions
```diff
diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index a6a1b07..47a5508 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
 CRLF = '\r\n'
 NL = '\n'
 SPACE = ' '
+USPACE = u' '
 SPACE8 = ' ' * 8
 EMPTYSTRING = ''
+UEMPTYSTRING = u''
 
 MAXLINELEN = 76
 
@@ -204,9 +206,24 @@ class Header:
 
     def __unicode__(self):
         """Helper for the built-in unicode function."""
-        # charset item is a Charset instance so we need to stringify it.
-        uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
-        return u''.join(uchunks)
+        uchunks = []
+        lastcs = None
+        for s, charset in self._chunks:
+            # We must preserve spaces between encoded and non-encoded word
+            # boundaries, which means for us we need to add a space when we go
+            # from a charset to None/us-ascii, or from None/us-ascii to a
+            # charset. Only do this for the second and subsequent chunks.
+            nextcs = charset
+            if uchunks:
+                if lastcs is not None:
+                    if nextcs is None or nextcs == 'us-ascii':
+                        uchunks.append(USPACE)
+                        nextcs = None
+                elif nextcs is not None and nextcs <> 'us-ascii':
+                    uchunks.append(USPACE)
+            lastcs = nextcs
+            uchunks.append(unicode(s, str(charset)))
+        return UEMPTYSTRING.join(uchunks)
 
     # Rich comparison operators for equality only. BAW: does it make sense to
     # have or explicitly disable <, <=, >, >= operators?
```