summaryrefslogtreecommitdiffstats
path: root/Lib/email
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/Header.py 90
1 files changed, 61 insertions, 29 deletions
diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index 70e0bac..0f2eb32 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -1,9 +1,11 @@
# Copyright (C) 2002 Python Software Foundation
-# Author: che@debian.org (Ben Gertzfield)
+# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw)
"""Header encoding and decoding functionality."""
import re
+from types import StringType, UnicodeType
+
import email.quopriMIME
import email.base64MIME
from email.Charset import Charset
@@ -14,6 +16,12 @@ except SyntaxError:
# Python 2.1 spells integer division differently
from email._compat21 import _floordiv
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NL = '\n'
@@ -25,6 +33,9 @@ MAXLINELEN = 76
ENCODE = 1
DECODE = 2
+USASCII = Charset('us-ascii')
+UTF8 = Charset('utf-8')
+
# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
=\? # literal =?
@@ -117,21 +128,19 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
class Header:
def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
continuation_ws=' '):
- """Create a MIME-compliant header that can contain many languages.
+ """Create a MIME-compliant header that can contain many character sets.
- Specify the initial header value in s. If None, the initial header
- value is not set.
+ Optional s is the initial header value. If None, the initial header
+ value is not set. You can later append to the header with .append()
+ method calls. s may be a byte string or a Unicode string, but see the
+ .append() documentation for semantics.
- Specify both s's character set, and the default character set by
- setting the charset argument to a Charset object (not a character set
- name string!). If None, a us-ascii Charset is used as both s's
- initial charset and as the default character set for subsequent
- .append() calls.
-
- You can later append to the header with append(s, charset) below;
- charset does not have to be the same as the one initially specified
- here. In fact, it's optional, and if not given, defaults to the
- charset specified in the constructor.
+ Optional charset serves two purposes: it has the same meaning as the
+ charset argument to the .append() method. It also sets the default
+ character set for all subsequent .append() calls that omit the charset
+ argument. If charset is not provided in the constructor, the us-ascii
+ charset is used both as s's initial charset and as the default for
+ subsequent .append() calls.
The maximum line length can be specified explicitly via maxlinelen. For
splitting the first line to a shorter value (to account for the field
@@ -143,7 +152,7 @@ class Header:
lines.
"""
if charset is None:
- charset = Charset()
+ charset = USASCII
self._charset = charset
self._continuation_ws = continuation_ws
cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
@@ -186,20 +195,43 @@ class Header:
return not self == other
def append(self, s, charset=None):
- """Append string s with Charset charset to the MIME header.
-
- If charset is given, it should be a Charset instance, or the name of a
- character set (which will be converted to a Charset instance). A
- value of None (the default) means charset is the one given in the
- class constructor.
+ """Append a string to the MIME header.
+
+ Optional charset, if given, should be a Charset instance or the name
+ of a character set (which will be converted to a Charset instance). A
+ value of None (the default) means that the charset given in the
+ constructor is used.
+
+ s may be a byte string or a Unicode string. If it is a byte string
+ (i.e. isinstance(s, StringType) is true), then charset is the encoding
+ of that byte string, and a UnicodeError will be raised if the string
+ cannot be decoded with that charset. If `s' is a Unicode string, then
+ charset is a hint specifying the character set of the characters in
+ the string. In this case, when producing an RFC 2822 compliant header
+ using RFC 2047 rules, the Unicode string will be encoded using the
+ following charsets in order: us-ascii, the charset hint, utf-8.
"""
if charset is None:
charset = self._charset
elif not isinstance(charset, Charset):
charset = Charset(charset)
+ # Normalize and check the string
+ if isinstance(s, StringType):
+ # Possibly raise UnicodeError if it can't be decoded
+ unicode(s, charset.get_output_charset())
+ elif isinstance(s, UnicodeType):
+ # Convert Unicode to byte string for later concatenation
+ for charset in USASCII, charset, UTF8:
+ try:
+ s = s.encode(charset.get_output_charset())
+ break
+ except UnicodeError:
+ pass
+ else:
+ assert False, 'Could not encode to utf-8'
self._chunks.append((s, charset))
- def _split(self, s, charset, firstline=0):
+ def _split(self, s, charset, firstline=False):
# Split up a header safely for use with encode_chunks. BAW: this
# appears to be a private convenience method.
splittable = charset.to_splittable(s)
@@ -227,13 +259,13 @@ class Header:
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
splitpnt = self._maxlinelen
- first = charset.from_splittable(splittable[:splitpnt], 0)
- last = charset.from_splittable(splittable[splitpnt:], 0)
+ first = charset.from_splittable(splittable[:splitpnt], False)
+ last = charset.from_splittable(splittable[splitpnt:], False)
else:
# Divide and conquer.
halfway = _floordiv(len(splittable), 2)
- first = charset.from_splittable(splittable[:halfway], 0)
- last = charset.from_splittable(splittable[halfway:], 0)
+ first = charset.from_splittable(splittable[:halfway], False)
+ last = charset.from_splittable(splittable[halfway:], False)
# Do the split
return self._split(first, charset, firstline) + \
self._split(last, charset)
@@ -248,7 +280,7 @@ class Header:
line = lines.pop(0)
if firstline:
maxlinelen = self._firstlinelen
- firstline = 0
+ firstline = False
else:
#line = line.lstrip()
maxlinelen = self._maxlinelen
@@ -338,7 +370,7 @@ class Header:
# There's no encoding for this chunk's charsets
_max_append(chunks, header, self._maxlinelen)
else:
- _max_append(chunks, charset.header_encode(header, 0),
+ _max_append(chunks, charset.header_encode(header),
self._maxlinelen, ' ')
joiner = NL + self._continuation_ws
return joiner.join(chunks)
@@ -363,6 +395,6 @@ class Header:
"""
newchunks = []
for s, charset in self._chunks:
- newchunks += self._split(s, charset, 1)
+ newchunks += self._split(s, charset, True)
self._chunks = newchunks
return self._encode_chunks()