summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin v. Löwis <martin@v.loewis.de> 2007-07-28 17:52:25 (GMT)
committer Martin v. Löwis <martin@v.loewis.de> 2007-07-28 17:52:25 (GMT)
commit c582bfca262ed1f1ded9c6088d9181b763474579 (patch)
tree b43c14b72d253f0007301cc5b47834ca04e4c27e
parent f3f0c611dd6212e87b8ead06f531ee109ab2d5d3 (diff)
download cpython-c582bfca262ed1f1ded9c6088d9181b763474579.zip
cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.gz
cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.bz2
Fix quopri to operate consistently on bytes.
-rwxr-xr-x Lib/quopri.py 97
-rw-r--r-- Lib/test/test_quopri.py 65
2 files changed, 87 insertions, 75 deletions
diff --git a/Lib/quopri.py b/Lib/quopri.py
index 5002cc8..62c0503 100755
--- a/Lib/quopri.py
+++ b/Lib/quopri.py
@@ -6,10 +6,10 @@
__all__ = ["encode", "decode", "encodestring", "decodestring"]
-ESCAPE = '='
+ESCAPE = b'='
MAXLINESIZE = 76
-HEX = '0123456789ABCDEF'
-EMPTYSTRING = ''
+HEX = b'0123456789ABCDEF'
+EMPTYSTRING = b''
try:
from binascii import a2b_qp, b2a_qp
@@ -19,23 +19,25 @@ except ImportError:
def needsquoting(c, quotetabs, header):
- """Decide whether a particular character needs to be quoted.
+ """Decide whether a particular byte ordinal needs to be quoted.
The 'quotetabs' flag indicates whether embedded tabs and spaces should be
quoted. Note that line-ending tabs and spaces are always encoded, as per
RFC 1521.
"""
- if c in ' \t':
+ assert isinstance(c, bytes)
+ if c in b' \t':
return quotetabs
# if header, we have to escape _ because _ is used to escape space
- if c == '_':
+ if c == b'_':
return header
- return c == ESCAPE or not (' ' <= c <= '~')
+ return c == ESCAPE or not (b' ' <= c <= b'~')
def quote(c):
"""Quote a single character."""
- i = ord(c)
- return ESCAPE + HEX[i//16] + HEX[i%16]
+ assert isinstance(c, bytes) and len(c)==1
+ c = ord(c)
+ return ESCAPE + bytes((HEX[c//16], HEX[c%16]))
@@ -56,12 +58,12 @@ def encode(input, output, quotetabs, header = 0):
output.write(odata)
return
- def write(s, output=output, lineEnd='\n'):
+ def write(s, output=output, lineEnd=b'\n'):
# RFC 1521 requires that the line ending in a space or tab must have
# that trailing character encoded.
- if s and s[-1:] in ' \t':
- output.write(s[:-1] + quote(s[-1]) + lineEnd)
- elif s == '.':
+ if s and s[-1:] in b' \t':
+ output.write(s[:-1] + quote(s[-1:]) + lineEnd)
+ elif s == b'.':
output.write(quote(s) + lineEnd)
else:
output.write(s + lineEnd)
@@ -73,16 +75,17 @@ def encode(input, output, quotetabs, header = 0):
break
outline = []
# Strip off any readline induced trailing newline
- stripped = ''
- if line[-1:] == '\n':
+ stripped = b''
+ if line[-1:] == b'\n':
line = line[:-1]
- stripped = '\n'
+ stripped = b'\n'
# Calculate the un-length-limited encoded line
for c in line:
+ c = bytes((c,))
if needsquoting(c, quotetabs, header):
c = quote(c)
- if header and c == ' ':
- outline.append('_')
+ if header and c == b' ':
+ outline.append(b'_')
else:
outline.append(c)
# First, write out the previous line
@@ -94,7 +97,7 @@ def encode(input, output, quotetabs, header = 0):
while len(thisline) > MAXLINESIZE:
# Don't forget to include the soft line break `=' sign in the
# length calculation!
- write(thisline[:MAXLINESIZE-1], lineEnd='=\n')
+ write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
thisline = thisline[MAXLINESIZE-1:]
# Write out the current line
prevline = thisline
@@ -105,9 +108,9 @@ def encode(input, output, quotetabs, header = 0):
def encodestring(s, quotetabs = 0, header = 0):
if b2a_qp is not None:
return b2a_qp(s, quotetabs = quotetabs, header = header)
- from io import StringIO
- infp = StringIO(s)
- outfp = StringIO()
+ from io import BytesIO
+ infp = BytesIO(s)
+ outfp = BytesIO()
encode(infp, outfp, quotetabs, header)
return outfp.getvalue()
@@ -124,44 +127,44 @@ def decode(input, output, header = 0):
output.write(odata)
return
- new = ''
+ new = b''
while 1:
line = input.readline()
if not line: break
i, n = 0, len(line)
- if n > 0 and line[n-1] == '\n':
+ if n > 0 and line[n-1:n] == b'\n':
partial = 0; n = n-1
# Strip trailing whitespace
- while n > 0 and line[n-1] in " \t\r":
+ while n > 0 and line[n-1:n] in b" \t\r":
n = n-1
else:
partial = 1
while i < n:
- c = line[i]
- if c == '_' and header:
- new = new + ' '; i = i+1
+ c = line[i:i+1]
+ if c == b'_' and header:
+ new = new + b' '; i = i+1
elif c != ESCAPE:
new = new + c; i = i+1
elif i+1 == n and not partial:
partial = 1; break
elif i+1 < n and line[i+1] == ESCAPE:
new = new + ESCAPE; i = i+2
- elif i+2 < n and ishex(line[i+1]) and ishex(line[i+2]):
- new = new + chr(unhex(line[i+1:i+3])); i = i+3
+ elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
+ new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3
else: # Bad escape sequence -- leave it in
new = new + c; i = i+1
if not partial:
- output.write(new + '\n')
- new = ''
+ output.write(new + b'\n')
+ new = b''
if new:
output.write(new)
def decodestring(s, header = 0):
if a2b_qp is not None:
return a2b_qp(s, header = header)
- from io import StringIO
- infp = StringIO(s)
- outfp = StringIO()
+ from io import BytesIO
+ infp = BytesIO(s)
+ outfp = BytesIO()
decode(infp, outfp, header = header)
return outfp.getvalue()
@@ -169,21 +172,23 @@ def decodestring(s, header = 0):
# Other helper functions
def ishex(c):
- """Return true if the character 'c' is a hexadecimal digit."""
- return '0' <= c <= '9' or 'a' <= c <= 'f' or 'A' <= c <= 'F'
+ """Return true if the byte ordinal 'c' is a hexadecimal digit in ASCII."""
+ assert isinstance(c, bytes)
+ return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F'
def unhex(s):
"""Get the integer value of a hexadecimal number."""
bits = 0
for c in s:
- if '0' <= c <= '9':
+ c = bytes((c,))
+ if b'0' <= c <= b'9':
i = ord('0')
- elif 'a' <= c <= 'f':
+ elif b'a' <= c <= b'f':
i = ord('a')-10
- elif 'A' <= c <= 'F':
- i = ord('A')-10
+ elif b'A' <= c <= b'F':
+ i = ord(b'A')-10
else:
- break
+ assert False, "non-hex digit "+repr(c)
bits = bits*16 + (ord(c) - i)
return bits
@@ -214,18 +219,18 @@ def main():
sts = 0
for file in args:
if file == '-':
- fp = sys.stdin
+ fp = sys.stdin.buffer
else:
try:
- fp = open(file)
+ fp = open(file, "rb")
except IOError as msg:
sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
sts = 1
continue
if deco:
- decode(fp, sys.stdout)
+ decode(fp, sys.stdout.buffer)
else:
- encode(fp, sys.stdout, tabs)
+ encode(fp, sys.stdout.buffer, tabs)
if fp is not sys.stdin:
fp.close()
if sts:
diff --git a/Lib/test/test_quopri.py b/Lib/test/test_quopri.py
index a9e9e80..960759a 100644
--- a/Lib/test/test_quopri.py
+++ b/Lib/test/test_quopri.py
@@ -6,7 +6,7 @@ import quopri
-ENCSAMPLE = """\
+ENCSAMPLE = b"""\
Here's a bunch of special=20
=A1=A2=A3=A4=A5=A6=A7=A8=A9
@@ -25,8 +25,8 @@ characters... have fun!
"""
# First line ends with a space
-DECSAMPLE = "Here's a bunch of special \n" + \
-"""\
+DECSAMPLE = b"Here's a bunch of special \n" + \
+b"""\
\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9
\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3
@@ -67,48 +67,48 @@ class QuopriTestCase(unittest.TestCase):
# used in the "quotetabs=0" tests.
STRINGS = (
# Some normal strings
- ('hello', 'hello'),
- ('''hello
+ (b'hello', b'hello'),
+ (b'''hello
there
- world''', '''hello
+ world''', b'''hello
there
world'''),
- ('''hello
+ (b'''hello
there
world
-''', '''hello
+''', b'''hello
there
world
'''),
- ('\201\202\203', '=81=82=83'),
+ (b'\201\202\203', b'=81=82=83'),
# Add some trailing MUST QUOTE strings
- ('hello ', 'hello=20'),
- ('hello\t', 'hello=09'),
+ (b'hello ', b'hello=20'),
+ (b'hello\t', b'hello=09'),
# Some long lines. First, a single line of 108 characters
- ('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
- '''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
+ (b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
+ b'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''),
# A line of exactly 76 characters, no soft line break should be needed
- ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
- 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),
+ (b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
+ b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),
# A line of 77 characters, forcing a soft line break at position 75,
# and a second line of exactly 2 characters (because the soft line
# break `=' sign counts against the line length limit).
- ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
- '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
+ (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
+ b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
zz'''),
# A line of 151 characters, forcing a soft line break at position 75,
# with a second line of exactly 76 characters and no trailing =
- ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
- '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
+ (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
+ b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),
# A string containing a hard line break, but which the first line is
# 151 characters and the second line is exactly 76 characters. This
# should leave us with three lines, the first which has a soft line
# break, and which the second and third do not.
- ('''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+ (b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''',
- '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
+ b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),
# Now some really complex stuff ;)
@@ -117,14 +117,14 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
# These are used in the "quotetabs=1" tests.
ESTRINGS = (
- ('hello world', 'hello=20world'),
- ('hello\tworld', 'hello=09world'),
+ (b'hello world', b'hello=20world'),
+ (b'hello\tworld', b'hello=09world'),
)
# These are used in the "header=1" tests.
HSTRINGS = (
- ('hello world', 'hello_world'),
- ('hello_world', 'hello=5Fworld'),
+ (b'hello world', b'hello_world'),
+ (b'hello_world', b'hello=5Fworld'),
)
@withpythonimplementation
@@ -161,18 +161,18 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
@withpythonimplementation
def test_embedded_ws(self):
for p, e in self.ESTRINGS:
- self.assert_(quopri.encodestring(p, quotetabs=True) == e)
+ self.assertEqual(quopri.encodestring(p, quotetabs=True), e)
self.assertEqual(quopri.decodestring(e), p)
@withpythonimplementation
def test_encode_header(self):
for p, e in self.HSTRINGS:
- self.assert_(quopri.encodestring(p, header=True) == e)
+ self.assertEqual(quopri.encodestring(p, header=True), e)
@withpythonimplementation
def test_decode_header(self):
for p, e in self.HSTRINGS:
- self.assert_(quopri.decodestring(e, header=True) == p)
+ self.assertEqual(quopri.decodestring(e, header=True), p)
def test_scriptencode(self):
(p, e) = self.STRINGS[-1]
@@ -182,13 +182,20 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
# On Windows, Python will output the result to stdout using
# CRLF, as the mode of stdout is text mode. To compare this
# with the expected result, we need to do a line-by-line comparison.
- self.assertEqual(cout.splitlines(), e.splitlines())
+ cout = cout.decode('latin-1').splitlines()
+ e = e.decode('latin-1').splitlines()
+ assert len(cout)==len(e)
+ for i in range(len(cout)):
+ self.assertEqual(cout[i], e[i])
+ self.assertEqual(cout, e)
def test_scriptdecode(self):
(p, e) = self.STRINGS[-1]
process = subprocess.Popen([sys.executable, "-mquopri", "-d"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
cout, cerr = process.communicate(e)
+ cout = cout.decode('latin-1')
+ p = p.decode('latin-1')
self.assertEqual(cout.splitlines(), p.splitlines())
def test_main():