Fix quopri to operate consistently on bytes.

author: Martin v. Löwis <martin@v.loewis.de> 2007-07-28 17:52:25 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2007-07-28 17:52:25 (GMT)
commit: c582bfca262ed1f1ded9c6088d9181b763474579 (patch)
tree: b43c14b72d253f0007301cc5b47834ca04e4c27e
parent: f3f0c611dd6212e87b8ead06f531ee109ab2d5d3 (diff)
download: cpython-c582bfca262ed1f1ded9c6088d9181b763474579.zip cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.gz cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.bz2
2 files changed, 87 insertions, 75 deletions
diff --git a/Lib/quopri.py b/Lib/quopri.py
index 5002cc8..62c0503 100755
--- a/Lib/quopri.py
+++ b/Lib/quopri.py
@@ -6,10 +6,10 @@
 
 __all__ = ["encode", "decode", "encodestring", "decodestring"]
 
-ESCAPE = '='
+ESCAPE = b'='
 MAXLINESIZE = 76
-HEX = '0123456789ABCDEF'
-EMPTYSTRING = ''
+HEX = b'0123456789ABCDEF'
+EMPTYSTRING = b''
 
 try:
     from binascii import a2b_qp, b2a_qp
@@ -19,23 +19,25 @@ except ImportError:
 
 
 def needsquoting(c, quotetabs, header):
-    """Decide whether a particular character needs to be quoted.
+    """Decide whether a single byte (a length-1 bytes object) needs quoting.
 
     The 'quotetabs' flag indicates whether embedded tabs and spaces should be
     quoted.  Note that line-ending tabs and spaces are always encoded, as per
     RFC 1521.
     """
-    if c in ' \t':
+    assert isinstance(c, bytes)
+    if c in b' \t':
         return quotetabs
     # if header, we have to escape _ because _ is used to escape space
-    if c == '_':
+    if c == b'_':
         return header
-    return c == ESCAPE or not (' ' <= c <= '~')
+    return c == ESCAPE or not (b' ' <= c <= b'~')
 
 def quote(c):
     """Quote a single character."""
-    i = ord(c)
-    return ESCAPE + HEX[i//16] + HEX[i%16]
+    assert isinstance(c, bytes) and len(c)==1
+    c = ord(c)
+    return ESCAPE + bytes((HEX[c//16], HEX[c%16]))
 
 
 
@@ -56,12 +58,12 @@ def encode(input, output, quotetabs, header = 0):
         output.write(odata)
         return
 
-    def write(s, output=output, lineEnd='\n'):
+    def write(s, output=output, lineEnd=b'\n'):
         # RFC 1521 requires that the line ending in a space or tab must have
         # that trailing character encoded.
-        if s and s[-1:] in ' \t':
-            output.write(s[:-1] + quote(s[-1]) + lineEnd)
-        elif s == '.':
+        if s and s[-1:] in b' \t':
+            output.write(s[:-1] + quote(s[-1:]) + lineEnd)
+        elif s == b'.':
             output.write(quote(s) + lineEnd)
         else:
             output.write(s + lineEnd)
@@ -73,16 +75,17 @@ def encode(input, output, quotetabs, header = 0):
             break
         outline = []
         # Strip off any readline induced trailing newline
-        stripped = ''
-        if line[-1:] == '\n':
+        stripped = b''
+        if line[-1:] == b'\n':
             line = line[:-1]
-            stripped = '\n'
+            stripped = b'\n'
         # Calculate the un-length-limited encoded line
         for c in line:
+            c = bytes((c,))
             if needsquoting(c, quotetabs, header):
                 c = quote(c)
-            if header and c == ' ':
-                outline.append('_')
+            if header and c == b' ':
+                outline.append(b'_')
             else:
                 outline.append(c)
         # First, write out the previous line
@@ -94,7 +97,7 @@ def encode(input, output, quotetabs, header = 0):
         while len(thisline) > MAXLINESIZE:
             # Don't forget to include the soft line break `=' sign in the
             # length calculation!
-            write(thisline[:MAXLINESIZE-1], lineEnd='=\n')
+            write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
             thisline = thisline[MAXLINESIZE-1:]
         # Write out the current line
         prevline = thisline
@@ -105,9 +108,9 @@ def encode(input, output, quotetabs, header = 0):
 def encodestring(s, quotetabs = 0, header = 0):
     if b2a_qp is not None:
         return b2a_qp(s, quotetabs = quotetabs, header = header)
-    from io import StringIO
-    infp = StringIO(s)
-    outfp = StringIO()
+    from io import BytesIO
+    infp = BytesIO(s)
+    outfp = BytesIO()
     encode(infp, outfp, quotetabs, header)
     return outfp.getvalue()
 
@@ -124,44 +127,44 @@ def decode(input, output, header = 0):
         output.write(odata)
         return
 
-    new = ''
+    new = b''
     while 1:
         line = input.readline()
         if not line: break
         i, n = 0, len(line)
-        if n > 0 and line[n-1] == '\n':
+        if n > 0 and line[n-1:n] == b'\n':
             partial = 0; n = n-1
             # Strip trailing whitespace
-            while n > 0 and line[n-1] in " \t\r":
+            while n > 0 and line[n-1:n] in b" \t\r":
                 n = n-1
         else:
             partial = 1
         while i < n:
-            c = line[i]
-            if c == '_' and header:
-                new = new + ' '; i = i+1
+            c = line[i:i+1]
+            if c == b'_' and header:
+                new = new + b' '; i = i+1
             elif c != ESCAPE:
                 new = new + c; i = i+1
             elif i+1 == n and not partial:
                 partial = 1; break
             elif i+1 < n and line[i+1] == ESCAPE:
                 new = new + ESCAPE; i = i+2
-            elif i+2 < n and ishex(line[i+1]) and ishex(line[i+2]):
-                new = new + chr(unhex(line[i+1:i+3])); i = i+3
+            elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
+                new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3
             else: # Bad escape sequence -- leave it in
                 new = new + c; i = i+1
         if not partial:
-            output.write(new + '\n')
-            new = ''
+            output.write(new + b'\n')
+            new = b''
     if new:
         output.write(new)
 
 def decodestring(s, header = 0):
     if a2b_qp is not None:
         return a2b_qp(s, header = header)
-    from io import StringIO
-    infp = StringIO(s)
-    outfp = StringIO()
+    from io import BytesIO
+    infp = BytesIO(s)
+    outfp = BytesIO()
     decode(infp, outfp, header = header)
     return outfp.getvalue()
 
@@ -169,21 +172,23 @@ def decodestring(s, header = 0):
 
 # Other helper functions
 def ishex(c):
-    """Return true if the character 'c' is a hexadecimal digit."""
-    return '0' <= c <= '9' or 'a' <= c <= 'f' or 'A' <= c <= 'F'
+    """Return true if 'c' (a length-1 bytes object) is an ASCII hex digit."""
+    assert isinstance(c, bytes)
+    return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F'
 
 def unhex(s):
     """Get the integer value of a hexadecimal number."""
     bits = 0
     for c in s:
-        if '0' <= c <= '9':
+        c = bytes((c,))
+        if b'0' <= c <= b'9':
             i = ord('0')
-        elif 'a' <= c <= 'f':
+        elif b'a' <= c <= b'f':
             i = ord('a')-10
-        elif 'A' <= c <= 'F':
-            i = ord('A')-10
+        elif b'A' <= c <= b'F':
+            i = ord(b'A')-10
         else:
-            break
+            assert False, "non-hex digit "+repr(c)
         bits = bits*16 + (ord(c) - i)
     return bits
 
@@ -214,18 +219,18 @@ def main():
     sts = 0
     for file in args:
         if file == '-':
-            fp = sys.stdin
+            fp = sys.stdin.buffer
         else:
             try:
-                fp = open(file)
+                fp = open(file, "rb")
             except IOError as msg:
                 sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
                 sts = 1
                 continue
         if deco:
-            decode(fp, sys.stdout)
+            decode(fp, sys.stdout.buffer)
         else:
-            encode(fp, sys.stdout, tabs)
+            encode(fp, sys.stdout.buffer, tabs)
         if fp is not sys.stdin:
             fp.close()
     if sts:
diff --git a/Lib/test/test_quopri.py b/Lib/test/test_quopri.py
index a9e9e80..960759a 100644
--- a/Lib/test/test_quopri.py
+++ b/Lib/test/test_quopri.py
@@ -6,7 +6,7 @@ import quopri
 
 
 
-ENCSAMPLE = """\
+ENCSAMPLE = b"""\
 Here's a bunch of special=20
 
 =A1=A2=A3=A4=A5=A6=A7=A8=A9
@@ -25,8 +25,8 @@ characters... have fun!
 """
 
 # First line ends with a space
-DECSAMPLE = "Here's a bunch of special \n" + \
-"""\
+DECSAMPLE = b"Here's a bunch of special \n" + \
+b"""\
 
 \xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9
 \xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3
@@ -67,48 +67,48 @@ class QuopriTestCase(unittest.TestCase):
     # used in the "quotetabs=0" tests.
     STRINGS = (
         # Some normal strings
-        ('hello', 'hello'),
-        ('''hello
+        (b'hello', b'hello'),
+        (b'''hello
         there
-        world''', '''hello
+        world''', b'''hello
         there
         world'''),
-        ('''hello
+        (b'''hello
         there
         world
-''', '''hello
+''', b'''hello
         there
         world
 '''),
-        ('\201\202\203', '=81=82=83'),
+        (b'\201\202\203', b'=81=82=83'),
         # Add some trailing MUST QUOTE strings
-        ('hello ', 'hello=20'),
-        ('hello\t', 'hello=09'),
+        (b'hello ', b'hello=20'),
+        (b'hello\t', b'hello=09'),
         # Some long lines.  First, a single line of 108 characters
-        ('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
-         '''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
+        (b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
+         b'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''),
         # A line of exactly 76 characters, no soft line break should be needed
-        ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
-        'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),
+        (b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
+        b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),
         # A line of 77 characters, forcing a soft line break at position 75,
         # and a second line of exactly 2 characters (because the soft line
         # break `=' sign counts against the line length limit).
-        ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
-         '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
+        (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
+         b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
 zz'''),
         # A line of 151 characters, forcing a soft line break at position 75,
         # with a second line of exactly 76 characters and no trailing =
-        ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
-         '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
+        (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
+         b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),
         # A string containing a hard line break, but which the first line is
         # 151 characters and the second line is exactly 76 characters.  This
         # should leave us with three lines, the first which has a soft line
         # break, and which the second and third do not.
-        ('''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+        (b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''',
-         '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
+         b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),
         # Now some really complex stuff ;)
@@ -117,14 +117,14 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
 
     # These are used in the "quotetabs=1" tests.
     ESTRINGS = (
-        ('hello world', 'hello=20world'),
-        ('hello\tworld', 'hello=09world'),
+        (b'hello world', b'hello=20world'),
+        (b'hello\tworld', b'hello=09world'),
         )
 
     # These are used in the "header=1" tests.
     HSTRINGS = (
-        ('hello world', 'hello_world'),
-        ('hello_world', 'hello=5Fworld'),
+        (b'hello world', b'hello_world'),
+        (b'hello_world', b'hello=5Fworld'),
         )
 
     @withpythonimplementation
@@ -161,18 +161,18 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
     @withpythonimplementation
     def test_embedded_ws(self):
         for p, e in self.ESTRINGS:
-            self.assert_(quopri.encodestring(p, quotetabs=True) == e)
+            self.assertEqual(quopri.encodestring(p, quotetabs=True), e)
             self.assertEqual(quopri.decodestring(e), p)
 
     @withpythonimplementation
     def test_encode_header(self):
         for p, e in self.HSTRINGS:
-            self.assert_(quopri.encodestring(p, header=True) == e)
+            self.assertEqual(quopri.encodestring(p, header=True), e)
 
     @withpythonimplementation
     def test_decode_header(self):
         for p, e in self.HSTRINGS:
-            self.assert_(quopri.decodestring(e, header=True) == p)
+            self.assertEqual(quopri.decodestring(e, header=True), p)
 
     def test_scriptencode(self):
         (p, e) = self.STRINGS[-1]
@@ -182,13 +182,20 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''')
         # On Windows, Python will output the result to stdout using
         # CRLF, as the mode of stdout is text mode. To compare this
         # with the expected result, we need to do a line-by-line comparison.
-        self.assertEqual(cout.splitlines(), e.splitlines())
+        cout = cout.decode('latin-1').splitlines()
+        e = e.decode('latin-1').splitlines()
+        assert len(cout)==len(e)
+        for i in range(len(cout)):
+            self.assertEqual(cout[i], e[i])
+        self.assertEqual(cout, e)
 
     def test_scriptdecode(self):
         (p, e) = self.STRINGS[-1]
         process = subprocess.Popen([sys.executable, "-mquopri", "-d"],
                                    stdin=subprocess.PIPE, stdout=subprocess.PIPE)
         cout, cerr = process.communicate(e)
+        cout = cout.decode('latin-1')
+        p = p.decode('latin-1')
         self.assertEqual(cout.splitlines(), p.splitlines())
 
 def test_main():
author	Martin v. Löwis <martin@v.loewis.de>	2007-07-28 17:52:25 (GMT)
committer	Martin v. Löwis <martin@v.loewis.de>	2007-07-28 17:52:25 (GMT)
commit	c582bfca262ed1f1ded9c6088d9181b763474579 (patch)
tree	b43c14b72d253f0007301cc5b47834ca04e4c27e
parent	f3f0c611dd6212e87b8ead06f531ee109ab2d5d3 (diff)
download	cpython-c582bfca262ed1f1ded9c6088d9181b763474579.zip cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.gz cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.bz2