diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-07-28 17:52:25 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-07-28 17:52:25 (GMT) |
commit | c582bfca262ed1f1ded9c6088d9181b763474579 (patch) | |
tree | b43c14b72d253f0007301cc5b47834ca04e4c27e | |
parent | f3f0c611dd6212e87b8ead06f531ee109ab2d5d3 (diff) | |
download | cpython-c582bfca262ed1f1ded9c6088d9181b763474579.zip cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.gz cpython-c582bfca262ed1f1ded9c6088d9181b763474579.tar.bz2 |
Fix quopri to operate consistently on bytes.
-rwxr-xr-x | Lib/quopri.py | 97 | ||||
-rw-r--r-- | Lib/test/test_quopri.py | 65 |
2 files changed, 87 insertions, 75 deletions
diff --git a/Lib/quopri.py b/Lib/quopri.py index 5002cc8..62c0503 100755 --- a/Lib/quopri.py +++ b/Lib/quopri.py @@ -6,10 +6,10 @@ __all__ = ["encode", "decode", "encodestring", "decodestring"] -ESCAPE = '=' +ESCAPE = b'=' MAXLINESIZE = 76 -HEX = '0123456789ABCDEF' -EMPTYSTRING = '' +HEX = b'0123456789ABCDEF' +EMPTYSTRING = b'' try: from binascii import a2b_qp, b2a_qp @@ -19,23 +19,25 @@ except ImportError: def needsquoting(c, quotetabs, header): - """Decide whether a particular character needs to be quoted. + """Decide whether a particular byte ordinal needs to be quoted. The 'quotetabs' flag indicates whether embedded tabs and spaces should be quoted. Note that line-ending tabs and spaces are always encoded, as per RFC 1521. """ - if c in ' \t': + assert isinstance(c, bytes) + if c in b' \t': return quotetabs # if header, we have to escape _ because _ is used to escape space - if c == '_': + if c == b'_': return header - return c == ESCAPE or not (' ' <= c <= '~') + return c == ESCAPE or not (b' ' <= c <= b'~') def quote(c): """Quote a single character.""" - i = ord(c) - return ESCAPE + HEX[i//16] + HEX[i%16] + assert isinstance(c, bytes) and len(c)==1 + c = ord(c) + return ESCAPE + bytes((HEX[c//16], HEX[c%16])) @@ -56,12 +58,12 @@ def encode(input, output, quotetabs, header = 0): output.write(odata) return - def write(s, output=output, lineEnd='\n'): + def write(s, output=output, lineEnd=b'\n'): # RFC 1521 requires that the line ending in a space or tab must have # that trailing character encoded. - if s and s[-1:] in ' \t': - output.write(s[:-1] + quote(s[-1]) + lineEnd) - elif s == '.': + if s and s[-1:] in b' \t': + output.write(s[:-1] + quote(s[-1:]) + lineEnd) + elif s == b'.': output.write(quote(s) + lineEnd) else: output.write(s + lineEnd) @@ -73,16 +75,17 @@ def encode(input, output, quotetabs, header = 0): break outline = [] # Strip off any readline induced trailing newline - stripped = '' - if line[-1:] == '\n': + stripped = b'' + if line[-1:] == b'\n': line = line[:-1] - stripped = '\n' + stripped = b'\n' # Calculate the un-length-limited encoded line for c in line: + c = bytes((c,)) if needsquoting(c, quotetabs, header): c = quote(c) - if header and c == ' ': - outline.append('_') + if header and c == b' ': + outline.append(b'_') else: outline.append(c) # First, write out the previous line @@ -94,7 +97,7 @@ def encode(input, output, quotetabs, header = 0): while len(thisline) > MAXLINESIZE: # Don't forget to include the soft line break `=' sign in the # length calculation! - write(thisline[:MAXLINESIZE-1], lineEnd='=\n') + write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n') thisline = thisline[MAXLINESIZE-1:] # Write out the current line prevline = thisline @@ -105,9 +108,9 @@ def encode(input, output, quotetabs, header = 0): def encodestring(s, quotetabs = 0, header = 0): if b2a_qp is not None: return b2a_qp(s, quotetabs = quotetabs, header = header) - from io import StringIO - infp = StringIO(s) - outfp = StringIO() + from io import BytesIO + infp = BytesIO(s) + outfp = BytesIO() encode(infp, outfp, quotetabs, header) return outfp.getvalue() @@ -124,44 +127,44 @@ def decode(input, output, header = 0): output.write(odata) return - new = '' + new = b'' while 1: line = input.readline() if not line: break i, n = 0, len(line) - if n > 0 and line[n-1] == '\n': + if n > 0 and line[n-1:n] == b'\n': partial = 0; n = n-1 # Strip trailing whitespace - while n > 0 and line[n-1] in " \t\r": + while n > 0 and line[n-1:n] in b" \t\r": n = n-1 else: partial = 1 while i < n: - c = line[i] - if c == '_' and header: - new = new + ' '; i = i+1 + c = line[i:i+1] + if c == b'_' and header: + new = new + b' '; i = i+1 elif c != ESCAPE: new = new + c; i = i+1 elif i+1 == n and not partial: partial = 1; break elif i+1 < n and line[i+1] == ESCAPE: new = new + ESCAPE; i = i+2 - elif i+2 < n and ishex(line[i+1]) and ishex(line[i+2]): - new = new + chr(unhex(line[i+1:i+3])); i = i+3 + elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]): + new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3 else: # Bad escape sequence -- leave it in new = new + c; i = i+1 if not partial: - output.write(new + '\n') - new = '' + output.write(new + b'\n') + new = b'' if new: output.write(new) def decodestring(s, header = 0): if a2b_qp is not None: return a2b_qp(s, header = header) - from io import StringIO - infp = StringIO(s) - outfp = StringIO() + from io import BytesIO + infp = BytesIO(s) + outfp = BytesIO() decode(infp, outfp, header = header) return outfp.getvalue() @@ -169,21 +172,23 @@ def decodestring(s, header = 0): # Other helper functions def ishex(c): - """Return true if the character 'c' is a hexadecimal digit.""" - return '0' <= c <= '9' or 'a' <= c <= 'f' or 'A' <= c <= 'F' + """Return true if the byte ordinal 'c' is a hexadecimal digit in ASCII.""" + assert isinstance(c, bytes) + return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F' def unhex(s): """Get the integer value of a hexadecimal number.""" bits = 0 for c in s: - if '0' <= c <= '9': + c = bytes((c,)) + if b'0' <= c <= b'9': i = ord('0') - elif 'a' <= c <= 'f': + elif b'a' <= c <= b'f': i = ord('a')-10 - elif 'A' <= c <= 'F': - i = ord('A')-10 + elif b'A' <= c <= b'F': + i = ord(b'A')-10 else: - break + assert False, "non-hex digit "+repr(c) bits = bits*16 + (ord(c) - i) return bits @@ -214,18 +219,18 @@ def main(): sts = 0 for file in args: if file == '-': - fp = sys.stdin + fp = sys.stdin.buffer else: try: - fp = open(file) + fp = open(file, "rb") except IOError as msg: sys.stderr.write("%s: can't open (%s)\n" % (file, msg)) sts = 1 continue if deco: - decode(fp, sys.stdout) + decode(fp, sys.stdout.buffer) else: - encode(fp, sys.stdout, tabs) + encode(fp, sys.stdout.buffer, tabs) if fp is not sys.stdin: fp.close() if sts: diff --git a/Lib/test/test_quopri.py b/Lib/test/test_quopri.py index a9e9e80..960759a 100644 --- a/Lib/test/test_quopri.py +++ b/Lib/test/test_quopri.py @@ -6,7 +6,7 @@ import quopri -ENCSAMPLE = """\ +ENCSAMPLE = b"""\ Here's a bunch of special=20 =A1=A2=A3=A4=A5=A6=A7=A8=A9 @@ -25,8 +25,8 @@ characters... have fun! """ # First line ends with a space -DECSAMPLE = "Here's a bunch of special \n" + \ -"""\ +DECSAMPLE = b"Here's a bunch of special \n" + \ +b"""\ \xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9 \xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3 @@ -67,48 +67,48 @@ class QuopriTestCase(unittest.TestCase): # used in the "quotetabs=0" tests. STRINGS = ( # Some normal strings - ('hello', 'hello'), - ('''hello + (b'hello', b'hello'), + (b'''hello there - world''', '''hello + world''', b'''hello there world'''), - ('''hello + (b'''hello there world -''', '''hello +''', b'''hello there world '''), - ('\201\202\203', '=81=82=83'), + (b'\201\202\203', b'=81=82=83'), # Add some trailing MUST QUOTE strings - ('hello ', 'hello=20'), - ('hello\t', 'hello=09'), + (b'hello ', b'hello=20'), + (b'hello\t', b'hello=09'), # Some long lines. First, a single line of 108 characters - ('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', - '''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx= + (b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', + b'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx= xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''), # A line of exactly 76 characters, no soft line break should be needed - ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy', - 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'), + (b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy', + b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'), # A line of 77 characters, forcing a soft line break at position 75, # and a second line of exactly 2 characters (because the soft line # break `=' sign counts against the line length limit). - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', - '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= zz'''), # A line of 151 characters, forcing a soft line break at position 75, # with a second line of exactly 76 characters and no trailing = - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', - '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + b'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''), # A string containing a hard line break, but which the first line is # 151 characters and the second line is exactly 76 characters. This # should leave us with three lines, the first which has a soft line # break, and which the second and third do not. - ('''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy + (b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''', - '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= + b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''), # Now some really complex stuff ;) @@ -117,14 +117,14 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''') # These are used in the "quotetabs=1" tests. ESTRINGS = ( - ('hello world', 'hello=20world'), - ('hello\tworld', 'hello=09world'), + (b'hello world', b'hello=20world'), + (b'hello\tworld', b'hello=09world'), ) # These are used in the "header=1" tests. HSTRINGS = ( - ('hello world', 'hello_world'), - ('hello_world', 'hello=5Fworld'), + (b'hello world', b'hello_world'), + (b'hello_world', b'hello=5Fworld'), ) @withpythonimplementation @@ -161,18 +161,18 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''') @withpythonimplementation def test_embedded_ws(self): for p, e in self.ESTRINGS: - self.assert_(quopri.encodestring(p, quotetabs=True) == e) + self.assertEqual(quopri.encodestring(p, quotetabs=True), e) self.assertEqual(quopri.decodestring(e), p) @withpythonimplementation def test_encode_header(self): for p, e in self.HSTRINGS: - self.assert_(quopri.encodestring(p, header=True) == e) + self.assertEqual(quopri.encodestring(p, header=True), e) @withpythonimplementation def test_decode_header(self): for p, e in self.HSTRINGS: - self.assert_(quopri.decodestring(e, header=True) == p) + self.assertEqual(quopri.decodestring(e, header=True), p) def test_scriptencode(self): (p, e) = self.STRINGS[-1] @@ -182,13 +182,20 @@ zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''') # On Windows, Python will output the result to stdout using # CRLF, as the mode of stdout is text mode. To compare this # with the expected result, we need to do a line-by-line comparison. - self.assertEqual(cout.splitlines(), e.splitlines()) + cout = cout.decode('latin-1').splitlines() + e = e.decode('latin-1').splitlines() + assert len(cout)==len(e) + for i in range(len(cout)): + self.assertEqual(cout[i], e[i]) + self.assertEqual(cout, e) def test_scriptdecode(self): (p, e) = self.STRINGS[-1] process = subprocess.Popen([sys.executable, "-mquopri", "-d"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) cout, cerr = process.communicate(e) + cout = cout.decode('latin-1') + p = p.decode('latin-1') self.assertEqual(cout.splitlines(), p.splitlines()) def test_main(): |