diff options
author | Guido van Rossum <guido@python.org> | 2007-11-19 18:03:44 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-11-19 18:03:44 (GMT) |
commit | 87c0f1d1c95eb1766a1340cd100533dfd7a86d52 (patch) | |
tree | 76c3bee37e7effe5933d830f6fd0cc4ce15c9afc /Lib | |
parent | 87afcbfe549bce674c52b0163bcf4dab6eea616e (diff) | |
download | cpython-87c0f1d1c95eb1766a1340cd100533dfd7a86d52.zip cpython-87c0f1d1c95eb1766a1340cd100533dfd7a86d52.tar.gz cpython-87c0f1d1c95eb1766a1340cd100533dfd7a86d52.tar.bz2 |
Merged revisions 59041-59055 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r59044 | neal.norwitz | 2007-11-18 17:46:20 -0800 (Sun, 18 Nov 2007) | 1 line
Use a slightly more recent version than 1.5.2b2.
........
r59047 | walter.doerwald | 2007-11-19 04:14:05 -0800 (Mon, 19 Nov 2007) | 2 lines
Fix typo in comment.
........
r59049 | walter.doerwald | 2007-11-19 04:41:10 -0800 (Mon, 19 Nov 2007) | 4 lines
Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode())
calling codecs.utf_8_decode() with final==True, which failed with incomplete
byte sequences. Fix and test by James G. Sack.
........
r59051 | nick.coghlan | 2007-11-19 05:56:27 -0800 (Mon, 19 Nov 2007) | 1 line
Enable some test_cmd_line_script debugging output to investigate failure on Mac OSX buildbot
........
r59053 | facundo.batista | 2007-11-19 08:30:24 -0800 (Mon, 19 Nov 2007) | 3 lines
Fixed detail in add_type() explanation (issue 1463).
........
r59054 | guido.van.rossum | 2007-11-19 09:35:24 -0800 (Mon, 19 Nov 2007) | 2 lines
Make this work stand-alone, too.
........
r59055 | guido.van.rossum | 2007-11-19 09:50:22 -0800 (Mon, 19 Nov 2007) | 3 lines
Fix the OSX failures in this test -- they were due to /tmp being a symlink
to /private/tmp. Adding a call to os.path.realpath() to temp_dir() fixed it.
........
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/encodings/utf_8_sig.py | 16 | ||||
-rw-r--r-- | Lib/test/test_cmd_line_script.py | 9 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 53 |
3 files changed, 64 insertions, 14 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py index 07cd5ee..1bb4792 100644 --- a/Lib/encodings/utf_8_sig.py +++ b/Lib/encodings/utf_8_sig.py @@ -103,12 +103,18 @@ class StreamReader(codecs.StreamReader): pass def decode(self, input, errors='strict'): - if len(input) < 3 and codecs.BOM_UTF8.startswith(input): - # not enough data to decide if this is a BOM - # => try again on the next call - return ("", 0) + if len(input) < 3: + if codecs.BOM_UTF8.startswith(input): + # not enough data to decide if this is a BOM + # => try again on the next call + return ("", 0) + elif input[:3] == codecs.BOM_UTF8: + self.decode = codecs.utf_8_decode + (output, consumed) = codecs.utf_8_decode(input[3:],errors) + return (output, consumed+3) + # (else) no BOM present self.decode = codecs.utf_8_decode - return decode(input, errors) + return codecs.utf_8_decode(input, errors) ### encodings module API diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 3c6e4a0..dcb768f 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -29,6 +29,7 @@ def _run_python(*args): @contextlib.contextmanager def temp_dir(): dirname = tempfile.mkdtemp() + dirname = os.path.realpath(dirname) try: yield dirname finally: @@ -82,7 +83,7 @@ def _make_test_zip(zip_dir, zip_basename, script_name): zip_file.close() # if verbose: # zip_file = zipfile.ZipFile(zip_name, 'r') - # print "Contents of %r:" % zip_name + # print("Contents of %r:" % zip_name) # zip_file.printdir() # zip_file.close() return zip_name @@ -90,9 +91,9 @@ def _make_test_zip(zip_dir, zip_basename, script_name): class CmdLineTest(unittest.TestCase): def _check_script(self, script_name, expected_file, expected_argv0): exit_code, data = _run_python(script_name) - # if verbose: - # print "Output from test script %r:" % script_name - # print data + if verbose: + print("Output from test script %r:" % script_name) + print(data) self.assertEqual(exit_code, 0, data) 
printed_file = '__file__==%r' % expected_file printed_argv0 = 'sys.argv[0]==%r' % expected_argv0 diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 5833c6d..413a5aa 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -59,7 +59,7 @@ class MixInCheckStateHandling: class ReadTest(unittest.TestCase, MixInCheckStateHandling): def check_partial(self, input, partialresults): # get a StreamReader for the encoding and feed the bytestring version - # of input to the reader byte by byte. Read every available from + # of input to the reader byte by byte. Read everything available from # the StreamReader and check that the results equal the appropriate # entries from partialresults. q = Queue(b"") @@ -618,10 +618,53 @@ class UTF8SigTest(ReadTest): s = "spam" self.assertEqual(d.decode(s.encode("utf-8-sig")), s) - def test_decoder_state(self): - u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff" - self.check_state_handling_decode(self.encoding, - u, u.encode(self.encoding)) + def test_stream_bom(self): + unistring = "ABC\u00A1\u2200XYZ" + bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ" + + reader = codecs.getreader("utf-8-sig") + for sizehint in [None] + list(range(1, 11)) + \ + [64, 128, 256, 512, 1024]: + istream = reader(io.BytesIO(bytestring)) + ostream = io.StringIO() + while 1: + if sizehint is not None: + data = istream.read(sizehint) + else: + data = istream.read() + + if not data: + break + ostream.write(data) + + got = ostream.getvalue() + self.assertEqual(got, unistring) + + def test_stream_bare(self): + unistring = "ABC\u00A1\u2200XYZ" + bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ" + + reader = codecs.getreader("utf-8-sig") + for sizehint in [None] + list(range(1, 11)) + \ + [64, 128, 256, 512, 1024]: + istream = reader(io.BytesIO(bytestring)) + ostream = io.StringIO() + while 1: + if sizehint is not None: + data = istream.read(sizehint) + else: + data = istream.read() + + if not data: + break + ostream.write(data) 
+ + got = ostream.getvalue() + self.assertEqual(got, unistring) + +class EscapeDecodeTest(unittest.TestCase): + def test_empty(self): + self.assertEquals(codecs.escape_decode(""), ("", 0)) class RecodingTest(unittest.TestCase): def test_recoding(self): |