diff options
Diffstat (limited to 'Lib/test/test_difflib.py')
-rw-r--r-- | Lib/test/test_difflib.py | 147 |
1 files changed, 146 insertions, 1 deletions
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index a078e71..ab9debf 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -322,12 +322,157 @@ class TestOutputFormat(unittest.TestCase): self.assertEqual(fmt(0,0), '0') +class TestBytes(unittest.TestCase): + # don't really care about the content of the output, just the fact + # that it's bytes and we don't crash + def check(self, diff): + diff = list(diff) # trigger exceptions first + for line in diff: + self.assertIsInstance( + line, bytes, + "all lines of diff should be bytes, but got: %r" % line) + + def test_byte_content(self): + # if we receive byte strings, we return byte strings + a = [b'hello', b'andr\xe9'] # iso-8859-1 bytes + b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes + + unified = difflib.unified_diff + context = difflib.context_diff + + check = self.check + check(difflib.diff_bytes(unified, a, a)) + check(difflib.diff_bytes(unified, a, b)) + + # now with filenames (content and filenames are all bytes!) + check(difflib.diff_bytes(unified, a, a, b'a', b'a')) + check(difflib.diff_bytes(unified, a, b, b'a', b'b')) + + # and with filenames and dates + check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013')) + check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013')) + + # same all over again, with context diff + check(difflib.diff_bytes(context, a, a)) + check(difflib.diff_bytes(context, a, b)) + check(difflib.diff_bytes(context, a, a, b'a', b'a')) + check(difflib.diff_bytes(context, a, b, b'a', b'b')) + check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013')) + check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013')) + + def test_byte_filenames(self): + # somebody renamed a file from ISO-8859-2 to UTF-8 + fna = b'\xb3odz.txt' # "łodz.txt" + fnb = b'\xc5\x82odz.txt' + + # they transcoded the content at the same time + a = [b'\xa3odz is a city in Poland.'] + b = [b'\xc5\x81odz is a city in Poland.'] + + check = self.check + unified = difflib.unified_diff + context = difflib.context_diff + check(difflib.diff_bytes(unified, a, b, fna, fnb)) + check(difflib.diff_bytes(context, a, b, fna, fnb)) + + def assertDiff(expect, actual): + # do not compare expect and equal as lists, because unittest + # uses difflib to report difference between lists + actual = list(actual) + self.assertEqual(len(expect), len(actual)) + for e, a in zip(expect, actual): + self.assertEqual(e, a) + + expect = [ + b'--- \xb3odz.txt', + b'+++ \xc5\x82odz.txt', + b'@@ -1 +1 @@', + b'-\xa3odz is a city in Poland.', + b'+\xc5\x81odz is a city in Poland.', + ] + actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'') + assertDiff(expect, actual) + + # with dates (plain ASCII) + datea = b'2005-03-18' + dateb = b'2005-03-19' + check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb)) + check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb)) + + expect = [ + # note the mixed encodings here: this is deeply wrong by every + # tenet of Unicode, but it doesn't crash, it's parseable by + # patch, and it's how UNIX(tm) diff behaves + b'--- \xb3odz.txt\t2005-03-18', + b'+++ \xc5\x82odz.txt\t2005-03-19', + b'@@ -1 +1 @@', + b'-\xa3odz is a city in Poland.', + b'+\xc5\x81odz is a city in Poland.', + ] + actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb, + lineterm=b'') + assertDiff(expect, actual) + + def test_mixed_types_content(self): + # type of input content must be consistent: all str or all bytes + a = [b'hello'] + b = ['hello'] + + unified = difflib.unified_diff + context = difflib.context_diff + + expect = "lines to compare must be str, not bytes (b'hello')" + self._assert_type_error(expect, unified, a, b) + self._assert_type_error(expect, unified, b, a) + self._assert_type_error(expect, context, a, b) + self._assert_type_error(expect, context, b, a) + + expect = "all arguments must be bytes, not str ('hello')" + self._assert_type_error(expect, difflib.diff_bytes, unified, a, b) + self._assert_type_error(expect, difflib.diff_bytes, unified, b, a) + self._assert_type_error(expect, difflib.diff_bytes, context, a, b) + self._assert_type_error(expect, difflib.diff_bytes, context, b, a) + + def test_mixed_types_filenames(self): + # cannot pass filenames as bytes if content is str (this may not be + # the right behaviour, but at least the test demonstrates how + # things work) + a = ['hello\n'] + b = ['ohell\n'] + fna = b'ol\xe9.txt' # filename transcoded from ISO-8859-1 + fnb = b'ol\xc3a9.txt' # to UTF-8 + self._assert_type_error( + "all arguments must be str, not: b'ol\\xe9.txt'", + difflib.unified_diff, a, b, fna, fnb) + + def test_mixed_types_dates(self): + # type of dates must be consistent with type of contents + a = [b'foo\n'] + b = [b'bar\n'] + datea = '1 fév' + dateb = '3 fév' + self._assert_type_error( + "all arguments must be bytes, not str ('1 fév')", + difflib.diff_bytes, difflib.unified_diff, + a, b, b'a', b'b', datea, dateb) + + # if input is str, non-ASCII dates are fine + a = ['foo\n'] + b = ['bar\n'] + list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb)) + + def _assert_type_error(self, msg, generator, *args): + with self.assertRaises(TypeError) as ctx: + list(generator(*args)) + self.assertEqual(msg, str(ctx.exception)) + + def test_main(): difflib.HtmlDiff._default_prefix = 0 Doctests = doctest.DocTestSuite(difflib) run_unittest( TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs, - TestOutputFormat, Doctests) + TestOutputFormat, TestBytes, Doctests) if __name__ == '__main__': test_main() |