diff options
Diffstat (limited to 'Lib/test/test_difflib.py')
-rw-r--r-- | Lib/test/test_difflib.py | 182 |
1 files changed, 181 insertions, 1 deletions
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 0ba8f0e..ab9debf 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -107,6 +107,20 @@ patch914575_to1 = """ 5. Flat is better than nested. """ +patch914575_nonascii_from1 = """ + 1. Beautiful is beTTer than ugly. + 2. Explicit is better than ımplıcıt. + 3. Simple is better than complex. + 4. Complex is better than complicated. +""" + +patch914575_nonascii_to1 = """ + 1. Beautiful is better than ügly. + 3. Sımple is better than complex. + 4. Complicated is better than cömplex. + 5. Flat is better than nested. +""" + patch914575_from2 = """ \t\tLine 1: preceeded by from:[tt] to:[ssss] \t\tLine 2: preceeded by from:[sstt] to:[sssst] @@ -223,6 +237,27 @@ class TestSFpatches(unittest.TestCase): new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)] difflib.SequenceMatcher(None, old, new).get_opcodes() + def test_make_file_default_charset(self): + html_diff = difflib.HtmlDiff() + output = html_diff.make_file(patch914575_from1.splitlines(), + patch914575_to1.splitlines()) + self.assertIn('content="text/html; charset=utf-8"', output) + + def test_make_file_iso88591_charset(self): + html_diff = difflib.HtmlDiff() + output = html_diff.make_file(patch914575_from1.splitlines(), + patch914575_to1.splitlines(), + charset='iso-8859-1') + self.assertIn('content="text/html; charset=iso-8859-1"', output) + + def test_make_file_usascii_charset_with_nonascii_input(self): + html_diff = difflib.HtmlDiff() + output = html_diff.make_file(patch914575_nonascii_from1.splitlines(), + patch914575_nonascii_to1.splitlines(), + charset='us-ascii') + self.assertIn('content="text/html; charset=us-ascii"', output) + self.assertIn('ımplıcıt', output) + class TestOutputFormat(unittest.TestCase): def test_tab_delimiter(self): @@ -287,12 +322,157 @@ class TestOutputFormat(unittest.TestCase): self.assertEqual(fmt(0,0), '0') +class TestBytes(unittest.TestCase): + # don't really care about the content of the output, just the fact + # that it's bytes and we don't crash + def check(self, diff): + diff = list(diff) # trigger exceptions first + for line in diff: + self.assertIsInstance( + line, bytes, + "all lines of diff should be bytes, but got: %r" % line) + + def test_byte_content(self): + # if we receive byte strings, we return byte strings + a = [b'hello', b'andr\xe9'] # iso-8859-1 bytes + b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes + + unified = difflib.unified_diff + context = difflib.context_diff + + check = self.check + check(difflib.diff_bytes(unified, a, a)) + check(difflib.diff_bytes(unified, a, b)) + + # now with filenames (content and filenames are all bytes!) + check(difflib.diff_bytes(unified, a, a, b'a', b'a')) + check(difflib.diff_bytes(unified, a, b, b'a', b'b')) + + # and with filenames and dates + check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013')) + check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013')) + + # same all over again, with context diff + check(difflib.diff_bytes(context, a, a)) + check(difflib.diff_bytes(context, a, b)) + check(difflib.diff_bytes(context, a, a, b'a', b'a')) + check(difflib.diff_bytes(context, a, b, b'a', b'b')) + check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013')) + check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013')) + + def test_byte_filenames(self): + # somebody renamed a file from ISO-8859-2 to UTF-8 + fna = b'\xb3odz.txt' # "łodz.txt" + fnb = b'\xc5\x82odz.txt' + + # they transcoded the content at the same time + a = [b'\xa3odz is a city in Poland.'] + b = [b'\xc5\x81odz is a city in Poland.'] + + check = self.check + unified = difflib.unified_diff + context = difflib.context_diff + check(difflib.diff_bytes(unified, a, b, fna, fnb)) + check(difflib.diff_bytes(context, a, b, fna, fnb)) + + def assertDiff(expect, actual): + # do not compare expect and equal as lists, because unittest + # uses difflib to report difference between lists + actual = list(actual) + self.assertEqual(len(expect), len(actual)) + for e, a in zip(expect, actual): + self.assertEqual(e, a) + + expect = [ + b'--- \xb3odz.txt', + b'+++ \xc5\x82odz.txt', + b'@@ -1 +1 @@', + b'-\xa3odz is a city in Poland.', + b'+\xc5\x81odz is a city in Poland.', + ] + actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'') + assertDiff(expect, actual) + + # with dates (plain ASCII) + datea = b'2005-03-18' + dateb = b'2005-03-19' + check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb)) + check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb)) + + expect = [ + # note the mixed encodings here: this is deeply wrong by every + # tenet of Unicode, but it doesn't crash, it's parseable by + # patch, and it's how UNIX(tm) diff behaves + b'--- \xb3odz.txt\t2005-03-18', + b'+++ \xc5\x82odz.txt\t2005-03-19', + b'@@ -1 +1 @@', + b'-\xa3odz is a city in Poland.', + b'+\xc5\x81odz is a city in Poland.', + ] + actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb, + lineterm=b'') + assertDiff(expect, actual) + + def test_mixed_types_content(self): + # type of input content must be consistent: all str or all bytes + a = [b'hello'] + b = ['hello'] + + unified = difflib.unified_diff + context = difflib.context_diff + + expect = "lines to compare must be str, not bytes (b'hello')" + self._assert_type_error(expect, unified, a, b) + self._assert_type_error(expect, unified, b, a) + self._assert_type_error(expect, context, a, b) + self._assert_type_error(expect, context, b, a) + + expect = "all arguments must be bytes, not str ('hello')" + self._assert_type_error(expect, difflib.diff_bytes, unified, a, b) + self._assert_type_error(expect, difflib.diff_bytes, unified, b, a) + self._assert_type_error(expect, difflib.diff_bytes, context, a, b) + self._assert_type_error(expect, difflib.diff_bytes, context, b, a) + + def test_mixed_types_filenames(self): + # cannot pass filenames as bytes if content is str (this may not be + # the right behaviour, but at least the test demonstrates how + # things work) + a = ['hello\n'] + b = ['ohell\n'] + fna = b'ol\xe9.txt' # filename transcoded from ISO-8859-1 + fnb = b'ol\xc3a9.txt' # to UTF-8 + self._assert_type_error( + "all arguments must be str, not: b'ol\\xe9.txt'", + difflib.unified_diff, a, b, fna, fnb) + + def test_mixed_types_dates(self): + # type of dates must be consistent with type of contents + a = [b'foo\n'] + b = [b'bar\n'] + datea = '1 fév' + dateb = '3 fév' + self._assert_type_error( + "all arguments must be bytes, not str ('1 fév')", + difflib.diff_bytes, difflib.unified_diff, + a, b, b'a', b'b', datea, dateb) + + # if input is str, non-ASCII dates are fine + a = ['foo\n'] + b = ['bar\n'] + list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb)) + + def _assert_type_error(self, msg, generator, *args): + with self.assertRaises(TypeError) as ctx: + list(generator(*args)) + self.assertEqual(msg, str(ctx.exception)) + + def test_main(): difflib.HtmlDiff._default_prefix = 0 Doctests = doctest.DocTestSuite(difflib) run_unittest( TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs, - TestOutputFormat, Doctests) + TestOutputFormat, TestBytes, Doctests) if __name__ == '__main__': test_main() |