summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/difflib.rst7
-rw-r--r--Doc/whatsnew/3.5.rst8
-rw-r--r--Lib/difflib.py19
-rw-r--r--Lib/test/test_difflib.py35
-rw-r--r--Lib/test/test_difflib_expect.html2
-rw-r--r--Misc/NEWS2
6 files changed, 63 insertions, 10 deletions
diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst
index 329bde0..4427065 100644
--- a/Doc/library/difflib.rst
+++ b/Doc/library/difflib.rst
@@ -104,7 +104,8 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
The following methods are public:
- .. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
+ .. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, \
+ numlines=5, *, charset='utf-8')
Compares *fromlines* and *tolines* (lists of strings) and returns a string which
is a complete HTML file containing a table showing line by line differences with
@@ -123,6 +124,10 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
the next difference highlight at the top of the browser without any leading
context).
+ .. versionchanged:: 3.5
+ *charset* keyword-only argument was added. The default charset of
+ HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
+
.. method:: make_table(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
Compares *fromlines* and *tolines* (lists of strings) and returns a string which
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index 2f79848..21fafd0 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -225,6 +225,14 @@ contextlib
don't provide any options to redirect it.
(Contributed by Berker Peksag in :issue:`22389`.)
+difflib
+-------
+
+* The charset of the HTML document generated by :meth:`difflib.HtmlDiff.make_file`
+ can now be customized by using *charset* keyword-only parameter. The default
+ charset of HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
+ (Contributed by Berker Peksag in :issue:`2052`.)
+
distutils
---------
diff --git a/Lib/difflib.py b/Lib/difflib.py
index ae3479d..758f1aa 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -1598,7 +1598,7 @@ _file_template = """
<head>
<meta http-equiv="Content-Type"
- content="text/html; charset=ISO-8859-1" />
+ content="text/html; charset=%(charset)s" />
<title></title>
<style type="text/css">%(styles)s
</style>
@@ -1685,8 +1685,8 @@ class HtmlDiff(object):
self._linejunk = linejunk
self._charjunk = charjunk
- def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
- numlines=5):
+ def make_file(self, fromlines, tolines, fromdesc='', todesc='',
+ context=False, numlines=5, *, charset='utf-8'):
"""Returns HTML file of side by side comparison with change highlights
Arguments:
@@ -1701,13 +1701,16 @@ class HtmlDiff(object):
When context is False, controls the number of lines to place
the "next" link anchors before the next change (so click of
"next" link jumps to just before the change).
+ charset -- charset of the HTML document
"""
- return self._file_template % dict(
- styles = self._styles,
- legend = self._legend,
- table = self.make_table(fromlines,tolines,fromdesc,todesc,
- context=context,numlines=numlines))
+ return (self._file_template % dict(
+ styles=self._styles,
+ legend=self._legend,
+ table=self.make_table(fromlines, tolines, fromdesc, todesc,
+ context=context, numlines=numlines),
+ charset=charset
+ )).encode(charset, 'xmlcharrefreplace').decode(charset)
def _tab_newline_replace(self,fromlines,tolines):
"""Returns from/to line lists with tabs expanded and newlines removed.
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py
index 0ba8f0e..a078e71 100644
--- a/Lib/test/test_difflib.py
+++ b/Lib/test/test_difflib.py
@@ -107,6 +107,20 @@ patch914575_to1 = """
5. Flat is better than nested.
"""
+patch914575_nonascii_from1 = """
+ 1. Beautiful is beTTer than ugly.
+ 2. Explicit is better than ımplıcıt.
+ 3. Simple is better than complex.
+ 4. Complex is better than complicated.
+"""
+
+patch914575_nonascii_to1 = """
+ 1. Beautiful is better than ügly.
+ 3. Sımple is better than complex.
+ 4. Complicated is better than cömplex.
+ 5. Flat is better than nested.
+"""
+
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
\t\tLine 2: preceeded by from:[sstt] to:[sssst]
@@ -223,6 +237,27 @@ class TestSFpatches(unittest.TestCase):
new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
difflib.SequenceMatcher(None, old, new).get_opcodes()
+ def test_make_file_default_charset(self):
+ html_diff = difflib.HtmlDiff()
+ output = html_diff.make_file(patch914575_from1.splitlines(),
+ patch914575_to1.splitlines())
+ self.assertIn('content="text/html; charset=utf-8"', output)
+
+ def test_make_file_iso88591_charset(self):
+ html_diff = difflib.HtmlDiff()
+ output = html_diff.make_file(patch914575_from1.splitlines(),
+ patch914575_to1.splitlines(),
+ charset='iso-8859-1')
+ self.assertIn('content="text/html; charset=iso-8859-1"', output)
+
+ def test_make_file_usascii_charset_with_nonascii_input(self):
+ html_diff = difflib.HtmlDiff()
+ output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
+ patch914575_nonascii_to1.splitlines(),
+ charset='us-ascii')
+ self.assertIn('content="text/html; charset=us-ascii"', output)
+ self.assertIn('&#305;mpl&#305;c&#305;t', output)
+
class TestOutputFormat(unittest.TestCase):
def test_tab_delimiter(self):
diff --git a/Lib/test/test_difflib_expect.html b/Lib/test/test_difflib_expect.html
index 71b6d7a..ea7a24e 100644
--- a/Lib/test/test_difflib_expect.html
+++ b/Lib/test/test_difflib_expect.html
@@ -6,7 +6,7 @@
<head>
<meta http-equiv="Content-Type"
- content="text/html; charset=ISO-8859-1" />
+ content="text/html; charset=utf-8" />
<title></title>
<style type="text/css">
table.diff {font-family:Courier; border:medium;}
diff --git a/Misc/NEWS b/Misc/NEWS
index 35265e1..c73dd17 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -18,6 +18,8 @@ Core and Builtins
Library
-------
+- Issue #2052: Add charset parameter to HtmlDiff.make_file().
+
- Issue #23138: Fixed parsing cookies with absent keys or values in cookiejar.
Patch by Demian Brecht.