From 0951e5d691487033bae283d98c45ccbcf3ae822d Mon Sep 17 00:00:00 2001 From: maiphi Date: Fri, 1 Nov 2019 14:11:10 +0100 Subject: Tex builder: avoid error when reading non-utf-8 log files Python 3 throws a UnicodeDecodeError when a file containing bytes that are not valid in the default text encoding (locale-dependent, usually UTF-8) is read in text mode. This happens when T1 fontenc is used in LaTeX and a warning in the log file contains e.g. umlauts. The log file is now read as bytes and decoded with errors='replace', so undecodable bytes are replaced instead of raising an error. --- src/engine/SCons/Tool/tex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/SCons/Tool/tex.py b/src/engine/SCons/Tool/tex.py index 64b9d3b..5cf7bca 100644 --- a/src/engine/SCons/Tool/tex.py +++ b/src/engine/SCons/Tool/tex.py @@ -297,8 +297,8 @@ def InternalLaTeXAuxAction(XXXLaTeXAction, target = None, source= None, env=None logfilename = targetbase + '.log' logContent = '' if os.path.isfile(logfilename): - with open(logfilename, "r") as f: - logContent = f.read() + with open(logfilename, "rb") as f: + logContent = f.read().decode(errors='replace') # Read the fls file to find all .aux files -- cgit v0.12 From 6f35570377e7e7b5003dd6f069b65e95f9ae6efb Mon Sep 17 00:00:00 2001 From: maiphi Date: Fri, 1 Nov 2019 14:22:31 +0100 Subject: Add test case with Latin-1 encoded Latex log file. Required fix in the test framework. To make the test work, the encoding issue also had to be handled in the test framework. Otherwise the test framework itself fails on the undecodable bytes, even though the LaTeX builder can handle the case. --- test/TEX/LATEX.py | 16 ++++++++++++++++ testing/framework/TestCmd.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/test/TEX/LATEX.py b/test/TEX/LATEX.py index 553313e..592bbb7 100644 --- a/test/TEX/LATEX.py +++ b/test/TEX/LATEX.py @@ -193,6 +193,22 @@ This is the include file. 
mod %s test.must_not_exist('latexi.ilg') + test.write('SConstruct', """ +env = Environment() +env.PostScript('latin1log.tex') +""") + + test.write('latin1log.tex', r""" +\documentclass[12pt,a4paper]{article} +\usepackage[T1]{fontenc} +\begin{document} +\"oxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +\end{document} +""") + + test.run(arguments = 'latin1log.dvi', stderr = None) + test.must_exist('latin1log.dvi') + test.pass_test() # Local Variables: diff --git a/testing/framework/TestCmd.py b/testing/framework/TestCmd.py index 81e03f3..9218f60 100644 --- a/testing/framework/TestCmd.py +++ b/testing/framework/TestCmd.py @@ -1528,7 +1528,7 @@ class TestCmd(object): # TODO: Run full tests on both platforms and see if this fixes failures # It seems that py3.6 still sets text mode if you set encoding. elif sys.version_info[0] == 3: # TODO and sys.version_info[1] < 6: - stream = stream.decode('utf-8') + stream = stream.decode('utf-8', errors='replace') stream = stream.replace('\r\n', '\n') elif sys.version_info[0] == 2: stream = stream.replace('\r\n', '\n') -- cgit v0.12 From 477ffd82d67ebd4820d33be760c1594fd82c09f0 Mon Sep 17 00:00:00 2001 From: maiphi Date: Fri, 1 Nov 2019 14:40:56 +0100 Subject: Added note about Latex Latin-1/UTF-8 issue to CHANGES.log --- src/CHANGES.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/CHANGES.txt b/src/CHANGES.txt index af448d5..c6312b7 100755 --- a/src/CHANGES.txt +++ b/src/CHANGES.txt @@ -6,6 +6,10 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER + From Philipp Maierhöfer: + - Avoid crash with UnicodeDecodeError on Python 3 when a Latex log file in + non-UTF-8 encoding (e.g. containing umlauts in Latin-1 encoding) is read. 
+ From Mathew Robinson: - Improved threading performance by ensuring NodeInfo is shared -- cgit v0.12 From 10d57a7c5cb108a4fa707560c0e72ccfd3793b76 Mon Sep 17 00:00:00 2001 From: maiphi Date: Thu, 7 Nov 2019 21:22:01 +0100 Subject: Latin-1 log test case: compile to DVI, not PostScript; add comment --- test/TEX/LATEX.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/TEX/LATEX.py b/test/TEX/LATEX.py index 592bbb7..dabe8b1 100644 --- a/test/TEX/LATEX.py +++ b/test/TEX/LATEX.py @@ -28,6 +28,8 @@ r""" Validate that we can set the LATEX string to our own utility, that the produced .dvi, .aux and .log files get removed by the -c option, and that we can use this to wrap calls to the real latex utility. +Check that a log file with a warning encoded in non-UTF-8 (here: Latin-1) +is read without throwing an error. """ import TestSCons @@ -195,9 +197,11 @@ This is the include file. mod %s test.write('SConstruct', """ env = Environment() -env.PostScript('latin1log.tex') +env.DVI('latin1log.tex') """) + # This will trigger an overfull hbox warning in the log file, + # containing the umlaut "o in Latin-1 ("T1 fontenc") encoding. test.write('latin1log.tex', r""" \documentclass[12pt,a4paper]{article} \usepackage[T1]{fontenc} -- cgit v0.12 From b41aedfbe1c00b1fef9072c987803d3af7efac9a Mon Sep 17 00:00:00 2001 From: maiphi Date: Thu, 7 Nov 2019 21:23:03 +0100 Subject: Changelog: add details about fix of Latex log encoding issue --- src/CHANGES.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CHANGES.txt b/src/CHANGES.txt index c6312b7..5d2bd1a 100755 --- a/src/CHANGES.txt +++ b/src/CHANGES.txt @@ -8,7 +8,8 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER From Philipp Maierhöfer: - Avoid crash with UnicodeDecodeError on Python 3 when a Latex log file in - non-UTF-8 encoding (e.g. containing umlauts in Latin-1 encoding) is read. + non-UTF-8 encoding (e.g. 
containing umlauts in Latin-1 encoding when + the fontenc package is included with \usepackage[T1]{fontenc}) is read. From Mathew Robinson: -- cgit v0.12