From 01ba7df49966eaf14f44962a77898840c70dda96 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 18 Feb 2025 13:59:34 +0000 Subject: gh-44827: Improve error if BOM on first line of .po file (GH-130187) --- Lib/test/test_tools/test_msgfmt.py | 8 ++++++++ Tools/i18n/msgfmt.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py index e3e3035..a6073b8b 100644 --- a/Lib/test/test_tools/test_msgfmt.py +++ b/Lib/test/test_tools/test_msgfmt.py @@ -39,6 +39,14 @@ class CompilationTest(unittest.TestCase): self.assertDictEqual(actual._catalog, expected._catalog) + def test_po_with_bom(self): + with temp_cwd(): + Path('bom.po').write_bytes(b'\xef\xbb\xbfmsgid "Python"\nmsgstr "Pioton"\n') + + res = assert_python_failure(msgfmt, 'bom.po') + err = res.err.decode('utf-8') + self.assertIn('The file bom.po starts with a UTF-8 BOM', err) + def test_invalid_msgid_plural(self): with temp_cwd(): Path('invalid.po').write_text('''\ diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index 3f731e9..f005c4e 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -32,9 +32,11 @@ import getopt import struct import array from email.parser import HeaderParser +import codecs __version__ = "1.2" + MESSAGES = {} @@ -116,6 +118,14 @@ def make(filename, outfile): print(msg, file=sys.stderr) sys.exit(1) + if lines[0].startswith(codecs.BOM_UTF8): + print( + f"The file {infile} starts with a UTF-8 BOM which is not allowed in .po files.\n" + "Please save the file without a BOM and try again.", + file=sys.stderr + ) + sys.exit(1) + section = msgctxt = None fuzzy = 0 -- cgit v0.12