summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2022-06-23 03:09:57 (GMT)
committerGitHub <noreply@github.com>2022-06-23 03:09:57 (GMT)
commit9877f4c6249ac7f374dc48beaf21ea2bf3ee6996 (patch)
treeeffea480b2dc3d7f87af00f3b8ee97c49f9e6a50
parent576dd901170af30fc50b0a7f07a388b38fd724a9 (diff)
downloadcpython-9877f4c6249ac7f374dc48beaf21ea2bf3ee6996.zip
cpython-9877f4c6249ac7f374dc48beaf21ea2bf3ee6996.tar.gz
cpython-9877f4c6249ac7f374dc48beaf21ea2bf3ee6996.tar.bz2
gh-85308: argparse: Use filesystem encoding for arguments file (GH-93277)
-rw-r--r--Doc/library/argparse.rst11
-rw-r--r--Doc/whatsnew/3.12.rst6
-rw-r--r--Lib/argparse.py4
-rw-r--r--Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst4
4 files changed, 23 insertions, 2 deletions
diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst
index 0e62e99..b2fa0b3 100644
--- a/Doc/library/argparse.rst
+++ b/Doc/library/argparse.rst
@@ -562,7 +562,7 @@ at the command line. If the ``fromfile_prefix_chars=`` argument is given to the
specified characters will be treated as files, and will be replaced by the
arguments they contain. For example::
- >>> with open('args.txt', 'w') as fp:
+ >>> with open('args.txt', 'w', encoding=sys.getfilesystemencoding()) as fp:
... fp.write('-f\nbar')
>>> parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
>>> parser.add_argument('-f')
@@ -575,9 +575,18 @@ were in the same place as the original file referencing argument on the command
line. So in the example above, the expression ``['-f', 'foo', '@args.txt']``
is considered equivalent to the expression ``['-f', 'foo', '-f', 'bar']``.
+:class:`ArgumentParser` uses the :term:`filesystem encoding and error handler`
+to read the file containing arguments.
+
The ``fromfile_prefix_chars=`` argument defaults to ``None``, meaning that
arguments will never be treated as file references.
+.. versionchanged:: 3.12
+ :class:`ArgumentParser` changed the encoding and error handler used to read
+ arguments files from the default (e.g. :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`
+ and ``"strict"``) to the :term:`filesystem encoding and error handler`.
+ Arguments files should be encoded in UTF-8 instead of the ANSI code page on Windows.
+
argument_default
^^^^^^^^^^^^^^^^
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 2439479..8dde135 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -233,6 +233,12 @@ Changes in the Python API
select from a larger range than ``randrange(10**25)``.
(Originally suggested by Serhiy Storchaka gh-86388.)
+* :class:`argparse.ArgumentParser` changed the encoding and error handler
+ used for reading arguments from a file (e.g. the ``fromfile_prefix_chars`` option)
+ from the default text encoding (e.g. :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`)
+ to the :term:`filesystem encoding and error handler`.
+ Argument files should be encoded in UTF-8 instead of the ANSI code page on Windows.
+
Build Changes
=============
diff --git a/Lib/argparse.py b/Lib/argparse.py
index 1c5520c..02e98bb 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -2161,7 +2161,9 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
# replace arguments referencing files with the file content
else:
try:
- with open(arg_string[1:]) as args_file:
+ with open(arg_string[1:],
+ encoding=_sys.getfilesystemencoding(),
+ errors=_sys.getfilesystemencodeerrors()) as args_file:
arg_strings = []
for arg_line in args_file.read().splitlines():
for arg in self.convert_arg_line_to_args(arg_line):
diff --git a/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst b/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst
new file mode 100644
index 0000000..4574264
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst
@@ -0,0 +1,4 @@
+Changed :class:`argparse.ArgumentParser` to use the :term:`filesystem encoding
+and error handler` instead of the default text encoding to read arguments from
+a file (e.g. the ``fromfile_prefix_chars`` option). This change affects Windows;
+argument files should be encoded in UTF-8 instead of the ANSI code page.