summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSebastian Pedersen <31063917+SebastianGPedersen@users.noreply.github.com>2020-04-13 23:07:56 (GMT)
committerGitHub <noreply@github.com>2020-04-13 23:07:56 (GMT)
commita1a0eb4a394a5ac7a8422616ce1ee4125a3ef74f (patch)
treedfc13bcfb05cc9bd76cae41d04f11de1785c3bf2
parent258f5179f9494189e6a80311c86981d2a88ba9d6 (diff)
downloadcpython-a1a0eb4a394a5ac7a8422616ce1ee4125a3ef74f.zip
cpython-a1a0eb4a394a5ac7a8422616ce1ee4125a3ef74f.tar.gz
cpython-a1a0eb4a394a5ac7a8422616ce1ee4125a3ef74f.tar.bz2
bpo-39380: Change ftplib encoding from latin-1 to utf-8 (GH-18048)
Add the encoding in ftplib.FTP and ftplib.FTP_TLS to the constructor as keyword-only and change the default from "latin-1" to "utf-8" to follow RFC 2640.
-rw-r--r--Doc/library/ftplib.rst33
-rw-r--r--Doc/whatsnew/3.9.rst3
-rw-r--r--Lib/ftplib.py22
-rw-r--r--Lib/test/test_ftplib.py94
-rw-r--r--Misc/NEWS.d/next/Tests/2020-03-22-20-00-04.bpo-39380.ZXlRQU.rst3
5 files changed, 101 insertions, 54 deletions
diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst
index a4bb695..f4d4cdf 100644
--- a/Doc/library/ftplib.rst
+++ b/Doc/library/ftplib.rst
@@ -19,6 +19,8 @@ as mirroring other FTP servers. It is also used by the module
:mod:`urllib.request` to handle URLs that use FTP. For more information on FTP
(File Transfer Protocol), see Internet :rfc:`959`.
+The default encoding is UTF-8, following :rfc:`2640`.
+
Here's a sample session using the :mod:`ftplib` module::
>>> from ftplib import FTP
@@ -41,7 +43,7 @@ Here's a sample session using the :mod:`ftplib` module::
The module defines the following items:
-.. class:: FTP(host='', user='', passwd='', acct='', timeout=None, source_address=None)
+.. class:: FTP(host='', user='', passwd='', acct='', timeout=None, source_address=None, *, encoding='utf-8')
Return a new instance of the :class:`FTP` class. When *host* is given, the
method call ``connect(host)`` is made. When *user* is given, additionally
@@ -50,7 +52,8 @@ The module defines the following items:
parameter specifies a timeout in seconds for blocking operations like the
connection attempt (if is not specified, the global default timeout setting
will be used). *source_address* is a 2-tuple ``(host, port)`` for the socket
- to bind to as its source address before connecting.
+ to bind to as its source address before connecting. The *encoding* parameter
+ specifies the encoding for directories and filenames.
The :class:`FTP` class supports the :keyword:`with` statement, e.g.:
@@ -74,9 +77,11 @@ The module defines the following items:
.. versionchanged:: 3.9
If the *timeout* parameter is set to be zero, it will raise a
- :class:`ValueError` to prevent the creation of a non-blocking socket
+ :class:`ValueError` to prevent the creation of a non-blocking socket.
+ The *encoding* parameter was added, and the default was changed from
+ Latin-1 to UTF-8 to follow :rfc:`2640`.
-.. class:: FTP_TLS(host='', user='', passwd='', acct='', keyfile=None, certfile=None, context=None, timeout=None, source_address=None)
+.. class:: FTP_TLS(host='', user='', passwd='', acct='', keyfile=None, certfile=None, context=None, timeout=None, source_address=None, *, encoding='utf-8')
A :class:`FTP` subclass which adds TLS support to FTP as described in
:rfc:`4217`.
@@ -110,7 +115,9 @@ The module defines the following items:
.. versionchanged:: 3.9
If the *timeout* parameter is set to be zero, it will raise a
- :class:`ValueError` to prevent the creation of a non-blocking socket
+ :class:`ValueError` to prevent the creation of a non-blocking socket.
+ The *encoding* parameter was added, and the default was changed from
+ Latin-1 to UTF-8 to follow :rfc:`2640`.
Here's a sample session using the :class:`FTP_TLS` class::
@@ -259,9 +266,10 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
.. method:: FTP.retrlines(cmd, callback=None)
- Retrieve a file or directory listing in ASCII transfer mode. *cmd* should be
- an appropriate ``RETR`` command (see :meth:`retrbinary`) or a command such as
- ``LIST`` or ``NLST`` (usually just the string ``'LIST'``).
+ Retrieve a file or directory listing in the encoding specified by the
+ *encoding* parameter at initialization.
+ *cmd* should be an appropriate ``RETR`` command (see :meth:`retrbinary`) or
+ a command such as ``LIST`` or ``NLST`` (usually just the string ``'LIST'``).
``LIST`` retrieves a list of files and information about those files.
``NLST`` retrieves a list of file names.
The *callback* function is called for each line with a string argument
@@ -291,7 +299,7 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
.. method:: FTP.storlines(cmd, fp, callback=None)
- Store a file in ASCII transfer mode. *cmd* should be an appropriate
+ Store a file in line mode. *cmd* should be an appropriate
``STOR`` command (see :meth:`storbinary`). Lines are read until EOF from the
:term:`file object` *fp* (opened in binary mode) using its :meth:`~io.IOBase.readline`
method to provide the data to be stored. *callback* is an optional single
@@ -309,10 +317,9 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
If optional *rest* is given, a ``REST`` command is sent to the server, passing
*rest* as an argument. *rest* is usually a byte offset into the requested file,
telling the server to restart sending the file's bytes at the requested offset,
- skipping over the initial bytes. Note however that :rfc:`959` requires only that
- *rest* be a string containing characters in the printable range from ASCII code
- 33 to ASCII code 126. The :meth:`transfercmd` method, therefore, converts
- *rest* to a string, but no check is performed on the string's contents. If the
+ skipping over the initial bytes. Note however that the :meth:`transfercmd`
+ method converts *rest* to a string with the *encoding* parameter specified
+ at initialization, but no check is performed on the string's contents. If the
server does not recognize the ``REST`` command, an :exc:`error_reply` exception
will be raised. If this happens, simply call :meth:`transfercmd` without a
*rest* argument.
diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
index 020a869..1bbcae3 100644
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -790,6 +790,9 @@ Changes in the Python API
environment variable when the :option:`-E` or :option:`-I` command line
options are being used.
+* The *encoding* parameter has been added to the classes :class:`ftplib.FTP` and
+ :class:`ftplib.FTP_TLS` as a keyword-only parameter, and the default encoding
+ is changed from Latin-1 to UTF-8 to follow :rfc:`2640`.
CPython bytecode changes
------------------------
diff --git a/Lib/ftplib.py b/Lib/ftplib.py
index 71b3c28..1f760ed 100644
--- a/Lib/ftplib.py
+++ b/Lib/ftplib.py
@@ -75,13 +75,14 @@ class FTP:
'''An FTP client class.
To create a connection, call the class using these arguments:
- host, user, passwd, acct, timeout
+ host, user, passwd, acct, timeout, source_address, encoding
The first four arguments are all strings, and have default value ''.
- timeout must be numeric and defaults to None if not passed,
- meaning that no timeout will be set on any ftp socket(s)
+ The parameter ´timeout´ must be numeric and defaults to None if not
+ passed, meaning that no timeout will be set on any ftp socket(s).
If a timeout is passed, then this is now the default timeout for all ftp
socket operations for this instance.
+ The last parameter is the encoding of filenames, which defaults to utf-8.
Then use self.connect() with optional host and port argument.
@@ -102,15 +103,16 @@ class FTP:
file = None
welcome = None
passiveserver = 1
- encoding = "latin-1"
def __init__(self, host='', user='', passwd='', acct='',
- timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+ timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None, *,
+ encoding='utf-8'):
"""Initialization method (called by class instantiation).
Initialize host to localhost, port to standard ftp port.
Optional arguments are host (for connect()),
and user, passwd, acct (for login()).
"""
+ self.encoding = encoding
self.source_address = source_address
self.timeout = timeout
if host:
@@ -706,9 +708,10 @@ else:
'''
ssl_version = ssl.PROTOCOL_TLS_CLIENT
- def __init__(self, host='', user='', passwd='', acct='', keyfile=None,
- certfile=None, context=None,
- timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+ def __init__(self, host='', user='', passwd='', acct='',
+ keyfile=None, certfile=None, context=None,
+ timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None, *,
+ encoding='utf-8'):
if context is not None and keyfile is not None:
raise ValueError("context and keyfile arguments are mutually "
"exclusive")
@@ -727,7 +730,8 @@ else:
keyfile=keyfile)
self.context = context
self._prot_p = False
- super().__init__(host, user, passwd, acct, timeout, source_address)
+ super().__init__(host, user, passwd, acct,
+ timeout, source_address, encoding=encoding)
def login(self, user='', passwd='', acct='', secure=True):
if secure and not isinstance(self.sock, ssl.SSLSocket):
diff --git a/Lib/test/test_ftplib.py b/Lib/test/test_ftplib.py
index f40f3a4..cf30a3d 100644
--- a/Lib/test/test_ftplib.py
+++ b/Lib/test/test_ftplib.py
@@ -22,11 +22,12 @@ from test import support
from test.support import HOST, HOSTv6
TIMEOUT = support.LOOPBACK_TIMEOUT
+DEFAULT_ENCODING = 'utf-8'
# the dummy data returned by server over the data channel when
# RETR, LIST, NLST, MLSD commands are issued
-RETR_DATA = 'abcde12345\r\n' * 1000
-LIST_DATA = 'foo\r\nbar\r\n'
-NLST_DATA = 'foo\r\nbar\r\n'
+RETR_DATA = 'abcde12345\r\n' * 1000 + 'non-ascii char \xAE\r\n'
+LIST_DATA = 'foo\r\nbar\r\n non-ascii char \xAE\r\n'
+NLST_DATA = 'foo\r\nbar\r\n non-ascii char \xAE\r\n'
MLSD_DATA = ("type=cdir;perm=el;unique==keVO1+ZF4; test\r\n"
"type=pdir;perm=e;unique==keVO1+d?3; ..\r\n"
"type=OS.unix=slink:/foobar;perm=;unique==keVO1+4G4; foobar\r\n"
@@ -41,7 +42,9 @@ MLSD_DATA = ("type=cdir;perm=el;unique==keVO1+ZF4; test\r\n"
"type=dir;perm=cpmel;unique==keVO1+7G4; incoming\r\n"
"type=file;perm=r;unique==keVO1+1G4; file2\r\n"
"type=file;perm=r;unique==keVO1+1G4; file3\r\n"
- "type=file;perm=r;unique==keVO1+1G4; file4\r\n")
+ "type=file;perm=r;unique==keVO1+1G4; file4\r\n"
+ "type=dir;perm=cpmel;unique==SGP1; dir \xAE non-ascii char\r\n"
+ "type=file;perm=r;unique==SGP2; file \xAE non-ascii char\r\n")
class DummyDTPHandler(asynchat.async_chat):
@@ -51,9 +54,11 @@ class DummyDTPHandler(asynchat.async_chat):
asynchat.async_chat.__init__(self, conn)
self.baseclass = baseclass
self.baseclass.last_received_data = ''
+ self.encoding = baseclass.encoding
def handle_read(self):
- self.baseclass.last_received_data += self.recv(1024).decode('ascii')
+ new_data = self.recv(1024).decode(self.encoding, 'replace')
+ self.baseclass.last_received_data += new_data
def handle_close(self):
# XXX: this method can be called many times in a row for a single
@@ -70,7 +75,7 @@ class DummyDTPHandler(asynchat.async_chat):
self.baseclass.next_data = None
if not what:
return self.close_when_done()
- super(DummyDTPHandler, self).push(what.encode('ascii'))
+ super(DummyDTPHandler, self).push(what.encode(self.encoding))
def handle_error(self):
raise Exception
@@ -80,7 +85,7 @@ class DummyFTPHandler(asynchat.async_chat):
dtp_handler = DummyDTPHandler
- def __init__(self, conn):
+ def __init__(self, conn, encoding=DEFAULT_ENCODING):
asynchat.async_chat.__init__(self, conn)
# tells the socket to handle urgent data inline (ABOR command)
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_OOBINLINE, 1)
@@ -94,12 +99,13 @@ class DummyFTPHandler(asynchat.async_chat):
self.rest = None
self.next_retr_data = RETR_DATA
self.push('220 welcome')
+ self.encoding = encoding
def collect_incoming_data(self, data):
self.in_buffer.append(data)
def found_terminator(self):
- line = b''.join(self.in_buffer).decode('ascii')
+ line = b''.join(self.in_buffer).decode(self.encoding)
self.in_buffer = []
if self.next_response:
self.push(self.next_response)
@@ -121,7 +127,7 @@ class DummyFTPHandler(asynchat.async_chat):
raise Exception
def push(self, data):
- asynchat.async_chat.push(self, data.encode('ascii') + b'\r\n')
+ asynchat.async_chat.push(self, data.encode(self.encoding) + b'\r\n')
def cmd_port(self, arg):
addr = list(map(int, arg.split(',')))
@@ -251,7 +257,7 @@ class DummyFTPServer(asyncore.dispatcher, threading.Thread):
handler = DummyFTPHandler
- def __init__(self, address, af=socket.AF_INET):
+ def __init__(self, address, af=socket.AF_INET, encoding=DEFAULT_ENCODING):
threading.Thread.__init__(self)
asyncore.dispatcher.__init__(self)
self.daemon = True
@@ -262,6 +268,7 @@ class DummyFTPServer(asyncore.dispatcher, threading.Thread):
self.active_lock = threading.Lock()
self.host, self.port = self.socket.getsockname()[:2]
self.handler_instance = None
+ self.encoding = encoding
def start(self):
assert not self.active
@@ -284,7 +291,7 @@ class DummyFTPServer(asyncore.dispatcher, threading.Thread):
self.join()
def handle_accepted(self, conn, addr):
- self.handler_instance = self.handler(conn)
+ self.handler_instance = self.handler(conn, encoding=self.encoding)
def handle_connect(self):
self.close()
@@ -421,8 +428,8 @@ if ssl is not None:
dtp_handler = DummyTLS_DTPHandler
- def __init__(self, conn):
- DummyFTPHandler.__init__(self, conn)
+ def __init__(self, conn, encoding=DEFAULT_ENCODING):
+ DummyFTPHandler.__init__(self, conn, encoding=encoding)
self.secure_data_channel = False
self._ccc = False
@@ -462,10 +469,10 @@ if ssl is not None:
class TestFTPClass(TestCase):
- def setUp(self):
- self.server = DummyFTPServer((HOST, 0))
+ def setUp(self, encoding=DEFAULT_ENCODING):
+ self.server = DummyFTPServer((HOST, 0), encoding=encoding)
self.server.start()
- self.client = ftplib.FTP(timeout=TIMEOUT)
+ self.client = ftplib.FTP(timeout=TIMEOUT, encoding=encoding)
self.client.connect(self.server.host, self.server.port)
def tearDown(self):
@@ -565,14 +572,14 @@ class TestFTPClass(TestCase):
def test_retrbinary(self):
def callback(data):
- received.append(data.decode('ascii'))
+ received.append(data.decode(self.client.encoding))
received = []
self.client.retrbinary('retr', callback)
self.check_data(''.join(received), RETR_DATA)
def test_retrbinary_rest(self):
def callback(data):
- received.append(data.decode('ascii'))
+ received.append(data.decode(self.client.encoding))
for rest in (0, 10, 20):
received = []
self.client.retrbinary('retr', callback, rest=rest)
@@ -584,7 +591,7 @@ class TestFTPClass(TestCase):
self.check_data(''.join(received), RETR_DATA.replace('\r\n', ''))
def test_storbinary(self):
- f = io.BytesIO(RETR_DATA.encode('ascii'))
+ f = io.BytesIO(RETR_DATA.encode(self.client.encoding))
self.client.storbinary('stor', f)
self.check_data(self.server.handler_instance.last_received_data, RETR_DATA)
# test new callback arg
@@ -594,14 +601,16 @@ class TestFTPClass(TestCase):
self.assertTrue(flag)
def test_storbinary_rest(self):
- f = io.BytesIO(RETR_DATA.replace('\r\n', '\n').encode('ascii'))
+ data = RETR_DATA.replace('\r\n', '\n').encode(self.client.encoding)
+ f = io.BytesIO(data)
for r in (30, '30'):
f.seek(0)
self.client.storbinary('stor', f, rest=r)
self.assertEqual(self.server.handler_instance.rest, str(r))
def test_storlines(self):
- f = io.BytesIO(RETR_DATA.replace('\r\n', '\n').encode('ascii'))
+ data = RETR_DATA.replace('\r\n', '\n').encode(self.client.encoding)
+ f = io.BytesIO(data)
self.client.storlines('stor', f)
self.check_data(self.server.handler_instance.last_received_data, RETR_DATA)
# test new callback arg
@@ -790,14 +799,32 @@ class TestFTPClass(TestCase):
f = io.BytesIO(b'x' * self.client.maxline * 2)
self.assertRaises(ftplib.Error, self.client.storlines, 'stor', f)
+ def test_encoding_param(self):
+ encodings = ['latin-1', 'utf-8']
+ for encoding in encodings:
+ with self.subTest(encoding=encoding):
+ self.tearDown()
+ self.setUp(encoding=encoding)
+ self.assertEqual(encoding, self.client.encoding)
+ self.test_retrbinary()
+ self.test_storbinary()
+ self.test_retrlines()
+ new_dir = self.client.mkd('/non-ascii dir \xAE')
+ self.check_data(new_dir, '/non-ascii dir \xAE')
+ # Check default encoding
+ client = ftplib.FTP(timeout=TIMEOUT)
+ self.assertEqual(DEFAULT_ENCODING, client.encoding)
+
@skipUnless(support.IPV6_ENABLED, "IPv6 not enabled")
class TestIPv6Environment(TestCase):
def setUp(self):
- self.server = DummyFTPServer((HOSTv6, 0), af=socket.AF_INET6)
+ self.server = DummyFTPServer((HOSTv6, 0),
+ af=socket.AF_INET6,
+ encoding=DEFAULT_ENCODING)
self.server.start()
- self.client = ftplib.FTP(timeout=TIMEOUT)
+ self.client = ftplib.FTP(timeout=TIMEOUT, encoding=DEFAULT_ENCODING)
self.client.connect(self.server.host, self.server.port)
def tearDown(self):
@@ -824,7 +851,7 @@ class TestIPv6Environment(TestCase):
def test_transfer(self):
def retr():
def callback(data):
- received.append(data.decode('ascii'))
+ received.append(data.decode(self.client.encoding))
received = []
self.client.retrbinary('retr', callback)
self.assertEqual(len(''.join(received)), len(RETR_DATA))
@@ -841,10 +868,10 @@ class TestTLS_FTPClassMixin(TestFTPClass):
and data connections first.
"""
- def setUp(self):
- self.server = DummyTLS_FTPServer((HOST, 0))
+ def setUp(self, encoding=DEFAULT_ENCODING):
+ self.server = DummyTLS_FTPServer((HOST, 0), encoding=encoding)
self.server.start()
- self.client = ftplib.FTP_TLS(timeout=TIMEOUT)
+ self.client = ftplib.FTP_TLS(timeout=TIMEOUT, encoding=encoding)
self.client.connect(self.server.host, self.server.port)
# enable TLS
self.client.auth()
@@ -855,8 +882,8 @@ class TestTLS_FTPClassMixin(TestFTPClass):
class TestTLS_FTPClass(TestCase):
"""Specific TLS_FTP class tests."""
- def setUp(self):
- self.server = DummyTLS_FTPServer((HOST, 0))
+ def setUp(self, encoding=DEFAULT_ENCODING):
+ self.server = DummyTLS_FTPServer((HOST, 0), encoding=encoding)
self.server.start()
self.client = ftplib.FTP_TLS(timeout=TIMEOUT)
self.client.connect(self.server.host, self.server.port)
@@ -877,7 +904,8 @@ class TestTLS_FTPClass(TestCase):
# clear text
with self.client.transfercmd('list') as sock:
self.assertNotIsInstance(sock, ssl.SSLSocket)
- self.assertEqual(sock.recv(1024), LIST_DATA.encode('ascii'))
+ self.assertEqual(sock.recv(1024),
+ LIST_DATA.encode(self.client.encoding))
self.assertEqual(self.client.voidresp(), "226 transfer complete")
# secured, after PROT P
@@ -886,14 +914,16 @@ class TestTLS_FTPClass(TestCase):
self.assertIsInstance(sock, ssl.SSLSocket)
# consume from SSL socket to finalize handshake and avoid
# "SSLError [SSL] shutdown while in init"
- self.assertEqual(sock.recv(1024), LIST_DATA.encode('ascii'))
+ self.assertEqual(sock.recv(1024),
+ LIST_DATA.encode(self.client.encoding))
self.assertEqual(self.client.voidresp(), "226 transfer complete")
# PROT C is issued, the connection must be in cleartext again
self.client.prot_c()
with self.client.transfercmd('list') as sock:
self.assertNotIsInstance(sock, ssl.SSLSocket)
- self.assertEqual(sock.recv(1024), LIST_DATA.encode('ascii'))
+ self.assertEqual(sock.recv(1024),
+ LIST_DATA.encode(self.client.encoding))
self.assertEqual(self.client.voidresp(), "226 transfer complete")
def test_login(self):
diff --git a/Misc/NEWS.d/next/Tests/2020-03-22-20-00-04.bpo-39380.ZXlRQU.rst b/Misc/NEWS.d/next/Tests/2020-03-22-20-00-04.bpo-39380.ZXlRQU.rst
new file mode 100644
index 0000000..1ac9ead
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2020-03-22-20-00-04.bpo-39380.ZXlRQU.rst
@@ -0,0 +1,3 @@
+Add the encoding in :class:`ftplib.FTP` and :class:`ftplib.FTP_TLS` to the
+constructor as keyword-only and change the default from ``latin-1`` to ``utf-8``
+to follow :rfc:`2640`.