diff options
author | Christian Heimes <christian@python.org> | 2023-05-20 23:33:09 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-20 23:33:09 (GMT) |
commit | 3ed57e4995d9f8583083483f397ddc3131720953 (patch) | |
tree | cea152a7d52840c5a958db40c9f543d3fa5d0d8c /Lib | |
parent | 12f1581b0c43f40a792c188e17d2dea2357debb3 (diff) | |
download | cpython-3ed57e4995d9f8583083483f397ddc3131720953.zip cpython-3ed57e4995d9f8583083483f397ddc3131720953.tar.gz cpython-3ed57e4995d9f8583083483f397ddc3131720953.tar.bz2 |
gh-61460: Stronger HMAC in multiprocessing (#20380)
bpo-17258: `multiprocessing` now supports stronger HMAC algorithms for inter-process connection authentication rather than only HMAC-MD5.
Signed-off-by: Christian Heimes <christian@python.org>
gpshead: I Reworked to be more robust while keeping the idea.
The protocol modification idea remains, but we now take advantage of the
message length as an indicator of legacy vs modern protocol version. No
more regular expression usage. We now default to HMAC-SHA256, but do so
in a way that will be compatible when communicating with older clients
or older servers. No protocol transition period is needed.
More integration tests to verify these claims remain true are required. I'm
unaware of anyone depending on multiprocessing connections between
different Python versions.
---------
Signed-off-by: Christian Heimes <christian@python.org>
Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/multiprocessing/connection.py | 182 | ||||
-rw-r--r-- | Lib/test/_test_multiprocessing.py | 57 |
2 files changed, 198 insertions, 41 deletions
diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 1a8822b..04eaea8 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -722,11 +722,11 @@ if sys.platform == 'win32': # Authentication stuff # -MESSAGE_LENGTH = 20 +MESSAGE_LENGTH = 40 # MUST be > 20 -CHALLENGE = b'#CHALLENGE#' -WELCOME = b'#WELCOME#' -FAILURE = b'#FAILURE#' +_CHALLENGE = b'#CHALLENGE#' +_WELCOME = b'#WELCOME#' +_FAILURE = b'#FAILURE#' # multiprocessing.connection Authentication Handshake Protocol Description # (as documented for reference after reading the existing code) @@ -750,7 +750,12 @@ FAILURE = b'#FAILURE#' # ------------------------------ --------------------------------------- # 0. Open a connection on the pipe. # 1. Accept connection. -# 2. New random 20 bytes -> MESSAGE +# 2. Random 20+ bytes -> MESSAGE +# Modern servers always send +# more than 20 bytes and include +# a {digest} prefix on it with +# their preferred HMAC digest. +# Legacy ones send ==20 bytes. # 3. send 4 byte length (net order) # prefix followed by: # b'#CHALLENGE#' + MESSAGE @@ -763,14 +768,32 @@ FAILURE = b'#FAILURE#' # 6. Assert that M1 starts with: # b'#CHALLENGE#' # 7. Strip that prefix from M1 into -> M2 -# 8. Compute HMAC-MD5 of AUTHKEY, M2 -> C_DIGEST +# 7.1. Parse M2: if it is exactly 20 bytes in +# length this indicates a legacy server +# supporting only HMAC-MD5. Otherwise the +# 7.2. preferred digest is looked up from an +# expected "{digest}" prefix on M2. No prefix +# or unsupported digest? <- AuthenticationError +# 7.3. Put divined algorithm name in -> D_NAME +# 8. Compute HMAC-D_NAME of AUTHKEY, M2 -> C_DIGEST # 9. Send 4 byte length prefix (net order) # followed by C_DIGEST bytes. -# 10. Compute HMAC-MD5 of AUTHKEY, -# MESSAGE into -> M_DIGEST. -# 11. Receive 4 or 4+8 byte length +# 10. Receive 4 or 4+8 byte length # prefix (#4 dance) -> SIZE. -# 12. Receive min(SIZE, 256) -> C_D. +# 11. Receive min(SIZE, 256) -> C_D. +# 11.1. Parse C_D: legacy servers +# accept it as is, "md5" -> D_NAME +# 11.2. modern servers check the length +# of C_D, IF it is 16 bytes? +# 11.2.1. "md5" -> D_NAME +# and skip to step 12. +# 11.3. longer? expect and parse a "{digest}" +# prefix into -> D_NAME. +# Strip the prefix and store remaining +# bytes in -> C_D. +# 11.4. Don't like D_NAME? <- AuthenticationError +# 12. Compute HMAC-D_NAME of AUTHKEY, +# MESSAGE into -> M_DIGEST. # 13. Compare M_DIGEST == C_D: # 14a: Match? Send length prefix & # b'#WELCOME#' @@ -787,42 +810,139 @@ FAILURE = b'#FAILURE#' # # If this RETURNed, the connection remains open: it has been authenticated. # -# Length prefixes are used consistently even though every step so far has -# always been a singular specific fixed length. This may help us evolve -# the protocol in the future without breaking backwards compatibility. -# -# Similarly the initial challenge message from the serving side has always -# been 20 bytes, but clients can accept a 100+ so using the length of the -# opening challenge message as an indicator of protocol version may work. +# Length prefixes are used consistently. Even on the legacy protocol, this +# was good fortune and allowed us to evolve the protocol by using the length +# of the opening challenge or length of the returned digest as a signal as +# to which protocol the other end supports. + +_ALLOWED_DIGESTS = frozenset( + {b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'}) +_MAX_DIGEST_LEN = max(len(_) for _ in _ALLOWED_DIGESTS) + +# Old hmac-md5 only server versions from Python <=3.11 sent a message of this +# length. It happens to not match the length of any supported digest so we can +# use a message of this length to indicate that we should work in backwards +# compatible md5-only mode without a {digest_name} prefix on our response. +_MD5ONLY_MESSAGE_LENGTH = 20 +_MD5_DIGEST_LEN = 16 +_LEGACY_LENGTHS = (_MD5ONLY_MESSAGE_LENGTH, _MD5_DIGEST_LEN) + + +def _get_digest_name_and_payload(message: bytes) -> (str, bytes): + """Returns a digest name and the payload for a response hash. + + If a legacy protocol is detected based on the message length + or contents the digest name returned will be empty to indicate + legacy mode where MD5 and no digest prefix should be sent. + """ + # modern message format: b"{digest}payload" longer than 20 bytes + # legacy message format: 16 or 20 byte b"payload" + if len(message) in _LEGACY_LENGTHS: + # Either this was a legacy server challenge, or we're processing + # a reply from a legacy client that sent an unprefixed 16-byte + # HMAC-MD5 response. All messages using the modern protocol will + # be longer than either of these lengths. + return '', message + if (message.startswith(b'{') and + (curly := message.find(b'}', 1, _MAX_DIGEST_LEN+2)) > 0): + digest = message[1:curly] + if digest in _ALLOWED_DIGESTS: + payload = message[curly+1:] + return digest.decode('ascii'), payload + raise AuthenticationError( + 'unsupported message length, missing digest prefix, ' + f'or unsupported digest: {message=}') + + +def _create_response(authkey, message): + """Create a MAC based on authkey and message + + The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or + the message has a '{digest_name}' prefix. For legacy HMAC-MD5, the response + is the raw MAC, otherwise the response is prefixed with '{digest_name}', + e.g. b'{sha256}abcdefg...' + + Note: The MAC protects the entire message including the digest_name prefix. + """ + import hmac + digest_name = _get_digest_name_and_payload(message)[0] + # The MAC protects the entire message: digest header and payload. + if not digest_name: + # Legacy server without a {digest} prefix on message. + # Generate a legacy non-prefixed HMAC-MD5 reply. + try: + return hmac.new(authkey, message, 'md5').digest() + except ValueError: + # HMAC-MD5 is not available (FIPS mode?), fall back to + # HMAC-SHA2-256 modern protocol. The legacy server probably + # doesn't support it and will reject us anyways. :shrug: + digest_name = 'sha256' + # Modern protocol, indicate the digest used in the reply. + response = hmac.new(authkey, message, digest_name).digest() + return b'{%s}%s' % (digest_name.encode('ascii'), response) + + +def _verify_challenge(authkey, message, response): + """Verify MAC challenge + + If our message did not include a digest_name prefix, the client is allowed + to select a stronger digest_name from _ALLOWED_DIGESTS. + + In case our message is prefixed, a client cannot downgrade to a weaker + algorithm, because the MAC is calculated over the entire message + including the '{digest_name}' prefix. + """ + import hmac + response_digest, response_mac = _get_digest_name_and_payload(response) + response_digest = response_digest or 'md5' + try: + expected = hmac.new(authkey, message, response_digest).digest() + except ValueError: + raise AuthenticationError(f'{response_digest=} unsupported') + if len(expected) != len(response_mac): + raise AuthenticationError( + f'expected {response_digest!r} of length {len(expected)} ' + f'got {len(response_mac)}') + if not hmac.compare_digest(expected, response_mac): + raise AuthenticationError('digest received was wrong') -def deliver_challenge(connection, authkey): - import hmac +def deliver_challenge(connection, authkey: bytes, digest_name='sha256'): if not isinstance(authkey, bytes): raise ValueError( "Authkey must be bytes, not {0!s}".format(type(authkey))) + assert MESSAGE_LENGTH > _MD5ONLY_MESSAGE_LENGTH, "protocol constraint" message = os.urandom(MESSAGE_LENGTH) - connection.send_bytes(CHALLENGE + message) - digest = hmac.new(authkey, message, 'md5').digest() + message = b'{%s}%s' % (digest_name.encode('ascii'), message) + # Even when sending a challenge to a legacy client that does not support + # digest prefixes, they'll take the entire thing as a challenge and + # respond to it with a raw HMAC-MD5. + connection.send_bytes(_CHALLENGE + message) response = connection.recv_bytes(256) # reject large message - if response == digest: - connection.send_bytes(WELCOME) + try: + _verify_challenge(authkey, message, response) + except AuthenticationError: + connection.send_bytes(_FAILURE) + raise else: - connection.send_bytes(FAILURE) - raise AuthenticationError('digest received was wrong') + connection.send_bytes(_WELCOME) -def answer_challenge(connection, authkey): - import hmac + +def answer_challenge(connection, authkey: bytes): if not isinstance(authkey, bytes): raise ValueError( "Authkey must be bytes, not {0!s}".format(type(authkey))) message = connection.recv_bytes(256) # reject large message - assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message - message = message[len(CHALLENGE):] - digest = hmac.new(authkey, message, 'md5').digest() + if not message.startswith(_CHALLENGE): + raise AuthenticationError( + f'Protocol error, expected challenge: {message=}') + message = message[len(_CHALLENGE):] + if len(message) < _MD5ONLY_MESSAGE_LENGTH: + raise AuthenticationError('challenge too short: {len(message)} bytes') + digest = _create_response(authkey, message) connection.send_bytes(digest) response = connection.recv_bytes(256) # reject large message - if response != WELCOME: + if response != _WELCOME: raise AuthenticationError('digest sent was rejected') # diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 9a2db24..767f049 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -48,6 +48,7 @@ import multiprocessing.heap import multiprocessing.managers import multiprocessing.pool import multiprocessing.queues +from multiprocessing.connection import wait, AuthenticationError from multiprocessing import util @@ -131,8 +132,6 @@ HAVE_GETVALUE = not getattr(_multiprocessing, WIN32 = (sys.platform == "win32") -from multiprocessing.connection import wait - def wait_for_handle(handle, timeout): if timeout is not None and timeout < 0.0: timeout = None @@ -3042,7 +3041,7 @@ class _TestRemoteManager(BaseTestCase): del queue -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class _TestManagerRestart(BaseTestCase): @classmethod @@ -3531,7 +3530,7 @@ class _TestPoll(BaseTestCase): # @unittest.skipUnless(HAS_REDUCTION, "test needs multiprocessing.reduction") -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class _TestPicklingConnections(BaseTestCase): ALLOWED_TYPES = ('processes',) @@ -3834,7 +3833,7 @@ class _TestSharedCTypes(BaseTestCase): @unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory") -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class _TestSharedMemory(BaseTestCase): ALLOWED_TYPES = ('processes',) @@ -4636,7 +4635,7 @@ class TestInvalidHandle(unittest.TestCase): -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class OtherTest(unittest.TestCase): # TODO: add more tests for deliver/answer challenge. def test_deliver_challenge_auth_failure(self): @@ -4656,7 +4655,7 @@ class OtherTest(unittest.TestCase): def recv_bytes(self, size): self.count += 1 if self.count == 1: - return multiprocessing.connection.CHALLENGE + return multiprocessing.connection._CHALLENGE elif self.count == 2: return b'something bogus' return b'' @@ -4666,6 +4665,44 @@ class OtherTest(unittest.TestCase): multiprocessing.connection.answer_challenge, _FakeConnection(), b'abc') + +@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') +class ChallengeResponseTest(unittest.TestCase): + authkey = b'supadupasecretkey' + + def create_response(self, message): + return multiprocessing.connection._create_response( + self.authkey, message + ) + + def verify_challenge(self, message, response): + return multiprocessing.connection._verify_challenge( + self.authkey, message, response + ) + + def test_challengeresponse(self): + for algo in [None, "md5", "sha256"]: + with self.subTest(f"{algo=}"): + msg = b'is-twenty-bytes-long' # The length of a legacy message. + if algo: + prefix = b'{%s}' % algo.encode("ascii") + else: + prefix = b'' + msg = prefix + msg + response = self.create_response(msg) + if not response.startswith(prefix): + self.fail(response) + self.verify_challenge(msg, response) + + # TODO(gpshead): We need integration tests for handshakes between modern + # deliver_challenge() and verify_response() code and connections running a + # test-local copy of the legacy Python <=3.11 implementations. + + # TODO(gpshead): properly annotate tests for requires_hashdigest rather than + # only running these on a platform supporting everything. otherwise logic + # issues preventing it from working on FIPS mode setups will be hidden. + # # Test Manager.start()/Pool.__init__() initializer feature - see issue 5585 # @@ -4673,7 +4710,7 @@ class OtherTest(unittest.TestCase): def initializer(ns): ns.test += 1 -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class TestInitializers(unittest.TestCase): def setUp(self): self.mgr = multiprocessing.Manager() @@ -5537,7 +5574,7 @@ class TestPoolNotLeakOnFailure(unittest.TestCase): any(process.is_alive() for process in forked_processes)) -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class TestSyncManagerTypes(unittest.TestCase): """Test all the types which can be shared between a parent and a child process by using a manager which acts as an intermediary @@ -5969,7 +6006,7 @@ def install_tests_in_module_dict(remote_globs, start_method): class Temp(base, Mixin, unittest.TestCase): pass if type_ == 'manager': - Temp = hashlib_helper.requires_hashdigest('md5')(Temp) + Temp = hashlib_helper.requires_hashdigest('sha256')(Temp) Temp.__name__ = Temp.__qualname__ = newname Temp.__module__ = __module__ remote_globs[newname] = Temp |