summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/binascii.rst14
-rw-r--r--Lib/test/test_binascii.py41
-rw-r--r--Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst3
-rw-r--r--Modules/binascii.c52
-rw-r--r--Modules/clinic/binascii.c.h37
5 files changed, 132 insertions, 15 deletions
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index 2c0c1bc..fd5df69 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -50,11 +50,23 @@ The :mod:`binascii` module defines the following functions:
Added the *backtick* parameter.
-.. function:: a2b_base64(string)
+.. function:: a2b_base64(string, *, strict_mode=False)
Convert a block of base64 data back to binary and return the binary data. More
than one line may be passed at a time.
+ If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
+ data will raise :exc:`binascii.Error`.
+
+ Valid base64:
+ * Conforms to :rfc:`3548`.
+ * Contains only characters from the base64 alphabet.
+ * Contains no excess data after padding (including excess padding, newlines, etc.).
+ * Does not start with padding.
+
+ .. versionchanged:: 3.11
+ Added the *strict_mode* parameter.
+
.. function:: b2a_base64(data, *, newline=True)
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 4d1bf2c..74438d8 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -114,6 +114,47 @@ class BinASCIITest(unittest.TestCase):
# empty strings. TBD: shouldn't it raise an exception instead ?
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
+ def test_base64_strict_mode(self):
+ # Test base64 with strict mode on
+ def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
+ with self.assertRaisesRegex(binascii.Error, assert_regex):
+ binascii.a2b_base64(self.type2test(data), strict_mode=True)
+ self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
+ non_strict_mode_expected_result)
+ self.assertEqual(binascii.a2b_base64(self.type2test(data)),
+ non_strict_mode_expected_result)
+
+ def assertExcessData(data, non_strict_mode_expected_result: bytes):
+ _assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
+
+ def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
+ _assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
+
+ def assertMalformedPadding(data, non_strict_mode_expected_result: bytes):
+ _assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
+
+ # Test excess data exceptions
+ assertExcessData(b'ab==a', b'i')
+ assertExcessData(b'ab===', b'i')
+ assertExcessData(b'ab==:', b'i')
+ assertExcessData(b'abc=a', b'i\xb7')
+ assertExcessData(b'abc=:', b'i\xb7')
+ assertExcessData(b'ab==\n', b'i')
+
+ # Test non-base64 data exceptions
+ assertNonBase64Data(b'\nab==', b'i')
+ assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
+ assertNonBase64Data(b'a\nb==', b'i')
+ assertNonBase64Data(b'a\x00b==', b'i')
+
+ # Test malformed padding
+ assertMalformedPadding(b'=', b'')
+ assertMalformedPadding(b'==', b'')
+ assertMalformedPadding(b'===', b'')
+ assertMalformedPadding(b'ab=c=', b'i\xb7')
+ assertMalformedPadding(b'ab=ab==', b'i\xb6\x9b')
+
+
def test_base64errors(self):
# Test base64 with invalid padding
def assertIncorrectPadding(data):
diff --git a/Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst b/Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst
new file mode 100644
index 0000000..f49e7a8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-01-31-18-24-54.bpo-43086.2_P-SH.rst
@@ -0,0 +1,3 @@
+Added a new optional :code:`strict_mode` parameter to *binascii.a2b_base64*.
+When :code:`strict_mode` is set to :code:`True`, the *a2b_base64* function will accept only valid base64 content.
+More details about what "valid base64 content" is can be found in the function's documentation.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 59e4b0a..50f25b4 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -433,18 +433,26 @@ binascii.a2b_base64
data: ascii_buffer
/
+ *
+ strict_mode: bool(accept={int}) = False
Decode a line of base64 data.
+
+ strict_mode
+ When set to True, bytes that are not part of the base64 standard are not allowed.
+ The same applies to excess data after padding (= / ==).
[clinic start generated code]*/
static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
-/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
+/*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
{
assert(data->len >= 0);
const unsigned char *ascii_data = data->buf;
size_t ascii_len = data->len;
+ binascii_state *state = NULL;
+ char padding_started = 0;
/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -455,6 +463,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
return NULL;
unsigned char *bin_data_start = bin_data;
+ if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
+ malformed_padding:
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error, "Leading padding not allowed");
+ }
+ goto error_end;
+ }
+
int quad_pos = 0;
unsigned char leftchar = 0;
int pads = 0;
@@ -465,11 +482,21 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
** the invalid ones.
*/
if (this_ch == BASE64_PAD) {
+ padding_started = 1;
+
if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
- /* A pad sequence means no more input.
- ** We've already interpreted the data
- ** from the quad at this point.
+ /* A pad sequence means we should not parse more input.
+ ** We've already interpreted the data from the quad at this point.
+ ** In strict mode, an error is raised if there's excess data after the padding.
*/
+ if (strict_mode && i + 1 < ascii_len) {
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error, "Excess data after padding");
+ }
+ goto error_end;
+ }
+
goto done;
}
continue;
@@ -477,8 +504,20 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
this_ch = table_a2b_base64[this_ch];
if (this_ch >= 64) {
+ if (strict_mode) {
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error, "Only base64 data is allowed");
+ }
+ goto error_end;
+ }
continue;
}
+
+ // Characters that are not '=', in the middle of the padding, are not allowed
+ if (strict_mode && padding_started) {
+ goto malformed_padding;
+ }
pads = 0;
switch (quad_pos) {
@@ -505,7 +544,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
}
if (quad_pos != 0) {
- binascii_state *state = get_binascii_state(module);
+ state = get_binascii_state(module);
if (state == NULL) {
/* error already set, from get_binascii_state */
} else if (quad_pos == 1) {
@@ -522,6 +561,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
} else {
PyErr_SetString(state->Error, "Incorrect padding");
}
+ error_end:
_PyBytesWriter_Dealloc(&writer);
return NULL;
}
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index ae1c457..a924004 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -87,27 +87,48 @@ exit:
}
PyDoc_STRVAR(binascii_a2b_base64__doc__,
-"a2b_base64($module, data, /)\n"
+"a2b_base64($module, data, /, *, strict_mode=False)\n"
"--\n"
"\n"
-"Decode a line of base64 data.");
+"Decode a line of base64 data.\n"
+"\n"
+" strict_mode\n"
+" When set to True, bytes that are not part of the base64 standard are not allowed.\n"
+" The same applies to excess data after padding (= / ==).");
#define BINASCII_A2B_BASE64_METHODDEF \
- {"a2b_base64", (PyCFunction)binascii_a2b_base64, METH_O, binascii_a2b_base64__doc__},
+ {"a2b_base64", (PyCFunction)(void(*)(void))binascii_a2b_base64, METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__},
static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data);
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode);
static PyObject *
-binascii_a2b_base64(PyObject *module, PyObject *arg)
+binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
+ static const char * const _keywords[] = {"", "strict_mode", NULL};
+ static _PyArg_Parser _parser = {NULL, _keywords, "a2b_base64", 0};
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
Py_buffer data = {NULL, NULL};
+ int strict_mode = 0;
- if (!ascii_buffer_converter(arg, &data)) {
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
goto exit;
}
- return_value = binascii_a2b_base64_impl(module, &data);
+ if (!ascii_buffer_converter(args[0], &data)) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_kwonly;
+ }
+ strict_mode = _PyLong_AsInt(args[1]);
+ if (strict_mode == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+skip_optional_kwonly:
+ return_value = binascii_a2b_base64_impl(module, &data, strict_mode);
exit:
/* Cleanup for data */
@@ -746,4 +767,4 @@ exit:
return return_value;
}
-/*[clinic end generated code: output=95a0178f30801b89 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0f261ee49971f5ca input=a9049054013a1b77]*/