summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2024-06-24 09:45:45 (GMT)
committer: GitHub <noreply@github.com> 2024-06-24 09:45:45 (GMT)
commit: 732c00550f20e73edeac03d9e222c4a719362649 (patch)
tree: e1465166ee1bc4eee3000901d6465083f7d1f7a3
parent: 206028dba986f982a940377ab1cb8b8276301b82 (diff)
download: cpython-732c00550f20e73edeac03d9e222c4a719362649.zip
download: cpython-732c00550f20e73edeac03d9e222c4a719362649.tar.gz
download: cpython-732c00550f20e73edeac03d9e222c4a719362649.tar.bz2
[3.13] gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909) (GH-120938)
Now the null character is always represented as \xc0\x80 for Tcl_NewStringObj().

(cherry picked from commit c38e2f64d012929168dfef7363c9e48bd1a6c731)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
-rw-r--r--  Lib/test/test_tcl.py  24
-rw-r--r--  Lib/test/test_tkinter/test_misc.py  9
-rw-r--r--  Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst  2
-rw-r--r--  Modules/_tkinter.c  40
4 files changed, 68 insertions, 7 deletions
diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py
index 443787d..d479f7d 100644
--- a/Lib/test/test_tcl.py
+++ b/Lib/test/test_tcl.py
@@ -73,6 +73,18 @@ class TclTest(unittest.TestCase):
tcl.call('set','a','1')
self.assertEqual(tcl.call('set','a'),'1')
+ def test_call_passing_null(self):
+ tcl = self.interp
+ tcl.call('set', 'a', 'a\0b') # ASCII-only
+ self.assertEqual(tcl.getvar('a'), 'a\x00b')
+ self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
+ self.assertEqual(tcl.eval('set a'), 'a\x00b')
+
+ tcl.call('set', 'a', '\u20ac\0') # non-ASCII
+ self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
+ self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
+ self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
+
def testCallException(self):
tcl = self.interp
self.assertRaises(TclError,tcl.call,'set','a')
@@ -98,6 +110,18 @@ class TclTest(unittest.TestCase):
tcl.setvar('a','1')
self.assertEqual(tcl.eval('set a'),'1')
+ def test_setvar_passing_null(self):
+ tcl = self.interp
+ tcl.setvar('a', 'a\0b') # ASCII-only
+ self.assertEqual(tcl.getvar('a'), 'a\x00b')
+ self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
+ self.assertEqual(tcl.eval('set a'), 'a\x00b')
+
+ tcl.setvar('a', '\u20ac\0') # non-ASCII
+ self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
+ self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
+ self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
+
def testSetVarArray(self):
tcl = self.interp
tcl.setvar('a(1)','1')
diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py
index d9ea642..b0b9ed6 100644
--- a/Lib/test/test_tkinter/test_misc.py
+++ b/Lib/test/test_tkinter/test_misc.py
@@ -476,6 +476,15 @@ class MiscTest(AbstractTkTest, unittest.TestCase):
self.assertEqual(vi.micro, 0)
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))
+ def test_embedded_null(self):
+ widget = tkinter.Entry(self.root)
+ widget.insert(0, 'abc\0def') # ASCII-only
+ widget.selection_range(0, 'end')
+ self.assertEqual(widget.selection_get(), 'abc\x00def')
+ widget.insert(0, '\u20ac\0') # non-ASCII
+ widget.selection_range(0, 'end')
+ self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def')
+
class WmTest(AbstractTkTest, unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst b/Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst
new file mode 100644
index 0000000..d518265
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst
@@ -0,0 +1,2 @@
+Fix truncation of strings with embedded null characters in some internal
+operations in :mod:`tkinter`.
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c
index 8fe2c5b..cd3722f 100644
--- a/Modules/_tkinter.c
+++ b/Modules/_tkinter.c
@@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
else
Py_UNREACHABLE();
}
-#endif
+#endif /* USE_TCL_UNICODE */
const char *s = Tcl_GetStringFromObj(value, &len);
return unicodeFromTclStringAndSize(s, len);
}
@@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
- if (PyUnicode_IS_ASCII(value)) {
+ if (PyUnicode_IS_ASCII(value) &&
+ strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
+ {
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
(int)size);
}
@@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
"surrogatepass", NATIVE_BYTEORDER);
else
Py_UNREACHABLE();
-#else
- encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
-#endif
if (!encoded) {
return NULL;
}
@@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
-#if USE_TCL_UNICODE
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
(int)(size / sizeof(Tcl_UniChar)));
#else
+ encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
+ if (!encoded) {
+ return NULL;
+ }
+ size = PyBytes_GET_SIZE(encoded);
+ if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
+ /* The string contains embedded null characters.
+ * Tcl needs a null character to be represented as \xc0\x80 in
+ * the Modified UTF-8 encoding. Otherwise the string can be
+ * truncated in some internal operations.
+ *
+ * NOTE: stringlib_replace() could be used here, but optimizing
+ * this obscure case isn't worth it unless stringlib_replace()
+ * was already exposed in the C API for other reasons. */
+ Py_SETREF(encoded,
+ PyObject_CallMethod(encoded, "replace", "y#y#",
+ "\0", (Py_ssize_t)1,
+ "\xc0\x80", (Py_ssize_t)2));
+ if (!encoded) {
+ return NULL;
+ }
+ size = PyBytes_GET_SIZE(encoded);
+ }
+ if (size > INT_MAX) {
+ Py_DECREF(encoded);
+ PyErr_SetString(PyExc_OverflowError, "string is too long");
+ return NULL;
+ }
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
-#endif
+#endif /* USE_TCL_UNICODE */
Py_DECREF(encoded);
return result;
}