summary | refs | log | tree | commit | diff | stats
path: root/Modules
diff options
context:
space:
mode:
author: Paul Moore <p.f.moore@gmail.com> 2023-01-17 19:23:06 (GMT)
committer: GitHub <noreply@github.com> 2023-01-17 19:23:06 (GMT)
commit: f34176b77f222726d901595968a4b44456186da4 (patch)
tree: 5647a103b654e990894431136d3a3bffae23ad8f /Modules
parent: c5660ae96f2ab5732c68c301ce9a63009f432d93 (diff)
download: cpython-f34176b77f222726d901595968a4b44456186da4.zip
cpython-f34176b77f222726d901595968a4b44456186da4.tar.gz
cpython-f34176b77f222726d901595968a4b44456186da4.tar.bz2
gh-82052: Don't send partial UTF-8 sequences to the Windows API (GH-101103)
Don't send partial UTF-8 sequences to the Windows API
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_io/winconsoleio.c 17
1 files changed, 16 insertions, 1 deletions
diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c
index d5de64b..4f41ab9 100644
--- a/Modules/_io/winconsoleio.c
+++ b/Modules/_io/winconsoleio.c
@@ -954,7 +954,7 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, Py_buffer *b)
{
BOOL res = TRUE;
wchar_t *wbuf;
- DWORD len, wlen, n = 0;
+ DWORD len, wlen, orig_len, n = 0;
HANDLE handle;
if (self->fd == -1)
@@ -984,6 +984,21 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, Py_buffer *b)
have to reduce and recalculate. */
while (wlen > 32766 / sizeof(wchar_t)) {
len /= 2;
+ orig_len = len;
+ /* Reduce the length until the last byte kept is a single-byte (ASCII)
+ * character (top bit is unset), so that a multi-byte UTF-8 sequence is never split. Fix for github issue 82052.
+ */
+ while (len > 0 && (((char *)b->buf)[len-1] & 0x80) != 0)
+ --len;
+ /* If we hit a length of 0, no byte with the top bit unset was found
+ * in the first orig_len bytes. This is unlikely for typical output, but
+ * it can happen when that prefix consists entirely of multi-byte UTF-8 sequences.
+ * To be on the safe side, if we hit this issue we just restore
+ * the original length and let the console API sort it out.
+ */
+ if (len == 0) {
+ len = orig_len;
+ }
wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0);
}
Py_END_ALLOW_THREADS