diff options
author | Steve Dower <steve.dower@python.org> | 2022-06-10 10:14:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-10 10:14:25 (GMT) |
commit | 927b5afee73218500a8fa80df86216cfdc24ef5a (patch) | |
tree | dfe933ba27fa5ed684788f8beb1d413bc028e74e | |
parent | f384a8e477a112d241650d5d4dc4042e7cd4c969 (diff) | |
download | cpython-927b5afee73218500a8fa80df86216cfdc24ef5a.zip cpython-927b5afee73218500a8fa80df86216cfdc24ef5a.tar.gz cpython-927b5afee73218500a8fa80df86216cfdc24ef5a.tar.bz2 |
bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-93591)
* bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-32010)
* Use AsWideCharString to avoid memory leaks in deprectated unicode converter
Co-authored-by: AN Long <aisk@users.noreply.github.com>
-rw-r--r-- | Lib/ntpath.py | 42 | ||||
-rw-r--r-- | Lib/test/test_ntpath.py | 2 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 3 | ||||
-rw-r--r-- | Modules/_winapi.c | 79 | ||||
-rw-r--r-- | Modules/clinic/_winapi.c.h | 34 |
5 files changed, 151 insertions, 9 deletions
diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 041ebc7..73b1bd1 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -23,6 +23,7 @@ import stat import genericpath from genericpath import * + __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", @@ -41,14 +42,39 @@ def _get_bothseps(path): # Other normalizations (such as optimizing '../' away) are not done # (this is done by normpath). -def normcase(s): - """Normalize case of pathname. - - Makes all characters lowercase and all slashes into backslashes.""" - s = os.fspath(s) - if isinstance(s, bytes): - return s.replace(b'/', b'\\').lower() - else: +try: + from _winapi import ( + LCMapStringEx as _LCMapStringEx, + LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT, + LCMAP_LOWERCASE as _LCMAP_LOWERCASE) + + def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes. + """ + s = os.fspath(s) + if not s: + return s + if isinstance(s, bytes): + encoding = sys.getfilesystemencoding() + s = s.decode(encoding, 'surrogateescape').replace('/', '\\') + s = _LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, s) + return s.encode(encoding, 'surrogateescape') + else: + return _LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, + s.replace('/', '\\')) +except ImportError: + def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes. + """ + s = os.fspath(s) + if isinstance(s, bytes): + return os.fsencode(os.fsdecode(s).replace('/', '\\').lower()) return s.replace('/', '\\').lower() diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 7211ed8..ab3603b 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -852,6 +852,8 @@ class PathLikeTests(NtpathTestCase): def test_path_normcase(self): self._check_function(self.path.normcase) + if sys.platform == 'win32': + self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') def test_path_isabs(self): self._check_function(self.path.isabs) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst new file mode 100644 index 0000000..852cc77 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -0,0 +1,3 @@ +Support native Windows case-insensitive path comparisons by using +``LCMapStringEx`` instead of :func:`str.lower` in :func:`ntpath.normcase`. +Add ``LCMapStringEx`` to the :mod:`_winapi` module. diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 3e24d51..a3c30f2 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1513,6 +1513,68 @@ _winapi_PeekNamedPipe_impl(PyObject *module, HANDLE handle, int size) } /*[clinic input] +_winapi.LCMapStringEx + + locale: unicode + flags: DWORD + src: unicode + +[clinic start generated code]*/ + +static PyObject * +_winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags, + PyObject *src) +/*[clinic end generated code: output=8ea4c9d85a4a1f23 input=2fa6ebc92591731b]*/ +{ + if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV | + LCMAP_SORTKEY)) { + return PyErr_Format(PyExc_ValueError, "unsupported flags"); + } + + wchar_t *locale_ = PyUnicode_AsWideCharString(locale, NULL); + if (!locale_) { + return NULL; + } + wchar_t *src_ = PyUnicode_AsWideCharString(src, NULL); + if (!src_) { + PyMem_Free(locale_); + return NULL; + } + + int dest_size = LCMapStringEx(locale_, flags, src_, -1, NULL, 0, + NULL, NULL, 0); + if (dest_size == 0) { + PyMem_Free(locale_); + PyMem_Free(src_); + return PyErr_SetFromWindowsErr(0); + } + + wchar_t* dest = PyMem_NEW(wchar_t, dest_size); + if (dest == NULL) { + PyMem_Free(locale_); + PyMem_Free(src_); + return PyErr_NoMemory(); + } + + int nmapped = LCMapStringEx(locale_, flags, src_, -1, dest, dest_size, + NULL, NULL, 0); + if (nmapped == 0) { + DWORD error = GetLastError(); + PyMem_Free(locale_); + PyMem_Free(src_); + PyMem_DEL(dest); + return PyErr_SetFromWindowsErr(error); + } + + PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1); + PyMem_Free(locale_); + PyMem_Free(src_); + PyMem_DEL(dest); + + return ret; +} + +/*[clinic input] _winapi.ReadFile handle: HANDLE @@ -2023,6 +2085,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_OPENFILEMAPPING_METHODDEF _WINAPI_OPENPROCESS_METHODDEF _WINAPI_PEEKNAMEDPIPE_METHODDEF + _WINAPI_LCMAPSTRINGEX_METHODDEF _WINAPI_READFILE_METHODDEF _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF _WINAPI_TERMINATEPROCESS_METHODDEF @@ -2160,6 +2223,22 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, FILE_TYPE_PIPE); WINAPI_CONSTANT(F_DWORD, FILE_TYPE_REMOTE); + WINAPI_CONSTANT("u", LOCALE_NAME_INVARIANT); + WINAPI_CONSTANT(F_DWORD, LOCALE_NAME_MAX_LENGTH); + WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT); + WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT); + + WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH); + WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH); + WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA); + WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA); + WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING); + WINAPI_CONSTANT(F_DWORD, LCMAP_LOWERCASE); + WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE); + WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE); + WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE); + WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE); + WINAPI_CONSTANT("i", NULL); return 0; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 9c83d0b..87f624f 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -840,6 +840,38 @@ exit: return return_value; } +PyDoc_STRVAR(_winapi_LCMapStringEx__doc__, +"LCMapStringEx($module, /, locale, flags, src)\n" +"--\n" +"\n"); + +#define _WINAPI_LCMAPSTRINGEX_METHODDEF \ + {"LCMapStringEx", _PyCFunction_CAST(_winapi_LCMapStringEx), METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__}, + +static PyObject * +_winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags, + PyObject *src); + +static PyObject * +_winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"locale", "flags", "src", NULL}; + static _PyArg_Parser _parser = {"UkU:LCMapStringEx", _keywords, 0}; + PyObject *locale; + DWORD flags; + PyObject *src; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &locale, &flags, &src)) { + goto exit; + } + return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src); + +exit: + return return_value; +} + PyDoc_STRVAR(_winapi_ReadFile__doc__, "ReadFile($module, /, handle, size, overlapped=False)\n" "--\n" @@ -1184,4 +1216,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=a4ede01aede352a4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=dfbccec8f11b7433 input=a9049054013a1b77]*/ |