From 3256b178ed31ee8ed0c04a6e53f67d1ef96cb746 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 7 Jun 2022 07:47:27 +0800 Subject: bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-32010) --- Lib/ntpath.py | 42 ++++++++++++--- Lib/test/test_ntpath.py | 2 + .../2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 3 ++ Modules/_winapi.c | 61 ++++++++++++++++++++++ Modules/clinic/_winapi.c.h | 39 +++++++++++++- 5 files changed, 138 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 041ebc7..73b1bd1 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -23,6 +23,7 @@ import stat import genericpath from genericpath import * + __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", @@ -41,14 +42,39 @@ def _get_bothseps(path): # Other normalizations (such as optimizing '../' away) are not done # (this is done by normpath). -def normcase(s): - """Normalize case of pathname. - - Makes all characters lowercase and all slashes into backslashes.""" - s = os.fspath(s) - if isinstance(s, bytes): - return s.replace(b'/', b'\\').lower() - else: +try: + from _winapi import ( + LCMapStringEx as _LCMapStringEx, + LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT, + LCMAP_LOWERCASE as _LCMAP_LOWERCASE) + + def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes. + """ + s = os.fspath(s) + if not s: + return s + if isinstance(s, bytes): + encoding = sys.getfilesystemencoding() + s = s.decode(encoding, 'surrogateescape').replace('/', '\\') + s = _LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, s) + return s.encode(encoding, 'surrogateescape') + else: + return _LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, + s.replace('/', '\\')) +except ImportError: + def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes. + """ + s = os.fspath(s) + if isinstance(s, bytes): + return os.fsencode(os.fsdecode(s).replace('/', '\\').lower()) return s.replace('/', '\\').lower() diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 7211ed8..ab3603b 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -852,6 +852,8 @@ class PathLikeTests(NtpathTestCase): def test_path_normcase(self): self._check_function(self.path.normcase) + if sys.platform == 'win32': + self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') def test_path_isabs(self): self._check_function(self.path.isabs) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst new file mode 100644 index 0000000..852cc77 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -0,0 +1,3 @@ +Support native Windows case-insensitive path comparisons by using +``LCMapStringEx`` instead of :func:`str.lower` in :func:`ntpath.normcase`. +Add ``LCMapStringEx`` to the :mod:`_winapi` module. diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 3e24d51..4845b4e 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1513,6 +1513,50 @@ _winapi_PeekNamedPipe_impl(PyObject *module, HANDLE handle, int size) } /*[clinic input] +_winapi.LCMapStringEx + + locale: LPCWSTR + flags: DWORD + src: LPCWSTR + +[clinic start generated code]*/ + +static PyObject * +_winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, + LPCWSTR src) +/*[clinic end generated code: output=cf4713d80e2b47c9 input=9fe26f95d5ab0001]*/ +{ + if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV | + LCMAP_SORTKEY)) { + return PyErr_Format(PyExc_ValueError, "unsupported flags"); + } + + int dest_size = LCMapStringEx(locale, flags, src, -1, NULL, 0, + NULL, NULL, 0); + if (dest_size == 0) { + return PyErr_SetFromWindowsErr(0); + } + + wchar_t* dest = PyMem_NEW(wchar_t, dest_size); + if (dest == NULL) { + return PyErr_NoMemory(); + } + + int nmapped = LCMapStringEx(locale, flags, src, -1, dest, dest_size, + NULL, NULL, 0); + if (nmapped == 0) { + DWORD error = GetLastError(); + PyMem_DEL(dest); + return PyErr_SetFromWindowsErr(error); + } + + PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1); + PyMem_DEL(dest); + + return ret; +} + +/*[clinic input] _winapi.ReadFile handle: HANDLE @@ -2023,6 +2067,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_OPENFILEMAPPING_METHODDEF _WINAPI_OPENPROCESS_METHODDEF _WINAPI_PEEKNAMEDPIPE_METHODDEF + _WINAPI_LCMAPSTRINGEX_METHODDEF _WINAPI_READFILE_METHODDEF _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF _WINAPI_TERMINATEPROCESS_METHODDEF @@ -2160,6 +2205,22 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, FILE_TYPE_PIPE); WINAPI_CONSTANT(F_DWORD, FILE_TYPE_REMOTE); + WINAPI_CONSTANT("u", LOCALE_NAME_INVARIANT); + WINAPI_CONSTANT(F_DWORD, LOCALE_NAME_MAX_LENGTH); + WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT); + WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT); + + WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH); + WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH); + WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA); + WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA); + WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING); + WINAPI_CONSTANT(F_DWORD, LCMAP_LOWERCASE); + WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE); + WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE); + WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE); + WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE); + WINAPI_CONSTANT("i", NULL); return 0; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 4d89888..486029a 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -820,6 +820,43 @@ exit: return return_value; } +PyDoc_STRVAR(_winapi_LCMapStringEx__doc__, +"LCMapStringEx($module, /, locale, flags, src)\n" +"--\n" +"\n"); + +#define _WINAPI_LCMAPSTRINGEX_METHODDEF \ + {"LCMapStringEx", _PyCFunction_CAST(_winapi_LCMapStringEx), METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__}, + +static PyObject * +_winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, + LPCWSTR src); + +static PyObject * +_winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"locale", "flags", "src", NULL}; + static _PyArg_Parser _parser = {"O&kO&:LCMapStringEx", _keywords, 0}; + LPCWSTR locale; + DWORD flags; + LPCWSTR src; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + _PyUnicode_WideCharString_Converter, &locale, &flags, _PyUnicode_WideCharString_Converter, &src)) { + goto exit; + } + return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src); + +exit: + /* Cleanup for locale */ + PyMem_Free((void *)locale); + /* Cleanup for src */ + PyMem_Free((void *)src); + + return return_value; +} + PyDoc_STRVAR(_winapi_ReadFile__doc__, "ReadFile($module, /, handle, size, overlapped=False)\n" "--\n" @@ -1164,4 +1201,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=b007dde2e7f2fff8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6cdefec63a1d7f12 input=a9049054013a1b77]*/ -- cgit v0.12