diff options
author | Dong-hee Na <donghee.na@python.org> | 2021-09-11 15:04:38 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-11 15:04:38 (GMT) |
commit | 9abd07e5963f966c4d6df8f4e4bf390ed8191066 (patch) | |
tree | 2f743801d5f187e0a6c241b810f9ea3f8fbf6e7f | |
parent | 97ea18ecede8bfd33d5ab2dd0e7e2aada2051111 (diff) | |
download | cpython-9abd07e5963f966c4d6df8f4e4bf390ed8191066.zip cpython-9abd07e5963f966c4d6df8f4e4bf390ed8191066.tar.gz cpython-9abd07e5963f966c4d6df8f4e4bf390ed8191066.tar.bz2 |
bpo-44987: Speed up unicode normalization of ASCII strings (GH-28283)
-rw-r--r-- | Doc/whatsnew/3.11.rst | 3 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst | 2 | ||||
-rw-r--r-- | Modules/unicodedata.c | 4 |
3 files changed, 9 insertions, 0 deletions
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 9befe8f..254d722 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -287,6 +287,9 @@ Optimizations * :file:`.pdbrc` is now read with ``'utf-8'`` encoding. +* Pure ASCII strings are now normalized in constant time by :func:`unicodedata.normalize`. + (Contributed by Dong-hee Na in :issue:`44987`.) + CPython bytecode changes ======================== diff --git a/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst b/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst new file mode 100644 index 0000000..dec50d8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst @@ -0,0 +1,2 @@ +Pure ASCII strings are now normalized in constant time by :func:`unicodedata.normalize`. +Patch by Dong-hee Na. diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index b4563f3..9758572 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -807,6 +807,10 @@ is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k, return NO; } + if (PyUnicode_IS_ASCII(input)) { + return YES; + } + Py_ssize_t i, len; int kind; const void *data; |