summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dong-hee Na <donghee.na@python.org> 2021-09-11 15:04:38 (GMT)
committer: GitHub <noreply@github.com> 2021-09-11 15:04:38 (GMT)
commit: 9abd07e5963f966c4d6df8f4e4bf390ed8191066 (patch)
tree: 2f743801d5f187e0a6c241b810f9ea3f8fbf6e7f
parent: 97ea18ecede8bfd33d5ab2dd0e7e2aada2051111 (diff)
downloadcpython-9abd07e5963f966c4d6df8f4e4bf390ed8191066.zip
cpython-9abd07e5963f966c4d6df8f4e4bf390ed8191066.tar.gz
cpython-9abd07e5963f966c4d6df8f4e4bf390ed8191066.tar.bz2
bpo-44987: Speed up unicode normalization of ASCII strings (GH-28283)
-rw-r--r-- Doc/whatsnew/3.11.rst 3
-rw-r--r-- Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst 2
-rw-r--r-- Modules/unicodedata.c 4
3 files changed, 9 insertions, 0 deletions
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index 9befe8f..254d722 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -287,6 +287,9 @@ Optimizations
* :file:`.pdbrc` is now read with ``'utf-8'`` encoding.
+* Pure ASCII strings are now normalized in constant time by :func:`unicodedata.normalize`.
+ (Contributed by Dong-hee Na in :issue:`44987`.)
+
CPython bytecode changes
========================
diff --git a/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst b/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst
new file mode 100644
index 0000000..dec50d8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst
@@ -0,0 +1,2 @@
+Pure ASCII strings are now normalized in constant time by :func:`unicodedata.normalize`.
+Patch by Dong-hee Na.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index b4563f3..9758572 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -807,6 +807,10 @@ is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k,
return NO;
}
+ if (PyUnicode_IS_ASCII(input)) {
+ return YES;
+ }
+
Py_ssize_t i, len;
int kind;
const void *data;