summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org> 2024-06-20 19:06:16 (GMT)
committer: GitHub <noreply@github.com> 2024-06-20 19:06:16 (GMT)
commit: 5150795b1cb6393e2b34834b2729d9176315054e (patch)
tree: 7582d32250483f6a9cce6327f46ce96426194896 /Objects/unicodeobject.c
parent: 85d90b59e2a9185cad608c5047357be645b4d5c6 (diff)
download: cpython-5150795b1cb6393e2b34834b2729d9176315054e.zip
cpython-5150795b1cb6393e2b34834b2729d9176315054e.tar.gz
cpython-5150795b1cb6393e2b34834b2729d9176315054e.tar.bz2
gh-119182: Optimize PyUnicode_FromFormat() (#120796)
Use strchr() and ucs1lib_find_max_char() to optimize the code path formatting sub-strings between '%' formats.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 52
1 files changed, 26 insertions, 26 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1f8c89d..e6feed4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
static int
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
{
- writer->min_length += strlen(format) + 100;
+ Py_ssize_t len = strlen(format);
+ writer->min_length += len + 100;
writer->overallocate = 1;
- va_list vargs2;
- const char *f;
-
// Copy varags to be able to pass a reference to a subfunction.
+ va_list vargs2;
va_copy(vargs2, vargs);
- for (f = format; *f; ) {
+ // _PyUnicodeWriter_WriteASCIIString() below requires the format string
+ // to be encoded to ASCII.
+ int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + len) < 128);
+ if (!is_ascii) {
+ Py_ssize_t i;
+ for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
+ PyErr_Format(PyExc_ValueError,
+ "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+ "string, got a non-ASCII byte: 0x%02x",
+ (unsigned char)format[i]);
+ goto fail;
+ }
+
+ for (const char *f = format; *f; ) {
if (*f == '%') {
f = unicode_fromformat_arg(writer, f, &vargs2);
if (f == NULL)
goto fail;
}
else {
- const char *p;
- Py_ssize_t len;
-
- p = f;
- do
- {
- if ((unsigned char)*p > 127) {
- PyErr_Format(PyExc_ValueError,
- "PyUnicode_FromFormatV() expects an ASCII-encoded format "
- "string, got a non-ASCII byte: 0x%02x",
- (unsigned char)*p);
- goto fail;
- }
- p++;
+ const char *p = strchr(f, '%');
+ if (p != NULL) {
+ len = p - f;
}
- while (*p != '\0' && *p != '%');
- len = p - f;
-
- if (*p == '\0')
+ else {
+ len = strlen(f);
writer->overallocate = 0;
+ }
- if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
+ if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
goto fail;
-
- f = p;
+ }
+ f += len;
}
}
va_end(vargs2);