summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2024-08-28 09:11:13 (GMT)
committer: GitHub <noreply@github.com> 2024-08-28 09:11:13 (GMT)
commit: 1a0b828994ed4ec1f2ba05123995a7d1e852f4b4 (patch)
tree: 77fdc324262eb57e237d652d96cda6234618ca89 /Objects
parent: 6f563e364d1a7902417573f842019746a79cdc1b (diff)
download: cpython-1a0b828994ed4ec1f2ba05123995a7d1e852f4b4.zip
cpython-1a0b828994ed4ec1f2ba05123995a7d1e852f4b4.tar.gz
cpython-1a0b828994ed4ec1f2ba05123995a7d1e852f4b4.tar.bz2
gh-122561: Clean up and microoptimize str.translate and charmap codec (GH-122932)
* Replace PyLong_AS_LONG() with PyLong_AsLong().
* Call PyLong_AsLong() only once per replacement code.
* Use PyMapping_GetOptionalItem() instead of PyObject_GetItem().
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 70
1 files changed, 43 insertions, 27 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 148d3e5..f4239ce 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8208,8 +8208,12 @@ charmap_decode_mapping(const char *s,
if (key == NULL)
goto onError;
- item = PyObject_GetItem(mapping, key);
+ int rc = PyMapping_GetOptionalItem(mapping, key, &item);
Py_DECREF(key);
+ if (rc == 0) {
+ /* No mapping found means: mapping is undefined. */
+ goto Undefined;
+ }
if (item == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
/* No mapping found means: mapping is undefined. */
@@ -8223,7 +8227,7 @@ charmap_decode_mapping(const char *s,
if (item == Py_None)
goto Undefined;
if (PyLong_Check(item)) {
- long value = PyLong_AS_LONG(item);
+ long value = PyLong_AsLong(item);
if (value == 0xFFFE)
goto Undefined;
if (value < 0 || value > MAX_UNICODE) {
@@ -8507,19 +8511,25 @@ encoding_map_lookup(Py_UCS4 c, PyObject *mapping)
return i;
}
-/* Lookup the character ch in the mapping. If the character
- can't be found, Py_None is returned (or NULL, if another
- error occurred). */
+/* Lookup the character in the mapping.
+ On success, return PyLong, PyBytes or None (if the character can't be found).
+ If the result is PyLong, put its value in replace.
+ On error, return NULL.
+ */
static PyObject *
-charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
+charmapencode_lookup(Py_UCS4 c, PyObject *mapping, unsigned char *replace)
{
PyObject *w = PyLong_FromLong((long)c);
PyObject *x;
if (w == NULL)
return NULL;
- x = PyObject_GetItem(mapping, w);
+ int rc = PyMapping_GetOptionalItem(mapping, w, &x);
Py_DECREF(w);
+ if (rc == 0) {
+ /* No mapping found means: mapping is undefined. */
+ Py_RETURN_NONE;
+ }
if (x == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
/* No mapping found means: mapping is undefined. */
@@ -8531,13 +8541,14 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
else if (x == Py_None)
return x;
else if (PyLong_Check(x)) {
- long value = PyLong_AS_LONG(x);
+ long value = PyLong_AsLong(x);
if (value < 0 || value > 255) {
PyErr_SetString(PyExc_TypeError,
"character mapping must be in range(256)");
Py_DECREF(x);
return NULL;
}
+ *replace = (unsigned char)value;
return x;
}
else if (PyBytes_Check(x))
@@ -8578,6 +8589,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
PyObject **outobj, Py_ssize_t *outpos)
{
PyObject *rep;
+ unsigned char replace;
char *outstart;
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
@@ -8594,7 +8606,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
return enc_SUCCESS;
}
- rep = charmapencode_lookup(c, mapping);
+ rep = charmapencode_lookup(c, mapping, &replace);
if (rep==NULL)
return enc_EXCEPTION;
else if (rep==Py_None) {
@@ -8609,7 +8621,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
return enc_EXCEPTION;
}
outstart = PyBytes_AS_STRING(*outobj);
- outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep);
+ outstart[(*outpos)++] = (char)replace;
}
else {
const char *repchars = PyBytes_AS_STRING(rep);
@@ -8658,6 +8670,7 @@ charmap_encoding_error(
/* find all unencodable characters */
while (collendpos < size) {
PyObject *rep;
+ unsigned char replace;
if (Py_IS_TYPE(mapping, &EncodingMapType)) {
ch = PyUnicode_READ_CHAR(unicode, collendpos);
val = encoding_map_lookup(ch, mapping);
@@ -8668,7 +8681,7 @@ charmap_encoding_error(
}
ch = PyUnicode_READ_CHAR(unicode, collendpos);
- rep = charmapencode_lookup(ch, mapping);
+ rep = charmapencode_lookup(ch, mapping, &replace);
if (rep==NULL)
return -1;
else if (rep!=Py_None) {
@@ -8933,17 +8946,24 @@ unicode_translate_call_errorhandler(const char *errors,
/* Lookup the character ch in the mapping and put the result in result,
which must be decrefed by the caller.
+ The result can be PyLong, PyUnicode, None or NULL.
+ If the result is PyLong, put its value in replace.
Return 0 on success, -1 on error */
static int
-charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
+charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result, Py_UCS4 *replace)
{
PyObject *w = PyLong_FromLong((long)c);
PyObject *x;
if (w == NULL)
return -1;
- x = PyObject_GetItem(mapping, w);
+ int rc = PyMapping_GetOptionalItem(mapping, w, &x);
Py_DECREF(w);
+ if (rc == 0) {
+ /* No mapping found means: use 1:1 mapping. */
+ *result = NULL;
+ return 0;
+ }
if (x == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
/* No mapping found means: use 1:1 mapping. */
@@ -8958,7 +8978,7 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
return 0;
}
else if (PyLong_Check(x)) {
- long value = PyLong_AS_LONG(x);
+ long value = PyLong_AsLong(x);
if (value < 0 || value > MAX_UNICODE) {
PyErr_Format(PyExc_ValueError,
"character mapping must be in range(0x%x)",
@@ -8967,6 +8987,7 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
return -1;
}
*result = x;
+ *replace = (Py_UCS4)value;
return 0;
}
else if (PyUnicode_Check(x)) {
@@ -8990,8 +9011,9 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
_PyUnicodeWriter *writer)
{
PyObject *item;
+ Py_UCS4 replace;
- if (charmaptranslate_lookup(ch, mapping, &item))
+ if (charmaptranslate_lookup(ch, mapping, &item, &replace))
return -1;
if (item == NULL) {
@@ -9008,10 +9030,7 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
}
if (PyLong_Check(item)) {
- long ch = (Py_UCS4)PyLong_AS_LONG(item);
- /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
- used it */
- if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
+ if (_PyUnicodeWriter_WriteCharInline(writer, replace) < 0) {
Py_DECREF(item);
return -1;
}
@@ -9038,9 +9057,10 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
Py_UCS1 *translate)
{
PyObject *item = NULL;
+ Py_UCS4 replace;
int ret = 0;
- if (charmaptranslate_lookup(ch, mapping, &item)) {
+ if (charmaptranslate_lookup(ch, mapping, &item, &replace)) {
return -1;
}
@@ -9054,10 +9074,7 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
return 1;
}
else if (PyLong_Check(item)) {
- long replace = PyLong_AS_LONG(item);
- /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
- used it */
- if (127 < replace) {
+ if (replace > 127) {
/* invalid character or character outside ASCII:
skip the fast translate */
goto exit;
@@ -9065,8 +9082,6 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
translate[ch] = (Py_UCS1)replace;
}
else if (PyUnicode_Check(item)) {
- Py_UCS4 replace;
-
if (PyUnicode_GET_LENGTH(item) != 1)
goto exit;
@@ -9219,8 +9234,9 @@ _PyUnicode_TranslateCharmap(PyObject *input,
/* find all untranslatable characters */
while (collend < size) {
PyObject *x;
+ Py_UCS4 replace;
ch = PyUnicode_READ(kind, data, collend);
- if (charmaptranslate_lookup(ch, mapping, &x))
+ if (charmaptranslate_lookup(ch, mapping, &x, &replace))
goto onError;
Py_XDECREF(x);
if (x != Py_None)