diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2023-10-26 21:43:38 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-26 21:43:38 (GMT) |
commit | c81ebf5b3d9aa080d2039f1225f6fed91c29f5a8 (patch) | |
tree | 31d0a5052319b1480704f513aba52057ca829478 /Parser | |
parent | e25d8b40cd70744513e190b1ca153087382b6b09 (diff) | |
download | cpython-c81ebf5b3d9aa080d2039f1225f6fed91c29f5a8.zip cpython-c81ebf5b3d9aa080d2039f1225f6fed91c29f5a8.tar.gz cpython-c81ebf5b3d9aa080d2039f1225f6fed91c29f5a8.tar.bz2 |
[3.12] bpo-43950: handle wide unicode characters in tracebacks (GH-28150) (#111346)
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/pegen.c | 55 | ||||
-rw-r--r-- | Parser/pegen.h | 1 |
2 files changed, 56 insertions, 0 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c index b9894dd..ff02e88 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -38,6 +38,61 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) return size; } +// Calculate the extra amount of width space the given source +// code segment might take if it were to be displayed on a fixed +// width output device. Supports wide unicode characters and emojis. +Py_ssize_t +_PyPegen_calculate_display_width(PyObject *line, Py_ssize_t character_offset) +{ + PyObject *segment = PyUnicode_Substring(line, 0, character_offset); + if (!segment) { + return -1; + } + + // Fast track for ascii strings + if (PyUnicode_IS_ASCII(segment)) { + Py_DECREF(segment); + return character_offset; + } + + PyObject *width_fn = _PyImport_GetModuleAttrString("unicodedata", "east_asian_width"); + if (!width_fn) { + return -1; + } + + Py_ssize_t width = 0; + Py_ssize_t len = PyUnicode_GET_LENGTH(segment); + for (Py_ssize_t i = 0; i < len; i++) { + PyObject *chr = PyUnicode_Substring(segment, i, i + 1); + if (!chr) { + Py_DECREF(segment); + Py_DECREF(width_fn); + return -1; + } + + PyObject *width_specifier = PyObject_CallOneArg(width_fn, chr); + Py_DECREF(chr); + if (!width_specifier) { + Py_DECREF(segment); + Py_DECREF(width_fn); + return -1; + } + + if (_PyUnicode_EqualToASCIIString(width_specifier, "W") || + _PyUnicode_EqualToASCIIString(width_specifier, "F")) { + width += 2; + } + else { + width += 1; + } + Py_DECREF(width_specifier); + } + + Py_DECREF(segment); + Py_DECREF(width_fn); + return width; +} + // Here, mark is the start of the node, while p->mark is the end. // If node==NULL, they should be the same. int diff --git a/Parser/pegen.h b/Parser/pegen.h index a8bfa78..268f380 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -151,6 +151,7 @@ expr_ty _PyPegen_name_token(Parser *p); expr_ty _PyPegen_number_token(Parser *p); void *_PyPegen_string_token(Parser *p); Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset); +Py_ssize_t _PyPegen_calculate_display_width(PyObject *segment, Py_ssize_t character_offset); // Error handling functions and APIs typedef enum { |