diff options
| author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2024-10-07 21:23:49 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-07 21:23:49 (GMT) |
| commit | 03ae82d0d34649d1fcb755015aa567fe3978d4f9 (patch) | |
| tree | 0888310829453b8fd1fa40f1ec90899544271c41 /Python/errors.c | |
| parent | 167d8d2f072505a85c6db72fdaf4edffd6d17f3a (diff) | |
| download | cpython-03ae82d0d34649d1fcb755015aa567fe3978d4f9.zip cpython-03ae82d0d34649d1fcb755015aa567fe3978d4f9.tar.gz cpython-03ae82d0d34649d1fcb755015aa567fe3978d4f9.tar.bz2 | |
[3.13] gh-124188: Fix PyErr_ProgramTextObject() (GH-124189) (GH-124423)
* Detect source file encoding.
* Use the "replace" error handler even for UTF-8 (default) encoding.
* Remove the BOM.
* Fix detection of too long lines if they contain NUL.
* Return the head rather than the tail for truncated long lines.
(cherry picked from commit e2f710792b0418b8ca1ca3b8cdf39588c7268495)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Python/errors.c')
| -rw-r--r-- | Python/errors.c | 91 |
1 file changed, 56 insertions, 35 deletions
```diff
diff --git a/Python/errors.c b/Python/errors.c
index 433253b..bf1ddfa 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -1856,44 +1856,44 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
    functionality in tb_displayline() in traceback.c. */
 
 static PyObject *
-err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
+err_programtext(FILE *fp, int lineno, const char* encoding)
 {
-    int i;
     char linebuf[1000];
-    if (fp == NULL) {
-        return NULL;
+    size_t line_size = 0;
+
+    for (int i = 0; i < lineno; ) {
+        line_size = 0;
+        if (_Py_UniversalNewlineFgetsWithSize(linebuf, sizeof(linebuf),
+                                              fp, NULL, &line_size) == NULL)
+        {
+            /* Error or EOF. */
+            return NULL;
+        }
+        /* fgets read *something*; if it didn't fill the
+           whole buffer, it must have found a newline
+           or hit the end of the file; if the last character is \n,
+           it obviously found a newline; else we haven't
+           yet seen a newline, so must continue */
+        if (i + 1 < lineno
+            && line_size == sizeof(linebuf) - 1
+            && linebuf[sizeof(linebuf) - 2] != '\n')
+        {
+            continue;
+        }
+        i++;
     }
 
-    for (i = 0; i < lineno; i++) {
-        char *pLastChar = &linebuf[sizeof(linebuf) - 2];
-        do {
-            *pLastChar = '\0';
-            if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf,
-                                         fp, NULL) == NULL) {
-                goto after_loop;
-            }
-            /* fgets read *something*; if it didn't get as
-               far as pLastChar, it must have found a newline
-               or hit the end of the file; if pLastChar is \n,
-               it obviously found a newline; else we haven't
-               yet seen a newline, so must continue */
-        } while (*pLastChar != '\0' && *pLastChar != '\n');
+    const char *line = linebuf;
+    /* Skip BOM. */
+    if (lineno == 1 && line_size >= 3 && memcmp(line, "\xef\xbb\xbf", 3) == 0) {
+        line += 3;
+        line_size -= 3;
     }
-
-after_loop:
-    fclose(fp);
-    if (i == lineno) {
-        PyObject *res;
-        if (encoding != NULL) {
-            res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
-        } else {
-            res = PyUnicode_FromString(linebuf);
-        }
-        if (res == NULL)
-            _PyErr_Clear(tstate);
-        return res;
+    PyObject *res = PyUnicode_Decode(line, line_size, encoding, "replace");
+    if (res == NULL) {
+        PyErr_Clear();
     }
-    return NULL;
+    return res;
 }
 
 PyObject *
@@ -1913,20 +1913,41 @@ PyErr_ProgramText(const char *filename, int lineno)
     return res;
 }
 
+/* Function from Parser/tokenizer/file_tokenizer.c */
+extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
+
 PyObject *
 _PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
 {
+    char *found_encoding = NULL;
     if (filename == NULL || lineno <= 0) {
         return NULL;
     }
 
-    PyThreadState *tstate = _PyThreadState_GET();
     FILE *fp = _Py_fopen_obj(filename, "r" PY_STDIOTEXTMODE);
     if (fp == NULL) {
-        _PyErr_Clear(tstate);
+        PyErr_Clear();
         return NULL;
     }
-    return err_programtext(tstate, fp, lineno, encoding);
+    if (encoding == NULL) {
+        int fd = fileno(fp);
+        found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
+        encoding = found_encoding;
+        if (encoding == NULL) {
+            PyErr_Clear();
+            encoding = "utf-8";
+        }
+        /* Reset position */
+        if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+            fclose(fp);
+            PyMem_Free(found_encoding);
+            return NULL;
+        }
+    }
+    PyObject *res = err_programtext(fp, lineno, encoding);
+    fclose(fp);
+    PyMem_Free(found_encoding);
+    return res;
 }
 
 PyObject *
```
