summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2024-09-24 08:01:37 (GMT)
committer GitHub <noreply@github.com> 2024-09-24 08:01:37 (GMT)
commit e2f710792b0418b8ca1ca3b8cdf39588c7268495 (patch)
tree 5fa743b6be184b1384cea36bc58e591b4568f52e /Python
parent 3c83f9958c14cd62ad8951c53536f7788745b0ba (diff)
download cpython-e2f710792b0418b8ca1ca3b8cdf39588c7268495.zip
cpython-e2f710792b0418b8ca1ca3b8cdf39588c7268495.tar.gz
cpython-e2f710792b0418b8ca1ca3b8cdf39588c7268495.tar.bz2
gh-124188: Fix PyErr_ProgramTextObject() (GH-124189)
* Detect source file encoding.
* Use the "replace" error handler even for UTF-8 (default) encoding.
* Remove the BOM.
* Fix detection of too long lines if they contain NUL.
* Return the head rather than the tail for truncated long lines.
Diffstat (limited to 'Python')
-rw-r--r-- Python/errors.c 91
1 file changed, 56 insertions, 35 deletions
diff --git a/Python/errors.c b/Python/errors.c
index 29249ac..9e2a3ce 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -1903,44 +1903,44 @@ _PyErr_EmitSyntaxWarning(PyObject *msg, PyObject *filename, int lineno, int col_
functionality in tb_displayline() in traceback.c. */
static PyObject *
-err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
+err_programtext(FILE *fp, int lineno, const char* encoding)
{
- int i;
char linebuf[1000];
- if (fp == NULL) {
- return NULL;
+ size_t line_size = 0;
+
+ for (int i = 0; i < lineno; ) {
+ line_size = 0;
+ if (_Py_UniversalNewlineFgetsWithSize(linebuf, sizeof(linebuf),
+ fp, NULL, &line_size) == NULL)
+ {
+ /* Error or EOF. */
+ return NULL;
+ }
+ /* fgets read *something*; if it didn't fill the
+ whole buffer, it must have found a newline
+ or hit the end of the file; if the last character is \n,
+ it obviously found a newline; else we haven't
+ yet seen a newline, so must continue */
+ if (i + 1 < lineno
+ && line_size == sizeof(linebuf) - 1
+ && linebuf[sizeof(linebuf) - 2] != '\n')
+ {
+ continue;
+ }
+ i++;
}
- for (i = 0; i < lineno; i++) {
- char *pLastChar = &linebuf[sizeof(linebuf) - 2];
- do {
- *pLastChar = '\0';
- if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf,
- fp, NULL) == NULL) {
- goto after_loop;
- }
- /* fgets read *something*; if it didn't get as
- far as pLastChar, it must have found a newline
- or hit the end of the file; if pLastChar is \n,
- it obviously found a newline; else we haven't
- yet seen a newline, so must continue */
- } while (*pLastChar != '\0' && *pLastChar != '\n');
+ const char *line = linebuf;
+ /* Skip BOM. */
+ if (lineno == 1 && line_size >= 3 && memcmp(line, "\xef\xbb\xbf", 3) == 0) {
+ line += 3;
+ line_size -= 3;
}
-
-after_loop:
- fclose(fp);
- if (i == lineno) {
- PyObject *res;
- if (encoding != NULL) {
- res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
- } else {
- res = PyUnicode_FromString(linebuf);
- }
- if (res == NULL)
- _PyErr_Clear(tstate);
- return res;
+ PyObject *res = PyUnicode_Decode(line, line_size, encoding, "replace");
+ if (res == NULL) {
+ PyErr_Clear();
}
- return NULL;
+ return res;
}
PyObject *
@@ -1960,20 +1960,41 @@ PyErr_ProgramText(const char *filename, int lineno)
return res;
}
+/* Function from Parser/tokenizer/file_tokenizer.c */
+extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
+
PyObject *
_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
{
+ char *found_encoding = NULL;
if (filename == NULL || lineno <= 0) {
return NULL;
}
- PyThreadState *tstate = _PyThreadState_GET();
FILE *fp = _Py_fopen_obj(filename, "r" PY_STDIOTEXTMODE);
if (fp == NULL) {
- _PyErr_Clear(tstate);
+ PyErr_Clear();
return NULL;
}
- return err_programtext(tstate, fp, lineno, encoding);
+ if (encoding == NULL) {
+ int fd = fileno(fp);
+ found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
+ encoding = found_encoding;
+ if (encoding == NULL) {
+ PyErr_Clear();
+ encoding = "utf-8";
+ }
+ /* Reset position */
+ if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+ fclose(fp);
+ PyMem_Free(found_encoding);
+ return NULL;
+ }
+ }
+ PyObject *res = err_programtext(fp, lineno, encoding);
+ fclose(fp);
+ PyMem_Free(found_encoding);
+ return res;
}
PyObject *