summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
author: Pablo Galindo <Pablogsal@gmail.com> 2020-06-16 15:49:43 (GMT)
committer: GitHub <noreply@github.com> 2020-06-16 15:49:43 (GMT)
commit: 51c5896b6205911d29ac07f167ec7f3cf1cb600d (patch)
tree: 4bb679cf3dd91e96b6abf60f4d139e1e943f5ed0 /Parser
parent: fcc60e40bbfe8a229b8b83f1d1ee77fd4bf870d1 (diff)
download: cpython-51c5896b6205911d29ac07f167ec7f3cf1cb600d.zip
download: cpython-51c5896b6205911d29ac07f167ec7f3cf1cb600d.tar.gz
download: cpython-51c5896b6205911d29ac07f167ec7f3cf1cb600d.tar.bz2
bpo-40958: Avoid buffer overflow in the parser when indexing the current line (GH-20875)
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/pegen.c 20
-rw-r--r-- Parser/pegen.h 4
2 files changed, 11 insertions, 13 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 4cff734..e153e92 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -140,21 +140,18 @@ _create_dummy_identifier(Parser *p)
}
static inline Py_ssize_t
-byte_offset_to_character_offset(PyObject *line, int col_offset)
+byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
{
const char *str = PyUnicode_AsUTF8(line);
if (!str) {
return 0;
}
+ assert(col_offset >= 0 && (unsigned long)col_offset <= strlen(str));
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
if (!text) {
return 0;
}
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
- str = PyUnicode_AsUTF8(text);
- if (str != NULL && (int)strlen(str) == col_offset) {
- size = strlen(str);
- }
Py_DECREF(text);
return size;
}
@@ -366,7 +363,7 @@ void *
_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
{
Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
- int col_offset;
+ Py_ssize_t col_offset;
if (t->col_offset == -1) {
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
intptr_t, int);
@@ -386,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
void *
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
- int lineno, int col_offset,
+ Py_ssize_t lineno, Py_ssize_t col_offset,
const char *errmsg, va_list va)
{
PyObject *value = NULL;
@@ -406,16 +403,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
if (!error_line) {
Py_ssize_t size = p->tok->inp - p->tok->buf;
- if (size && p->tok->buf[size-1] == '\n') {
- size--;
- }
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
if (!error_line) {
goto error;
}
}
- Py_ssize_t col_number = byte_offset_to_character_offset(error_line, col_offset);
+ Py_ssize_t col_number = col_offset;
+
+ if (p->tok->encoding != NULL) {
+ col_number = byte_offset_to_character_offset(error_line, col_offset);
+ }
tmp = Py_BuildValue("(OiiN)", p->tok->filename, lineno, col_number, error_line);
if (!tmp) {
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 64cf0ec..c4ff8c9 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -34,7 +34,7 @@ typedef struct _memo {
typedef struct {
int type;
PyObject *bytes;
- int lineno, col_offset, end_lineno, end_col_offset;
+ Py_ssize_t lineno, col_offset, end_lineno, end_col_offset;
Memo *memo;
} Token;
@@ -132,7 +132,7 @@ void *_PyPegen_string_token(Parser *p);
const char *_PyPegen_get_expr_name(expr_ty);
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
- int lineno, int col_offset,
+ Py_ssize_t lineno, Py_ssize_t col_offset,
const char *errmsg, va_list va);
void *_PyPegen_dummy_name(Parser *p, ...);