summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2021-11-21 04:15:22 (GMT)
committerGitHub <noreply@github.com>2021-11-21 04:15:22 (GMT)
commit07cf66fd03e161c09279346da4e76705cf42d535 (patch)
treeb93db952fce0d3d9777b847a519094e99bfbcfc8
parent9841ac2da5689ff765250c1abdbf5af9d3750519 (diff)
downloadcpython-07cf66fd03e161c09279346da4e76705cf42d535.zip
cpython-07cf66fd03e161c09279346da4e76705cf42d535.tar.gz
cpython-07cf66fd03e161c09279346da4e76705cf42d535.tar.bz2
[3.10] Ensure the str member of the tokenizer is always initialised (GH-29681). (GH-29683)
(cherry picked from commit 4f006a789a35f5d1a7ef142bd1304ce167392457) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
-rw-r--r--Parser/pegen.c2
-rw-r--r--Parser/pegen_errors.c425
-rw-r--r--Parser/tokenizer.c2
-rw-r--r--Parser/tokenizer.h2
4 files changed, 428 insertions, 3 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 464a902..8946aa3 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -432,7 +432,7 @@ get_error_line(Parser *p, Py_ssize_t lineno)
* (multi-line) statement are stored in p->tok->interactive_src_start.
* If not, we're parsing from a string, which means that the whole source
* is stored in p->tok->str. */
- assert(p->tok->fp == NULL || p->tok->fp == stdin);
+ assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
assert(cur_line != NULL);
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
new file mode 100644
index 0000000..694184a
--- /dev/null
+++ b/Parser/pegen_errors.c
@@ -0,0 +1,425 @@
+#include <Python.h>
+#include <errcode.h>
+
+#include "tokenizer.h"
+#include "pegen.h"
+
+// TOKENIZER ERRORS
+
+/*
+ * Convert a pending tokenizer-initialisation failure into a SyntaxError.
+ *
+ * When the pending exception matches LookupError, SyntaxError, ValueError or
+ * UnicodeDecodeError, it is fetched and replaced by a SyntaxError whose
+ * arguments are (str(original value), (filename, 0, -1, None)).  Any other
+ * pending exception is left as it is.
+ *
+ * If building the replacement fails, the exception raised by the failing
+ * call is what remains set on return.
+ */
+void
+_PyPegen_raise_tokenizer_init_error(PyObject *filename)
+{
+    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
+          || PyErr_ExceptionMatches(PyExc_SyntaxError)
+          || PyErr_ExceptionMatches(PyExc_ValueError)
+          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
+        return;
+    }
+    PyObject *errstr = NULL;
+    PyObject *tuple = NULL;
+    PyObject *type;
+    PyObject *value;
+    PyObject *tback;
+    PyErr_Fetch(&type, &value, &tback);
+    errstr = PyObject_Str(value);
+    if (!errstr) {
+        goto error;
+    }
+
+    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
+    if (!tmp) {
+        goto error;
+    }
+
+    tuple = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(tmp);
+    // Check the freshly packed tuple, not the fetched exception value:
+    // otherwise a failed PyTuple_Pack would reach PyErr_SetObject with NULL.
+    if (!tuple) {
+        goto error;
+    }
+    PyErr_SetObject(PyExc_SyntaxError, tuple);
+
+error:
+    Py_XDECREF(type);
+    Py_XDECREF(value);
+    Py_XDECREF(tback);
+    Py_XDECREF(errstr);
+    Py_XDECREF(tuple);
+}
+
+/*
+ * Raise SyntaxError "'<c>' was never closed", located at the line/column
+ * recorded in the tokenizer's paren stacks for the entry at index level-1.
+ */
+static inline void
+raise_unclosed_parentheses_error(Parser *p) {
+    // All three paren stacks are read at the same index.
+    const int top = p->tok->level - 1;
+    const int open_lineno = p->tok->parenlinenostack[top];
+    const int open_col = p->tok->parencolstack[top];
+    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
+                               open_lineno, open_col, open_lineno, -1,
+                               "'%c' was never closed",
+                               p->tok->parenstack[top]);
+}
+
+/*
+ * Turn the tokenizer's failure code (p->tok->done) into a Python exception.
+ *
+ * If an exception is already pending it is kept.  Always returns -1.
+ */
+int
+_Pypegen_tokenizer_error(Parser *p)
+{
+    if (PyErr_Occurred()) {
+        return -1;
+    }
+
+    PyObject *exc_type = PyExc_SyntaxError;
+    const char *text = NULL;
+    Py_ssize_t column = -1;
+
+    switch (p->tok->done) {
+        /* Codes that raise on their own and return immediately. */
+        case E_EOF:
+            if (!p->tok->level) {
+                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            }
+            else {
+                raise_unclosed_parentheses_error(p);
+            }
+            return -1;
+        case E_DEDENT:
+            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
+            return -1;
+        case E_INTR:
+            if (!PyErr_Occurred()) {
+                PyErr_SetNone(PyExc_KeyboardInterrupt);
+            }
+            return -1;
+        case E_NOMEM:
+            PyErr_NoMemory();
+            return -1;
+
+        /* Codes that only pick a type/message; raised after the switch. */
+        case E_TOKEN:
+            text = "invalid token";
+            break;
+        case E_TABSPACE:
+            exc_type = PyExc_TabError;
+            text = "inconsistent use of tabs and spaces in indentation";
+            break;
+        case E_TOODEEP:
+            exc_type = PyExc_IndentationError;
+            text = "too many levels of indentation";
+            break;
+        case E_LINECONT:
+            column = p->tok->cur - p->tok->buf - 1;
+            text = "unexpected character after line continuation character";
+            break;
+        default:
+            text = "unknown parsing error";
+    }
+
+    // A negative column means "not computed": report column 0 instead.
+    RAISE_ERROR_KNOWN_LOCATION(p, exc_type, p->tok->lineno,
+                               column < 0 ? 0 : column,
+                               p->tok->lineno, -1, text);
+    return -1;
+}
+
+/*
+ * Replace a pending UnicodeError or ValueError by a SyntaxError whose message
+ * is "(<kind>) <str(original value)>".  Any other pending exception is left
+ * as it is.  Always returns -1.
+ */
+int
+_Pypegen_raise_decode_error(Parser *p)
+{
+    assert(PyErr_Occurred());
+
+    const char *kind;
+    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
+        kind = "unicode error";
+    }
+    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
+        kind = "value error";
+    }
+    else {
+        // Not a decode-style error: nothing to convert.
+        return -1;
+    }
+
+    PyObject *exc_type;
+    PyObject *exc_value;
+    PyObject *exc_tb;
+    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
+
+    PyObject *text = PyObject_Str(exc_value);
+    if (text == NULL) {
+        // str() itself failed: drop that failure and use a fixed message.
+        PyErr_Clear();
+        RAISE_SYNTAX_ERROR("(%s) unknown error", kind);
+    }
+    else {
+        RAISE_SYNTAX_ERROR("(%s) %U", kind, text);
+        Py_DECREF(text);
+    }
+
+    Py_XDECREF(exc_type);
+    Py_XDECREF(exc_value);
+    Py_XDECREF(exc_tb);
+    return -1;
+}
+
+// Keep tokenizing the remaining input looking for an unclosed bracket that
+// was opened on a line before the error already recorded.
+//
+// Returns -1 when such a bracket is found (an "was never closed" error is
+// raised for it), 0 otherwise.  When no exception is pending at the end, the
+// exception that was pending on entry is restored.
+static int
+_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
+ // Tokenize the whole input to see if there are any tokenization
+ // errors such as mismatched parentheses. These will get priority
+ // over generic syntax errors only if the line number of the error is
+ // before the one that we had for the generic error.
+
+ // We don't want to tokenize to the end for interactive input
+ if (p->tok->prompt != NULL) {
+ return 0;
+ }
+
+ // Set the pending exception aside while the tokenizer runs.
+ PyObject *type, *value, *traceback;
+ PyErr_Fetch(&type, &value, &traceback);
+
+ // Line of the error we already have: the known error token if there is
+ // one, otherwise the last token stored in p->tokens.
+ Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+ Py_ssize_t current_err_line = current_token->lineno;
+
+ int ret = 0;
+
+ for (;;) {
+ const char *start;
+ const char *end;
+ switch (_PyTokenizer_Get(p->tok, &start, &end)) {
+ case ERRORTOKEN:
+ if (p->tok->level != 0) {
+ int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+ // Only report the bracket if it sits on an earlier line than
+ // the error we already have.
+ if (current_err_line > error_lineno) {
+ raise_unclosed_parentheses_error(p);
+ ret = -1;
+ goto exit;
+ }
+ }
+ break;
+ case ENDMARKER:
+ break;
+ default:
+ // Any other token: keep reading.
+ continue;
+ }
+ // Reached only from ERRORTOKEN / ENDMARKER: stop tokenizing.
+ break;
+ }
+
+
+exit:
+ // If an exception is pending now, it wins and the saved one is released;
+ // otherwise the saved exception is put back.
+ if (PyErr_Occurred()) {
+ Py_XDECREF(value);
+ Py_XDECREF(type);
+ Py_XDECREF(traceback);
+ } else {
+ PyErr_Restore(type, value, traceback);
+ }
+ return ret;
+}
+
+// PARSER ERRORS
+
+/*
+ * Raise `errtype` with a printf-style message, locating the error at the
+ * known error token if one is set, otherwise at the last token stored in
+ * p->tokens.  When no token has been stored yet (p->fill == 0) the location
+ * passed on is line 0, column 0.  Always returns NULL.
+ */
+void *
+_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+{
+    va_list args;
+
+    if (p->fill == 0) {
+        va_start(args, errmsg);
+        _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, args);
+        va_end(args);
+        return NULL;
+    }
+
+    Token *tok = (p->known_err_token == NULL) ? p->tokens[p->fill - 1]
+                                              : p->known_err_token;
+
+    Py_ssize_t start_col;
+    if (tok->col_offset != -1) {
+        start_col = tok->col_offset + 1;
+    }
+    else if (p->tok->cur == p->tok->buf) {
+        start_col = 0;
+    }
+    else {
+        // The token carries no column: derive one from the tokenizer cursor.
+        const char *line_begin = p->tok->buf ? p->tok->line_start : p->tok->buf;
+        start_col = Py_SAFE_DOWNCAST(p->tok->cur - line_begin, intptr_t, int);
+    }
+
+    Py_ssize_t end_col = -1;
+    if (tok->end_col_offset != -1) {
+        end_col = tok->end_col_offset + 1;
+    }
+
+    va_start(args, errmsg);
+    _PyPegen_raise_error_known_location(p, errtype, tok->lineno, start_col,
+                                        tok->end_lineno, end_col, errmsg, args);
+    va_end(args);
+
+    return NULL;
+}
+
+/*
+ * Return line number `lineno` (1-based) of the source held in the tokenizer
+ * buffers as a new str object, decoded as UTF-8 with the "replace" error
+ * handler and without its trailing newline.  Returns NULL with an exception
+ * set if decoding fails.
+ *
+ * If the buffer holds fewer than `lineno` lines, the last line available is
+ * returned rather than walking past the end of the buffer.
+ */
+static PyObject *
+get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
+{
+    /* If the file descriptor is interactive, the source lines of the current
+     * (multi-line) statement are stored in p->tok->interactive_src_start.
+     * If not, we're parsing from a string, which means that the whole source
+     * is stored in p->tok->str. */
+    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
+
+    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
+    assert(cur_line != NULL);
+
+    for (Py_ssize_t i = 0; i < lineno - 1; i++) {
+        char *newline = strchr(cur_line, '\n');
+        if (newline == NULL) {
+            // Fewer lines than requested: stop here instead of computing
+            // NULL + 1, at the cost of reporting a possibly wrong line.
+            break;
+        }
+        cur_line = newline + 1;
+    }
+
+    char *line_end = strchr(cur_line, '\n');
+    if (line_end == NULL) {  // This is the last line
+        line_end = cur_line + strlen(cur_line);
+    }
+    return PyUnicode_DecodeUTF8(cur_line, line_end - cur_line, "replace");
+}
+
+/*
+ * Raise `errtype` with arguments
+ *   (message, (filename, lineno, col, source_line, end_lineno, end_col)).
+ *
+ * p                       parser; p->error_indicator is set to 1.
+ * lineno/col_offset       start of the error.
+ * end_lineno/end_col_offset
+ *                         end of the error; CURRENT_POS means "take the
+ *                         tokenizer's current position".
+ * errmsg, va              printf-style message for PyUnicode_FromFormatV.
+ *                         When parsing an f-string the message is prefixed
+ *                         with "f-string: ".
+ *
+ * Column offsets are converted from byte offsets to character offsets when
+ * the tokenizer has an encoding set.  Always returns NULL; on an internal
+ * failure the exception set by the failing call is left pending instead.
+ */
+void *
+_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
+                                    Py_ssize_t lineno, Py_ssize_t col_offset,
+                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
+                                    const char *errmsg, va_list va)
+{
+    PyObject *value = NULL;
+    PyObject *errstr = NULL;
+    PyObject *error_line = NULL;
+    PyObject *tmp = NULL;
+    char *new_errmsg = NULL;  // heap copy of the message, f-string case only
+    p->error_indicator = 1;
+
+    if (end_lineno == CURRENT_POS) {
+        end_lineno = p->tok->lineno;
+    }
+    if (end_col_offset == CURRENT_POS) {
+        end_col_offset = p->tok->cur - p->tok->line_start;
+    }
+
+    if (p->start_rule == Py_fstring_input) {
+        const char *fstring_msg = "f-string: ";
+        size_t prefix_len = strlen(fstring_msg);
+        size_t msg_len = strlen(errmsg);
+
+        // Lengths of both strings plus the terminating NUL character
+        new_errmsg = PyMem_Malloc(prefix_len + msg_len + 1);
+        if (!new_errmsg) {
+            return (void *) PyErr_NoMemory();
+        }
+
+        // Copy both strings into the new buffer
+        memcpy(new_errmsg, fstring_msg, prefix_len);
+        memcpy(new_errmsg + prefix_len, errmsg, msg_len);
+        new_errmsg[prefix_len + msg_len] = 0;
+        errmsg = new_errmsg;
+    }
+    errstr = PyUnicode_FromFormatV(errmsg, va);
+    if (!errstr) {
+        goto error;
+    }
+
+    if (p->tok->fp_interactive) {
+        error_line = get_error_line_from_tokenizer_buffers(p, lineno);
+    }
+    else if (p->start_rule == Py_file_input) {
+        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
+                                                     (int) lineno, p->tok->encoding);
+    }
+
+    if (!error_line) {
+        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
+           then we need to find the error line from some other source, because
+           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
+           failed or we're parsing from a string or the REPL. There's a third edge case where
+           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
+           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
+           does not physically exist */
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+
+        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
+            Py_ssize_t size = p->tok->inp - p->tok->buf;
+            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
+        }
+        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
+            error_line = get_error_line_from_tokenizer_buffers(p, lineno);
+        }
+        else {
+            error_line = PyUnicode_FromStringAndSize("", 0);
+        }
+        if (!error_line) {
+            goto error;
+        }
+    }
+
+    if (p->start_rule == Py_fstring_input) {
+        col_offset -= p->starting_col_offset;
+        end_col_offset -= p->starting_col_offset;
+    }
+
+    Py_ssize_t col_number = col_offset;
+    Py_ssize_t end_col_number = end_col_offset;
+
+    if (p->tok->encoding != NULL) {
+        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
+        if (col_number < 0) {
+            goto error;
+        }
+        if (end_col_number > 0) {
+            // Distinct name: the original shadowed the end_col_offset parameter here.
+            Py_ssize_t end_col_chars = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
+            if (end_col_chars < 0) {
+                goto error;
+            }
+            end_col_number = end_col_chars;
+        }
+    }
+
+    // "n" is the format unit for Py_ssize_t; "i" would read these varargs as int.
+    tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
+    // "N" hands our reference to error_line over to Py_BuildValue, whether or
+    // not it succeeds, so it must not be released again on the error path.
+    error_line = NULL;
+    if (!tmp) {
+        goto error;
+    }
+    value = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(tmp);
+    if (!value) {
+        goto error;
+    }
+    PyErr_SetObject(errtype, value);
+
+    Py_DECREF(errstr);
+    Py_DECREF(value);
+    PyMem_Free(new_errmsg);  // no-op when NULL
+    return NULL;
+
+error:
+    Py_XDECREF(errstr);
+    Py_XDECREF(error_line);
+    PyMem_Free(new_errmsg);  // no-op when NULL
+    return NULL;
+}
+
+// Make sure a syntax-related exception is set after a failed parse.
+//
+// If an exception is already pending it is kept, except that for a pending
+// SyntaxError with the tokenizer in state E_DONE the rest of the input is
+// tokenized to look for an earlier unclosed bracket.  Otherwise an error is
+// raised for, in order: no tokens read, unexpected EOF, stray INDENT/DEDENT,
+// and finally a generic "invalid syntax" located at `last_token`.
+void
+_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
+ // Existing syntax error
+ if (PyErr_Occurred()) {
+ // Prioritize tokenizer errors to custom syntax errors raised
+ // on the second phase only if the errors come from the parser.
+ if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+ _PyPegen_tokenize_full_source_to_check_for_errors(p);
+ }
+ // Propagate the existing syntax error.
+ return;
+ }
+ // Initialization error
+ // NOTE(review): there is no return after this raise, so execution falls
+ // through to the checks below (which read last_token) -- confirm intended.
+ if (p->fill == 0) {
+ RAISE_SYNTAX_ERROR("error at start before reading any input");
+ }
+ // Parser encountered EOF (End of File) unexpectedly
+ if (p->tok->done == E_EOF) {
+ if (p->tok->level) {
+ raise_unclosed_parentheses_error(p);
+ } else {
+ RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+ }
+ return;
+ }
+ // Indentation error in the tokenizer
+ if (last_token->type == INDENT || last_token->type == DEDENT) {
+ RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
+ return;
+ }
+ // Unknown error (generic case)
+
+ // Use the last token we found on the first pass to avoid reporting
+ // incorrect locations for generic syntax errors just because we reached
+ // further away when trying to find specific syntax errors in the second
+ // pass.
+ RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
+ // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
+ // generic SyntaxError we just raised if errors are found.
+ _PyPegen_tokenize_full_source_to_check_for_errors(p);
+} \ No newline at end of file
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 76a22da..672fdb9 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -86,7 +86,7 @@ tok_new(void)
tok->async_def_indent = 0;
tok->async_def_nl = 0;
tok->interactive_underflow = IUNDERFLOW_NORMAL;
-
+ tok->str = NULL;
return tok;
}
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 677f9db..61f0a61 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -71,7 +71,7 @@ struct tok_state {
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
const char* enc; /* Encoding for the current str. */
- char* str;
+ char* str; /* Source string being tokenized (if tokenizing from a string)*/
char* input; /* Tokenizer's newline translated copy of the string. */
int type_comments; /* Whether to look for type comments */
hl ppc">#define destructor xxdestructor #endif #ifndef HAVE_PTHREAD_STUBS # include <pthread.h> #endif #if defined(__APPLE__) || defined(HAVE_PTHREAD_DESTRUCTOR) #undef destructor #endif #include <signal.h> #if defined(__linux__) # include <sys/syscall.h> /* syscall(SYS_gettid) */ #elif defined(__FreeBSD__) # include <pthread_np.h> /* pthread_getthreadid_np() */ #elif defined(__OpenBSD__) # include <unistd.h> /* getthrid() */ #elif defined(_AIX) # include <sys/thread.h> /* thread_self() */ #elif defined(__NetBSD__) # include <lwp.h> /* _lwp_self() */ #elif defined(__DragonFly__) # include <sys/lwp.h> /* lwp_gettid() */ #endif /* The POSIX spec requires that use of pthread_attr_setstacksize be conditional on _POSIX_THREAD_ATTR_STACKSIZE being defined. */ #ifdef _POSIX_THREAD_ATTR_STACKSIZE #ifndef THREAD_STACK_SIZE #define THREAD_STACK_SIZE 0 /* use default stack size */ #endif /* The default stack size for new threads on BSD is small enough that * we'll get hard crashes instead of 'maximum recursion depth exceeded' * exceptions. * * The default stack size below is the empirically determined minimal stack * sizes where a simple recursive function doesn't cause a hard crash. * * For macOS the value of THREAD_STACK_SIZE is determined in configure.ac * as it also depends on the other configure options like chosen sanitizer * runtimes. */ #if defined(__FreeBSD__) && defined(THREAD_STACK_SIZE) && THREAD_STACK_SIZE == 0 #undef THREAD_STACK_SIZE #define THREAD_STACK_SIZE 0x400000 #endif #if defined(_AIX) && defined(THREAD_STACK_SIZE) && THREAD_STACK_SIZE == 0 #undef THREAD_STACK_SIZE #define THREAD_STACK_SIZE 0x200000 #endif /* bpo-38852: test_threading.test_recursion_limit() checks that 1000 recursive Python calls (default recursion limit) doesn't crash, but raise a regular RecursionError exception. In debug mode, Python function calls allocates more memory on the stack, so use a stack of 8 MiB. 
*/ #if defined(__ANDROID__) && defined(THREAD_STACK_SIZE) && THREAD_STACK_SIZE == 0 # ifdef Py_DEBUG # undef THREAD_STACK_SIZE # define THREAD_STACK_SIZE 0x800000 # endif #endif #if defined(__VXWORKS__) && defined(THREAD_STACK_SIZE) && THREAD_STACK_SIZE == 0 #undef THREAD_STACK_SIZE #define THREAD_STACK_SIZE 0x100000 #endif /* for safety, ensure a viable minimum stacksize */ #define THREAD_STACK_MIN 0x8000 /* 32 KiB */ #else /* !_POSIX_THREAD_ATTR_STACKSIZE */ #ifdef THREAD_STACK_SIZE #error "THREAD_STACK_SIZE defined but _POSIX_THREAD_ATTR_STACKSIZE undefined" #endif #endif /* The POSIX spec says that implementations supporting the sem_* family of functions must indicate this by defining _POSIX_SEMAPHORES. */ #ifdef _POSIX_SEMAPHORES /* On FreeBSD 4.x, _POSIX_SEMAPHORES is defined empty, so we need to add 0 to make it work there as well. */ #if (_POSIX_SEMAPHORES+0) == -1 # define HAVE_BROKEN_POSIX_SEMAPHORES #else # include <semaphore.h> # include <errno.h> #endif #endif /* Whether or not to use semaphores directly rather than emulating them with * mutexes and condition variables: */ #if (defined(_POSIX_SEMAPHORES) && !defined(HAVE_BROKEN_POSIX_SEMAPHORES) && \ (defined(HAVE_SEM_TIMEDWAIT) || defined(HAVE_SEM_CLOCKWAIT))) # define USE_SEMAPHORES #else # undef USE_SEMAPHORES #endif /* On platforms that don't use standard POSIX threads pthread_sigmask() * isn't present. DEC threads uses sigprocmask() instead as do most * other UNIX International compliant systems that don't have the full * pthread implementation. */ #if defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK) # define SET_THREAD_SIGMASK pthread_sigmask #else # define SET_THREAD_SIGMASK sigprocmask #endif /* * pthread_cond support */ #define condattr_monotonic _PyRuntime.threads._condattr_monotonic.ptr static void init_condattr(void) { #ifdef CONDATTR_MONOTONIC # define ca _PyRuntime.threads._condattr_monotonic.val // XXX We need to check the return code? 
pthread_condattr_init(&ca); // XXX We need to run pthread_condattr_destroy() during runtime fini. if (pthread_condattr_setclock(&ca, CLOCK_MONOTONIC) == 0) { condattr_monotonic = &ca; // Use monotonic clock } # undef ca #endif // CONDATTR_MONOTONIC } int _PyThread_cond_init(PyCOND_T *cond) { return pthread_cond_init(cond, condattr_monotonic); } void _PyThread_cond_after(long long us, struct timespec *abs) { _PyTime_t timeout = _PyTime_FromMicrosecondsClamp(us); _PyTime_t t; #ifdef CONDATTR_MONOTONIC if (condattr_monotonic) { t = _PyTime_GetMonotonicClock(); } else #endif { t = _PyTime_GetSystemClock(); } t = _PyTime_Add(t, timeout); _PyTime_AsTimespec_clamp(t, abs); } /* A pthread mutex isn't sufficient to model the Python lock type * because, according to Draft 5 of the docs (P1003.4a/D5), both of the * following are undefined: * -> a thread tries to lock a mutex it already has locked * -> a thread tries to unlock a mutex locked by a different thread * pthread mutexes are designed for serializing threads over short pieces * of code anyway, so wouldn't be an appropriate implementation of * Python's locks regardless. * * The pthread_lock struct implements a Python lock as a "locked?" bit * and a <condition, mutex> pair. In general, if the bit can be acquired * instantly, it is, else the pair is used to block the thread until the * bit is cleared. 9 May 1994 tim@ksr.com */ typedef struct { char locked; /* 0=unlocked, 1=locked */ /* a <cond, mutex> pair to handle an acquire of a locked lock */ pthread_cond_t lock_released; pthread_mutex_t mut; } pthread_lock; #define CHECK_STATUS(name) if (status != 0) { perror(name); error = 1; } #define CHECK_STATUS_PTHREAD(name) if (status != 0) { fprintf(stderr, \ "%s: %s\n", name, strerror(status)); error = 1; } /* * Initialization for the current runtime. */ static void PyThread__init_thread(void) { // The library is only initialized once in the process, // regardless of how many times the Python runtime is initialized. 
static int lib_initialized = 0; if (!lib_initialized) { lib_initialized = 1; #if defined(_AIX) && defined(__GNUC__) extern void pthread_init(void); pthread_init(); #endif } init_condattr(); } /* * Thread support. */ /* bpo-33015: pythread_callback struct and pythread_wrapper() cast "void func(void *)" to "void* func(void *)": always return NULL. PyThread_start_new_thread() uses "void func(void *)" type, whereas pthread_create() requires a void* return value. */ typedef struct { void (*func) (void *); void *arg; } pythread_callback; static void * pythread_wrapper(void *arg) { /* copy func and func_arg and free the temporary structure */ pythread_callback *callback = arg; void (*func)(void *) = callback->func; void *func_arg = callback->arg; PyMem_RawFree(arg); func(func_arg); return NULL; } unsigned long PyThread_start_new_thread(void (*func)(void *), void *arg) { pthread_t th; int status; #if defined(THREAD_STACK_SIZE) || defined(PTHREAD_SYSTEM_SCHED_SUPPORTED) pthread_attr_t attrs; #endif #if defined(THREAD_STACK_SIZE) size_t tss; #endif if (!initialized) PyThread_init_thread(); #if defined(THREAD_STACK_SIZE) || defined(PTHREAD_SYSTEM_SCHED_SUPPORTED) if (pthread_attr_init(&attrs) != 0) return PYTHREAD_INVALID_THREAD_ID; #endif #if defined(THREAD_STACK_SIZE) PyThreadState *tstate = _PyThreadState_GET(); size_t stacksize = tstate ? tstate->interp->threads.stacksize : 0; tss = (stacksize != 0) ? 
stacksize : THREAD_STACK_SIZE; if (tss != 0) { if (pthread_attr_setstacksize(&attrs, tss) != 0) { pthread_attr_destroy(&attrs); return PYTHREAD_INVALID_THREAD_ID; } } #endif #if defined(PTHREAD_SYSTEM_SCHED_SUPPORTED) pthread_attr_setscope(&attrs, PTHREAD_SCOPE_SYSTEM); #endif pythread_callback *callback = PyMem_RawMalloc(sizeof(pythread_callback)); if (callback == NULL) { return PYTHREAD_INVALID_THREAD_ID; } callback->func = func; callback->arg = arg; status = pthread_create(&th, #if defined(THREAD_STACK_SIZE) || defined(PTHREAD_SYSTEM_SCHED_SUPPORTED) &attrs, #else (pthread_attr_t*)NULL, #endif pythread_wrapper, callback); #if defined(THREAD_STACK_SIZE) || defined(PTHREAD_SYSTEM_SCHED_SUPPORTED) pthread_attr_destroy(&attrs); #endif if (status != 0) { PyMem_RawFree(callback); return PYTHREAD_INVALID_THREAD_ID; } pthread_detach(th); #if SIZEOF_PTHREAD_T <= SIZEOF_LONG return (unsigned long) th; #else return (unsigned long) *(unsigned long *) &th; #endif } /* XXX This implementation is considered (to quote Tim Peters) "inherently hosed" because: - It does not guarantee the promise that a non-zero integer is returned. - The cast to unsigned long is inherently unsafe. - It is not clear that the 'volatile' (for AIX?) are any longer necessary. 
*/ unsigned long PyThread_get_thread_ident(void) { volatile pthread_t threadid; if (!initialized) PyThread_init_thread(); threadid = pthread_self(); return (unsigned long) threadid; } #ifdef PY_HAVE_THREAD_NATIVE_ID unsigned long PyThread_get_thread_native_id(void) { if (!initialized) PyThread_init_thread(); #ifdef __APPLE__ uint64_t native_id; (void) pthread_threadid_np(NULL, &native_id); #elif defined(__linux__) pid_t native_id; native_id = syscall(SYS_gettid); #elif defined(__FreeBSD__) int native_id; native_id = pthread_getthreadid_np(); #elif defined(__OpenBSD__) pid_t native_id; native_id = getthrid(); #elif defined(_AIX) tid_t native_id; native_id = thread_self(); #elif defined(__NetBSD__) lwpid_t native_id; native_id = _lwp_self(); #elif defined(__DragonFly__) lwpid_t native_id; native_id = lwp_gettid(); #endif return (unsigned long) native_id; } #endif void _Py_NO_RETURN PyThread_exit_thread(void) { if (!initialized) exit(0); #if defined(__wasi__) /* * wasi-threads doesn't have pthread_exit right now * cf. https://github.com/WebAssembly/wasi-threads/issues/7 */ abort(); #else pthread_exit(0); #endif } #ifdef USE_SEMAPHORES /* * Lock support. */ PyThread_type_lock PyThread_allocate_lock(void) { sem_t *lock; int status, error = 0; if (!initialized) PyThread_init_thread(); lock = (sem_t *)PyMem_RawMalloc(sizeof(sem_t)); if (lock) { status = sem_init(lock,0,1); CHECK_STATUS("sem_init"); if (error) { PyMem_RawFree((void *)lock); lock = NULL; } } return (PyThread_type_lock)lock; } void PyThread_free_lock(PyThread_type_lock lock) { sem_t *thelock = (sem_t *)lock; int status, error = 0; (void) error; /* silence unused-but-set-variable warning */ if (!thelock) return; status = sem_destroy(thelock); CHECK_STATUS("sem_destroy"); PyMem_RawFree((void *)thelock); } /* * As of February 2002, Cygwin thread implementations mistakenly report error * codes in the return value of the sem_ calls (like the pthread_ functions). 
* Correct implementations return -1 and put the code in errno. This supports * either. */ static int fix_status(int status) { return (status == -1) ? errno : status; } PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds, int intr_flag) { PyLockStatus success; sem_t *thelock = (sem_t *)lock; int status, error = 0; (void) error; /* silence unused-but-set-variable warning */ _PyTime_t timeout; // relative timeout if (microseconds >= 0) { // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout // overflow to the caller, so clamp the timeout to // [_PyTime_MIN, _PyTime_MAX]. // // _PyTime_MAX nanoseconds is around 292.3 years. // // _thread.Lock.acquire() and _thread.RLock.acquire() raise an // OverflowError if microseconds is greater than PY_TIMEOUT_MAX. timeout = _PyTime_FromMicrosecondsClamp(microseconds); } else { timeout = _PyTime_FromNanoseconds(-1); } #ifdef HAVE_SEM_CLOCKWAIT struct timespec abs_timeout; // Local scope for deadline { _PyTime_t deadline = _PyTime_Add(_PyTime_GetMonotonicClock(), timeout); _PyTime_AsTimespec_clamp(deadline, &abs_timeout); } #else _PyTime_t deadline = 0; if (timeout > 0 && !intr_flag) { deadline = _PyDeadline_Init(timeout); } #endif while (1) { if (timeout > 0) { #ifdef HAVE_SEM_CLOCKWAIT status = fix_status(sem_clockwait(thelock, CLOCK_MONOTONIC, &abs_timeout)); #else _PyTime_t abs_time = _PyTime_Add(_PyTime_GetSystemClock(), timeout); struct timespec ts; _PyTime_AsTimespec_clamp(abs_time, &ts); status = fix_status(sem_timedwait(thelock, &ts)); #endif } else if (timeout == 0) { status = fix_status(sem_trywait(thelock)); } else { status = fix_status(sem_wait(thelock)); } /* Retry if interrupted by a signal, unless the caller wants to be notified. */ if (intr_flag || status != EINTR) { break; } // sem_clockwait() uses an absolute timeout, there is no need // to recompute the relative timeout. 
#ifndef HAVE_SEM_CLOCKWAIT if (timeout > 0) { /* wait interrupted by a signal (EINTR): recompute the timeout */ timeout = _PyDeadline_Get(deadline); if (timeout < 0) { status = ETIMEDOUT; break; } } #endif } /* Don't check the status if we're stopping because of an interrupt. */ if (!(intr_flag && status == EINTR)) { if (timeout > 0) { if (status != ETIMEDOUT) { #ifdef HAVE_SEM_CLOCKWAIT CHECK_STATUS("sem_clockwait"); #else CHECK_STATUS("sem_timedwait"); #endif } } else if (timeout == 0) { if (status != EAGAIN) { CHECK_STATUS("sem_trywait"); } } else { CHECK_STATUS("sem_wait"); } } if (status == 0) { success = PY_LOCK_ACQUIRED; } else if (intr_flag && status == EINTR) { success = PY_LOCK_INTR; } else { success = PY_LOCK_FAILURE; } return success; } void PyThread_release_lock(PyThread_type_lock lock) { sem_t *thelock = (sem_t *)lock; int status, error = 0; (void) error; /* silence unused-but-set-variable warning */ status = sem_post(thelock); CHECK_STATUS("sem_post"); } #else /* USE_SEMAPHORES */ /* * Lock support. */ PyThread_type_lock PyThread_allocate_lock(void) { pthread_lock *lock; int status, error = 0; if (!initialized) PyThread_init_thread(); lock = (pthread_lock *) PyMem_RawCalloc(1, sizeof(pthread_lock)); if (lock) { lock->locked = 0; status = pthread_mutex_init(&lock->mut, NULL); CHECK_STATUS_PTHREAD("pthread_mutex_init"); /* Mark the pthread mutex underlying a Python mutex as pure happens-before. We can't simply mark the Python-level mutex as a mutex because it can be acquired and released in different threads, which will cause errors. 
*/ _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(&lock->mut); status = _PyThread_cond_init(&lock->lock_released); CHECK_STATUS_PTHREAD("pthread_cond_init"); if (error) { PyMem_RawFree((void *)lock); lock = 0; } } return (PyThread_type_lock) lock; } void PyThread_free_lock(PyThread_type_lock lock) { pthread_lock *thelock = (pthread_lock *)lock; int status, error = 0; (void) error; /* silence unused-but-set-variable warning */ /* some pthread-like implementations tie the mutex to the cond * and must have the cond destroyed first. */ status = pthread_cond_destroy( &thelock->lock_released ); CHECK_STATUS_PTHREAD("pthread_cond_destroy"); status = pthread_mutex_destroy( &thelock->mut ); CHECK_STATUS_PTHREAD("pthread_mutex_destroy"); PyMem_RawFree((void *)thelock); } PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds, int intr_flag) { PyLockStatus success = PY_LOCK_FAILURE; pthread_lock *thelock = (pthread_lock *)lock; int status, error = 0; if (microseconds == 0) { status = pthread_mutex_trylock( &thelock->mut ); if (status != EBUSY) { CHECK_STATUS_PTHREAD("pthread_mutex_trylock[1]"); } } else { status = pthread_mutex_lock( &thelock->mut ); CHECK_STATUS_PTHREAD("pthread_mutex_lock[1]"); } if (status != 0) { goto done; } if (thelock->locked == 0) { success = PY_LOCK_ACQUIRED; goto unlock; } if (microseconds == 0) { goto unlock; } struct timespec abs_timeout; if (microseconds > 0) { _PyThread_cond_after(microseconds, &abs_timeout); } // Continue trying until we get the lock // mut must be locked by me -- part of the condition protocol while (1) { if (microseconds > 0) { status = pthread_cond_timedwait(&thelock->lock_released, &thelock->mut, &abs_timeout); if (status == 1) { break; } if (status == ETIMEDOUT) { break; } CHECK_STATUS_PTHREAD("pthread_cond_timedwait"); } else { status = pthread_cond_wait( &thelock->lock_released, &thelock->mut); CHECK_STATUS_PTHREAD("pthread_cond_wait"); } if (intr_flag && status == 0 && thelock->locked) { // 
We were woken up, but didn't get the lock. We probably received // a signal. Return PY_LOCK_INTR to allow the caller to handle // it and retry. success = PY_LOCK_INTR; break; } if (status == 0 && !thelock->locked) { success = PY_LOCK_ACQUIRED; break; } // Wait got interrupted by a signal: retry } unlock: if (success == PY_LOCK_ACQUIRED) { thelock->locked = 1; } status = pthread_mutex_unlock( &thelock->mut ); CHECK_STATUS_PTHREAD("pthread_mutex_unlock[1]"); done: if (error) { success = PY_LOCK_FAILURE; } return success; } void PyThread_release_lock(PyThread_type_lock lock) { pthread_lock *thelock = (pthread_lock *)lock; int status, error = 0; (void) error; /* silence unused-but-set-variable warning */ status = pthread_mutex_lock( &thelock->mut ); CHECK_STATUS_PTHREAD("pthread_mutex_lock[3]"); thelock->locked = 0; /* wake up someone (anyone, if any) waiting on the lock */ status = pthread_cond_signal( &thelock->lock_released ); CHECK_STATUS_PTHREAD("pthread_cond_signal"); status = pthread_mutex_unlock( &thelock->mut ); CHECK_STATUS_PTHREAD("pthread_mutex_unlock[3]"); } #endif /* USE_SEMAPHORES */ int _PyThread_at_fork_reinit(PyThread_type_lock *lock) { PyThread_type_lock new_lock = PyThread_allocate_lock(); if (new_lock == NULL) { return -1; } /* bpo-6721, bpo-40089: The old lock can be in an inconsistent state. fork() can be called in the middle of an operation on the lock done by another thread. So don't call PyThread_free_lock(*lock). Leak memory on purpose. Don't release the memory either since the address of a mutex is relevant. Putting two mutexes at the same address can lead to problems. */ *lock = new_lock; return 0; } int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) { return PyThread_acquire_lock_timed(lock, waitflag ? -1 : 0, /*intr_flag=*/0); } /* set the thread stack size. * Return 0 if size is valid, -1 if size is invalid, * -2 if setting stack size is not supported. 
*/ static int _pythread_pthread_set_stacksize(size_t size) { #if defined(THREAD_STACK_SIZE) pthread_attr_t attrs; size_t tss_min; int rc = 0; #endif /* set to default */ if (size == 0) { _PyInterpreterState_GET()->threads.stacksize = 0; return 0; } #if defined(THREAD_STACK_SIZE) #if defined(PTHREAD_STACK_MIN) tss_min = PTHREAD_STACK_MIN > THREAD_STACK_MIN ? PTHREAD_STACK_MIN : THREAD_STACK_MIN; #else tss_min = THREAD_STACK_MIN; #endif if (size >= tss_min) { /* validate stack size by setting thread attribute */ if (pthread_attr_init(&attrs) == 0) { rc = pthread_attr_setstacksize(&attrs, size); pthread_attr_destroy(&attrs); if (rc == 0) { _PyInterpreterState_GET()->threads.stacksize = size; return 0; } } } return -1; #else return -2; #endif } #define THREAD_SET_STACKSIZE(x) _pythread_pthread_set_stacksize(x) /* Thread Local Storage (TLS) API This API is DEPRECATED since Python 3.7. See PEP 539 for details. */ /* Issue #25658: On platforms where native TLS key is defined in a way that cannot be safely cast to int, PyThread_create_key returns immediately a failure status and other TLS functions all are no-ops. This indicates clearly that the old API is not supported on platforms where it cannot be used reliably, and that no effort will be made to add such support. Note: PTHREAD_KEY_T_IS_COMPATIBLE_WITH_INT will be unnecessary after removing this API. */ int PyThread_create_key(void) { #ifdef PTHREAD_KEY_T_IS_COMPATIBLE_WITH_INT pthread_key_t key; int fail = pthread_key_create(&key, NULL); if (fail) return -1; if (key > INT_MAX) { /* Issue #22206: handle integer overflow */ pthread_key_delete(key); errno = ENOMEM; return -1; } return (int)key; #else return -1; /* never return valid key value. 
*/ #endif } void PyThread_delete_key(int key) { #ifdef PTHREAD_KEY_T_IS_COMPATIBLE_WITH_INT pthread_key_delete(key); #endif } void PyThread_delete_key_value(int key) { #ifdef PTHREAD_KEY_T_IS_COMPATIBLE_WITH_INT pthread_setspecific(key, NULL); #endif } int PyThread_set_key_value(int key, void *value) { #ifdef PTHREAD_KEY_T_IS_COMPATIBLE_WITH_INT int fail = pthread_setspecific(key, value); return fail ? -1 : 0; #else return -1; #endif } void * PyThread_get_key_value(int key) { #ifdef PTHREAD_KEY_T_IS_COMPATIBLE_WITH_INT return pthread_getspecific(key); #else return NULL; #endif } void PyThread_ReInitTLS(void) { } /* Thread Specific Storage (TSS) API Platform-specific components of TSS API implementation. */ int PyThread_tss_create(Py_tss_t *key) { assert(key != NULL); /* If the key has been created, function is silently skipped. */ if (key->_is_initialized) { return 0; } int fail = pthread_key_create(&(key->_key), NULL); if (fail) { return -1; } key->_is_initialized = 1; return 0; } void PyThread_tss_delete(Py_tss_t *key) { assert(key != NULL); /* If the key has not been created, function is silently skipped. */ if (!key->_is_initialized) { return; } pthread_key_delete(key->_key); /* pthread has not provided the defined invalid value for the key. */ key->_is_initialized = 0; } int PyThread_tss_set(Py_tss_t *key, void *value) { assert(key != NULL); int fail = pthread_setspecific(key->_key, value); return fail ? -1 : 0; } void * PyThread_tss_get(Py_tss_t *key) { assert(key != NULL); return pthread_getspecific(key->_key); }