From 2d76be251d0aee89f76e6fa5a63fa1ad3f2b76cf Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 7 Dec 2023 08:47:55 -0500 Subject: gh-111962: Make dtoa thread-safe in `--disable-gil` builds. (#112049) This updates `dtoa.c` to avoid using the Bigint free-list in `--disable-gil` builds and to pre-compute the needed powers of 5 during interpreter initialization. * gh-111962: Make dtoa thread-safe in `--disable-gil` builds. This avoids using the Bigint free-list in `--disable-gil` builds and pre-computes the needed powers of 5 during interpreter initialization. * Fix size of cached powers of 5 array. We need the powers of 5 up to 5**512 because we only jump straight to underflow when the exponent is less than -512 (or larger than 308). * Rename Py_NOGIL to Py_GIL_DISABLED * Changes from review * Fix assertion placement --- Include/internal/pycore_dtoa.h | 16 ++++++--- Python/dtoa.c | 78 ++++++++++++++++++++++++++++-------------- Python/pylifecycle.c | 6 ++++ 3 files changed, 70 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_dtoa.h b/Include/internal/pycore_dtoa.h index ac62a4d..c5cfdf4 100644 --- a/Include/internal/pycore_dtoa.h +++ b/Include/internal/pycore_dtoa.h @@ -35,6 +35,9 @@ struct _dtoa_state { /* The size of the Bigint freelist */ #define Bigint_Kmax 7 +/* The size of the cached powers of 5 array */ +#define Bigint_Pow5size 8 + #ifndef PRIVATE_MEM #define PRIVATE_MEM 2304 #endif @@ -42,9 +45,10 @@ struct _dtoa_state { ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double)) struct _dtoa_state { - /* p5s is a linked list of powers of 5 of the form 5**(2**i), i >= 2 */ + // p5s is an array of powers of 5 of the form: + // 5**(2**(i+2)) for 0 <= i < Bigint_Pow5size + struct Bigint *p5s[Bigint_Pow5size]; // XXX This should be freed during runtime fini. 
- struct Bigint *p5s; struct Bigint *freelist[Bigint_Kmax+1]; double preallocated[Bigint_PREALLOC_SIZE]; double *preallocated_next; @@ -57,9 +61,6 @@ struct _dtoa_state { #endif // !Py_USING_MEMORY_DEBUGGER -/* These functions are used by modules compiled as C extension like math: - they must be exported. */ - extern double _Py_dg_strtod(const char *str, char **ptr); extern char* _Py_dg_dtoa(double d, int mode, int ndigits, int *decpt, int *sign, char **rve); @@ -67,6 +68,11 @@ extern void _Py_dg_freedtoa(char *s); #endif // _PY_SHORT_FLOAT_REPR == 1 + +extern PyStatus _PyDtoa_Init(PyInterpreterState *interp); +extern void _PyDtoa_Fini(PyInterpreterState *interp); + + #ifdef __cplusplus } #endif diff --git a/Python/dtoa.c b/Python/dtoa.c index 5dfc0e1..6e3162f 100644 --- a/Python/dtoa.c +++ b/Python/dtoa.c @@ -309,7 +309,7 @@ BCinfo { // struct Bigint is defined in pycore_dtoa.h. typedef struct Bigint Bigint; -#ifndef Py_USING_MEMORY_DEBUGGER +#if !defined(Py_GIL_DISABLED) && !defined(Py_USING_MEMORY_DEBUGGER) /* Memory management: memory is allocated from, and returned to, Kmax+1 pools of memory, where pool k (0 <= k <= Kmax) is for Bigints b with b->maxwds == @@ -428,7 +428,7 @@ Bfree(Bigint *v) } } -#endif /* Py_USING_MEMORY_DEBUGGER */ +#endif /* !defined(Py_GIL_DISABLED) && !defined(Py_USING_MEMORY_DEBUGGER) */ #define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \ y->wds*sizeof(Long) + 2*sizeof(int)) @@ -673,10 +673,17 @@ mult(Bigint *a, Bigint *b) static Bigint * pow5mult(Bigint *b, int k) { - Bigint *b1, *p5, *p51; + Bigint *b1, *p5, **p5s; int i; static const int p05[3] = { 5, 25, 125 }; + // For double-to-string conversion, the maximum value of k is limited by + // DBL_MAX_10_EXP (308), the maximum decimal base-10 exponent for binary64. + // For string-to-double conversion, the extreme case is constrained by our + // hardcoded exponent limit before we underflow of -512, adjusted by + // STRTOD_DIGLIM-DBL_DIG-1, giving a maximum of k=535. 
+ assert(0 <= k && k < 1024); + if ((i = k & 3)) { b = multadd(b, p05[i-1], 0); if (b == NULL) @@ -686,18 +693,11 @@ pow5mult(Bigint *b, int k) if (!(k >>= 2)) return b; PyInterpreterState *interp = _PyInterpreterState_GET(); - p5 = interp->dtoa.p5s; - if (!p5) { - /* first time */ - p5 = i2b(625); - if (p5 == NULL) { - Bfree(b); - return NULL; - } - interp->dtoa.p5s = p5; - p5->next = 0; - } + p5s = interp->dtoa.p5s; for(;;) { + assert(p5s != interp->dtoa.p5s + Bigint_Pow5size); + p5 = *p5s; + p5s++; if (k & 1) { b1 = mult(b, p5); Bfree(b); @@ -707,17 +707,6 @@ pow5mult(Bigint *b, int k) } if (!(k >>= 1)) break; - p51 = p5->next; - if (!p51) { - p51 = mult(p5,p5); - if (p51 == NULL) { - Bfree(b); - return NULL; - } - p51->next = 0; - p5->next = p51; - } - p5 = p51; } return b; } @@ -2811,3 +2800,42 @@ _Py_dg_dtoa(double dd, int mode, int ndigits, } #endif // _PY_SHORT_FLOAT_REPR == 1 + +PyStatus +_PyDtoa_Init(PyInterpreterState *interp) +{ +#if _PY_SHORT_FLOAT_REPR == 1 && !defined(Py_USING_MEMORY_DEBUGGER) + Bigint **p5s = interp->dtoa.p5s; + + // 5**4 = 625 + Bigint *p5 = i2b(625); + if (p5 == NULL) { + return PyStatus_NoMemory(); + } + p5s[0] = p5; + + // compute 5**8, 5**16, 5**32, ..., 5**512 + for (Py_ssize_t i = 1; i < Bigint_Pow5size; i++) { + p5 = mult(p5, p5); + if (p5 == NULL) { + return PyStatus_NoMemory(); + } + p5s[i] = p5; + } + +#endif + return PyStatus_Ok(); +} + +void +_PyDtoa_Fini(PyInterpreterState *interp) +{ +#if _PY_SHORT_FLOAT_REPR == 1 && !defined(Py_USING_MEMORY_DEBUGGER) + Bigint **p5s = interp->dtoa.p5s; + for (Py_ssize_t i = 0; i < Bigint_Pow5size; i++) { + Bigint *p5 = p5s[i]; + p5s[i] = NULL; + Bfree(p5); + } +#endif +} diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 95a72eb..20bfe1a 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -820,6 +820,11 @@ pycore_interp_init(PyThreadState *tstate) return status; } + status = _PyDtoa_Init(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // 
The GC must be initialized before the first GC collection. status = _PyGC_Init(interp); if (_PyStatus_EXCEPTION(status)) { @@ -1776,6 +1781,7 @@ finalize_interp_clear(PyThreadState *tstate) _PyXI_Fini(tstate->interp); _PyExc_ClearExceptionGroupType(tstate->interp); _Py_clear_generic_types(tstate->interp); + _PyDtoa_Fini(tstate->interp); /* Clear interpreter state and all thread states */ _PyInterpreterState_Clear(tstate); -- cgit v0.12