summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
authorSam Gross <colesbury@gmail.com>2023-09-19 15:54:29 (GMT)
committerGitHub <noreply@github.com>2023-09-19 15:54:29 (GMT)
commit0c89056fe59ac42f09978582479d40e58a236856 (patch)
tree06cd5a790da2a6dd3862567419c25572f96ae373 /Tools
parent0a31ff0050eec5079fd4c9cafd33b4e3e9afd9ab (diff)
downloadcpython-0c89056fe59ac42f09978582479d40e58a236856.zip
cpython-0c89056fe59ac42f09978582479d40e58a236856.tar.gz
cpython-0c89056fe59ac42f09978582479d40e58a236856.tar.bz2
gh-108724: Add PyMutex and _PyParkingLot APIs (gh-109344)
PyMutex is a one byte lock with fast, inlineable lock and unlock functions for the common uncontended case. The design is based on WebKit's WTF::Lock. PyMutex is built using the _PyParkingLot APIs, which provides a cross-platform futex-like API (based on WebKit's WTF::ParkingLot). This internal API will be used for building other synchronization primitives used to implement PEP 703, such as one-time initialization and events. This also includes tests and a mini benchmark in Tools/lockbench/lockbench.py to compare with the existing PyThread_type_lock. Uncontended acquisition + release: * Linux (x86-64): PyMutex: 11 ns, PyThread_type_lock: 44 ns * macOS (arm64): PyMutex: 13 ns, PyThread_type_lock: 18 ns * Windows (x86-64): PyMutex: 13 ns, PyThread_type_lock: 38 ns PR Overview: The primary purpose of this PR is to implement PyMutex, but there are a number of support pieces (described below). * PyMutex: A 1-byte lock that doesn't require memory allocation to initialize and is generally faster than the existing PyThread_type_lock. The API is internal only for now. * _PyParking_Lot: A futex-like API based on the API of the same name in WebKit. Used to implement PyMutex. * _PyRawMutex: A word sized lock used to implement _PyParking_Lot. * PyEvent: A one time event. This was used a bunch in the "nogil" fork and is useful for testing the PyMutex implementation, so I've included it as part of the PR. * pycore_llist.h: Defines common operations on doubly-linked list. Not strictly necessary (could do the list operations manually), but they come up frequently in the "nogil" fork. ( Similar to https://man.freebsd.org/cgi/man.cgi?queue) --------- Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
Diffstat (limited to 'Tools')
-rw-r--r--Tools/c-analyzer/cpython/_parser.py1
-rw-r--r--Tools/c-analyzer/cpython/ignored.tsv3
-rw-r--r--Tools/lockbench/lockbench.py53
3 files changed, 57 insertions, 0 deletions
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index 90334d0..4523b2e 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -314,6 +314,7 @@ MAX_SIZES = {
_abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
_abs('Objects/typeobject.c'): (35_000, 200),
_abs('Python/compile.c'): (20_000, 500),
+ _abs('Python/parking_lot.c'): (40_000, 1000),
_abs('Python/pylifecycle.c'): (500_000, 5000),
_abs('Python/pystate.c'): (500_000, 5000),
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index 8c2be44..336b028 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -50,6 +50,9 @@ Python/getversion.c - version -
Python/bootstrap_hash.c - _Py_HashSecret_Initialized -
Python/pyhash.c - _Py_HashSecret -
+## thread-safe hashtable (internal locks)
+Python/parking_lot.c - buckets -
+
##################################
## state tied to Py_Main()
diff --git a/Tools/lockbench/lockbench.py b/Tools/lockbench/lockbench.py
new file mode 100644
index 0000000..9833d70
--- /dev/null
+++ b/Tools/lockbench/lockbench.py
@@ -0,0 +1,53 @@
+# Measure the performance of PyMutex and PyThread_type_lock locks
+# with short critical sections.
+#
+# Usage: python Tools/lockbench/lockbench.py [CRITICAL_SECTION_LENGTH]
+#
+# How to interpret the results:
+#
+# Acquisitions (kHz): Reports the total number of lock acquisitions in
+# thousands of acquisitions per second. This is the most important metric,
+# particularly for the 1 thread case because even in multithreaded programs,
+# most locks acquisitions are not contended. Values for 2+ threads are
+# only meaningful for `--disable-gil` builds, because the GIL prevents most
+# situations where there is lock contention with short critical sections.
+#
+# Fairness: A measure of how evenly the lock acquisitions are distributed.
+# A fairness of 1.0 means that all threads acquired the lock the same number
+# of times. A fairness of 1/N means that only one thread ever acquired the
+# lock.
+# See https://en.wikipedia.org/wiki/Fairness_measure#Jain's_fairness_index
+
+from _testinternalcapi import benchmark_locks
+import sys
+
+# Max number of threads to test
+MAX_THREADS = 10
+
+# How much "work" to do while holding the lock
+CRITICAL_SECTION_LENGTH = 1
+
+
+def jains_fairness(values):
+ # Jain's fairness index
+ # See https://en.wikipedia.org/wiki/Fairness_measure
+ return (sum(values) ** 2) / (len(values) * sum(x ** 2 for x in values))
+
+def main():
+ print("Lock Type Threads Acquisitions (kHz) Fairness")
+ for lock_type in ["PyMutex", "PyThread_type_lock"]:
+ use_pymutex = (lock_type == "PyMutex")
+ for num_threads in range(1, MAX_THREADS + 1):
+ acquisitions, thread_iters = benchmark_locks(
+ num_threads, use_pymutex, CRITICAL_SECTION_LENGTH)
+
+ acquisitions /= 1000 # report in kHz for readability
+ fairness = jains_fairness(thread_iters)
+
+ print(f"{lock_type: <20}{num_threads: <18}{acquisitions: >5.0f}{fairness: >20.2f}")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) > 1:
+ CRITICAL_SECTION_LENGTH = int(sys.argv[1])
+ main()