diff options
author | Dino Viehland <dinoviehland@fb.com> | 2023-10-30 15:43:11 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-30 15:43:11 (GMT) |
commit | 05f2f0ac92afa560315eb66fd6576683c7f69e2d (patch) | |
tree | a6a4746103a67f06f04ff6df9f290f0305ff9dd5 /Include/internal | |
parent | 4ebf2fae9664a4042511059627f44d46dceb2e09 (diff) | |
download | cpython-05f2f0ac92afa560315eb66fd6576683c7f69e2d.zip cpython-05f2f0ac92afa560315eb66fd6576683c7f69e2d.tar.gz cpython-05f2f0ac92afa560315eb66fd6576683c7f69e2d.tar.bz2 |
gh-90815: Add mimalloc memory allocator (#109914)
* Add mimalloc v2.12
Modified src/alloc.c to remove include of alloc-override.c and not
compile new handler.
Did not include the following files:
- include/mimalloc-new-delete.h
- include/mimalloc-override.h
- src/alloc-override-osx.c
- src/alloc-override.c
- src/static.c
- src/region.c
mimalloc is thread safe and shares a single heap across all runtimes,
therefore finalization and getting global allocated blocks across all
runtimes is different.
* mimalloc: minimal changes for use in Python:
- remove debug spam for freeing large allocations
- use same bytes (0xDD) for freed allocations in CPython and mimalloc
This is important for the test_capi debug memory tests
* Don't export mimalloc symbol in libpython.
* Enable mimalloc as Python allocator option.
* Add mimalloc MIT license.
* Log mimalloc in Lib/test/pythoninfo.py.
* Document new mimalloc support.
* Use macro defs for exports as done in:
https://github.com/python/cpython/pull/31164/
Co-authored-by: Sam Gross <colesbury@gmail.com>
Co-authored-by: Christian Heimes <christian@python.org>
Co-authored-by: Victor Stinner <vstinner@python.org>
Diffstat (limited to 'Include/internal')
-rw-r--r-- | Include/internal/mimalloc/mimalloc.h | 565 | ||||
-rw-r--r-- | Include/internal/mimalloc/mimalloc/atomic.h | 385 | ||||
-rw-r--r-- | Include/internal/mimalloc/mimalloc/internal.h | 979 | ||||
-rw-r--r-- | Include/internal/mimalloc/mimalloc/prim.h | 323 | ||||
-rw-r--r-- | Include/internal/mimalloc/mimalloc/track.h | 147 | ||||
-rw-r--r-- | Include/internal/mimalloc/mimalloc/types.h | 670 | ||||
-rw-r--r-- | Include/internal/pycore_mimalloc.h | 19 | ||||
-rw-r--r-- | Include/internal/pycore_pymem_init.h | 5 |
8 files changed, 3091 insertions, 2 deletions
diff --git a/Include/internal/mimalloc/mimalloc.h b/Include/internal/mimalloc/mimalloc.h new file mode 100644 index 0000000..821129e --- /dev/null +++ b/Include/internal/mimalloc/mimalloc.h @@ -0,0 +1,565 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_H +#define MIMALLOC_H + +#define MI_MALLOC_VERSION 212 // major + 2 digits minor + +// ------------------------------------------------------ +// Compiler specific attributes +// ------------------------------------------------------ + +#ifdef __cplusplus + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + #define mi_attr_noexcept noexcept + #else + #define mi_attr_noexcept throw() + #endif +#else + #define mi_attr_noexcept +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201703) + #define mi_decl_nodiscard [[nodiscard]] +#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl + #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif defined(_HAS_NODISCARD) + #define mi_decl_nodiscard _NODISCARD +#elif (_MSC_VER >= 1700) + #define mi_decl_nodiscard _Check_return_ +#else + #define mi_decl_nodiscard +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) + #if !defined(MI_SHARED_LIB) + #define mi_decl_export + #elif defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __declspec(dllexport) + #else + #define mi_decl_export __declspec(dllimport) + #endif + #if defined(__MINGW32__) + #define mi_decl_restrict + #define mi_attr_malloc __attribute__((malloc)) + #else + #if (_MSC_VER >= 1900) && !defined(__EDG__) + #define mi_decl_restrict __declspec(allocator) __declspec(restrict) + #else + #define mi_decl_restrict __declspec(restrict) + #endif + #define mi_attr_malloc + #endif + #define mi_cdecl __cdecl + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) +#elif defined(__GNUC__) // includes clang and icc + #if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __attribute__((visibility("default"))) + #else + #define mi_decl_export + #endif + #define mi_cdecl // leads to warnings... __attribute__((cdecl)) + #define mi_decl_restrict + #define mi_attr_malloc __attribute__((malloc)) + #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) + #elif defined(__INTEL_COMPILER) + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) + #else + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) + #endif +#else + #define mi_cdecl + #define mi_decl_export + #define mi_decl_restrict + #define mi_attr_malloc + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) +#endif + +// ------------------------------------------------------ +// Includes +// ------------------------------------------------------ + +#include <stddef.h> // size_t +#include <stdbool.h> // bool +#include <stdint.h> // INTPTR_MAX + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------------------------------------------ +// Standard malloc interface +// ------------------------------------------------------ + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); + +mi_decl_export void mi_free(void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; + +// ------------------------------------------------------ +// Extended functionality +// ------------------------------------------------------ +#define MI_SMALL_WSIZE_MAX (128) +#define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); + +mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; + + +// ------------------------------------------------------ +// Internals +// ------------------------------------------------------ + +typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; + +typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; + +typedef void (mi_cdecl mi_error_fun)(int err, void* arg); +mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); + +mi_decl_export void mi_collect(bool force) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, + size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + +// ------------------------------------------------------------------------------------- +// Aligned allocation +// Note that `alignment` always follows `size` for consistency with unaligned +// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. +// ------------------------------------------------------------------------------------- + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); + + +// ------------------------------------------------------------------------------------- +// Heaps: first-class, but can only allocate from the same thread that created it. +// ------------------------------------------------------------------------------------- + +struct mi_heap_s; +typedef struct mi_heap_s mi_heap_t; + +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void); +mi_decl_export void mi_heap_delete(mi_heap_t* heap); +mi_decl_export void mi_heap_destroy(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_get_default(void); +mi_decl_export mi_heap_t* mi_heap_get_backing(void); +mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); + + +// -------------------------------------------------------------------------------- +// Zero initialized re-allocation. +// Only valid on memory that was originally allocated with zero initialization too. +// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc. +// see <https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992> +// -------------------------------------------------------------------------------- + +mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); + +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3); + +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); + +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4); + + +// ------------------------------------------------------ +// Analysis +// ------------------------------------------------------ + +mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p); +mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p); +mi_decl_export bool mi_check_owned(const void* p); + +// An area of heap space contains blocks of a single size. +typedef struct mi_heap_area_s { + void* blocks; // start of the area containing heap blocks + size_t reserved; // bytes reserved for this area (virtual) + size_t committed; // current available bytes for this area + size_t used; // number of allocated blocks + size_t block_size; // size in bytes of each block + size_t full_block_size; // size in bytes of a full block including padding and metadata. +} mi_heap_area_t; + +typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); + +mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); + +// Experimental +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; + +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; + +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; + +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; + +// Experimental: heaps associated with specific memory arena's +typedef int mi_arena_id_t; +mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); +mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; + +#if MI_MALLOC_VERSION >= 182 +// Create a heap that only allocates in the specified arena +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); +#endif + +// deprecated +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + + +// ------------------------------------------------------ +// Convenience +// ------------------------------------------------------ + +#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) +#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) +#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) +#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) +#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) +#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) + +#define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) +#define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) +#define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp))) +#define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp))) +#define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp))) +#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp))) + + +// ------------------------------------------------------ +// Options +// ------------------------------------------------------ + +typedef enum mi_option_e { + // stable options + mi_option_show_errors, // print error messages + mi_option_show_stats, // print statistics on termination + mi_option_verbose, // print verbose messages + // the following options are experimental (see src/options.h) + mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1) + mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup + mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup + mi_option_deprecated_segment_cache, + mi_option_deprecated_page_reset, + mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination + mi_option_deprecated_segment_reset, + mi_option_eager_commit_delay, + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. + mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_max_segment_reclaim, + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe. + mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) + mi_option_arena_purge_mult, + mi_option_purge_extend_delay, + _mi_option_last, + // legacy option names + mi_option_large_os_pages = mi_option_allow_large_os_pages, + mi_option_eager_region_commit = mi_option_arena_eager_commit, + mi_option_reset_decommits = mi_option_purge_decommits, + mi_option_reset_delay = mi_option_purge_delay, + mi_option_abandoned_page_reset = mi_option_abandoned_page_purge +} mi_option_t; + + +mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option); +mi_decl_export void mi_option_enable(mi_option_t option); +mi_decl_export void mi_option_disable(mi_option_t option); +mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); +mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); + +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); +mi_decl_export void mi_option_set(mi_option_t option, long value); +mi_decl_export void mi_option_set_default(mi_option_t option, long value); + + +// ------------------------------------------------------------------------------------------------------- +// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. +// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) +// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing. +// ------------------------------------------------------------------------------------------------------- + +mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; +mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; + +mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); + +mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; +mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; + +mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; +mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; + +// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); + +#ifdef __cplusplus +} +#endif + +// --------------------------------------------------------------------------------------------- +// Implement the C++ std::allocator interface for use in STL containers. +// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally) +// --------------------------------------------------------------------------------------------- +#ifdef __cplusplus + +#include <cstddef> // std::size_t +#include <cstdint> // PTRDIFF_MAX +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#include <type_traits> // std::true_type +#include <utility> // std::forward +#endif + +template<class T> struct _mi_stl_allocator_common { + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef value_type& reference; + typedef value_type const& const_reference; + typedef value_type* pointer; + typedef value_type const* const_pointer; + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } + template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } + #else + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } + #endif + + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +}; + +template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> { + using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; + template <class U> struct rebind { typedef mi_stl_allocator<U> other; }; + + mi_stl_allocator() mi_attr_noexcept = default; + mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default; + template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { } + mi_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + + #if (__cplusplus >= 201703L) // C++17 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } + #else + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using is_always_equal = std::true_type; + #endif +}; + +template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; } +template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; } + + +#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11 +#define MI_HAS_HEAP_STL_ALLOCATOR 1 + +#include <memory> // std::shared_ptr + +// Common base class for STL allocators in a specific heap +template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> { + using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; + + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */ + + #if (__cplusplus >= 201703L) // C++17 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } + #else + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using is_always_equal = std::false_type; + #endif + + void collect(bool force) { mi_heap_collect(this->heap.get(), force); } + template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); } + +protected: + std::shared_ptr<mi_heap_t> heap; + template<class U, bool D> friend struct _mi_heap_stl_allocator_common; + + _mi_heap_stl_allocator_common() { + mi_heap_t* hp = mi_heap_new(); + this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + } + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { } + +private: + static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } + static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } +}; + +// STL allocator allocation in a specific heap +template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> { + using typename _mi_heap_stl_allocator_common<T, false>::size_type; + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap + template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { } + + mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; }; +}; + +template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); } +template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); } + + +// STL allocator allocation in a specific heap, where `free` does nothing and +// the heap is destroyed in one go on destruction -- use with care! +template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> { + using typename _mi_heap_stl_allocator_common<T, true>::size_type; + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap + template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { } + + mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ } + template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; }; +}; + +template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); } +template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); } + +#endif // C++11 + +#endif // __cplusplus + +#endif diff --git a/Include/internal/mimalloc/mimalloc/atomic.h b/Include/internal/mimalloc/mimalloc/atomic.h new file mode 100644 index 0000000..c6b8146 --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/atomic.h @@ -0,0 +1,385 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_ATOMIC_H +#define MIMALLOC_ATOMIC_H + +// -------------------------------------------------------------------------------------------- +// Atomics +// We need to be portable between C, C++, and MSVC. +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// instead of passing the memory order as a parameter. +// ----------------------------------------------------------------------------------------------- + +#if defined(__cplusplus) +// Use C++ atomics +#include <atomic> +#define _Atomic(tp) std::atomic<tp> +#define mi_atomic(name) std::atomic_##name +#define mi_memory_order(name) std::memory_order_##name +#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif +#elif defined(_MSC_VER) +// Use MSVC C wrapper for C11 atomics +#define _Atomic(tp) tp +#define MI_ATOMIC_VAR_INIT(x) x +#define mi_atomic(name) mi_atomic_##name +#define mi_memory_order(name) mi_memory_order_##name +#else +// Use C11 atomics +#include <stdatomic.h> +#define mi_atomic(name) atomic_##name +#define mi_memory_order(name) memory_order_##name +#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif +#endif + +// Various defines for all used memory orders in mimalloc +#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) + +#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) + +#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) +#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) + +#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) + +#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1) +#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1) +#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1) +#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1) + +static inline void mi_atomic_yield(void); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add); +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); + + +#if defined(__cplusplus) || !defined(_MSC_VER) + +// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value) +// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well +#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) +#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) + +// In C++ we need to add casts to help resolve templates if NULL is passed +#if defined(__cplusplus) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) +#else +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) +#endif + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { + return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); +} +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; +} + +// Used by timers +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) + +#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) +#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) + + +#elif defined(_MSC_VER) + +// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <intrin.h> +#ifdef _WIN64 +typedef LONG64 msc_intptr_t; +#define MI_64(f) f##64 +#else +typedef LONG msc_intptr_t; +#define MI_64(f) f +#endif + +typedef enum mi_memory_order_e { + mi_memory_order_relaxed, + mi_memory_order_consume, + mi_memory_order_acquire, + mi_memory_order_release, + mi_memory_order_acq_rel, + mi_memory_order_seq_cst +} mi_memory_order; + +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); +} +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + (void)(mo1); (void)(mo2); + uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); + if (read == *expected) { + return true; + } + else { + *expected = read; + return false; + } +} +static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); +} +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); +} +static inline void mi_atomic_thread_fence(mi_memory_order mo) { + (void)(mo); + _Atomic(uintptr_t) x = 0; + mi_atomic_exchange_explicit(&x, 1, mo); +} +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) + return *p; +#else + uintptr_t x = *p; + if (mo > mi_memory_order_relaxed) { + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + } + return x; +#endif +} +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) + *p = x; +#else + mi_atomic_exchange_explicit(p, x, mo); +#endif +} +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_X64) + return *p; +#else + int64_t old = *p; + int64_t x = old; + while ((old = InterlockedCompareExchange64(p, x, old)) != x) { + x = old; + } + return x; +#endif +} +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) { + (void)(mo); +#if defined(x_M_IX86) || defined(_M_X64) + *p = x; +#else + InterlockedExchange64(p, x); +#endif +} + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { +#ifdef _WIN64 + return (int64_t)mi_atomic_addi((int64_t*)p, add); +#else + int64_t current; + int64_t sum; + do { + current = *p; + sum = current + add; + } while (_InterlockedCompareExchange64(p, sum, current) != current); + return current; +#endif +} +static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { + int64_t current; + do { + current = *p; + } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); +} + +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + +// The pointer macros cast to `uintptr_t`. +#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) + +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) + + +#endif + + +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); +} + +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p, -sub); +} + +typedef _Atomic(uintptr_t) mi_atomic_once_t; + +// Returns true only on the first invocation +static inline bool mi_atomic_once( mi_atomic_once_t* once ) { + if (mi_atomic_load_relaxed(once) != 0) return false; // quick test + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 +} + +typedef _Atomic(uintptr_t) mi_atomic_guard_t; + +// Allows only one thread to execute at a time +#define mi_atomic_guard(guard) \ + uintptr_t _mi_guard_expected = 0; \ + for(bool _mi_guard_once = true; \ + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ + (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) + + + +// Yield +#if defined(__cplusplus) +#include <thread> +static inline void mi_atomic_yield(void) { + std::this_thread::yield(); +} +#elif defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +static inline void mi_atomic_yield(void) { + YieldProcessor(); +} +#elif defined(__SSE2__) +#include <emmintrin.h> +static inline void mi_atomic_yield(void) { + _mm_pause(); +} +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) +#if defined(__x86_64__) || defined(__i386__) +static inline void mi_atomic_yield(void) { + __asm__ volatile ("pause" ::: "memory"); +} +#elif defined(__aarch64__) +static inline void mi_atomic_yield(void) { + __asm__ volatile("wfe"); +} +#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +static inline void mi_atomic_yield(void) { + __asm__ volatile("yield" ::: "memory"); +} +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) +#ifdef __APPLE__ +static inline void mi_atomic_yield(void) { + __asm__ volatile ("or r27,r27,r27" ::: "memory"); +} +#else +static inline void mi_atomic_yield(void) { + __asm__ __volatile__ ("or 27,27,27" ::: "memory"); +} +#endif +#elif defined(__armel__) || defined(__ARMEL__) +static inline void mi_atomic_yield(void) { + __asm__ volatile ("nop" ::: "memory"); +} +#endif +#elif defined(__sun) +// Fallback for other archs +#include <synch.h> +static inline void mi_atomic_yield(void) { + smt_pause(); +} +#elif defined(__wasi__) +#include <sched.h> +static inline void mi_atomic_yield(void) { + sched_yield(); +} +#else +#include <unistd.h> +static inline void mi_atomic_yield(void) { + sleep(0); +} +#endif + + +#endif // __MIMALLOC_ATOMIC_H diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h new file mode 100644 index 0000000..f076bc6 --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -0,0 +1,979 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_INTERNAL_H +#define MIMALLOC_INTERNAL_H + + +// -------------------------------------------------------------------------- +// This file contains the interal API's of mimalloc and various utility +// functions and macros. +// -------------------------------------------------------------------------- + +#include "mimalloc/types.h" +#include "mimalloc/track.h" + +#if (MI_DEBUG>0) +#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) +#else +#define mi_trace_message(...) +#endif + +#define MI_CACHE_LINE 64 +#if defined(_MSC_VER) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#else +#define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align +#endif + +#if defined(__EMSCRIPTEN__) && !defined(__wasi__) +#define __wasi__ +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc +#endif + +// pthreads +#if !defined(_WIN32) && !defined(__wasi__) +#define MI_USE_PTHREADS +#include <pthread.h> +#endif + +// "options.c" +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); +void _mi_warning_message(const char* fmt, ...); +void _mi_verbose_message(const char* fmt, ...); +void _mi_trace_message(const char* fmt, ...); +void _mi_options_init(void); +void _mi_error_message(int err, const char* fmt, ...); + +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +uintptr_t _mi_os_random_weak(uintptr_t extra_seed); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c +extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_cache_align const mi_page_t _mi_page_empty; +bool _mi_is_main_thread(void); +size_t _mi_current_thread_count(void); +bool _mi_preloading(void); // true while the C runtime is not initialized yet +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +void _mi_thread_done(mi_heap_t* heap); +void _mi_thread_data_collect(void); + +// os.c +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); + +size_t _mi_os_page_size(void); +size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_has_overcommit(void); +bool _mi_os_has_virtual_reserve(void); + +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); + +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); +bool _mi_os_use_large_page(size_t size, size_t alignment); +size_t _mi_os_large_page_size(void); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); + +// arena.c +mi_arena_id_t _mi_arena_id_none(void); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_contains(const void* p); +void _mi_arena_collect(bool force_purge, mi_stats_t* stats); +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); + +// "segment-map.c" +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); + +// "segment.c" +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); +bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); +void _mi_segment_thread_collect(mi_segments_tld_t* tld); + +#if MI_HUGE_PAGE_ABANDON +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#else +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#endif + +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +void _mi_abandoned_await_readers(void); +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); + +// "page.c" +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; + +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... +void _mi_heap_delayed_free_all(mi_heap_t* heap); +bool _mi_heap_delayed_free_partial(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_heap_t* heap, bool force); + +void _mi_page_free_collect(mi_page_t* page,bool force); +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments + +size_t _mi_bin_size(uint8_t bin); // for stats +uint8_t _mi_bin(size_t size); // for stats + +// "heap.c" +void _mi_heap_destroy_pages(mi_heap_t* heap); +void _mi_heap_collect_abandon(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); + +// "stats.c" +void _mi_stats_done(mi_stats_t* stats); +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); + +// "alloc.c" +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); +bool _mi_free_delayed_block(mi_block_t* block); +void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); + +// option.c, c primitives +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); + + +#if MI_DEBUG>1 +bool _mi_page_is_valid(mi_page_t* page); +#endif + + +// ------------------------------------------------------ +// Branches +// ------------------------------------------------------ + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + + +/* ----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in secure mode. + For portability define undefined error codes using common Unix codes: + <https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html> +----------------------------------------------------------- */ +#include <errno.h> +#ifndef EAGAIN // double free +#define EAGAIN (11) +#endif +#ifndef ENOMEM // out of memory +#define ENOMEM (12) +#endif +#ifndef EFAULT // corrupted free-list or meta-data +#define EFAULT (14) +#endif +#ifndef EINVAL // trying to free an invalid pointer +#define EINVAL (22) +#endif +#ifndef EOVERFLOW // count*size overflow +#define EOVERFLOW (75) +#endif + + +/* ----------------------------------------------------------- + Inlined definitions +----------------------------------------------------------- */ +#define MI_UNUSED(x) (void)(x) +#if (MI_DEBUG>0) +#define MI_UNUSED_RELEASE(x) +#else +#define MI_UNUSED_RELEASE(x) MI_UNUSED(x) +#endif + +#define MI_INIT4(x) x(),x(),x(),x() +#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) +#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) +#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) +#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) +#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) +#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) + + +#include <string.h> +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + +// Is `x` a power of two? (0 is considered a power of two) +static inline bool _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} + +// Is a pointer aligned? +static inline bool _mi_is_aligned(void* p, size_t alignment) { + mi_assert_internal(alignment != 0); + return (((uintptr_t)p % alignment) == 0); +} + +// Align upwards +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + +// Align downwards +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return (sz & ~mask); + } + else { + return ((sz / alignment) * alignment); + } +} + +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + +// Is memory zero initialized? +static inline bool mi_mem_is_zero(const void* p, size_t size) { + for (size_t i = 0; i < size; i++) { + if (((uint8_t*)p)[i] != 0) return false; + } + return true; +} + + +// Align a byte size to a size in _machine words_, +// i.e. byte size == `wsize*sizeof(void*)`. +static inline size_t _mi_wsize_from_size(size_t size) { + mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Overflow detecting multiply +#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) +#include <limits.h> // UINT_MAX, ULONG_MAX +#if defined(_CLOCK_T) // for Illumos +#undef _CLOCK_T +#endif +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + #if (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, (unsigned long *)total); + #elif (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, (unsigned int *)total); + #else + return __builtin_umulll_overflow(count, size, (unsigned long long *)total); + #endif +} +#else /* __builtin_umul_overflow is unavailable */ +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + *total = count * size; + // note: gcc/clang optimize this to directly check the overflow flag + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); +} +#endif + +// Safe multiply `count*size` into `total`; return `true` on overflow. +static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { + if (count==1) { // quick check for the case where count is one (common for C++ allocators) + *total = size; + return false; + } + else if mi_unlikely(mi_mul_overflow(count, size, total)) { + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); + #endif + *total = SIZE_MAX; + return true; + } + else return false; +} + + +/*---------------------------------------------------------------------------------------- + Heap functions +------------------------------------------------------------------------------------------- */ + +extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap + +static inline bool mi_heap_is_backing(const mi_heap_t* heap) { + return (heap->tld->heap_backing == heap); +} + +static inline bool mi_heap_is_initialized(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + return (heap != &_mi_heap_empty); +} + +static inline uintptr_t _mi_ptr_cookie(const void* p) { + extern mi_heap_t _mi_heap_main; + mi_assert_internal(_mi_heap_main.cookie != 0); + return ((uintptr_t)p ^ _mi_heap_main.cookie); +} + +/* ----------------------------------------------------------- + Pages +----------------------------------------------------------- */ + +static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return heap->pages_free_direct[idx]; +} + +// Segment that contains the pointer +// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), +// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; +// therefore we align one byte before `p`. +static inline mi_segment_t* _mi_ptr_segment(const void* p) { + mi_assert_internal(p != NULL); + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); +} + +static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { + mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0); + return (mi_page_t*)(s); +} + +static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { + mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0); + return (mi_slice_t*)(p); +} + +// Segment belonging to a page +static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries)); + return segment; +} + +static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { + mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); + mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); + mi_assert_internal(start->slice_offset == 0); + mi_assert_internal(start + start->slice_count > slice); + return start; +} + +// Get the page containing the pointer (performance critical as it is called in mi_free) +static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + mi_assert_internal(p > (void*)segment); + ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); + size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; + mi_assert_internal(idx <= segment->slice_entries); + mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; + mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries); + return mi_slice_to_page(slice); +} + +// Quick page start for initialized pages +static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { + return _mi_segment_page_start(segment, page, page_size); +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_ptr_page(void* p) { + return _mi_segment_page_of(_mi_ptr_segment(p), p); +} + +// Get the block size of a page (special case for huge objects) +static inline size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, &psize); + return psize; + } +} + +static inline bool mi_page_is_huge(const mi_page_t* page) { + return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); +} + +// Get the usable block size of a page without fixed padding. +// This may still include internal padding due to alignment and rounding up size classes. +static inline size_t mi_page_usable_block_size(const mi_page_t* page) { + return mi_page_block_size(page) - MI_PADDING_SIZE; +} + +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} + +static inline uint8_t* mi_segment_end(mi_segment_t* segment) { + return (uint8_t*)segment + mi_segment_size(segment); +} + +// Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_store_release(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { + return (mi_block_t*)(tf & ~0x03); +} +static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { + return (mi_delayed_t)(tf & 0x03); +} +static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { + return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); +} +static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { + return mi_tf_make(mi_tf_block(tf),delayed); +} +static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { + return mi_tf_make(block, mi_tf_delayed(tf)); +} + +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. +static inline bool mi_page_all_free(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->used == 0); +} + +// are there any available blocks? +static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// are there immediately available blocks, i.e. blocks available on the free list. +static inline bool mi_page_immediate_available(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->free != NULL); +} + +// is more than 7/8th of a page in use? +static inline bool mi_page_mostly_used(const mi_page_t* page) { + if (page==NULL) return true; + uint16_t frac = page->reserved / 8U; + return (page->reserved - page->used <= frac); +} + +static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { + return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; +} + + + +//----------------------------------------------------------- +// Page flags +//----------------------------------------------------------- +static inline bool mi_page_is_in_full(const mi_page_t* page) { + return page->flags.x.in_full; +} + +static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { + page->flags.x.in_full = in_full; +} + +static inline bool mi_page_has_aligned(const mi_page_t* page) { + return page->flags.x.has_aligned; +} + +static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { + page->flags.x.has_aligned = has_aligned; +} + + +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read as well, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`. +Since these operations are not associative, the above approaches do not +work so well any more even if the `p` can be guesstimated. For example, +for the read case we can subtract two entries to discard the `+k1` term, +but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best. +We include the left-rotation since xor and addition are otherwise linear +in the lowest bit. Finally, both keys are unique per page which reduces +the re-use of keys by a large factor. + +We also pass a separate `null` value to be used as `NULL` or otherwise +`(k2<<<k1)+k1` would appear (too) often as a sentinel value. +------------------------------------------------------------------- */ + +static inline bool mi_is_in_same_segment(const void* p, const void* q) { + return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); +} + +static inline bool mi_is_in_same_page(const void* p, const void* q) { + mi_segment_t* segment = _mi_ptr_segment(p); + if (_mi_ptr_segment(q) != segment) return false; + // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); + mi_page_t* page = _mi_segment_page_of(segment, p); + size_t psize; + uint8_t* start = _mi_segment_page_start(segment, page, &psize); + return (start <= (uint8_t*)q && (uint8_t*)q < start + psize); +} + +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; + return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift)))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; + return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift)))); +} + +static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { + void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); + return (p==null ? NULL : p); +} + +static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { + uintptr_t x = (uintptr_t)(p==NULL ? null : p); + return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { + mi_track_mem_defined(block,sizeof(mi_block_t)); + mi_block_t* next; + #ifdef MI_ENCODE_FREELIST + next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); + #else + MI_UNUSED(keys); MI_UNUSED(null); + next = (mi_block_t*)block->next; + #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); + return next; +} + +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { + mi_track_mem_undefined(block,sizeof(mi_block_t)); + #ifdef MI_ENCODE_FREELIST + block->next = mi_ptr_encode(null, next, keys); + #else + MI_UNUSED(keys); MI_UNUSED(null); + block->next = (mi_encoded_t)next; + #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); +} + +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { + #ifdef MI_ENCODE_FREELIST + mi_block_t* next = mi_block_nextx(page,block,page->keys); + // check for free list corruption: is `next` at least in the same page? + // TODO: check if `next` is `page->block_size` aligned? + if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + next = NULL; + } + return next; + #else + MI_UNUSED(page); + return mi_block_nextx(page,block,NULL); + #endif +} + +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { + #ifdef MI_ENCODE_FREELIST + mi_block_set_nextx(page,block,next, page->keys); + #else + MI_UNUSED(page); + mi_block_set_nextx(page,block,next,NULL); + #endif +} + + +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + cm->mask[i] = 0; + } +} + +static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + cm->mask[i] = ~((size_t)0); + } +} + +static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if (cm->mask[i] != 0) return false; + } + return true; +} + +static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if (cm->mask[i] != ~((size_t)0)) return false; + } + return true; +} + +// defined in `segment.c`: +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); + +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + + + +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.initially_committed = committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return memid; +} + + +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c> + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/> + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); + +extern _Atomic(size_t) _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } + else return _mi_os_numa_node_get(tld); +} +static inline size_t _mi_os_numa_node_count(void) { + const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); + if mi_likely(count > 0) { return count; } + else return _mi_os_numa_node_count_get(); +} + + + +// ----------------------------------------------------------------------- +// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) +// ----------------------------------------------------------------------- + +#if defined(__GNUC__) + +#include <limits.h> // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_clzl(x); +#else + return __builtin_clzll(x); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_ctzl(x); +#else + return __builtin_ctzll(x); +#endif +} + +#elif defined(_MSC_VER) + +#include <limits.h> // LONG_MAX +#include <intrin.h> // BitScanReverse64 +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanReverse(&idx, x); +#else + _BitScanReverse64(&idx, x); +#endif + return ((MI_INTPTR_BITS - 1) - idx); +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanForward(&idx, x); +#else + _BitScanForward64(&idx, x); +#endif + return idx; +} + +#else +static inline size_t mi_ctz32(uint32_t x) { + // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> + static const unsigned char debruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + if (x==0) return 32; + return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; +} +static inline size_t mi_clz32(uint32_t x) { + // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> + static const uint8_t debruijn[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 + }; + if (x==0) return 32; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; +} + +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_clz32((uint32_t)x); +#else + size_t count = mi_clz32((uint32_t)(x >> 32)); + if (count < 32) return count; + return (32 + mi_clz32((uint32_t)x)); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_ctz32((uint32_t)x); +#else + size_t count = mi_ctz32((uint32_t)x); + if (count < 32) return count; + return (32 + mi_ctz32((uint32_t)(x>>32))); +#endif +} + +#endif + +// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); +} + + +// --------------------------------------------------------------------------------- +// Provide our own `_mi_memcpy` for potential performance optimizations. +// +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if +// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. +// --------------------------------------------------------------------------------- + +#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) +#include <intrin.h> +extern bool _mi_cpu_has_fsrm; +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + if (_mi_cpu_has_fsrm) { + __movsb((unsigned char*)dst, (const unsigned char*)src, n); + } + else { + memcpy(dst, src, n); + } +} +static inline void _mi_memzero(void* dst, size_t n) { + if (_mi_cpu_has_fsrm) { + __stosb((unsigned char*)dst, 0, n); + } + else { + memset(dst, 0, n); + } +} +#else +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + memcpy(dst, src, n); +} +static inline void _mi_memzero(void* dst, size_t n) { + memset(dst, 0, n); +} +#endif + +// ------------------------------------------------------------------------------- +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// This is used for example in `mi_realloc`. +// ------------------------------------------------------------------------------- + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) +// On GCC/CLang we provide a hint that the pointers are word aligned. +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); + _mi_memcpy(adst, asrc, n); +} + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + _mi_memzero(adst, n); +} +#else +// Default fallback on `_mi_memcpy` +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + _mi_memcpy(dst, src, n); +} + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + _mi_memzero(dst, n); +} +#endif + + +#endif diff --git a/Include/internal/mimalloc/mimalloc/prim.h b/Include/internal/mimalloc/mimalloc/prim.h new file mode 100644 index 0000000..4b9e4dc --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/prim.h @@ -0,0 +1,323 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_PRIM_H +#define MIMALLOC_PRIM_H + + +// -------------------------------------------------------------------------- +// This file specifies the primitive portability API. +// Each OS/host needs to implement these primitives, see `src/prim` +// for implementations on Window, macOS, WASI, and Linux/Unix. +// +// note: on all primitive functions, we always have result parameters != NUL, and: +// addr != NULL and page aligned +// size > 0 and page aligned +// return value is an error code an int where 0 is success. +// -------------------------------------------------------------------------- + +// OS memory configuration +typedef struct mi_os_mem_config_s { + size_t page_size; // 4KiB + size_t large_page_size; // 2MiB + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) +} mi_os_mem_config_t; + +// Initialize +void _mi_prim_mem_init( mi_os_mem_config_t* config ); + +// Free OS memory +int _mi_prim_free(void* addr, size_t size ); + +// Allocate OS memory. Return NULL on error. +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: !commit => !allow_large +// try_alignment >= _mi_os_page_size() and a power of 2 +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); + +// Commit memory. Returns error code or 0 on success. +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); + +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); + +// Reset memory. The range keeps being accessible but the content might be reset. +// Returns error code or 0 on success. +int _mi_prim_reset(void* addr, size_t size); + +// Protect memory. Returns error code or 0 on success. +int _mi_prim_protect(void* addr, size_t size, bool protect); + +// Allocate huge (1GiB) pages possibly associated with a NUMA node. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: size > 0 and a multiple of 1GiB. +// numa_node is either negative (don't care), or a numa node number. +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); + +// Return the current NUMA node +size_t _mi_prim_numa_node(void); + +// Return the number of logical NUMA nodes +size_t _mi_prim_numa_node_count(void); + +// Clock ticks +mi_msecs_t _mi_prim_clock_now(void); + +// Return process information (only for statistics) +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); + +// Default stderr output. (only for warnings etc. with verbose enabled) +// msg != NULL && _mi_strlen(msg) > 0 +void _mi_prim_out_stderr( const char* msg ); + +// Get an environment variable. (only for options) +// name != NULL, result != NULL, result_size >= 64 +bool _mi_prim_getenv(const char* name, char* result, size_t result_size); + + +// Fill a buffer with strong randomness; return `false` on error or if +// there is no strong randomization available. +bool _mi_prim_random_buf(void* buf, size_t buf_len); + +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. +void _mi_prim_thread_init_auto_done(void); + +// Called on process exit and may take action to clean up resources associated with the thread auto done. +void _mi_prim_thread_done_auto_done(void); + +// Called when the default heap for a thread changes +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); + + +//------------------------------------------------------------------- +// Thread id: `_mi_prim_thread_id()` +// +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms as +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`. +// We only require _mi_prim_thread_id() to return a unique id +// for each thread (unequal to zero). +//------------------------------------------------------------------- + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on +// both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. +#elif defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + ) + +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #endif + return res; +} + +// setting a tls slot is only used on macOS for now +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + tcb[slot] = value; + #endif +} + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_prim_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_prim_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} + +#endif + + + +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_prim_get_default_heap()` +This is inlined here as it is on the fast path for allocation functions. + +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). + +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +------------------------------------------------------------------------------------------- */ + +static inline mi_heap_t* mi_prim_get_default_heap(void); + +#if defined(MI_MALLOC_OVERRIDE) +#if defined(__APPLE__) // macOS + #define MI_TLS_SLOT 89 // seems unused? + // #define MI_TLS_RECURSE_GUARD 1 + // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) + // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h> +#elif defined(__OpenBSD__) + // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) + // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371> + #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) + // #elif defined(__DragonFly__) + // #warning "mimalloc is not working correctly on DragonFly yet." + // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458> +#elif defined(__ANDROID__) + // See issue #381 + #define MI_TLS_PTHREAD +#endif +#endif + + +#if defined(MI_TLS_SLOT) + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + if mi_unlikely(heap == NULL) { + #ifdef __GNUC__ + __asm(""); // prevent conditional load of the address of _mi_heap_empty + #endif + heap = (mi_heap_t*)&_mi_heap_empty; + } + return heap; +} + +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + +static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { + pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) return NULL; + #endif + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); +} + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); + if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); + mi_heap_t* heap = *pheap; + if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; + return heap; +} + +#elif defined(MI_TLS_PTHREAD) + +extern pthread_key_t _mi_heap_default_key; +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +} + +#else // default using a thread local variable; used on most platforms. + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); + #endif + return _mi_heap_default; +} + +#endif // mi_prim_get_default_heap() + + + +#endif // MIMALLOC_PRIM_H diff --git a/Include/internal/mimalloc/mimalloc/track.h b/Include/internal/mimalloc/mimalloc/track.h new file mode 100644 index 0000000..fa1a048 --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/track.h @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TRACK_H +#define MIMALLOC_TRACK_H + +/* ------------------------------------------------------------------------------------------------------ +Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. +These can be defined for tracking allocation: + + #define mi_track_malloc_size(p,reqsize,size,zero) + #define mi_track_free_size(p,_size) + +The macros are set up such that the size passed to `mi_track_free_size` +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). +The `reqsize` is what the user requested, and `size >= reqsize`. +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, +or otherwise it is the usable block size which may be larger than the original request. +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). +The `zero` parameter is `true` if the allocated block is zero initialized. + +Optional: + + #define mi_track_align(p,alignedp,offset,size) + #define mi_track_resize(p,oldsize,newsize) + #define mi_track_init() + +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). +The `mi_track_resize` is currently unused but could be called on reallocations within a block. +`mi_track_init` is called at program start. + +The following macros are for tools like asan and valgrind to track whether memory is +defined, undefined, or not accessible at all: + + #define mi_track_mem_defined(p,size) + #define mi_track_mem_undefined(p,size) + #define mi_track_mem_noaccess(p,size) + +-------------------------------------------------------------------------------------------------------*/ + +#if MI_TRACK_VALGRIND +// valgrind tool + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy +#define MI_TRACK_TOOL "valgrind" + +#include <valgrind/valgrind.h> +#include <valgrind/memcheck.h> + +#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) +#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) + +#elif MI_TRACK_ASAN +// address sanitizer + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "asan" + +#include <sanitizer/asan_interface.h> + +#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + +#elif MI_TRACK_ETW +// windows event tracing + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 +#define MI_TRACK_TOOL "ETW" + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include "../src/prim/windows/etw.h" + +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) + +#else +// no tracking + +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc_size(p,reqsize,size,zero) +#define mi_track_free_size(p,_size) + +#endif + +// ------------------- +// Utility definitions + +#ifndef mi_track_resize +#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) +#endif + +#ifndef mi_track_align +#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) +#endif + +#ifndef mi_track_init +#define mi_track_init() +#endif + +#ifndef mi_track_mem_defined +#define mi_track_mem_defined(p,size) +#endif + +#ifndef mi_track_mem_undefined +#define mi_track_mem_undefined(p,size) +#endif + +#ifndef mi_track_mem_noaccess +#define mi_track_mem_noaccess(p,size) +#endif + + +#if MI_PADDING +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)==(reqsize)); \ + mi_track_malloc_size(p,reqsize,reqsize,zero); \ + } +#else +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)>=(reqsize)); \ + mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \ + } +#endif + +#endif diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h new file mode 100644 index 0000000..7616f37 --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -0,0 +1,670 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TYPES_H +#define MIMALLOC_TYPES_H + +// -------------------------------------------------------------------------- +// This file contains the main type definitions for mimalloc: +// mi_heap_t : all data for a thread-local heap, contains +// lists of all managed heap pages. +// mi_segment_t : a larger chunk of memory (32GiB) from where pages +// are allocated. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// where objects are allocated. +// -------------------------------------------------------------------------- + + +#include <stddef.h> // ptrdiff_t +#include <stdint.h> // uintptr_t, uint16_t, etc +#include "mimalloc/atomic.h" // _Atomic + +#ifdef _MSC_VER +#pragma warning(disable:4214) // bitfield is not int +#endif + +// Minimal alignment necessary. On most platforms 16 bytes are needed +// due to SSE registers for example. This must be at least `sizeof(void*)` +#ifndef MI_MAX_ALIGN_SIZE +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#endif + +// ------------------------------------------------------ +// Variants +// ------------------------------------------------------ + +// Define NDEBUG in the release version to disable assertions. +// #define NDEBUG + +// Define MI_TRACK_<tool> to enable tracking support +// #define MI_TRACK_VALGRIND 1 +// #define MI_TRACK_ASAN 1 +// #define MI_TRACK_ETW 1 + +// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). +// #define MI_STAT 1 + +// Define MI_SECURE to enable security mitigations +// #define MI_SECURE 1 // guard page around metadata +// #define MI_SECURE 2 // guard page around each mimalloc page +// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) +// #define MI_SECURE 4 // checks for double free. (may be more expensive) + +#if !defined(MI_SECURE) +#define MI_SECURE 0 +#endif + +// Define MI_DEBUG for debug mode +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. +// #define MI_DEBUG 2 // + internal assertion checks +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) +#if !defined(MI_DEBUG) +#if !defined(NDEBUG) || defined(_DEBUG) +#define MI_DEBUG 2 +#else +#define MI_DEBUG 0 +#endif +#endif + +// Reserve extra padding at the end of each block to be more resilient against heap block overflows. +// The padding can detect buffer overflow on free. +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW)) +#define MI_PADDING 1 +#endif + +// Check padding bytes; allows byte-precise buffer overflow detection +#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_PADDING_CHECK 1 +#endif + + +// Encoded free lists allow detection of corrupted free lists +// and can detect buffer overflows, modify after free, and double `free`s. +#if (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_ENCODE_FREELIST 1 +#endif + + +// We used to abandon huge pages but to eagerly deallocate if freed from another thread, +// but that makes it not possible to visit them during a heap walk or include them in a +// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from +// another thread so most memory is available until it gets properly freed by the owning thread. +// #define MI_HUGE_PAGE_ABANDON 1 + + +// ------------------------------------------------------ +// Platform specific values +// ------------------------------------------------------ + +// ------------------------------------------------------ +// Size of a pointer. +// We assume that `sizeof(void*)==sizeof(intptr_t)` +// and it holds for all platforms we know of. +// +// However, the C standard only requires that: +// p == (void*)((intptr_t)p)) +// but we also need: +// i == (intptr_t)((void*)i) +// or otherwise one might define an intptr_t type that is larger than a pointer... +// ------------------------------------------------------ + +#if INTPTR_MAX > INT64_MAX +# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) +#elif INTPTR_MAX == INT64_MAX +# define MI_INTPTR_SHIFT (3) +#elif INTPTR_MAX == INT32_MAX +# define MI_INTPTR_SHIFT (2) +#else +#error platform pointers must be 32, 64, or 128 bits +#endif + +#if SIZE_MAX == UINT64_MAX +# define MI_SIZE_SHIFT (3) +typedef int64_t mi_ssize_t; +#elif SIZE_MAX == UINT32_MAX +# define MI_SIZE_SHIFT (2) +typedef int32_t mi_ssize_t; +#else +#error platform objects must be 32 or 64 bits +#endif + +#if (SIZE_MAX/2) > LONG_MAX +# define MI_ZU(x) x##ULL +# define MI_ZI(x) x##LL +#else +# define MI_ZU(x) x##UL +# define MI_ZI(x) x##L +#endif + +#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT) +#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8) + +#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT) +#define MI_SIZE_BITS (MI_SIZE_SIZE*8) + +#define MI_KiB (MI_ZU(1024)) +#define MI_MiB (MI_KiB*MI_KiB) +#define MI_GiB (MI_MiB*MI_KiB) + + +// ------------------------------------------------------ +// Main internal data-structures +// ------------------------------------------------------ + +// Main tuning parameters for segment and page sizes +// Sizes for 64-bit (usually divide by two for 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) + +#if MI_INTPTR_SIZE > 4 +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB +#else +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit +#endif + +#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB + + +// Derived constants +#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT) +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1)) +#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 + +#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT) +#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT) + +#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit +#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit +#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit +#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) + +// Maximum number of size classes. (spaced exponentially in 12.5% increments) +#define MI_BIN_HUGE (73U) + +#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360) +#error "mimalloc internal: define more bins" +#endif + +// Maximum slice offset (15) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) + +// Used as a special value to encode block sizes in 32 bits. +#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) + +// blocks up to this size are always allocated aligned +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) + +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) + + +// ------------------------------------------------------ +// Mimalloc pages contain allocated blocks +// ------------------------------------------------------ + +// The free lists use encoded next fields +// (Only actually encodes when MI_ENCODED_FREELIST is defined.) +typedef uintptr_t mi_encoded_t; + +// thread id's +typedef size_t mi_threadid_t; + +// free lists contain blocks +typedef struct mi_block_s { + mi_encoded_t next; +} mi_block_t; + + +// The delayed flags are used for efficient multi-threaded free-ing +typedef enum mi_delayed_e { + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim +} mi_delayed_t; + + +// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// test if both are false (`full_aligned == 0`) in the `mi_free` routine. +#if !MI_TSAN +typedef union mi_page_flags_s { + uint8_t full_aligned; + struct { + uint8_t in_full : 1; + uint8_t has_aligned : 1; + } x; +} mi_page_flags_t; +#else +// under thread sanitizer, use a byte for each flag to suppress warning, issue #130 +typedef union mi_page_flags_s { + uint16_t full_aligned; + struct { + uint8_t in_full; + uint8_t has_aligned; + } x; +} mi_page_flags_t; +#endif + +// Thread free list. +// We use the bottom 2 bits of the pointer for mi_delayed_t flags +typedef uintptr_t mi_thread_free_t; + +// A page contains blocks of one specific size (`block_size`). +// Each page has three list of free blocks: +// `free` for blocks that can be allocated, +// `local_free` for freed blocks that are not yet available to `mi_malloc` +// `thread_free` for freed blocks by other threads +// The `local_free` and `thread_free` lists are migrated to the `free` list +// when it is exhausted. The separate `local_free` list is necessary to +// implement a monotonic heartbeat. The `thread_free` list is needed for +// avoiding atomic operations in the common case. +// +// +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count `freed` (as |free|) but use `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). +// +// Notes: +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Using `uint16_t` does not seem to slow things down +// - The size is 8 words on 64-bit which helps the page index calculations +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// and 12 are still good for address calculation) +// - To limit the structure size, the `xblock_size` is 32-bits only; for +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// concurrent frees where only the first concurrent free adds to the owning +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// The invariant is that no-delayed-free is only set if there is +// at least one block that will be added, or as already been added, to +// the owning heap `thread_delayed_free` list. This guarantees that pages +// will be freed correctly even if only other threads free blocks. +typedef struct mi_page_s { + // "owned" by the segment + uint32_t slice_count; // slices in this page (0 if not a page) + uint32_t slice_offset; // distance from the actual page data slice (0 if a page) + uint8_t is_committed : 1; // `true` if the page virtual memory is committed + uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized + + // layout like this to optimize access in `mi_malloc` and `mi_free` + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` + uint16_t reserved; // number of blocks reserved in memory + mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) + uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire : 7; // expiration count for retired blocks + + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + + #if (MI_ENCODE_FREELIST || MI_PADDING) + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary + #endif + + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + _Atomic(uintptr_t) xheap; + + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + // 64-bit 9 words, 32-bit 12 words, (+2 for secure) + #if MI_INTPTR_SIZE==8 + uintptr_t padding[1]; + #endif +} mi_page_t; + + + +// ------------------------------------------------------ +// Mimalloc segments contain mimalloc pages +// ------------------------------------------------------ + +typedef enum mi_page_kind_e { + MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment + MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment + MI_PAGE_LARGE, // larger blocks go into a page of just one block + MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. +} mi_page_kind_t; + +typedef enum mi_segment_kind_e { + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. +} mi_segment_kind_t; + +// ------------------------------------------------------ +// A segment holds a commit mask where a bit is set if +// the corresponding MI_COMMIT_SIZE area is committed. +// The MI_COMMIT_SIZE must be a multiple of the slice +// size. If it is equal we have the most fine grained +// decommit (but setting it higher can be more efficient). +// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will +// be committed in one go which can be set higher than +// MI_COMMIT_SIZE for efficiency (while the decommit mask +// is still tracked in fine-grained MI_COMMIT_SIZE chunks) +// ------------------------------------------------------ + +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) + +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) +#error "the segment size must be exactly divisible by the (commit size * size_t bits)" +#endif + +typedef struct mi_commit_mask_s { + size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; +} mi_commit_mask_t; + +typedef mi_page_t mi_slice_t; +typedef int64_t mi_msecs_t; + + +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge os pages + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA // allocated from an arena (the usual case) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // the arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized + mi_memkind_t memkind; +} mi_memid_t; + + +// Segments are large allocated memory blocks (8mb on 64 bit) from +// the OS. Inside segments we allocated fixed size _pages_ that +// contain blocks. +typedef struct mi_segment_s { + // constant fields + mi_memid_t memid; // memory id for arena allocation + bool allow_decommit; + bool allow_purge; + size_t segment_size; + + // segment fields + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; + mi_commit_mask_t commit_mask; + + _Atomic(struct mi_segment_s*) abandoned_next; + + // from here is zero initialized + struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) + + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t used; // count of pages in use + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + + size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` + size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + + // layout like this to optimize access in `mi_free` + mi_segment_kind_t kind; + size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment +} mi_segment_t; + + +// ------------------------------------------------------ +// Heaps +// Provide first-class heaps to allocate from. +// A heap just owns a set of pages for allocation and +// can only be allocate/reallocate from the thread that created it. +// Freeing blocks can be done from any thread though. +// Per thread, the segments are shared among its heaps. +// Per thread, there is always a default heap that is +// used for allocation; it is initialized to statically +// point to an empty heap to avoid initialization checks +// in the fast path. +// ------------------------------------------------------ + +// Thread local data +typedef struct mi_tld_s mi_tld_t; + +// Pages of a certain block size are held in a queue. +typedef struct mi_page_queue_s { + mi_page_t* first; + mi_page_t* last; + size_t block_size; +} mi_page_queue_t; + +#define MI_BIN_FULL (MI_BIN_HUGE+1) + +// Random context +typedef struct mi_random_cxt_s { + uint32_t input[16]; + uint32_t output[16]; + int output_available; + bool weak; +} mi_random_ctx_t; + + +// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows +#if (MI_PADDING) +typedef struct mi_padding_s { + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) + uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) +} mi_padding_t; +#define MI_PADDING_SIZE (sizeof(mi_padding_t)) +#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) +#else +#define MI_PADDING_SIZE 0 +#define MI_PADDING_WSIZE 0 +#endif + +#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1) + + +// A heap owns a set of pages. +struct mi_heap_s { + mi_tld_t* tld; + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + _Atomic(mi_block_t*) thread_delayed_free; + mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation + size_t page_count; // total number of pages in the `pages` queues. + size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) + size_t page_retired_max; // largest retired index into the `pages` array. + mi_heap_t* next; // list of heaps per thread + bool no_reclaim; // `true` if this heap should not reclaim abandoned pages +}; + + + +// ------------------------------------------------------ +// Debug +// ------------------------------------------------------ + +#if !defined(MI_DEBUG_UNINIT) +#define MI_DEBUG_UNINIT (0xD0) +#endif +#if !defined(MI_DEBUG_FREED) +#define MI_DEBUG_FREED (0xDF) +#endif +#if !defined(MI_DEBUG_PADDING) +#define MI_DEBUG_PADDING (0xDE) +#endif + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); +#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + +// ------------------------------------------------------ +// Statistics +// ------------------------------------------------------ + +#ifndef MI_STAT +#if (MI_DEBUG>0) +#define MI_STAT 2 +#else +#define MI_STAT 0 +#endif +#endif + +typedef struct mi_stat_count_s { + int64_t allocated; + int64_t freed; + int64_t peak; + int64_t current; +} mi_stat_count_t; + +typedef struct mi_stat_counter_s { + int64_t total; + int64_t count; +} mi_stat_counter_t; + +typedef struct mi_stats_s { + mi_stat_count_t segments; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t purged; + mi_stat_count_t page_committed; + mi_stat_count_t segments_abandoned; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t normal; + mi_stat_count_t huge; + mi_stat_count_t large; + mi_stat_count_t malloc; + mi_stat_count_t segments_cache; + mi_stat_counter_t pages_extended; + mi_stat_counter_t mmap_calls; + mi_stat_counter_t commit_calls; + mi_stat_counter_t reset_calls; + mi_stat_counter_t purge_calls; + mi_stat_counter_t page_no_retire; + mi_stat_counter_t searches; + mi_stat_counter_t normal_count; + mi_stat_counter_t huge_count; + mi_stat_counter_t large_count; +#if MI_STAT>1 + mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; +#endif +} mi_stats_t; + + +void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); + +#if (MI_STAT) +#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) +#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) +#else +#define mi_stat_increase(stat,amount) (void)0 +#define mi_stat_decrease(stat,amount) (void)0 +#define mi_stat_counter_increase(stat,amount) (void)0 +#endif + +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) + +// ------------------------------------------------------ +// Thread Local data +// ------------------------------------------------------ + +// A "span" is is an available range of slices. The span queues keep +// track of slice spans of at most the given `slice_count` (but more than the previous size class). +typedef struct mi_span_queue_s { + mi_slice_t* first; + mi_slice_t* last; + size_t slice_count; +} mi_span_queue_t; + +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) + +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats +} mi_os_tld_t; + + +// Segments thread local data +typedef struct mi_segments_tld_s { + mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments + size_t count; // current number of segments; + size_t peak_count; // peak number of segments + size_t current_size; // current size of all segments + size_t peak_size; // peak size of all segments + mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats +} mi_segments_tld_t; + +// Thread local data +struct mi_tld_s { + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if deferred was called; used to prevent infinite recursion. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + mi_segments_tld_t segments; // segment tld + mi_os_tld_t os; // os tld + mi_stats_t stats; // statistics +}; + +#endif diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h new file mode 100644 index 0000000..c29dc82 --- /dev/null +++ b/Include/internal/pycore_mimalloc.h @@ -0,0 +1,19 @@ +#ifndef Py_INTERNAL_MIMALLOC_H +#define Py_INTERNAL_MIMALLOC_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#if defined(MIMALLOC_H) || defined(MIMALLOC_TYPES_H) +# error "pycore_mimalloc.h must be included before mimalloc.h" +#endif + +#include "pycore_pymem.h" +#define MI_DEBUG_UNINIT PYMEM_CLEANBYTE +#define MI_DEBUG_FREED PYMEM_DEADBYTE +#define MI_DEBUG_PADDING PYMEM_FORBIDDENBYTE + +#include "mimalloc.h" + +#endif // Py_INTERNAL_MIMALLOC_H diff --git a/Include/internal/pycore_pymem_init.h b/Include/internal/pycore_pymem_init.h index 119fa16..11fbe16 100644 --- a/Include/internal/pycore_pymem_init.h +++ b/Include/internal/pycore_pymem_init.h @@ -18,17 +18,18 @@ extern void * _PyMem_RawRealloc(void *, void *, size_t); extern void _PyMem_RawFree(void *, void *); #define PYRAW_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree} -#ifdef WITH_PYMALLOC +#if defined(WITH_PYMALLOC) extern void* _PyObject_Malloc(void *, size_t); extern void* _PyObject_Calloc(void *, size_t, size_t); extern void _PyObject_Free(void *, void *); extern void* _PyObject_Realloc(void *, void *, size_t); # define PYOBJ_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free} +# define PYMEM_ALLOC PYOBJ_ALLOC #else # define PYOBJ_ALLOC PYRAW_ALLOC +# define PYMEM_ALLOC PYOBJ_ALLOC #endif // WITH_PYMALLOC -#define PYMEM_ALLOC PYOBJ_ALLOC extern void* _PyMem_DebugRawMalloc(void *, size_t); extern void* _PyMem_DebugRawCalloc(void *, size_t, size_t); |