author    Qi Wang <interwq@gwu.edu>    2017-02-23 22:18:07 (GMT)
committer Qi Wang <interwq@gmail.com>  2017-03-23 07:03:28 (GMT)
commit    6309df628fa4f11dce084dc53c77ea852408d347 (patch)
tree      b04f7cae7e173cc3ec9fce75d4712fd200446d2b /include/jemalloc
parent    32e7cf51cd879e4f2b0307bba544f913e2d77a7e (diff)
First stage of mutex profiling.
Switched to trylock, updating counters based on the lock state.
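
The scheme this commit introduces: try the lock first; on failure, take a slow path (malloc_mutex_lock_slow, declared here but implemented outside this header change); once the lock is owned, bump the profiling counters with plain stores, since only the holder touches them. A minimal sketch of that fast path in C, distilled from the diff below:

	if (malloc_mutex_trylock(mutex)) {
		/* Trylock returns false on success, so true means contention. */
		malloc_mutex_lock_slow(mutex);
	}
	/* The lock is held from here on; unsynchronized updates are safe. */
	mutex->prof_data.n_lock_ops++;
	if (mutex->prof_data.prev_owner != tsdn) {
		mutex->prof_data.prev_owner = tsdn;
		mutex->prof_data.n_owner_switches++;
	}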
Diffstat (limited to 'include/jemalloc')
-rw-r--r--  include/jemalloc/internal/mutex_inlines.h  | 51
-rw-r--r--  include/jemalloc/internal/mutex_structs.h  | 43
-rw-r--r--  include/jemalloc/internal/mutex_types.h    | 42
-rw-r--r--  include/jemalloc/internal/nstime_types.h   |  2
4 files changed, 106 insertions(+), 32 deletions(-)
diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h
index c0c3cfe..cf0ce23 100644
--- a/include/jemalloc/internal/mutex_inlines.h
+++ b/include/jemalloc/internal/mutex_inlines.h
@@ -1,8 +1,11 @@
#ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H
#define JEMALLOC_INTERNAL_MUTEX_INLINES_H
+void malloc_mutex_lock_slow(malloc_mutex_t *mutex);
+
#ifndef JEMALLOC_ENABLE_INLINE
void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+bool malloc_mutex_trylock(malloc_mutex_t *mutex);
void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
@@ -10,22 +13,30 @@ void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
JEMALLOC_INLINE void
+malloc_mutex_lock_final(malloc_mutex_t *mutex) {
+ MALLOC_MUTEX_LOCK(mutex);
+}
+
+/* Trylock: return false if the lock is successfully acquired. */
+JEMALLOC_INLINE bool
+malloc_mutex_trylock(malloc_mutex_t *mutex) {
+ return MALLOC_MUTEX_TRYLOCK(mutex);
+}
+
+JEMALLOC_INLINE void
malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_assert_not_owner(tsdn, &mutex->witness);
if (isthreaded) {
-#ifdef _WIN32
-# if _WIN32_WINNT >= 0x0600
- AcquireSRWLockExclusive(&mutex->lock);
-# else
- EnterCriticalSection(&mutex->lock);
-# endif
-#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
- os_unfair_lock_lock(&mutex->lock);
-#elif (defined(JEMALLOC_OSSPIN))
- OSSpinLockLock(&mutex->lock);
-#else
- pthread_mutex_lock(&mutex->lock);
-#endif
+ if (malloc_mutex_trylock(mutex)) {
+ malloc_mutex_lock_slow(mutex);
+ }
+ /* We own the lock now. Update a few counters. */
+ lock_prof_data_t *data = &mutex->prof_data;
+ data->n_lock_ops++;
+ if (data->prev_owner != tsdn) {
+ data->prev_owner = tsdn;
+ data->n_owner_switches++;
+ }
}
witness_lock(tsdn, &mutex->witness);
}
@@ -34,19 +45,7 @@ JEMALLOC_INLINE void
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_unlock(tsdn, &mutex->witness);
if (isthreaded) {
-#ifdef _WIN32
-# if _WIN32_WINNT >= 0x0600
- ReleaseSRWLockExclusive(&mutex->lock);
-# else
- LeaveCriticalSection(&mutex->lock);
-# endif
-#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
- os_unfair_lock_unlock(&mutex->lock);
-#elif (defined(JEMALLOC_OSSPIN))
- OSSpinLockUnlock(&mutex->lock);
-#else
- pthread_mutex_unlock(&mutex->lock);
-#endif
+ MALLOC_MUTEX_UNLOCK(mutex);
}
}
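
With this refactor, the per-platform #ifdef ladders in lock and unlock collapse into the MALLOC_MUTEX_LOCK/UNLOCK/TRYLOCK macros defined in mutex_types.h (below). On the plain pthreads fallback, for example, the inline bodies reduce to roughly the following; note how the trylock macro folds pthread's return convention (nonzero on failure) into jemalloc's boolean one (true means not acquired):

	/* malloc_mutex_lock_final(mutex) expands to: */
	pthread_mutex_lock(&mutex->lock);

	/* malloc_mutex_trylock(mutex) evaluates: */
	pthread_mutex_trylock(&mutex->lock) != 0	/* true => not acquired */

	/* malloc_mutex_unlock(tsdn, mutex), under isthreaded, expands to: */
	pthread_mutex_unlock(&mutex->lock);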
diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h
index c34c1d4..7065c99 100644
--- a/include/jemalloc/internal/mutex_structs.h
+++ b/include/jemalloc/internal/mutex_structs.h
@@ -1,9 +1,50 @@
#ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H
#define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H
+struct lock_prof_data_s {
+ /*
+ * Counters touched on the slow path, i.e. when there is lock
+ * contention. We update them once we have the lock.
+ */
+ /* Total time spent waiting on this lock. */
+ nstime_t tot_wait_time;
+ /* Max time spent on a single lock operation. */
+ nstime_t max_wait_time;
+	/* # of times we have to wait for this lock (after spinning). */
+ uint64_t n_wait_times;
+ /* # of times acquired the lock through local spinning. */
+ uint64_t n_spin_acquired;
+ /* Max # of threads waiting for the lock at the same time. */
+ uint32_t max_n_thds;
+	/* Current # of threads waiting on the lock. Atomically synced. */
+ uint32_t n_waiting_thds;
+
+ /*
+	 * Data touched on the fast path. These are modified right after we
+	 * grab the lock, so they are placed closest to the end (i.e. right
+	 * before the lock) so that we have a higher chance of them being on
+	 * the same cacheline as the lock itself.
+ */
+ /* # of times the new lock holder is different from the previous one. */
+ uint64_t n_owner_switches;
+ /* Previous lock holder, to facilitate n_owner_switches. */
+ tsdn_t *prev_owner;
+ /* # of lock() operations in total. */
+ uint64_t n_lock_ops;
+};
+
struct malloc_mutex_s {
union {
struct {
+ /*
+ * prof_data is defined first to reduce cacheline
+ * bouncing: the data is not touched by the lock holder
+			 * during unlocking, while it might be modified by
+ * contenders. Having it before the lock itself could
+ * avoid prefetching a modified cacheline (for the
+ * unlocking thread).
+ */
+ lock_prof_data_t prof_data;
#ifdef _WIN32
# if _WIN32_WINNT >= 0x0600
SRWLOCK lock;
@@ -22,7 +63,7 @@ struct malloc_mutex_s {
#endif
};
/*
- * We only touch witness when configured w/ debug. However we
+ * We only touch witness when configured w/ debug. However we
* keep the field in a union when !debug so that we don't have
	 * to pollute the code base with #ifdefs, while avoiding the
	 * memory cost.
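
The layout comments above are the crux of the struct change: contending threads update the slow-path counters at the top of prof_data while they wait, so those cachelines bounce, whereas the fast-path fields at its tail (n_owner_switches, prev_owner, n_lock_ops) are written only by the new owner immediately after acquiring. Placing the tail directly ahead of the lock word gives those fields a good chance of sharing the owner's cacheline. A hypothetical C11 sanity check (not part of the commit; assumes no padding between the two members) could pin down the intended adjacency:

	#include <stddef.h>
	/* Hypothetical: prof_data should butt up against the lock word. */
	_Static_assert(offsetof(malloc_mutex_t, prof_data) +
	    sizeof(lock_prof_data_t) == offsetof(malloc_mutex_t, lock),
	    "prof_data must directly precede the lock");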
diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h
index b7e3a7a..d7c7f04 100644
--- a/include/jemalloc/internal/mutex_types.h
+++ b/include/jemalloc/internal/mutex_types.h
@@ -1,31 +1,63 @@
#ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H
#define JEMALLOC_INTERNAL_MUTEX_TYPES_H
+typedef struct lock_prof_data_s lock_prof_data_t;
typedef struct malloc_mutex_s malloc_mutex_t;
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock))
+# else
+# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock))
+# endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
+#elif (defined(JEMALLOC_OSSPIN))
+# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock))
+#else
+# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
+#endif
+
+#define LOCK_PROF_DATA_INITIALIZER \
+ {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0}
+
+#ifdef _WIN32
# define MALLOC_MUTEX_INITIALIZER
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
# define MALLOC_MUTEX_INITIALIZER \
- {{{OS_UNFAIR_LOCK_INIT}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+ {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_OSSPIN))
# define MALLOC_MUTEX_INITIALIZER \
- {{{0}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+ {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
# define MALLOC_MUTEX_INITIALIZER \
- {{{PTHREAD_MUTEX_INITIALIZER, NULL}}, \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#else
+/* TODO: get rid of adaptive mutex once we do our own spin. */
# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \
defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
# define MALLOC_MUTEX_INITIALIZER \
- {{{PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \
+ {{{LOCK_PROF_DATA_INITIALIZER, \
+ PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
# else
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
# define MALLOC_MUTEX_INITIALIZER \
- {{{PTHREAD_MUTEX_INITIALIZER}}, \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
# endif
#endif
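
For reference, the nine elements of LOCK_PROF_DATA_INITIALIZER line up positionally with the fields of struct lock_prof_data_s in mutex_structs.h:

	{NSTIME_ZERO_INITIALIZER,	/* tot_wait_time */
	 NSTIME_ZERO_INITIALIZER,	/* max_wait_time */
	 0,				/* n_wait_times */
	 0,				/* n_spin_acquired */
	 0,				/* max_n_thds */
	 0,				/* n_waiting_thds */
	 0,				/* n_owner_switches */
	 NULL,				/* prev_owner */
	 0}				/* n_lock_ops */

Each MALLOC_MUTEX_INITIALIZER variant then splices this in as the new first element of the anonymous struct, matching prof_data's position ahead of the platform lock.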
diff --git a/include/jemalloc/internal/nstime_types.h b/include/jemalloc/internal/nstime_types.h
index d6039e0..6e7e74c 100644
--- a/include/jemalloc/internal/nstime_types.h
+++ b/include/jemalloc/internal/nstime_types.h
@@ -6,4 +6,6 @@ typedef struct nstime_s nstime_t;
/* Maximum supported number of seconds (~584 years). */
#define NSTIME_SEC_MAX KQU(18446744072)
+#define NSTIME_ZERO_INITIALIZER {0}
+
#endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */