summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src/mem-pool.c
diff options
context:
space:
mode:
authorXavi Hernandez <xhernandez@redhat.com>2020-02-07 10:19:57 +0100
committerhari gowtham <hari.gowtham005@gmail.com>2020-02-28 06:12:06 +0000
commit8b0a15e273e83ceaa5bd431d6c524908be4b2bd5 (patch)
tree2481101dde64a569497f88f9440f3db4c946f36d /libglusterfs/src/mem-pool.c
parentbd37f5350ac9b85c18353069c36a6ae4e489d100 (diff)
core: fix memory pool management races
Objects allocated from a per-thread memory pool keep a reference to it to be able to return the object to the pool when not used anymore. The object holding this reference can have a long life cycle that could survive a glfs_fini() call. This means that it's unsafe to destroy memory pools from glfs_fini(). Another side effect of destroying memory pools from glfs_fini() is that the TLS variable that points to one of those pools cannot be reset for all alive threads. This means that any attempt to allocate memory from those threads will access already free'd memory, which is very dangerous. To fix these issues, mem_pools_fini() doesn't destroy pool lists anymore. They should be destroyed when the library is unloaded or the process is terminated, but this cannot be done right now because gluster doesn't stop other threads before calling exit(), which could cause some races. This patch is the backport of 2 master patches: > Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 > Fixes: bz#1801684 > Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> > > Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b > Fixes: bz#1801684 > Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b Fixes: bz#1805671 Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Diffstat (limited to 'libglusterfs/src/mem-pool.c')
-rw-r--r--libglusterfs/src/mem-pool.c191
1 files changed, 100 insertions, 91 deletions
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index d88041d47ff..73503e085d7 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL;
#define POOL_SWEEP_SECS 30
typedef struct {
- struct list_head death_row;
pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
unsigned int n_cold_lists;
} sweep_state_t;
@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int init_count = 0;
static pthread_t sweeper_tid;
-gf_boolean_t
+static bool
collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
{
unsigned int i;
per_thread_pool_t *pt_pool;
- gf_boolean_t poisoned;
(void)pthread_spin_lock(&pool_list->lock);
- poisoned = pool_list->poison != 0;
- if (!poisoned) {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- if (pt_pool->cold_list) {
- if (state->n_cold_lists >= N_COLD_LISTS) {
- break;
- }
- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
}
- pt_pool->cold_list = pt_pool->hot_list;
- pt_pool->hot_list = NULL;
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
}
(void)pthread_spin_unlock(&pool_list->lock);
- return poisoned;
+ return false;
}
-void
+static void
free_obj_list(pooled_obj_hdr_t *victim)
{
pooled_obj_hdr_t *next;
@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim)
}
}
-void *
+static void *
pool_sweeper(void *arg)
{
sweep_state_t state;
per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- per_thread_pool_t *pt_pool;
- unsigned int i;
- gf_boolean_t poisoned;
+ uint32_t i;
+ bool pending;
/*
* This is all a bit inelegant, but the point is to avoid doing
* expensive things (like freeing thousands of objects) while holding a
- * global lock. Thus, we split each iteration into three passes, with
+ * global lock. Thus, we split each iteration into two passes, with
* only the first and fastest holding the lock.
*/
+ pending = true;
+
for (;;) {
- sleep(POOL_SWEEP_SECS);
+ /* If we know there's pending work to do (or it's the first run), we
+ * do collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
(void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
- INIT_LIST_HEAD(&state.death_row);
state.n_cold_lists = 0;
+ pending = false;
/* First pass: collect stuff that needs our attention. */
(void)pthread_mutex_lock(&pool_lock);
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- (void)pthread_mutex_unlock(&pool_lock);
- poisoned = collect_garbage(&state, pool_list);
- (void)pthread_mutex_lock(&pool_lock);
-
- if (poisoned) {
- list_move(&pool_list->thr_list, &state.death_row);
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
}
}
(void)pthread_mutex_unlock(&pool_lock);
- /* Second pass: free dead pools. */
- (void)pthread_mutex_lock(&pool_free_lock);
- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- free_obj_list(pt_pool->cold_list);
- free_obj_list(pt_pool->hot_list);
- pt_pool->hot_list = pt_pool->cold_list = NULL;
- }
- list_del(&pool_list->thr_list);
- list_add(&pool_list->thr_list, &pool_free_threads);
- }
- (void)pthread_mutex_unlock(&pool_free_lock);
-
- /* Third pass: free cold objects from live pools. */
+ /* Second pass: free cold objects from live pools. */
for (i = 0; i < state.n_cold_lists; ++i) {
free_obj_list(state.cold_lists[i]);
}
(void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
}
+
+ return NULL;
}
void
-mem_pool_thread_destructor(void)
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- per_thread_pool_list_t *pool_list = thread_pool_list;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
- /* The pool-sweeper thread will take it from here.
- *
- * We can change 'poison' here without taking locks because the change
- * itself doesn't interact with other parts of the code and a simple write
- * is already atomic from the point of view of the processor.
- *
- * This change can modify what mem_put() does, but both possibilities are
- * fine until the sweeper thread kicks in. The real synchronization must be
- * between mem_put() and the sweeper thread. */
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
if (pool_list != NULL) {
- pool_list->poison = 1;
+ /* Remove pool_list from the global list to avoid that sweeper
+ * could touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch hot/cold lists. The only possible
+ * race is when at the same moment a mem_put() is adding a new item
+ * to the hot list. We protect from that by taking pool_list->lock.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
+ }
+
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
+
thread_pool_list = NULL;
}
}
@@ -528,6 +538,22 @@ mem_pools_preinit(void)
init_done = GF_MEMPOOL_INIT_EARLY;
}
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
+{
+ /* TODO: This function should destroy all per thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). Existing threads would crash when they try to use
+ * memory or they terminate if we destroy things here.
+ *
+ * When we propertly terminate all threads, we can add the needed
+ * code here. Till then we need to leave the memory allocated. Most
+ * probably this function will be executed on process termination,
+ * so the memory will be released anyway by the system. */
+}
+
/* Call mem_pools_init() once threading has been configured completely. This
* prevent the pool_sweeper thread from getting killed once the main() thread
* exits during deamonizing. */
@@ -560,10 +586,6 @@ mem_pools_fini(void)
*/
break;
case 1: {
- per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- unsigned int i;
-
/* if mem_pools_init() was not called, sweeper_tid will be invalid
* and the functions will error out. That is not critical. In all
* other cases, the sweeper_tid will be valid and the thread gets
@@ -571,32 +593,11 @@ mem_pools_fini(void)
(void)pthread_cancel(sweeper_tid);
(void)pthread_join(sweeper_tid, NULL);
- /* At this point all threads should have already terminated, so
- * it should be safe to destroy all pending per_thread_pool_list_t
- * structures that are stored for each thread. */
- mem_pool_thread_destructor();
-
- /* free all objects from all pools */
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
- thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- free_obj_list(pool_list->pools[i].hot_list);
- free_obj_list(pool_list->pools[i].cold_list);
- pool_list->pools[i].hot_list = NULL;
- pool_list->pools[i].cold_list = NULL;
- }
-
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
-
- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
- thr_list)
- {
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists in use. We can also not destroy unused
+ * pool_lists because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
init_done = GF_MEMPOOL_INIT_DESTROY;
/* Fall through. */
@@ -617,7 +618,7 @@ mem_pools_fini(void)
{
}
void
-mem_pool_thread_destructor(void)
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
}
@@ -738,13 +739,21 @@ mem_get_pool_list(void)
}
}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+
(void)pthread_mutex_lock(&pool_lock);
- pool_list->poison = 0;
list_add(&pool_list->thr_list, &pool_threads);
(void)pthread_mutex_unlock(&pool_lock);
thread_pool_list = pool_list;
+ /* Ensure that all memory objects associated to the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
+
return pool_list;
}