summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavi Hernandez <xhernandez@redhat.com>2020-02-07 10:19:57 +0100
committerhari gowtham <hari.gowtham005@gmail.com>2020-02-28 06:12:06 +0000
commit8b0a15e273e83ceaa5bd431d6c524908be4b2bd5 (patch)
tree2481101dde64a569497f88f9440f3db4c946f36d
parentbd37f5350ac9b85c18353069c36a6ae4e489d100 (diff)
core: fix memory pool management races
Objects allocated from a per-thread memory pool keep a reference to it to be able to return the object to the pool when not used anymore. The object holding this reference can have a long life cycle that could survive a glfs_fini() call. This means that it's unsafe to destroy memory pools from glfs_fini(). Another side effect of destroying memory pools from glfs_fini() is that the TLS variable that points to one of those pools cannot be reset for all alive threads. This means that any attempt to allocate memory from those threads will access already free'd memory, which is very dangerous. To fix these issues, mem_pools_fini() doesn't destroy pool lists anymore. They should be destroyed when the library is unloaded or the process is terminated, but this cannot be done right now because gluster doesn't stop other threads before calling exit(), which could cause some races. This patch is the backport of 2 master patches: > Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 > Fixes: bz#1801684 > Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> > > Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b > Fixes: bz#1801684 > Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b Fixes: bz#1805671 Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
-rw-r--r--libglusterfs/src/globals.c13
-rw-r--r--libglusterfs/src/glusterfs/globals.h3
-rw-r--r--libglusterfs/src/glusterfs/mem-pool.h28
-rw-r--r--libglusterfs/src/mem-pool.c191
-rw-r--r--libglusterfs/src/syncop.c7
5 files changed, 137 insertions, 105 deletions
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 02098e6..e433ee8 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr)
GF_FREE(thread_syncopctx.groups);
}
- mem_pool_thread_destructor();
+ mem_pool_thread_destructor(NULL);
+}
+
+void
+gf_thread_needs_cleanup(void)
+{
+ /* The value stored in free_key TLS is not really used for anything, but
+ * pthread implementation doesn't call the TLS destruction function unless
+ * it's != NULL. This function must be called whenever something is
+ * allocated for this thread so that glusterfs_cleanup() will be called
+ * and resources can be released. */
+ (void)pthread_setspecific(free_key, (void *)1);
}
static void
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
index 994301b..acb8e9a 100644
--- a/libglusterfs/src/glusterfs/globals.h
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -160,6 +160,9 @@ glusterfs_leaseid_exist(void);
int
glusterfs_globals_init(glusterfs_ctx_t *ctx);
+void
+gf_thread_needs_cleanup(void);
+
struct tvec_base *
glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
void
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
index be0a26d..97bf76c 100644
--- a/libglusterfs/src/glusterfs/mem-pool.h
+++ b/libglusterfs/src/glusterfs/mem-pool.h
@@ -245,24 +245,26 @@ typedef struct per_thread_pool {
} per_thread_pool_t;
typedef struct per_thread_pool_list {
- /*
- * These first two members are protected by the global pool lock. When
- * a thread first tries to use any pool, we create one of these. We
- * link it into the global list using thr_list so the pool-sweeper
- * thread can find it, and use pthread_setspecific so this thread can
- * find it. When the per-thread destructor runs, we "poison" the pool
- * list to prevent further allocations. This also signals to the
- * pool-sweeper thread that the list should be detached and freed after
- * the next time it's swept.
- */
+ /* thr_list is used to place the TLS pool_list into the active global list
+ * (pool_threads) or the inactive global list (pool_free_threads). It's
+ * protected by the global pool_lock. */
struct list_head thr_list;
- unsigned int poison;
+
+ /* This lock is used to update poison and the hot/cold lists of members
+ * of 'pools' array. */
+ pthread_spinlock_t lock;
+
+ /* This field is used to mark a pool_list as not being owned by any thread.
+ * This means that the sweeper thread won't be cleaning objects stored in
+ * its pools. mem_put() uses it to decide if the object being released is
+ * placed into its original pool_list or directly destroyed. */
+ bool poison;
+
/*
* There's really more than one pool, but the actual number is hidden
* in the implementation code so we just make it a single-element array
* here.
*/
- pthread_spinlock_t lock;
per_thread_pool_t pools[1];
} per_thread_pool_list_t;
@@ -307,7 +309,7 @@ void
mem_pool_destroy(struct mem_pool *pool);
void
-mem_pool_thread_destructor(void);
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
void
gf_mem_acct_enable_set(void *ctx);
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index d88041d..73503e0 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL;
#define POOL_SWEEP_SECS 30
typedef struct {
- struct list_head death_row;
pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
unsigned int n_cold_lists;
} sweep_state_t;
@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int init_count = 0;
static pthread_t sweeper_tid;
-gf_boolean_t
+static bool
collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
{
unsigned int i;
per_thread_pool_t *pt_pool;
- gf_boolean_t poisoned;
(void)pthread_spin_lock(&pool_list->lock);
- poisoned = pool_list->poison != 0;
- if (!poisoned) {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- if (pt_pool->cold_list) {
- if (state->n_cold_lists >= N_COLD_LISTS) {
- break;
- }
- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
}
- pt_pool->cold_list = pt_pool->hot_list;
- pt_pool->hot_list = NULL;
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
}
(void)pthread_spin_unlock(&pool_list->lock);
- return poisoned;
+ return false;
}
-void
+static void
free_obj_list(pooled_obj_hdr_t *victim)
{
pooled_obj_hdr_t *next;
@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim)
}
}
-void *
+static void *
pool_sweeper(void *arg)
{
sweep_state_t state;
per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- per_thread_pool_t *pt_pool;
- unsigned int i;
- gf_boolean_t poisoned;
+ uint32_t i;
+ bool pending;
/*
* This is all a bit inelegant, but the point is to avoid doing
* expensive things (like freeing thousands of objects) while holding a
- * global lock. Thus, we split each iteration into three passes, with
+ * global lock. Thus, we split each iteration into two passes, with
* only the first and fastest holding the lock.
*/
+ pending = true;
+
for (;;) {
- sleep(POOL_SWEEP_SECS);
+ /* If we know there's pending work to do (or it's the first run), we
+ * do collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
(void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
- INIT_LIST_HEAD(&state.death_row);
state.n_cold_lists = 0;
+ pending = false;
/* First pass: collect stuff that needs our attention. */
(void)pthread_mutex_lock(&pool_lock);
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- (void)pthread_mutex_unlock(&pool_lock);
- poisoned = collect_garbage(&state, pool_list);
- (void)pthread_mutex_lock(&pool_lock);
-
- if (poisoned) {
- list_move(&pool_list->thr_list, &state.death_row);
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
}
}
(void)pthread_mutex_unlock(&pool_lock);
- /* Second pass: free dead pools. */
- (void)pthread_mutex_lock(&pool_free_lock);
- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- free_obj_list(pt_pool->cold_list);
- free_obj_list(pt_pool->hot_list);
- pt_pool->hot_list = pt_pool->cold_list = NULL;
- }
- list_del(&pool_list->thr_list);
- list_add(&pool_list->thr_list, &pool_free_threads);
- }
- (void)pthread_mutex_unlock(&pool_free_lock);
-
- /* Third pass: free cold objects from live pools. */
+ /* Second pass: free cold objects from live pools. */
for (i = 0; i < state.n_cold_lists; ++i) {
free_obj_list(state.cold_lists[i]);
}
(void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
}
+
+ return NULL;
}
void
-mem_pool_thread_destructor(void)
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- per_thread_pool_list_t *pool_list = thread_pool_list;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
- /* The pool-sweeper thread will take it from here.
- *
- * We can change 'poison' here without taking locks because the change
- * itself doesn't interact with other parts of the code and a simple write
- * is already atomic from the point of view of the processor.
- *
- * This change can modify what mem_put() does, but both possibilities are
- * fine until the sweeper thread kicks in. The real synchronization must be
- * between mem_put() and the sweeper thread. */
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
if (pool_list != NULL) {
- pool_list->poison = 1;
+ /* Remove pool_list from the global list to avoid that sweeper
+ * could touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch hot/cold lists. The only possible
+ * race is when at the same moment a mem_put() is adding a new item
+ * to the hot list. We protect from that by taking pool_list->lock.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
+ }
+
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
+
thread_pool_list = NULL;
}
}
@@ -528,6 +538,22 @@ mem_pools_preinit(void)
init_done = GF_MEMPOOL_INIT_EARLY;
}
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
+{
+ /* TODO: This function should destroy all per thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). Existing threads would crash when they try to use
+ * memory or they terminate if we destroy things here.
+ *
+ * When we propertly terminate all threads, we can add the needed
+ * code here. Till then we need to leave the memory allocated. Most
+ * probably this function will be executed on process termination,
+ * so the memory will be released anyway by the system. */
+}
+
/* Call mem_pools_init() once threading has been configured completely. This
* prevent the pool_sweeper thread from getting killed once the main() thread
* exits during deamonizing. */
@@ -560,10 +586,6 @@ mem_pools_fini(void)
*/
break;
case 1: {
- per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- unsigned int i;
-
/* if mem_pools_init() was not called, sweeper_tid will be invalid
* and the functions will error out. That is not critical. In all
* other cases, the sweeper_tid will be valid and the thread gets
@@ -571,32 +593,11 @@ mem_pools_fini(void)
(void)pthread_cancel(sweeper_tid);
(void)pthread_join(sweeper_tid, NULL);
- /* At this point all threads should have already terminated, so
- * it should be safe to destroy all pending per_thread_pool_list_t
- * structures that are stored for each thread. */
- mem_pool_thread_destructor();
-
- /* free all objects from all pools */
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
- thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- free_obj_list(pool_list->pools[i].hot_list);
- free_obj_list(pool_list->pools[i].cold_list);
- pool_list->pools[i].hot_list = NULL;
- pool_list->pools[i].cold_list = NULL;
- }
-
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
-
- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
- thr_list)
- {
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists in use. We can also not destroy unused
+ * pool_lists because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
init_done = GF_MEMPOOL_INIT_DESTROY;
/* Fall through. */
@@ -617,7 +618,7 @@ mem_pools_fini(void)
{
}
void
-mem_pool_thread_destructor(void)
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
}
@@ -738,13 +739,21 @@ mem_get_pool_list(void)
}
}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+
(void)pthread_mutex_lock(&pool_lock);
- pool_list->poison = 0;
list_add(&pool_list->thr_list, &pool_threads);
(void)pthread_mutex_unlock(&pool_lock);
thread_pool_list = pool_list;
+ /* Ensure that all memory objects associated to the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
+
return pool_list;
}
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index 2eb7b49..0de53c6 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups)
/* set/reset the ngrps, this is where reset of groups is handled */
opctx->ngrps = count;
+
+ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
+ /* This is the first time we are storing groups into the TLS structure
+ * so we mark the current thread so that it will be properly cleaned
+ * up when the thread terminates. */
+ gf_thread_needs_cleanup();
+ }
opctx->valid |= SYNCOPCTX_GROUPS;
out: