path: root/libglusterfs/src/mem-pool.c
Diffstat (limited to 'libglusterfs/src/mem-pool.c')
-rw-r--r--  libglusterfs/src/mem-pool.c  1199
1 file changed, 790 insertions(+), 409 deletions(-)
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index c5ff58f4f1b..2d5a12b0a00 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -8,544 +8,925 @@
cases as published by the Free Software Foundation.
*/
-#include "mem-pool.h"
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h" // for GF_ASSERT, gf_thread_cr...
+#include "glusterfs/globals.h" // for xlator_t, THIS
#include <stdlib.h>
#include <stdarg.h>
-#define GF_MEM_POOL_LIST_BOUNDARY (sizeof(struct list_head))
-#define GF_MEM_POOL_PTR (sizeof(struct mem_pool*))
-#define GF_MEM_POOL_PAD_BOUNDARY (GF_MEM_POOL_LIST_BOUNDARY + GF_MEM_POOL_PTR + sizeof(int))
-#define mem_pool_chunkhead2ptr(head) ((head) + GF_MEM_POOL_PAD_BOUNDARY)
-#define mem_pool_ptr2chunkhead(ptr) ((ptr) - GF_MEM_POOL_PAD_BOUNDARY)
-#define is_mem_chunk_in_use(ptr) (*ptr == 1)
-#define mem_pool_from_ptr(ptr) ((ptr) + GF_MEM_POOL_LIST_BOUNDARY)
-
-#define GLUSTERFS_ENV_MEM_ACCT_STR "GLUSTERFS_DISABLE_MEM_ACCT"
+#include "unittest/unittest.h"
+#include "glusterfs/libglusterfs-messages.h"
void
-gf_mem_acct_enable_set (void *data)
+gf_mem_acct_enable_set(void *data)
{
- glusterfs_ctx_t *ctx = NULL;
+ glusterfs_ctx_t *ctx = NULL;
- ctx = data;
+ REQUIRE(data != NULL);
- GF_ASSERT (ctx != NULL);
+ ctx = data;
- ctx->mem_acct_enable = 1;
+ GF_ASSERT(ctx != NULL);
- return;
+ ctx->mem_acct_enable = 1;
+
+ ENSURE(1 == ctx->mem_acct_enable);
+
+ return;
}
-int
-gf_mem_set_acct_info (xlator_t *xl, char **alloc_ptr,
- size_t size, uint32_t type)
+static void *
+gf_mem_header_prepare(struct mem_header *header, size_t size)
{
+ void *ptr;
+
+ header->size = size;
+
+ ptr = header + 1;
- char *ptr = NULL;
+ /* data follows in this gap of 'size' bytes */
+ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
- if (!alloc_ptr)
- return -1;
+ return ptr;
+}
+
+static void *
+gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size, uint32_t type, const char *typestr)
+{
+ struct mem_acct_rec *rec = NULL;
+ bool new_ref = false;
+
+ if (mem_acct != NULL) {
+ GF_ASSERT(type <= mem_acct->num_types);
+
+ rec = &mem_acct->rec[type];
+ LOCK(&rec->lock);
+ {
+ if (!rec->typestr) {
+ rec->typestr = typestr;
+ }
+ rec->size += size;
+ new_ref = (rec->num_allocs == 0);
+ rec->num_allocs++;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs);
+
+#ifdef DEBUG
+ list_add(&header->acct_list, &rec->obj_list);
+#endif
+ }
+ UNLOCK(&rec->lock);
- ptr = (char *) (*alloc_ptr);
+ /* We only take a reference for each memory type used, not for each
+ * allocation. This minimizes the use of atomic operations. */
+ if (new_ref) {
+ GF_ATOMIC_INC(mem_acct->refcnt);
+ }
+ }
- GF_ASSERT (xl != NULL);
+ header->type = type;
+ header->mem_acct = mem_acct;
+ header->magic = GF_MEM_HEADER_MAGIC;
- GF_ASSERT (xl->mem_acct.rec != NULL);
+ return gf_mem_header_prepare(header, size);
+}
- GF_ASSERT (type <= xl->mem_acct.num_types);
+static void *
+gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size)
+{
+ struct mem_acct_rec *rec = NULL;
- LOCK(&xl->mem_acct.rec[type].lock);
+ if (mem_acct != NULL) {
+ rec = &mem_acct->rec[header->type];
+ LOCK(&rec->lock);
{
- xl->mem_acct.rec[type].size += size;
- xl->mem_acct.rec[type].num_allocs++;
- xl->mem_acct.rec[type].total_allocs++;
- xl->mem_acct.rec[type].max_size =
- max (xl->mem_acct.rec[type].max_size,
- xl->mem_acct.rec[type].size);
- xl->mem_acct.rec[type].max_num_allocs =
- max (xl->mem_acct.rec[type].max_num_allocs,
- xl->mem_acct.rec[type].num_allocs);
+ rec->size += size - header->size;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+
+#ifdef DEBUG
+ /* The old 'header' was already present in 'obj_list', but
+ * realloc() could have changed its address. We need to remove
+ * the old item from the list and add the new one. This works
+ * because list_move() doesn't dereference the stale pointers
+ * to the old location that are still present in the list; it
+ * simply overwrites them. */
+ list_move(&header->acct_list, &rec->obj_list);
+#endif
}
- UNLOCK(&xl->mem_acct.rec[type].lock);
-
- *(uint32_t *)(ptr) = type;
- ptr = ptr + 4;
- memcpy (ptr, &size, sizeof(size_t));
- ptr += sizeof (size_t);
- memcpy (ptr, &xl, sizeof(xlator_t *));
- ptr += sizeof (xlator_t *);
- *(uint32_t *)(ptr) = GF_MEM_HEADER_MAGIC;
- ptr = ptr + 4;
- ptr = ptr + 8; //padding
- *(uint32_t *) (ptr + size) = GF_MEM_TRAILER_MAGIC;
-
- *alloc_ptr = (void *)ptr;
- return 0;
+ UNLOCK(&rec->lock);
+ }
+
+ return gf_mem_header_prepare(header, size);
}
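
For reference, the layout these helpers build for every accounted allocation is a header, the caller's data, and a 32-bit trailer magic. A sketch of that layout (illustrative, not part of the patch; assumes GF_MEM_HEADER_SIZE == sizeof(struct mem_header)):

/* Layout of one accounted allocation, as produced by gf_mem_set_acct_info()
 * and gf_mem_header_prepare():
 *
 *   +--------------------+------------------------+------------------+
 *   | struct mem_header  | caller data ('size' B) | uint32_t trailer |
 *   | magic, type, size, |                        | GF_MEM_TRAILER_  |
 *   | mem_acct, ...      |                        | MAGIC            |
 *   +--------------------+------------------------+------------------+
 *   ^ malloc'd block     ^ pointer returned       ^ overrun canary
 *     start                to the caller            checked in __gf_free()
 */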
+static bool
+gf_mem_acct_enabled(void)
+{
+ xlator_t *x = THIS;
+ /* Low-level __gf_xxx() may be called before ctx is initialized. */
+ return x->ctx && x->ctx->mem_acct_enable;
+}
void *
-__gf_calloc (size_t nmemb, size_t size, uint32_t type)
+__gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
{
- size_t tot_size = 0;
- size_t req_size = 0;
- char *ptr = NULL;
- xlator_t *xl = NULL;
+ size_t tot_size = 0;
+ size_t req_size = 0;
+ void *ptr = NULL;
+ xlator_t *xl = NULL;
- if (!THIS->ctx->mem_acct_enable)
- return CALLOC (nmemb, size);
+ if (!gf_mem_acct_enabled())
+ return CALLOC(nmemb, size);
- xl = THIS;
+ xl = THIS;
- req_size = nmemb * size;
- tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ req_size = nmemb * size;
+ tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- ptr = calloc (1, tot_size);
+ ptr = calloc(1, tot_size);
- if (!ptr) {
- gf_msg_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
- gf_mem_set_acct_info (xl, &ptr, req_size, type);
+ if (!ptr) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr);
}
void *
-__gf_malloc (size_t size, uint32_t type)
+__gf_malloc(size_t size, uint32_t type, const char *typestr)
{
- size_t tot_size = 0;
- char *ptr = NULL;
- xlator_t *xl = NULL;
+ size_t tot_size = 0;
+ void *ptr = NULL;
+ xlator_t *xl = NULL;
- if (!THIS->ctx->mem_acct_enable)
- return MALLOC (size);
+ if (!gf_mem_acct_enabled())
+ return MALLOC(size);
- xl = THIS;
+ xl = THIS;
- tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- ptr = malloc (tot_size);
- if (!ptr) {
- gf_msg_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
- gf_mem_set_acct_info (xl, &ptr, size, type);
+ ptr = malloc(tot_size);
+ if (!ptr) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr);
}
void *
-__gf_realloc (void *ptr, size_t size)
+__gf_realloc(void *ptr, size_t size)
{
- size_t tot_size = 0;
- char *orig_ptr = NULL;
- xlator_t *xl = NULL;
- uint32_t type = 0;
- char *new_ptr;
+ size_t tot_size = 0;
+ struct mem_header *header = NULL;
+
+ if (!gf_mem_acct_enabled())
+ return REALLOC(ptr, size);
- if (!THIS->ctx->mem_acct_enable)
- return REALLOC (ptr, size);
+ REQUIRE(NULL != ptr);
- tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC);
- orig_ptr = (char *)ptr - 8 - 4;
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ header = realloc(header, tot_size);
+ if (!header) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- GF_ASSERT (*(uint32_t *)orig_ptr == GF_MEM_HEADER_MAGIC);
+ return gf_mem_update_acct_info(header->mem_acct, header, size);
+}
- orig_ptr = orig_ptr - sizeof(xlator_t *);
- xl = *((xlator_t **)orig_ptr);
+int
+gf_vasprintf(char **string_ptr, const char *format, va_list arg)
+{
+ va_list arg_save;
+ char *str = NULL;
+ int size = 0;
+ int rv = 0;
+
+ if (!string_ptr || !format)
+ return -1;
+
+ va_copy(arg_save, arg);
+
+ size = vsnprintf(NULL, 0, format, arg);
+ size++;
+ str = GF_MALLOC(size, gf_common_mt_asprintf);
+ if (str == NULL) {
+ /* log is done in GF_MALLOC itself */
+ va_end(arg_save);
+ return -1;
+ }
+ rv = vsnprintf(str, size, format, arg_save);
+
+ *string_ptr = str;
+ va_end(arg_save);
+ return (rv);
+}
- orig_ptr = (char *)ptr - GF_MEM_HEADER_SIZE;
- type = *(uint32_t *)orig_ptr;
+int
+gf_asprintf(char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
- new_ptr = realloc (orig_ptr, tot_size);
- if (!new_ptr) {
- gf_msg_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
+ va_start(arg, format);
+ rv = gf_vasprintf(string_ptr, format, arg);
+ va_end(arg);
- /*
- * We used to pass (char **)&ptr as the second
- * argument after the value of realloc was saved
- * in ptr, but the compiler warnings complained
- * about the casting to and forth from void ** to
- * char **.
- */
- gf_mem_set_acct_info (xl, &new_ptr, size, type);
+ return rv;
+}
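
As a usage sketch (the caller and 'volname' are hypothetical, not part of this change): the string returned by gf_asprintf() comes from GF_MALLOC, so it must be released with GF_FREE().

/* Hypothetical caller of gf_asprintf(); 'volname' is illustrative. */
static int
demo_log_started(const char *volname)
{
    char *msg = NULL;

    if (gf_asprintf(&msg, "volume %s: started", volname) < 0)
        return -1;  /* allocation failed; *string_ptr was not set */

    gf_msg_debug("mem-pool", 0, "%s", msg);
    GF_FREE(msg);   /* pairs with GF_MALLOC inside gf_vasprintf() */
    return 0;
}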
- return (void *)new_ptr;
+#ifdef DEBUG
+void
+__gf_mem_invalidate(void *ptr)
+{
+ struct mem_header *header = ptr;
+ void *end = NULL;
+
+ struct mem_invalid inval = {
+ .magic = GF_MEM_INVALID_MAGIC,
+ .mem_acct = header->mem_acct,
+ .type = header->type,
+ .size = header->size,
+ .baseaddr = ptr + GF_MEM_HEADER_SIZE,
+ };
+
+ /* calculate the end (one past the last byte) of the allocated area */
+ end = ptr + GF_MEM_HEADER_SIZE + inval.size + GF_MEM_TRAILER_SIZE;
+
+ /* overwrite the old mem_header */
+ memcpy(ptr, &inval, sizeof(inval));
+ ptr += sizeof(inval);
+
+ /* zero out the remaining (old) mem_header bytes */
+ memset(ptr, 0x00, sizeof(*header) - sizeof(inval));
+ ptr += sizeof(*header) - sizeof(inval);
+
+ /* zero out the first byte of data */
+ *(uint32_t *)(ptr) = 0x00;
+ ptr += 1;
+
+ /* repeated writes of the invalid structure in the data area */
+ while ((ptr + (sizeof(inval))) < (end - 1)) {
+ memcpy(ptr, &inval, sizeof(inval));
+ ptr += sizeof(inval);
+ }
+
+ /* fill out remaining data area with 0xff */
+ memset(ptr, 0xff, end - ptr);
+}
+#endif /* DEBUG */
+
+/* Coverity taint NOTE: __gf_free() operates on the content at
+pointer - GF_MEM_HEADER_SIZE. If the pointer was used for any IO-related
+purpose it stands tainted, and Coverity would therefore flag access to that
+region as tainted too. The following directive sanitizes the pointer for
+Coverity, removing the taint within this function. If the pointer is accessed
+outside the scope of this function without any checks on content read from an
+IO operation, taints will still be reported and need appropriate addressing. */
+
+/* coverity[ +tainted_data_sanitize : arg-0 ] */
+static void
+gf_free_sanitize(void *s)
+{
}
-int
-gf_vasprintf (char **string_ptr, const char *format, va_list arg)
+void
+__gf_free(void *free_ptr)
{
- va_list arg_save;
- char *str = NULL;
- int size = 0;
- int rv = 0;
-
- if (!string_ptr || !format)
- return -1;
-
- va_copy (arg_save, arg);
-
- size = vsnprintf (NULL, 0, format, arg);
- size++;
- str = GF_MALLOC (size, gf_common_mt_asprintf);
- if (str == NULL) {
- /* log is done in GF_MALLOC itself */
- return -1;
+ void *ptr = NULL;
+ struct mem_acct *mem_acct;
+ struct mem_header *header = NULL;
+ bool last_ref = false;
+
+ if (!gf_mem_acct_enabled()) {
+ FREE(free_ptr);
+ return;
+ }
+
+ if (!free_ptr)
+ return;
+
+ gf_free_sanitize(free_ptr);
+ ptr = free_ptr - GF_MEM_HEADER_SIZE;
+ header = (struct mem_header *)ptr;
+
+ // Possible corruption, assert here
+ GF_ASSERT(GF_MEM_HEADER_MAGIC == header->magic);
+
+ mem_acct = header->mem_acct;
+ if (!mem_acct) {
+ goto free;
+ }
+
+ // This points to a memory overrun
+ GF_ASSERT(GF_MEM_TRAILER_MAGIC ==
+ *(uint32_t *)((char *)free_ptr + header->size));
+
+ LOCK(&mem_acct->rec[header->type].lock);
+ {
+ mem_acct->rec[header->type].size -= header->size;
+ mem_acct->rec[header->type].num_allocs--;
+ /* If all the instances are freed up then ensure typestr is set
+ * to NULL */
+ if (!mem_acct->rec[header->type].num_allocs) {
+ last_ref = true;
+ mem_acct->rec[header->type].typestr = NULL;
}
- rv = vsnprintf (str, size, format, arg_save);
+#ifdef DEBUG
+ list_del(&header->acct_list);
+#endif
+ }
+ UNLOCK(&mem_acct->rec[header->type].lock);
+
+ if (last_ref) {
+ xlator_mem_acct_unref(mem_acct);
+ }
- *string_ptr = str;
- return (rv);
+free:
+#ifdef DEBUG
+ __gf_mem_invalidate(ptr);
+#endif
+
+ FREE(ptr);
}
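
The trailer assertion above is the overrun canary. A hedged sketch of the bug class it catches, assuming memory accounting is enabled and a DEBUG build where GF_ASSERT() aborts (gf_common_mt_char is the generic char mem-type from mem-types.h):

/* Illustrative off-by-one (do not write this): byte 8 overwrites part of
 * GF_MEM_TRAILER_MAGIC, so the trailer GF_ASSERT() in __gf_free() fires. */
static void
demo_overrun(void)
{
    char *buf = GF_MALLOC(8, gf_common_mt_char);
    if (!buf)
        return;

    memset(buf, 0, 9); /* one byte past the requested size */
    GF_FREE(buf);      /* trailer magic mismatch detected here */
}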
-int
-gf_asprintf (char **string_ptr, const char *format, ...)
+#if defined(GF_DISABLE_MEMPOOL)
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
{
- va_list arg;
- int rv = 0;
+ struct mem_pool *new;
- va_start (arg, format);
- rv = gf_vasprintf (string_ptr, format, arg);
- va_end (arg);
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
- return rv;
+ new->sizeof_type = sizeof_type;
+ return new;
}
void
-__gf_free (void *free_ptr)
+mem_pool_destroy(struct mem_pool *pool)
{
- size_t req_size = 0;
- char *ptr = NULL;
- uint32_t type = 0;
- xlator_t *xl = NULL;
-
- if (!THIS->ctx->mem_acct_enable) {
- FREE (free_ptr);
- return;
- }
+ GF_FREE(pool);
+}
+
+#else /* !GF_DISABLE_MEMPOOL */
- if (!free_ptr)
- return;
+static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct list_head pool_threads;
+static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct list_head pool_free_threads;
+static struct mem_pool_shared pools[NPOOLS];
+static size_t pool_list_size;
- ptr = (char *)free_ptr - 8 - 4;
+static __thread per_thread_pool_list_t *thread_pool_list = NULL;
- //Possible corruption, assert here
- GF_ASSERT (GF_MEM_HEADER_MAGIC == *(uint32_t *)ptr);
+#define N_COLD_LISTS 1024
+#define POOL_SWEEP_SECS 30
- *(uint32_t *)ptr = 0;
+typedef struct {
+ pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
+ unsigned int n_cold_lists;
+} sweep_state_t;
- ptr = ptr - sizeof(xlator_t *);
- memcpy (&xl, ptr, sizeof(xlator_t *));
+enum init_state {
+ GF_MEMPOOL_INIT_NONE = 0,
+ GF_MEMPOOL_INIT_EARLY,
+ GF_MEMPOOL_INIT_LATE,
+ GF_MEMPOOL_INIT_DESTROY
+};
- //gf_free expects xl to be available
- GF_ASSERT (xl != NULL);
+static enum init_state init_done = GF_MEMPOOL_INIT_NONE;
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+static unsigned int init_count = 0;
+static pthread_t sweeper_tid;
- if (!xl->mem_acct.rec) {
- ptr = (char *)free_ptr - GF_MEM_HEADER_SIZE;
- goto free;
+static bool
+collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
+{
+ unsigned int i;
+ per_thread_pool_t *pt_pool;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
+ }
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
+ }
+ (void)pthread_spin_unlock(&pool_list->lock);
- ptr = ptr - sizeof(size_t);
- memcpy (&req_size, ptr, sizeof (size_t));
- ptr = ptr - 4;
- type = *(uint32_t *)ptr;
+ return false;
+}
- // This points to a memory overrun
- GF_ASSERT (GF_MEM_TRAILER_MAGIC ==
- *(uint32_t *)((char *)free_ptr + req_size));
+static void
+free_obj_list(pooled_obj_hdr_t *victim)
+{
+ pooled_obj_hdr_t *next;
- *(uint32_t *) ((char *)free_ptr + req_size) = 0;
+ while (victim) {
+ next = victim->next;
+ free(victim);
+ victim = next;
+ }
+}
- LOCK (&xl->mem_acct.rec[type].lock);
+static void *
+pool_sweeper(void *arg)
+{
+ sweep_state_t state;
+ per_thread_pool_list_t *pool_list;
+ uint32_t i;
+ bool pending;
+
+ /*
+ * This is all a bit inelegant, but the point is to avoid doing
+ * expensive things (like freeing thousands of objects) while holding a
+ * global lock. Thus, we split each iteration into two passes, with
+ * only the first and fastest holding the lock.
+ */
+
+ pending = true;
+
+ for (;;) {
+ /* If we know there's pending work to do (or it's the first run), we
+ * collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ state.n_cold_lists = 0;
+ pending = false;
+
+ /* First pass: collect stuff that needs our attention. */
+ (void)pthread_mutex_lock(&pool_lock);
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- xl->mem_acct.rec[type].size -= req_size;
- xl->mem_acct.rec[type].num_allocs--;
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
+ }
}
- UNLOCK (&xl->mem_acct.rec[type].lock);
-free:
- FREE (ptr);
-}
+ (void)pthread_mutex_unlock(&pool_lock);
+ /* Second pass: free cold objects from live pools. */
+ for (i = 0; i < state.n_cold_lists; ++i) {
+ free_obj_list(state.cold_lists[i]);
+ }
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
+ return NULL;
+}
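
The collect-then-free split in pool_sweeper() generalizes: hold the global lock only for O(1) pointer moves and do the unbounded frees unlocked. A minimal self-contained sketch of the same pattern (all names hypothetical):

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *demo_list; /* shared, protected by demo_lock */

static void
sweep_once(void)
{
    /* pass 1: detach the whole list under the lock (O(1)) */
    pthread_mutex_lock(&demo_lock);
    struct node *batch = demo_list;
    demo_list = NULL;
    pthread_mutex_unlock(&demo_lock);

    /* pass 2: free an unbounded number of nodes without the lock */
    while (batch != NULL) {
        struct node *next = batch->next;
        free(batch);
        batch = next;
    }
}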
-struct mem_pool *
-mem_pool_new_fn (unsigned long sizeof_type,
- unsigned long count, char *name)
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- struct mem_pool *mem_pool = NULL;
- unsigned long padded_sizeof_type = 0;
- void *pool = NULL;
- int i = 0;
- int ret = 0;
- struct list_head *list = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!sizeof_type || !count) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
+
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+ /* Remove pool_list from the global list so that the sweeper
+ * cannot touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch hot/cold lists. The only possible
+ * race is when at the same moment a mem_put() is adding a new item
+ * to the hot list. We protect from that by taking pool_list->lock.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
}
- padded_sizeof_type = sizeof_type + GF_MEM_POOL_PAD_BOUNDARY;
- mem_pool = GF_CALLOC (sizeof (*mem_pool), 1, gf_common_mt_mem_pool);
- if (!mem_pool)
- return NULL;
-
- ret = gf_asprintf (&mem_pool->name, "%s:%s", THIS->name, name);
- if (ret < 0)
- return NULL;
-
- if (!mem_pool->name) {
- GF_FREE (mem_pool);
- return NULL;
- }
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
- LOCK_INIT (&mem_pool->lock);
- INIT_LIST_HEAD (&mem_pool->list);
- INIT_LIST_HEAD (&mem_pool->global_list);
+ thread_pool_list = NULL;
+ }
+}
- mem_pool->padded_sizeof_type = padded_sizeof_type;
- mem_pool->cold_count = count;
- mem_pool->real_sizeof_type = sizeof_type;
+static __attribute__((constructor)) void
+mem_pools_preinit(void)
+{
+ unsigned int i;
- pool = GF_CALLOC (count, padded_sizeof_type, gf_common_mt_long);
- if (!pool) {
- GF_FREE (mem_pool->name);
- GF_FREE (mem_pool);
- return NULL;
- }
+ INIT_LIST_HEAD(&pool_threads);
+ INIT_LIST_HEAD(&pool_free_threads);
- for (i = 0; i < count; i++) {
- list = pool + (i * (padded_sizeof_type));
- INIT_LIST_HEAD (list);
- list_add_tail (list, &mem_pool->list);
- }
+ for (i = 0; i < NPOOLS; ++i) {
+ pools[i].power_of_two = POOL_SMALLEST + i;
- mem_pool->pool = pool;
- mem_pool->pool_end = pool + (count * (padded_sizeof_type));
+ GF_ATOMIC_INIT(pools[i].allocs_hot, 0);
+ GF_ATOMIC_INIT(pools[i].allocs_cold, 0);
+ GF_ATOMIC_INIT(pools[i].allocs_stdc, 0);
+ GF_ATOMIC_INIT(pools[i].frees_to_list, 0);
+ }
- /* add this pool to the global list */
- ctx = THIS->ctx;
- if (!ctx)
- goto out;
+ pool_list_size = sizeof(per_thread_pool_list_t) +
+ sizeof(per_thread_pool_t) * (NPOOLS - 1);
- list_add (&mem_pool->global_list, &ctx->mempool_list);
+ init_done = GF_MEMPOOL_INIT_EARLY;
+}
-out:
- return mem_pool;
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
+{
+ /* TODO: This function should destroy all per thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). Existing threads would crash when they try to use
+ * memory or they terminate if we destroy things here.
+ *
+ * When we properly terminate all threads, we can add the needed
+ * code here. Till then we need to leave the memory allocated. Most
+ * probably this function will be executed on process termination,
+ * so the memory will be released anyway by the system. */
}
-void*
-mem_get0 (struct mem_pool *mem_pool)
+/* Call mem_pools_init() once threading has been configured completely. This
+ * prevents the pool_sweeper thread from getting killed once the main() thread
+ * exits during daemonizing. */
+void
+mem_pools_init(void)
{
- void *ptr = NULL;
+ pthread_mutex_lock(&init_mutex);
+ if ((init_count++) == 0) {
+ (void)gf_thread_create(&sweeper_tid, NULL, pool_sweeper, NULL,
+ "memsweep");
+
+ init_done = GF_MEMPOOL_INIT_LATE;
+ }
+ pthread_mutex_unlock(&init_mutex);
+}
- if (!mem_pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+void
+mem_pools_fini(void)
+{
+ pthread_mutex_lock(&init_mutex);
+ switch (init_count) {
+ case 0:
+ /*
+ * If init_count is already zero (as e.g. if somebody called this
+ * before mem_pools_init) then the sweeper was probably never even
+ * started so we don't need to stop it. Even if there's some crazy
+ * circumstance where there is a sweeper but init_count is still
+ * zero, that just means we'll leave it running. Not perfect, but
+ * far better than any known alternative.
+ */
+ break;
+ case 1: {
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+ * stopped. */
+ (void)pthread_cancel(sweeper_tid);
+ (void)pthread_join(sweeper_tid, NULL);
+
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists in use. We can also not destroy unused
+ * pool_lists because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
+
+ init_done = GF_MEMPOOL_INIT_DESTROY;
+ /* Fall through. */
}
+ default:
+ --init_count;
+ }
+ pthread_mutex_unlock(&init_mutex);
+}
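
mem_pools_init() and mem_pools_fini() are reference counted: only the first init spawns the sweeper and only the matching last fini cancels it. A usage sketch with hypothetical call sites:

/* Hypothetical embedding program: calls must be balanced. */
mem_pools_init();   /* init_count 0 -> 1: spawns the "memsweep" thread */
mem_pools_init();   /* init_count 1 -> 2: no new thread */

/* ... workload using mem_get()/mem_put() ... */

mem_pools_fini();   /* init_count 2 -> 1: sweeper keeps running */
mem_pools_fini();   /* init_count 1 -> 0: cancels and joins the sweeper */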
- ptr = mem_get(mem_pool);
+void
+mem_pool_destroy(struct mem_pool *pool)
+{
+ if (!pool)
+ return;
- if (ptr)
- memset(ptr, 0, mem_pool->real_sizeof_type);
+ /* remove this pool from the owner (glusterfs_ctx_t) */
+ LOCK(&pool->ctx->lock);
+ {
+ list_del(&pool->owner);
+ }
+ UNLOCK(&pool->ctx->lock);
+
+ /* free this pool, but keep the mem_pool_shared */
+ GF_FREE(pool);
+
+ /*
+ * Pools are now permanent, so the mem_pool->pool is kept around. All
+ * of the objects *in* the pool will eventually be freed via the
+ * pool-sweeper thread, and this way we don't have to add a lot of
+ * reference-counting complexity.
+ */
+}
- return ptr;
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+{
+ unsigned long extra_size, size;
+ unsigned int power;
+ struct mem_pool *new = NULL;
+ struct mem_pool_shared *pool = NULL;
+
+ if (!sizeof_type) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+ /* This is the overhead we'll have because of memory accounting for each
+ * memory block. */
+ extra_size = sizeof(pooled_obj_hdr_t);
+
+ /* We need to compute the total space needed to hold the data type and
+ * the header. Given that the smallest block size we have in the pools
+ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
+ * However, since this value is only needed to compute its rounded
+ * logarithm in base 2, and this only depends on the highest bit set,
+ * we can simply do a bitwise or with the minimum size. We need to
+ * subtract 1 for correct handling of sizes that are exactly a power
+ * of 2. */
+ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
+
+ /* We compute the logarithm in base 2 rounded up of the resulting size.
+ * This value will identify which pool we need to use from the pools of
+ * powers of 2. This is equivalent to finding the position of the highest
+ * bit set. */
+ power = sizeof(size) * 8 - __builtin_clzl(size);
+ if (power > POOL_LARGEST) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+ pool = &pools[power - POOL_SMALLEST];
+
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+ new->ctx = ctx;
+ new->sizeof_type = sizeof_type;
+ new->count = count;
+ new->name = name;
+ new->xl_name = THIS->name;
+ new->pool = pool;
+ GF_ATOMIC_INIT(new->active, 0);
+#ifdef DEBUG
+ GF_ATOMIC_INIT(new->hit, 0);
+ GF_ATOMIC_INIT(new->miss, 0);
+#endif
+ INIT_LIST_HEAD(&new->owner);
+
+ LOCK(&ctx->lock);
+ {
+ list_add(&new->owner, &ctx->mempool_list);
+ }
+ UNLOCK(&ctx->lock);
+
+ return new;
}
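
The rounding logic above is easiest to verify with numbers. A worked example, assuming POOL_SMALLEST is 7 (128-byte smallest class) and a 32-byte pooled_obj_hdr_t; both constants are assumptions taken from the headers, not stated in this patch:

/* Worked size-class example (assumed: POOL_SMALLEST = 7, extra_size = 32):
 *
 *   sizeof_type = 100
 *   size  = (100 + 32 - 1) | ((1UL << 7) - 1)
 *         = 131 | 127          (0b10000011 | 0b01111111)
 *         = 255                (0b11111111)
 *   power = 64 - __builtin_clzl(255) = 64 - 56 = 8
 *
 * so allocations come from pools[8 - POOL_SMALLEST] = pools[1], whose
 * blocks are 1 << 8 = 256 bytes. The '- 1UL' keeps exact fits in the
 * smaller class: sizeof_type = 96 gives (96 + 32 - 1) | 127 = 127,
 * power = 7, i.e. a 128-byte block used completely. */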
-void *
-mem_get (struct mem_pool *mem_pool)
+per_thread_pool_list_t *
+mem_get_pool_list(void)
{
- struct list_head *list = NULL;
- void *ptr = NULL;
- int *in_use = NULL;
- struct mem_pool **pool_ptr = NULL;
-
- if (!mem_pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+ per_thread_pool_list_t *pool_list;
+ unsigned int i;
+
+ pool_list = thread_pool_list;
+ if (pool_list) {
+ return pool_list;
+ }
+
+ (void)pthread_mutex_lock(&pool_free_lock);
+ if (!list_empty(&pool_free_threads)) {
+ pool_list = list_entry(pool_free_threads.next, per_thread_pool_list_t,
+ thr_list);
+ list_del(&pool_list->thr_list);
+ }
+ (void)pthread_mutex_unlock(&pool_free_lock);
+
+ if (!pool_list) {
+ pool_list = MALLOC(pool_list_size);
+ if (!pool_list) {
+ return NULL;
}
- LOCK (&mem_pool->lock);
- {
- mem_pool->alloc_count++;
- if (mem_pool->cold_count) {
- list = mem_pool->list.next;
- list_del (list);
-
- mem_pool->hot_count++;
- mem_pool->cold_count--;
-
- if (mem_pool->max_alloc < mem_pool->hot_count)
- mem_pool->max_alloc = mem_pool->hot_count;
-
- ptr = list;
- in_use = (ptr + GF_MEM_POOL_LIST_BOUNDARY +
- GF_MEM_POOL_PTR);
- *in_use = 1;
-
- goto fwd_addr_out;
- }
-
- /* This is a problem area. If we've run out of
- * chunks in our slab above, we need to allocate
- * enough memory to service this request.
- * The problem is, these individual chunks will fail
- * the first address range check in __is_member. Now, since
- * we're not allocating a full second slab, we wont have
- * enough info perform the range check in __is_member.
- *
- * I am working around this by performing a regular allocation
- * , just the way the caller would've done when not using the
- * mem-pool. That also means, we're not padding the size with
- * the list_head structure because, this will not be added to
- * the list of chunks that belong to the mem-pool allocated
- * initially.
- *
- * This is the best we can do without adding functionality for
- * managing multiple slabs. That does not interest us at present
- * because it is too much work knowing that a better slab
- * allocator is coming RSN.
- */
- mem_pool->pool_misses++;
- mem_pool->curr_stdalloc++;
- if (mem_pool->max_stdalloc < mem_pool->curr_stdalloc)
- mem_pool->max_stdalloc = mem_pool->curr_stdalloc;
- ptr = GF_CALLOC (1, mem_pool->padded_sizeof_type,
- gf_common_mt_mem_pool);
-
- /* Memory coming from the heap need not be transformed from a
- * chunkhead to a usable pointer since it is not coming from
- * the pool.
- */
+ INIT_LIST_HEAD(&pool_list->thr_list);
+ (void)pthread_spin_init(&pool_list->lock, PTHREAD_PROCESS_PRIVATE);
+ for (i = 0; i < NPOOLS; ++i) {
+ pool_list->pools[i].parent = &pools[i];
+ pool_list->pools[i].hot_list = NULL;
+ pool_list->pools[i].cold_list = NULL;
}
-fwd_addr_out:
- pool_ptr = mem_pool_from_ptr (ptr);
- *pool_ptr = (struct mem_pool *)mem_pool;
- ptr = mem_pool_chunkhead2ptr (ptr);
- UNLOCK (&mem_pool->lock);
+ }
- return ptr;
-}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+ (void)pthread_mutex_lock(&pool_lock);
+ list_add(&pool_list->thr_list, &pool_threads);
+ (void)pthread_mutex_unlock(&pool_lock);
-static int
-__is_member (struct mem_pool *pool, void *ptr)
-{
- if (!pool || !ptr) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- if (ptr < pool->pool || ptr >= pool->pool_end)
- return 0;
+ thread_pool_list = pool_list;
- if ((mem_pool_ptr2chunkhead (ptr) - pool->pool)
- % pool->padded_sizeof_type)
- return -1;
+ /* Ensure that all memory objects associated with the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
- return 1;
+ return pool_list;
}
-
-void
-mem_put (void *ptr)
+static pooled_obj_hdr_t *
+mem_get_from_pool(struct mem_pool *mem_pool)
{
- struct list_head *list = NULL;
- int *in_use = NULL;
- void *head = NULL;
- struct mem_pool **tmp = NULL;
- struct mem_pool *pool = NULL;
-
- if (!ptr) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return;
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+ pooled_obj_hdr_t *retval;
+#ifdef DEBUG
+ gf_boolean_t hit = _gf_true;
+#endif
+
+ pool_list = mem_get_pool_list();
+ if (!pool_list || pool_list->poison) {
+ return NULL;
+ }
+
+ pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ retval = pt_pool->hot_list;
+ if (retval) {
+ pt_pool->hot_list = retval->next;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_hot);
+ } else {
+ retval = pt_pool->cold_list;
+ if (retval) {
+ pt_pool->cold_list = retval->next;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_cold);
+ } else {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
+ retval = malloc(1 << pt_pool->parent->power_of_two);
+#ifdef DEBUG
+ hit = _gf_false;
+#endif
}
+ }
+
+ if (retval != NULL) {
+ retval->pool = mem_pool;
+ retval->power_of_two = mem_pool->pool->power_of_two;
+#ifdef DEBUG
+ if (hit == _gf_true)
+ GF_ATOMIC_INC(mem_pool->hit);
+ else
+ GF_ATOMIC_INC(mem_pool->miss);
+#endif
+ retval->magic = GF_MEM_HEADER_MAGIC;
+ retval->pool_list = pool_list;
+ }
+
+ return retval;
+}
- list = head = mem_pool_ptr2chunkhead (ptr);
- tmp = mem_pool_from_ptr (head);
- if (!tmp) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
- "ptr header is corrupted");
- return;
- }
+#endif /* GF_DISABLE_MEMPOOL */
- pool = *tmp;
- if (!pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
- "mem-pool ptr is NULL");
- return;
- }
- LOCK (&pool->lock);
- {
+void *
+mem_get0(struct mem_pool *mem_pool)
+{
+ void *ptr = mem_get(mem_pool);
+ if (ptr) {
+#if defined(GF_DISABLE_MEMPOOL)
+ memset(ptr, 0, mem_pool->sizeof_type);
+#else
+ memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
+#endif
+ }
+
+ return ptr;
+}
- switch (__is_member (pool, ptr))
- {
- case 1:
- in_use = (head + GF_MEM_POOL_LIST_BOUNDARY +
- GF_MEM_POOL_PTR);
- if (!is_mem_chunk_in_use(in_use)) {
- gf_log_callingfn ("mem-pool", GF_LOG_CRITICAL,
- "mem_put called on freed ptr %p of mem "
- "pool %p", ptr, pool);
- break;
- }
- pool->hot_count--;
- pool->cold_count++;
- *in_use = 0;
- list_add (list, &pool->list);
- break;
- case -1:
- /* For some reason, the address given is within
- * the address range of the mem-pool but does not align
- * with the expected start of a chunk that includes
- * the list headers also. Sounds like a problem in
- * layers of clouds up above us. ;)
- */
- abort ();
- break;
- case 0:
- /* The address is outside the range of the mem-pool. We
- * assume here that this address was allocated at a
- * point when the mem-pool was out of chunks in mem_get
- * or the programmer has made a mistake by calling the
- * wrong de-allocation interface. We do
- * not have enough info to distinguish between the two
- * situations.
- */
- pool->curr_stdalloc--;
- GF_FREE (list);
- break;
- default:
- /* log error */
- break;
- }
- }
- UNLOCK (&pool->lock);
+void *
+mem_get(struct mem_pool *mem_pool)
+{
+ if (!mem_pool) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+#if defined(GF_DISABLE_MEMPOOL)
+ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+#else
+ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
+ if (!retval) {
+ return NULL;
+ }
+
+ GF_ATOMIC_INC(mem_pool->active);
+
+ return retval + 1;
+#endif /* GF_DISABLE_MEMPOOL */
}
void
-mem_pool_destroy (struct mem_pool *pool)
+mem_put(void *ptr)
{
- if (!pool)
- return;
-
- gf_log (THIS->name, GF_LOG_INFO, "size=%lu max=%d total=%"PRIu64,
- pool->padded_sizeof_type, pool->max_alloc, pool->alloc_count);
-
- list_del (&pool->global_list);
+#if defined(GF_DISABLE_MEMPOOL)
+ GF_FREE(ptr);
+#else
+ pooled_obj_hdr_t *hdr;
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+
+ if (!ptr) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- LOCK_DESTROY (&pool->lock);
- GF_FREE (pool->name);
- GF_FREE (pool->pool);
- GF_FREE (pool);
+ hdr = ((pooled_obj_hdr_t *)ptr) - 1;
+ if (hdr->magic != GF_MEM_HEADER_MAGIC) {
+ /* Not one of ours; don't touch it. */
+ return;
+ }
+ if (!hdr->pool_list) {
+ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL,
+ LG_MSG_INVALID_ARG,
+ "invalid argument hdr->pool_list NULL");
return;
+ }
+
+ pool_list = hdr->pool_list;
+ pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST];
+
+ if (hdr->pool)
+ GF_ATOMIC_DEC(hdr->pool->active);
+
+ hdr->magic = GF_MEM_INVALID_MAGIC;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+ if (!pool_list->poison) {
+ hdr->next = pt_pool->hot_list;
+ pt_pool->hot_list = hdr;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->frees_to_list);
+ } else {
+ /* If the owner thread of this element has terminated, we simply
+ * release its memory. */
+ (void)pthread_spin_unlock(&pool_list->lock);
+ free(hdr);
+ }
+#endif /* GF_DISABLE_MEMPOOL */
}
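
Taken together, the public API is a create/get/put/destroy cycle. A hedged end-to-end sketch ('struct demo_obj' and the count are illustrative; mem_pool_new() is assumed to be the type-based wrapper over mem_pool_new_fn() from mem-pool.h):

/* Hypothetical caller exercising the pool lifecycle. */
struct demo_obj { int id; char name[64]; };

static int
demo_pool_cycle(void)
{
    struct mem_pool *pool = mem_pool_new(struct demo_obj, 512);
    if (!pool)
        return -1;

    struct demo_obj *obj = mem_get0(pool); /* zeroed object from the pool */
    if (obj != NULL) {
        obj->id = 1;
        mem_put(obj);  /* back to this thread's hot list, not free() */
    }

    mem_pool_destroy(pool); /* pooled objects drain via the sweeper */
    return 0;
}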