path: root/libglusterfs/src
author     Amar Tumballi <amarts@redhat.com>  2019-01-04 07:04:50 +0000
committer  Amar Tumballi <amarts@redhat.com>  2019-01-08 11:16:03 +0000
commit     37653efdc7681d1b0f255054ec2f9c9ddd4c8b14 (patch)
tree       61688051f8f374ea6a1b661ef357844a477361f6 /libglusterfs/src
parent     054c7ea91603acfcb01db8455b25dda7e5e831b2 (diff)
Revert "iobuf: Get rid of pre allocated iobuf_pool and use per thread mem pool"
This reverts commit b87c397091bac6a4a6dec4e45a7671fad4a11770. There seems to be a performance regression with that patch, hence it is recommended to revert it.

Updates: #325
Change-Id: Id85d6203173a44fad6cf51d39b3e96f37afcec09
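A minimal consumer-side sketch of the pre-allocated pool API this revert restores (names taken from the headers changed below; data and data_len are placeholder variables and error handling is elided, so treat this as illustrative, not as code from the patch):

    struct iobuf_pool *pool = iobuf_pool_new();       /* pre-allocates the per-size-class arenas */
    struct iobuf *iob = iobuf_get2(pool, 64 * 1024);  /* rounded up to the next size class (128 KB) */
    memcpy(iobuf_ptr(iob), data, data_len);           /* usable region is iobuf_pagesize(iob) bytes */
    iobuf_unref(iob);                                 /* the last unref is expected to return the iobuf to its arena */
    iobuf_pool_destroy(pool);                         /* unmaps all arenas */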
Diffstat (limited to 'libglusterfs/src')
-rw-r--r--  libglusterfs/src/glusterfs/iobuf.h  111
-rw-r--r--  libglusterfs/src/iobuf.c            836
-rw-r--r--  libglusterfs/src/mem-pool.c           2
3 files changed, 877 insertions, 72 deletions
diff --git a/libglusterfs/src/glusterfs/iobuf.h b/libglusterfs/src/glusterfs/iobuf.h
index 7516bc8034b..6de0f13ae36 100644
--- a/libglusterfs/src/glusterfs/iobuf.h
+++ b/libglusterfs/src/glusterfs/iobuf.h
@@ -17,38 +17,118 @@
#include <sys/mman.h>
#include <sys/uio.h>
+#define GF_VARIABLE_IOBUF_COUNT 32
+
+#define GF_RDMA_DEVICE_COUNT 8
+
+/* Let's try to define the new anonymous mapping
+ * flag, in case the system is still using the
+ * now deprecated MAP_ANON flag.
+ *
+ * Also, this should ideally be in a centralized/common
+ * header that other source files can use as well.
+ */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
#define GF_ALIGN_BUF(ptr, bound) \
((void *)((unsigned long)(ptr + bound - 1) & (unsigned long)(~(bound - 1))))
#define GF_IOBUF_ALIGN_SIZE 512
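/* Worked example (illustrative, not part of the patch): GF_ALIGN_BUF rounds a
 * pointer up to the next multiple of a power-of-two bound, so with
 * bound = GF_IOBUF_ALIGN_SIZE (512):
 *   GF_ALIGN_BUF((char *)0x1001, 512) == (void *)0x1200
 *   GF_ALIGN_BUF((char *)0x1200, 512) == (void *)0x1200   (already aligned)
 */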
-#define GF_IOBUF_DEFAULT_PAGE_SIZE (128 * GF_UNIT_KB)
-
/* one allocatable unit for the consumers of the IOBUF API */
/* each unit hosts @page_size bytes of memory */
struct iobuf;
+/* one region of memory mapped from the operating system */
+/* each region MMAPs @arena_size bytes of memory */
+/* each arena hosts @arena_size / @page_size IOBUFs */
+struct iobuf_arena;
+
/* expandable and contractable pool of memory, internally broken into arenas */
struct iobuf_pool;
+struct iobuf_init_config {
+ size_t pagesize;
+ int32_t num_pages;
+};
+
struct iobuf {
- gf_boolean_t stdalloc; /* indicates whether iobuf is allocated from
- mem pool or standard alloc*/
- gf_lock_t lock; /* for ->ptr and ->ref */
- gf_atomic_t ref; /* 0 == passive, >0 == active */
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf *next;
+ struct iobuf *prev;
+ };
+ };
+ struct iobuf_arena *iobuf_arena;
+
+ gf_lock_t lock; /* for ->ptr and ->ref */
+ gf_atomic_t ref; /* 0 == passive, >0 == active */
void *ptr; /* usable memory region by the consumer */
- void *free_ptr; /* in case of stdalloc, this is the
- one to be freed */
- size_t page_size; /* iobuf's page size */
- struct iobuf_pool *iobuf_pool; /* iobuf_pool iobuf is associated with */
+ void *free_ptr; /* in case of stdalloc, this is the
+ one to be freed */
+};
+
+struct iobuf_arena {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf_arena *next;
+ struct iobuf_arena *prev;
+ };
+ };
+
+ struct list_head all_list;
+ size_t page_size; /* size of all iobufs in this arena */
+ size_t arena_size;
+ /* this is equal to rounded_size * num_iobufs.
+ (rounded_size is returned by gf_iobuf_get_pagesize().) */
+ size_t page_count;
+
+ struct iobuf_pool *iobuf_pool;
+
+ void *mem_base;
+ struct iobuf *iobufs; /* allocated iobufs list */
+
+ int active_cnt;
+ struct iobuf active; /* head node iobuf
+ (unused by itself) */
+ int passive_cnt;
+ struct iobuf passive; /* head node iobuf
+ (unused by itself) */
+ uint64_t alloc_cnt; /* total allocs in this pool */
+ int max_active; /* max active buffers at a given time */
};
struct iobuf_pool {
- gf_atomic_t mem_pool_hit;
- gf_atomic_t mem_pool_miss;
- gf_atomic_t active_cnt;
+ pthread_mutex_t mutex;
+ size_t arena_size; /* size of memory region in
+ arena */
+ size_t default_page_size; /* default size of iobuf */
+
+ int arena_cnt;
+ struct list_head all_arenas;
+ struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas. Each element of the array is a list of arenas
+ holding iobufs of particular page_size */
+
+ struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas without free iobufs */
+
+ struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas which can be purged */
+
+ uint64_t request_misses; /* mostly the requests for higher
+ value of iobufs */
+ int rdma_device_count;
+ struct list_head *mr_list[GF_RDMA_DEVICE_COUNT];
+ void *device[GF_RDMA_DEVICE_COUNT];
+ int (*rdma_registration)(void **, void *);
+ int (*rdma_deregistration)(struct list_head **, struct iobuf_arena *);
};
struct iobuf_pool *
@@ -62,10 +142,13 @@ iobuf_unref(struct iobuf *iobuf);
struct iobuf *
iobuf_ref(struct iobuf *iobuf);
void
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool);
+void
iobuf_to_iovec(struct iobuf *iob, struct iovec *iov);
#define iobuf_ptr(iob) ((iob)->ptr)
-#define iobuf_pagesize(iob) (iob->page_size)
+#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size)
+#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size)
struct iobref {
gf_lock_t lock;
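How the per-size-class lists declared above are used by the allocator in iobuf.c (an illustrative reading of the patch below, not text from it):

    /* For a size class at index i:
     *   iobuf_pool->arenas[i] - arenas of that page size that still have free iobufs
     *   iobuf_pool->filled[i] - arenas whose iobufs are all in use (passive_cnt == 0)
     *   iobuf_pool->purge[i]  - arenas whose iobufs are all free again (active_cnt == 0);
     *                           iobuf_pool_prune() may munmap() these
     * __iobuf_get() moves an arena from arenas[i] to filled[i] when it hands out the
     * last free iobuf; __iobuf_put() moves it back on the next release, and onto
     * purge[i] once the last active iobuf has been returned. */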
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index aff0a3121ff..0e37c4fc6e2 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -12,80 +12,588 @@
#include "glusterfs/statedump.h"
#include <stdio.h>
#include "glusterfs/libglusterfs-messages.h"
-#include "glusterfs/atomic.h"
+
+/*
+ TODO: implement destroy margins and prefetching of arenas
+*/
+
+#define IOBUF_ARENA_MAX_INDEX \
+ (sizeof(gf_iobuf_init_config) / (sizeof(struct iobuf_init_config)))
+
+/* Make sure this array is sorted based on pagesize */
+struct iobuf_init_config gf_iobuf_init_config[] = {
+ /* { pagesize, num_pages }, */
+ {128, 1024}, {512, 512}, {2 * 1024, 512}, {8 * 1024, 128},
+ {32 * 1024, 64}, {128 * 1024, 32}, {256 * 1024, 8}, {1 * 1024 * 1024, 2},
+};
+
+static int
+gf_iobuf_get_arena_index(const size_t page_size)
+{
+ int i;
+
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (page_size <= gf_iobuf_init_config[i].pagesize)
+ return i;
+ }
+
+ return -1;
+}
+
+static size_t
+gf_iobuf_get_pagesize(const size_t page_size)
+{
+ int i;
+ size_t size = 0;
+
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ size = gf_iobuf_init_config[i].pagesize;
+ if (page_size <= size)
+ return size;
+ }
+
+ return -1;
+}
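/* Rough trace of the two lookups above against gf_iobuf_init_config
 * (illustrative): a request for 5000 bytes gives
 *   gf_iobuf_get_arena_index(5000) == 3      (the 8 KB class)
 *   gf_iobuf_get_pagesize(5000)    == 8192
 * while a 2 MB request is larger than the biggest class (1 MB), so both
 * lookups return -1 and iobuf_get2() falls back to the stdalloc path. */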
+
+void
+__iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena)
+{
+ int iobuf_cnt = 0;
+ struct iobuf *iobuf = NULL;
+ int offset = 0;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+
+ iobuf_cnt = iobuf_arena->page_count;
+
+ iobuf_arena->iobufs = GF_CALLOC(sizeof(*iobuf), iobuf_cnt,
+ gf_common_mt_iobuf);
+ if (!iobuf_arena->iobufs)
+ return;
+
+ iobuf = iobuf_arena->iobufs;
+ for (i = 0; i < iobuf_cnt; i++) {
+ INIT_LIST_HEAD(&iobuf->list);
+ LOCK_INIT(&iobuf->lock);
+
+ iobuf->iobuf_arena = iobuf_arena;
+
+ iobuf->ptr = iobuf_arena->mem_base + offset;
+
+ list_add(&iobuf->list, &iobuf_arena->passive.list);
+ iobuf_arena->passive_cnt++;
+
+ offset += iobuf_arena->page_size;
+ iobuf++;
+ }
+
+out:
+ return;
+}
+
+void
+__iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena)
+{
+ int iobuf_cnt = 0;
+ struct iobuf *iobuf = NULL;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+
+ iobuf_cnt = iobuf_arena->page_count;
+
+ if (!iobuf_arena->iobufs) {
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND,
+ "iobufs not found");
+ return;
+ }
+
+ iobuf = iobuf_arena->iobufs;
+ for (i = 0; i < iobuf_cnt; i++) {
+ GF_ASSERT(GF_ATOMIC_GET(iobuf->ref) == 0);
+
+ LOCK_DESTROY(&iobuf->lock);
+ list_del_init(&iobuf->list);
+ iobuf++;
+ }
+
+ GF_FREE(iobuf_arena->iobufs);
+
+out:
+ return;
+}
+
+void
+__iobuf_arena_destroy(struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena)
+{
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+
+ if (iobuf_pool->rdma_deregistration)
+ iobuf_pool->rdma_deregistration(iobuf_pool->mr_list, iobuf_arena);
+
+ __iobuf_arena_destroy_iobufs(iobuf_arena);
+
+ if (iobuf_arena->mem_base && iobuf_arena->mem_base != MAP_FAILED)
+ munmap(iobuf_arena->mem_base, iobuf_arena->arena_size);
+
+ GF_FREE(iobuf_arena);
+out:
+ return;
+}
+
+struct iobuf_arena *
+__iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
+ int32_t num_iobufs)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+ size_t rounded_size = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+
+ iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
+
+ INIT_LIST_HEAD(&iobuf_arena->list);
+ INIT_LIST_HEAD(&iobuf_arena->all_list);
+ INIT_LIST_HEAD(&iobuf_arena->active.list);
+ INIT_LIST_HEAD(&iobuf_arena->passive.list);
+ iobuf_arena->iobuf_pool = iobuf_pool;
+
+ rounded_size = gf_iobuf_get_pagesize(page_size);
+
+ iobuf_arena->page_size = rounded_size;
+ iobuf_arena->page_count = num_iobufs;
+
+ iobuf_arena->arena_size = rounded_size * num_iobufs;
+
+ iobuf_arena->mem_base = mmap(NULL, iobuf_arena->arena_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (iobuf_arena->mem_base == MAP_FAILED) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED,
+ "mapping failed");
+ goto err;
+ }
+
+ if (iobuf_pool->rdma_registration) {
+ iobuf_pool->rdma_registration(iobuf_pool->device, iobuf_arena);
+ }
+
+ list_add_tail(&iobuf_arena->all_list, &iobuf_pool->all_arenas);
+
+ __iobuf_arena_init_iobufs(iobuf_arena);
+ if (!iobuf_arena->iobufs) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED,
+ "init failed");
+ goto err;
+ }
+
+ iobuf_pool->arena_cnt++;
+
+ return iobuf_arena;
+
+err:
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+
+out:
+ return NULL;
+}
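/* Rough geometry of one arena as allocated above (illustrative): for the
 * 8 KB entry of gf_iobuf_init_config, page_size = 8192 and num_iobufs = 128,
 * so arena_size = 8192 * 128 = 1 MiB is mmap'd anonymously and carved into
 * 128 iobufs whose ->ptr members point at offsets 0, 8192, 16384, ... of
 * mem_base. */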
+
+static struct iobuf_arena *
+__iobuf_arena_unprune(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+
+ list_for_each_entry(tmp, &iobuf_pool->purge[index], list)
+ {
+ list_del_init(&tmp->list);
+ iobuf_arena = tmp;
+ break;
+ }
+out:
+ return iobuf_arena;
+}
+
+static struct iobuf_arena *
+__iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int32_t num_pages, const int index)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+
+ iobuf_arena = __iobuf_arena_unprune(iobuf_pool, page_size, index);
+
+ if (!iobuf_arena) {
+ iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages);
+ if (!iobuf_arena) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
+ "arena not found");
+ return NULL;
+ }
+ }
+ list_add(&iobuf_arena->list, &iobuf_pool->arenas[index]);
+
+ return iobuf_arena;
+}
+
+/* This function destroys all the iobufs and the iobuf_pool */
+void
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->arenas[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ /* If there are no iobuf leaks, there should be no
+ * arenas in the filled list. If there are any arenas
+ * in the filled list, the destroy call below will
+ * assert.
+ */
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->filled[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ /* If there are no iobuf leaks, there should be
+ * no standard allocated arenas, as iobuf_put will free
+ * such arenas.
+ * TODO: Free the stdalloc arenas forcefully if present?
+ */
+ }
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+
+ pthread_mutex_destroy(&iobuf_pool->mutex);
+
+ GF_FREE(iobuf_pool);
+
+out:
+ return;
+}
+
+static void
+iobuf_create_stdalloc_arena(struct iobuf_pool *iobuf_pool)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+
+ /* No locking required here as it's called only once during init */
+ iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
+
+ INIT_LIST_HEAD(&iobuf_arena->list);
+ INIT_LIST_HEAD(&iobuf_arena->active.list);
+ INIT_LIST_HEAD(&iobuf_arena->passive.list);
+
+ iobuf_arena->iobuf_pool = iobuf_pool;
+
+ iobuf_arena->page_size = 0x7fffffff;
+
+ list_add_tail(&iobuf_arena->list,
+ &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]);
+
+err:
+ return;
+}
struct iobuf_pool *
iobuf_pool_new(void)
{
struct iobuf_pool *iobuf_pool = NULL;
+ int i = 0;
+ size_t page_size = 0;
+ size_t arena_size = 0;
+ int32_t num_pages = 0;
+ int index;
iobuf_pool = GF_CALLOC(sizeof(*iobuf_pool), 1, gf_common_mt_iobuf_pool);
if (!iobuf_pool)
goto out;
+ INIT_LIST_HEAD(&iobuf_pool->all_arenas);
+ pthread_mutex_init(&iobuf_pool->mutex, NULL);
+ for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
+ INIT_LIST_HEAD(&iobuf_pool->arenas[i]);
+ INIT_LIST_HEAD(&iobuf_pool->filled[i]);
+ INIT_LIST_HEAD(&iobuf_pool->purge[i]);
+ }
+
+ iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
+
+ iobuf_pool->rdma_registration = NULL;
+ iobuf_pool->rdma_deregistration = NULL;
+
+ for (i = 0; i < GF_RDMA_DEVICE_COUNT; i++) {
+ iobuf_pool->device[i] = NULL;
+ iobuf_pool->mr_list[i] = NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ page_size = gf_iobuf_init_config[i].pagesize;
+ num_pages = gf_iobuf_init_config[i].num_pages;
+
+ index = gf_iobuf_get_arena_index(page_size);
+ if (index == -1) {
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size (%zu) of iobufs in arena being added is "
+ "greater than max available",
+ page_size);
+ return NULL;
+ }
+
+ __iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, index);
+
+ arena_size += page_size * num_pages;
+ }
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
- GF_ATOMIC_INIT(iobuf_pool->mem_pool_hit, 0);
- GF_ATOMIC_INIT(iobuf_pool->mem_pool_miss, 0);
- GF_ATOMIC_INIT(iobuf_pool->active_cnt, 0);
+ /* Need an arena to handle all the bigger iobuf requests */
+ iobuf_create_stdalloc_arena(iobuf_pool);
+ iobuf_pool->arena_size = arena_size;
out:
+
return iobuf_pool;
}
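/* Back-of-the-envelope cost of the eager allocation above (illustrative):
 * summing page_size * num_pages over gf_iobuf_init_config gives
 *   128*1024 + 512*512 + 2K*512 + 8K*128 + 32K*64 + 128K*32 + 256K*8 + 1M*2
 *   = 12,976,128 bytes, i.e. roughly 12.4 MiB mmap'd per iobuf_pool at
 * startup, which is the pre-allocation the reverted patch had removed. */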
+static void
+__iobuf_arena_prune(struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena, const int index)
+{
+ /* Control reaches here only if the arena is on the purge list, and we
+ * can free the arena only if there is at least one arena left in the
+ * 'arenas' list (i.e., at least a few iobufs still free); that way,
+ * there won't be spurious mmap/munmap of buffers
+ */
+ if (list_empty(&iobuf_pool->arenas[index]))
+ goto out;
+
+ /* All cases matched, destroy */
+ list_del_init(&iobuf_arena->list);
+ list_del_init(&iobuf_arena->all_list);
+ iobuf_pool->arena_cnt--;
+
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+
+out:
+ return;
+}
+
void
-iobuf_pool_destroy(struct iobuf_pool *iobuf_pool)
+iobuf_pool_prune(struct iobuf_pool *iobuf_pool)
{
- if (!iobuf_pool)
- return;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
- if (GF_ATOMIC_GET(iobuf_pool->active_cnt) != 0)
- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND,
- "iobuf_pool_destroy called, but there"
- " are unfreed active iobufs:%" PRId64,
- GF_ATOMIC_GET(iobuf_pool->active_cnt));
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- GF_FREE(iobuf_pool);
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (list_empty(&iobuf_pool->arenas[i])) {
+ continue;
+ }
+
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i],
+ list)
+ {
+ __iobuf_arena_prune(iobuf_pool, iobuf_arena, i);
+ }
+ }
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+
+out:
return;
}
+/* Always called under the iobuf_pool mutex lock */
+static struct iobuf_arena *
+__iobuf_select_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+
+ /* look for unused iobuf from the head-most arena */
+ list_for_each_entry(trav, &iobuf_pool->arenas[index], list)
+ {
+ if (trav->passive_cnt) {
+ iobuf_arena = trav;
+ break;
+ }
+ }
+
+ if (!iobuf_arena) {
+ /* all arenas were full, find the right count to add */
+ iobuf_arena = __iobuf_pool_add_arena(
+ iobuf_pool, page_size, gf_iobuf_init_config[index].num_pages,
+ index);
+ }
+
+ return iobuf_arena;
+}
+
+/* Always called under the iobuf_pool mutex lock */
+static struct iobuf *
+__iobuf_get(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
+{
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+
+ /* most eligible arena for picking an iobuf */
+ iobuf_arena = __iobuf_select_arena(iobuf_pool, page_size, index);
+ if (!iobuf_arena)
+ return NULL;
+
+ list_for_each_entry(iobuf, &iobuf_arena->passive.list, list) break;
+
+ list_del(&iobuf->list);
+ iobuf_arena->passive_cnt--;
+
+ list_add(&iobuf->list, &iobuf_arena->active.list);
+ iobuf_arena->active_cnt++;
+
+ /* no resetting required for this element */
+ iobuf_arena->alloc_cnt++;
+
+ if (iobuf_arena->max_active < iobuf_arena->active_cnt)
+ iobuf_arena->max_active = iobuf_arena->active_cnt;
+
+ if (iobuf_arena->passive_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add(&iobuf_arena->list, &iobuf_pool->filled[index]);
+ }
+
+ return iobuf;
+}
+
struct iobuf *
-iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
+iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, size_t page_size)
{
struct iobuf *iobuf = NULL;
- gf_boolean_t hit = _gf_false;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+ int ret = -1;
- if (page_size == 0) {
- page_size = GF_IOBUF_DEFAULT_PAGE_SIZE;
+ /* The first arena in the 'MAX-INDEX' will always be used for misc */
+ list_for_each_entry(trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX], list)
+ {
+ iobuf_arena = trav;
+ break;
}
- iobuf = mem_pool_get0(sizeof(struct iobuf), &hit);
+ iobuf = GF_CALLOC(1, sizeof(*iobuf), gf_common_mt_iobuf);
if (!iobuf)
goto out;
- iobuf->free_ptr = mem_pool_get(page_size, &hit);
- if (!iobuf->free_ptr) {
- iobuf->free_ptr = GF_MALLOC(page_size, gf_common_mt_char);
- iobuf->stdalloc = _gf_true;
- }
- if (!iobuf->free_ptr) {
- mem_put(iobuf);
- iobuf = NULL;
+ /* GF_IOBUF_ALIGN_SIZE (512) is the alignment */
+ iobuf->free_ptr = GF_CALLOC(1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1),
+ gf_common_mt_char);
+ if (!iobuf->free_ptr)
goto out;
- }
- if (hit == _gf_true)
- GF_ATOMIC_INC(iobuf_pool->mem_pool_hit);
- else
- GF_ATOMIC_INC(iobuf_pool->mem_pool_miss);
- iobuf->ptr = iobuf->free_ptr;
+ iobuf->ptr = GF_ALIGN_BUF(iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE);
+ iobuf->iobuf_arena = iobuf_arena;
LOCK_INIT(&iobuf->lock);
- iobuf->page_size = page_size;
- iobuf->iobuf_pool = iobuf_pool;
-
/* Hold a ref because you are allocating and using it */
- iobuf_ref(iobuf);
- GF_ATOMIC_INC(iobuf_pool->active_cnt);
+ GF_ATOMIC_INIT(iobuf->ref, 1);
+
+ ret = 0;
out:
+ if (ret && iobuf) {
+ GF_FREE(iobuf->free_ptr);
+ GF_FREE(iobuf);
+ iobuf = NULL;
+ }
+
+ return iobuf;
+}
+
+struct iobuf *
+iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
+{
+ struct iobuf *iobuf = NULL;
+ size_t rounded_size = 0;
+ int index = 0;
+
+ if (page_size == 0) {
+ page_size = iobuf_pool->default_page_size;
+ }
+
+ rounded_size = gf_iobuf_get_pagesize(page_size);
+ if (rounded_size == -1) {
+ /* serve the requested buffer with a standard
+ memory allocation */
+ iobuf = iobuf_get_from_stdalloc(iobuf_pool, page_size);
+
+ gf_msg_debug("iobuf", 0,
+ "request for iobuf of size %zu "
+ "is serviced using standard calloc() (%p) as it "
+ "exceeds the maximum available buffer size",
+ page_size, iobuf);
+
+ iobuf_pool->request_misses++;
+ return iobuf;
+ }
+
+ index = gf_iobuf_get_arena_index(page_size);
+ if (index == -1) {
+ gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size (%zu) of iobufs in arena being added is "
+ "greater than max available",
+ page_size);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ iobuf = __iobuf_get(iobuf_pool, rounded_size, index);
+ if (!iobuf) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ "iobuf not found");
+ goto unlock;
+ }
+
+ iobuf_ref(iobuf);
+ }
+unlock:
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+
return iobuf;
}
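/* Note on the oversized path above (an illustrative reading of the code, not
 * from the commit message): requests larger than the biggest size class
 * (1 MiB) are served by iobuf_get_from_stdalloc() using a plain GF_CALLOC
 * plus manual alignment, are attributed to the stdalloc arena (page_size
 * 0x7fffffff) and counted in request_misses. On release, __iobuf_put() sees
 * that gf_iobuf_get_arena_index(0x7fffffff) returns -1 and simply frees the
 * buffer instead of putting it back on an arena list. */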
@@ -99,13 +607,23 @@ iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size,
req_size = page_size;
if (req_size == 0) {
- req_size = GF_IOBUF_DEFAULT_PAGE_SIZE;
+ req_size = iobuf_pool->default_page_size;
}
iobuf = iobuf_get2(iobuf_pool, req_size + align_size);
if (!iobuf)
return NULL;
-
+ /* If std allocation was used, then free_ptr will be non-NULL. In this
+ * case, we do not want to modify the original free_ptr.
+ * On the other hand, if the buf was obtained from one of the available
+ * arenas, then we use iobuf->free_ptr to store the original
+ * pointer to the offset into the mmap'd block of memory and in turn
+ * reuse iobuf->ptr to hold the page-aligned address. And finally, in
+ * iobuf_put(), we copy iobuf->free_ptr into iobuf->ptr - back to where
+ * it was originally when __iobuf_get() returned this iobuf.
+ */
+ if (!iobuf->free_ptr)
+ iobuf->free_ptr = iobuf->ptr;
iobuf->ptr = GF_ALIGN_BUF(iobuf->ptr, align_size);
return iobuf;
@@ -114,22 +632,118 @@ iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size,
struct iobuf *
iobuf_get(struct iobuf_pool *iobuf_pool)
{
- return iobuf_get2(iobuf_pool, GF_IOBUF_DEFAULT_PAGE_SIZE);
+ struct iobuf *iobuf = NULL;
+ int index = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+
+ index = gf_iobuf_get_arena_index(iobuf_pool->default_page_size);
+ if (index == -1) {
+ gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size (%zu) of iobufs in arena being added is "
+ "greater than max available",
+ iobuf_pool->default_page_size);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ iobuf = __iobuf_get(iobuf_pool, iobuf_pool->default_page_size, index);
+ if (!iobuf) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ "iobuf not found");
+ goto unlock;
+ }
+
+ iobuf_ref(iobuf);
+ }
+unlock:
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+
+out:
+ return iobuf;
+}
+
+void
+__iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
+{
+ struct iobuf_pool *iobuf_pool = NULL;
+ int index = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+
+ iobuf_pool = iobuf_arena->iobuf_pool;
+
+ index = gf_iobuf_get_arena_index(iobuf_arena->page_size);
+ if (index == -1) {
+ gf_msg_debug("iobuf", 0,
+ "freeing the iobuf (%p) "
+ "allocated with standard calloc()",
+ iobuf);
+
+ /* free it directly, without touching the arena lists */
+ LOCK_DESTROY(&iobuf->lock);
+ GF_FREE(iobuf->free_ptr);
+ GF_FREE(iobuf);
+ return;
+ }
+
+ if (iobuf_arena->passive_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add_tail(&iobuf_arena->list, &iobuf_pool->arenas[index]);
+ }
+
+ list_del_init(&iobuf->list);
+ iobuf_arena->active_cnt--;
+
+ if (iobuf->free_ptr) {
+ iobuf->ptr = iobuf->free_ptr;
+ iobuf->free_ptr = NULL;
+ }
+
+ list_add(&iobuf->list, &iobuf_arena->passive.list);
+ iobuf_arena->passive_cnt++;
+
+ if (iobuf_arena->active_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add_tail(&iobuf_arena->list, &iobuf_pool->purge[index]);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+ __iobuf_arena_prune(iobuf_pool, iobuf_arena, index);
+ }
+out:
+ return;
}
void
iobuf_put(struct iobuf *iobuf)
{
- LOCK_DESTROY(&iobuf->lock);
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
- if (iobuf->stdalloc)
- GF_FREE(iobuf->free_ptr);
- else
- mem_put(iobuf->free_ptr);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+
+ iobuf_arena = iobuf->iobuf_arena;
+ if (!iobuf_arena) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
+ "arena not found");
+ return;
+ }
+
+ iobuf_pool = iobuf_arena->iobuf_pool;
+ if (!iobuf_pool) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND,
+ "iobuf pool not found");
+ return;
+ }
- GF_ATOMIC_DEC(iobuf->iobuf_pool->active_cnt);
- mem_put(iobuf);
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ __iobuf_put(iobuf, iobuf_arena);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+out:
return;
}
@@ -353,10 +967,25 @@ out:
size_t
iobuf_size(struct iobuf *iobuf)
{
- if (!iobuf)
- return 0;
+ size_t size = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- return iobuf->page_size;
+ if (!iobuf->iobuf_arena) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
+ "arena not found");
+ goto out;
+ }
+
+ if (!iobuf->iobuf_arena->iobuf_pool) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND,
+ "pool not found");
+ goto out;
+ }
+
+ size = iobuf->iobuf_arena->page_size;
+out:
+ return size;
}
size_t
@@ -381,20 +1010,113 @@ out:
}
void
+iobuf_info_dump(struct iobuf *iobuf, const char *key_prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ struct iobuf my_iobuf;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+
+ ret = TRY_LOCK(&iobuf->lock);
+ if (ret) {
+ return;
+ }
+ memcpy(&my_iobuf, iobuf, sizeof(my_iobuf));
+ UNLOCK(&iobuf->lock);
+
+ gf_proc_dump_build_key(key, key_prefix, "ref");
+ gf_proc_dump_write(key, "%" GF_PRI_ATOMIC, GF_ATOMIC_GET(my_iobuf.ref));
+ gf_proc_dump_build_key(key, key_prefix, "ptr");
+ gf_proc_dump_write(key, "%p", my_iobuf.ptr);
+
+out:
+ return;
+}
+
+void
+iobuf_arena_info_dump(struct iobuf_arena *iobuf_arena, const char *key_prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 1;
+ struct iobuf *trav;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+
+ gf_proc_dump_build_key(key, key_prefix, "mem_base");
+ gf_proc_dump_write(key, "%p", iobuf_arena->mem_base);
+ gf_proc_dump_build_key(key, key_prefix, "active_cnt");
+ gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
+ gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "alloc_cnt");
+ gf_proc_dump_write(key, "%" PRIu64, iobuf_arena->alloc_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "max_active");
+ gf_proc_dump_write(key, "%d", iobuf_arena->max_active);
+ gf_proc_dump_build_key(key, key_prefix, "page_size");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, iobuf_arena->page_size);
+ list_for_each_entry(trav, &iobuf_arena->active.list, list)
+ {
+ gf_proc_dump_build_key(key, key_prefix, "active_iobuf.%d", i++);
+ gf_proc_dump_add_section("%s", key);
+ iobuf_info_dump(trav, key);
+ }
+
+out:
+ return;
+}
+
+void
iobuf_stats_dump(struct iobuf_pool *iobuf_pool)
{
+ char msg[1024];
+ struct iobuf_arena *trav = NULL;
+ int i = 1;
+ int j = 0;
+ int ret = -1;
+
GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+ ret = pthread_mutex_trylock(&iobuf_pool->mutex);
+
+ if (ret) {
+ return;
+ }
gf_proc_dump_add_section("iobuf.global");
gf_proc_dump_write("iobuf_pool", "%p", iobuf_pool);
- gf_proc_dump_write("iobuf_pool.default_page_size", "%llu",
- GF_IOBUF_DEFAULT_PAGE_SIZE);
- gf_proc_dump_write("iobuf_pool.request_hits", "%" PRId64,
- GF_ATOMIC_GET(iobuf_pool->mem_pool_hit));
+ gf_proc_dump_write("iobuf_pool.default_page_size", "%" GF_PRI_SIZET,
+ iobuf_pool->default_page_size);
+ gf_proc_dump_write("iobuf_pool.arena_size", "%" GF_PRI_SIZET,
+ iobuf_pool->arena_size);
+ gf_proc_dump_write("iobuf_pool.arena_cnt", "%d", iobuf_pool->arena_cnt);
gf_proc_dump_write("iobuf_pool.request_misses", "%" PRId64,
- GF_ATOMIC_GET(iobuf_pool->mem_pool_miss));
- gf_proc_dump_write("iobuf_pool.active_cnt", "%" PRId64,
- GF_ATOMIC_GET(iobuf_pool->active_cnt));
+ iobuf_pool->request_misses);
+
+ for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) {
+ list_for_each_entry(trav, &iobuf_pool->arenas[j], list)
+ {
+ snprintf(msg, sizeof(msg), "arena.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
+ }
+ list_for_each_entry(trav, &iobuf_pool->purge[j], list)
+ {
+ snprintf(msg, sizeof(msg), "purge.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
+ }
+ list_for_each_entry(trav, &iobuf_pool->filled[j], list)
+ {
+ snprintf(msg, sizeof(msg), "filled.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
+ }
+ }
+
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
return;
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 4f46b1e3ca0..81badc0ba0b 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -920,7 +920,7 @@ mem_pool_get(unsigned long sizeof_type, gf_boolean_t *hit)
sizeof_type |= (1 << POOL_SMALLEST) - 1;
power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1;
if (power > POOL_LARGEST) {
- gf_msg_callingfn("mem-pool", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
return NULL;
}