summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src/rot-buffs.h
diff options
context:
space:
mode:
authorVenky Shankar <vshankar@redhat.com>2015-02-03 19:03:30 +0530
committerVijay Bellur <vbellur@redhat.com>2015-03-18 21:22:33 -0700
commit87a9d23627586a5d5cd8a502a08ecbdb6f0f7bb2 (patch)
tree7e627c0810e151177364f66a9442529d1e95826f /libglusterfs/src/rot-buffs.h
parentd236b01a8bf68b83c76ea1cfa8351833e19695f7 (diff)
libglusterfs/rot-buffs: rotational buffers
This patch introduces rotational buffers aiming at the classic multiple producer and multiple consumer problem. A fixed set of buffer list is allocated during initialization, where each list consist of a list of buffers. Each buffer is an iovec pointing to a memory region of fixed allocation size. Multiple producers write data to these buffers. A buffer list starts with a single buffer (iovec) and allocates more when required (although this can be preallocatd in multiples of k). rot-buffs allow multiple producers to write data parallely with a bit of extra cost of taking locks. Therefore, it's much suited for large writes. Multiple producers are allowed to write in the buffer parallely by "reserving" write space for selected number of bytes and returning pointer to the start of the reserved area. The write size is selected by the producer before it starts the write (which is often known). Therefore, the write itself need not be serialized -- just the space reservation needs to be done safely. The other part is when a consumer kicks in to consume what has been produced. At this point, a buffer list switch is performed. The "current" buffer list pointer is safely pointed to the next available buffer list. New writes are now directed to the just switched buffer list (the old buffer list is now considered out of rotation). Note that the old buffer still may have producers in progress (pending writes), so the consumer has to wait till the writers are drained. Currently this is the slow path for producers (write completion) and needs to be improved. Currently, there is special handling for cases where the number of consumers match (or exceed) the number of producers, which could result in writer starvation. In this scenario, when a consumers requests a buffer list for consumption, a check is performed for writer starvation and consumption is denied until at least another buffer list is ready of the producer for writes, i.e., one (or more) consumer(s) completed, thereby putting the buffer list back in rotation. [ NOTE: I've not performance tested this producer-consumer model yet. It's being used in changelog for event notification. The list of buffers (iovecs) are directly passed to RPC layer. ] Change-Id: I88d235522b05ab82509aba861374a2312bff57f2 BUG: 1170075 Signed-off-by: Venky Shankar <vshankar@redhat.com> Reviewed-on: http://review.gluster.org/9706 Tested-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'libglusterfs/src/rot-buffs.h')
-rw-r--r--libglusterfs/src/rot-buffs.h121
1 files changed, 121 insertions, 0 deletions
diff --git a/libglusterfs/src/rot-buffs.h b/libglusterfs/src/rot-buffs.h
new file mode 100644
index 00000000000..aac24a4f571
--- /dev/null
+++ b/libglusterfs/src/rot-buffs.h
@@ -0,0 +1,121 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ROT_BUFFS_H
+#define __ROT_BUFFS_H
+
+#include "list.h"
+#include "locking.h"
+#include "common-utils.h"
+
+typedef struct rbuf_iovec {
+ struct iovec iov;
+
+ struct list_head list;
+} rbuf_iovec_t;
+
+#define RBUF_IOVEC_SIZE (sizeof (rbuf_iovec_t))
+
+typedef struct rbuf_list {
+ gf_lock_t c_lock;
+
+ pthread_mutex_t b_lock; /* protects this structure */
+ pthread_cond_t b_cond; /* signal for writer completion */
+
+ gf_boolean_t awaiting;
+
+ unsigned long long pending; /* pending writers */
+ unsigned long long completed; /* completed writers */
+
+ rbuf_iovec_t *rvec; /* currently used IO vector */
+
+ struct list_head veclist; /* list of attached rbuf_iov */
+
+ unsigned long long used; /* consumable entries
+ attached in ->veclist */
+ unsigned long long total; /* total entries in ->veclist (used
+ during deallocation) */
+
+ unsigned long seq[2]; /* if interested, this whould store
+ the start sequence number and the
+ range */
+
+ struct list_head list; /* attachment to rbuf_t */
+} rbuf_list_t;
+
+struct rlist_iter {
+ struct list_head veclist;
+
+ unsigned long long iter;
+};
+
+#define RLIST_ENTRY_COUNT(rlist) rlist->used
+
+#define rlist_iter_init(riter, rlist) \
+ do { \
+ (riter)->iter = rlist->used; \
+ (riter)->veclist = rlist->veclist; \
+ } while (0)
+
+#define rvec_for_each_entry(pos, riter) \
+ for (pos = list_entry \
+ ((riter)->veclist.next, typeof(*pos), list); \
+ (riter)->iter > 0; \
+ pos = list_entry \
+ (pos->list.next, typeof(*pos), list), \
+ --((riter)->iter))
+
+/**
+ * Sequence number assigment routine is called during buffer
+ * switch under rbuff ->lock.
+ */
+typedef void (sequence_fn) (rbuf_list_t *, void *);
+
+#define RLIST_STORE_SEQ(rlist, start, range) \
+ do { \
+ rlist->seq[0] = start; \
+ rlist->seq[1] = range; \
+ } while (0)
+
+#define RLIST_GET_SEQ(rlist, start, range) \
+ do { \
+ start = rlist->seq[0]; \
+ range = rlist->seq[1]; \
+ } while (0)
+
+typedef struct rbuf {
+ gf_lock_t lock; /* protects "current" rlist */
+
+ rbuf_list_t *current; /* cached pointer to first free rlist */
+
+ struct list_head freelist;
+} rbuf_t;
+
+typedef enum {
+ RBUF_CONSUMABLE = 1,
+ RBUF_BUSY,
+ RBUF_EMPTY,
+ RBUF_WOULD_STARVE,
+} rlist_retval_t;
+
+/* Initialization/Destruction */
+rbuf_t *rbuf_init (int);
+void rbuf_dtor (rbuf_t *);
+
+/* Producer API */
+char *rbuf_reserve_write_area (rbuf_t *, size_t, void **);
+int rbuf_write_complete (void *);
+
+/* Consumer API */
+int rbuf_get_buffer (rbuf_t *, void **, sequence_fn *, void *);
+int rbuf_wait_for_completion (rbuf_t *, void *,
+ void (*)(rbuf_list_t *, void *), void *);
+
+#endif