summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- libglusterfs/src/globals.h 4
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 55
-rw-r--r-- xlators/cluster/afr/src/afr-inode-write.c 73
-rw-r--r-- xlators/cluster/afr/src/afr-lk-common.c 26
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 391
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.h 4
-rw-r--r-- xlators/cluster/afr/src/afr.c 12
-rw-r--r-- xlators/cluster/afr/src/afr.h 38
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-set.c 7
9 files changed, 546 insertions, 64 deletions
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index ab25c7f0ec1..e186d58cea6 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -68,6 +68,10 @@
#define GD_OP_VERSION_3_8_0 30800 /* Op-version for GlusterFS 3.8.0 */
+#define GD_OP_VERSION_3_8_3 30803 /* Op-version for GlusterFS 3.8.3 */
+
+#define GD_OP_VERSION_3_8_4 30804 /* Op-version for GlusterFS 3.8.4 */
+
#define GD_OP_VERSION_3_9_0 30900 /* Op-version for GlusterFS 3.9.0 */
#define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index dec667fd460..cf838846cbd 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -42,6 +42,7 @@
#include "afr-self-heal.h"
#include "afr-self-heald.h"
#include "afr-messages.h"
+#include "compound-fop-utils.h"
gf_boolean_t
afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,
@@ -4592,6 +4593,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
local->need_full_crawl = _gf_false;
+ local->compound = _gf_false;
INIT_LIST_HEAD (&local->healer);
return 0;
out:
@@ -4743,6 +4745,7 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->pending)
goto out;
+ local->compound = _gf_false;
INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
@@ -5536,3 +5539,55 @@ afr_get_msg_id (char *op_type)
return AFR_MSG_ADD_BRICK_STATUS;
return -1;
}
+
+/*
+ * Decides whether the pre-op and the fop may be sent together as one
+ * compound request.
+ *
+ * Returns _gf_true only when priv->arbiter_count is zero,
+ * priv->use_compound_fops is set and @fop is GF_FOP_WRITE; every other
+ * combination yields _gf_false.
+ */
+gf_boolean_t
+afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop)
+{
+        gf_boolean_t allowed = _gf_false;
+
+        if ((priv->arbiter_count == 0) && priv->use_compound_fops) {
+                /* Write is the only fop that is compounded so far. */
+                if (fop == GF_FOP_WRITE)
+                        allowed = _gf_true;
+        }
+
+        return allowed;
+}
+
+/*
+ * Packs the arguments of @fop into slot @index of @args, taking them from
+ * the frame's afr_local_t.
+ *
+ * Returns the callback that must handle the compound reply, or NULL when
+ * @fop is not one this function knows how to pack (only GF_FOP_WRITE is).
+ */
+afr_compound_cbk_t
+afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
+                   glusterfs_fop_t fop, int index)
+{
+        afr_local_t        *local = frame->local;
+        afr_compound_cbk_t  cbk   = NULL;
+
+        if (fop == GF_FOP_WRITE) {
+                COMPOUND_PACK_ARGS (writev, GF_FOP_WRITE,
+                                    args, index,
+                                    local->fd, local->cont.writev.vector,
+                                    local->cont.writev.count,
+                                    local->cont.writev.offset,
+                                    local->cont.writev.flags,
+                                    local->cont.writev.iobref,
+                                    local->xdata_req);
+                cbk = afr_pre_op_writev_cbk;
+        }
+
+        return cbk;
+}
+
+/*
+ * Cleans up @args through compound_args_cleanup() and drops one reference
+ * on each of the two dictionaries. Any argument that is NULL is skipped.
+ */
+void
+afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
+                      dict_t *newloc_xdata)
+{
+        if (args != NULL) {
+                compound_args_cleanup (args);
+        }
+
+        if (xdata != NULL) {
+                dict_unref (xdata);
+        }
+
+        if (newloc_xdata != NULL) {
+                dict_unref (newloc_xdata);
+        }
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 24ab52f0266..200b420f5f0 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -292,21 +292,16 @@ afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
}
}
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+void
+afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- call_frame_t *fop_frame = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
int ret = 0;
+ afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
- local = frame->local;
-
LOCK (&frame->lock);
{
__afr_inode_write_fill (frame, this, child_index, op_ret,
@@ -324,32 +319,60 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&open_fd_count);
if (ret == -1)
goto unlock;
- if ((open_fd_count > local->open_fd_count)) {
- local->open_fd_count = open_fd_count;
- local->update_open_fd_count = _gf_true;
+ if (open_fd_count > local->open_fd_count) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
}
}
unlock:
UNLOCK (&frame->lock);
+}
- call_count = afr_frame_return (frame);
+void
+afr_process_post_writev (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- if (call_count == 0) {
- if (!local->stable_write && !local->append_write)
- /* An appended write removes the necessity to
- fsync() the file. This is because self-heal
- has the logic to check for larger file when
- the xattrs are not reliably pointing at
- a stale file.
- */
- afr_fd_report_unstable_write (this, local->fd);
+ local = frame->local;
- __afr_inode_write_finalize (frame, this);
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ __afr_inode_write_finalize (frame, this);
- afr_writev_handle_short_writes (frame, this);
+ afr_writev_handle_short_writes (frame, this);
- if (local->update_open_fd_count)
- afr_handle_open_fd_count (frame, this);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+}
+
+int
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int ret = 0;
+
+ local = frame->local;
+
+ afr_inode_write_fill (frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ afr_process_post_writev (frame, this);
if (!afr_txn_nothing_failed (frame, this)) {
//Don't unwind until post-op is complete
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 718ba318cfe..233672898f1 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -463,8 +463,8 @@ transaction_lk_op (afr_local_t *local)
}
-static int
-is_afr_lock_transaction (afr_local_t *local)
+int
+afr_is_inodelk_transaction(afr_local_t *local)
{
int ret = 0;
@@ -636,13 +636,25 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+/*
+ * Records that the inodelk on child @child_index is gone: masks the child's
+ * entry in the locked_nodes array of the inodelk for int_lock->domain with
+ * LOCKED_NO and, when the transaction has an eager_lock array, zeroes the
+ * child's slot in it.
+ */
+void
+afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
+                      int32_t child_index)
+{
+        afr_inodelk_t *lk = afr_get_inodelk (int_lock, int_lock->domain);
+
+        lk->locked_nodes[child_index] =
+                lk->locked_nodes[child_index] & LOCKED_NO;
+
+        if (local->transaction.eager_lock != NULL) {
+                local->transaction.eager_lock[child_index] = 0;
+        }
+}
+
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
afr_private_t *priv = NULL;
@@ -665,11 +677,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
lkowner_utoa (&frame->root->lk_owner));
}
-
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- inodelk->locked_nodes[child_index] &= LOCKED_NO;
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
+ afr_update_uninodelk (local, int_lock, child_index);
afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
@@ -1712,7 +1720,7 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
local = frame->local;
if (transaction_lk_op (local)) {
- if (is_afr_lock_transaction (local))
+ if (afr_is_inodelk_transaction(local))
afr_unlock_inodelk (frame, this);
else
afr_unlock_entrylk (frame, this);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 64a42d9fc7e..db7c6dbd2b0 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -17,6 +17,7 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
+#include "compound-fop-utils.h"
#include <signal.h>
@@ -32,6 +33,14 @@ gf_boolean_t
afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
int
+afr_changelog_call_count (afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned int child_count);
+int
+afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op);
+int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op);
@@ -821,14 +830,16 @@ afr_handle_quorum (call_frame_t *frame)
int
afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
- int i = 0;
- int ret = 0;
- int idx = 0;
- afr_local_t * local = NULL;
- dict_t *xattr = NULL;
- int nothing_failed = 1;
- gf_boolean_t need_undirty = _gf_false;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ int nothing_failed = 1;
+ int piggyback = 0;
+ gf_boolean_t need_undirty = _gf_false;
afr_handle_quorum (frame);
local = frame->local;
@@ -893,8 +904,34 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
- afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done,
- AFR_TRANSACTION_POST_OP);
+ if (local->compound && local->fd) {
+ LOCK (&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (local->fd, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ local->transaction.eager_lock[i]) {
+ if (fd_ctx->lock_piggyback[i])
+ piggyback = 1;
+ }
+ if (piggyback == 1)
+ break;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ }
+
+ /* Do not compound if any brick got piggybacked lock as
+ * unlock should not be done for that. */
+ if (local->compound && !piggyback) {
+ afr_post_op_unlock_do (frame, this, xattr,
+ afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+ } else {
+ afr_changelog_do (frame, this, xattr,
+ afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+ }
out:
if (xattr)
dict_unref (xattr);
@@ -1189,6 +1226,299 @@ out:
}
int
+/*
+ * Callback for the compound request wound by afr_pre_op_fop_do(): slot 0 of
+ * the reply list belongs to the pre-op fxattrop, slot 1 to the writev.
+ *
+ * @cookie carries the index of the child that replied and @data the
+ * compound_args_cbk_t with the per-fop replies. The writev reply is recorded
+ * for that child; once the last child has answered the write is finalized
+ * and the transaction resumed. Always returns 0.
+ */
+afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       void *data, dict_t *xdata)
+{
+        afr_local_t         *local          = NULL;
+        call_frame_t        *fop_frame      = NULL;
+        default_args_cbk_t  *write_args_cbk = NULL;
+        compound_args_cbk_t *args_cbk       = data;
+        int                  call_count     = -1;
+        int                  child_index    = -1;
+
+        local = frame->local;
+        child_index = (long) cookie;
+
+        if (local->pre_op_compat)
+                afr_changelog_pre_op_update (frame, this);
+
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                afr_transaction_fop_failed (frame, this, child_index);
+        }
+
+        if (args_cbk == NULL) {
+                /* There is no per-fop reply list to read, so record the
+                 * status of the compound request itself.
+                 * NOTE(review): presumably this happens when the request is
+                 * failed before a reply was ever built - confirm. */
+                afr_inode_write_fill (frame, this, child_index, op_ret,
+                                      op_errno, NULL, NULL, NULL);
+        } else {
+                /* The reply has to be filed under the child that sent it;
+                 * using a fixed index would overwrite slot 0 with every
+                 * brick's answer. */
+                write_args_cbk = &args_cbk->rsp_list[1];
+                afr_inode_write_fill (frame, this, child_index,
+                                      write_args_cbk->op_ret,
+                                      write_args_cbk->op_errno,
+                                      &write_args_cbk->prestat,
+                                      &write_args_cbk->poststat,
+                                      write_args_cbk->xdata);
+        }
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                afr_process_post_writev (frame, this);
+                if (!afr_txn_nothing_failed (frame, this)) {
+                        /* Don't unwind until post-op is complete */
+                        local->transaction.resume (frame, this);
+                } else {
+                        /* frame change, place frame in post-op delay and unwind */
+                        fop_frame = afr_transaction_detach_fop_frame (frame);
+                        afr_writev_copy_outvars (frame, fop_frame);
+                        local->transaction.resume (frame, this);
+                        afr_writev_unwind (fop_frame, this);
+                }
+        }
+        return 0;
+}
+
+/*
+ * Common preparation for winding a changelog xattrop.
+ *
+ * Stores in *@call_count the value afr_changelog_call_count() computes for
+ * the transaction. When that is zero, @changelog_resume is invoked right
+ * away and -1 is returned, so the caller has nothing left to wind.
+ * Otherwise @xdata/@newloc_xdata are populated, local->call_count and
+ * local->transaction.changelog_resume are set, and 0 is returned.
+ */
+int
+afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count,
+                       afr_changelog_resume_t changelog_resume,
+                       afr_xattrop_type_t op, dict_t **xdata,
+                       dict_t **newloc_xdata)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            count = 0;
+
+        count = afr_changelog_call_count (local->transaction.type,
+                                          local->transaction.pre_op,
+                                          priv->child_count);
+        *call_count = count;
+
+        if (count != 0) {
+                afr_changelog_populate_xdata (frame, op, xdata, newloc_xdata);
+                local->call_count = count;
+                local->transaction.changelog_resume = changelog_resume;
+                return 0;
+        }
+
+        /* Nothing to wind: hand control straight back. */
+        changelog_resume (frame, this);
+        return -1;
+}
+
+/*
+ * Winds the pre-op fxattrop and the fop itself as one two-slot compound
+ * request (GF_CFOP_XATTROP_WRITEV) to every child whose
+ * local->transaction.pre_op[] flag is set. Slot 0 holds the fxattrop with
+ * @xattr; slot 1 is filled by afr_pack_fop_args(), which also supplies the
+ * callback for the replies.
+ *
+ * If the compound arguments cannot be allocated, or no callback is known
+ * for local->op, the transaction is failed: op_ret/op_errno are set on
+ * @local, the lk-owner is restored and afr_unlock() is called with
+ * local->transaction.done as its completion callback.
+ *
+ * Always returns 0.
+ */
+int
+afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+                   afr_changelog_resume_t changelog_resume,
+                   afr_xattrop_type_t op)
+{
+        afr_local_t        *local        = NULL;
+        afr_private_t      *priv         = NULL;
+        dict_t             *xdata        = NULL;
+        dict_t             *newloc_xdata = NULL;
+        compound_args_t    *args         = NULL;
+        int                 i            = 0;
+        int                 call_count   = 0;
+        afr_compound_cbk_t  compound_cbk = NULL;
+        int                 ret          = 0;
+        int                 op_errno     = ENOMEM;
+
+        local = frame->local;
+        priv = this->private;
+
+        /* If lock failed on all, just unlock and unwind */
+        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+                                     op, &xdata, &newloc_xdata);
+
+        if (ret)
+                return 0;
+
+        local->call_count = call_count;
+
+        afr_save_lk_owner (frame);
+        frame->root->lk_owner =
+                local->transaction.main_frame->root->lk_owner;
+
+        args = compound_fop_alloc (2, GF_CFOP_XATTROP_WRITEV, NULL);
+
+        if (!args)
+                goto err;
+
+        /* pack pre-op part */
+        i = 0;
+        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
+                            args, i,
+                            local->fd, GF_XATTROP_ADD_ARRAY,
+                            xattr, xdata);
+        i++;
+        /* pack whatever fop needs to be packed
+         * @compound_cbk holds the cbk that would need to be called
+         */
+        compound_cbk = afr_pack_fop_args (frame, args, local->op, i);
+        if (!compound_cbk) {
+                /* afr_pack_fop_args() has no packer for this fop; winding
+                 * with a NULL callback must never happen. */
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                /* Means lock did not succeed on this brick */
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, compound_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->compound,
+                                   args,
+                                   NULL);
+                /* Stop as soon as every expected wind has been issued. */
+                if (!--call_count)
+                        break;
+        }
+
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+err:
+        local->internal_lock.lock_cbk = local->transaction.done;
+        local->op_ret = -1;
+        local->op_errno = op_errno;
+
+        afr_restore_lk_owner (frame);
+        afr_unlock (frame, this);
+
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+}
+
+/*
+ * Callback for the compound request wound by afr_post_op_unlock_do().
+ *
+ * @cookie carries the index of the child that replied. The child's inodelk
+ * bookkeeping is cleared and int_lock->lk_call_count is decremented under
+ * frame->lock. When the count reaches zero, a pending
+ * local->transaction.resume_stub (if any) is resumed and int_lock->lock_cbk
+ * is invoked.
+ *
+ * op_ret, op_errno, data and xdata are not examined. Always returns 0.
+ */
+int
+afr_post_op_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno,
+                        void *data, dict_t *xdata)
+{
+        afr_local_t         *local       = frame->local;
+        afr_internal_lock_t *int_lock    = &local->internal_lock;
+        int32_t              child_index = (long) cookie;
+        int                  call_count  = -1;
+
+        afr_update_uninodelk (local, int_lock, child_index);
+
+        LOCK (&frame->lock);
+        {
+                call_count = --int_lock->lk_call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (call_count == 0) {
+                if (local->transaction.resume_stub) {
+                        call_resume (local->transaction.resume_stub);
+                        local->transaction.resume_stub = NULL;
+                }
+                gf_msg_trace (this->name, 0,
+                              "All internal locks unlocked");
+                int_lock->lock_cbk (frame, this);
+        }
+
+        return 0;
+}
+
+/*
+ * Winds the post-op fxattrop and the inodelk unlock as one two-slot
+ * compound request (GF_CFOP_XATTROP_UNLOCK) to every child whose
+ * local->transaction.pre_op[] flag is set. Replies are handled by
+ * afr_post_op_unlock_cbk, and local->transaction.done is installed as the
+ * lock callback that runs once all of them have arrived.
+ *
+ * @xattr is the changelog dictionary for the fxattrop; @changelog_resume and
+ * @op are passed on to afr_changelog_prepare().
+ *
+ * Always returns 0.
+ */
+int
+afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ dict_t *newloc_xdata = NULL;
+ compound_args_t *args = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ struct gf_flock *flock_use = NULL;
+ int i = 0;
+ int call_count = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+
+ /* Build two unlock descriptions: 'flock' covers the range recorded in
+ * the transaction's inodelk, 'full_flock' keeps start and length at
+ * zero. Which one is sent is decided further down. */
+ if (afr_is_inodelk_transaction(local)) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
+ full_flock.l_type = F_UNLCK;
+
+ }
+
+ ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+ op, &xdata, &newloc_xdata);
+
+ /* Non-zero means afr_changelog_prepare() found nothing to wind and
+ * has already called changelog_resume. */
+ if (ret)
+ return 0;
+
+ /* Each reply decrements lk_call_count in afr_post_op_unlock_cbk. */
+ int_lock->lk_call_count = call_count;
+
+ int_lock->lock_cbk = local->transaction.done;
+
+ args = compound_fop_alloc (2, GF_CFOP_XATTROP_UNLOCK, NULL);
+
+ if (!args) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ /* Slot 0: the post-op fxattrop. */
+ i = 0;
+ COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
+ args, i,
+ local->fd, GF_XATTROP_ADD_ARRAY,
+ xattr, xdata);
+ i++;
+ /* With eager_lock_on set the zero start/length unlock is used,
+ * otherwise the transaction's own range. */
+ if (!local->transaction.eager_lock_on)
+ flock_use = &flock;
+ else
+ flock_use = &full_flock;
+
+ /* Slot 1: the unlock, fd based when an fd is available.
+ * NOTE(review): for a non-inodelk transaction slot 1 is left unpacked
+ * although two slots were allocated - confirm callers never get here
+ * with such a transaction. */
+ if (afr_is_inodelk_transaction(local)) {
+ if (local->fd) {
+ COMPOUND_PACK_ARGS (finodelk, GF_FOP_FINODELK,
+ args, i,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+ } else {
+ COMPOUND_PACK_ARGS (inodelk, GF_FOP_INODELK,
+ args, i,
+ int_lock->domain, &local->loc,
+ F_SETLK, flock_use, NULL);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /* pre_op[i] has to be true for all nodes that were
+ * successfully locked. */
+ if (!local->transaction.pre_op[i])
+ continue;
+ STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->compound,
+ args,
+ NULL);
+ /* Stop once the expected number of winds has been issued. */
+ if (!--call_count)
+ break;
+ }
+out:
+ afr_compound_cleanup (args, xdata, newloc_xdata);
+ return 0;
+}
+
+int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op)
@@ -1199,23 +1529,16 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
dict_t *newloc_xdata = NULL;
int i = 0;
int call_count = 0;
+ int ret = 0;
local = frame->local;
priv = this->private;
- call_count = afr_changelog_call_count (local->transaction.type,
- local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- changelog_resume (frame, this);
- return 0;
- }
-
- afr_changelog_populate_xdata (frame, op, &xdata, &newloc_xdata);
- local->call_count = call_count;
+ ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+ op, &xdata, &newloc_xdata);
- local->transaction.changelog_resume = changelog_resume;
+ if (ret)
+ return 0;
for (i = 0; i < priv->child_count; i++) {
if (!local->transaction.pre_op[i])
@@ -1380,8 +1703,21 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
goto next;
}
- afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop,
- AFR_TRANSACTION_PRE_OP);
+ /* Till here we have already decided if pre-op needs to be done,
+ * based on various criteria. The only thing that needs to be checked
+ * now on is whether compound-fops is enabled or not.
+ * If it is, then perform pre-op and fop together for writev op.
+ */
+ if (afr_can_compound_pre_op_and_op (priv, local->op)) {
+ local->compound = _gf_true;
+ afr_pre_op_fop_do (frame, this, xdata_req,
+ afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+ } else {
+ afr_changelog_do (frame, this, xdata_req,
+ afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+ }
if (xdata_req)
dict_unref (xdata_req);
@@ -1738,10 +2074,6 @@ out:
void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
- call_stub_t *stub);
-
-void
afr_delayed_changelog_wake_up_cbk (void *data)
{
fd_t *fd = NULL;
@@ -2031,7 +2363,6 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
afr_delayed_changelog_post_op (this, NULL, fd, NULL);
}
-
int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
@@ -2082,7 +2413,7 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
- static gf_boolean_t
+static gf_boolean_t
afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
{
uint64_t start1 = local1->transaction.start;
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index ca8fcfefa89..db8245691ba 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -59,4 +59,8 @@ void
afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
inode_t *inode1, unsigned char *readable1,
inode_t *inode2, unsigned char *readable2);
+int
+afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ void *data, dict_t *xdata);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index bf5a0d16fc7..24526313e89 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -210,6 +210,9 @@ reconfigure (xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
out);
+ GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops,
+ options, bool,
+ out);
GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options,
bool, out);
@@ -479,6 +482,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
+ GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops,
+ bool, out);
GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);
GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
@@ -1004,5 +1009,12 @@ struct volume_options options[] = {
.description = "If this option is enabled, i/o will fail even if "
"one of the bricks is down in the replicas",
},
+ { .key = {"use-compound-fops"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Use compound fops framework to modify afr "
+ "transaction such that network roundtrips are "
+ "reduced, thus improving the performance.",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 983f07fcce9..ff136c0b093 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -45,6 +45,10 @@ typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int
typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
+typedef int (*afr_compound_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ void *data, dict_t *xdata);
+
#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
#define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;})
@@ -153,6 +157,7 @@ typedef struct _afr_private {
char *locking_scheme;
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
+ gf_boolean_t use_compound_fops;
} afr_private_t;
@@ -786,6 +791,7 @@ typedef struct _afr_local {
call_frame_t *heal_frame;
gf_boolean_t need_full_crawl;
+ gf_boolean_t compound;
} afr_local_t;
@@ -1181,4 +1187,36 @@ afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,
void
afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret,
int32_t *op_errno);
+
+void
+afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+void
+afr_process_post_writev (call_frame_t *frame, xlator_t *this);
+
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this);
+
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame);
+
+void
+afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
+ int32_t child_index);
+gf_boolean_t
+afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop);
+
+afr_compound_cbk_t
+afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
+ glusterfs_fop_t fop, int index);
+int
+afr_is_inodelk_transaction(afr_local_t *local);
+
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+
+void
+afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
+ dict_t *newloc_xdata);
#endif /* __AFR_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 5a00811ec6f..ce34ffd2b05 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3029,6 +3029,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_9_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "cluster.use-compound-fops",
+ .voltype = "cluster/replicate",
+ .value = "off",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_8_4,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = NULL
}
};