summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorAshish Pandey <aspandey@redhat.com>2018-12-21 14:31:15 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2019-02-18 14:39:55 +0000
commitb2f561c069d38f5075907b9f5a255f585a602c79 (patch)
tree0aa90b7120673e2f701d798c2f288d168fe38909 /xlators/cluster
parent183cac3a642ae4a1b750bc11673045fc0ff66a6d (diff)
cluster/thin-arbiter: Consider thin-arbiter before marking new entry changelog
If a fop to create an entry fails on one of the data bricks, we mark the pending changelog on the entry on the brick for which it was successful. This is done as part of the post-op phase to make sure that the entry gets healed even if it gets renamed to some other path where its parent was not marked as bad. As it happens as part of post-op, we should consult the thin-arbiter to check whether the brick on which the fop was successful is the good brick or not. This will avoid split-brain and other issues. >Change-Id: I12686675be98f02f70a5186b3ed748c541514d53 >Signed-off-by: Ashish Pandey <aspandey@redhat.com> Change-Id: I12686675be98f02f70a5186b3ed748c541514d53 updates: bz#1672314 Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-common.c1
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c94
-rw-r--r--xlators/cluster/afr/src/afr.h5
4 files changed, 87 insertions, 19 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 54889e0a9b1..f030de5e5cc 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -5694,6 +5694,7 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
local->ta_child_up = priv->ta_child_up;
local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
}
+ local->is_new_entry = _gf_false;
INIT_LIST_HEAD(&local->healer);
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 5725b1c5cb3..a9a8e91c898 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -365,6 +365,12 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
if (pre_op_count == priv->child_count && !failed_count)
return;
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+ return;
+ }
+
afr_mark_new_entry_changelog(frame, this);
return;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index fb78c198d9c..74c55f99ec2 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -28,6 +28,12 @@ typedef enum {
static void
afr_lock_resume_shared(struct list_head *list);
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this);
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this);
+
void
__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared);
@@ -660,6 +666,7 @@ afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending)
return ret;
}
+
static void
afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
{
@@ -739,9 +746,9 @@ afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
--priv->ta_on_wire_txn_count;
UNLOCK(&priv->lock);
if (entry->ta_failed_subvol == bad_child) {
- afr_changelog_post_op_do(entry->transaction.frame, this);
+ afr_post_op_handle_success(entry->transaction.frame, this);
} else {
- afr_changelog_post_op_fail(entry->transaction.frame, this, EIO);
+ afr_post_op_handle_failure(entry->transaction.frame, this);
}
}
}
@@ -1187,6 +1194,34 @@ afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
return 0;
}
+int **
+afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, afr_local_t *local)
+{
+ int **changelog = NULL;
+ int idx = 0;
+ int i;
+
+ if (local->is_new_entry == _gf_true) {
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ } else {
+ idx = afr_index_for_transaction_type(local->transaction.type);
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog) {
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ changelog[i][idx] = hton32(1);
+ }
+ afr_set_pending_dict(priv, xattr, changelog);
+ }
+
+out:
+ return changelog;
+}
+
static int
afr_ta_post_op_do(void *opaque)
{
@@ -1195,13 +1230,13 @@ afr_ta_post_op_do(void *opaque)
xlator_t *this = NULL;
call_frame_t *txn_frame = NULL;
dict_t *xattr = NULL;
- int **pending = NULL;
+ unsigned char *pending = NULL;
+ int **changelog = NULL;
int failed_subvol = -1;
int success_subvol = -1;
loc_t loc = {
0,
};
- int idx = 0;
int i = 0;
int ret = 0;
@@ -1209,7 +1244,6 @@ afr_ta_post_op_do(void *opaque)
txn_frame = local->transaction.frame;
this = txn_frame->this;
priv = this->private;
- idx = afr_index_for_transaction_type(local->transaction.type);
ret = afr_fill_ta_loc(this, &loc);
if (ret) {
@@ -1224,22 +1258,20 @@ afr_ta_post_op_do(void *opaque)
goto out;
}
- pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
- if (!pending) {
- ret = -ENOMEM;
- goto out;
- }
+ pending = alloca0(priv->child_count);
+
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.failed_subvols[i]) {
- pending[i][idx] = hton32(1);
+ pending[i] = 1;
failed_subvol = i;
} else {
success_subvol = i;
}
}
- ret = afr_set_pending_dict(priv, xattr, pending);
- if (ret < 0)
+ changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
+
+ if (!changelog)
goto out;
ret = afr_ta_post_op_lock(this, &loc);
@@ -1271,16 +1303,16 @@ out:
if (xattr)
dict_unref(xattr);
- if (pending)
- afr_matrix_cleanup(pending, priv->child_count);
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
loc_wipe(&loc);
if (ret == 0) {
/*Mark pending xattrs on the up data brick.*/
- afr_changelog_post_op_do(local->transaction.frame, this);
+ afr_post_op_handle_success(local->transaction.frame, this);
} else {
- afr_changelog_post_op_fail(local->transaction.frame, this, -ret);
+ afr_post_op_handle_failure(local->transaction.frame, this);
}
return ret;
}
@@ -1359,6 +1391,28 @@ afr_ta_fill_failed_subvol(afr_private_t *priv, afr_local_t *local)
}
static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->is_new_entry == _gf_true) {
+ afr_mark_new_entry_changelog(frame, this);
+ }
+ afr_changelog_post_op_do(frame, this);
+
+ return;
+}
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this)
+{
+ afr_changelog_post_op_fail(frame, this, EIO);
+
+ return;
+}
+
+static void
afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
@@ -1380,10 +1434,12 @@ afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
/*Post releasing the notify lock, we will act on this queue*/
break;
case TA_INFO_IN_MEMORY_SUCCESS:
- afr_changelog_post_op_do(frame, this);
+ afr_post_op_handle_success(frame, this);
break;
case TA_INFO_IN_MEMORY_FAILED:
- afr_changelog_post_op_fail(frame, this, EIO);
+ afr_post_op_handle_failure(frame, this);
+ break;
+ default:
break;
}
return;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 6f8015380f0..6ddb992f448 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -880,6 +880,7 @@ typedef struct _afr_local {
struct list_head ta_onwireq;
afr_ta_fop_state_t fop_state;
int ta_failed_subvol;
+ gf_boolean_t is_new_entry;
} afr_local_t;
typedef struct afr_spbc_timeout {
@@ -1320,4 +1321,8 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
void
afr_ta_lock_release_synctask(xlator_t *this);
+
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
#endif /* __AFR_H__ */