summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnuradha <atalur@redhat.com>2015-06-11 14:58:05 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2015-06-25 19:34:34 -0700
commitcd05e036117a27377af8ff4a1cdba09f1de0a8eb (patch)
tree43a970d8c3c31c71ef58937bb0eac5d5f6701876
parente472e5d279e57cbddd75f81775505810664f29b3 (diff)
cluster/afr : set pending xattrs for replaced brick
This patch is the second part of the change to prevent data loss in a replicate volume when a replace-brick commit force operation is performed. Problem: After doing replace-brick commit force, there is a chance that self-heal might happen from the replaced (sink) brick rather than the source brick, leading to data loss. Solution: Mark pending changelogs on afr children for the replaced afr-child so that heal is performed in the correct direction. Change-Id: Icb9807e49b4c1c4f1dcab115318d9a58ccf95675 BUG: 1207829 Signed-off-by: Anuradha Talur <atalur@redhat.com> Reviewed-on: http://review.gluster.org/10448 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
-rw-r--r--tests/basic/afr/replace-brick-self-heal.t64
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c217
-rw-r--r--xlators/cluster/afr/src/afr-messages.h12
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c16
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c7
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h8
6 files changed, 314 insertions, 10 deletions
diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t
new file mode 100644
index 00000000000..8ced7df3c76
--- /dev/null
+++ b/tests/basic/afr/replace-brick-self-heal.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+# Create and mount a replica-2 volume with data/metadata/entry self-heal and the self-heal daemon switched off
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create five small files through the mount
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata change: set a user xattr on one file through the mount
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Replace brick1 (${V0}1) with a new, empty brick (${V0}1_new)
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+# Make the replaced brick accuse the non-replaced brick (simulates the case that could lead to data loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
+
+# Check that the pending xattr for the replaced brick is set on brick0 and that the dirty xattr is set on the new brick
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+# Turn the self-heal daemon back on and trigger a heal once it is up
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete (pending heal count drops to 0)
+EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0
+
+# Check that entry heal has happened: brick0 and the new brick list the same entries
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
+
+# Make sure no files were lost from brick0 by comparing it with the old brick1 directory
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Check that data was healed onto the new brick
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
+# Make sure data was not lost from brick0 (compare with the old brick1 copy)
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
+
+# Check that metadata was healed and exists on both bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 866294ea20e..ecbbe282da5 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -30,9 +30,10 @@
#include "compat-errno.h"
#include "compat.h"
#include "protocol-common.h"
-
+#include "byte-order.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
+#include "afr-messages.h"
static void
__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
@@ -968,6 +969,179 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
}
+/*
+ * Callback for the xattrop wound by afr_rb_set_pending_changelog().
+ *
+ * cookie carries the index of the child that replied.  The reply
+ * (op_ret/op_errno) is stored in local->replies[] for that child, the
+ * outcome is logged (ERROR on failure, INFO on success) and
+ * local->barrier is woken.
+ *
+ * Always returns 0.
+ */
int
+afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie,
+                                  xlator_t *this, int op_ret, int op_errno,
+                                  dict_t *xattr, dict_t *xdata)
+{
+        afr_local_t   *local     = frame->local;
+        afr_private_t *priv      = this->private;
+        int            child     = (long) cookie;
+        int            log_errno = 0;
+        const char    *outcome   = "succeeded";
+
+        /* Remember this child's reply for the waiter to inspect. */
+        local->replies[child].valid    = 1;
+        local->replies[child].op_ret   = op_ret;
+        local->replies[child].op_errno = op_errno;
+
+        if (op_ret) {
+                outcome   = "failed";
+                log_errno = op_errno;
+        }
+
+        gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO, log_errno,
+                AFR_MSG_REPLACE_BRICK_STATUS,
+                "Set of pending xattr %s on %s.", outcome,
+                priv->children[child]->name);
+
+        syncbarrier_wake (&local->barrier);
+
+        return 0;
+}
+
+/*
+ * Wind a GF_XATTROP_ADD_ARRAY xattrop carrying local->xdata_req on
+ * local->loc through AFR_ONLIST over locked_nodes, then evaluate the
+ * replies collected in local->replies[].
+ *
+ * Returns 0 as soon as one valid reply reports success (success on a
+ * single child is sufficient).  Otherwise returns the negated value
+ * obtained by folding the failed replies' op_errno through
+ * afr_higher_errno().
+ */
+int
+afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this,
+                              unsigned char *locked_nodes)
+{
+        afr_local_t   *local     = frame->local;
+        afr_private_t *priv      = this->private;
+        int            worst_err = 0;
+        int            child     = 0;
+
+        AFR_ONLIST (locked_nodes, frame, afr_rb_set_pending_changelog_cbk,
+                    xattrop, &local->loc, GF_XATTROP_ADD_ARRAY,
+                    local->xdata_req, NULL);
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->replies[child].valid) {
+                        continue;
+                }
+
+                /* It is sufficient if xattrop was successful on one child */
+                if (local->replies[child].op_ret == 0) {
+                        return 0;
+                }
+
+                worst_err = afr_higher_errno (worst_err,
+                                              local->replies[child].op_errno);
+        }
+
+        return -worst_err;
+}
+
+/*
+ * Record a pending changelog of the given transaction type against the
+ * child at rb_index, on loc->inode.
+ *
+ * Builds local->pending and local->xdata_req, takes an entry lock (for
+ * AFR_ENTRY_TRANSACTION) or an inode lock (for any other type) in the
+ * this->name domain, issues the xattrop through
+ * afr_rb_set_pending_changelog() and releases the lock again.
+ * local->pending and local->xdata_req are left in place for the caller.
+ *
+ * Returns 0 on success, otherwise a negative errno: -ENOMEM when the
+ * matrix or dict cannot be allocated, the negative result of
+ * afr_set_pending_dict(), -EAGAIN when no child could be locked, or
+ * the result of afr_rb_set_pending_changelog().
+ */
+int
+_afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame,
+                                loc_t *loc, int rb_index,
+                                afr_transaction_type type)
+{
+        afr_private_t *priv          = this->private;
+        afr_local_t   *local         = frame->local;
+        unsigned char *locked_nodes  = NULL;
+        int            is_entry      = (AFR_ENTRY_TRANSACTION == type);
+        int            changelog_idx = -1;
+        int            locked        = 0;
+        int            ret           = 0;
+
+        locked_nodes = alloca0 (priv->child_count);
+        changelog_idx = afr_index_for_transaction_type (type);
+
+        local->pending = afr_matrix_create (priv->child_count,
+                                            AFR_NUM_CHANGE_LOGS);
+        if (!local->pending) {
+                return -ENOMEM;
+        }
+
+        /* Mark one pending operation of this transaction type against
+         * the replaced child; the counter is kept in network byte order. */
+        local->pending[rb_index][changelog_idx] = hton32 (1);
+
+        local->xdata_req = dict_new ();
+        if (!local->xdata_req) {
+                return -ENOMEM;
+        }
+
+        ret = afr_set_pending_dict (priv, local->xdata_req, local->pending);
+        if (ret < 0) {
+                return ret;
+        }
+
+        if (is_entry) {
+                locked = afr_selfheal_entrylk (frame, this, loc->inode,
+                                               this->name, NULL, locked_nodes);
+        } else {
+                locked = afr_selfheal_inodelk (frame, this, loc->inode,
+                                               this->name, LLONG_MAX - 1, 0,
+                                               locked_nodes);
+        }
+
+        if (locked) {
+                ret = afr_rb_set_pending_changelog (frame, this, locked_nodes);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Couldn't acquire lock on any child.");
+                ret = -EAGAIN;
+        }
+
+        /* The unlock is issued on every path that reached the lock call,
+         * including the one where no child could be locked. */
+        if (is_entry) {
+                afr_selfheal_unentrylk (frame, this, loc->inode, this->name,
+                                        NULL, locked_nodes);
+        } else {
+                afr_selfheal_uninodelk (frame, this, loc->inode, this->name,
+                                        LLONG_MAX - 1, 0, locked_nodes);
+        }
+
+        return ret;
+}
+
+/*
+ * Mark pending changelogs against the child being replaced
+ * (priv->children[rb_index]) on the inode referred to by loc: first for
+ * the metadata transaction type, then for the entry transaction type.
+ *
+ * The setxattr frame is always unwound here, with 0/0 on success or
+ * -1/<errno> on failure.  Always returns 0.
+ */
+int
+_afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
+                           int rb_index)
+{
+
+        afr_local_t   *local    = NULL;
+        afr_private_t *priv     = NULL;
+        int            ret      = -1;
+        int            op_errno = ENOMEM;
+
+        priv = this->private;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
+
+        /* Do not carry on with an unusable local->loc if the copy fails.
+         * NOTE(review): assumes a non-zero return from loc_copy() means
+         * failure (allocation) - confirm against libglusterfs. */
+        if (loc_copy (&local->loc, loc)) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "Child being replaced is : %s",
+                priv->children[rb_index]->name);
+
+        ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
+                                              AFR_METADATA_TRANSACTION);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        /* Drop the request dict and pending matrix built for the metadata
+         * pass so that the entry pass starts from a clean state. */
+        dict_unref (local->xdata_req);
+        afr_matrix_cleanup (local->pending, priv->child_count);
+        local->pending = NULL;
+        local->xdata_req = NULL;
+
+        ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
+                                              AFR_ENTRY_TRANSACTION);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        /* Success: do not leak the ENOMEM default into the unwind. */
+        ret = 0;
+        op_errno = 0;
+out:
+        AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
+        return 0;
+}
+
+
+int
afr_split_brain_resolve_do (call_frame_t *frame, xlator_t *this, loc_t *loc,
char *data)
{
@@ -1165,6 +1339,43 @@ afr_handle_spb_choice_timeout (xlator_t *this, call_frame_t *frame,
return ret;
}
+/*
+ * Handle a setxattr request that carries the GF_AFR_REPLACE_BRICK key.
+ *
+ * If dict_get_str() cannot fetch the key from dict, its non-zero result
+ * is returned and the frame is left untouched.  Otherwise the frame is
+ * unwound and 0 is returned:
+ *   - with -1/EPERM when frame->root->pid is not
+ *     GF_CLIENT_PID_AFR_SELF_HEALD;
+ *   - with 0/0 when afr_get_child_index_from_name() does not find the
+ *     named brick among this xlator's children (no-op);
+ *   - by _afr_handle_replace_brick() in every other case.
+ */
+int
+afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
+                          dict_t *dict)
+{
+        char *brick_name = NULL;
+        int   child_idx  = -1;
+        int   ret        = -1;
+
+        ret = dict_get_str (dict, GF_AFR_REPLACE_BRICK, &brick_name);
+        if (ret) {
+                goto out;
+        }
+
+        if (frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) {
+                /* ret == 1 is the marker for "not permitted", see below. */
+                ret = 1;
+                goto out;
+        }
+
+        child_idx = afr_get_child_index_from_name (this, brick_name);
+        if (child_idx >= 0) {
+                _afr_handle_replace_brick (this, frame, loc, child_idx);
+        } else {
+                /* Didn't belong to this replica pair
+                 * Just do a no-op
+                 */
+                AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL);
+        }
+        ret = 0;
+out:
+        if (ret == 1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'%s' is an internal extended attribute : %s.",
+                        GF_AFR_REPLACE_BRICK, strerror (EPERM));
+                AFR_STACK_UNWIND (setxattr, frame, -1, EPERM, NULL);
+                ret = 0;
+        }
+        return ret;
+}
+
static int
afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
@@ -1176,6 +1387,10 @@ afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,
goto out;
ret = afr_handle_spb_choice_timeout (this, frame, dict);
+ if (ret == 0)
+ goto out;
+
+ ret = afr_handle_replace_brick (this, frame, loc, dict);
out:
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index b7e761bedf5..1bddbc74891 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
-#define GLFS_NUM_MESSAGES 10
+#define GLFS_NUM_MESSAGES 11
#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
@@ -137,6 +137,16 @@
#define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10)
+/*!
+ * @messageid 108011
+ * @diagnosis Setting of pending xattrs on a child succeeded or failed during a
+ * replace-brick operation (logged at INFO on success and ERROR on failure).
+ * @recommendedaction In case of failure, the error number in the log should
+ * give the reason why it failed. Also observe brick logs for more information.
+*/
+#define AFR_MSG_REPLACE_BRICK_STATUS (GLFS_COMP_BASE_AFR + 11)
+
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 9a9a852b4d7..1534531bf88 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -802,8 +802,8 @@ afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
-afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this,
- unsigned char *locked_on)
+afr_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on)
{
int i = 0;
afr_private_t *priv = NULL;
@@ -846,7 +846,7 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
loc_wipe (&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill (frame, this, locked_on);
}
@@ -877,7 +877,7 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
for (i = 0; i < priv->child_count; i++) {
if (local->replies[i].op_ret == -1 &&
local->replies[i].op_errno == EAGAIN) {
- afr_selfheal_locked_fill (frame, this, locked_on);
+ afr_locked_fill (frame, this, locked_on);
afr_selfheal_uninodelk (frame, this, inode, dom, off,
size, locked_on);
@@ -889,7 +889,7 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
loc_wipe (&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill (frame, this, locked_on);
}
@@ -932,7 +932,7 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
loc_wipe (&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill (frame, this, locked_on);
}
@@ -957,7 +957,7 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
for (i = 0; i < priv->child_count; i++) {
if (local->replies[i].op_ret == -1 &&
local->replies[i].op_errno == EAGAIN) {
- afr_selfheal_locked_fill (frame, this, locked_on);
+ afr_locked_fill (frame, this, locked_on);
afr_selfheal_unentrylk (frame, this, inode, dom, name,
locked_on);
@@ -969,7 +969,7 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
loc_wipe (&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill (frame, this, locked_on);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 28e332db740..b593c8054c0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -290,6 +290,13 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
return 0;
}
+ /* Set all the sources as 1, otherwise newentry_mark won't be set */
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ sources[i] = 1;
+ }
+ }
+
/* In case of a gfid or type mismatch on the entry, return -1.*/
ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies,
fd->inode->gfid,
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 956f075e25b..a707e20e222 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -251,4 +251,12 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
int
afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid);
+
+int
+afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+afr_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on);
#endif /* !_AFR_SELFHEAL_H */