diff options
| -rw-r--r-- | tests/basic/afr/replace-brick-self-heal.t | 64 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 217 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 12 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 16 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 7 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 8 | 
6 files changed, 314 insertions, 10 deletions
diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t new file mode 100644 index 00000000000..8ced7df3c76 --- /dev/null +++ b/tests/basic/afr/replace-brick-self-heal.t @@ -0,0 +1,64 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off + +TEST $CLI volume set $V0 self-heal-daemon off +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +# Create files +for i in {1..5} +do +        echo $i > $M0/file$i.txt +done + +# Metadata changes +TEST setfattr -n user.test -v qwerty $M0/file5.txt + +# Replace brick1 +TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force + +# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss) +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ + +# Check if pending xattr and dirty-xattr are set for replaced-brick +EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 + +# Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0 + +# Check if entry-heal has happened +TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort) + +# To 
make sure that files were not lost from brick0 +TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort) +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 + +# Test if data was healed +TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt +# To make sure that data was not lost from brick0 +TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt + +# Test if metadata was healed and exists on both the bricks +EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt +EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt + +cleanup; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 866294ea20e..ecbbe282da5 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -30,9 +30,10 @@  #include "compat-errno.h"  #include "compat.h"  #include "protocol-common.h" - +#include "byte-order.h"  #include "afr-transaction.h"  #include "afr-self-heal.h" +#include "afr-messages.h"  static void  __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) @@ -968,6 +969,179 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)  }  int +afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie, +                                  xlator_t *this, int op_ret, int op_errno, +                                  dict_t *xattr, dict_t *xdata) + +{ +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        int i = 0; + +        local = frame->local; +        priv = this->private; +        i = (long) cookie; + +        local->replies[i].valid = 1; +        local->replies[i].op_ret = op_ret; +        local->replies[i].op_errno = op_errno; +        gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO, +                op_ret ? op_errno : 0, +                AFR_MSG_REPLACE_BRICK_STATUS, "Set of pending xattr %s on" +                " %s.", op_ret ? 
"failed" : "succeeded", +                priv->children[i]->name); + +        syncbarrier_wake (&local->barrier); +        return 0; +} + +int +afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this, +                              unsigned char *locked_nodes) +{ +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        int ret = 0, i = 0; + +        local = frame->local; +        priv = this->private; + +        AFR_ONLIST (locked_nodes, frame, afr_rb_set_pending_changelog_cbk, +                    xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, +                    local->xdata_req, NULL); + +        /* It is sufficient if xattrop was successful on one child */ +        for (i = 0; i < priv->child_count; i++) { +                if (!local->replies[i].valid) +                        continue; + +                if (local->replies[i].op_ret == 0) { +                        ret = 0; +                        goto out; +                } else { +                        ret = afr_higher_errno (ret, +                                                local->replies[i].op_errno); +                } +        } +out: +        return -ret; +} + +int +_afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, +                                loc_t *loc, int rb_index, +                                afr_transaction_type type) +{ +        afr_local_t     *local            = NULL; +        afr_private_t   *priv             = NULL; +        unsigned char   *locked_nodes     = NULL; +        int              count            = 0; +        int              ret              = -ENOMEM; +        int              idx              = -1; + +        priv = this->private; +        local = frame->local; + +        locked_nodes = alloca0 (priv->child_count); + +        idx = afr_index_for_transaction_type (type); + +        local->pending = afr_matrix_create (priv->child_count, +                                            AFR_NUM_CHANGE_LOGS); +        
if (!local->pending) +                goto out; + +        local->pending[rb_index][idx] = hton32 (1); + +        local->xdata_req = dict_new (); +        if (!local->xdata_req) +                goto out; + +        ret = afr_set_pending_dict (priv, local->xdata_req, local->pending); +        if (ret < 0) +                goto out; + +        if (AFR_ENTRY_TRANSACTION == type) { +                count = afr_selfheal_entrylk (frame, this, loc->inode, +                                              this->name, NULL, locked_nodes); +        } else { +                count = afr_selfheal_inodelk (frame, this, loc->inode, +                                              this->name, LLONG_MAX - 1, 0, +                                              locked_nodes); +        } + +        if (!count) { +                gf_log (this->name, GF_LOG_ERROR, "Couldn't acquire lock on" +                        " any child."); +                ret = -EAGAIN; +                goto unlock; +        } + +        ret = afr_rb_set_pending_changelog (frame, this, locked_nodes); +        if (ret) +                goto unlock; +        ret = 0; +unlock: +        if (AFR_ENTRY_TRANSACTION == type) { +                afr_selfheal_unentrylk (frame, this, loc->inode, this->name, +                                        NULL, locked_nodes); +        } else { +                afr_selfheal_uninodelk (frame, this, loc->inode, this->name, +                                        LLONG_MAX - 1, 0, locked_nodes); +        } +out: +        return ret; +} + +int +_afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, +                           int rb_index) +{ + +        afr_local_t     *local          = NULL; +        afr_private_t   *priv           = NULL; +        int              ret            = -1; +        int              op_errno       = ENOMEM; + +        priv = this->private; + +        local = AFR_FRAME_INIT (frame, op_errno); +        if (!local) +                goto out; 
+ +        loc_copy (&local->loc, loc); + +        gf_log (this->name, GF_LOG_DEBUG, "Child being replaced is : %s", +                priv->children[rb_index]->name); + +        ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index, +                                              AFR_METADATA_TRANSACTION); +        if (ret) { +                op_errno = -ret; +                ret = -1; +                goto out; +        } + +        dict_unref (local->xdata_req); +        afr_matrix_cleanup (local->pending, priv->child_count); +        local->pending = NULL; +        local->xdata_req = NULL; + +        ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index, +                                              AFR_ENTRY_TRANSACTION); +        if (ret) { +                op_errno = -ret; +                ret = -1; +                goto out; +        } +        ret = 0; +out: +        AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL); +        return 0; +} + + +int  afr_split_brain_resolve_do (call_frame_t *frame, xlator_t *this, loc_t *loc,                              char *data)  { @@ -1165,6 +1339,43 @@ afr_handle_spb_choice_timeout (xlator_t *this, call_frame_t *frame,          return ret;  } +int +afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, +                          dict_t *dict) +{ +        int             ret               = -1; +        int             rb_index          = -1; +        char           *replace_brick     = NULL; + +        ret =  dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick); + +        if (!ret) { +                if (frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) { +                        ret = 1; +                        goto out; +                } +                rb_index = afr_get_child_index_from_name (this, replace_brick); + +                if (rb_index < 0) +                         /* Didn't belong to this replica pair +                          * Just do a no-op +  
                        */ +                        AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL); +                else +                        _afr_handle_replace_brick (this, frame, loc, rb_index); +                ret = 0; +        } +out: +        if (ret == 1) { +                gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal" +                        " extended attribute : %s.", +                        GF_AFR_REPLACE_BRICK, strerror (EPERM)); +                AFR_STACK_UNWIND (setxattr, frame, -1, EPERM, NULL); +                ret = 0; +        } +        return ret; +} +  static int  afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,                            dict_t *dict) @@ -1176,6 +1387,10 @@ afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,                  goto out;          ret = afr_handle_spb_choice_timeout (this, frame, dict); +        if (ret == 0) +                goto out; + +        ret = afr_handle_replace_brick (this, frame, loc, dict);  out:          return ret;  } diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index b7e761bedf5..1bddbc74891 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -40,7 +40,7 @@   */  #define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR -#define GLFS_NUM_MESSAGES 10 +#define GLFS_NUM_MESSAGES 11  #define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)  #define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages" @@ -137,6 +137,16 @@  #define AFR_MSG_INODE_UNLOCK_FAIL       (GLFS_COMP_BASE_AFR + 10) +/*! + * @messageid 108011 + * @diagnosis Setting of pending xattrs succeeded/failed during replace-brick + * operation. + * @recommendedaction In case of failure, error number in the log should give + * the reason why it failed. Also observe brick logs for more information. 
+*/ +#define AFR_MSG_REPLACE_BRICK_STATUS     (GLFS_COMP_BASE_AFR + 11) + +  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"  #endif /* !_AFR_MESSAGES_H_ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 9a9a852b4d7..1534531bf88 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -802,8 +802,8 @@ afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  int -afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this, -			  unsigned char *locked_on) +afr_locked_fill (call_frame_t *frame, xlator_t *this, +                 unsigned char *locked_on)  {  	int i = 0;  	afr_private_t *priv = NULL; @@ -846,7 +846,7 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	loc_wipe (&loc); -	return afr_selfheal_locked_fill (frame, this, locked_on); +	return afr_locked_fill (frame, this, locked_on);  } @@ -877,7 +877,7 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	for (i = 0; i < priv->child_count; i++) {  		if (local->replies[i].op_ret == -1 &&  		    local->replies[i].op_errno == EAGAIN) { -			afr_selfheal_locked_fill (frame, this, locked_on); +			afr_locked_fill (frame, this, locked_on);  			afr_selfheal_uninodelk (frame, this, inode, dom, off,  						size, locked_on); @@ -889,7 +889,7 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	loc_wipe (&loc); -	return afr_selfheal_locked_fill (frame, this, locked_on); +	return afr_locked_fill (frame, this, locked_on);  } @@ -932,7 +932,7 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	loc_wipe (&loc); -	return afr_selfheal_locked_fill (frame, this, locked_on); +	return afr_locked_fill (frame, this, locked_on);  } @@ -957,7 +957,7 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	for (i = 0; i < 
priv->child_count; i++) {  		if (local->replies[i].op_ret == -1 &&  		    local->replies[i].op_errno == EAGAIN) { -			afr_selfheal_locked_fill (frame, this, locked_on); +			afr_locked_fill (frame, this, locked_on);  			afr_selfheal_unentrylk (frame, this, inode, dom, name,  						locked_on); @@ -969,7 +969,7 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	loc_wipe (&loc); -	return afr_selfheal_locked_fill (frame, this, locked_on); +	return afr_locked_fill (frame, this, locked_on);  } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 28e332db740..b593c8054c0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -290,6 +290,13 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,  		return 0;  	} +        /* Set all the sources as 1, otherwise newentry_mark won't be set */ +	for (i = 0; i < priv->child_count; i++) { +		if (replies[i].valid && replies[i].op_ret == 0) { +			sources[i] = 1; +		} +	} +          /* In case of a gfid or type mismatch on the entry, return -1.*/          ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies,                                                            fd->inode->gfid, diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 956f075e25b..a707e20e222 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -251,4 +251,12 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,  int  afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid); + +int +afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		       int op_ret, int op_errno, dict_t *xdata); + +int +afr_locked_fill (call_frame_t *frame, xlator_t *this, +                 unsigned char *locked_on);  #endif /* !_AFR_SELFHEAL_H */  | 
