diff options
| -rw-r--r-- | tests/basic/afr/add-brick-self-heal.t | 67 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 11 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 187 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-mem-types.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 16 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 9 | 
6 files changed, 211 insertions, 81 deletions
diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t new file mode 100644 index 00000000000..748d36758e7 --- /dev/null +++ b/tests/basic/afr/add-brick-self-heal.t @@ -0,0 +1,67 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off + +TEST $CLI volume set $V0 self-heal-daemon off +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +# Create files +for i in {1..5} +do +        echo $i > $M0/file$i.txt +done + +# Metadata changes +TEST setfattr -n user.test -v qwerty $M0/file5.txt + +# Add brick1 +TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 + +# New-brick should accuse the old-bricks (Simulating case for data-loss) +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/ +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/ + +# Check if pending xattr and dirty-xattr are set for newly-added-brick +EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 +EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 + +# Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Check if entry-heal has happened +TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort) +TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort) + +# Test if data was healed +TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt + +# Test if metadata was healed and exists on both the bricks +EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt +EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt + +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2 + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index b5d07acf338..6b393c77a96 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5113,3 +5113,14 @@ afr_get_need_heal (xlator_t *this)          UNLOCK (&priv->lock);          return need_heal;  } + +int +afr_get_msg_id (char *op_type) +{ + +        if (!strcmp (op_type, GF_AFR_REPLACE_BRICK)) +                return AFR_MSG_REPLACE_BRICK_STATUS; +        else if (!strcmp (op_type, GF_AFR_ADD_BRICK)) +                return AFR_MSG_ADD_BRICK_STATUS; +        return -1; +} diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 8069d6cc3b3..c86fb49af88 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1014,14 +1014,15 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)  }  int -afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie, +afr_emptyb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie,                                    xlator_t *this, int op_ret, int op_errno,                                    dict_t *xattr, dict_t *xdata)  {          afr_local_t *local = NULL;          afr_private_t *priv = NULL; -        int i = 0; +        int i, ret = 0; +        char *op_type = NULL;          local = frame->local;          priv = this->private; @@ -1030,19 +1031,26 @@ afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie,          local->replies[i].valid = 1;          local->replies[i].op_ret = op_ret;          local->replies[i].op_errno = op_errno; + +        ret = dict_get_str (local->xdata_req, "replicate-brick-op", &op_type); +        if (ret) +                goto out; +          gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,                  op_ret ? op_errno : 0, -                AFR_MSG_REPLACE_BRICK_STATUS, "Set of pending xattr %s on" +                afr_get_msg_id (op_type), +                "Set of pending xattr %s on"                  " %s.", op_ret ? "failed" : "succeeded",                  priv->children[i]->name); +out:          syncbarrier_wake (&local->barrier);          return 0;  }  int -afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this, -                              unsigned char *locked_nodes) +afr_emptyb_set_pending_changelog (call_frame_t *frame, xlator_t *this, +                                unsigned char *locked_nodes)  {          afr_local_t *local = NULL;          afr_private_t *priv = NULL; @@ -1051,9 +1059,9 @@ afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this,          local = frame->local;          priv = this->private; -        AFR_ONLIST (locked_nodes, frame, afr_rb_set_pending_changelog_cbk, +        AFR_ONLIST (locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,                      xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, -                    local->xdata_req, NULL); +                    local->xattr_req, NULL);          /* It is sufficient if xattrop was successful on one child */          for (i = 0; i < priv->child_count; i++) { @@ -1073,9 +1081,10 @@ out:  }  int -_afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, -                                loc_t *loc, int rb_index, -                                afr_transaction_type type) +_afr_handle_empty_brick_type (xlator_t *this, call_frame_t *frame, +                            loc_t *loc, int empty_index, +                            afr_transaction_type type, +                            char *op_type)  {          afr_local_t     *local            = NULL;          afr_private_t   *priv             = NULL; @@ -1096,13 +1105,21 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame,          if (!local->pending)                  goto out; -        local->pending[rb_index][idx] = hton32 (1); +        local->pending[empty_index][idx] = hton32 (1);          local->xdata_req = dict_new ();          if (!local->xdata_req)                  goto out; -        ret = afr_set_pending_dict (priv, local->xdata_req, local->pending); +        ret = dict_set_str (local->xdata_req, "replicate-brick-op", op_type); +        if (ret) +                goto out; + +        local->xattr_req = dict_new (); +        if (!local->xattr_req) +                goto out; + +        ret = afr_set_pending_dict (priv, local->xattr_req, local->pending);          if (ret < 0)                  goto out; @@ -1123,7 +1140,7 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame,                  goto unlock;          } -        ret = afr_rb_set_pending_changelog (frame, this, locked_nodes); +        ret = afr_emptyb_set_pending_changelog (frame, this, locked_nodes);          if (ret)                  goto unlock;          ret = 0; @@ -1139,33 +1156,41 @@ out:          return ret;  } -int -_afr_handle_replace_brick_cbk (int ret, call_frame_t *frame, void *opaque) +void +afr_brick_args_cleanup (void *opaque)  { -        afr_replace_brick_args_t *data = NULL; +        afr_empty_brick_args_t *data = NULL;          data = opaque;          loc_wipe (&data->loc);          GF_FREE (data); +} + +int +_afr_handle_empty_brick_cbk (int ret, call_frame_t *frame, void *opaque) +{ +        afr_brick_args_cleanup (opaque);          return 0;  }  int -_afr_handle_replace_brick (void *opaque) +_afr_handle_empty_brick (void *opaque)  {          afr_local_t     *local          = NULL;          afr_private_t   *priv           = NULL; -        int              rb_index       = -1; +        int              empty_index       = -1;          int              ret            = -1;          int              op_errno       = ENOMEM;          call_frame_t    *frame          = NULL;          xlator_t        *this           = NULL; -        afr_replace_brick_args_t *data  = NULL; +        char            *op_type        = NULL; +        afr_empty_brick_args_t *data  = NULL;          data = opaque;          frame = data->frame; -        rb_index = data->rb_index; +        empty_index = data->empty_index; +        op_type = data->op_type;          this = frame->this;          priv = this->private; @@ -1175,11 +1200,13 @@ _afr_handle_replace_brick (void *opaque)          loc_copy (&local->loc, &data->loc); -        gf_msg_debug (this->name, 0, "Child being replaced is : %s", -                      priv->children[rb_index]->name); +        gf_msg_debug (this->name, 0, "New brick is : %s", +                      priv->children[empty_index]->name); -        ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index, -                                              AFR_METADATA_TRANSACTION); +        ret = _afr_handle_empty_brick_type (this, frame, &local->loc, +                                            empty_index, +                                            AFR_METADATA_TRANSACTION, +                                            op_type);          if (ret) {                  op_errno = -ret;                  ret = -1; @@ -1187,12 +1214,15 @@ _afr_handle_replace_brick (void *opaque)          }          dict_unref (local->xdata_req); +        dict_unref (local->xattr_req);          afr_matrix_cleanup (local->pending, priv->child_count);          local->pending = NULL; +        local->xattr_req = NULL;          local->xdata_req = NULL; -        ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index, -                                              AFR_ENTRY_TRANSACTION); +        ret = _afr_handle_empty_brick_type (this, frame, &local->loc, +                                            empty_index, +                                            AFR_ENTRY_TRANSACTION, op_type);          if (ret) {                  op_errno = -ret;                  ret = -1; @@ -1413,63 +1443,71 @@ afr_handle_spb_choice_timeout (xlator_t *this, call_frame_t *frame,  }  int -afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, -                          dict_t *dict) +afr_handle_empty_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, +                        dict_t *dict)  {          int             ret               = -1; -        int             rb_index          = -1; +        int             ab_ret            = -1; +        int             empty_index        = -1;          int             op_errno          = EPERM; -        char           *replace_brick     = NULL; -        afr_replace_brick_args_t *data    = NULL; +        char           *empty_brick         = NULL; +        char           *op_type           = NULL; +        afr_empty_brick_args_t *data        = NULL; -        ret =  dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick); +        ret =  dict_get_str (dict, GF_AFR_REPLACE_BRICK, &empty_brick); +        if (!ret) +                op_type = GF_AFR_REPLACE_BRICK; -        if (!ret) { -                if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { -                        gf_msg (this->name, GF_LOG_ERROR, EPERM, -                                AFR_MSG_REPLACE_BRICK_STATUS, "'%s' is an " -                                "internal extended attribute", -                                GF_AFR_REPLACE_BRICK); +        ab_ret = dict_get_str (dict, GF_AFR_ADD_BRICK, &empty_brick); +        if (!ab_ret) +                op_type = GF_AFR_ADD_BRICK; + +        if (ret && ab_ret) +                goto out; + +        if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { +                gf_msg (this->name, GF_LOG_ERROR, EPERM, +                        afr_get_msg_id (op_type), +                        "'%s' is an internal extended attribute.", +                        op_type); +                ret = 1; +                goto out; +        } +        empty_index = afr_get_child_index_from_name (this, empty_brick); + +        if (empty_index < 0) { +                 /* Didn't belong to this replica pair +                  * Just do a no-op +                  */ +                AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL); +                return 0; +        } else { +                data = GF_CALLOC (1, sizeof (*data), +                                  gf_afr_mt_empty_brick_t); +                if (!data) {                          ret = 1; +                        op_errno = ENOMEM;                          goto out;                  } -                rb_index = afr_get_child_index_from_name (this, replace_brick); - -                if (rb_index < 0) { -                         /* Didn't belong to this replica pair -                          * Just do a no-op -                          */ -                        AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL); -                        return 0; -                } else { -                        data = GF_CALLOC (1, sizeof (*data), -                                          gf_afr_mt_replace_brick_t); -                        if (!data) { -                                ret = 1; -                                op_errno = ENOMEM; -                                goto out; -                        } -                        data->frame = frame; -                        loc_copy (&data->loc, loc); -                        data->rb_index = rb_index; -                        ret = synctask_new (this->ctx->env, -                                            _afr_handle_replace_brick, -                                            _afr_handle_replace_brick_cbk, -                                            NULL, data); -                        if (ret) { -                                gf_msg (this->name, GF_LOG_ERROR, 0, -                                        AFR_MSG_REPLACE_BRICK_FAILED, -                                        "Failed to create synctask. Unable to " -                                        "perform replace-brick."); -                                ret = 1; -                                op_errno = ENOMEM; -                                loc_wipe (&data->loc); -                                GF_FREE (data); -                                goto out; -                        } +                data->frame = frame; +                loc_copy (&data->loc, loc); +                data->empty_index = empty_index; +                data->op_type = op_type; +                ret = synctask_new (this->ctx->env, +                                    _afr_handle_empty_brick, +                                    _afr_handle_empty_brick_cbk, +                                    NULL, data); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                afr_get_msg_id (op_type), +                                "Failed to create synctask."); +                        ret = 1; +                        op_errno = ENOMEM; +                        afr_brick_args_cleanup (data); +                        goto out;                  } -                ret = 0;          } +        ret = 0;  out:          if (ret == 1) {                  AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); @@ -1492,7 +1530,8 @@ afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,          if (ret == 0)                  goto out; -        ret = afr_handle_replace_brick (this, frame, loc, dict); +        /* Applicable for replace-brick and add-brick commands */ +        ret = afr_handle_empty_brick (this, frame, loc, dict);  out:          return ret;  } diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 6f1eee95322..7f7962013d7 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -45,7 +45,7 @@ enum gf_afr_mem_types_ {  	gf_afr_mt_subvol_healer_t,  	gf_afr_mt_spbc_timeout_t,          gf_afr_mt_spb_status_t, -        gf_afr_mt_replace_brick_t, +        gf_afr_mt_empty_brick_t,          gf_afr_mt_end  };  #endif diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index d07c7bc2170..1fb5b51b20a 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -341,11 +341,21 @@  /*!   * @messageid 108039 - * @diagnosis - * @recommendedaction + * @diagnosis Setting of pending xattrs succeeded/failed during add-brick + * operation. + * @recommendedaction In case of failure, error number in the log should give + * the reason why it failed. Also observe brick logs for more information.  */ -#define AFR_MSG_REPLACE_BRICK_FAILED                (GLFS_COMP_BASE_AFR + 39) +#define AFR_MSG_ADD_BRICK_STATUS        (GLFS_COMP_BASE_AFR + 39) +/*! + * @messageid 108040 + * @diagnosis AFR was unable to be loaded because the pending-changelog xattrs + * were not found in the volfile. + * @recommendedaction Please ensure cluster op-version is atleast 30707 and the + * volfiles are regenerated. +*/ +#define AFR_MSG_NO_CHANGELOG  (GLFS_COMP_BASE_AFR + 40)  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"  #endif /* !_AFR_MESSAGES_H_ */ diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 9915344c1f5..dec4eaa8a4a 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -777,11 +777,12 @@ typedef struct afr_spb_status {          loc_t        *loc;  } afr_spb_status_t; -typedef struct afr_replace_brick_args { +typedef struct afr_empty_brick_args {          call_frame_t *frame;          loc_t loc; -        int rb_index; -} afr_replace_brick_args_t; +        int empty_index; +        char *op_type; +} afr_empty_brick_args_t;  typedef struct afr_read_subvol_args {          ia_type_t ia_type; @@ -1117,4 +1118,6 @@ afr_set_need_heal (xlator_t *this, afr_local_t *local);  int  afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd); +int +afr_get_msg_id (char *op_type);  #endif /* __AFR_H__ */  | 
