diff options
| author | Anuradha Talur <atalur@redhat.com> | 2016-03-22 14:03:38 +0530 | 
|---|---|---|
| committer | Raghavendra G <rgowdapp@redhat.com> | 2016-03-23 04:49:36 -0700 | 
| commit | cf91abba2e6bf22794b04d143f226906e44bf5d5 (patch) | |
| tree | 51e769956e57cf0e79cbfa82c586cb0274a0bceb /xlators/cluster/afr/src/afr-inode-write.c | |
| parent | ed4feadc8c60fce1d8d99e0732351b92230d6321 (diff) | |
afr : Enable auto heal when replica count increases
            Backport of: http://review.gluster.org/12454
    This patch is part two change to prevent data loss
    in a replicate volume on doing a add-brick operation.
    Problem: After doing add-brick, there is a chance that
    self heal might happen from the newly added brick rather
    than the source brick, leading to data loss.
    Solution: Mark pending changelogs on afr children for
    the new afr-child so that heal is performed in the
    correct direction.
            >Change-Id: I11871e55eef3593aec874f92214a2d97da229b17
            >BUG: 1276203
            >Signed-off-by: Anuradha Talur <atalur@redhat.com>
            >Reviewed-on: http://review.gluster.org/12454
            >Smoke: Gluster Build System <jenkins@build.gluster.com>
            >Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
            >Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
            >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
            >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Change-Id: Iae6af44f97e612cb3ee8c642254ec3d15ac063f5
BUG: 1320020
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: http://review.gluster.org/13807
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-inode-write.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 187 | 
1 files changed, 113 insertions, 74 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 8069d6cc3b3..c86fb49af88 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1014,14 +1014,15 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)  }  int -afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie, +afr_emptyb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie,                                    xlator_t *this, int op_ret, int op_errno,                                    dict_t *xattr, dict_t *xdata)  {          afr_local_t *local = NULL;          afr_private_t *priv = NULL; -        int i = 0; +        int i, ret = 0; +        char *op_type = NULL;          local = frame->local;          priv = this->private; @@ -1030,19 +1031,26 @@ afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie,          local->replies[i].valid = 1;          local->replies[i].op_ret = op_ret;          local->replies[i].op_errno = op_errno; + +        ret = dict_get_str (local->xdata_req, "replicate-brick-op", &op_type); +        if (ret) +                goto out; +          gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,                  op_ret ? op_errno : 0, -                AFR_MSG_REPLACE_BRICK_STATUS, "Set of pending xattr %s on" +                afr_get_msg_id (op_type), +                "Set of pending xattr %s on"                  " %s.", op_ret ? "failed" : "succeeded",                  priv->children[i]->name); +out:          syncbarrier_wake (&local->barrier);          return 0;  }  int -afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this, -                              unsigned char *locked_nodes) +afr_emptyb_set_pending_changelog (call_frame_t *frame, xlator_t *this, +                                unsigned char *locked_nodes)  {          afr_local_t *local = NULL;          afr_private_t *priv = NULL; @@ -1051,9 +1059,9 @@ afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this,          local = frame->local;          priv = this->private; -        AFR_ONLIST (locked_nodes, frame, afr_rb_set_pending_changelog_cbk, +        AFR_ONLIST (locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,                      xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, -                    local->xdata_req, NULL); +                    local->xattr_req, NULL);          /* It is sufficient if xattrop was successful on one child */          for (i = 0; i < priv->child_count; i++) { @@ -1073,9 +1081,10 @@ out:  }  int -_afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, -                                loc_t *loc, int rb_index, -                                afr_transaction_type type) +_afr_handle_empty_brick_type (xlator_t *this, call_frame_t *frame, +                            loc_t *loc, int empty_index, +                            afr_transaction_type type, +                            char *op_type)  {          afr_local_t     *local            = NULL;          afr_private_t   *priv             = NULL; @@ -1096,13 +1105,21 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame,          if (!local->pending)                  goto out; -        local->pending[rb_index][idx] = hton32 (1); +        local->pending[empty_index][idx] = hton32 (1);          local->xdata_req = dict_new ();          if (!local->xdata_req)                  goto out; -        ret = afr_set_pending_dict (priv, local->xdata_req, local->pending); +        ret = dict_set_str (local->xdata_req, "replicate-brick-op", op_type); +        if (ret) +                goto out; + +        local->xattr_req = dict_new (); +        if (!local->xattr_req) +                goto out; + +        ret = afr_set_pending_dict (priv, local->xattr_req, local->pending);          if (ret < 0)                  goto out; @@ -1123,7 +1140,7 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame,                  goto unlock;          } -        ret = afr_rb_set_pending_changelog (frame, this, locked_nodes); +        ret = afr_emptyb_set_pending_changelog (frame, this, locked_nodes);          if (ret)                  goto unlock;          ret = 0; @@ -1139,33 +1156,41 @@ out:          return ret;  } -int -_afr_handle_replace_brick_cbk (int ret, call_frame_t *frame, void *opaque) +void +afr_brick_args_cleanup (void *opaque)  { -        afr_replace_brick_args_t *data = NULL; +        afr_empty_brick_args_t *data = NULL;          data = opaque;          loc_wipe (&data->loc);          GF_FREE (data); +} + +int +_afr_handle_empty_brick_cbk (int ret, call_frame_t *frame, void *opaque) +{ +        afr_brick_args_cleanup (opaque);          return 0;  }  int -_afr_handle_replace_brick (void *opaque) +_afr_handle_empty_brick (void *opaque)  {          afr_local_t     *local          = NULL;          afr_private_t   *priv           = NULL; -        int              rb_index       = -1; +        int              empty_index       = -1;          int              ret            = -1;          int              op_errno       = ENOMEM;          call_frame_t    *frame          = NULL;          xlator_t        *this           = NULL; -        afr_replace_brick_args_t *data  = NULL; +        char            *op_type        = NULL; +        afr_empty_brick_args_t *data  = NULL;          data = opaque;          frame = data->frame; -        rb_index = data->rb_index; +        empty_index = data->empty_index; +        op_type = data->op_type;          this = frame->this;          priv = this->private; @@ -1175,11 +1200,13 @@ _afr_handle_replace_brick (void *opaque)          loc_copy (&local->loc, &data->loc); -        gf_msg_debug (this->name, 0, "Child being replaced is : %s", -                      priv->children[rb_index]->name); +        gf_msg_debug (this->name, 0, "New brick is : %s", +                      priv->children[empty_index]->name); -        ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index, -                                              AFR_METADATA_TRANSACTION); +        ret = _afr_handle_empty_brick_type (this, frame, &local->loc, +                                            empty_index, +                                            AFR_METADATA_TRANSACTION, +                                            op_type);          if (ret) {                  op_errno = -ret;                  ret = -1; @@ -1187,12 +1214,15 @@ _afr_handle_replace_brick (void *opaque)          }          dict_unref (local->xdata_req); +        dict_unref (local->xattr_req);          afr_matrix_cleanup (local->pending, priv->child_count);          local->pending = NULL; +        local->xattr_req = NULL;          local->xdata_req = NULL; -        ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index, -                                              AFR_ENTRY_TRANSACTION); +        ret = _afr_handle_empty_brick_type (this, frame, &local->loc, +                                            empty_index, +                                            AFR_ENTRY_TRANSACTION, op_type);          if (ret) {                  op_errno = -ret;                  ret = -1; @@ -1413,63 +1443,71 @@ afr_handle_spb_choice_timeout (xlator_t *this, call_frame_t *frame,  }  int -afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, -                          dict_t *dict) +afr_handle_empty_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, +                        dict_t *dict)  {          int             ret               = -1; -        int             rb_index          = -1; +        int             ab_ret            = -1; +        int             empty_index        = -1;          int             op_errno          = EPERM; -        char           *replace_brick     = NULL; -        afr_replace_brick_args_t *data    = NULL; +        char           *empty_brick         = NULL; +        char           *op_type           = NULL; +        afr_empty_brick_args_t *data        = NULL; -        ret =  dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick); +        ret =  dict_get_str (dict, GF_AFR_REPLACE_BRICK, &empty_brick); +        if (!ret) +                op_type = GF_AFR_REPLACE_BRICK; -        if (!ret) { -                if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { -                        gf_msg (this->name, GF_LOG_ERROR, EPERM, -                                AFR_MSG_REPLACE_BRICK_STATUS, "'%s' is an " -                                "internal extended attribute", -                                GF_AFR_REPLACE_BRICK); +        ab_ret = dict_get_str (dict, GF_AFR_ADD_BRICK, &empty_brick); +        if (!ab_ret) +                op_type = GF_AFR_ADD_BRICK; + +        if (ret && ab_ret) +                goto out; + +        if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { +                gf_msg (this->name, GF_LOG_ERROR, EPERM, +                        afr_get_msg_id (op_type), +                        "'%s' is an internal extended attribute.", +                        op_type); +                ret = 1; +                goto out; +        } +        empty_index = afr_get_child_index_from_name (this, empty_brick); + +        if (empty_index < 0) { +                 /* Didn't belong to this replica pair +                  * Just do a no-op +                  */ +                AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL); +                return 0; +        } else { +                data = GF_CALLOC (1, sizeof (*data), +                                  gf_afr_mt_empty_brick_t); +                if (!data) {                          ret = 1; +                        op_errno = ENOMEM;                          goto out;                  } -                rb_index = afr_get_child_index_from_name (this, replace_brick); - -                if (rb_index < 0) { -                         /* Didn't belong to this replica pair -                          * Just do a no-op -                          */ -                        AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL); -                        return 0; -                } else { -                        data = GF_CALLOC (1, sizeof (*data), -                                          gf_afr_mt_replace_brick_t); -                        if (!data) { -                                ret = 1; -                                op_errno = ENOMEM; -                                goto out; -                        } -                        data->frame = frame; -                        loc_copy (&data->loc, loc); -                        data->rb_index = rb_index; -                        ret = synctask_new (this->ctx->env, -                                            _afr_handle_replace_brick, -                                            _afr_handle_replace_brick_cbk, -                                            NULL, data); -                        if (ret) { -                                gf_msg (this->name, GF_LOG_ERROR, 0, -                                        AFR_MSG_REPLACE_BRICK_FAILED, -                                        "Failed to create synctask. Unable to " -                                        "perform replace-brick."); -                                ret = 1; -                                op_errno = ENOMEM; -                                loc_wipe (&data->loc); -                                GF_FREE (data); -                                goto out; -                        } +                data->frame = frame; +                loc_copy (&data->loc, loc); +                data->empty_index = empty_index; +                data->op_type = op_type; +                ret = synctask_new (this->ctx->env, +                                    _afr_handle_empty_brick, +                                    _afr_handle_empty_brick_cbk, +                                    NULL, data); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                afr_get_msg_id (op_type), +                                "Failed to create synctask."); +                        ret = 1; +                        op_errno = ENOMEM; +                        afr_brick_args_cleanup (data); +                        goto out;                  } -                ret = 0;          } +        ret = 0;  out:          if (ret == 1) {                  AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); @@ -1492,7 +1530,8 @@ afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,          if (ret == 0)                  goto out; -        ret = afr_handle_replace_brick (this, frame, loc, dict); +        /* Applicable for replace-brick and add-brick commands */ +        ret = afr_handle_empty_brick (this, frame, loc, dict);  out:          return ret;  }  | 
