summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-selfheal.c
diff options
context:
space:
mode:
authorRaghavendra G <rgowdapp@redhat.com>2015-02-18 12:15:55 +0530
committerRaghavendra G <rgowdapp@redhat.com>2015-02-20 02:35:00 -0800
commit571a71f0acd0ec59340b9d0d2519793e33a1dc16 (patch)
treeae52a527578c9138388ba38240bb369c1800e1a0 /xlators/cluster/dht/src/dht-selfheal.c
parentd8f181d3171ed301a9992615083fcf98992577c8 (diff)
cluster/dht: synchronize with other concurrent healers while healing layout.
Current layout heal code assumes layout setting is idempotent. This allowed multiple concurrent healers to set the layout without any synchronization. However, this is not the case as different healers can come up with different layout for same directory and making layout setting non-idempotent. So, we bring in synchronization among healers to 1. Not to overwrite an ondisk well-formed layout. 2. Refresh the in-memory layout with the ondisk layout if in-memory layout needs healing and ondisk layout is well formed. This patch can synchronize 1. among multiple healers. 2. among multiple fix-layouts (which extends layout to consider added or removed brick) 3. (but) not between healers and fix-layouts. So, the problem of in-memory stale layouts (not matching with layout ondisk), is not _completely_ fixed by this patch. Signed-off-by: Raghavendra G <rgowdapp@redhat.com> Change-Id: Ia285f25e8d043bb3175c61468d0d11090acee539 BUG: 1176008 Reviewed-on: http://review.gluster.org/9302 Reviewed-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-selfheal.c')
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c500
1 files changed, 483 insertions, 17 deletions
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 2d12abe9747..f4b0ceeade8 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -39,6 +39,12 @@
} \
} while (0)
+int
+dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir,
+ dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal);
+
static uint32_t
dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
{
@@ -64,19 +70,443 @@ dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
max (old->list[o].start, new->list[n].start) + 1;
}
+int
+dht_selfheal_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
int
dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_selfheal_unlock_cbk);
+ lock_frame = NULL;
+
+done:
local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
local->op_errno, NULL);
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
return 0;
}
+int
+dht_refresh_layout_done (call_frame_t *frame)
+{
+ int ret = -1;
+ dht_layout_t *refreshed = NULL, *heal = NULL;
+ dht_local_t *local = NULL;
+ dht_need_heal_t should_heal = NULL;
+ dht_selfheal_layout_t healer = NULL;
+
+ local = frame->local;
+
+ refreshed = local->selfheal.refreshed_layout;
+ heal = local->selfheal.layout;
+
+ healer = local->selfheal.healer;
+ should_heal = local->selfheal.should_heal;
+
+ ret = dht_layout_sort (refreshed);
+ if (ret == -1) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "sorting the layout failed");
+ goto err;
+ }
+
+ if (should_heal (frame, &heal, &refreshed)) {
+ healer (frame, &local->loc, heal);
+ } else {
+ local->selfheal.layout = NULL;
+ local->selfheal.refreshed_layout = NULL;
+ local->selfheal.layout = refreshed;
+
+ dht_layout_unref (frame->this, heal);
+
+ dht_selfheal_dir_finish (frame, frame->this, 0);
+ }
+
+ return 0;
+
+err:
+ dht_selfheal_dir_finish (frame, frame->this, -1);
+ return 0;
+}
+
+int
+dht_refresh_layout_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+
+ layout = local->selfheal.refreshed_layout;
+
+ LOCK (&frame->lock);
+ {
+ op_ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, 0,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ if (local->op_ret == 0) {
+ dht_refresh_layout_done (frame);
+ } else {
+ goto err;
+ }
+
+ }
+
+ return 0;
+
+err:
+ dht_selfheal_dir_finish (frame, this, -1);
+ return 0;
+}
+
+int
+dht_refresh_layout (call_frame_t *frame)
+{
+ int call_cnt = 0;
+ int i = 0, ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+ local->op_ret = -1;
+
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref (this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
+
+ local->selfheal.refreshed_layout = dht_layout_new (this,
+ conf->subvolume_cnt);
+ if (!local->selfheal.refreshed_layout) {
+ goto out;
+ }
+
+ if (local->xattr != NULL) {
+ dict_del (local->xattr, conf->xattr_name);
+ }
+
+ if (dict_get (local->xattr_req, conf->xattr_name) == 0) {
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name,
+ 4 * 4);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s",
+ local->loc.path, conf->xattr_name);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_refresh_layout_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ dht_selfheal_dir_finish (frame, this, -1);
+ return 0;
+}
+
+
+int32_t
+dht_selfheal_layout_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ if (op_ret < 0) {
+ goto err;
+ }
+
+ dht_refresh_layout (frame);
+ return 0;
+
+err:
+ dht_selfheal_dir_finish (frame, this, -1);
+ return 0;
+}
+
+
+gf_boolean_t
+dht_should_heal_layout (call_frame_t *frame, dht_layout_t **heal,
+ dht_layout_t **ondisk)
+{
+ gf_boolean_t fixit = _gf_true;
+ dht_local_t *local = NULL;
+ int ret = -1, heal_missing_dirs = 0;
+
+ local = frame->local;
+
+ if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL)
+ || (*ondisk == NULL))
+ goto out;
+
+ ret = dht_layout_anomalies (frame->this, &local->loc, *ondisk,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ /* Directories might've been created as part of this self-heal. We've to
+ * sync non-layout xattrs and set range 0-0 on new directories
+ */
+ heal_missing_dirs = local->selfheal.force_mkdir
+ ? local->selfheal.force_mkdir : dht_layout_missing_dirs (*heal);
+
+ if ((local->selfheal.hole_cnt == 0)
+ && (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) {
+ dht_layout_t *tmp = NULL;
+
+ /* Just added a brick and need to set 0-0 range on this brick.
+ * But ondisk layout is well-formed. So, swap layouts "heal" and
+ * "ondisk". Now "ondisk" layout will be used for healing
+ * xattrs. If there are any non-participating subvols in
+ * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set
+ * 0-0 and non-layout xattrs. This way we won't end up in
+ * "corrupting" already set and well-formed "ondisk" layout.
+ */
+ tmp = *heal;
+ *heal = *ondisk;
+ *ondisk = tmp;
+
+ /* Current selfheal code, heals non-layout xattrs only after
+ * an add-brick. In fact non-layout xattrs are considered as
+ * secondary citizens which are healed only if layout xattrs
+ * need to be healed. This is wrong, since for eg., quota can be
+ * set when layout is well-formed, but a node is down. Also,
+ * just for healing non-layout xattrs, we don't need locking.
+ * This issue is _NOT FIXED_ by this patch.
+ */
+ }
+
+ fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt
+ || heal_missing_dirs);
+
+out:
+ return fixit;
+}
+
+inline int
+dht_layout_span (dht_layout_t *layout)
+{
+ int i = 0, count = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err)
+ continue;
+
+ if (layout->list[i].start != layout->list[i].stop)
+ count++;
+ }
+
+ return count;
+}
+
+gf_boolean_t
+dht_should_fix_layout (call_frame_t *frame, dht_layout_t **inmem,
+ dht_layout_t **ondisk)
+{
+ gf_boolean_t fixit = _gf_true;
+ dht_local_t *local = NULL;
+ int layout_span = 0;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = frame->this->private;
+
+ local = frame->local;
+
+ if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL)
+ || (*ondisk == NULL))
+ goto out;
+
+ ret = dht_layout_anomalies (frame->this, &local->loc, *ondisk,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, NULL,
+ &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+ if (ret < 0) {
+ fixit = _gf_false;
+ goto out;
+ }
+
+ if (local->selfheal.down || local->selfheal.misc) {
+ fixit = _gf_false;
+ goto out;
+ }
+
+ if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt)
+ goto out;
+
+ layout_span = dht_layout_span (*ondisk);
+
+ if (layout_span == conf->subvolume_cnt)
+ fixit = _gf_false;
+
+out:
+ return fixit;
+}
+
+int
+dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir,
+ dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+ local = frame->local;
+
+ conf = frame->this->private;
+
+ local->selfheal.healer = healer;
+ local->selfheal.should_heal = should_heal;
+
+ tmp = local->selfheal.layout;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ dht_layout_unref (frame->this, tmp);
+
+ if (!newdir) {
+ count = conf->subvolume_cnt;
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array),
+ gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL)
+ goto err;
+ }
+ } else {
+ count = 1;
+ lk_array = GF_CALLOC (count, sizeof (*lk_array),
+ gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new (frame->this, local->hashed_subvol,
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[0] == NULL)
+ goto err;
+ }
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ dht_selfheal_layout_lock_cbk);
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ int tmp_count = 0, i = 0;
+
+ for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++) {
+ ;
+ }
+
+ dht_lock_array_free (lk_array, tmp_count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
+}
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -194,6 +624,7 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
}
}
}
+
if (!uuid_is_null (local->gfid))
uuid_copy (loc->gfid, local->gfid);
@@ -485,7 +916,7 @@ dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
- int this_call_cnt = 0;
+ int this_call_cnt = 0, ret = -1;
local = frame->local;
layout = local->selfheal.layout;
@@ -493,7 +924,13 @@ dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
+ ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+ if (ret < 0) {
+ dht_selfheal_dir_finish (frame, this, -1);
+ }
}
return 0;
@@ -505,7 +942,7 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
int32_t valid, dht_layout_t *layout)
{
int missing_attr = 0;
- int i = 0;
+ int i = 0, ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
@@ -518,7 +955,14 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
}
if (missing_attr == 0) {
- dht_selfheal_dir_xattr (frame, loc, layout);
+ ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+ if (ret < 0) {
+ dht_selfheal_dir_finish (frame, this, -1);
+ }
+
return 0;
}
@@ -656,6 +1100,8 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
local = frame->local;
this = frame->this;
+ local->selfheal.force_mkdir = force ? _gf_true : _gf_false;
+
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == ENOENT || force)
missing_dirs++;
@@ -763,6 +1209,13 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
if (err == -1 || err == 0 || err == ENOENT) {
+ /* Take this with a pinch of salt. The behaviour seems
+ * to be slightly different when this function is
+ * invoked from mkdir codepath. For eg., err == 0 in
+ * mkdir codepath means directory created but xattr
+ * is not set yet.
+ */
+
/* Setting list[i].err = -1 is an indication for
dht_selfheal_layout_new_directory() to assign
a range. We set it to -1 based on any one of
@@ -779,12 +1232,6 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
not exist (possibly racing with mkdir or
finishing half done mkdir). The missing
directory will be attempted to be recreated.
-
- It is important to note that it is safe
- to race with mkdir() as self-heal and
- mkdir are idempotent operations. Both will
- strive to set the directory and layouts to
- the same final state.
*/
count++;
if (!err)
@@ -1153,6 +1600,7 @@ dht_selfheal_new_directory (call_frame_t *frame,
dht_layout_t *layout)
{
dht_local_t *local = NULL;
+ int ret = 0;
local = frame->local;
@@ -1161,7 +1609,15 @@ dht_selfheal_new_directory (call_frame_t *frame,
dht_layout_sort_volname (layout);
dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
+
+ ret = dht_selfheal_layout_lock (frame, layout, _gf_true,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+ if (ret < 0) {
+ dir_cbk (frame, NULL, frame->this, -1, ENOMEM, NULL);
+ }
+
return 0;
}
@@ -1170,8 +1626,9 @@ dht_fix_directory_layout (call_frame_t *frame,
dht_selfheal_dir_cbk_t dir_cbk,
dht_layout_t *layout)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
local = frame->local;
@@ -1183,9 +1640,12 @@ dht_fix_directory_layout (call_frame_t *frame,
if (!tmp_layout) {
return -1;
}
- dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
- return 0;
+ ret = dht_selfheal_layout_lock (frame, tmp_layout, _gf_false,
+ dht_fix_dir_xattr,
+ dht_should_fix_layout);
+
+ return ret;
}
@@ -1205,7 +1665,6 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
uuid_unparse(loc->gfid, gfid);
-
dht_layout_anomalies (this, loc, layout,
&local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
@@ -1310,7 +1769,14 @@ dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame,
goto sorry_no_fix;
}
- dht_selfheal_dir_xattr_for_nameless_lookup (frame, &local->loc, layout);
+ ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
+ dht_selfheal_dir_xattr_for_nameless_lookup,
+ dht_should_heal_layout);
+
+ if (ret < 0) {
+ goto sorry_no_fix;
+ }
+
return 0;
sorry_no_fix: