summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorkarthik-us <ksubrahm@redhat.com>2018-05-30 15:27:52 +0530
committerRavishankar N <ravishankar@redhat.com>2018-09-20 09:18:20 +0000
commit5784a00f997212d34bd52b2303e20c097240d91c (patch)
tree2dc25588df397d0e029066a7510977dfa48c3481 /xlators/cluster
parent4f6ae853ffa9d06446407f389aaef61ac0b3b424 (diff)
cluster/afr: Use 2 domain locking in SHD for thin-arbiter
With this change when SHD starts the index crawl it requests all the clients to release the AFR_TA_DOM_NOTIFY lock so that clients will know the in memory state is no more valid and any new operations needs to query the thin-arbiter if required. When SHD completes healing all the files without any failure, it will again take the AFR_TA_DOM_NOTIFY lock and gets the xattrs on TA to see whether there are any new failures happened by that time. If there are new failures marked on TA, SHD will start the crawl immediately to heal those failures as well. If there are no new failures, then SHD will take the AFR_TA_DOM_MODIFY lock and unsets the xattrs on TA, so that both the data bricks will be considered as good there after. Change-Id: I037b89a0823648f314580ba0716d877bd5ddb1f1 fixes: bz#1579788 Signed-off-by: karthik-us <ksubrahm@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-common.c7
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c245
-rw-r--r--xlators/cluster/afr/src/afr.c1
3 files changed, 162 insertions, 91 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index eb0e733..73f1d72 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -6717,7 +6717,8 @@ afr_ta_post_op_lock(xlator_t *this, loc_t *loc)
};
int32_t cmd = 0;
- GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
flock1.l_type = F_WRLCK;
while (!locked) {
@@ -6725,7 +6726,6 @@ afr_ta_post_op_lock(xlator_t *this, loc_t *loc)
cmd = F_SETLKW;
flock1.l_start = 0;
flock1.l_len = 0;
-
} else {
cmd = F_SETLK;
if (priv->ta_notify_dom_lock_offset) {
@@ -6780,7 +6780,8 @@ afr_ta_post_op_unlock(xlator_t *this, loc_t *loc)
};
int ret = 0;
- GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
flock.l_type = F_UNLCK;
flock.l_start = 0;
flock.l_len = 0;
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 0cf01a0..53d7ef8 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -546,14 +546,128 @@ afr_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
GF_CLIENT_PID_SELF_HEALD, healer, afr_shd_full_heal);
}
-void
-afr_shd_ta_set_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
+int
+afr_shd_fill_ta_loc(xlator_t *this, loc_t *loc)
{
afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- struct gf_flock flock = {
+ struct iatt stbuf = {
0,
};
+ int ret = -1;
+
+ priv = this->private;
+ loc->parent = inode_ref(this->itable->root);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ loc->inode = inode_new(loc->parent->table);
+ GF_CHECK_ALLOC(loc->inode, ret, out);
+
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto assign_gfid;
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], loc, &stbuf,
+ 0, 0, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed lookup on file %s.", loc->name);
+ goto out;
+ }
+
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+
+assign_gfid:
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ ret = 0;
+
+out:
+ if (ret)
+ loc_wipe(loc);
+
+ return ret;
+}
+
+int
+_afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
+{
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int *raw = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_GET_FAILED,
+ "Failed to create dict.");
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
+ if (!raw)
+ goto out;
+
+ ret = dict_set_bin(xattr, priv->pending_key[i], raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ GF_FREE(raw);
+ goto out;
+ }
+ }
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, NULL);
+ if (ret || !(*xdata)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed on %s.", loc->name);
+ }
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+void
+afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, struct subvol_healer *healer,
+ dict_t **xdata)
+{
+ int ret = 0;
+
+ loc_wipe(loc);
+ if (afr_shd_fill_ta_loc(this, loc)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc->name);
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, loc);
+ if (ret)
+ goto out;
+
+ ret = _afr_shd_ta_get_xattrs(this, loc, xdata);
+ if (ret) {
+ if (*xdata) {
+ dict_unref(*xdata);
+ *xdata = NULL;
+ }
+ }
+
+ afr_ta_post_op_unlock(this, loc);
+
+out:
+ if (ret)
+ healer->rerun = 1;
+}
+
+int
+afr_shd_ta_unset_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
+{
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
gf_boolean_t need_xattrop = _gf_false;
void *pending_raw = NULL;
int *raw = NULL;
@@ -563,7 +677,7 @@ afr_shd_ta_set_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
int i = 0;
int j = 0;
int val = 0;
- int ret = 0;
+ int ret = -1;
priv = this->private;
@@ -598,6 +712,7 @@ afr_shd_ta_set_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
"not the good shd. Skipping. "
"SHD = %d.",
healer);
+ ret = 0;
GF_FREE(raw);
goto out;
}
@@ -607,113 +722,69 @@ afr_shd_ta_set_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
}
ret = dict_set_bin(xattr, priv->pending_key[i], raw,
- AFR_NUM_CHANGE_LOGS * sizeof(int));
+ AFR_NUM_CHANGE_LOGS * sizeof (int));
if (ret) {
- GF_FREE(raw);
+ GF_FREE (raw);
goto out;
}
- memset(pending, 0, sizeof(pending));
+ if (need_xattrop)
+ break;
}
if (!need_xattrop) {
+ ret = 0;
goto out;
}
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
-
ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Xattrop failed.");
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- loc, F_SETLKW, &flock, NULL, NULL);
-
out:
if (xattr)
dict_unref(xattr);
- return;
+
+ return ret;
}
void
-afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
+afr_shd_ta_check_and_unset_xattrs(xlator_t *this, loc_t *loc,
+ struct subvol_healer *healer,
+ dict_t *pre_crawl_xdata)
{
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- struct iatt stbuf = {
- 0,
- };
- int *raw = NULL;
+ int ret_lock = 0;
int ret = 0;
- int i = 0;
+ dict_t *post_crawl_xdata = NULL;
- priv = this->private;
-
- loc->parent = inode_ref(this->itable->root);
- gf_uuid_copy(loc->pargfid, loc->parent->gfid);
- loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
- loc->inode = inode_new(loc->parent->table);
- if (!loc->inode) {
- goto out;
- }
+ ret_lock = afr_ta_post_op_lock(this, loc);
+ if (ret_lock)
+ goto unref;
- ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], loc, &stbuf,
- 0, 0, 0);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed lookup on file %s.", loc->name);
- goto out;
- }
-
- gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
- gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ ret = _afr_shd_ta_get_xattrs(this, loc, &post_crawl_xdata);
+ if (ret)
+ goto unref;
- xattr = dict_new();
- if (!xattr) {
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_GET_FAILED,
- "Failed to create dict.");
- goto out;
+ if (!are_dicts_equal(pre_crawl_xdata, post_crawl_xdata, NULL, NULL)) {
+ ret = -1;
+ goto unref;
}
- for (i = 0; i < priv->child_count; i++) {
- raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
- if (!raw) {
- goto out;
- }
+ ret = afr_shd_ta_unset_xattrs(this, loc, &post_crawl_xdata, healer->subvol);
- ret = dict_set_bin(xattr, priv->pending_key[i], raw,
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret) {
- GF_FREE(raw);
- goto out;
- }
+unref:
+ if (post_crawl_xdata) {
+ dict_unref(post_crawl_xdata);
+ post_crawl_xdata = NULL;
}
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Xattrop failed.");
- goto out;
- }
- if (!(*xdata))
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_GET_FAILED,
- "Xdata response is empty.");
+ if (ret || ret_lock)
+ healer->rerun = 1;
-out:
- if (xattr)
- dict_unref(xattr);
- return;
+ if (!ret_lock)
+ afr_ta_post_op_unlock(this, loc);
}
void *
@@ -723,7 +794,7 @@ afr_shd_index_healer(void *data)
xlator_t *this = NULL;
int ret = 0;
afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
+ dict_t *pre_crawl_xdata = NULL;
loc_t loc = {
0,
};
@@ -739,8 +810,7 @@ afr_shd_index_healer(void *data)
priv->local[healer->subvol] = healer->local;
if (priv->thin_arbiter_count) {
- loc_wipe(&loc);
- afr_shd_ta_get_xattrs(this, &loc, &xdata);
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
}
do {
@@ -770,15 +840,14 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
- if (xdata && !healer->crawl_event.heal_failed_count) {
- afr_shd_ta_set_xattrs(this, &loc, &xdata, healer->subvol);
- dict_unref(xdata);
- xdata = NULL;
+ if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) {
+ afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
+ pre_crawl_xdata);
+ dict_unref(pre_crawl_xdata);
+ pre_crawl_xdata = NULL;
}
}
- loc_wipe(&loc);
-
return NULL;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 568293c..26950fd 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -384,6 +384,7 @@ init(xlator_t *this)
priv->child_count--;
priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
priv->ta_notify_dom_lock_offset = 0;
+ *priv->ta_gfid = 0;
}
INIT_LIST_HEAD(&priv->healing);
INIT_LIST_HEAD(&priv->heal_waiting);