summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-heald.c
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2015-02-27 16:01:31 +0530
committerVijay Bellur <vbellur@redhat.com>2015-03-09 15:36:31 -0700
commita70231c78aaea436575d427a1386a64d1471b776 (patch)
tree458c971ecd520163e2406b91f8d26ac26e86cf2c /xlators/cluster/ec/src/ec-heald.c
parent72dc1025dc17a650f3838223c78e3205132deba9 (diff)
cluster/ec: Add self-heal-daemon command handlers
This patch introduces the changes required in ec xlator to handle index/full heal. Index healer threads: Ec xlator start an index healer thread per local brick. This thread keeps waking up every minute to check if there are any files to be healed based on the indices kept in index directory. Whenever child_up event comes, then also this index healer thread wakes up and crawls the indices and triggers heal. When self-heal-daemon is disabled on this particular volume then the healer thread keeps waiting until it is enabled again to perform heals. Full healer threads: Ec xlator starts a full healer thread for the local subvolume provided by glusterd to perform full crawl on the directory hierarchy to perform heals. Once the crawl completes the thread exits if no more full heals are issued. Changed xl-op prefix GF_AFR_OP to GF_SHD_OP to make it more generic. Change-Id: Idf9b2735d779a6253717be064173dfde6f8f824b BUG: 1177601 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/9787 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Ravishankar N <ravishankar@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-heald.c')
-rw-r--r--xlators/cluster/ec/src/ec-heald.c598
1 file changed, 598 insertions(+), 0 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
new file mode 100644
index 00000000000..6b899414d4d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -0,0 +1,598 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "compat-errno.h"
+#include "ec.h"
+#include "ec-heald.h"
+#include "ec-mem-types.h"
+#include "syncop.h"
+#include "syncop-utils.h"
+#include "protocol-common.h"
+
/* Size of the LRU in the inode table the self-heal daemon creates. */
#define SHD_INODE_LRU_LIMIT 2048

/*
 * Re-check that healer->subvol is still hosted on this node and cache the
 * answer in healer->local.  NOTE(review): this macro may only be expanded
 * directly inside a loop body -- when the subvolume is not local it either
 * leaves the loop with `break' (if safe_break() managed to mark the healer
 * as stopped) or retries the iteration with `continue'.  The trailing
 * semicolon after `while (0)' is part of the original definition.
 */
#define ASSERT_LOCAL(this, healer) \
        do { \
                if (!ec_shd_is_subvol_local (this, healer->subvol)) { \
                        healer->local = _gf_false; \
                        if (safe_break (healer)) { \
                                break; \
                        } else { \
                                continue; \
                        } \
                } else { \
                        healer->local = _gf_true; \
                } \
        } while (0);


/* Accessors for the n-th index/full healer inside ec->shd. */
#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n]))
#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n]))
+
+gf_boolean_t
+ec_shd_is_subvol_local (xlator_t *this, int subvol)
+{
+ ec_t *ec = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {0, };
+
+ ec = this->private;
+ loc.inode = this->itable->root;
+ syncop_is_subvol_local (ec->xl_list[subvol], &loc, &is_local);
+ return is_local;
+}
+
+char *
+ec_subvol_name (xlator_t *this, int subvol)
+{
+ ec_t *ec = NULL;
+
+ ec = this->private;
+ if (subvol < 0 || subvol > ec->nodes)
+ return NULL;
+
+ return ec->xl_list[subvol]->name;
+}
+
/*
 * Wait (with healer->mutex held by the caller) until either a rerun is
 * requested or ~60 seconds elapse.  Returns the pending rerun count
 * (0 on plain timeout) and clears it.  While self-heal is disabled or
 * the disperse set is not up, the wait is restarted so the healer thread
 * stays parked instead of sweeping.
 */
int
__ec_shd_healer_wait (struct subvol_healer *healer)
{
        ec_t *ec = NULL;
        struct timespec wait_till = {0, };
        int ret = 0;

        ec = healer->this->private;

disabled_loop:
        /* NOTE(review): pthread_cond_timedwait takes an absolute time on
         * the condvar's clock (CLOCK_REALTIME by default); time(NULL)+60
         * matches that but is sensitive to wall-clock jumps. */
        wait_till.tv_sec = time (NULL) + 60;

        while (!healer->rerun) {
                ret = pthread_cond_timedwait (&healer->cond,
                                              &healer->mutex,
                                              &wait_till);
                if (ret == ETIMEDOUT)
                        break;
        }

        /* Consume the rerun request (if any) atomically under the lock. */
        ret = healer->rerun;
        healer->rerun = 0;

        /* Park again while healing is administratively disabled or the
         * volume is down. */
        if (!ec->shd.enabled || !ec->up)
                goto disabled_loop;

        return ret;
}
+
+
+int
+ec_shd_healer_wait (struct subvol_healer *healer)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ ret = __ec_shd_healer_wait (healer);
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
+
+
+gf_boolean_t
+safe_break (struct subvol_healer *healer)
+{
+ gf_boolean_t ret = _gf_false;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
+
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
+
+
/*
 * Look up the inode for `gfid' in this->itable; on a miss, create a new
 * inode and resolve it with a nameless (gfid-only) lookup on `subvol'.
 * The returned inode carries an extra lookup count (via inode_lookup) so
 * the table retains it; NULL is returned on failure.
 */
inode_t *
ec_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
{
        inode_t *inode = NULL;
        int ret = 0;
        loc_t loc = {0, };
        struct iatt iatt = {0, };

        inode = inode_find (this->itable, gfid);
        if (inode) {
                inode_lookup (inode);
                goto out;
        }

        /* Cache miss: perform a nameless lookup keyed only by gfid. */
        loc.inode = inode_new (this->itable);
        if (!loc.inode)
                goto out;
        uuid_copy (loc.gfid, gfid);

        ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL);
        if (ret < 0)
                goto out;

        inode = inode_link (loc.inode, NULL, NULL, &iatt);
        if (inode)
                inode_lookup (inode);
out:
        loc_wipe (&loc);
        return inode;
}
+
+
/*
 * Resolve the inode of the xattrop index directory on brick `subvol'.
 * The gfid of that directory is fetched via the virtual
 * GF_XATTROP_INDEX_GFID getxattr on the brick root.  Returns a
 * referenced inode, or NULL with errno describing the getxattr failure.
 */
inode_t*
ec_shd_index_inode (xlator_t *this, xlator_t *subvol)
{
        loc_t rootloc = {0, };
        inode_t *inode = NULL;
        int ret = 0;
        dict_t *xattr = NULL;
        void *index_gfid = NULL;

        rootloc.inode = inode_ref (this->itable->root);
        uuid_copy (rootloc.gfid, rootloc.inode->gfid);

        ret = syncop_getxattr (subvol, &rootloc, &xattr,
                               GF_XATTROP_INDEX_GFID, NULL);
        if (ret || !xattr) {
                /* syncop_getxattr returns -errno; surface it to the
                 * caller.  NOTE(review): if ret == 0 but xattr is NULL
                 * this sets errno = 0 -- confirm callers cope. */
                errno = -ret;
                goto out;
        }

        ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
        if (ret)
                goto out;

        gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s",
                subvol->name, uuid_utoa (index_gfid));

        inode = ec_shd_inode_find (this, subvol, index_gfid);

out:
        loc_wipe (&rootloc);

        if (xattr)
                dict_unref (xattr);

        return inode;
}
+
+int
+ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {0, };
+ int ret = 0;
+
+ loc.parent = inode_ref (inode);
+ loc.name = name;
+
+ ret = syncop_unlink (subvol, &loc);
+
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc)
+{
+ return syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL);
+}
+
+
/*
 * Per-entry callback for the index sweep: each entry name in the index
 * directory is a gfid string.  Heals the corresponding file, or purges
 * the index entry if the gfid no longer resolves on the brick.  Always
 * returns 0 (continue the crawl) unless self-heal got disabled, in
 * which case -EBUSY aborts the crawl.
 */
int
ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
                   void *data)
{
        struct subvol_healer *healer = data;
        ec_t *ec = NULL;
        loc_t loc = {0};
        int ret = 0;

        ec = healer->this->private;
        if (!ec->shd.enabled)
                return -EBUSY;

        gf_log (healer->this->name, GF_LOG_DEBUG, "got entry: %s",
                entry->d_name);

        /* Index entry names are gfids; skip anything unparseable. */
        ret = uuid_parse (entry->d_name, loc.gfid);
        if (ret)
                return 0;

        /* If this fails with ENOENT/ESTALE index is stale */
        ret = syncop_gfid_to_path (healer->this->itable, subvol, loc.gfid,
                                   (char **)&loc.path);
        if (ret == -ENOENT || ret == -ESTALE) {
                ec_shd_index_purge (subvol, parent->inode, entry->d_name);
                goto out;
        }

        loc.inode = ec_shd_inode_find (healer->this, healer->this, loc.gfid);
        if (!loc.inode)
                goto out;

        ec_shd_selfheal (healer, healer->subvol, &loc);

out:
        /* Releases loc.inode/loc.path acquired above. */
        loc_wipe (&loc);

        return 0;
}
+
/*
 * Crawl the xattrop index directory of the healer's brick, invoking
 * ec_shd_index_heal() for every entry.  Returns 0 on success or a
 * negative errno.
 */
int
ec_shd_index_sweep (struct subvol_healer *healer)
{
        loc_t loc = {0};
        ec_t *ec = NULL;
        int ret = 0;
        xlator_t *subvol = NULL;

        ec = healer->this->private;
        subvol = ec->xl_list[healer->subvol];

        loc.inode = ec_shd_index_inode (healer->this, subvol);
        if (!loc.inode) {
                gf_log (healer->this->name, GF_LOG_WARNING,
                        "unable to get index-dir on %s", subvol->name);
                /* Relies on ec_shd_index_inode() having set errno. */
                return -errno;
        }

        ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD,
                               healer, ec_shd_index_heal);

        /* Drop the index dir from the itable once the scan is done. */
        inode_forget (loc.inode, 1);
        loc_wipe (&loc);

        return ret;
}
+
+int
+ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = this->private;
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ loc.parent = inode_ref (parent->inode);
+ loc.name = entry->d_name;
+ uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret < 0)
+ goto out;
+
+ loc.inode = ec_shd_inode_find (this, this, loc.gfid);
+ if (!loc.inode) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ec_shd_selfheal (healer, healer->subvol, &loc);
+
+ loc_wipe (&loc);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
+{
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+
+ ec = healer->this->private;
+ loc.inode = inode;
+ return syncop_ftw (ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_AFR_SELF_HEALD, healer,
+ ec_shd_full_heal);
+}
+
+
/*
 * Main loop of an index-healer thread (one per local brick).  Sleeps up
 * to a minute (or until signalled) between sweeps and skips sweeping
 * while the brick is not local.  The loop only terminates through
 * ASSERT_LOCAL's `break', i.e. after safe_break() has marked the healer
 * as stopped.
 */
void *
ec_shd_index_healer (void *data)
{
        struct subvol_healer *healer = NULL;
        xlator_t *this = NULL;

        healer = data;
        THIS = this = healer->this;

        for (;;) {
                ec_shd_healer_wait (healer);

                /* May `continue' (brick not local) or `break' (stopped). */
                ASSERT_LOCAL(this, healer);

                gf_log (this->name, GF_LOG_DEBUG,
                        "starting index sweep on subvol %s",
                        ec_subvol_name (this, healer->subvol));

                ec_shd_index_sweep (healer);

                gf_log (this->name, GF_LOG_DEBUG,
                        "finished index sweep on subvol %s",
                        ec_subvol_name (this, healer->subvol));
        }

        return NULL;
}
+
+
/*
 * Main loop of a full-healer thread.  Unlike the index healer this loop
 * terminates: when the wait ends with no pending rerun request the
 * thread marks itself stopped and exits; a later "heal full" command
 * spawns it again.
 */
void *
ec_shd_full_healer (void *data)
{
        struct subvol_healer *healer = NULL;
        xlator_t *this = NULL;

        int run = 0;

        healer = data;
        THIS = this = healer->this;

        for (;;) {
                pthread_mutex_lock (&healer->mutex);
                {
                        run = __ec_shd_healer_wait (healer);
                        if (!run)
                                /* No new request: stop this thread. */
                                healer->running = _gf_false;
                }
                pthread_mutex_unlock (&healer->mutex);

                if (!run)
                        break;

                /* May `continue' (brick not local) or `break' (stopped). */
                ASSERT_LOCAL(this, healer);

                gf_log (this->name, GF_LOG_INFO,
                        "starting full sweep on subvol %s",
                        ec_subvol_name (this, healer->subvol));

                /* Crawl from the root of the volume. */
                ec_shd_full_sweep (healer, this->itable->root);

                gf_log (this->name, GF_LOG_INFO,
                        "finished full sweep on subvol %s",
                        ec_subvol_name (this, healer->subvol));
        }

        return NULL;
}
+
+
+int
+ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = 0;
+
+ ret = pthread_mutex_init (&healer->mutex, NULL);
+ if (ret)
+ goto out;
+
+ ret = pthread_cond_init (&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
+out:
+ return ret;
+}
+
+
/*
 * Ensure the healer's thread is running and request (another) sweep.
 * If the thread already exists it is woken via the condvar; otherwise a
 * new one is created running `threadfn'.  Returns 0 on success or the
 * gf_thread_create() error.
 */
int
ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
                     void *(threadfn)(void *))
{
        int ret = 0;

        pthread_mutex_lock (&healer->mutex);
        {
                if (healer->running) {
                        /* Thread already exists: just nudge it awake. */
                        pthread_cond_signal (&healer->cond);
                } else {
                        ret = gf_thread_create (&healer->thread, NULL,
                                                threadfn, healer);
                        if (ret)
                                goto unlock;
                        healer->running = 1;
                }

                /* Record the sweep request while still holding the lock. */
                healer->rerun = 1;
        }
unlock:
        pthread_mutex_unlock (&healer->mutex);

        return ret;
}
+
+int
+ec_shd_full_healer_spawn (xlator_t *this, int subvol)
+{
+ return ec_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol),
+ ec_shd_full_healer);
+}
+
+
+int
+ec_shd_index_healer_spawn (xlator_t *this, int subvol)
+{
+ return ec_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol),
+ ec_shd_index_healer);
+}
+
+void
+ec_selfheal_childup (ec_t *ec, int child)
+{
+ if (!ec->shd.iamshd)
+ return;
+ ec_shd_index_healer_spawn (ec->xl, child);
+}
+
+int
+ec_selfheal_daemon_init (xlator_t *this)
+{
+ ec_t *ec = NULL;
+ ec_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ ec = this->private;
+ shd = &ec->shd;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+
+ shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
+ ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = ec_shd_healer_init (this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
+ ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = ec_shd_healer_init (this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
/*
 * Handle a "heal full"/"heal index" CLI request: for every brick of the
 * disperse set record a per-brick status string in `output' and, when
 * the brick is locally hosted and the set is up, spawn the matching
 * healer thread.
 * NOTE(review): op_ret is overwritten on every iteration, so the return
 * value reflects only the LAST brick examined -- confirm this matches
 * what glusterd expects before changing it.
 */
int
ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id)
{
        char key[64] = {0};
        int op_ret = 0;
        ec_t *ec = NULL;
        int i = 0;
        GF_UNUSED int ret = 0;

        ec = this->private;

        for (i = 0; i < ec->nodes; i++) {
                snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);

                op_ret = -1;
                /* Brick i's connectivity is bit i of the xl_up bitmap. */
                if (((ec->xl_up >> i) & 1) == 0) {
                        ret = dict_set_str (output, key, "Brick is not connected");
                } else if (!ec->up) {
                        ret = dict_set_str (output, key,
                                            "Disperse subvolume is not up");
                } else if (!ec_shd_is_subvol_local (this, i)) {
                        ret = dict_set_str (output, key, "Brick is remote");
                } else {
                        ret = dict_set_str (output, key, "Started self-heal");
                        if (op == GF_SHD_OP_HEAL_FULL) {
                                ec_shd_full_healer_spawn (this, i);
                        } else if (op == GF_SHD_OP_HEAL_INDEX) {
                                ec_shd_index_healer_spawn (this, i);
                        }
                        op_ret = 0;
                }
        }
        return op_ret;
}
+
+int
+ec_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t *)&op);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+
+ switch (op) {
+ case GF_SHD_OP_HEAL_FULL:
+ ret = ec_heal_op (this, output, op, xl_id);
+ break;
+
+ case GF_SHD_OP_HEAL_INDEX:
+ ret = ec_heal_op (this, output, op, xl_id);
+ break;
+
+ default:
+ ret = -1;
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return ret;
+}