summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-heald.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-heald.c')
-rw-r--r--xlators/cluster/ec/src/ec-heald.c598
1 files changed, 598 insertions, 0 deletions
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
new file mode 100644
index 0000000..6b89941
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -0,0 +1,598 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "compat-errno.h"
+#include "ec.h"
+#include "ec-heald.h"
+#include "ec-mem-types.h"
+#include "syncop.h"
+#include "syncop-utils.h"
+#include "protocol-common.h"
+
+#define SHD_INODE_LRU_LIMIT 2048
+#define ASSERT_LOCAL(this, healer) \
+ do { \
+ if (!ec_shd_is_subvol_local (this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break (healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ } \
+ } while (0);
+
+
+#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n]))
+#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n]))
+
+gf_boolean_t
+ec_shd_is_subvol_local (xlator_t *this, int subvol)
+{
+ ec_t *ec = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {0, };
+
+ ec = this->private;
+ loc.inode = this->itable->root;
+ syncop_is_subvol_local (ec->xl_list[subvol], &loc, &is_local);
+ return is_local;
+}
+
+char *
+ec_subvol_name (xlator_t *this, int subvol)
+{
+ ec_t *ec = NULL;
+
+ ec = this->private;
+ if (subvol < 0 || subvol > ec->nodes)
+ return NULL;
+
+ return ec->xl_list[subvol]->name;
+}
+
+int
+__ec_shd_healer_wait (struct subvol_healer *healer)
+{
+ ec_t *ec = NULL;
+ struct timespec wait_till = {0, };
+ int ret = 0;
+
+ ec = healer->this->private;
+
+disabled_loop:
+ wait_till.tv_sec = time (NULL) + 60;
+
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait (&healer->cond,
+ &healer->mutex,
+ &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
+
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+ if (!ec->shd.enabled || !ec->up)
+ goto disabled_loop;
+
+ return ret;
+}
+
+
+int
+ec_shd_healer_wait (struct subvol_healer *healer)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ ret = __ec_shd_healer_wait (healer);
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
+
+
+gf_boolean_t
+safe_break (struct subvol_healer *healer)
+{
+ gf_boolean_t ret = _gf_false;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
+
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
+
+
+inode_t *
+ec_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
+{
+ inode_t *inode = NULL;
+ int ret = 0;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+
+ inode = inode_find (this->itable, gfid);
+ if (inode) {
+ inode_lookup (inode);
+ goto out;
+ }
+
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode)
+ goto out;
+ uuid_copy (loc.gfid, gfid);
+
+ ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ inode = inode_link (loc.inode, NULL, NULL, &iatt);
+ if (inode)
+ inode_lookup (inode);
+out:
+ loc_wipe (&loc);
+ return inode;
+}
+
+
+inode_t*
+ec_shd_index_inode (xlator_t *this, xlator_t *subvol)
+{
+ loc_t rootloc = {0, };
+ inode_t *inode = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+
+ rootloc.inode = inode_ref (this->itable->root);
+ uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr (subvol, &rootloc, &xattr,
+ GF_XATTROP_INDEX_GFID, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret)
+ goto out;
+
+ gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s",
+ subvol->name, uuid_utoa (index_gfid));
+
+ inode = ec_shd_inode_find (this, subvol, index_gfid);
+
+out:
+ loc_wipe (&rootloc);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return inode;
+}
+
+int
+ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {0, };
+ int ret = 0;
+
+ loc.parent = inode_ref (inode);
+ loc.name = name;
+
+ ret = syncop_unlink (subvol, &loc);
+
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc)
+{
+ return syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL);
+}
+
+
+int
+ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = healer->this->private;
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ gf_log (healer->this->name, GF_LOG_DEBUG, "got entry: %s",
+ entry->d_name);
+
+ ret = uuid_parse (entry->d_name, loc.gfid);
+ if (ret)
+ return 0;
+
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path (healer->this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret == -ENOENT || ret == -ESTALE) {
+ ec_shd_index_purge (subvol, parent->inode, entry->d_name);
+ goto out;
+ }
+
+ loc.inode = ec_shd_inode_find (healer->this, healer->this, loc.gfid);
+ if (!loc.inode)
+ goto out;
+
+ ec_shd_selfheal (healer, healer->subvol, &loc);
+
+out:
+ loc_wipe (&loc);
+
+ return 0;
+}
+
+int
+ec_shd_index_sweep (struct subvol_healer *healer)
+{
+ loc_t loc = {0};
+ ec_t *ec = NULL;
+ int ret = 0;
+ xlator_t *subvol = NULL;
+
+ ec = healer->this->private;
+ subvol = ec->xl_list[healer->subvol];
+
+ loc.inode = ec_shd_index_inode (healer->this, subvol);
+ if (!loc.inode) {
+ gf_log (healer->this->name, GF_LOG_WARNING,
+ "unable to get index-dir on %s", subvol->name);
+ return -errno;
+ }
+
+ ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD,
+ healer, ec_shd_index_heal);
+
+ inode_forget (loc.inode, 1);
+ loc_wipe (&loc);
+
+ return ret;
+}
+
+int
+ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = this->private;
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ loc.parent = inode_ref (parent->inode);
+ loc.name = entry->d_name;
+ uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret < 0)
+ goto out;
+
+ loc.inode = ec_shd_inode_find (this, this, loc.gfid);
+ if (!loc.inode) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ec_shd_selfheal (healer, healer->subvol, &loc);
+
+ loc_wipe (&loc);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
+{
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+
+ ec = healer->this->private;
+ loc.inode = inode;
+ return syncop_ftw (ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_AFR_SELF_HEALD, healer,
+ ec_shd_full_heal);
+}
+
+
+void *
+ec_shd_index_healer (void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ ec_shd_healer_wait (healer);
+
+ ASSERT_LOCAL(this, healer);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "starting index sweep on subvol %s",
+ ec_subvol_name (this, healer->subvol));
+
+ ec_shd_index_sweep (healer);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finished index sweep on subvol %s",
+ ec_subvol_name (this, healer->subvol));
+ }
+
+ return NULL;
+}
+
+
+void *
+ec_shd_full_healer (void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ pthread_mutex_lock (&healer->mutex);
+ {
+ run = __ec_shd_healer_wait (healer);
+ if (!run)
+ healer->running = _gf_false;
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ if (!run)
+ break;
+
+ ASSERT_LOCAL(this, healer);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "starting full sweep on subvol %s",
+ ec_subvol_name (this, healer->subvol));
+
+ ec_shd_full_sweep (healer, this->itable->root);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "finished full sweep on subvol %s",
+ ec_subvol_name (this, healer->subvol));
+ }
+
+ return NULL;
+}
+
+
+int
+ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = 0;
+
+ ret = pthread_mutex_init (&healer->mutex, NULL);
+ if (ret)
+ goto out;
+
+ ret = pthread_cond_init (&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
+out:
+ return ret;
+}
+
+
+int
+ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal (&healer->cond);
+ } else {
+ ret = gf_thread_create (&healer->thread, NULL,
+ threadfn, healer);
+ if (ret)
+ goto unlock;
+ healer->running = 1;
+ }
+
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
+
+int
+ec_shd_full_healer_spawn (xlator_t *this, int subvol)
+{
+ return ec_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol),
+ ec_shd_full_healer);
+}
+
+
+int
+ec_shd_index_healer_spawn (xlator_t *this, int subvol)
+{
+ return ec_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol),
+ ec_shd_index_healer);
+}
+
+void
+ec_selfheal_childup (ec_t *ec, int child)
+{
+ if (!ec->shd.iamshd)
+ return;
+ ec_shd_index_healer_spawn (ec->xl, child);
+}
+
+int
+ec_selfheal_daemon_init (xlator_t *this)
+{
+ ec_t *ec = NULL;
+ ec_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ ec = this->private;
+ shd = &ec->shd;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+
+ shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
+ ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = ec_shd_healer_init (this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
+ ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = ec_shd_healer_init (this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id)
+{
+ char key[64] = {0};
+ int op_ret = 0;
+ ec_t *ec = NULL;
+ int i = 0;
+ GF_UNUSED int ret = 0;
+
+ ec = this->private;
+
+ for (i = 0; i < ec->nodes; i++) {
+ snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
+
+ op_ret = -1;
+ if (((ec->xl_up >> i) & 1) == 0) {
+ ret = dict_set_str (output, key, "Brick is not connected");
+ } else if (!ec->up) {
+ ret = dict_set_str (output, key,
+ "Disperse subvolume is not up");
+ } else if (!ec_shd_is_subvol_local (this, i)) {
+ ret = dict_set_str (output, key, "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key, "Started self-heal");
+ if (op == GF_SHD_OP_HEAL_FULL) {
+ ec_shd_full_healer_spawn (this, i);
+ } else if (op == GF_SHD_OP_HEAL_INDEX) {
+ ec_shd_index_healer_spawn (this, i);
+ }
+ op_ret = 0;
+ }
+ }
+ return op_ret;
+}
+
+int
+ec_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t *)&op);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+
+ switch (op) {
+ case GF_SHD_OP_HEAL_FULL:
+ ret = ec_heal_op (this, output, op, xl_id);
+ break;
+
+ case GF_SHD_OP_HEAL_INDEX:
+ ret = ec_heal_op (this, output, op, xl_id);
+ break;
+
+ default:
+ ret = -1;
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return ret;
+}