summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/Makefile.am2
-rw-r--r--xlators/cluster/afr/src/afr-common.c32
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h10
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c776
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h24
-rw-r--r--xlators/cluster/afr/src/afr.c79
-rw-r--r--xlators/cluster/afr/src/afr.h23
-rw-r--r--xlators/cluster/afr/src/pump.c2
8 files changed, 690 insertions, 258 deletions
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 16ed25af10b..ed090181316 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -15,7 +15,7 @@ noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+ -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 1895150cd1d..9a78f6d3d4d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3399,7 +3399,7 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
afr_private_t *priv = NULL;
int i = -1;
@@ -3412,6 +3412,8 @@ afr_notify (xlator_t *this, int32_t event,
int ret = -1;
int call_psh = 0;
int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
@@ -3499,10 +3501,11 @@ afr_notify (xlator_t *this, int32_t event,
break;
- case GF_EVENT_TRIGGER_HEAL:
- gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered"
- " manually. Start crawling");
- call_psh = 1;
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
break;
default:
@@ -3552,7 +3555,7 @@ afr_notify (xlator_t *this, int32_t event,
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh) {
+ if (call_psh && priv->shd.enabled) {
gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child);
afr_do_poll_self_heal ((void*) (long) up_child);
}
@@ -3925,6 +3928,23 @@ afr_priv_destroy (afr_private_t *priv)
goto out;
inode_unref (priv->root_inode);
GF_FREE (priv->shd.pos);
+ GF_FREE (priv->shd.pending);
+ GF_FREE (priv->shd.inprogress);
+ GF_FREE (priv->shd.sh_times);
+// for (i = 0; i < priv->child_count; i++)
+// if (priv->shd.timer && priv->shd.timer[i])
+// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
+ GF_FREE (priv->shd.timer);
+
+ if (priv->shd.healed)
+ eh_destroy (priv->shd.healed);
+
+ if (priv->shd.heal_failed)
+ eh_destroy (priv->shd.heal_failed);
+
+ if (priv->shd.split_brain)
+ eh_destroy (priv->shd.split_brain);
+
GF_FREE (priv->last_event);
if (priv->pending_key) {
for (i = 0; i < priv->child_count; i++)
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 22813940867..a138c967676 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -44,10 +44,12 @@ enum gf_afr_mem_types_ {
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
gf_afr_fd_paused_call_t,
- gf_afr_mt_afr_crawl_data_t,
- gf_afr_mt_afr_brick_pos_t,
- gf_afr_mt_afr_shd_bool_t,
- gf_afr_mt_afr_shd_timer_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 1f071b87150..fa7e61e49e8 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -25,11 +25,453 @@
#include "syncop.h"
#include "afr-self-heald.h"
#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
#define AFR_POLL_TIMEOUT 600
+typedef enum {
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+} afr_crawl_flags_t;
+
+typedef struct shd_dump {
+ dict_t *dict;
+ time_t sh_time;
+ xlator_t *this;
+ int child;
+} shd_dump_t;
+
+typedef struct shd_event_ {
+ int child;
+ char *path;
+} shd_event_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+void
+shd_cleanup_event (void *event)
+{
+ shd_event_t *shd_event = event;
+
+ if (!shd_event)
+ goto out;
+ if (shd_event->path)
+ GF_FREE (shd_event->path);
+ GF_FREE (shd_event);
+out:
+ return;
+}
+
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+ int i = 0;
+ int ret = -1;
+ for (i = 0; i < child_count; i++) {
+ if (shd->pos[i] == AFR_POS_LOCAL) {
+ ret = i;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+ int ret = 0;
+
+ uuid_copy (loc->pargfid, parent->inode->gfid);
+ loc->path = "";
+ loc->name = name;
+ loc->parent = inode_ref (parent->inode);
+ if (!loc->parent) {
+ loc->path = NULL;
+ loc_wipe (loc);
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+_add_str_to_dict (xlator_t *this, dict_t *output, int child, char *str,
+ gf_boolean_t dyn)
+{
+ //subkey not used for now
+ int ret = -1;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
+ if (dyn)
+ ret = dict_set_dynstr (output, key, str);
+ else
+ ret = dict_set_str (output, key, str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+ str);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+ char **fpath)
+{
+ dict_t *xattr = NULL;
+ char *path = NULL;
+ int ret = -1;
+
+ ret = syncop_getxattr (readdir_xl, child, &xattr,
+ GFID_TO_PATH_KEY);
+ if (ret)
+ goto out;
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to get path for "
+ "gfid %s", uuid_utoa (child->gfid));
+ goto out;
+ }
+ path = gf_strdup (path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *fpath = path;
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ if (shd_event->child != dump_data->child)
+ goto out;
+ if (cb->tv.tv_sec >= dump_data->sh_time)
+ ret = _add_str_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event->path,
+ _gf_false);
+out:
+ return ret;
+}
+
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, time_t sh_time,
+ int child)
+{
+ shd_dump_t dump_data = {0};
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.sh_time = sh_time;
+ dump_data.child = child;
+ eh_dump (eh, &dump_data, _add_event_to_dict);
+ return 0;
+}
+
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry,
+ loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+ dict_t *output = NULL;
+ xlator_t *readdir_xl = NULL;
+ int ret = -1;
+ char *path = NULL;
+
+ if (uuid_is_null (childloc->gfid))
+ goto out;
+
+ output = crawl_data->op_data;
+ readdir_xl = crawl_data->readdir_xl;
+
+ ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path);
+ if (ret)
+ goto out;
+
+ ret = _add_str_to_dict (this, output, crawl_data->child, path,
+ _gf_true);
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return ret;
+}
+
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+ loc_t *parent, char *fname)
+{
+ int ret = 0;
+ loc_t index_loc = {0};
+
+ ret = _build_index_loc (this, &index_loc, fname, parent);
+ if (ret)
+ goto out;
+ gf_log (this->name, GF_LOG_INFO, "Removing stale index "
+ "for %s on %s", index_loc.name, readdir_xl->name);
+ ret = syncop_unlink (readdir_xl, &index_loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
+ " index on %s - %s", index_loc.name,
+ readdir_xl->name, strerror (errno));
+ }
+ index_loc.path = NULL;
+ loc_wipe (&index_loc);
+out:
+ return;
+}
+
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t op_ret, int32_t op_errno,
+ afr_crawl_data_t *crawl_data)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ eh_t *eh = NULL;
+ char *path = NULL;
+ shd_event_t *event = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (crawl_data->crawl == INDEX) {
+ if ((op_ret < 0) && (op_errno == ENOENT)) {
+ _remove_stale_index (this, crawl_data->readdir_xl,
+ parent, uuid_utoa (child->gfid));
+ goto out;
+ }
+ ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+ child, &path);
+ if (ret)
+ goto out;
+ } else {
+ path = gf_strdup (child->path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (op_ret < 0 && op_errno == EIO)
+ eh = shd->split_brain;
+ else if (op_ret < 0)
+ eh = shd->heal_failed;
+ else
+ eh = shd->healed;
+ ret = -1;
+ event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+ if (!event)
+ goto out;
+ event->child = crawl_data->child;
+ event->path = path;
+ ret = eh_save_history (eh, event);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to "
+ "eh, (%d, %s)", path, op_ret, strerror (op_errno));
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return;
+}
+
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+ loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+ struct iatt parentbuf = {0};
+ int ret = 0;
+
+ if (uuid_is_null (child->gfid))
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
+ uuid_utoa (child->gfid));
+
+ ret = syncop_lookup (this, child, NULL,
+ iattr, NULL, &parentbuf);
+ _crawl_post_sh_action (this, parent, child, ret, errno, crawl_data);
+ return ret;
+}
+
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ return 0;
+}
+
void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl);
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ time (&shd->sh_times[child]);
+ afr_start_crawl (this, child, crawl, _self_heal_entry,
+ NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
+ afr_crawl_done);
+}
+
+void
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++)
+ _do_self_heal_on_subvol (this, i, INDEX);
+}
+
+void
+_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl)
+{
+ int local_child = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local_child = afr_get_local_child (&priv->shd,
+ priv->child_count);
+ if (local_child < -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "No local bricks found");
+ }
+ _do_self_heal_on_subvol (this, local_child, FULL);
+}
+
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++)
+ afr_start_crawl (this, i, INDEX, _add_summary_to_dict,
+ output, _gf_false, 0, NULL);
+ return 0;
+}
+
+int
+_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_eh_to_dict (this, eh, dict, shd->sh_times[i], i);
+ }
+ return 0;
+}
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int xl_id = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ _do_self_heal_on_local_subvols (this, INDEX);
+ ret = 0;
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ _do_self_heal_on_local_subvol (this, FULL);
+ ret = 0;
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ ret = _get_index_summary_on_local_subvols (this, output);
+ if (ret)
+ goto out;
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ ret = _add_all_subvols_eh_to_dict (this, shd->healed, output);
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed,
+ output);
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ ret = _add_all_subvols_eh_to_dict (this, shd->split_brain,
+ output);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return ret;
+}
void
afr_do_poll_self_heal (void *data)
@@ -39,21 +481,14 @@ afr_do_poll_self_heal (void *data)
struct timeval timeout = {0};
xlator_t *this = NULL;
long child = (long)data;
- int i = 0;
this = THIS;
priv = this->private;
shd = &priv->shd;
- if (child == AFR_ALL_CHILDREN) { //done by command
- for (i = 0; i < priv->child_count; i++)
- afr_start_crawl (this, i, INDEX);
+ _do_self_heal_on_subvol (this, child, INDEX);
+ if (shd->pos[child] == AFR_POS_REMOTE)
goto out;
- } else {
- afr_start_crawl (this, child, INDEX);
- if (shd->pos[child] == AFR_POS_REMOTE)
- goto out;
- }
timeout.tv_sec = AFR_POLL_TIMEOUT;
timeout.tv_usec = 0;
if (shd->timer[child])
@@ -71,9 +506,6 @@ out:
}
static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
- xlator_t *readdir_xl);
-static int
get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
char *start = NULL;
@@ -132,15 +564,16 @@ out:
int
afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
- loc_t *dirloc, xlator_t *readdir_xl)
+ loc_t *dirloc)
{
afr_private_t *priv = NULL;
dict_t *xattr = NULL;
void *index_gfid = NULL;
loc_t rootloc = {0};
- struct iatt iatt = {0};
+ struct iatt iattr = {0};
struct iatt parent = {0};
int ret = 0;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
priv = this->private;
if (crawl_data->crawl == FULL) {
@@ -167,13 +600,13 @@ afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
dirloc->path = "";
dirloc->inode = inode_new (priv->root_inode->table);
ret = syncop_lookup (readdir_xl, dirloc, NULL,
- &iatt, NULL, &parent);
+ &iattr, NULL, &parent);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "lookup failed on "
"index dir on %s", readdir_xl->name);
goto out;
}
- inode_link (dirloc->inode, NULL, NULL, &iatt);
+ inode_link (dirloc->inode, NULL, NULL, &iattr);
}
ret = 0;
out:
@@ -185,7 +618,7 @@ out:
int
afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
- loc_t *dirloc, xlator_t *readdir_xl)
+ loc_t *dirloc)
{
fd_t *fd = NULL;
int ret = 0;
@@ -199,7 +632,7 @@ afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
goto out;
}
- ret = syncop_opendir (readdir_xl, dirloc, fd);
+ ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"opendir failed on %s", dirloc->path);
@@ -247,7 +680,7 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
}
gf_boolean_t
-_crawl_proceed (xlator_t *this, int child)
+_crawl_proceed (xlator_t *this, int child, int crawl_flags)
{
afr_private_t *priv = this->private;
gf_boolean_t proceed = _gf_false;
@@ -258,77 +691,33 @@ _crawl_proceed (xlator_t *this, int child)
goto out;
}
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
- goto out;
- }
- proceed = _gf_true;
-out:
- return proceed;
-}
-
-static int
-_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
-{
- int ret = 0;
-
- uuid_copy (loc->pargfid, parent->inode->gfid);
- loc->path = "";
- loc->name = name;
- loc->parent = inode_ref (parent->inode);
- if (!loc->parent) {
- loc->path = NULL;
- loc_wipe (loc);
- ret = -1;
- }
- return ret;
-}
-
-void
-_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc,
- gf_dirent_t *entry, int op_ret, int op_errno,
- xlator_t *readdir_xl)
-{
- loc_t index_loc = {0};
- int ret = 0;
-
- if (op_ret && (op_errno == ENOENT)) {
- ret = _build_index_loc (this, &index_loc, entry->d_name,
- parentloc);
- if (ret)
+ if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
+ "< 2 children are up");
goto out;
- gf_log (this->name, GF_LOG_INFO, "Removing stale index "
- "for %s on %s", index_loc.name, readdir_xl->name);
- ret = syncop_unlink (readdir_xl, &index_loc);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
- " index on %s - %s", index_loc.name,
- readdir_xl->name, strerror (errno));
}
- index_loc.path = NULL;
- loc_wipe (&index_loc);
}
+ proceed = _gf_true;
out:
- return;
+ return proceed;
}
static int
-_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, afr_crawl_data_t *crawl_data,
- xlator_t *readdir_xl)
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
{
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
- struct iatt iatt = {0};
- struct iatt parent = {0};
int ret = 0;
loc_t entry_loc = {0};
fd_t *fd = NULL;
+ struct iatt iattr = {0};
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (!_crawl_proceed (this, crawl_data->child)) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags)) {
ret = -1;
goto out;
}
@@ -344,62 +733,51 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
continue;
}
+ if (crawl_data->crawl == INDEX)
+ entry_loc.path = NULL;//HACK
loc_wipe (&entry_loc);
ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
entry, crawl_data);
if (ret)
goto out;
- if (uuid_is_null (entry_loc.gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "failed to build "
- "location for %s", entry->d_name);
- continue;
- }
- if (entry_loc.path)
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
- entry_loc.path);
- else
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
- uuid_utoa (entry_loc.gfid));
-
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, NULL, &parent);
- if (crawl_data->crawl == INDEX) {
- _index_crawl_post_lookup_fop (this, parentloc, entry,
- ret, errno, readdir_xl);
- entry_loc.path = NULL;
- loc_wipe (&entry_loc);
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
+
+ if (crawl_data->crawl == INDEX)
continue;
- }
- //Don't fail the crawl if lookup fails as it
- //could be because of split-brain
- if (ret || (!IA_ISDIR (iatt.ia_type)))
+ if (ret || !IA_ISDIR (iattr.ia_type))
continue;
- inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt);
- ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc,
- readdir_xl);
+
+ inode_link (entry_loc.inode, parentloc->inode, NULL, &iattr);
+
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
if (ret)
continue;
- ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl);
- fd_unref (fd);
+ ret = _crawl_directory (fd, &entry_loc, crawl_data);
+ if (fd)
+ fd_unref (fd);
}
ret = 0;
out:
+ if (crawl_data->crawl == INDEX)
+ entry_loc.path = NULL;
if (entry_loc.path)
loc_wipe (&entry_loc);
return ret;
}
static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
- xlator_t *readdir_xl)
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
{
xlator_t *this = NULL;
off_t offset = 0;
gf_dirent_t entries;
int ret = 0;
gf_boolean_t free_entries = _gf_false;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
INIT_LIST_HEAD (&entries.list);
this = THIS;
@@ -424,15 +802,16 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
ret = 0;
free_entries = _gf_true;
- if (!_crawl_proceed (this, crawl_data->child)) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags)) {
ret = -1;
goto out;
}
if (list_empty (&entries.list))
goto out;
- ret = _perform_self_heal (this, loc, &entries, &offset,
- crawl_data, readdir_xl);
+ ret = _process_entries (this, loc, &entries, &offset,
+ crawl_data);
gf_dirent_free (&entries);
free_entries = _gf_false;
}
@@ -515,14 +894,6 @@ out:
return ret;
}
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
-{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
- return 0;
-}
-
static inline int
afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
{
@@ -530,17 +901,74 @@ afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
}
static int
-afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
+afr_dir_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ xlator_t *readdir_xl = NULL;
+ fd_t *fd = NULL;
+ loc_t dirloc = {0};
+ afr_crawl_data_t *crawl_data = data;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+
+ if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags))
+ goto out;
+
+ ret = afr_find_child_position (this, crawl_data->child);
+ if (ret)
+ goto out;
+
+ if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
+ goto out;
+
+ readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
+ if (!readdir_xl)
+ goto out;
+ crawl_data->readdir_xl = readdir_xl;
+
+ ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
+ if (ret)
+ goto out;
+
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+ if (ret)
+ goto out;
+
+ ret = _crawl_directory (fd, &dirloc, crawl_data);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+ readdir_xl->name);
+ else
+ gf_log (this->name, GF_LOG_INFO, "Crawl completed "
+ "on %s", readdir_xl->name);
+ if (crawl_data->crawl == INDEX)
+ dirloc.path = NULL;
+out:
+ if (fd)
+ fd_unref (fd);
+ if (crawl_data->crawl == INDEX)
+ dirloc.path = NULL;
+ loc_wipe (&dirloc);
+ return ret;
+}
+
+static int
+afr_dir_exclusive_crawl (void *data)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
- loc_t dirloc = {0};
gf_boolean_t crawl = _gf_false;
int ret = 0;
- xlator_t *readdir_xl = NULL;
- fd_t *fd = NULL;
int child = -1;
+ xlator_t *this = NULL;
+ afr_crawl_data_t *crawl_data = data;
+ this = THIS;
priv = this->private;
shd = &priv->shd;
child = crawl_data->child;
@@ -548,7 +976,8 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
LOCK (&priv->lock);
{
if (shd->inprogress[child]) {
- shd->pending[child] = _gf_true;
+ if (shd->pending[child] != FULL)
+ shd->pending[child] = crawl_data->crawl;
} else {
shd->inprogress[child] = _gf_true;
crawl = _gf_true;
@@ -556,11 +985,6 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
}
UNLOCK (&priv->lock);
- if (!priv->root_inode) {
- ret = -1;
- goto out;
- }
-
if (!crawl) {
gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
"for %s", priv->children[child]->name);
@@ -568,87 +992,35 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
}
do {
- readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
- if (!readdir_xl)
- goto done;
- ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc,
- readdir_xl);
- if (ret)
- goto done;
- ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc,
- readdir_xl);
- if (ret)
- goto done;
- ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
- readdir_xl->name);
- else
- gf_log (this->name, GF_LOG_INFO, "Crawl completed "
- "on %s", readdir_xl->name);
- fd_unref (fd);
- fd = NULL;
-done:
+ afr_dir_crawl (data);
LOCK (&priv->lock);
{
- if (shd->pending[child]) {
- shd->pending[child] = _gf_false;
+ if (shd->pending[child] != NONE) {
+ crawl_data->crawl = shd->pending[child];
+ shd->pending[child] = NONE;
} else {
shd->inprogress[child] = _gf_false;
crawl = _gf_false;
}
}
UNLOCK (&priv->lock);
- if (crawl_data->crawl == INDEX) {
- dirloc.path = NULL;
- loc_wipe (&dirloc);
- }
} while (crawl);
out:
- if (fd)
- fd_unref (fd);
- if (crawl_data->crawl == INDEX) {
- dirloc.path = NULL;
- loc_wipe (&dirloc);
- }
- return ret;
-}
-
-static int
-afr_crawl (void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int ret = -1;
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
-
- if (!_crawl_proceed (this, crawl_data->child))
- goto out;
- ret = afr_find_child_position (this, crawl_data->child);
- if (ret)
- goto out;
-
- if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
- goto out;
-
- ret = afr_crawl_directory (this, crawl_data);
-out:
return ret;
}
void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
call_frame_t *frame = NULL;
afr_crawl_data_t *crawl_data = NULL;
int ret = 0;
+ int (*crawler) (void*) = NULL;
priv = this->private;
shd = &priv->shd;
@@ -662,16 +1034,24 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)
afr_set_lk_owner (frame, this);
afr_set_low_priority (frame);
crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_afr_crawl_data_t);
+ gf_afr_mt_crawl_data_t);
if (!crawl_data)
goto out;
+ crawl_data->process_entry = process_entry;
crawl_data->child = idx;
crawl_data->pid = frame->root->pid;
crawl_data->crawl = crawl;
- gf_log (this->name, GF_LOG_INFO, "starting crawl for %s",
- priv->children[idx]->name);
- ret = synctask_new (this->ctx->env, afr_crawl,
- afr_crawl_done, frame, crawl_data);
+ crawl_data->op_data = op_data;
+ crawl_data->crawl_flags = crawl_flags;
+ gf_log (this->name, GF_LOG_INFO, "starting crawl %d for %s",
+ crawl_data->crawl, priv->children[idx]->name);
+
+ if (exclusive)
+ crawler = afr_dir_exclusive_crawl;
+ else
+ crawler = afr_dir_crawl;
+ ret = synctask_new (this->ctx->env, crawler,
+ crawl_done, frame, crawl_data);
if (ret)
gf_log (this->name, GF_LOG_ERROR, "Could not create the "
"task for %d ret %d", idx, ret);
@@ -679,16 +1059,6 @@ out:
return;
}
-//void
-//afr_full_self_heal (xlator_t *this)
-//{
-// int i = 0;
-// afr_private_t *priv = this->private;
-//
-// for (i = 0; i < priv->child_count; i++)
-// afr_start_crawl (this, i, FULL);
-//}
-
void
afr_build_root_loc (xlator_t *this, loc_t *loc)
{
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index eb10219957d..44fd9f38566 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -26,17 +26,22 @@
#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
#define AFR_ALL_CHILDREN -1
-typedef enum {
- INDEX,
- FULL,
-} afr_crawl_type_t;
typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
- afr_crawl_type_t crawl;
- xlator_t *readdir_xl;
+ int child;
+ pid_t pid;
+ afr_crawl_type_t crawl;
+ xlator_t *readdir_xl;
+ void *op_data;
+ int crawl_flags;
+ int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
} afr_crawl_data_t;
+typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+
void afr_proactive_self_heal (xlator_t *this, int idx);
void afr_build_root_loc (xlator_t *this, loc_t *loc);
@@ -48,4 +53,7 @@ afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);
void
afr_do_poll_self_heal (void *data);
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index abc6aa3e567..8e2ef10080e 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -30,7 +30,10 @@
#endif
#include "afr-common.c"
-#define SHD_INODE_LRU_LIMIT 100
+#define SHD_INODE_LRU_LIMIT 100
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
struct volume_options options[];
@@ -39,8 +42,13 @@ notify (xlator_t *this, int32_t event,
void *data, ...)
{
int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- ret = afr_notify (this, event, data);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
return ret;
}
@@ -342,42 +350,55 @@ init (xlator_t *this)
goto out;
}
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_afr_brick_pos_t);
- if (!priv->shd.pos) {
- ret = -ENOMEM;
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
+
+ if (!priv->shd.enabled) {
+ ret = 0;
goto out;
}
+ ret = -ENOMEM;
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_brick_pos_t);
+ if (!priv->shd.pos)
+ goto out;
+
priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
- gf_afr_mt_afr_shd_bool_t);
- if (!priv->shd.pending) {
- ret = -ENOMEM;
+ gf_afr_mt_int32_t);
+ if (!priv->shd.pending)
goto out;
- }
priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
- child_count,
- gf_afr_mt_afr_shd_bool_t);
- if (!priv->shd.inprogress) {
- ret = -ENOMEM;
+ child_count, gf_afr_mt_shd_bool_t);
+ if (!priv->shd.inprogress)
goto out;
- }
priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
- gf_afr_mt_afr_shd_timer_t);
- if (!priv->shd.timer) {
- ret = -ENOMEM;
+ gf_afr_mt_shd_timer_t);
+ if (!priv->shd.timer)
+ goto out;
+
+ priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false);
+ if (!priv->shd.healed)
+ goto out;
+
+ priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false);
+ if (!priv->shd.heal_failed)
+ goto out;
+
+ priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false);
+ if (!priv->shd.split_brain)
+ goto out;
+
+ priv->shd.sh_times = GF_CALLOC (priv->child_count,
+ sizeof (*priv->shd.sh_times),
+ gf_afr_mt_time_t);
+ if (!priv->shd.sh_times)
+ goto out;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
goto out;
- }
- if (priv->shd.enabled) {
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable) {
- ret = -ENOMEM;
- goto out;
- }
- }
- priv->first_lookup = 1;
- priv->root_inode = NULL;
ret = 0;
out:
@@ -393,6 +414,8 @@ fini (xlator_t *this)
priv = this->private;
this->private = NULL;
afr_priv_destroy (priv);
+ if (this->itable);//I dont see any destroy func
+
return 0;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index f3d372de5a2..0f4a6d90a72 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -88,12 +88,22 @@ typedef struct afr_inode_ctx_ {
int32_t *fresh_children;//increasing order of latency
} afr_inode_ctx_t;
+typedef enum {
+ NONE,
+ INDEX,
+ FULL,
+} afr_crawl_type_t;
+
typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t *pending;
- gf_boolean_t *inprogress;
- afr_child_pos_t *pos;
- gf_timer_t **timer;
+ gf_boolean_t enabled;
+ afr_crawl_type_t *pending;
+ gf_boolean_t *inprogress;
+ afr_child_pos_t *pos;
+ time_t *sh_times;
+ gf_timer_t **timer;
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
} afr_self_heald_t;
typedef struct _afr_private {
@@ -747,8 +757,7 @@ int
pump_command_reply (call_frame_t *frame, xlator_t *this);
int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...);
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 281bfd722fa..eae7899e9e8 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -2334,7 +2334,7 @@ notify (xlator_t *this, int32_t event,
child_xl = (xlator_t *) data;
- ret = afr_notify (this, event, data);
+ ret = afr_notify (this, event, data, NULL);
switch (event) {
case GF_EVENT_CHILD_DOWN: