summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2012-02-14 22:40:34 +0530
committerVijay Bellur <vijay@gluster.com>2012-02-20 21:23:21 -0800
commit5f117a4a1fca3ec2d163fe77615cf6859c0450e4 (patch)
tree695913cadfb8d8fdacd240bf5e84dcb78a0cfdbc
parent8456c28af75a4083286fc6ceadc03f2703f4c9b6 (diff)
cluster/afr: Self-heald, Index integration
Change-Id: Ic68eb00b356a6ee3cb88fe2bde50374be7a64ba3 BUG: 763820 Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Reviewed-on: http://review.gluster.com/2749 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r--libglusterfs/src/syncop.c59
-rw-r--r--libglusterfs/src/syncop.h3
-rw-r--r--xlators/cluster/afr/src/afr-common.c19
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c22
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h3
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c4
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c551
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h14
-rw-r--r--xlators/cluster/afr/src/afr.c29
-rw-r--r--xlators/cluster/afr/src/afr.h6
-rw-r--r--xlators/cluster/afr/src/pump.c21
12 files changed, 529 insertions, 204 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index 096c29efe0a..4acac5f8fa9 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -548,6 +548,65 @@ syncop_readdirp (xlator_t *subvol,
}
int32_t
+syncop_readdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ int count = 0;
+
+ args = cookie;
+
+ INIT_LIST_HEAD (&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ list_for_each_entry (entry, &entries->list, list) {
+ tmp = entry_copy (entry);
+ gf_log (this->name, GF_LOG_TRACE,
+ "adding entry=%s, count=%d",
+ tmp->d_name, count);
+ list_add_tail (&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_readdir (xlator_t *subvol,
+ fd_t *fd,
+ size_t size,
+ off_t off,
+ gf_dirent_t *entries)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir,
+ fd, size, off);
+
+ if (entries)
+ list_splice_init (&args.entries.list, &entries->list);
+ /* TODO: need to free all the 'args.entries' in 'else' case */
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int32_t
syncop_opendir_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
index 9554edb7250..1bea189a7da 100644
--- a/libglusterfs/src/syncop.h
+++ b/libglusterfs/src/syncop.h
@@ -182,6 +182,9 @@ int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
/* out */
gf_dirent_t *entries);
+int syncop_readdir (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries);
+
int syncop_opendir (xlator_t *subvol, loc_t *loc, fd_t *fd);
int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index a5dee65f621..1895150cd1d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3448,6 +3448,8 @@ afr_notify (xlator_t *this, int32_t event,
priv->child_up[idx] = 1;
priv->up_count++;
+ call_psh = 1;
+ up_child = idx;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 1)
up_children++;
@@ -3457,12 +3459,6 @@ afr_notify (xlator_t *this, int32_t event,
"going online.", ((xlator_t *)data)->name);
} else {
event = GF_EVENT_CHILD_MODIFIED;
- gf_log (this->name, GF_LOG_INFO, "subvol %d came up, "
- "start crawl", idx);
- if (had_heard_from_all) {
- call_psh = 1;
- up_child = idx;
- }
}
priv->last_event[idx] = event;
@@ -3551,18 +3547,15 @@ afr_notify (xlator_t *this, int32_t event,
}
}
UNLOCK (&priv->lock);
- if (up_children > 1) {
- gf_log (this->name, GF_LOG_INFO, "All subvolumes came "
- "up, start crawl");
- call_psh = 1;
- }
}
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh)
- afr_proactive_self_heal (this, up_child);
+ if (call_psh) {
+ gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child);
+ afr_do_poll_self_heal ((void*) (long) up_child);
+ }
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index ebe189c3575..22813940867 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -46,6 +46,8 @@ enum gf_afr_mem_types_ {
gf_afr_fd_paused_call_t,
gf_afr_mt_afr_crawl_data_t,
gf_afr_mt_afr_brick_pos_t,
+ gf_afr_mt_afr_shd_bool_t,
+ gf_afr_mt_afr_shd_timer_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 8fbea8c9d09..36a1e04c9bc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2224,13 +2224,21 @@ afr_self_heal_type_for_transaction (afr_transaction_type type)
}
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uuid_t gfid)
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
int ret = -1;
+ uuid_t pargfid = {0};
- if (!child) {
+ if (!child)
+ goto out;
+
+ if (!uuid_is_null (parent->inode->gfid))
+ uuid_copy (pargfid, parent->inode->gfid);
+ else if (!uuid_is_null (parent->gfid))
+ uuid_copy (pargfid, parent->gfid);
+
+ if (uuid_is_null (pargfid))
goto out;
- }
if (strcmp (parent->path, "/") == 0)
ret = gf_asprintf ((char **)&child->path, "/%s", name);
@@ -2243,26 +2251,22 @@ afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uu
"asprintf failed while setting child path");
}
- if (!child->path) {
- goto out;
- }
-
child->name = strrchr (child->path, '/');
if (child->name)
child->name++;
child->parent = inode_ref (parent->inode);
child->inode = inode_new (parent->inode->table);
+ uuid_copy (child->pargfid, pargfid);
if (!child->inode) {
ret = -1;
goto out;
}
- uuid_copy (child->gfid, gfid);
ret = 0;
out:
- if (ret == -1)
+ if ((ret == -1) && child)
loc_wipe (child);
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 114c177772b..715ed3dc195 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -120,8 +120,7 @@ typedef int
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *xattr);
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name,
- uuid_t gfid);
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
int
afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
int active_source, call_frame_t **impunge_frame);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 570c7080fed..6531615dfcf 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -733,7 +733,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
expunge_sh->entrybuf = entry->d_stat;
ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
- name, entry->d_stat.ia_gfid);
+ name);
if (ret != 0) {
op_errno = EINVAL;
goto out;
@@ -1819,7 +1819,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
- entry->d_name, entry->d_stat.ia_gfid);
+ entry->d_name);
loc_copy (&impunge_sh->parent_loc, &local->loc);
if (ret != 0) {
op_errno = ENOMEM;
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 186d7dd26a5..1f071b87150 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -26,8 +26,53 @@
#include "afr-self-heald.h"
#include "afr-self-heal-common.h"
+#define AFR_POLL_TIMEOUT 600
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl);
+
+void
+afr_do_poll_self_heal (void *data)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct timeval timeout = {0};
+ xlator_t *this = NULL;
+ long child = (long)data;
+ int i = 0;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+
+ if (child == AFR_ALL_CHILDREN) { //done by command
+ for (i = 0; i < priv->child_count; i++)
+ afr_start_crawl (this, i, INDEX);
+ goto out;
+ } else {
+ afr_start_crawl (this, child, INDEX);
+ if (shd->pos[child] == AFR_POS_REMOTE)
+ goto out;
+ }
+ timeout.tv_sec = AFR_POLL_TIMEOUT;
+ timeout.tv_usec = 0;
+ if (shd->timer[child])
+ gf_timer_call_cancel (this->ctx, shd->timer[child]);
+ shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
+ afr_do_poll_self_heal, data);
+
+ if (shd->timer[child] == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Cannot create pending self-heal polling timer for %s",
+ priv->children[child]->name);
+ }
+out:
+ return;
+}
+
static int
-_crawl_directory (loc_t *loc, pid_t pid);
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
+ xlator_t *readdir_xl);
static int
get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
@@ -84,30 +129,215 @@ out:
return ret;
}
-inline void
-afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+
+int
+afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
+ loc_t *dirloc, xlator_t *readdir_xl)
+{
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+ loc_t rootloc = {0};
+ struct iatt iatt = {0};
+ struct iatt parent = {0};
+ int ret = 0;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ afr_build_root_loc (this, dirloc);
+ } else {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get index "
+ "dir gfid on %s", readdir_xl->name);
+ goto out;
+ }
+ if (!index_gfid) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL,
+ &iatt, NULL, &parent);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup failed on "
+ "index dir on %s", readdir_xl->name);
+ goto out;
+ }
+ inode_link (dirloc->inode, NULL, NULL, &iatt);
+ }
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref (xattr);
+ loc_wipe (&rootloc);
+ return ret;
+}
+
+int
+afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
+ loc_t *dirloc, xlator_t *readdir_xl)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ if (crawl_data->crawl == FULL) {
+ fd = fd_create (dirloc->inode, crawl_data->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", dirloc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (readdir_xl, dirloc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s", dirloc->path);
+ goto out;
+ }
+ } else {
+ fd = fd_anonymous (dirloc->inode);
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *dirfd = fd;
+ return ret;
+}
+
+xlator_t*
+afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
{
- afr_update_loc_gfids (loc, iatt, parent);
- uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+ afr_private_t *priv = this->private;
+
+ if (crawl_data->crawl == FULL) {
+ return this;
+ } else {
+ return priv->children[crawl_data->child];
+ }
+ return NULL;
+}
+
+int
+afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
+ gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ ret = afr_build_child_loc (this, child, parent, entry->d_name);
+ } else {
+ child->path = "";
+ child->inode = inode_new (priv->root_inode->table);
+ uuid_parse (entry->d_name, child->gfid);
+ }
+ return ret;
+}
+
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child)
+{
+ afr_private_t *priv = this->private;
+ gf_boolean_t proceed = _gf_false;
+
+ if (!priv->child_up[child]) {
+ gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s "
+ ", subvol went down", priv->children[child]->name);
+ goto out;
+ }
+
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
+ "< 2 children are up");
+ goto out;
+ }
+ proceed = _gf_true;
+out:
+ return proceed;
+}
+
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+ int ret = 0;
+
+ uuid_copy (loc->pargfid, parent->inode->gfid);
+ loc->path = "";
+ loc->name = name;
+ loc->parent = inode_ref (parent->inode);
+ if (!loc->parent) {
+ loc->path = NULL;
+ loc_wipe (loc);
+ ret = -1;
+ }
+ return ret;
+}
+
+void
+_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc,
+ gf_dirent_t *entry, int op_ret, int op_errno,
+ xlator_t *readdir_xl)
+{
+ loc_t index_loc = {0};
+ int ret = 0;
+
+ if (op_ret && (op_errno == ENOENT)) {
+ ret = _build_index_loc (this, &index_loc, entry->d_name,
+ parentloc);
+ if (ret)
+ goto out;
+ gf_log (this->name, GF_LOG_INFO, "Removing stale index "
+ "for %s on %s", index_loc.name, readdir_xl->name);
+ ret = syncop_unlink (readdir_xl, &index_loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
+ " index on %s - %s", index_loc.name,
+ readdir_xl->name, strerror (errno));
+ }
+ index_loc.path = NULL;
+ loc_wipe (&index_loc);
+ }
+out:
+ return;
}
static int
_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, pid_t pid)
+ off_t *offset, afr_crawl_data_t *crawl_data,
+ xlator_t *readdir_xl)
{
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
struct iatt iatt = {0};
- struct iatt parent = {0};;
+ struct iatt parent = {0};
int ret = 0;
loc_t entry_loc = {0};
+ fd_t *fd = NULL;
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (!_crawl_proceed (this, crawl_data->child)) {
+ ret = -1;
+ goto out;
+ }
*offset = entry->d_off;
if (IS_ENTRY_CWD (entry->d_name) ||
IS_ENTRY_PARENT (entry->d_name))
continue;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ if ((crawl_data->crawl == FULL) &&
+ uuid_is_null (entry->d_stat.ia_gfid)) {
gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
"gfid present skipping",
parentloc->path, entry->d_name);
@@ -115,21 +345,44 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
}
loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc, parentloc,
- entry->d_name, entry->d_stat.ia_gfid);
+ ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
+ entry, crawl_data);
if (ret)
goto out;
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s", entry_loc.path);
+ if (uuid_is_null (entry_loc.gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to build "
+ "location for %s", entry->d_name);
+ continue;
+ }
+ if (entry_loc.path)
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
+ entry_loc.path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
+ uuid_utoa (entry_loc.gfid));
ret = syncop_lookup (this, &entry_loc, NULL,
&iatt, NULL, &parent);
+ if (crawl_data->crawl == INDEX) {
+ _index_crawl_post_lookup_fop (this, parentloc, entry,
+ ret, errno, readdir_xl);
+ entry_loc.path = NULL;
+ loc_wipe (&entry_loc);
+ continue;
+ }
+
//Don't fail the crawl if lookup fails as it
//could be because of split-brain
if (ret || (!IA_ISDIR (iatt.ia_type)))
continue;
- afr_fill_loc_info (&entry_loc, &iatt, &parent);
- ret = _crawl_directory (&entry_loc, pid);
+ inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt);
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc,
+ readdir_xl);
+ if (ret)
+ continue;
+ ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl);
+ fd_unref (fd);
}
ret = 0;
out:
@@ -139,64 +392,50 @@ out:
}
static int
-_crawl_directory (loc_t *loc, pid_t pid)
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
+ xlator_t *readdir_xl)
{
xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
off_t offset = 0;
gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
int ret = 0;
gf_boolean_t free_entries = _gf_false;
INIT_LIST_HEAD (&entries.list);
this = THIS;
- priv = this->private;
GF_ASSERT (loc->inode);
- gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
- fd = fd_create (loc->inode, pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
-
- if (!loc->parent) {
- ret = syncop_lookup (this, loc, NULL,
- &iatt, NULL, &parent);
- }
-
- ret = syncop_opendir (this, loc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s", loc->path);
- goto out;
- }
+ if (loc->path)
+ gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "crawling %s",
+ uuid_utoa (loc->gfid));
- while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
+ while (1) {
+ if (crawl_data->crawl == FULL)
+ ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (readdir_xl, fd, 131072, offset,
+ &entries);
+ if (ret <= 0)
+ break;
ret = 0;
free_entries = _gf_true;
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
+
+ if (!_crawl_proceed (this, crawl_data->child)) {
ret = -1;
goto out;
}
-
if (list_empty (&entries.list))
goto out;
- ret = _perform_self_heal (this, loc, &entries, &offset, pid);
+ ret = _perform_self_heal (this, loc, &entries, &offset,
+ crawl_data, readdir_xl);
gf_dirent_free (&entries);
free_entries = _gf_false;
}
- if (fd)
- fd_unref (fd);
ret = 0;
out:
if (free_entries)
@@ -204,6 +443,20 @@ out:
return ret;
}
+static char*
+position_str_get (afr_child_pos_t pos)
+{
+ switch (pos) {
+ case AFR_POS_UNKNOWN:
+ return "unknown";
+ case AFR_POS_LOCAL:
+ return "local";
+ case AFR_POS_REMOTE:
+ return "remote";
+ }
+ return NULL;
+}
+
int
afr_find_child_position (xlator_t *this, int child)
{
@@ -214,35 +467,21 @@ afr_find_child_position (xlator_t *this, int child)
gf_boolean_t local = _gf_false;
char *pathinfo = NULL;
afr_child_pos_t *pos = NULL;
- inode_table_t *itable = NULL;
priv = this->private;
pos = &priv->shd.pos[child];
- if (*pos != AFR_POS_UNKNOWN) {
- goto out;
- }
-
- //TODO: Hack to make the root_loc hack work
- LOCK (&priv->lock);
- {
- if (!priv->root_inode) {
- itable = inode_table_new (0, this);
- if (!itable)
- goto unlock;
- priv->root_inode = inode_new (itable);
+ if (!priv->root_inode) {
+ LOCK (&priv->lock);
+ {
if (!priv->root_inode)
- goto unlock;
+ priv->root_inode = inode_ref
+ (this->itable->root);
}
+ UNLOCK (&priv->lock);
}
-unlock:
- UNLOCK (&priv->lock);
- if (!priv->root_inode) {
- ret = -1;
- goto out;
- }
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
GF_XATTR_PATHINFO_KEY);
@@ -267,8 +506,12 @@ unlock:
else
*pos = AFR_POS_REMOTE;
- gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos);
+ gf_log (this->name, GF_LOG_INFO, "child %s is %s",
+ priv->children[child]->name, position_str_get (*pos));
out:
+ if (ret)
+ *pos = AFR_POS_UNKNOWN;
+ loc_wipe (&loc);
return ret;
}
@@ -280,93 +523,34 @@ afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
return 0;
}
-static int
-afr_find_all_children_postions (xlator_t *this)
-{
- int ret = -1;
- int i = 0;
- gf_boolean_t succeeded = _gf_false;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] != 1)
- continue;
- ret = afr_find_child_position (this, i);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to determine if the "
- "child %s is local.",
- priv->children[i]->name);
- continue;
- }
- succeeded = _gf_true;
- }
- if (succeeded)
- ret = 0;
- return ret;
-}
-
-static gf_boolean_t
-afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count)
-{
- int i = 0;
- gf_boolean_t local = _gf_false;
-
- for (i = 0; i < child_count; i++, pos++) {
- if (*pos == AFR_POS_LOCAL) {
- local = _gf_true;
- break;
- }
- }
- return local;
-}
-
-int
-afr_init_child_position (xlator_t *this, int child)
-{
- int ret = 0;
-
- if (child == AFR_ALL_CHILDREN) {
- ret = afr_find_all_children_postions (this);
- } else {
- ret = afr_find_child_position (this, child);
- }
- return ret;
-}
-
-int
+static inline int
afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
{
- gf_boolean_t local = _gf_false;
-
- if (child == AFR_ALL_CHILDREN)
- local = afr_local_child_exists (shd->pos, child_count);
- else
- local = (shd->pos[child] == AFR_POS_LOCAL);
-
- return local;
+ return (shd->pos[child] == AFR_POS_LOCAL);
}
static int
-afr_crawl_directory (xlator_t *this, pid_t pid)
+afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
- loc_t loc = {0};
+ loc_t dirloc = {0};
gf_boolean_t crawl = _gf_false;
- int ret = 0;
+ int ret = 0;
+ xlator_t *readdir_xl = NULL;
+ fd_t *fd = NULL;
+ int child = -1;
priv = this->private;
shd = &priv->shd;
-
+ child = crawl_data->child;
LOCK (&priv->lock);
{
- if (shd->inprogress) {
- shd->pending = _gf_true;
+ if (shd->inprogress[child]) {
+ shd->pending[child] = _gf_true;
} else {
- shd->inprogress = _gf_true;
+ shd->inprogress[child] = _gf_true;
crawl = _gf_true;
}
}
@@ -377,28 +561,56 @@ afr_crawl_directory (xlator_t *this, pid_t pid)
goto out;
}
- if (!crawl)
+ if (!crawl) {
+ gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
+ "for %s", priv->children[child]->name);
goto out;
+ }
- afr_build_root_loc (priv->root_inode, &loc);
- while (crawl) {
- ret = _crawl_directory (&loc, pid);
+ do {
+ readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
+ if (!readdir_xl)
+ goto done;
+ ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc,
+ readdir_xl);
+ if (ret)
+ goto done;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc,
+ readdir_xl);
+ if (ret)
+ goto done;
+ ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed");
+ gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+ readdir_xl->name);
else
- gf_log (this->name, GF_LOG_INFO, "Crawl completed");
+ gf_log (this->name, GF_LOG_INFO, "Crawl completed "
+ "on %s", readdir_xl->name);
+ fd_unref (fd);
+ fd = NULL;
+done:
LOCK (&priv->lock);
{
- if (shd->pending) {
- shd->pending = _gf_false;
+ if (shd->pending[child]) {
+ shd->pending[child] = _gf_false;
} else {
- shd->inprogress = _gf_false;
+ shd->inprogress[child] = _gf_false;
crawl = _gf_false;
}
}
UNLOCK (&priv->lock);
- }
+ if (crawl_data->crawl == INDEX) {
+ dirloc.path = NULL;
+ loc_wipe (&dirloc);
+ }
+ } while (crawl);
out:
+ if (fd)
+ fd_unref (fd);
+ if (crawl_data->crawl == INDEX) {
+ dirloc.path = NULL;
+ loc_wipe (&dirloc);
+ }
return ret;
}
@@ -415,20 +627,22 @@ afr_crawl (void *data)
priv = this->private;
shd = &priv->shd;
- ret = afr_init_child_position (this, crawl_data->child);
+ if (!_crawl_proceed (this, crawl_data->child))
+ goto out;
+ ret = afr_find_child_position (this, crawl_data->child);
if (ret)
goto out;
if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
goto out;
- ret = afr_crawl_directory (this, crawl_data->pid);
+ ret = afr_crawl_directory (this, crawl_data);
out:
return ret;
}
void
-afr_proactive_self_heal (xlator_t *this, int idx)
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
@@ -441,10 +655,6 @@ afr_proactive_self_heal (xlator_t *this, int idx)
if (!shd->enabled)
goto out;
- if ((idx != AFR_ALL_CHILDREN) &&
- (shd->pos[idx] == AFR_POS_REMOTE))
- goto out;
-
frame = create_frame (this, this->ctx->pool);
if (!frame)
goto out;
@@ -457,7 +667,9 @@ afr_proactive_self_heal (xlator_t *this, int idx)
goto out;
crawl_data->child = idx;
crawl_data->pid = frame->root->pid;
- gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx);
+ crawl_data->crawl = crawl;
+ gf_log (this->name, GF_LOG_INFO, "starting crawl for %s",
+ priv->children[idx]->name);
ret = synctask_new (this->ctx->env, afr_crawl,
afr_crawl_done, frame, crawl_data);
if (ret)
@@ -467,16 +679,25 @@ out:
return;
}
-//TODO: This is a hack
+//void
+//afr_full_self_heal (xlator_t *this)
+//{
+// int i = 0;
+// afr_private_t *priv = this->private;
+//
+// for (i = 0; i < priv->child_count; i++)
+// afr_start_crawl (this, i, FULL);
+//}
+
void
-afr_build_root_loc (inode_t *inode, loc_t *loc)
+afr_build_root_loc (xlator_t *this, loc_t *loc)
{
- loc->path = "/";
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->path = gf_strdup ("/");
loc->name = "";
- loc->inode = inode;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->inode->gfid, 0, 16);
- loc->inode->gfid[15] = 1;
+ loc->inode = inode_ref (priv->root_inode);
uuid_copy (loc->gfid, loc->inode->gfid);
}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 6eb119b073f..eb10219957d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -26,18 +26,26 @@
#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
#define AFR_ALL_CHILDREN -1
+typedef enum {
+ INDEX,
+ FULL,
+} afr_crawl_type_t;
typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
+ int child;
+ pid_t pid;
+ afr_crawl_type_t crawl;
+ xlator_t *readdir_xl;
} afr_crawl_data_t;
void afr_proactive_self_heal (xlator_t *this, int idx);
-void afr_build_root_loc (inode_t *inode, loc_t *loc);
+void afr_build_root_loc (xlator_t *this, loc_t *loc);
int afr_set_root_gfid (dict_t *dict);
inline void
afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);
+void
+afr_do_poll_self_heal (void *data);
#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 18cd030f1d4..abc6aa3e567 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -30,6 +30,8 @@
#endif
#include "afr-common.c"
+#define SHD_INODE_LRU_LIMIT 100
+
struct volume_options options[];
int32_t
@@ -347,6 +349,33 @@ init (xlator_t *this)
goto out;
}
+ priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
+ gf_afr_mt_afr_shd_bool_t);
+ if (!priv->shd.pending) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
+ child_count,
+ gf_afr_mt_afr_shd_bool_t);
+ if (!priv->shd.inprogress) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
+ gf_afr_mt_afr_shd_timer_t);
+ if (!priv->shd.timer) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (priv->shd.enabled) {
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
priv->first_lookup = 1;
priv->root_inode = NULL;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 003d666e00d..f3d372de5a2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -32,6 +32,7 @@
#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
+#include "timer.h"
#define AFR_XATTR_PREFIX "trusted.afr"
#define AFR_PATHINFO_HEADER "REPLICATE:"
@@ -89,9 +90,10 @@ typedef struct afr_inode_ctx_ {
typedef struct afr_self_heald_ {
gf_boolean_t enabled;
- gf_boolean_t pending;
- gf_boolean_t inprogress;
+ gf_boolean_t *pending;
+ gf_boolean_t *inprogress;
afr_child_pos_t *pos;
+ gf_timer_t **timer;
} afr_self_heald_t;
typedef struct _afr_private {
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index df0b31166ef..281bfd722fa 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -167,7 +167,7 @@ pump_save_path (xlator_t *this, const char *path)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
@@ -187,6 +187,7 @@ pump_save_path (xlator_t *this, const char *path)
dict_unref (dict);
+ loc_wipe (&loc);
return 0;
}
@@ -384,8 +385,7 @@ gf_pump_traverse_directory (loc_t *loc)
}
loc_wipe (&entry_loc);
ret = afr_build_child_loc (this, &entry_loc, loc,
- entry->d_name,
- entry->d_stat.ia_gfid);
+ entry->d_name);
if (ret)
goto out;
@@ -491,7 +491,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_removexattr (priv->children[source], &loc,
PUMP_PATH);
@@ -507,6 +507,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
"failed with %s", strerror (errno));
}
+ loc_wipe (&loc);
return pump_command_reply (frame, this);
}
@@ -526,7 +527,7 @@ pump_complete_migration (xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
@@ -568,6 +569,7 @@ pump_complete_migration (xlator_t *this)
call_resume (pump_priv->cleaner);
}
+ loc_wipe (&loc);
return 0;
}
@@ -623,7 +625,7 @@ pump_task (void *data)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
xattr_req = dict_new ();
if (!xattr_req) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -661,6 +663,7 @@ out:
if (xattr_req)
dict_unref (xattr_req);
+ loc_wipe (&loc);
return 0;
}
@@ -795,7 +798,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
if (!data) {
@@ -850,6 +853,7 @@ out:
if (ret && clnt_cmd)
GF_FREE (clnt_cmd);
+ loc_wipe (&loc);
return ret;
}
@@ -1033,7 +1037,7 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
STACK_WIND (frame,
pump_cmd_start_getxattr_cbk,
@@ -1050,6 +1054,7 @@ out:
pump_command_reply (frame, this);
}
+ loc_wipe (&loc);
return 0;
}