diff options
Diffstat (limited to 'xlators')
23 files changed, 1592 insertions, 523 deletions
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index e192b599b..16ed25af1 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -1,7 +1,7 @@  xlator_LTLIBRARIES = afr.la pump.la  xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster -afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c +afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c  afr_la_LDFLAGS = -module -avoidversion  afr_la_SOURCES = $(afr_common_source) afr.c @@ -11,7 +11,7 @@ pump_la_LDFLAGS = -module -avoidversion  pump_la_SOURCES =  $(afr_common_source) pump.c  pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h  AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \  	    -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \ diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2d5e98196..0e4e97355 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -54,6 +54,7 @@  #include "afr-transaction.h"  #include "afr-self-heal.h"  #include "afr-self-heal-common.h" +#include "afr-self-heald.h"  #include "pump.h"  #define AFR_ICTX_OPENDIR_DONE_MASK     0x0000000200000000ULL @@ -132,7 +133,7 @@ afr_set_dict_gfid (dict_t *dict, uuid_t gfid)          ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));          if (ret) -                gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed"); +                gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");  out:          if (ret && pgfid) @@ -1961,15 +1962,15 @@ afr_lookup (call_frame_t *frame, xlator_t *this,                          loc->path, GLUSTERFS_ENTRYLK_COUNT);          } -        ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req); +        ret = dict_get_ptr (local->xattr_req, "gfid-req", &gfid_req);          if (ret) {                  gf_log (this->name, GF_LOG_DEBUG,                          "failed to get the gfid from dict");          } else {                  uuid_copy (local->cont.lookup.gfid_req, gfid_req); +                if (local->loc.parent) +                        dict_del (local->xattr_req, "gfid-req");          } -        if (local->loc.parent != NULL) -                dict_del (xattr_req, "gfid-req");          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) { @@ -3395,11 +3396,12 @@ afr_notify (xlator_t *this, int32_t event,          int             up_children         = 0;          int             down_children       = 0;          int             propagate           = 0; -          int             had_heard_from_all  = 0;          int             have_heard_from_all = 0;          int             idx                 = -1;          int             ret                 = -1; +        int             call_psh            = 0; +        int             up_child            = AFR_ALL_CHILDREN;          priv = this->private; @@ -3445,6 +3447,12 @@ afr_notify (xlator_t *this, int32_t event,                                          "going online.", ((xlator_t *)data)->name);                          } else {                                  event = GF_EVENT_CHILD_MODIFIED; +                                gf_log (this->name, GF_LOG_INFO, "subvol %d came up, " +                                        "start crawl", idx); +                                if (had_heard_from_all) { +                                        call_psh = 1; +                                        up_child = idx; +                                }                          }                          priv->last_event[idx] = event; @@ -3509,6 +3517,8 @@ afr_notify (xlator_t *this, int32_t event,                  LOCK (&priv->lock);                  { +                        up_children = afr_up_children_count (priv->child_up, +                                                             priv->child_count);                          for (i = 0; i < priv->child_count; i++) {                                  if (priv->last_event[i] == GF_EVENT_CHILD_UP) {                                          event = GF_EVENT_CHILD_UP; @@ -3523,11 +3533,18 @@ afr_notify (xlator_t *this, int32_t event,                          }                  }                  UNLOCK (&priv->lock); +                if (up_children > 1) { +                        gf_log (this->name, GF_LOG_INFO, "All subvolumes came " +                                "up, start crawl"); +                        call_psh = 1; +                }          }          ret = 0;          if (propagate)                  ret = default_notify (this, event, data); +        if (call_psh) +                afr_proactive_self_heal (this, up_child);  out:          return ret; @@ -3767,3 +3784,9 @@ afr_get_children_count (int32_t *children, unsigned int child_count)          }          return count;  } + +void +afr_set_low_priority (call_frame_t *frame) +{ +        frame->root->pid = LOW_PRIO_PROC_PID; +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index d5a988708..ebe189c35 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -44,6 +44,8 @@ enum gf_afr_mem_types_ {          gf_afr_mt_locked_fd,          gf_afr_mt_inode_ctx_t,          gf_afr_fd_paused_call_t, +        gf_afr_mt_afr_crawl_data_t, +        gf_afr_mt_afr_brick_pos_t,          gf_afr_mt_end  };  #endif diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index eeca62724..8f50c6251 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2031,12 +2031,6 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          return 0;  } -static inline void -afr_set_low_priority (call_frame_t *frame) -{ -        frame->root->pid = -1; -} -  int  afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)  { diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c new file mode 100644 index 000000000..d27d9e09b --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -0,0 +1,512 @@ +/* +   Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include "afr.h" +#include "syncop.h" +#include "afr-self-heald.h" + +static int +get_pathinfo_host (char *pathinfo, char *hostname, size_t size) +{ +        char    *start = NULL; +        char    *end = NULL; +        int     ret  = -1; +        int     i    = 0; + +        if (!pathinfo) +                goto out; + +        start = strchr (pathinfo, ':'); +        if (!start) +                goto out; +        end = strrchr (pathinfo, ':'); +        if (start == end) +                goto out; + +        memset (hostname, 0, size); +        i = 0; +        while (++start != end) +                hostname[i++] = *start; +        ret = 0; +out: +        return ret; +} + +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) +{ +        int             ret   = 0; +        char            pathinfohost[1024] = {0}; +        char            localhost[1024] = {0}; +        xlator_t        *this = THIS; + +        *local = _gf_false; +        ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", +                        pathinfo); +                goto out; +        } + +        ret = gethostname (localhost, sizeof (localhost)); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " +                        "reason: %s", strerror (errno)); +                goto out; +        } + +        if (!strcmp (localhost, pathinfohost)) +                *local = _gf_true; +out: +        return ret; +} + +static int +_crawl_directory (loc_t *loc, pid_t pid) +{ +        xlator_t        *this = NULL; +        afr_private_t   *priv = NULL; +        fd_t            *fd   = NULL; +        off_t           offset   = 0; +        loc_t           entry_loc = {0}; +        gf_dirent_t     *entry = NULL; +        gf_dirent_t     *tmp = NULL; +        gf_dirent_t     entries; +        struct iatt     iatt = {0}; +        struct iatt     parent = {0};; +        char            *file_path = NULL; +        int             ret = 0; +        gf_boolean_t    free_entries = _gf_false; + +        INIT_LIST_HEAD (&entries.list); +        this = THIS; +        priv = this->private; + +        GF_ASSERT (loc->inode); + +        gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); +        fd = fd_create (loc->inode, pid); +        if (!fd) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Failed to create fd for %s", loc->path); +                goto out; +        } + +        if (!loc->parent) { +                ret = syncop_lookup (this, loc, NULL, +                                     &iatt, NULL, &parent); +        } + +        ret = syncop_opendir (this, loc, fd); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "opendir failed on %s", loc->path); +                goto out; +        } + +        while (syncop_readdirp (this, fd, 131072, offset, &entries)) { +                ret = 0; +                free_entries = _gf_true; +                if (afr_up_children_count (priv->child_up, +                                           priv->child_count) < 2) { +                        gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " +                                "< 2 children are up"); +                        ret = -1; +                        goto out; +                } + +                if (list_empty (&entries.list)) +                        goto out; + +                list_for_each_entry_safe (entry, tmp, &entries.list, list) { +                        offset = entry->d_off; +                        if (IS_ENTRY_CWD (entry->d_name) || +                            IS_ENTRY_PARENT (entry->d_name)) +                                continue; + +                        file_path = afr_build_file_path (loc, entry); +                        if (!file_path) { +                                ret = -1; +                                goto out; +                        } + +                        loc_wipe (&entry_loc); +                        afr_build_child_loc (loc, &entry_loc, +                                             file_path, entry->d_name); + +                        gf_log (this->name, GF_LOG_DEBUG, +                                "found readdir entry=%s", entry->d_name); + +                        ret = syncop_lookup (this, &entry_loc, NULL, +                                             &iatt, NULL, &parent); + +                        //Don't fail the crawl if lookup fails as it +                        //could be because of split-brain +                        if (ret || (!IA_ISDIR (iatt.ia_type))) +                                continue; +                        ret = _crawl_directory (&entry_loc, pid); +                } + +                gf_dirent_free (&entries); +                free_entries = _gf_false; +        } +        ret = 0; +out: +        if (entry_loc.path) +                loc_wipe (&entry_loc); +        if (free_entries) +                gf_dirent_free (&entries); +        return ret; +} + +int +afr_find_child_position (xlator_t *this, int child) +{ +        afr_private_t    *priv = NULL; +        dict_t           *xattr_rsp = NULL; +        loc_t            loc = {0}; +        int              ret = 0; +        gf_boolean_t     local = _gf_false; +        char             *pathinfo = NULL; +        afr_child_pos_t  *pos = NULL; +        inode_table_t    *itable = NULL; + +        priv = this->private; +        pos = &priv->shd.pos[child]; + +        if (*pos != AFR_POS_UNKNOWN) { +                goto out; +        } + +        //TODO: Hack to make the root_loc hack work +        LOCK (&priv->lock); +        { +                if (!priv->root_inode) { +                        itable = inode_table_new (0, this); +                        if (!itable) +                                goto unlock; +                        priv->root_inode = inode_new (itable); +                        if (!priv->root_inode) +                                goto unlock; +                } +        } +unlock: +        UNLOCK (&priv->lock); + +        if (!priv->root_inode) { +                ret = -1; +                goto out; +        } +        afr_build_root_loc (priv->root_inode, &loc); + +        ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, +                               GF_XATTR_PATHINFO_KEY); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child " +                        "%d", child); +                goto out; +        } + +        ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on " +                        "child %d", child); +                goto out; +        } + +        ret = afr_local_pathinfo (pathinfo, &local); +        if (ret) +                goto out; +        if (local) +                *pos = AFR_POS_LOCAL; +        else +                *pos = AFR_POS_REMOTE; + +        gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos); +out: +        return ret; +} + +static int +afr_crawl_done  (int ret, call_frame_t *sync_frame, void *data) +{ +        GF_FREE (data); +        STACK_DESTROY (sync_frame->root); +        return 0; +} + +static int +afr_find_all_children_postions (xlator_t *this) +{ +        int              ret = -1; +        int              i = 0; +        gf_boolean_t     succeeded = _gf_false; +        afr_private_t    *priv = NULL; + +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) { +                if (priv->child_up[i] != 1) +                        continue; +                ret = afr_find_child_position (this, i); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "Failed to determine if the " +                                "child %s is local.", +                                priv->children[i]->name); +                        continue; +                } +                succeeded = _gf_true; +        } +        if (succeeded) +                ret = 0; +        return ret; +} + +static gf_boolean_t +afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count) +{ +        int             i = 0; +        gf_boolean_t    local = _gf_false; + +        for (i = 0; i < child_count; i++, pos++) { +                if (*pos == AFR_POS_LOCAL) { +                        local = _gf_true; +                        break; +                } +        } +        return local; +} + +int +afr_init_child_position (xlator_t *this, int child) +{ +        int     ret = 0; + +        if (child == AFR_ALL_CHILDREN) { +                ret = afr_find_all_children_postions (this); +        } else { +                ret = afr_find_child_position (this, child); +        } +        return ret; +} + +int +afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) +{ +        gf_boolean_t local = _gf_false; + +        if (child == AFR_ALL_CHILDREN) +                local = afr_local_child_exists (shd->pos, child_count); +        else +                local = (shd->pos[child] == AFR_POS_LOCAL); + +        return local; +} + +static int +afr_crawl_directory (xlator_t *this, pid_t pid) +{ +        afr_private_t    *priv = NULL; +        afr_self_heald_t *shd = NULL; +        loc_t            loc = {0}; +        gf_boolean_t     crawl = _gf_false; +        int             ret = 0; + +        priv = this->private; +        shd = &priv->shd; + + +        LOCK (&priv->lock); +        { +                if (shd->inprogress) { +                        shd->pending = _gf_true; +                } else { +                        shd->inprogress = _gf_true; +                        crawl = _gf_true; +                } +        } +        UNLOCK (&priv->lock); + +        if (!priv->root_inode) { +                ret = -1; +                goto out; +        } + +        if (!crawl) +                goto out; + +        afr_build_root_loc (priv->root_inode, &loc); +        while (crawl) { +                ret = _crawl_directory (&loc, pid); +                if (ret) +                        gf_log (this->name, GF_LOG_ERROR, "Crawl failed"); +                else +                        gf_log (this->name, GF_LOG_INFO, "Crawl completed"); +                LOCK (&priv->lock); +                { +                        if (shd->pending) { +                                shd->pending = _gf_false; +                        } else { +                                shd->inprogress = _gf_false; +                                crawl = _gf_false; +                        } +                } +                UNLOCK (&priv->lock); +        } +out: +        return ret; +} + +static int +afr_crawl (void *data) +{ +        xlator_t         *this = NULL; +        afr_private_t    *priv = NULL; +        afr_self_heald_t *shd = NULL; +        int              ret = -1; +        afr_crawl_data_t *crawl_data = data; + +        this = THIS; +        priv = this->private; +        shd = &priv->shd; + +        ret = afr_init_child_position (this, crawl_data->child); +        if (ret) +                goto out; + +        if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) +                goto out; + +        ret = afr_crawl_directory (this, crawl_data->pid); +out: +        return ret; +} + +void +afr_proactive_self_heal (xlator_t *this, int idx) +{ +        afr_private_t              *priv = NULL; +        afr_self_heald_t           *shd = NULL; +        call_frame_t               *frame = NULL; +        afr_crawl_data_t           *crawl_data = NULL; +        int                        ret = 0; + +        priv = this->private; +        shd = &priv->shd; +        if (!shd->enabled) +                goto out; + +        if ((idx != AFR_ALL_CHILDREN) && +            (shd->pos[idx] == AFR_POS_REMOTE)) +                goto out; + +        frame = create_frame (this, this->ctx->pool); +        if (!frame) +                goto out; + +        afr_set_lk_owner (frame, this); +        afr_set_low_priority (frame); +        crawl_data = GF_CALLOC (1, sizeof (*crawl_data), +                                gf_afr_mt_afr_crawl_data_t); +        if (!crawl_data) +                goto out; +        crawl_data->child = idx; +        crawl_data->pid = frame->root->pid; +        gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx); +        ret = synctask_new (this->ctx->env, afr_crawl, +                            afr_crawl_done, frame, crawl_data); +        if (ret) +                gf_log (this->name, GF_LOG_ERROR, "Could not create the " +                        "task for %d ret %d", idx, ret); +out: +        return; +} + +//TODO: This is a hack +void +afr_build_root_loc (inode_t *inode, loc_t *loc) +{ +        loc->path = "/"; +        loc->name = ""; +        loc->inode = inode; +        loc->ino = 1; +        loc->inode->ino = 1; +        loc->inode->ia_type = IA_IFDIR; +        memset (loc->inode->gfid, 0, 16); +        loc->inode->gfid[15] = 1; + +} + +int +afr_set_root_gfid (dict_t *dict) +{ +        uuid_t gfid; +        int ret = 0; + +        memset (gfid, 0, 16); +        gfid[15] = 1; + +        ret = afr_set_dict_gfid (dict, gfid); + +        return ret; +} + +char * +afr_build_file_path (loc_t *loc, gf_dirent_t *entry) +{ +        xlator_t        *this = NULL; +        char            *file_path = NULL; +        int             pathlen = 0; +        size_t          total_size = 0; +        char            *fmt = NULL; + +        this = THIS; + +        pathlen = STRLEN_0 (loc->path); + +        if (IS_ROOT_PATH (loc->path)) { +                total_size = pathlen + entry->d_len; +                fmt = "%s%s"; +        } else { +                total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ +                fmt = "%s/%s"; +        } + +        file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); +        if (!file_path) +                goto out; + +        snprintf(file_path, total_size, fmt, loc->path, entry->d_name); +out: +        return file_path; +} + +void +afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) +{ +        child->path = path; +        child->name = name; + +        child->parent = inode_ref (parent->inode); +        child->inode = inode_new (parent->inode->table); +} diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h new file mode 100644 index 000000000..c85c97b25 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -0,0 +1,44 @@ +/* +   Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef __AFR_SELF_HEALD_H__ +#define __AFR_SELF_HEALD_H__ +#include "xlator.h" + +#define IS_ROOT_PATH(path) (!strcmp (path, "/")) +#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) +#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) +#define AFR_ALL_CHILDREN -1 + +typedef struct afr_crawl_data_ { +        int     child; +        pid_t   pid; +} afr_crawl_data_t; + +void afr_proactive_self_heal (xlator_t *this, int idx); + +void afr_build_root_loc (inode_t *inode, loc_t *loc); + +int afr_set_root_gfid (dict_t *dict); + +char * afr_build_file_path (loc_t *loc, gf_dirent_t *entry); + +void +afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name); +#endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 044213e07..8bb94e205 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -140,6 +140,8 @@ reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("data-self-heal-algorithm",                            priv->data_self_heal_algorithm, options, str, out); +        GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out); +          GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);          if (read_subvol) { @@ -240,6 +242,8 @@ init (xlator_t *this)          GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out); +        GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out); +          GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);          GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool, @@ -320,6 +324,13 @@ init (xlator_t *this)                  goto out;          } +        priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, +                                   gf_afr_mt_afr_brick_pos_t); +        if (!priv->shd.pos) { +                ret = -ENOMEM; +                goto out; +        } +          LOCK_INIT (&priv->root_inode_lk);          priv->first_lookup = 1;          priv->root_inode = NULL; @@ -475,5 +486,9 @@ struct volume_options options[] = {            .type = GF_OPTION_TYPE_BOOL,            .default_value = "off",          }, +        { .key = {"self-heal-daemon"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +        },          { .key  = {NULL} },  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index b9a11c486..92ccf607f 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -50,6 +50,12 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);  typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);  typedef enum { +        AFR_POS_UNKNOWN, +        AFR_POS_LOCAL, +        AFR_POS_REMOTE +} afr_child_pos_t; + +typedef enum {          AFR_INODE_SET_READ_CTX = 1,          AFR_INODE_RM_STALE_CHILDREN,          AFR_INODE_SET_OPENDIR_DONE, @@ -75,6 +81,13 @@ typedef struct afr_inode_ctx_ {          int32_t  *fresh_children;//increasing order of latency  } afr_inode_ctx_t; +typedef struct afr_self_heald_ { +        gf_boolean_t    enabled; +        gf_boolean_t    pending; +        gf_boolean_t    inprogress; +        afr_child_pos_t *pos; +} afr_self_heald_t; +  typedef struct _afr_private {          gf_lock_t lock;               /* to guard access to child_count, etc */          unsigned int child_count;     /* total number of children   */ @@ -134,6 +147,7 @@ typedef struct _afr_private {          char                   vol_uuid[UUID_SIZE + 1];          int32_t                *last_event; +        afr_self_heald_t       shd;  } afr_private_t;  typedef struct { @@ -241,7 +255,6 @@ typedef struct {          call_frame_t *sh_frame;  } afr_self_heal_t; -  typedef enum {          AFR_DATA_TRANSACTION,          /* truncate, write, ... */          AFR_METADATA_TRANSACTION,      /* chmod, chown, ... */ @@ -1001,4 +1014,6 @@ afr_open_only_data_self_heal (char *data_self_heal);  gf_boolean_t  afr_data_self_heal_enabled (char *data_self_heal); +void +afr_set_low_priority (call_frame_t *frame);  #endif /* __AFR_H__ */ diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index ede9f3b49..0623b817a 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -149,71 +149,6 @@ pump_set_resume_path (xlator_t *this, const char *path)          return ret;  } -static void -build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) -{ -        child->path = path; -        child->name = name; - -        child->parent = inode_ref (parent->inode); -        child->inode = inode_new (parent->inode->table); -} - -static char * -build_file_path (loc_t *loc, gf_dirent_t *entry) -{ -        xlator_t *this = NULL; -        char *file_path = NULL; -        int pathlen = 0; -        int total_size = 0; - -        this = THIS; - -        pathlen = STRLEN_0 (loc->path); - -        if (IS_ROOT_PATH (loc->path)) { -                total_size = pathlen + entry->d_len; -                file_path = GF_CALLOC (1, total_size, gf_afr_mt_char); -                if (!file_path) { -                        gf_log (this->name, GF_LOG_ERROR, -                                "Out of memory"); -                        return NULL; -                } - -                gf_log (this->name, GF_LOG_TRACE, -                        "constructing file path of size=%d" -                        "pathlen=%d, d_len=%d", -                        total_size, pathlen, -                        entry->d_len); - -                snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name); - -        } else { -                total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ -                file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); -                if (!file_path) { -                        gf_log (this->name, GF_LOG_ERROR, -                                "Out of memory"); -                        return NULL; -                } - -                gf_log (this->name, GF_LOG_TRACE, -                        "constructing file path of size=%d" -                        "pathlen=%d, d_len=%d", -                        total_size, pathlen, -                        entry->d_len); - -                snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name); -        } - -        gf_log (this->name, GF_LOG_TRACE, -                "path=%s and d_name=%s", loc->path, entry->d_name); -        gf_log (this->name, GF_LOG_TRACE, -                "constructed file_path=%s of size=%d", file_path, total_size); - -        return file_path; -} -  static int  pump_save_path (xlator_t *this, const char *path)  { @@ -232,7 +167,7 @@ pump_save_path (xlator_t *this, const char *path)          GF_ASSERT (priv->root_inode); -        build_root_loc (priv->root_inode, &loc); +        afr_build_root_loc (priv->root_inode, &loc);          dict = dict_new ();          dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path); @@ -450,14 +385,15 @@ gf_pump_traverse_directory (loc_t *loc)                          gf_log (this->name, GF_LOG_DEBUG,                                  "found readdir entry=%s", entry->d_name); -                        file_path = build_file_path (loc, entry); +                        file_path = afr_build_file_path (loc, entry);                          if (!file_path) {                                  gf_log (this->name, GF_LOG_DEBUG,                                          "file path construction failed");                                  goto out;                          } -                        build_child_loc (loc, &entry_loc, file_path, entry->d_name); +                        afr_build_child_loc (loc, &entry_loc, file_path, +                                             entry->d_name);                          if (!IS_ENTRY_CWD (entry->d_name) &&                                             !IS_ENTRY_PARENT (entry->d_name)) { @@ -530,19 +466,6 @@ out:  } -void -build_root_loc (inode_t *inode, loc_t *loc) -{ -        loc->path = "/"; -        loc->name = ""; -        loc->inode = inode; -        loc->ino = 1; -        loc->inode->ino = 1; -        memset (loc->inode->gfid, 0, 16); -        loc->inode->gfid[15] = 1; - -} -  static int  pump_update_resume_path (xlator_t *this)  { @@ -583,7 +506,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,          priv      = this->private; -        build_root_loc (priv->root_inode, &loc); +        afr_build_root_loc (priv->root_inode, &loc);          ret = syncop_removexattr (priv->children[source], &loc,                                            PUMP_PATH); @@ -618,7 +541,7 @@ pump_complete_migration (xlator_t *this)          GF_ASSERT (priv->root_inode); -        build_root_loc (priv->root_inode, &loc); +        afr_build_root_loc (priv->root_inode, &loc);          dict = dict_new (); @@ -656,20 +579,6 @@ pump_complete_migration (xlator_t *this)  }  static int -pump_set_root_gfid (dict_t *dict) -{ -        uuid_t gfid; -        int ret = 0; - -        memset (gfid, 0, 16); -        gfid[15] = 1; - -        ret = afr_set_dict_gfid (dict, gfid); - -        return ret; -} - -static int  pump_lookup_sink (loc_t *loc)  {          xlator_t *this = NULL; @@ -682,7 +591,7 @@ pump_lookup_sink (loc_t *loc)          xattr_req = dict_new (); -        ret = pump_set_root_gfid (xattr_req); +        ret = afr_set_root_gfid (xattr_req);          if (ret)                  goto out; @@ -721,7 +630,7 @@ pump_task (void *data)          GF_ASSERT (priv->root_inode); -        build_root_loc (priv->root_inode, &loc); +        afr_build_root_loc (priv->root_inode, &loc);          xattr_req = dict_new ();          if (!xattr_req) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -730,7 +639,7 @@ pump_task (void *data)                  goto out;          } -        pump_set_root_gfid (xattr_req); +        afr_set_root_gfid (xattr_req);          ret = syncop_lookup (this, &loc, xattr_req,                               &iatt, &xattr_rsp, &parent); @@ -746,7 +655,7 @@ pump_task (void *data)          pump_update_resume_path (this); -        pump_set_root_gfid (xattr_req); +        afr_set_root_gfid (xattr_req);          ret = pump_lookup_sink (&loc);          if (ret) {                  pump_update_resume_path (this); @@ -894,7 +803,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)          GF_ASSERT (priv->root_inode); -        build_root_loc (priv->root_inode, &loc); +        afr_build_root_loc (priv->root_inode, &loc);          data = data_ref (dict_get (local->dict, PUMP_CMD_START));          if (!data) { @@ -1132,7 +1041,7 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)          GF_ASSERT (priv->root_inode); -        build_root_loc (priv->root_inode, &loc); +        afr_build_root_loc (priv->root_inode, &loc);  	STACK_WIND (frame,  		    pump_cmd_start_getxattr_cbk, diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h index 027524227..02eede49c 100644 --- a/xlators/cluster/afr/src/pump.h +++ b/xlators/cluster/afr/src/pump.h @@ -26,10 +26,6 @@  #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"  #define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect" -#define IS_ROOT_PATH(path) (!strcmp (path, "/")) -#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) -#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) -  #define PUMP_CMD_START  "trusted.glusterfs.pump.start"  #define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit"  #define PUMP_CMD_ABORT  "trusted.glusterfs.pump.abort" diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 66467373b..faf4960df 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -961,7 +961,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)                  goto out;          if (GLUSTERD_STATUS_STARTED == volinfo->status) -                ret = glusterd_check_generate_start_nfs (); +                ret = glusterd_nodesvcs_handle_graph_change (volinfo);  out:          return ret; @@ -1133,7 +1133,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                  }          } else {                  if (GLUSTERD_STATUS_STARTED == volinfo->status) -                        ret = glusterd_check_generate_start_nfs (); +                        ret = glusterd_nodesvcs_handle_graph_change (volinfo);          }  out: diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 6dca708f7..19975a689 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -1310,7 +1310,7 @@ glusterd_marker_create_volfile (glusterd_volinfo_t *volinfo)                  goto out;          if (GLUSTERD_STATUS_STARTED == volinfo->status) -                ret = glusterd_check_generate_start_nfs (); +                ret = glusterd_nodesvcs_handle_graph_change (volinfo);          ret = 0;  out:          return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 25b1e6695..8331a91f6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -53,10 +53,17 @@ build_volfile_path (const char *volname, char *path,          char               *free_ptr    = NULL;          char               *tmp         = NULL;          glusterd_volinfo_t *volinfo     = NULL; +        char               *server      = NULL;          priv    = THIS->private; -        if (volname[0] != '/') { +        if (strstr (volname, "gluster/")) { +                server = strchr (volname, '/') + 1; +                glusterd_get_nodesvc_volfile (server, priv->workdir, +                                                    path, path_len); +                ret = 1; +                goto out; +        } else if (volname[0] != '/') {                  /* Normal behavior */                  dup_volname = gf_strdup (volname);          } else { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 53556984a..b9d4606c7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -96,11 +96,6 @@ static char *glusterd_op_sm_event_names[] = {          "GD_OP_EVENT_INVALID"  }; - -static int -glusterd_restart_brick_servers (glusterd_volinfo_t *); - -  char*  glusterd_op_sm_state_name_get (int state)  { @@ -271,12 +266,12 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)  {          int                                      ret           = 0;          char                                    *volname       = NULL; - 	int                                      exists        = 0; - 	char					*key	       = NULL; +        int                                      exists        = 0; +        char                                    *key               = NULL;          char                                    *key_fixed     = NULL;          char                                    *value         = NULL; - 	char					 str[100]      = {0, }; - 	int					 count	       = 0; +        char                                     str[100]      = {0, }; +        int                                      count         = 0;          int                                      dict_count    = 0;          char                                     errstr[2048]  = {0, };          glusterd_volinfo_t                      *volinfo       = NULL; @@ -352,21 +347,21 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                  goto out;          } -	for ( count = 1; ret != 1 ; count++ ) { +        for ( count = 1; ret != 1 ; count++ ) {                  global_opt = _gf_false; -		sprintf (str, "key%d", count); -		ret = dict_get_str (dict, str, &key); +                sprintf (str, "key%d", count); +                ret = dict_get_str (dict, str, &key); -		if (ret) +                if (ret)                          break; -		exists = glusterd_check_option_exists (key, &key_fixed); +                exists = glusterd_check_option_exists (key, &key_fixed);                  if (exists == -1) {                          ret = -1;                          goto out;                  } -		if (!exists) { +                if (!exists) {                          gf_log ("", GF_LOG_ERROR, "Option with name: %s "                                  "does not exist", key);                          ret = snprintf (errstr, 2048, @@ -378,7 +373,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                          *op_errstr = gf_strdup (errstr);                          ret = -1;                          goto out; -        	} +                }                  sprintf (str, "value%d", count);                  ret = dict_get_str (dict, str, &value); @@ -734,10 +729,11 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, int32_t is_force)          if (ret)                  goto out; -        if (GLUSTERD_STATUS_STARTED == volinfo->status) -                ret = glusterd_check_generate_start_nfs (); -        if (ret) -                goto out; +        if (GLUSTERD_STATUS_STARTED == volinfo->status) { +                ret = glusterd_nodesvcs_handle_reconfigure (volinfo); +                if (ret) +                        goto out; +        }          ret = 0; @@ -807,25 +803,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)  }  static int -glusterd_restart_brick_servers (glusterd_volinfo_t *volinfo) -{ -        if (!volinfo) -                return -1; -        if (glusterd_stop_bricks (volinfo)) { -                gf_log ("", GF_LOG_ERROR, "Restart Failed: Unable to " -                                          "stop brick servers"); -                return -1; -        } -        usleep (500000); -        if (glusterd_start_bricks (volinfo)) { -                gf_log ("", GF_LOG_ERROR, "Restart Failed: Unable to " -                                          "start brick servers"); -                return -1; -        } -        return 0; -} - -static int  glusterd_volset_help (dict_t *dict)  {          int                     ret = -1; @@ -853,11 +830,10 @@ glusterd_op_set_volume (dict_t *dict)          xlator_t                                *this = NULL;          glusterd_conf_t                         *priv = NULL;          int                                      count = 1; -        int                                      restart_flag = 0; -	char					*key = NULL; -	char					*key_fixed = NULL; -	char					*value = NULL; -	char					 str[50] = {0, }; +        char                                    *key = NULL; +        char                                    *key_fixed = NULL; +        char                                    *value = NULL; +        char                                     str[50] = {0, };          gf_boolean_t                             global_opt    = _gf_false;          glusterd_volinfo_t                      *voliter = NULL;          int32_t                                  dict_count = 0; @@ -894,7 +870,7 @@ glusterd_op_set_volume (dict_t *dict)                  goto out;          } -	for ( count = 1; ret != -1 ; count++ ) { +        for ( count = 1; ret != -1 ; count++ ) {                  global_opt = _gf_false;                  sprintf (str, "key%d", count); @@ -976,19 +952,12 @@ glusterd_op_set_volume (dict_t *dict)                          goto out;                  } -                if (restart_flag) { -                        if (glusterd_restart_brick_servers (volinfo)) { -                                ret = -1; -                                goto out; -                        } -                } -                  ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);                  if (ret)                          goto out;                  if (GLUSTERD_STATUS_STARTED == volinfo->status) { -                        ret = glusterd_check_generate_start_nfs (); +                        ret = glusterd_nodesvcs_handle_reconfigure (volinfo);                          if (ret) {                                  gf_log ("", GF_LOG_WARNING,                                           "Unable to restart NFS-Server"); @@ -1008,20 +977,13 @@ glusterd_op_set_volume (dict_t *dict)                                  goto out;                          } -                        if (restart_flag) { -                                if (glusterd_restart_brick_servers (volinfo)) { -                                        ret = -1; -                                        goto out; -                                } -                        } -                          ret = glusterd_store_volinfo (volinfo,                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);                          if (ret)                                  goto out;                          if (GLUSTERD_STATUS_STARTED == volinfo->status) { -                                ret = glusterd_check_generate_start_nfs (); +                                ret = glusterd_nodesvcs_handle_reconfigure (volinfo);                                  if (ret) {                                          gf_log ("", GF_LOG_WARNING,                                                  "Unable to restart NFS-Server"); @@ -1212,13 +1174,13 @@ glusterd_op_stats_volume (dict_t *dict, char **op_errstr,                  goto out;                  break;          } -	ret = glusterd_create_volfiles_and_notify_services (volinfo); +        ret = glusterd_create_volfiles_and_notify_services (volinfo); -	if (ret) { +        if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" -					  " 'volume set'"); -		ret = -1; -		goto out; +                                          " 'volume set'"); +                ret = -1; +                goto out;          }          ret = glusterd_store_volinfo (volinfo, @@ -1227,7 +1189,7 @@ glusterd_op_stats_volume (dict_t *dict, char **op_errstr,                  goto out;          if (GLUSTERD_STATUS_STARTED == volinfo->status) -                ret = glusterd_check_generate_start_nfs (); +                ret = glusterd_nodesvcs_handle_reconfigure (volinfo);          ret = 0; @@ -1249,7 +1211,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr,          glusterd_brickinfo_t    *brickinfo = NULL;          glusterd_conf_t         *priv = NULL;          xlator_t                *this = NULL; -	int32_t			brick_index = 0; +        int32_t                 brick_index = 0;          this = THIS;          GF_ASSERT (this); @@ -1286,7 +1248,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr,                                  count++;                                  brick_count = count;                          } -			brick_index++; +                        brick_index++;                  }          } @@ -1908,12 +1870,12 @@ glusterd_op_brick_disconnect (void *data)          brickinfo = ev_ctx->brickinfo;          GF_ASSERT (brickinfo); -	if (brickinfo->timer) { -		gf_timer_call_cancel (THIS->ctx, brickinfo->timer); -		brickinfo->timer = NULL; +        if (brickinfo->timer) { +                gf_timer_call_cancel (THIS->ctx, brickinfo->timer); +                brickinfo->timer = NULL;                  gf_log ("", GF_LOG_DEBUG,                          "Cancelled timer thread"); -	} +        }          glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_ACC, ev_ctx);          glusterd_op_sm (); diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 4a4289910..1f424f6c6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -528,7 +528,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo,          gf_log ("", GF_LOG_DEBUG,                  "Attempting to kill src"); -        ret = glusterd_nfs_server_stop (); +        ret = glusterd_nfs_server_stop (volinfo);          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to stop nfs, ret: %d", @@ -570,7 +570,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo,          }  out: -        ret = glusterd_nfs_server_start (); +        ret = glusterd_nfs_server_start (volinfo);          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to start nfs, ret: %d",                          ret); @@ -1678,7 +1678,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)                  } -                ret = glusterd_nfs_server_stop (); +                ret = glusterd_nodesvcs_stop (volinfo);                  if (ret) {                          gf_log ("", GF_LOG_ERROR,                                  "Unable to stop nfs server, ret: %d", ret); @@ -1690,13 +1690,13 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)  			gf_log ("", GF_LOG_CRITICAL, "Unable to add "  				"dst-brick: %s to volume: %s",  				dst_brick, volinfo->volname); -		        (void) glusterd_check_generate_start_nfs (); +		        (void) glusterd_nodesvcs_handle_graph_change (volinfo);  			goto out;  		}  		volinfo->defrag_status = 0; -		ret = glusterd_check_generate_start_nfs (); +		ret = glusterd_nodesvcs_handle_graph_change (volinfo);  		if (ret) {                          gf_log ("", GF_LOG_CRITICAL,                                  "Failed to generate nfs volume file"); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 5b247b6a9..18cda46eb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -822,6 +822,23 @@ out:          return ret;  } +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, +                                  char *path) +{ +        gf_boolean_t            decommissioned = _gf_false; +        glusterd_brickinfo_t    *brickinfo = NULL; +        int                     ret = -1; + +        ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, +                                             &brickinfo); +        if (ret) +                goto out; +        decommissioned = brickinfo->decommissioned; +out: +        return decommissioned; +} +  int32_t  glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)  { @@ -2204,6 +2221,7 @@ glusterd_compare_friend_data (dict_t  *vols, int32_t *status)          int                     i = 1;          gf_boolean_t            update = _gf_false;          gf_boolean_t            stale_nfs = _gf_false; +        gf_boolean_t            stale_shd = _gf_false;          GF_ASSERT (vols);          GF_ASSERT (status); @@ -2228,16 +2246,20 @@ glusterd_compare_friend_data (dict_t  *vols, int32_t *status)          }          if (update) { -                if (glusterd_is_nfs_started ()) +                if (glusterd_is_nodesvc_running ("nfs"))                          stale_nfs = _gf_true; +                if (glusterd_is_nodesvc_running ("glustershd")) +                        stale_shd = _gf_true;                  ret = glusterd_import_friend_volumes (vols);                  if (ret)                          goto out;                  if (_gf_false == glusterd_are_all_volumes_stopped ()) { -                        ret = glusterd_check_generate_start_nfs (); +                        ret = glusterd_nodesvcs_handle_graph_change (NULL);                  } else {                          if (stale_nfs)                                  glusterd_nfs_server_stop (); +                        if (stale_shd) +                                glusterd_shd_stop ();                  }          } @@ -2249,29 +2271,81 @@ out:  }  gf_boolean_t -glusterd_is_nfs_started () +glusterd_is_service_running (char *pidfile)  { -        int32_t                 ret = -1; -        xlator_t                *this = NULL; -        glusterd_conf_t         *priv = NULL; -        char                    pidfile[PATH_MAX] = {0,}; +        FILE            *file = NULL; +        gf_boolean_t    running = _gf_false; +        gf_boolean_t    locked = _gf_false; +        int             ret = 0; +        int             fno = 0; -        this = THIS; -        GF_ASSERT(this); +        file = fopen (pidfile, "r+"); +        if (!file) +                goto out; -        priv = this->private; +        fno = fileno (file); +        ret = lockf (fno, F_TLOCK, 0); +        if (!ret) { +                locked = _gf_true; +                goto out; +        } -        GLUSTERD_GET_NFS_PIDFILE(pidfile); -        ret = access (pidfile, F_OK); +        running = _gf_true; +out: +        if (locked) { +                GF_ASSERT (file); +                if (lockf (fno, F_ULOCK, 0) < 0) +                        gf_log ("", GF_LOG_WARNING, "Cannot unlock pidfile: %s" +                                " reason: %s", pidfile, strerror(errno)); +        } +        if (file) +                fclose (file); +        return running; +} -        if (ret == 0) -                return _gf_true; -        else -                return _gf_false; +void +glusterd_get_nodesvc_dir (char *server, char *workdir, +                                char *path, size_t len) +{ +        GF_ASSERT (len == PATH_MAX); +        snprintf (path, len, "%s/%s", workdir, server); +} + +void +glusterd_get_nodesvc_rundir (char *server, char *workdir, +                                   char *path, size_t len) +{ +        char    dir[PATH_MAX] = {0}; +        GF_ASSERT (len == PATH_MAX); + +        glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); +        snprintf (path, len, "%s/run", dir); +} + +void +glusterd_get_nodesvc_pidfile (char *server, char *workdir, +                                    char *path, size_t len) +{ +        char    dir[PATH_MAX] = {0}; +        GF_ASSERT (len == PATH_MAX); + +        glusterd_get_nodesvc_rundir (server, workdir, dir, sizeof (dir)); +        snprintf (path, len, "%s/%s.pid", dir, server); +} + +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, +                                    char *volfile, size_t len) +{ +        char  dir[PATH_MAX] = {0,}; +        GF_ASSERT (len == PATH_MAX); + +        glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); +        snprintf (volfile, len, "%s/%s-server.vol", dir, server);  }  int32_t -glusterd_nfs_server_start () +glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)  {          int32_t                 ret = -1;          xlator_t                *this = NULL; @@ -2279,16 +2353,16 @@ glusterd_nfs_server_start ()          char                    pidfile[PATH_MAX] = {0,};          char                    logfile[PATH_MAX] = {0,};          char                    volfile[PATH_MAX] = {0,}; -        char                    path[PATH_MAX] = {0,};          char                    rundir[PATH_MAX] = {0,}; +        char                    volfileid[256]   = {0};          this = THIS;          GF_ASSERT(this);          priv = this->private; -        GLUSTERD_GET_NFS_DIR(path, priv); -        snprintf (rundir, PATH_MAX, "%s/run", path); +        glusterd_get_nodesvc_rundir (server, priv->workdir, +                                           rundir, sizeof (rundir));          ret = mkdir (rundir, 0777);          if ((ret == -1) && (EEXIST != errno)) { @@ -2297,25 +2371,72 @@ glusterd_nfs_server_start ()                  goto out;          } -        GLUSTERD_GET_NFS_PIDFILE(pidfile); -        glusterd_get_nfs_filepath (volfile); - +        glusterd_get_nodesvc_pidfile (server, priv->workdir, +                                            pidfile, sizeof (pidfile)); +        glusterd_get_nodesvc_volfile (server, priv->workdir, +                                            volfile, sizeof (volfile));          ret = access (volfile, F_OK);          if (ret) { -                gf_log ("", GF_LOG_ERROR, "Nfs Volfile %s is not present", -                        volfile); +                gf_log ("", GF_LOG_ERROR, "%s Volfile %s is not present", +                        server, volfile);                  goto out;          } -        snprintf (logfile, PATH_MAX, "%s/nfs.log", DEFAULT_LOG_FILE_DIRECTORY); +        snprintf (logfile, PATH_MAX, "%s/%s.log", DEFAULT_LOG_FILE_DIRECTORY, +                  server); +        snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); -        ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-f", volfile, -                      "-p", pidfile, "-l", logfile, NULL); +        if (pmap_signin) +                ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-s", "localhost", +                              "--volfile-id", volfileid, +                              "-p", pidfile, "-l", logfile, NULL); +        else +                ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-f", volfile, +                              "-p", pidfile, "-l", logfile, NULL);  out:          return ret;  } +int +glusterd_nfs_server_start () +{ +        return glusterd_nodesvc_start ("nfs", _gf_false); +} + +int +glusterd_shd_start () +{ +        return glusterd_nodesvc_start ("glustershd", _gf_true); +} + +gf_boolean_t +glusterd_is_nodesvc_running (char *server) +{ +        char                    pidfile[PATH_MAX] = {0,}; +        glusterd_conf_t         *priv = THIS->private; + +        glusterd_get_nodesvc_pidfile (server, priv->workdir, +                                            pidfile, sizeof (pidfile)); +        return glusterd_is_service_running (pidfile); +} + +int32_t +glusterd_nodesvc_stop (char *server, int sig) +{ +        char                    pidfile[PATH_MAX] = {0,}; +        glusterd_conf_t         *priv = THIS->private; +        int                     ret = 0; + +        if (!glusterd_is_nodesvc_running (server)) +                goto out; +        glusterd_get_nodesvc_pidfile (server, priv->workdir, +                                            pidfile, sizeof (pidfile)); +        ret = glusterd_service_stop (server, pidfile, sig, _gf_true); +out: +        return ret; +} +  void  glusterd_nfs_pmap_deregister ()  { @@ -2336,26 +2457,27 @@ glusterd_nfs_pmap_deregister ()  } -int32_t +int  glusterd_nfs_server_stop ()  { -        xlator_t                *this = NULL; -        glusterd_conf_t         *priv = NULL; -        char                    pidfile[PATH_MAX] = {0,}; -        char                    path[PATH_MAX] = {0,}; - -        this = THIS; -        GF_ASSERT(this); - -        priv = this->private; - -        GLUSTERD_GET_NFS_DIR(path, priv); -        GLUSTERD_GET_NFS_PIDFILE(pidfile); +        int                     ret = 0; +        gf_boolean_t            deregister = _gf_false; -        glusterd_service_stop ("nfsd", pidfile, SIGKILL, _gf_true); -        glusterd_nfs_pmap_deregister (); +        if (glusterd_is_nodesvc_running ("nfs")) +                deregister = _gf_true; +        ret = glusterd_nodesvc_stop ("nfs", SIGKILL); +        if (ret) +                goto out; +        if (deregister) +                glusterd_nfs_pmap_deregister (); +out: +        return ret; +} -        return 0; +int +glusterd_shd_stop () +{ +        return glusterd_nodesvc_stop ("glustershd", SIGTERM);  }  int @@ -2392,26 +2514,122 @@ out:  }  int -glusterd_check_generate_start_nfs () +glusterd_check_generate_start_service (int (*create_volfile) (), +                                       int (*stop) (), int (*start) ())  {          int ret = -1; -        ret = glusterd_create_nfs_volfile (); +        ret = create_volfile ();          if (ret)                  goto out; -        if (glusterd_is_nfs_started ()) { -                ret = glusterd_nfs_server_stop (); -                if (ret) -                        goto out; -        } +        ret = stop (); +        if (ret) +                goto out; + +        ret = start (); +out: +        return ret; +} + +int +glusterd_reconfigure_nodesvc (int (*create_volfile) ()) +{ +        int ret = -1; + +        ret = create_volfile (); +        if (ret) +                goto out; + +        ret = glusterd_fetchspec_notify (THIS); +out: +        return ret; +} + +int +glusterd_reconfigure_shd () +{ +        int (*create_volfile) () = glusterd_create_shd_volfile; +        return glusterd_reconfigure_nodesvc (create_volfile); +} + +int +glusterd_check_generate_start_nfs () +{ +        int ret = 0; + +        ret = glusterd_check_generate_start_service (glusterd_create_nfs_volfile, +                                                     glusterd_nfs_server_stop, +                                                     glusterd_nfs_server_start); +        return ret; +} + +int +glusterd_check_generate_start_shd () +{ +        int ret = 0; + +        ret = glusterd_check_generate_start_service (glusterd_create_shd_volfile, +                                                     glusterd_shd_stop, +                                                     glusterd_shd_start); +        if (ret == -EINVAL) +                ret = 0; +        return ret; +} + +int +glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo, +                             int (*nfs_op) (), int (*shd_op) ()) +{ +        int     ret = 0; + +        ret = nfs_op (); +        if (ret) +                goto out; + +        if (volinfo && !glusterd_is_volume_replicate (volinfo)) +                goto out; -        ret = glusterd_nfs_server_start (); +        ret = shd_op (); +        if (ret) +                goto out;  out:          return ret;  }  int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo) +{ +        return glusterd_nodesvcs_batch_op (volinfo, +                                            glusterd_nfs_server_start, +                                            glusterd_shd_start); +} + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo) +{ +        return glusterd_nodesvcs_batch_op (volinfo, +                                            glusterd_nfs_server_stop, +                                            glusterd_shd_stop); +} + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo) +{ +        return glusterd_nodesvcs_batch_op (volinfo, +                                      glusterd_check_generate_start_nfs, +                                      glusterd_check_generate_start_shd); +} + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo) +{ +        return glusterd_nodesvcs_batch_op (volinfo, +                                            glusterd_check_generate_start_nfs, +                                            glusterd_reconfigure_shd); +} + +int  glusterd_volume_count_get (void)  {          glusterd_volinfo_t      *tmp_volinfo = NULL; @@ -2510,7 +2728,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)          glusterd_volinfo_t       *volinfo = NULL;          glusterd_brickinfo_t     *brickinfo = NULL;          int                      ret = 0; -        gf_boolean_t             start_nfs = _gf_false; +        gf_boolean_t             start_nodesvcs = _gf_false;          GF_ASSERT (conf); @@ -2521,11 +2739,11 @@ glusterd_restart_bricks (glusterd_conf_t *conf)                                               brick_list) {                                  glusterd_brick_start (volinfo, brickinfo);                          } -                        start_nfs = _gf_true; +                        start_nodesvcs = _gf_true;                  }          } -        if (start_nfs) -                glusterd_check_generate_start_nfs (); +        if (start_nodesvcs) +                glusterd_nodesvcs_handle_graph_change (NULL);          return ret;  } @@ -3740,3 +3958,12 @@ out:          return ret;  } +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo) +{ +        gf_boolean_t    replicates = _gf_false; +        if (volinfo && ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || +            (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))) +                replicates = _gf_true; +        return replicates; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 2ee36936a..f06a1ce17 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -142,15 +142,31 @@ glusterd_compare_friend_data (dict_t  *vols, int32_t *status);  int  glusterd_volume_compute_cksum (glusterd_volinfo_t  *volinfo); +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, +                                    char *volfile, size_t len); + +gf_boolean_t +glusterd_is_nodesvc_running (); +  gf_boolean_t -glusterd_is_nfs_started (); +glusterd_is_nodesvc_running (); +void +glusterd_get_nodesvc_dir (char *server, char *workdir, +                                char *path, size_t len);  int32_t  glusterd_nfs_server_start ();  int32_t  glusterd_nfs_server_stop (); +int32_t +glusterd_shd_start (); + +int32_t +glusterd_shd_stop (); +  int  glusterd_remote_hostname_get (rpcsvc_request_t *req,                                char *remote_host, int len); @@ -161,6 +177,22 @@ glusterd_set_volume_status (glusterd_volinfo_t  *volinfo,                              glusterd_volume_status status);  int  glusterd_check_generate_start_nfs (void); + +int +glusterd_check_generate_start_shd (void); + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo); +  int32_t  glusterd_volume_count_get (void);  int32_t @@ -290,4 +322,9 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,  gf_boolean_t  glusterd_is_fuse_available (); +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, +                                  char *path);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index d0533b1fc..fe7cfc1d1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -116,6 +116,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {          {"cluster.metadata-self-heal",           "cluster/replicate",  NULL, NULL, NO_DOC, 0     },          {"cluster.data-self-heal",               "cluster/replicate",  NULL, NULL, NO_DOC, 0     },          {"cluster.entry-self-heal",              "cluster/replicate",  NULL, NULL, NO_DOC, 0     }, +        {"cluster.self-heal-daemon",             "cluster/replicate",  "!self-heal-daemon" , NULL, NO_DOC, 0     },          {"cluster.strict-readdir",               "cluster/replicate",  NULL, NULL, NO_DOC, 0     },          {"cluster.self-heal-window-size",        "cluster/replicate",         "data-self-heal-window-size", NULL, DOC, 0},          {"cluster.data-change-log",              "cluster/replicate",  NULL, NULL, NO_DOC, 0     }, @@ -382,6 +383,13 @@ xlator_set_option (xlator_t *xl, char *key, char *value)          return dict_set_dynstr (xl->options, key, dval);  } +static int +xlator_get_option (xlator_t *xl, char *key, char **value) +{ +        GF_ASSERT (xl); +        return dict_get_str (xl->options, key, value); +} +  static inline xlator_t *  first_of (volgen_graph_t *graph)  { @@ -685,25 +693,35 @@ volgen_graph_set_options_generic (volgen_graph_t *graph, dict_t *dict,  }  static int -basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, -                      void *param) +no_filter_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, +                          void *param)  {          xlator_t *trav;          int ret = 0; -        if (vme->option[0] == '!') -                return 0; -          for (trav = first_of (graph); trav; trav = trav->next) {                  if (strcmp (trav->type, vme->voltype) != 0)                          continue;                  ret = xlator_set_option (trav, vme->option, vme->value);                  if (ret) -                        return -1; +                        break;          } +        return ret; +} -        return 0; +static int +basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, +                      void *param) +{ +        int     ret = 0; + +        if (vme->option[0] == '!') +                goto out; + +        ret = no_filter_option_handler (graph, vme, param); +out: +        return ret;  }  static int @@ -991,14 +1009,39 @@ glusterd_get_trans_type_rb (gf_transport_type ttype)  }  static int -volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph) +_xl_link_children (xlator_t *parent, xlator_t *children, size_t child_count) +{ +        xlator_t        *trav = NULL; +        size_t          seek = 0; +        int             ret = -1; + +        if (child_count == 0) +                goto out; +        seek = child_count; +        for (trav = children; --seek; trav = trav->next); +        for (; child_count--; trav = trav->prev) { +                ret = volgen_xlator_link (parent, trav); +                if (ret) +                        goto out; +        } +        ret = 0; +out: +        return ret; +} + +static int +volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph, +                        size_t child_count)  {          xlator_t *trav = NULL; +        int      ret   = 0;          GF_ASSERT (dgraph->graph.first); -        if (volgen_xlator_link (first_of (dgraph), first_of (sgraph)) == -1) -                return -1; +        ret = _xl_link_children (first_of (dgraph), first_of (sgraph), +                                 child_count); +        if (ret) +                goto out;          for (trav = first_of (dgraph); trav->next; trav = trav->next); @@ -1006,7 +1049,8 @@ volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph)          trav->next->prev = trav;          dgraph->graph.xl_count += sgraph->graph.xl_count; -        return 0; +out: +        return ret;  }  static int @@ -1082,10 +1126,11 @@ build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                  set_dict = dict_copy (volinfo->dict, NULL);                  if (!set_dict)                          return -1; -                dict_copy (mod_dict, set_dict); -                /* XXX dict_copy swallows errors */ -        } else +                 dict_copy (mod_dict, set_dict); +                 /* XXX dict_copy swallows errors */ +        } else {                  set_dict = volinfo->dict; +        }          ret = builder (graph, volinfo, set_dict, param);          if (!ret) @@ -1097,10 +1142,24 @@ build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          return ret;  } +static gf_transport_type +transport_str_to_type (char *tt) +{ +        gf_transport_type type = GF_TRANSPORT_TCP; + +        if (!strcmp ("tcp", tt)) +                type = GF_TRANSPORT_TCP; +        else if (!strcmp ("rdma", tt)) +                type = GF_TRANSPORT_RDMA; +        else if (!strcmp ("tcp,rdma", tt)) +                type = GF_TRANSPORT_BOTH_TCP_RDMA; +        return type; +} +  static void -get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) +transport_type_to_str (gf_transport_type type, char *tt)  { -        switch (volinfo->transport_type) { +        switch (type) {          case GF_TRANSPORT_RDMA:                  strcpy (tt, "rdma");                  break; @@ -1114,20 +1173,20 @@ get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt)  }  static void +get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) +{ +        transport_type_to_str (volinfo->transport_type, tt); +} + +static void  get_vol_nfs_transport_type (glusterd_volinfo_t *volinfo, char *tt)  { -        switch (volinfo->nfs_transport_type) { -        case GF_TRANSPORT_RDMA: -                strcpy (tt, "rdma"); -                break; -        case GF_TRANSPORT_TCP: -                strcpy (tt, "tcp"); -                break; -        case GF_TRANSPORT_BOTH_TCP_RDMA: +        if (volinfo->nfs_transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) {                  gf_log ("", GF_LOG_ERROR, "%s:nfs transport cannot be both"                          " tcp and rdma", volinfo->volname);                  GF_ASSERT (0);          } +        transport_type_to_str (volinfo->nfs_transport_type, tt);  }  /*  gets the volinfo, dict, a character array for filling in @@ -1795,35 +1854,17 @@ glusterd_get_volopt_content (gf_boolean_t xml_out)  }  static int -client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, -                      dict_t *set_dict, void *param) +volgen_graph_build_clients (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, +                            dict_t *set_dict, void *param)  { -        int                      sub_count          = 0; -        int                      dist_count         = 0; +        int                      i                  = 0; +        int                      ret                = -1;          char                     transt[16]         = {0,};          char                    *volname            = NULL; -        dict_t                  *dict               = NULL;          glusterd_brickinfo_t    *brick = NULL; -        char                    *replicate_args[]   = {"cluster/replicate", -                                                       "%s-replicate-%d"}; -        char                    *stripe_args[]      = {"cluster/stripe", -                                                       "%s-stripe-%d"}; -        char                   **cluster_args       = NULL; -        int                      i                  = 0; -        int                      j                  = 0; -        int                      ret                = -1; -        xlator_t                *xl                 = NULL; -        xlator_t                *txl                = NULL; -        xlator_t                *trav               = NULL; -        int                      removed_bricks     = 0; -        int                      index_of_removed_brick = 0; -        char                    *removed_bricklist  = NULL; -        char                     volume_name[1024]  = {0,}; -        int                      idx                = 0; +        xlator_t                *xl                = NULL;          volname = volinfo->volname; -        dict    = volinfo->dict; -        GF_ASSERT (dict);          if (volinfo->brick_count == 0) {                  gf_log ("", GF_LOG_ERROR, @@ -1848,6 +1889,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                  strcpy (transt, "tcp");          i = 0; +        ret = -1;          list_for_each_entry (brick, &volinfo->bricks, brick_list) {                  ret = -1;                  xl = volgen_graph_add_nolink (graph, "protocol/client", @@ -1863,19 +1905,6 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                  ret = xlator_set_option (xl, "transport-type", transt);                  if (ret)                          goto out; -                if (brick->decommissioned) { -                        if (!removed_bricklist) { -                                removed_bricklist = GF_CALLOC (16 * GF_UNIT_KB, -                                                               1, gf_common_mt_char); -                                index_of_removed_brick = i; -                        } -                        if (removed_bricks) -                                strcat (removed_bricklist, ","); -                        snprintf (volume_name, 1024, "%s-client-%d", volname, i); -                        strcat (removed_bricklist, volume_name); -                        removed_bricks++; -                } -                  i++;          }          if (i != volinfo->brick_count) { @@ -1884,138 +1913,283 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          "differs from brick count (%d)", i,                          volinfo->brick_count); +                ret = -1; +                goto out; +        } +        ret = 0; +out: +        return ret; +} + +static int +volgen_graph_build_clusters (volgen_graph_t *graph, +                             glusterd_volinfo_t *volinfo, char *xl_type, +                             char *xl_namefmt, size_t child_count, +                             size_t sub_count) +{ +        int             i = 0; +        int             j = 0; +        xlator_t        *txl = NULL; +        xlator_t        *xl  = NULL; +        xlator_t        *trav = NULL; +        char            *volname = NULL; +        int             ret     = -1; + +        if (child_count == 0) +                goto out; +        volname = volinfo->volname; +        txl = first_of (graph); +        for (trav = txl; --child_count; trav = trav->next); +        for (;; trav = trav->prev) { +                if (i % sub_count == 0) { +                        xl = volgen_graph_add_nolink (graph, xl_type, +                                                      xl_namefmt, volname, j); +                        if (!xl) { +                                ret = -1; +                                goto out; +                        } +                        j++; +                } + +                ret = volgen_xlator_link (xl, trav); +                if (ret) +                        goto out; + +                if (trav == txl) +                        break; +                i++; +        } + +        ret = j; +out: +        return ret; +} + +gf_boolean_t +_xl_is_client_decommissioned (xlator_t *xl, glusterd_volinfo_t *volinfo) +{ +        int             ret = 0; +        gf_boolean_t    decommissioned = _gf_false; +        char            *hostname = NULL; +        char            *path = NULL; + +        GF_ASSERT (!strcmp (xl->type, "protocol/client")); +        ret = xlator_get_option (xl, "remote-host", &hostname); +        if (ret) { +                GF_ASSERT (0); +                gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host " +                        "from client %s", xl->name); +                goto out; +        } +        ret = xlator_get_option (xl, "remote-subvolume", &path); +        if (ret) { +                GF_ASSERT (0); +                gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host " +                        "from client %s", xl->name); +                goto out; +        } + +        decommissioned = glusterd_is_brick_decommissioned (volinfo, hostname, +                                                           path); +out: +        return decommissioned; +} + +gf_boolean_t +_xl_has_decommissioned_clients (xlator_t *xl, glusterd_volinfo_t *volinfo) +{ +        xlator_list_t   *xl_child = NULL; +        gf_boolean_t    decommissioned = _gf_false; +        xlator_t        *cxl = NULL; + +        if (!xl) +                goto out; + +        if (!strcmp (xl->type, "protocol/client")) { +                decommissioned = _xl_is_client_decommissioned (xl, volinfo);                  goto out;          } -        sub_count = volinfo->sub_count; -        if (sub_count > 1) { +        xl_child = xl->children; +        while (xl_child) { +                cxl = xl_child->xlator; +                decommissioned = _xl_is_client_decommissioned (cxl, volinfo); +                if (decommissioned) +                        break; + +                xl_child = xl_child->next; +        } +out: +        return decommissioned; +} + +static int +_graph_get_decommissioned_children (xlator_t *dht, glusterd_volinfo_t *volinfo, +                                    char **children) +{ +        int             ret = -1; +        xlator_list_t   *xl_child = NULL; +        xlator_t        *cxl = NULL; +        gf_boolean_t    comma = _gf_false; + +        *children = NULL; +        xl_child = dht->children; +        while (xl_child) { +                cxl = xl_child->xlator; +                if (_xl_has_decommissioned_clients (cxl, volinfo)) { +                        if (!*children) { +                                *children = GF_CALLOC (16 * GF_UNIT_KB, 1, +                                                       gf_common_mt_char); +                                if (!*children) +                                        goto out; +                        } + +                        if (comma) +                                strcat (*children, ","); +                        strcat (*children, cxl->name); +                        comma = _gf_true; +                } + +                xl_child = xl_child->next; +        } +        ret = 0; +out: +        return ret; +} + +static int +volgen_graph_build_dht_cluster (volgen_graph_t *graph, +                                glusterd_volinfo_t *volinfo, size_t child_count) +{ +        int32_t                 clusters                 = 0; +        int                     ret                      = -1; +        char                    *decommissioned_children = NULL; +        xlator_t                *dht                     = NULL; + +        GF_ASSERT (child_count > 1); +        clusters = volgen_graph_build_clusters (graph,  volinfo, +                                                "cluster/distribute", "%s-dht", +                                                child_count, child_count); +        if (clusters < 0) +                goto out; +        dht = first_of (graph); +        ret = _graph_get_decommissioned_children (dht, volinfo, +                                                  &decommissioned_children); +        if (ret) +                goto out; +        if (decommissioned_children) { +                ret = xlator_set_option (dht, "decommissioned-bricks", +                                         decommissioned_children); +                if (ret) +                        goto out; +        } +        ret = 0; +out: +        if (decommissioned_children) +                GF_FREE (decommissioned_children); +        return ret; +} + +static int +volume_volgen_graph_build_clusters (volgen_graph_t *graph, +                                    glusterd_volinfo_t *volinfo) +{ +        char                    *replicate_args[]   = {"cluster/replicate", +                                                       "%s-replicate-%d"}; +        char                    *stripe_args[]      = {"cluster/stripe", +                                                       "%s-stripe-%d"}; +        int                     rclusters           = 0; +        int                     clusters            = 0; +        int                     dist_count          = 0; +        int                     ret                 = -1; + +        if (volinfo->sub_count > 1) {                  switch (volinfo->type) {                  case GF_CLUSTER_TYPE_REPLICATE: -                        cluster_args = replicate_args; +                        clusters = volgen_graph_build_clusters (graph, volinfo, +                                                           replicate_args[0], +                                                           replicate_args[1], +                                                           volinfo->brick_count, +                                                           volinfo->sub_count); +                        if (clusters < 0) +                                goto out;                          break;                  case GF_CLUSTER_TYPE_STRIPE: -                        cluster_args = stripe_args; +                        clusters = volgen_graph_build_clusters (graph, volinfo, +                                                           stripe_args[0], +                                                           stripe_args[1], +                                                           volinfo->brick_count, +                                                           volinfo->sub_count); +                        if (clusters < 0) +                                goto out;                          break;                  case GF_CLUSTER_TYPE_STRIPE_REPLICATE:                          /* Replicate after the clients, then stripe */ -                        if (volinfo->replica_count == 0) { -                                ret = -1; +                        if (volinfo->replica_count == 0) +                                return -1; +                        clusters = volgen_graph_build_clusters (graph, volinfo, +                                                           replicate_args[0], +                                                           replicate_args[1], +                                                           volinfo->brick_count, +                                                           volinfo->replica_count); +                        if (clusters < 0) +                                goto out; + +                        rclusters = volinfo->brick_count/volinfo->replica_count; +                        GF_ASSERT (rclusters == clusters); +                        clusters = volgen_graph_build_clusters (graph, volinfo, +                                                           stripe_args[0], +                                                           stripe_args[1], +                                                           rclusters, +                                                           volinfo->stripe_count); +                        if (clusters < 0)                                  goto out; -                        } -                        sub_count = volinfo->replica_count; -                        cluster_args = replicate_args;                          break;                  default:                          gf_log ("", GF_LOG_ERROR, "volume inconsistency: "                                  "unrecognized clustering type"); -                        ret = -1;                          goto out;                  } - -                i = 0; -                j = 0; -                txl = first_of (graph); -                for (trav = txl; trav->next; trav = trav->next); -                for (;; trav = trav->prev) { -                        if (i % sub_count == 0) { -                                xl = volgen_graph_add_nolink (graph, -                                                              cluster_args[0], -                                                              cluster_args[1], -                                                              volname, j); -                                if (!xl) { -                                        ret = -1; -                                        goto out; -                                } -                                j++; -                        } - -                        ret = volgen_xlator_link (xl, trav); -                        if (ret) -                                goto out; - -                        if (trav == txl) -                                break; -                        i++; -                } - -                if (GF_CLUSTER_TYPE_STRIPE_REPLICATE == volinfo->type) { -                        sub_count = volinfo->stripe_count; -                        cluster_args = stripe_args; - -                        i = 0; -                        txl = first_of (graph); -                        for (trav = txl; --j; trav = trav->next); -                        for (;; trav = trav->prev) { -                                if (i % sub_count == 0) { -                                        xl = volgen_graph_add_nolink (graph, -                                                                      cluster_args[0], -                                                                      cluster_args[1], -                                                                      volname, j); -                                        if (!xl) { -                                                ret = -1; -                                                goto out; -                                        } -                                        j++; -                                } - -                                ret = volgen_xlator_link (xl, trav); -                                if (ret) -                                        goto out; - -                                if (trav == txl) -                                        break; -                                i++; -                        } - -                }          } - -        if (volinfo->sub_count) +        if (volinfo->sub_count) {                  dist_count = volinfo->brick_count / volinfo->sub_count; -        else +                GF_ASSERT (dist_count == clusters); +        } else {                  dist_count = volinfo->brick_count; +        } +          if (dist_count > 1) { -                xl = volgen_graph_add_nolink (graph, "cluster/distribute", -                                              "%s-dht", volname); -                if (!xl) { -                        ret = -1; +                ret = volgen_graph_build_dht_cluster (graph, volinfo, +                                                      dist_count); +                if (ret)                          goto out; -                } +        } +        ret = 0; +out: +        return ret; +} -                trav = xl; -                for (i = 0; i < dist_count; i++) -                        trav = trav->next; -                for (; trav != xl; trav = trav->prev) { -                        ret = volgen_xlator_link (xl, trav); -                        if (ret) -                                goto out; -                } +static int +client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, +                      dict_t *set_dict, void *param) +{ +        int                      ret                = 0; +        xlator_t                *xl                 = NULL; +        char                    *volname            = NULL; -                if (removed_bricks) { -                        if (volinfo->sub_count) { -                                idx = index_of_removed_brick / volinfo->sub_count; -                                if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { -                                        snprintf (volume_name, 1024, "%s-replicate-%d", -                                                  volname, idx); -                                        strcpy (removed_bricklist, volume_name); -                                } else if (volinfo->type != GF_CLUSTER_TYPE_NONE) { -                                        snprintf (volume_name, 1024, "%s-stripe-%d  ", -                                                  volname, idx); -                                        strcpy (removed_bricklist, volume_name); -                                } -                        } -                        ret = xlator_set_option (xl, "decommissioned-bricks", -                                                 removed_bricklist); -                        if (ret) -                                goto out; -                } -        } +        volname = volinfo->volname; +        ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); +        if (ret) +                goto out; + +        ret = volume_volgen_graph_build_clusters (graph, volinfo); +        if (ret) +                goto out;          ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);          if (ret == -1)                  goto out; -          if (ret) {                  xl = volgen_graph_add (graph, "features/quota", volname); @@ -2030,6 +2204,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          if (ret)                  goto out; +        ret = -1;          xl = volgen_graph_add_as (graph, "debug/io-stats", volname);          if (!xl)                  goto out; @@ -2040,11 +2215,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          if (!ret)                  ret = volgen_graph_set_options_generic (graph, set_dict, "client",                                                          &sys_loglevel_option_handler); -  out: -        if (removed_bricklist) -                GF_FREE (removed_bricklist); -          return ret;  } @@ -2059,8 +2230,28 @@ build_client_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,  }  static int +shd_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, +                           void *param) +{ +        int                     ret = 0; +        struct volopt_map_entry new_vme = {0}; +        int                     shd = 0; + +        shd = !strcmp (vme->option, "!self-heal-daemon"); +        if ((vme->option[0] == '!') && !shd) +                goto out; +        new_vme = *vme; +        if (shd) +                new_vme.option = "self-heal-daemon"; + +        ret = no_filter_option_handler (graph, &new_vme, param); +out: +        return ret; +} + +static int  nfs_option_handler (volgen_graph_t *graph, -                            struct volopt_map_entry *vme, void *param) +                    struct volopt_map_entry *vme, void *param)  {          xlator_t *xl = NULL;          char *aa = NULL; @@ -2234,6 +2425,93 @@ nfs_option_handler (volgen_graph_t *graph,          return 0;  } +static int +build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict) +{ +        volgen_graph_t     cgraph         = {0}; +        glusterd_volinfo_t *voliter       = NULL; +        xlator_t           *this          = NULL; +        glusterd_conf_t    *priv          = NULL; +        dict_t             *set_dict      = NULL; +        int                ret            = 0; +        gf_boolean_t       valid_config   = _gf_false; +        xlator_t           *iostxl        = NULL; +        int                rclusters       = 0; +        int                replica_count  = 0; + +        this = THIS; +        priv = this->private; + +        set_dict = dict_new (); +        if (!set_dict) { +                ret = -ENOMEM; +                goto out; +        } + +        iostxl = volgen_graph_add_as (graph, "debug/io-stats", "glustershd"); +        if (!iostxl) { +                ret = -1; +                goto out; +        } + +        list_for_each_entry (voliter, &priv->volumes, vol_list) { +                if (voliter->status != GLUSTERD_STATUS_STARTED) +                        continue; + +                if (voliter->type == GF_CLUSTER_TYPE_REPLICATE) +                        replica_count = voliter->sub_count; +                else if (voliter->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) +                        replica_count = voliter->replica_count; +                else +                        continue; + +                valid_config = _gf_true; + +                ret = dict_set_str (set_dict, "cluster.self-heal-daemon", "on"); +                if (ret) +                        goto out; + +                dict_copy (voliter->dict, set_dict); +                if (mod_dict) +                        dict_copy (mod_dict, set_dict); + +                memset (&cgraph, 0, sizeof (cgraph)); +                ret = volgen_graph_build_clients (&cgraph, voliter, set_dict, +                                                  NULL); +                if (ret) +                        goto out; + +                rclusters = volgen_graph_build_clusters (&cgraph, voliter, +                                                        "cluster/replicate", +                                                        "%s-replicate-%d", +                                                        voliter->brick_count, +                                                        replica_count); +                if (rclusters < 0) { +                        ret = -1; +                        goto out; +                } + +                ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter, +                                                        shd_option_handler); +                if (ret) +                        goto out; + +                ret = volgen_graph_merge_sub (graph, &cgraph, rclusters); +                if (ret) +                        goto out; + +                ret = dict_reset (set_dict); +                if (ret) +                        goto out; +        } +out: +        if (set_dict) +                dict_unref (set_dict); +        if (!valid_config) +                ret = -EINVAL; +        return ret; +} +  /* builds a graph for nfs server role, with option overrides in mod_dict */  static int  build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) @@ -2259,14 +2537,6 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)                  return -1;          } -        ret = dict_set_str (set_dict, VKEY_PERF_STAT_PREFETCH, "off"); -        if (ret) -                goto out; - -        ret = dict_set_str (set_dict, "performance.client-io-threads", "off"); -        if (ret) -                goto out; -          nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server");          if (!nfsxl) {                  ret = -1; @@ -2274,7 +2544,7 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)          }          ret = xlator_set_option (nfsxl, "nfs.dynamic-volumes", "on");          if (ret) -                goto out;; +                goto out;          list_for_each_entry (voliter, &priv->volumes, vol_list) {                  if (voliter->status != GLUSTERD_STATUS_STARTED) @@ -2313,11 +2583,19 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)                  else                          get_transport_type (voliter, voliter->dict, nfs_xprt, _gf_true); +                ret = dict_set_str (set_dict, VKEY_PERF_STAT_PREFETCH, "off"); +                if (ret) +                        goto out; + +                ret = dict_set_str (set_dict, "performance.client-io-threads", "off"); +                if (ret) +                        goto out; +                  ret = dict_set_str (set_dict, "client-transport-type",                                      nfs_xprt);                  ret = build_client_graph (&cgraph, voliter, set_dict);                  if (ret) -                        goto out;; +                        goto out;                  if (mod_dict) {                          dict_copy (mod_dict, set_dict); @@ -2328,7 +2606,13 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)                                                                  basic_option_handler);                  } -                ret = volgen_graph_merge_sub (graph, &cgraph); +                if (ret) +                        goto out; + +                ret = volgen_graph_merge_sub (graph, &cgraph, 1); +                if (ret) +                        goto out; +                ret = dict_reset (set_dict);                  if (ret)                          goto out;          } @@ -2336,8 +2620,7 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)          list_for_each_entry (voliter, &priv->volumes, vol_list) {                  if (mod_dict) { -                        dict_copy (mod_dict, set_dict); -                        ret = volgen_graph_set_options_generic (graph, set_dict, voliter, +                        ret = volgen_graph_set_options_generic (graph, mod_dict, voliter,                                                                  nfs_option_handler);                  } else {                          ret = volgen_graph_set_options_generic (graph, voliter->dict, voliter, @@ -2509,22 +2792,24 @@ out:          return ret;  } -static void -get_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +static int +generate_single_transport_client_volfile (glusterd_volinfo_t *volinfo, +                                          char *filepath, dict_t *dict)  { -        char  path[PATH_MAX] = {0,}; -        glusterd_conf_t *priv = NULL; +        volgen_graph_t graph = {0,}; +        int     ret = -1; -        priv = THIS->private; +        ret = build_client_graph (&graph, volinfo, dict); +        if (!ret) +                ret = volgen_write_volfile (&graph, filepath); -        GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); +        volgen_graph_free (&graph); -        snprintf (filename, PATH_MAX, "%s/%s-fuse.vol", -                  path, volinfo->volname); +        return ret;  } -static void -get_rdma_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +void +get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo, gf_transport_type type)  {          char  path[PATH_MAX] = {0,};          glusterd_conf_t *priv = NULL; @@ -2533,55 +2818,68 @@ get_rdma_client_filepath (char *filename, glusterd_volinfo_t *volinfo)          GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); -        snprintf (filename, PATH_MAX, "%s/%s.rdma-fuse.vol", -                  path, volinfo->volname); +        switch (type) { +        case GF_TRANSPORT_TCP: +                snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol", +                          path, volinfo->volname); +                break; +        case GF_TRANSPORT_RDMA: +                snprintf (filepath, PATH_MAX, "%s/%s.rdma-fuse.vol", +                          path, volinfo->volname); +                break; +        default: +                GF_ASSERT (0); +                break; +        }  } -static int -generate_client_volfile (glusterd_volinfo_t *volinfo) +static void +enumerate_transport_reqs (gf_transport_type type, char **types)  { -        volgen_graph_t graph = {0,}; -        char    filename[PATH_MAX] = {0,}; -        int     ret = -1; -        dict_t *dict = NULL; - -        get_client_filepath (filename, volinfo); +        switch (type) { +        case GF_TRANSPORT_TCP: +                types[0] = "tcp"; +                break; +        case GF_TRANSPORT_RDMA: +                types[0] = "rdma"; +                break; +        case GF_TRANSPORT_BOTH_TCP_RDMA: +                types[0] = "tcp"; +                types[1] = "rdma"; +                break; +        } +} -        if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { -                dict = dict_new (); -                if (!dict) -                        goto out; -                ret = dict_set_str (dict, "client-transport-type", "tcp"); +static int +generate_client_volfiles (glusterd_volinfo_t *volinfo) +{ +        char               filepath[PATH_MAX] = {0,}; +        int                ret = -1; +        char               *types[] = {NULL, NULL, NULL}; +        int                i = 0; +        dict_t             *dict = NULL; +        gf_transport_type  type = GF_TRANSPORT_TCP; + +        enumerate_transport_reqs (volinfo->transport_type, types); +        dict = dict_new (); +        if (!dict) +                goto out; +        for (i = 0; types[i]; i++) { +                memset (filepath, 0, sizeof (filepath)); +                ret = dict_set_str (dict, "client-transport-type", types[i]);                  if (ret)                          goto out; -        } - -        ret = build_client_graph (&graph, volinfo, dict); -        if (!ret) -                ret = volgen_write_volfile (&graph, filename); - -        volgen_graph_free (&graph); - -        if (dict) { -                /* This means, transport type is both RDMA and TCP */ - -                memset (&graph, 0, sizeof (graph)); -                get_rdma_client_filepath (filename, volinfo); - -                ret = dict_set_str (dict, "client-transport-type", "rdma"); +                type = transport_str_to_type (types[i]); +                get_client_filepath (filepath, volinfo, type); +                ret = generate_single_transport_client_volfile (volinfo, +                                                                filepath, +                                                                dict);                  if (ret)                          goto out; - -                ret = build_client_graph (&graph, volinfo, dict); -                if (!ret) -                        ret = volgen_write_volfile (&graph, filename); - -                volgen_graph_free (&graph); - -                dict_unref (dict);          } -  out: +        if (dict) +                dict_unref (dict);          return ret;  } @@ -2593,7 +2891,7 @@ glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo,          ret = glusterd_generate_brick_volfile (volinfo, brickinfo);          if (!ret) -                ret = generate_client_volfile (volinfo); +                ret = generate_client_volfiles (volinfo);          if (!ret)                  ret = glusterd_fetchspec_notify (THIS); @@ -2612,7 +2910,7 @@ glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo)                  goto out;          } -        ret = generate_client_volfile (volinfo); +        ret = generate_client_volfiles (volinfo);          if (ret) {                  gf_log ("", GF_LOG_ERROR,                          "Could not generate volfile for client"); @@ -2625,34 +2923,62 @@ out:          return ret;  } -void -glusterd_get_nfs_filepath (char *filename) +int +glusterd_create_global_volfile (int (*builder) (volgen_graph_t *graph, +                                                dict_t *set_dict), +                                char *filepath, dict_t  *mod_dict)  { -        char  path[PATH_MAX] = {0,}; -        glusterd_conf_t *priv  = NULL; +        volgen_graph_t graph = {0,}; +        int     ret = -1; -        priv = THIS->private; +        ret = builder (&graph, mod_dict); +        if (!ret) +                ret = volgen_write_volfile (&graph, filepath); -        GLUSTERD_GET_NFS_DIR (path, priv); +        volgen_graph_free (&graph); -        snprintf (filename, PATH_MAX, "%s/nfs-server.vol", path); +        return ret;  }  int  glusterd_create_nfs_volfile ()  { -        volgen_graph_t graph = {0,}; -        char    filename[PATH_MAX] = {0,}; -        int     ret = -1; +        char            filepath[PATH_MAX] = {0,}; +        glusterd_conf_t *conf = THIS->private; -        glusterd_get_nfs_filepath (filename); +        glusterd_get_nodesvc_volfile ("nfs", conf->workdir, +                                            filepath, sizeof (filepath)); +        return glusterd_create_global_volfile (build_nfs_graph, +                                               filepath, NULL); +} -        ret = build_nfs_graph (&graph, NULL); -        if (!ret) -                ret = volgen_write_volfile (&graph, filename); +int +glusterd_create_shd_volfile () +{ +        char            filepath[PATH_MAX] = {0,}; +        int             ret = -1; +        glusterd_conf_t *conf = THIS->private; +        dict_t          *mod_dict = NULL; -        volgen_graph_free (&graph); +        mod_dict = dict_new (); +        if (!mod_dict) +                goto out; + +        ret = dict_set_uint32 (mod_dict, "cluster.background-self-heal-count", 0); +        if (ret) +                goto out; + +        ret = dict_set_str (mod_dict, "cluster.data-self-heal", "on"); +        if (ret) +                goto out; +        glusterd_get_nodesvc_volfile ("glustershd", conf->workdir, +                                            filepath, sizeof (filepath)); +        ret = glusterd_create_global_volfile (build_shd_graph, filepath, +                                              mod_dict); +out: +        if (mod_dict) +                dict_unref (mod_dict);          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 3fd8a8351..974aed934 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -68,7 +68,10 @@ int glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo);  void glusterd_get_nfs_filepath (char *filename); +void glusterd_get_shd_filepath (char *filename); +  int glusterd_create_nfs_volfile (); +int glusterd_create_shd_volfile ();  int glusterd_delete_volfile (glusterd_volinfo_t *volinfo,                               glusterd_brickinfo_t *brickinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 90d3f16bc..81ef4c605 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -955,7 +955,7 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr)          if (ret)                  goto out; -        ret = glusterd_check_generate_start_nfs (); +        ret = glusterd_nodesvcs_handle_graph_change (volinfo);  out:          gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); @@ -994,13 +994,9 @@ glusterd_op_stop_volume (dict_t *dict)                  goto out;          if (glusterd_are_all_volumes_stopped ()) { -                if (glusterd_is_nfs_started ()) { -                        ret = glusterd_nfs_server_stop (); -                        if (ret) -                                goto out; -                } +                ret = glusterd_nodesvcs_stop (volinfo);          } else { -                ret = glusterd_check_generate_start_nfs (); +                ret = glusterd_nodesvcs_handle_graph_change (volinfo);          }  out: diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 69923256c..b9e09254b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -852,6 +852,15 @@ init (xlator_t *this)                  exit (1);          } +        snprintf (voldir, PATH_MAX, "%s/glustershd", dirname); +        ret = mkdir (voldir, 0777); +        if ((-1 == ret) && (errno != EEXIST)) { +                gf_log (this->name, GF_LOG_CRITICAL, +                        "Unable to create glustershd directory %s" +                        " ,errno = %d", voldir, errno); +                exit (1); +        } +          ret = glusterd_rpcsvc_options_build (this->options);          if (ret)                  goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index c8fa82819..45890a5d8 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -263,11 +263,6 @@ enum glusterd_vol_comp_status_ {  typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); -#define GLUSTERD_GET_NFS_DIR(path, priv)                                \ -        do {                                                            \ -                snprintf (path, PATH_MAX, "%s/nfs", priv->workdir);\ -        } while (0);                                                    \ -  #define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \          snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir,\                    volinfo->volname); @@ -277,10 +272,6 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);                    GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \                    GLUSTERD_BRICK_INFO_DIR); -#define GLUSTERD_GET_NFS_PIDFILE(pidfile)                               \ -                snprintf (pidfile, PATH_MAX, "%s/nfs/run/nfs.pid", \ -                          priv->workdir);                               \ -  #define GLUSTERD_REMOVE_SLASH_FROM_PATH(path,string) do {               \                  int i = 0;                                              \                  for (i = 1; i < strlen (path); i++) {                   \ diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index a8b7b67a4..7a9bdbcb9 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -2549,6 +2549,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,              (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {                  snprintf (host_buf, 1024, "<POSIX:%s:%s>", priv->hostname,                            real_path); +                size = strlen (host_buf) + 1;                  ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY,                                      host_buf);                  if (ret < 0) {  | 
