diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 6902 |
1 files changed, 6035 insertions, 867 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 3aafd6e9a..e8ae05851 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -32,7 +22,7 @@ #include "timer.h" #include "defaults.h" #include "compat.h" -#include "md5.h" +#include "syncop.h" #include "run.h" #include "compat-errno.h" #include "statedump.h" @@ -45,9 +35,10 @@ #include "glusterd-store.h" #include "glusterd-volgen.h" #include "glusterd-pmap.h" +#include "glusterfs-acl.h" +#include "glusterd-locks.h" #include "xdr-generic.h" - #include <sys/resource.h> #include <inttypes.h> #include <signal.h> @@ -56,20 +47,51 @@ #include <sys/ioctl.h> #include <sys/socket.h> #include <rpc/pmap_clnt.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/statvfs.h> +#include <ifaddrs.h> +#ifdef HAVE_BD_XLATOR +#include <lvm2app.h> +#endif + + +#ifdef GF_LINUX_HOST_OS +#include <mntent.h> +#endif #ifdef GF_SOLARIS_HOST_OS #include <sys/sockio.h> #endif -#define MOUNT_PROGRAM 100005 -#define NFS_PROGRAM 100003 -#define NFSV3_VERSION 3 -#define MOUNTV3_VERSION 3 -#define MOUNTV1_VERSION 1 +#define NFS_PROGRAM 100003 +#define NFSV3_VERSION 3 + +#define MOUNT_PROGRAM 100005 +#define MOUNTV3_VERSION 3 +#define MOUNTV1_VERSION 1 + +#define NLM_PROGRAM 100021 +#define NLMV4_VERSION 4 +#define NLMV1_VERSION 1 + +#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X)) -char *glusterd_sock_dir = "/tmp"; static glusterd_lock_t lock; +char* +gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo) +{ + if ((peerinfo == NULL) || uuid_is_null (peerinfo->uuid)) + return NULL; + + if (peerinfo->uuid_str[0] == '\0') + uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str); + + return peerinfo->uuid_str; +} + + int32_t glusterd_get_lock_owner (uuid_t *uuid) { @@ -94,126 +116,17 @@ glusterd_unset_lock_owner (uuid_t owner) } gf_boolean_t -glusterd_is_loopback_localhost (const struct sockaddr *sa, char *hostname) -{ - GF_ASSERT (sa); - - gf_boolean_t is_local = _gf_false; - const struct in_addr *addr4 = NULL; - const struct in6_addr *addr6 = NULL; - uint8_t *ap = NULL; - struct in6_addr loopbackaddr6 = IN6ADDR_LOOPBACK_INIT; - - switch (sa->sa_family) { - case AF_INET: - addr4 = &(((struct sockaddr_in *)sa)->sin_addr); - ap = (uint8_t*)&addr4->s_addr; - if (ap[0] == 127) - is_local = _gf_true; - break; - - case AF_INET6: - addr6 = &(((struct sockaddr_in6 *)sa)->sin6_addr); - if (memcmp (addr6, &loopbackaddr6, - sizeof (loopbackaddr6)) == 0) - is_local = _gf_true; - break; - - default: - if (hostname) - gf_log ("glusterd", GF_LOG_ERROR, - "unknown address family %d for %s", - sa->sa_family, hostname); - break; - } - - return is_local; -} - -char * -get_ip_from_addrinfo (struct addrinfo *addr, char **ip) +glusterd_is_fuse_available () { - char buf[64]; - void *in_addr = NULL; - struct sockaddr_in *s4 = NULL; - struct sockaddr_in6 *s6 = NULL; - - switch (addr->ai_family) - { - case AF_INET: - s4 = (struct sockaddr_in *)addr->ai_addr; - in_addr = &s4->sin_addr; - break; - - case AF_INET6: - s6 = (struct sockaddr_in6 *)addr->ai_addr; - in_addr = &s6->sin6_addr; - break; - - default: - gf_log ("glusterd", GF_LOG_ERROR, "Invalid family"); - return NULL; - } - - if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) { - gf_log ("glusterd", GF_LOG_ERROR, "String conversion failed"); - return NULL; - } - - *ip = strdup (buf); - return *ip; -} - -int32_t -glusterd_is_local_addr (char *hostname) -{ - int32_t ret = -1; - struct addrinfo *result = NULL; - struct addrinfo *res = NULL; - int32_t found = 0; - int sd = -1; - char *ip = NULL; - - ret = getaddrinfo (hostname, NULL, NULL, &result); - - if (ret != 0) { - gf_log ("", GF_LOG_ERROR, "error in getaddrinfo: %s\n", - gai_strerror(ret)); - goto out; - } - - for (res = result; res != NULL; res = res->ai_next) { - found = glusterd_is_loopback_localhost (res->ai_addr, hostname); - if (found) - goto out; - } - for (res = result; res != NULL; res = res->ai_next) { - gf_log ("glusterd", GF_LOG_DEBUG, "%s ", get_ip_from_addrinfo (res, &ip)); - sd = socket (res->ai_family, SOCK_DGRAM, 0); - if (sd == -1) - goto out; - /*If bind succeeds then its a local address*/ - ret = bind (sd, res->ai_addr, res->ai_addrlen); - if (ret == 0) { - found = _gf_true; - gf_log ("glusterd", GF_LOG_INFO, "%s is local", get_ip_from_addrinfo (res, &ip)); - close (sd); - break; - } - close (sd); - } + int fd = 0; -out: - if (result) - freeaddrinfo (result); + fd = open ("/dev/fuse", O_RDWR); - if (found) - gf_log ("glusterd", GF_LOG_DEBUG, "%s is local", hostname); + if (fd > -1 && !close (fd)) + return _gf_true; else - gf_log ("glusterd", GF_LOG_DEBUG, "%s is not local", hostname); - - return !found; + return _gf_false; } int32_t @@ -224,13 +137,17 @@ glusterd_lock (uuid_t uuid) char new_owner_str[50]; char owner_str[50]; int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (uuid); glusterd_get_lock_owner (&owner); if (!uuid_is_null (owner)) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to get lock" + gf_log (this->name, GF_LOG_ERROR, "Unable to get lock" " for uuid: %s, lock held by: %s", uuid_utoa_r (uuid, new_owner_str), uuid_utoa_r (owner, owner_str)); @@ -240,7 +157,7 @@ glusterd_lock (uuid_t uuid) ret = glusterd_set_lock_owner (uuid); if (!ret) { - gf_log ("glusterd", GF_LOG_INFO, "Cluster lock held by" + gf_log (this->name, GF_LOG_DEBUG, "Cluster lock held by" " %s", uuid_utoa (uuid)); } @@ -256,21 +173,25 @@ glusterd_unlock (uuid_t uuid) char new_owner_str[50]; char owner_str[50]; int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (uuid); glusterd_get_lock_owner (&owner); - if (NULL == owner) { - gf_log ("glusterd", GF_LOG_ERROR, "Cluster lock not held!"); + if (uuid_is_null (owner)) { + gf_log (this->name, GF_LOG_ERROR, "Cluster lock not held!"); goto out; } ret = uuid_compare (uuid, owner); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Cluster lock held by %s" - " ,unlock req from %s!", uuid_utoa_r (owner ,owner_str) + gf_log (this->name, GF_LOG_ERROR, "Cluster lock held by %s ," + "unlock req from %s!", uuid_utoa_r (owner ,owner_str) , uuid_utoa_r (uuid, new_owner_str)); goto out; } @@ -278,7 +199,7 @@ glusterd_unlock (uuid_t uuid) ret = glusterd_unset_lock_owner (uuid); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to clear cluster " + gf_log (this->name, GF_LOG_ERROR, "Unable to clear cluster " "lock"); goto out; } @@ -299,7 +220,7 @@ glusterd_get_uuid (uuid_t *uuid) GF_ASSERT (priv); - uuid_copy (*uuid, priv->uuid); + uuid_copy (*uuid, MY_UUID); return 0; } @@ -492,7 +413,7 @@ glusterd_check_volume_exists (char *volname) ret = stat (pathname, &stbuf); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Volume %s does not exist." + gf_log (THIS->name, GF_LOG_DEBUG, "Volume %s does not exist." "stat failed with errno : %d on path: %s", volname, errno, pathname); return _gf_false; @@ -515,34 +436,300 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) if (!new_volinfo) goto out; + LOCK_INIT (&new_volinfo->lock); INIT_LIST_HEAD (&new_volinfo->vol_list); + INIT_LIST_HEAD (&new_volinfo->snapvol_list); INIT_LIST_HEAD (&new_volinfo->bricks); + INIT_LIST_HEAD (&new_volinfo->snap_volumes); new_volinfo->dict = dict_new (); if (!new_volinfo->dict) { - if (new_volinfo) - GF_FREE (new_volinfo); + GF_FREE (new_volinfo); goto out; } new_volinfo->gsync_slaves = dict_new (); if (!new_volinfo->gsync_slaves) { - if (new_volinfo) - GF_FREE (new_volinfo); + GF_FREE (new_volinfo); goto out; } + snprintf (new_volinfo->parent_volname, GLUSTERD_MAX_VOLUME_NAME, "N/A"); + + new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + new_volinfo->xl = THIS; + *volinfo = new_volinfo; ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* This function will create a new volinfo and then + * dup the entries from volinfo to the new_volinfo. + * + * @param volinfo volinfo which will be duplicated + * @param dup_volinfo new volinfo which will be created + * @param set_userauth if this true then auth info is also set + * + * @return 0 on success else -1 + */ +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out); + + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to create the " + "duplicate volinfo for the volume %s", + volinfo->volname); + goto out; + } + + new_volinfo->type = volinfo->type; + new_volinfo->replica_count = volinfo->replica_count; + new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; + new_volinfo->sub_count = volinfo->sub_count; + new_volinfo->transport_type = volinfo->transport_type; + new_volinfo->nfs_transport_type = volinfo->nfs_transport_type; + new_volinfo->brick_count = volinfo->brick_count; + + dict_copy (volinfo->dict, new_volinfo->dict); + gd_update_volume_op_versions (new_volinfo); + + if (set_userauth) { + glusterd_auth_set_username (new_volinfo, + volinfo->auth.username); + glusterd_auth_set_password (new_volinfo, + volinfo->auth.password); + } + + *dup_volinfo = new_volinfo; + ret = 0; +out: + if (ret && (NULL != new_volinfo)) { + (void) glusterd_volinfo_delete (new_volinfo); + } + return ret; +} + +/* This function will duplicate brickinfo + * + * @param brickinfo Source brickinfo + * @param dup_brickinfo Destination brickinfo + * + * @return 0 on success else -1 + */ +int32_t +glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out); + + strcpy (dup_brickinfo->hostname, brickinfo->hostname); + strcpy (dup_brickinfo->path, brickinfo->path); + strcpy (dup_brickinfo->device_path, brickinfo->device_path); + ret = gf_canonicalize_path (dup_brickinfo->path); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to canonicalize " + "brick path"); + goto out; + } + uuid_copy (dup_brickinfo->uuid, brickinfo->uuid); + + dup_brickinfo->port = brickinfo->port; + dup_brickinfo->rdma_port = brickinfo->rdma_port; + if (NULL != brickinfo->logfile) { + dup_brickinfo->logfile = gf_strdup (brickinfo->logfile); + if (NULL == dup_brickinfo->logfile) { + ret = -1; + goto out; + } + } + dup_brickinfo->status = brickinfo->status; + dup_brickinfo->snap_status = brickinfo->snap_status; +out: + return ret; +} + +/* This function will copy snap volinfo to the new + * passed volinfo and regenerate backend store files + * for the restored snap. + * + * @param new_volinfo new volinfo + * @param snap_volinfo volinfo of snap volume + * + * @return 0 on success and -1 on failure + * + * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves + */ +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + + GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out); + + brick_count = 0; + list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) { + ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + "new brickinfo"); + goto out; + } + + /* Duplicate brickinfo */ + ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup " + "brickinfo"); + goto out; + } + + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* We need to replace the volume id of all the bricks + * to the volume id of the origin volume. new_volinfo + * has the origin volume's volume id*/ + ret = sys_lsetxattr (new_brickinfo->path, + GF_XATTR_VOL_ID_KEY, + new_volinfo->volume_id, + sizeof (new_volinfo->volume_id), + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set extended attribute %s on %s. " + "Reason: %s, snap: %s", + GF_XATTR_VOL_ID_KEY, + new_brickinfo->path, strerror (errno), + new_volinfo->volname); + goto out; + } + } + + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_volinfo->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + } + + list_add_tail (&new_brickinfo->brick_list, + &new_volinfo->bricks); + /* ownership of new_brickinfo is passed to new_volinfo */ + new_brickinfo = NULL; + brick_count++; + } + + /* Regenerate all volfiles */ + ret = glusterd_create_volfiles_and_notify_services (new_volinfo); + +out: + if (ret && (NULL != new_brickinfo)) { + (void) glusterd_brickinfo_delete (new_brickinfo); + } + return ret; } +void +glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) { + + GF_ASSERT (volinfo); + + GF_FREE (volinfo->auth.username); + + GF_FREE (volinfo->auth.password); +} + +char * +glusterd_auth_get_username (glusterd_volinfo_t *volinfo) { + + GF_ASSERT (volinfo); + + return volinfo->auth.username; +} + +char * +glusterd_auth_get_password (glusterd_volinfo_t *volinfo) { + + GF_ASSERT (volinfo); + + return volinfo->auth.password; +} + +int32_t +glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username) { + + GF_ASSERT (volinfo); + GF_ASSERT (username); + + volinfo->auth.username = gf_strdup (username); + return 0; +} + +int32_t +glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password) { + + GF_ASSERT (volinfo); + GF_ASSERT (password); + + volinfo->auth.password = gf_strdup (password); + return 0; +} + int32_t glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo) { @@ -552,8 +739,7 @@ glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo) list_del_init (&brickinfo->brick_list); - if (brickinfo->logfile) - GF_FREE (brickinfo->logfile); + GF_FREE (brickinfo->logfile); GF_FREE (brickinfo); ret = 0; @@ -578,7 +764,7 @@ glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -590,6 +776,7 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_del_init (&volinfo->vol_list); + list_del_init (&volinfo->snapvol_list); ret = glusterd_volume_brickinfos_delete (volinfo); if (ret) @@ -598,18 +785,18 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) dict_unref (volinfo->dict); if (volinfo->gsync_slaves) dict_unref (volinfo->gsync_slaves); - if (volinfo->logdir) - GF_FREE (volinfo->logdir); + GF_FREE (volinfo->logdir); + + glusterd_auth_cleanup (volinfo); GF_FREE (volinfo); ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { @@ -631,7 +818,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -639,17 +826,21 @@ int32_t glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo) { int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (brickinfo); ret = glusterd_hostname_to_uuid (brickinfo->hostname, brickinfo->uuid); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -glusterd_brickinfo_from_brick (char *brick, - glusterd_brickinfo_t **brickinfo) +glusterd_brickinfo_new_from_brick (char *brick, + glusterd_brickinfo_t **brickinfo) { int32_t ret = -1; glusterd_brickinfo_t *new_brickinfo = NULL; @@ -657,22 +848,37 @@ glusterd_brickinfo_from_brick (char *brick, char *path = NULL; char *tmp_host = NULL; char *tmp_path = NULL; + char *vg = NULL; GF_ASSERT (brick); GF_ASSERT (brickinfo); tmp_host = gf_strdup (brick); - if (tmp_host) - get_host_name (tmp_host, &hostname); + if (tmp_host && !get_host_name (tmp_host, &hostname)) + goto out; tmp_path = gf_strdup (brick); - if (tmp_path) - get_path_name (tmp_path, &path); + if (tmp_path && !get_path_name (tmp_path, &path)) + goto out; GF_ASSERT (hostname); GF_ASSERT (path); ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) + goto out; +#ifdef HAVE_BD_XLATOR + vg = strchr (path, '?'); + /* ? is used as a delimiter for vg */ + if (vg) { + strncpy (new_brickinfo->vg, vg + 1, PATH_MAX - 1); + *vg = '\0'; + } + new_brickinfo->caps = CAPS_BD; +#else + vg = NULL; /* Avoid compiler warnings when BD not enabled */ +#endif + ret = gf_canonicalize_path (path); if (ret) goto out; @@ -683,11 +889,258 @@ glusterd_brickinfo_from_brick (char *brick, ret = 0; out: - if (tmp_host) - GF_FREE (tmp_host); + GF_FREE (tmp_host); if (tmp_host) GF_FREE (tmp_path); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static gf_boolean_t +_is_prefix (char *str1, char *str2) +{ + GF_ASSERT (str1); + GF_ASSERT (str2); + + int i = 0; + int len1 = 0; + int len2 = 0; + int small_len = 0; + char *bigger = NULL; + gf_boolean_t prefix = _gf_true; + + len1 = strlen (str1); + len2 = strlen (str2); + small_len = min (len1, len2); + for (i = 0; i < small_len; i++) { + if (str1[i] != str2[i]) { + prefix = _gf_false; + break; + } + } + + if (len1 < len2) + bigger = str2; + + else if (len1 > len2) + bigger = str1; + + else + return prefix; + + if (bigger[small_len] != '/') + prefix = _gf_false; + + return prefix; +} + +/* Checks if @path is available in the peer identified by @uuid + * 'availability' is determined by querying current state of volumes + * in the cluster. */ +gf_boolean_t +glusterd_is_brickpath_available (uuid_t uuid, char *path) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t available = _gf_false; + char tmp_path[PATH_MAX+1] = {0}; + char tmp_brickpath[PATH_MAX+1] = {0}; + + priv = THIS->private; + + strncpy (tmp_path, path, PATH_MAX); + /* path may not yet exist */ + if (!realpath (path, tmp_path)) { + if (errno != ENOENT) { + goto out; + } + /* When realpath(3) fails, tmp_path is undefined. */ + strncpy(tmp_path,path,PATH_MAX); + } + + list_for_each_entry (volinfo, &priv->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (uuid, brickinfo->uuid)) + continue; + + if (!realpath (brickinfo->path, tmp_brickpath)) { + if (errno == ENOENT) + strncpy (tmp_brickpath, brickinfo->path, + PATH_MAX); + else + goto out; + } + + if (_is_prefix (tmp_brickpath, tmp_path)) + goto out; + } + } + available = _gf_true; +out: + return available; +} + +#ifdef HAVE_BD_XLATOR +/* + * Sets the tag of the format "trusted.glusterfs.volume-id:<uuid>" in + * the brick VG. It is used to avoid using same VG for another brick. + * @volume-id - gfid, @brick - brick info, @msg - Error message returned + * to the caller + */ +int +glusterd_bd_set_vg_tag (unsigned char *volume_id, glusterd_brickinfo_t *brick, + char *msg, int msg_size) +{ + lvm_t handle = NULL; + vg_t vg = NULL; + char *uuid = NULL; + int ret = -1; + + gf_asprintf (&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY, + uuid_utoa (volume_id)); + if (!uuid) { + snprintf (msg, sizeof(*msg), "Could not allocate memory " + "for tag"); + return -1; + } + + handle = lvm_init (NULL); + if (!handle) { + snprintf (msg, sizeof(*msg), "lvm_init failed"); + goto out; + } + + vg = lvm_vg_open (handle, brick->vg, "w", 0); + if (!vg) { + snprintf (msg, sizeof(*msg), "Could not open VG %s", + brick->vg); + goto out; + } + + if (lvm_vg_add_tag (vg, uuid) < 0) { + snprintf (msg, sizeof(*msg), "Could not set tag %s for " + "VG %s", uuid, brick->vg); + goto out; + } + lvm_vg_write (vg); + ret = 0; +out: + GF_FREE (uuid); + + if (vg) + lvm_vg_close (vg); + if (handle) + lvm_quit (handle); + + return ret; +} +#endif + +int +glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char **op_errstr, + gf_boolean_t is_force) +{ + int ret = -1; + char parentdir[PATH_MAX] = {0,}; + struct stat parent_st = {0,}; + struct stat brick_st = {0,}; + struct stat root_st = {0,}; + char msg[2048] = {0,}; + gf_boolean_t is_created = _gf_false; + + ret = mkdir (brickinfo->path, 0777); + if (ret) { + if (errno != EEXIST) { + snprintf (msg, sizeof (msg), "Failed to create brick " + "directory for brick %s:%s. Reason : %s ", + brickinfo->hostname, brickinfo->path, + strerror (errno)); + goto out; + } + } else { + is_created = _gf_true; + } + + ret = lstat (brickinfo->path, &brick_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s", + brickinfo->path, strerror (errno)); + goto out; + } + + if ((!is_created) && (!S_ISDIR (brick_st.st_mode))) { + snprintf (msg, sizeof (msg), "The provided path %s which is " + "already present, is not a directory", + brickinfo->path); + ret = -1; + goto out; + } + + snprintf (parentdir, sizeof (parentdir), "%s/..", brickinfo->path); + + ret = lstat ("/", &root_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on /. Reason : %s", + strerror (errno)); + goto out; + } + + ret = lstat (parentdir, &parent_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s", + parentdir, strerror (errno)); + goto out; + } + + if (!is_force) { + if (brick_st.st_dev != parent_st.st_dev) { + snprintf (msg, sizeof (msg), "The brick %s:%s is a " + "mount point. Please create a sub-directory " + "under the mount point and use that as the " + "brick directory. Or use 'force' at the end " + "of the command if you want to override this " + "behavior.", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + else if (parent_st.st_dev == root_st.st_dev) { + snprintf (msg, sizeof (msg), "The brick %s:%s is " + "is being created in the root partition. It " + "is recommended that you don't use the " + "system's root partition for storage backend." + " Or use 'force' at the end of the command if" + " you want to override this behavior.", + brickinfo->hostname, brickinfo->path); + ret = -1; + goto out; + } + } + +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_bd_set_vg_tag (volume_id, brickinfo, msg, + sizeof(msg)); + if (ret) + goto out; + } +#endif + ret = glusterd_check_and_set_brick_xattr (brickinfo->hostname, + brickinfo->path, volume_id, + op_errstr, is_force); + if (ret) + goto out; + + ret = 0; + +out: + if (ret && is_created) + rmdir (brickinfo->path); + if (ret && !*op_errstr && msg[0] != '\0') + *op_errstr = gf_strdup (msg); + return ret; } @@ -699,10 +1152,9 @@ glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, glusterd_brickinfo_t *brickiter = NULL; uuid_t peer_uuid = {0}; int32_t ret = -1; - int32_t brick_path_len = 0; - int32_t path_len = 0; - int32_t smaller_path = 0; - gf_boolean_t is_path_smaller = _gf_true; + xlator_t *this = NULL; + + this = THIS; if (uuid) { uuid_copy (peer_uuid, uuid); @@ -712,55 +1164,27 @@ glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, goto out; } ret = -1; - path_len = strlen (path); list_for_each_entry (brickiter, &volinfo->bricks, brick_list) { - if (uuid_is_null (brickiter->uuid)) { - ret = glusterd_resolve_brick (brickiter); - if (ret) - goto out; - } - brick_path_len = strlen (brickiter->path); - smaller_path = min (brick_path_len, path_len); - if (smaller_path != path_len) - is_path_smaller = _gf_false; - if ((!uuid_compare (peer_uuid, brickiter->uuid)) && - !strcmp (brickiter->path, path)) { - gf_log ("", GF_LOG_INFO, "Found brick"); + if ((uuid_is_null (brickiter->uuid)) && + (glusterd_resolve_brick (brickiter) != 0)) + goto out; + if (uuid_compare (peer_uuid, brickiter->uuid)) + continue; + + if (strcmp (brickiter->path, path) == 0) { + gf_log (this->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK, + brickiter->hostname, brickiter->path, + volinfo->volname); ret = 0; if (brickinfo) *brickinfo = brickiter; break; - } else { - if ((!uuid_compare (peer_uuid, brickiter->uuid)) && - !strncmp (brickiter->path, path, smaller_path)) { - if (is_path_smaller == _gf_true) { - if (brickiter->path[smaller_path] == '/') { - ret = 0; - gf_log ("", GF_LOG_INFO, - "given path %s lies" - " within %s", path, - brickiter->path); - *brickinfo = brickiter; - break; - } - } else - if (path[smaller_path] == '/') { - gf_log ("", GF_LOG_INFO, - "brick %s is a part of" - " %s", brickiter->path, - path); - ret = 0; - *brickinfo = brickiter; - break; - } - } - ret = -1; } } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -770,61 +1194,135 @@ glusterd_volume_brickinfo_get_by_brick (char *brick, glusterd_brickinfo_t **brickinfo) { int32_t ret = -1; - char *hostname = NULL; - char *path = NULL; - char *tmp_host = NULL; - char *tmp_path = NULL; + glusterd_brickinfo_t *tmp_brickinfo = NULL; GF_ASSERT (brick); GF_ASSERT (volinfo); - gf_log ("", GF_LOG_INFO, "brick: %s", brick); - - tmp_host = gf_strdup (brick); - if (tmp_host) - get_host_name (tmp_host, &hostname); - tmp_path = gf_strdup (brick); - if (tmp_path) - get_path_name (tmp_path, &path); - - if (!hostname || !path) { - gf_log ("", GF_LOG_ERROR, - "brick %s is not of form <HOSTNAME>:<export-dir>", - brick); - ret = -1; + ret = glusterd_brickinfo_new_from_brick (brick, &tmp_brickinfo); + if (ret) goto out; - } - ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, + ret = glusterd_volume_brickinfo_get (NULL, tmp_brickinfo->hostname, + tmp_brickinfo->path, volinfo, brickinfo); + (void) glusterd_brickinfo_delete (tmp_brickinfo); out: - if (tmp_host) - GF_FREE (tmp_host); - if (tmp_path) - GF_FREE (tmp_path); gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, + char *path) +{ + gf_boolean_t decommissioned = _gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + + ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, + &brickinfo); + if (ret) + goto out; + decommissioned = brickinfo->decommissioned; +out: + return decommissioned; +} + int32_t glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) { GF_ASSERT (peerinfo); - glusterd_peerctx_t *peerctx = NULL; + glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + glusterd_conf_t *priv = THIS->private; + if (peerinfo->quorum_contrib != QUORUM_NONE) + quorum_action = _gf_true; if (peerinfo->rpc) { + /* cleanup the saved-frames before last unref */ + synclock_unlock (&priv->big_lock); + rpc_clnt_connection_cleanup (&peerinfo->rpc->conn); + synclock_lock (&priv->big_lock); + peerctx = peerinfo->rpc->mydata; peerinfo->rpc->mydata = NULL; peerinfo->rpc = rpc_clnt_unref (peerinfo->rpc); peerinfo->rpc = NULL; - if (peerctx) + if (peerctx) { + GF_FREE (peerctx->errstr); GF_FREE (peerctx); + } } glusterd_peer_destroy (peerinfo); + if (quorum_action) + glusterd_do_quorum_action (); return 0; } +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + + if (!volume_id) + return -1; + + this = THIS; + priv = this->private; + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + voliter->volname); + break; + } + return ret; +} + +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); + + if (uuid_is_null(volume_id)) { + gf_log (this->name, GF_LOG_WARNING, "Volume UUID is NULL"); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + list_for_each_entry (voliter, &snap->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume not found"); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) { @@ -834,134 +1332,149 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) glusterd_conf_t *priv = NULL; GF_ASSERT (volname); - this = THIS; GF_ASSERT (this); priv = this->private; + GF_ASSERT (priv); list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { if (!strcmp (tmp_volinfo->volname, volname)) { - gf_log ("", GF_LOG_DEBUG, "Volume %s found", volname); + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + volname); ret = 0; *volinfo = tmp_volinfo; break; } } - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int32_t -glusterd_service_stop (const char *service, char *pidfile, int sig, - gf_boolean_t force_kill) +glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) { - int32_t ret = -1; - pid_t pid = -1; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; - file = fopen (pidfile, "r+"); + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_volname); - if (!file) { - gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - if (errno == ENOENT) { - gf_log ("",GF_LOG_TRACE, "%s may not be running", - service); + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->volname, snap_volname)) { ret = 0; + *volinfo = snap_vol; goto out; } - ret = -1; - goto out; } - ret = lockf (fileno (file), F_TLOCK, 0); - if (!ret) { - is_locked = _gf_true; - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "unlink stale pidfile: %s", pidfile); - } else if (ret && (ENOENT == errno)){ + + gf_log (this->name, GF_LOG_WARNING, "Snap volume %s not found", + snap_volname); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (origin_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->parent_volname, origin_volname)) { ret = 0; - gf_log ("", GF_LOG_INFO, "Brick already stopped"); + *volinfo = snap_vol; + goto out; } - goto out; } + gf_log (this->name, GF_LOG_DEBUG, "Snap volume not found(snap: %s, " + "origin-volume: %s", snap->snapname, origin_volname); - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_service_stop (const char *service, char *pidfile, int sig, + gf_boolean_t force_kill) +{ + int32_t ret = -1; + pid_t pid = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + if (!glusterd_is_service_running (pidfile, &pid)) { + ret = 0; + gf_log (this->name, GF_LOG_INFO, "%s already stopped", service); goto out; } - fclose (file); - file = NULL; - - gf_log ("", GF_LOG_INFO, "Stopping gluster %s running in pid: %d", - service, pid); + gf_log (this->name, GF_LOG_DEBUG, "Stopping gluster %s running in pid: " + "%d", service, pid); ret = kill (pid, sig); + if (!force_kill) + goto out; - if (force_kill) { - sleep (1); - file = fopen (pidfile, "r+"); - if (!file) { - ret = 0; - goto out; - } - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = kill (pid, SIGKILL); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "kill pid %d reason: %s", pid, - strerror(errno)); - goto out; - } - - } else if (0 == ret){ - is_locked = _gf_true; - } - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "unlink pidfile: %s", pidfile); + sleep (1); + if (glusterd_is_service_running (pidfile, NULL)) { + ret = kill (pid, SIGKILL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "kill pid %d reason: %s", pid, + strerror(errno)); goto out; } } ret = 0; out: - if (is_locked && file) - if (lockf (fileno (file), F_ULOCK, 0) < 0) - gf_log ("", GF_LOG_WARNING, "Cannot unlock pidfile: %s" - " reason: %s", pidfile, strerror(errno)); - if (file) - fclose (file); return ret; } void +glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len) +{ + char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; + + md5_wrapper ((unsigned char *) sock_filepath, strlen(sock_filepath), md5_sum); + snprintf (sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, md5_sum); +} + +void glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, char *sockpath, size_t len) { char export_path[PATH_MAX] = {0,}; char sock_filepath[PATH_MAX] = {0,}; - char md5_sum[MD5_DIGEST_LEN*2+1] = {0,}; char volume_dir[PATH_MAX] = {0,}; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; int expected_file_len = 0; - expected_file_len = strlen (glusterd_sock_dir) + strlen ("/") + - MD5_DIGEST_LEN*2 + strlen (".socket") + 1; + expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") + + MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1; GF_ASSERT (len >= expected_file_len); this = THIS; GF_ASSERT (this); @@ -972,10 +1485,8 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path); snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s", volume_dir, brickinfo->hostname, export_path); - _get_md5_str (md5_sum, sizeof (md5_sum), - (uint8_t*)sock_filepath, strlen (sock_filepath)); - snprintf (sockpath, len, "%s/%s.socket", glusterd_sock_dir, md5_sum); + glusterd_set_socket_filepath (sock_filepath, sockpath, len); } /* connection happens only if it is not aleady connected, @@ -983,26 +1494,41 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, */ int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, char *socketpath) { int ret = 0; - char socketpath[PATH_MAX] = {0}; + char volume_id_str[64]; + char *brickid = NULL; dict_t *options = NULL; struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); + GF_ASSERT (socketpath); if (brickinfo->rpc == NULL) { - glusterd_set_brick_socket_filepath (volinfo, brickinfo, - socketpath, - sizeof (socketpath)); - ret = rpc_clnt_transport_unix_options_build (&options, socketpath); + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, socketpath, + 600); if (ret) goto out; + + uuid_utoa_r (volinfo->volume_id, volume_id_str); + ret = gf_asprintf (&brickid, "%s:%s:%s", volume_id_str, + brickinfo->hostname, brickinfo->path); + if (ret < 0) + goto out; + + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&rpc, options, glusterd_brick_rpc_notify, - brickinfo); + brickid); + synclock_lock (&priv->big_lock); if (ret) goto out; brickinfo->rpc = rpc; @@ -1012,25 +1538,53 @@ out: return ret; } +/* Caller should ensure that brick process is not running*/ +static void +_reap_brick_process (char *pidfile, char *brickpath) +{ + unlink (pidfile); + /* Brick process is not running and pmap may have an entry for it.*/ + pmap_registry_remove (THIS, 0, brickpath, + GF_PMAP_PORT_BRICKSERVER, NULL); +} + +static int +_mk_rundir_p (glusterd_volinfo_t *volinfo) +{ + char voldir[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + priv = this->private; + GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv); + snprintf (rundir, sizeof (rundir)-1, "%s/run", voldir); + ret = mkdir_p (rundir, 0777, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to create rundir"); + return ret; +} + int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait) { int32_t ret = -1; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + char pidfile[PATH_MAX+1] = {0,}; char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; runner_t runner = {0,}; - char rundir[PATH_MAX] = {0,}; char exp_path[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; int port = 0; int rdma_port = 0; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; char socketpath[PATH_MAX] = {0}; + char glusterd_uuid[1024] = {0,}; + char valgrind_logfile[PATH_MAX] = {0}; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1039,85 +1593,93 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, GF_ASSERT (this); priv = this->private; + GF_ASSERT (priv); - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); - ret = mkdir (rundir, 0777); - - if ((ret == -1) && (EEXIST != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to create rundir %s", - rundir); + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = 0; goto out; } + ret = _mk_rundir_p (volinfo); + if (ret) + goto out; + glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, sizeof (socketpath)); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log ("", GF_LOG_INFO, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto connect; - } - } + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + if (glusterd_is_service_running (pidfile, NULL)) + goto connect; - ret = pmap_registry_search (this, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER); - if (ret) { - ret = 0; - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log ("", GF_LOG_INFO, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto connect; - } else if (0 == ret) { - is_locked = _gf_true; - } + _reap_brick_process (pidfile, brickinfo->path); + + port = brickinfo->port; + if (!port) + port = pmap_registry_alloc (THIS); + + /* Build the exp_path, before starting the glusterfsd even in + valgrind mode. Otherwise all the glusterfsd processes start + writing the valgrind log to the same file. + */ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); + runinit (&runner); + + if (priv->valgrind) { + /* Run bricks with valgrind */ + if (volinfo->logdir) { + snprintf (valgrind_logfile, PATH_MAX, + "%s/valgrind-%s-%s.log", + volinfo->logdir, + volinfo->volname, exp_path); + } else { + snprintf (valgrind_logfile, PATH_MAX, + "%s/bricks/valgrind-%s-%s.log", + DEFAULT_LOG_FILE_DIRECTORY, + volinfo->volname, exp_path); } - /* This means, pmap has the entry, remove it */ - ret = pmap_registry_remove (this, 0, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER, NULL); + + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } - unlink (pidfile); - gf_log ("", GF_LOG_INFO, "About to start glusterfs" - " for brick %s:%s", brickinfo->hostname, - brickinfo->path); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); - snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, - brickinfo->hostname, exp_path); + if (volinfo->is_snap_volume) { + snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + brickinfo->hostname, exp_path); + } else { + snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); + } - if (!brickinfo->logfile && volinfo->logdir) { - snprintf (logfile, PATH_MAX, "%s/%s.log", volinfo->logdir, - exp_path); - brickinfo->logfile = gf_strdup (logfile); - } else if (!brickinfo->logfile) { + if (volinfo->logdir) { + snprintf (logfile, PATH_MAX, "%s/%s.log", + volinfo->logdir, exp_path); + } else { snprintf (logfile, PATH_MAX, "%s/bricks/%s.log", DEFAULT_LOG_FILE_DIRECTORY, exp_path); - brickinfo->logfile = gf_strdup (logfile); } + if (!brickinfo->logfile) + brickinfo->logfile = gf_strdup (logfile); - port = brickinfo->port; - if (!port) - port = pmap_registry_alloc (THIS); - - runinit (&runner); - runner_add_args (&runner, GFS_PREFIX"/sbin/glusterfsd", - "-s", "localhost", "--volfile-id", volfile, + (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s", + uuid_utoa (MY_UUID)); + runner_add_args (&runner, SBIN_DIR"/glusterfsd", + "-s", brickinfo->hostname, "--volfile-id", volfile, "-p", pidfile, "-S", socketpath, "--brick-name", brickinfo->path, - "-l", brickinfo->logfile, "--brick-port", NULL); + "-l", brickinfo->logfile, + "--xlator-option", glusterd_uuid, + NULL); + runner_add_arg (&runner, "--brick-port"); if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { runner_argprintf (&runner, "%d", port); } else { @@ -1134,26 +1696,34 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, runner_argprintf (&runner, "%s-server.listen-port=%d", volinfo->volname, port); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + runner_log (&runner, "", GF_LOG_DEBUG, "Starting GlusterFS"); - ret = runner_run (&runner); + if (wait) { + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); - if (ret == 0) { - //pmap_registry_bind (THIS, port, brickinfo->path); - brickinfo->port = port; - brickinfo->rdma_port = rdma_port; + } else { + ret = runner_run_nowait (&runner); } -connect: - ret = glusterd_brick_connect (volinfo, brickinfo); if (ret) goto out; + + brickinfo->port = port; + brickinfo->rdma_port = rdma_port; + +connect: + ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to connect to brick %s:%s on %s", + brickinfo->hostname, brickinfo->path, socketpath); + goto out; + } out: - if (is_locked && file) - if (lockf (fileno (file), F_ULOCK, 0) < 0) - gf_log ("", GF_LOG_WARNING, "Cannot unlock pidfile: %s" - " reason: %s", pidfile, strerror(errno)); - if (file) - fclose (file); return ret; } @@ -1181,7 +1751,7 @@ glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo, if (ret && (ENOENT == errno)) { ret = 0; } else { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to remove %s" + gf_log (this->name, GF_LOG_ERROR, "Failed to remove %s" " error: %s", socketpath, strerror (errno)); } @@ -1191,24 +1761,37 @@ glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo, int32_t glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) { + rpc_clnt_t *rpc = NULL; + glusterd_conf_t *priv = THIS->private; + GF_ASSERT (brickinfo); - if (brickinfo->rpc) { - rpc_clnt_unref (brickinfo->rpc); - brickinfo->rpc = NULL; + if (!brickinfo) { + gf_log_callingfn ("glusterd", GF_LOG_WARNING, "!brickinfo"); + return -1; } + + rpc = brickinfo->rpc; + brickinfo->rpc = NULL; + + if (rpc) { + synclock_unlock (&priv->big_lock); + rpc_clnt_unref (rpc); + synclock_lock (&priv->big_lock); + } + return 0; } int32_t glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick) { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0,}; + int ret = 0; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1217,17 +1800,22 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, GF_ASSERT (this); priv = this->private; - (void) glusterd_brick_disconnect (brickinfo); + if (del_brick) + list_del_init (&brickinfo->brick_list); - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - - ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); - if (ret == 0) { - glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); - (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo); + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + (void) glusterd_brick_disconnect (brickinfo); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); + if (ret == 0) { + glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); + (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo); + } } + + if (del_brick) + glusterd_delete_brick (volinfo, brickinfo); + return ret; } @@ -1257,6 +1845,114 @@ out: return ret; } +/* Free LINE[0..N-1] and then the LINE buffer. */ +static void +free_lines (char **line, size_t n) +{ + size_t i; + for (i = 0; i < n; i++) + GF_FREE (line[i]); + GF_FREE (line); +} + +char ** +glusterd_readin_file (const char *filepath, int *line_count) +{ + int ret = -1; + int n = 8; + int counter = 0; + char buffer[PATH_MAX + 256] = {0}; + char **lines = NULL; + FILE *fp = NULL; + void *p; + + fp = fopen (filepath, "r"); + if (!fp) + goto out; + + lines = GF_CALLOC (1, n * sizeof (*lines), gf_gld_mt_charptr); + if (!lines) + goto out; + + for (counter = 0; fgets (buffer, sizeof (buffer), fp); counter++) { + + if (counter == n-1) { + n *= 2; + p = GF_REALLOC (lines, n * sizeof (char *)); + if (!p) { + free_lines (lines, n/2); + lines = NULL; + goto out; + } + lines = p; + } + + lines[counter] = gf_strdup (buffer); + } + + lines[counter] = NULL; + /* Reduce allocation to minimal size. */ + p = GF_REALLOC (lines, (counter + 1) * sizeof (char *)); + if (!p) { + free_lines (lines, counter); + lines = NULL; + goto out; + } + lines = p; + + *line_count = counter; + ret = 0; + + out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "%s", strerror (errno)); + if (fp) + fclose (fp); + + return lines; +} + +int +glusterd_compare_lines (const void *a, const void *b) { + + return strcmp(* (char * const *) a, * (char * const *) b); +} + +int +glusterd_sort_and_redirect (const char *src_filepath, int dest_fd) +{ + int ret = -1; + int line_count = 0; + int counter = 0; + char **lines = NULL; + + + if (!src_filepath || dest_fd < 0) + goto out; + + lines = glusterd_readin_file (src_filepath, &line_count); + if (!lines) + goto out; + + qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines); + + for (counter = 0; lines[counter]; counter++) { + + ret = write (dest_fd, lines[counter], + strlen (lines[counter])); + if (ret < 0) + goto out; + + GF_FREE (lines[counter]); + } + + ret = 0; + out: + GF_FREE (lines); + + return ret; +} + int glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) { @@ -1271,9 +1967,10 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) char sort_filepath[PATH_MAX] = {0}; gf_boolean_t unlink_sortfile = _gf_false; int sort_fd = 0; + xlator_t *this = NULL; GF_ASSERT (volinfo); - + this = THIS; priv = THIS->private; GF_ASSERT (priv); @@ -1282,10 +1979,10 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) snprintf (cksum_path, sizeof (cksum_path), "%s/%s", path, GLUSTERD_CKSUM_FILE); - fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0644); + fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0600); if (-1 == fd) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", + gf_log (this->name, GF_LOG_ERROR, "Unable to open %s, errno: %d", cksum_path, errno); ret = -1; goto out; @@ -1293,28 +1990,36 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) snprintf (filepath, sizeof (filepath), "%s/%s", path, GLUSTERD_VOLUME_INFO_FILE); - snprintf (sort_filepath, sizeof (sort_filepath), "/tmp/%s.XXXXXX", - volinfo->volname); + snprintf (sort_filepath, sizeof (sort_filepath), + "/tmp/%s.XXXXXX", volinfo->volname); + sort_fd = mkstemp (sort_filepath); if (sort_fd < 0) { - gf_log ("", GF_LOG_ERROR, "Could not generate temp file, " - "reason: %s for volume: %s", strerror (errno), + gf_log (this->name, GF_LOG_ERROR, "Could not generate temp " + "file, reason: %s for %s: %s", strerror (errno), + (volinfo->is_snap_volume)?"snap":"volume", volinfo->volname); goto out; } else { unlink_sortfile = _gf_true; } - ret = runcmd ("sort", filepath, "-o", sort_filepath, NULL); + /* sort the info file, result in sort_filepath */ + + ret = glusterd_sort_and_redirect (filepath, sort_fd); if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to sort file %s to %s", - filepath, sort_filepath); + gf_log (this->name, GF_LOG_ERROR, "sorting info file failed"); goto out; } + + ret = close (sort_fd); + if (ret) + goto out; + ret = get_checksum_for_path (sort_filepath, &cksum); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get checksum" + gf_log (this->name, GF_LOG_ERROR, "Unable to get checksum" " for path: %s", sort_filepath); goto out; } @@ -1333,57 +2038,88 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) goto out; volinfo->cksum = cksum; - out: if (fd > 0) close (fd); - if (sort_fd > 0) - close (sort_fd); if (unlink_sortfile) unlink (sort_filepath); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } -void -_add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) +int +_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) { - glusterd_voldict_ctx_t *ctx = NULL; + glusterd_dict_ctx_t *ctx = NULL; char optkey[512] = {0,}; int ret = -1; ctx = data; - snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, ctx->key_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, key); if (ret) gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", - ctx->key_name, ctx->count, key); - snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, + ctx->key_name, ctx->opt_count, key); + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, ctx->val_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, value->data); if (ret) gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", - ctx->val_name, ctx->count, value->data); + ctx->val_name, ctx->opt_count, value->data); ctx->opt_count++; - return; + return ret; +} + +int32_t +glusterd_add_bricks_hname_path_to_dict (dict_t *dict, + glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + int ret = 0; + char key[256] = {0}; + int index = 0; + + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + snprintf (key, sizeof (key), "%d-hostname", index); + ret = dict_set_str (dict, key, brickinfo->hostname); + if (ret) + goto out; + + snprintf (key, sizeof (key), "%d-path", index); + ret = dict_set_str (dict, key, brickinfo->path); + if (ret) + goto out; + + index++; + } +out: + return ret; } int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { - int32_t ret = -1; - char key[512] = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t i = 1; - char *volume_id_str = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - glusterd_voldict_ctx_t ctx = {0}; + int32_t ret = -1; + char prefix[512] = {0,}; + char key[512] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t i = 1; + char *volume_id_str = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + glusterd_dict_ctx_t ctx = {0}; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1398,6 +2134,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_set_int32 (dict, key, volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set " + "is_volume_restored option for %s volume", + volinfo->volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.brick_count", count); ret = dict_set_int32 (dict, key, volinfo->brick_count); @@ -1429,6 +2174,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.replica_count", count); + ret = dict_set_int32 (dict, key, volinfo->replica_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.dist_count", count); + ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.ckusm", count); ret = dict_set_int64 (dict, key, volinfo->cksum); if (ret) @@ -1440,30 +2197,91 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; - volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); - if (!volume_id_str) + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); goto out; + } + volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); + if (!volume_id_str) { + ret = -1; + goto out; + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.volume_id", count); ret = dict_set_dynstr (dict, key, volume_id_str); if (ret) goto out; + volume_id_str = NULL; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.username", count); + str = glusterd_auth_get_username (volinfo); + if (str) { + ret = dict_set_dynstr (dict, key, gf_strdup (str)); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.password", count); + str = glusterd_auth_get_password (volinfo); + if (str) { + ret = dict_set_dynstr (dict, key, gf_strdup (str)); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance", count); + ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd); + if (ret) + goto out; + + if (volinfo->rebal.defrag_cmd) { + rebalance_id_str = gf_strdup (uuid_utoa + (volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + ret = -1; + goto out; + } + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance-id", count); + ret = dict_set_dynstr (dict, key, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_set_uint32 (dict, key, volinfo->rebal.op); + if (ret) + goto out; memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); - ret = dict_set_int32 (dict, key, volinfo->rb_status); + ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, count); gf_asprintf (&src_brick, "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = dict_set_dynstr (dict, key, src_brick); if (ret) goto out; @@ -1472,21 +2290,35 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, count); gf_asprintf (&dst_brick, "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = dict_set_dynstr (dict, key, dst_brick); if (ret) goto out; + + rb_id_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + if (!rb_id_str) { + ret = -1; + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_set_dynstr (dict, key, rb_id_str); + if (ret) + goto out; + rb_id_str = NULL; } + snprintf (prefix, sizeof (prefix), "volume%d", count); ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; ctx.key_name = "key"; ctx.val_name = "value"; GF_ASSERT (volinfo->dict); - dict_foreach (volinfo->dict, _add_volinfo_dict_to_prdict, &ctx); + dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -1495,13 +2327,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; ctx.key_name = "slave-num"; ctx.val_name = "slave-val"; GF_ASSERT (volinfo->gsync_slaves); - dict_foreach (volinfo->gsync_slaves, _add_volinfo_dict_to_prdict, &ctx); + dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); @@ -1525,11 +2357,48 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_status for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + count, i); + ret = dict_set_str (dict, key, brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_device for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } + /* Add volume op-versions to dict. This prevents volume inconsistencies + * in the cluster + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_set_int32 (dict, key, volinfo->op_version); + if (ret) + goto out; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_set_int32 (dict, key, volinfo->client_op_version); out: + GF_FREE (volume_id_str); + GF_FREE (rebalance_id_str); + GF_FREE (rb_id_str); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -1543,6 +2412,7 @@ glusterd_build_volume_dict (dict_t **vols) glusterd_conf_t *priv = NULL; glusterd_volinfo_t *volinfo = NULL; int32_t count = 0; + glusterd_dict_ctx_t ctx = {0}; priv = THIS->private; @@ -1563,6 +2433,17 @@ glusterd_build_volume_dict (dict_t **vols) if (ret) goto out; + ctx.dict = dict; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach (priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count); + if (ret) + goto out; + *vols = dict; out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -1573,7 +2454,8 @@ out: } int32_t -glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) +glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, + char *hostname) { int32_t ret = -1; @@ -1609,8 +2491,8 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) //Mismatch detected ret = 0; gf_log ("", GF_LOG_ERROR, "Version of volume %s differ." - "local version = %d, remote version = %d", - volinfo->volname, volinfo->version, version); + "local version = %d, remote version = %d on peer %s", + volinfo->volname, volinfo->version, version, hostname); *status = GLUSTERD_VOL_COMP_UPDATE_REQ; goto out; } else if (version < volinfo->version) { @@ -1629,8 +2511,8 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) if (cksum != volinfo->cksum) { ret = 0; gf_log ("", GF_LOG_ERROR, "Cksums of volume %s differ." - " local cksum = %d, remote cksum = %d", - volinfo->volname, volinfo->cksum, cksum); + " local cksum = %u, remote cksum = %u on peer %s", + volinfo->volname, volinfo->cksum, cksum, hostname); *status = GLUSTERD_VOL_COMP_RJT; goto out; } @@ -1644,8 +2526,8 @@ out: } static int32_t -import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, - char *value_prefix, int opt_count, int count) +import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, + char *value_prefix, int opt_count, char *prefix) { char key[512] = {0,}; int32_t ret = 0; @@ -1657,8 +2539,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, while (i <= opt_count) { memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.%s%d", - count, key_prefix, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, key_prefix, i); ret = dict_get_str (vols, key, &opt_key); if (ret) { snprintf (msg, sizeof (msg), "Volume dict key not " @@ -1667,8 +2549,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.%s%d", - count, value_prefix, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, value_prefix, i); ret = dict_get_str (vols, key, &opt_val); if (ret) { snprintf (msg, sizeof (msg), "Volume dict value not " @@ -1697,6 +2579,265 @@ out: } +gf_boolean_t +glusterd_is_quorum_option (char *option) +{ + gf_boolean_t res = _gf_false; + int i = 0; + char *keys[] = {GLUSTERD_QUORUM_TYPE_KEY, + GLUSTERD_QUORUM_RATIO_KEY, NULL}; + + for (i = 0; keys[i]; i++) { + if (strcmp (option, keys[i]) == 0) { + res = _gf_true; + break; + } + } + return res; +} + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value) +{ + int ret = 0; + gf_boolean_t reconfigured = _gf_false; + gf_boolean_t all = _gf_false; + char *oldquorum = NULL; + char *newquorum = NULL; + char *oldratio = NULL; + char *newratio = NULL; + + if ((strcmp ("all", option) != 0) && + !glusterd_is_quorum_option (option)) + goto out; + + if (strcmp ("all", option) == 0) + all = _gf_true; + + if (all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) { + newquorum = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY, + &oldquorum); + } + + if (all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) { + newratio = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY, + &oldratio); + } + + reconfigured = _gf_true; + + if (oldquorum && newquorum && (strcmp (oldquorum, newquorum) == 0)) + reconfigured = _gf_false; + if (oldratio && newratio && (strcmp (oldratio, newratio) == 0)) + reconfigured = _gf_false; + + if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) && + (newquorum == NULL)) + reconfigured = _gf_false; +out: + return reconfigured; +} + +static inline gf_boolean_t +_is_contributing_to_quorum (gd_quorum_contrib_t contrib) +{ + if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN)) + return _gf_true; + return _gf_false; +} + +static inline gf_boolean_t +_does_quorum_meet (int active_count, int quorum_count) +{ + return (active_count >= quorum_count); +} + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int inquorum_count = 0; + char *val = NULL; + double quorum_percentage = 0.0; + gf_boolean_t ratio = _gf_false; + int count = 0; + + conf = this->private; + //Start with counting self + inquorum_count = 1; + if (active_count) + *active_count = 1; + list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + if (peerinfo->quorum_contrib == QUORUM_WAITING) + goto out; + + if (_is_contributing_to_quorum (peerinfo->quorum_contrib)) + inquorum_count = inquorum_count + 1; + + if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) + *active_count = *active_count + 1; + } + + ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); + if (ret == 0) { + ratio = _gf_true; + ret = gf_string2percent (val, &quorum_percentage); + if (!ret) + ratio = _gf_true; + } + if (ratio) + count = CEILING_POS (inquorum_count * + quorum_percentage / 100.0); + else + count = (inquorum_count * 50 / 100) + 1; + + *quorum_count = count; + ret = 0; +out: + return ret; +} + +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t res = _gf_false; + char *quorum_type = NULL; + int ret = 0; + + ret = dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, + &quorum_type); + if (ret) + goto out; + + if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) == 0) + res = _gf_true; +out: + return res; +} + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + conf = this->private; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (glusterd_is_volume_in_server_quorum (volinfo)) { + return _gf_true; + } + } + return _gf_false; +} + +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this) +{ + int quorum_count = 0; + int active_count = 0; + gf_boolean_t in = _gf_false; + glusterd_conf_t *conf = NULL; + int ret = -1; + + conf = this->private; + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto out; + + if (!_does_quorum_meet (active_count, quorum_count)) { + goto out; + } + + in = _gf_true; +out: + return in; +} + +int +glusterd_spawn_daemons (void *opaque) +{ + glusterd_conf_t *conf = THIS->private; + gf_boolean_t start_bricks = !conf->restart_done; + + if (start_bricks) { + glusterd_restart_bricks (conf); + conf->restart_done = _gf_true; + } + glusterd_restart_gsyncds (conf); + glusterd_restart_rebalance (conf); + return 0; +} + +void +glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, + gf_boolean_t meets_quorum) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *conf = NULL; + + conf = this->private; + if (volinfo->status != GLUSTERD_STATUS_STARTED) + goto out; + + if (!glusterd_is_volume_in_server_quorum (volinfo)) + meets_quorum = _gf_true; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!glusterd_is_local_brick (this, volinfo, brickinfo)) + continue; + if (meets_quorum) + glusterd_brick_start (volinfo, brickinfo, _gf_false); + else + glusterd_brick_stop (volinfo, brickinfo, _gf_false); + } +out: + return; +} + +int +glusterd_do_quorum_action () +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int active_count = 0; + int quorum_count = 0; + gf_boolean_t meets = _gf_false; + + this = THIS; + conf = this->private; + + conf->pending_quorum_action = _gf_true; + ret = glusterd_lock (conf->uuid); + if (ret) + goto out; + + { + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto unlock; + + if (_does_quorum_meet (active_count, quorum_count)) + meets = _gf_true; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + glusterd_do_volume_quorum_action (this, volinfo, meets); + } + } +unlock: + (void)glusterd_unlock (conf->uuid); + conf->pending_quorum_action = _gf_false; +out: + return ret; +} + int32_t glusterd_import_friend_volume_opts (dict_t *vols, int count, glusterd_volinfo_t *volinfo) @@ -1705,6 +2846,7 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, int32_t ret = -1; int opt_count = 0; char msg[2048] = {0}; + char volume_prefix[1024] = {0}; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -1715,8 +2857,9 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, goto out; } - ret = import_prdict_volinfo_dict (vols, volinfo->dict, "key", - "value", opt_count, count); + snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count); + ret = import_prdict_dict (vols, volinfo->dict, "key", "value", + opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import options dict " "specified for %s", volinfo->volname); @@ -1732,9 +2875,8 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, goto out; } - ret = import_prdict_volinfo_dict (vols, volinfo->gsync_slaves, - "slave-num", "slave-val", opt_count, - count); + ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num", + "slave-val", opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import gsync sessions " "specified for %s", volinfo->volname); @@ -1755,6 +2897,8 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, { char key[512] = {0,}; int ret = -1; + int32_t snap_status = 0; + char *snap_device = NULL; char *hostname = NULL; char *path = NULL; glusterd_brickinfo_t *new_brickinfo = NULL; @@ -1782,12 +2926,31 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, goto out; } + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &snap_status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &snap_device); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + strcpy (new_brickinfo->device_path, snap_device); + new_brickinfo->snap_status = snap_status; + //peerinfo might not be added yet (void) glusterd_resolve_brick (new_brickinfo); ret = 0; @@ -1837,7 +3000,13 @@ glusterd_import_volinfo (dict_t *vols, int count, char msg[2048] = {0}; char *src_brick = NULL; char *dst_brick = NULL; + char *str = NULL; int rb_status = 0; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; + int op_version = 0; + int client_op_version = 0; + uint32_t is_snap_volume = 0; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -1849,6 +3018,22 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_get_uint32 (vols, key, &is_snap_volume); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (is_snap_volume == _gf_true) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Not syncing snap volume %s", volname); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new (&new_volinfo); if (ret) goto out; @@ -1905,11 +3090,27 @@ glusterd_import_volinfo (dict_t *vols, int count, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.stripe_count", count); ret = dict_get_int32 (vols, key, &new_volinfo->stripe_count); - if (!ret) { - if (new_volinfo->stripe_count) - new_volinfo->replica_count = (new_volinfo->sub_count / - new_volinfo->stripe_count); - } + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + /* not having a 'replica_count' key is not a error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.replica_count", count); + ret = dict_get_int32 (vols, key, &new_volinfo->replica_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + /* not having a 'dist_count' key is not a error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.dist_count", count); + ret = dict_get_int32 (vols, key, &new_volinfo->dist_leaf_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.ckusm", count); @@ -1929,6 +3130,26 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + uuid_parse (volume_id_str, new_volinfo->volume_id); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.username", count); + ret = dict_get_str (vols, key, &str); + if (!ret) { + ret = glusterd_auth_set_username (new_volinfo, str); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.password", count); + ret = dict_get_str (vols, key, &str); + if (!ret) { + ret = glusterd_auth_set_password (new_volinfo, str); + if (ret) + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.transport_type", count); ret = dict_get_uint32 (vols, key, &new_volinfo->transport_type); @@ -1938,16 +3159,67 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } - uuid_parse (volume_id_str, new_volinfo->volume_id); + new_volinfo->is_snap_volume = is_snap_volume; + + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "is_volume_restored option for %s", + volname); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (new_volinfo->rebal.defrag_cmd) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-id", count); + ret = dict_get_str (vols, key, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rebalance_id_str, + new_volinfo->rebal.rebalance_id); + } + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_get_uint32 (vols, key,(uint32_t *) &new_volinfo->rebal.op); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); ret = dict_get_int32 (vols, key, &rb_status); if (ret) goto out; - new_volinfo->rb_status = rb_status; + new_volinfo->rep_brick.rb_status = rb_status; - if (new_volinfo->rb_status > GF_RB_STATUS_NONE) { + if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, @@ -1956,8 +3228,8 @@ glusterd_import_volinfo (dict_t *vols, int count, if (ret) goto out; - ret = glusterd_brickinfo_from_brick (src_brick, - &new_volinfo->src_brick); + ret = glusterd_brickinfo_new_from_brick (src_brick, + &new_volinfo->rep_brick.src_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " src brickinfo"); @@ -1971,19 +3243,65 @@ glusterd_import_volinfo (dict_t *vols, int count, if (ret) goto out; - ret = glusterd_brickinfo_from_brick (dst_brick, - &new_volinfo->dst_brick); + ret = glusterd_brickinfo_new_from_brick (dst_brick, + &new_volinfo->rep_brick.dst_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " dst brickinfo"); goto out; } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_get_str (vols, key, &rb_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rb_id_str, new_volinfo->rep_brick.rb_id); + } } ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo); if (ret) goto out; + + /* Import the volume's op-versions if available else set it to 1. + * Not having op-versions implies this informtation was obtained from a + * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version + * 1 and all volumes are at op-versions 1. + * + * Either both the volume op-versions should be absent or both should be + * present. Only one being present is a failure + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_get_int32 (vols, key, &op_version); + if (ret) + ret = 0; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_get_int32 (vols, key, &client_op_version); + if (ret) + ret = 0; + + if (op_version && client_op_version) { + new_volinfo->op_version = op_version; + new_volinfo->client_op_version = client_op_version; + } else if (((op_version == 0) && (client_op_version != 0)) || + ((op_version != 0) && (client_op_version == 0))) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, + "Only one volume op-version found"); + goto out; + } else { + new_volinfo->op_version = 1; + new_volinfo->client_op_version = 1; + } + ret = glusterd_import_bricks (vols, count, new_volinfo); if (ret) goto out; @@ -2061,7 +3379,11 @@ glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo, old_brickinfo->path, new_volinfo, &new_brickinfo); if (ret) { - ret = glusterd_brick_stop (old_volinfo, old_brickinfo); + /*TODO: may need to switch to 'atomic' flavour of + * brick_stop, once we make peer rpc program also + * synctask enabled*/ + ret = glusterd_brick_stop (old_volinfo, old_brickinfo, + _gf_false); if (ret) gf_log ("glusterd", GF_LOG_ERROR, "Failed to " "stop brick %s:%s", old_brickinfo->hostname, @@ -2086,7 +3408,8 @@ glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, * stop stale bricks. Stale volume information is going to be deleted. * Which deletes the valid brick information inside stale volinfo. * We dont want brick_rpc_notify to access already deleted brickinfo. - * Disconnect valid bricks. + * Disconnect all bricks from stale_volinfo (unconditionally), since + * they are being deleted subsequently. */ if (glusterd_is_volume_started (stale_volinfo)) { if (glusterd_is_volume_started (valid_volinfo)) { @@ -2095,10 +3418,12 @@ glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, //Only valid bricks will be running now. (void) glusterd_volinfo_copy_brick_portinfo (valid_volinfo, stale_volinfo); - (void) glusterd_volume_disconnect_all_bricks (stale_volinfo); + } else { (void) glusterd_stop_bricks (stale_volinfo); } + + (void) glusterd_volume_disconnect_all_bricks (stale_volinfo); } /* Delete all the bricks and stores and vol files. They will be created * again by the valid_volinfo. Volume store delete should not be @@ -2108,7 +3433,7 @@ glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, (void) glusterd_delete_all_bricks (stale_volinfo); if (stale_volinfo->shandle) { unlink (stale_volinfo->shandle->path); - (void) glusterd_store_handle_destroy (stale_volinfo->shandle); + (void) gf_store_handle_destroy (stale_volinfo->shandle); stale_volinfo->shandle = NULL; } (void) glusterd_volinfo_delete (stale_volinfo); @@ -2135,6 +3460,12 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; + if (!new_volinfo) { + gf_log (this->name, GF_LOG_DEBUG, + "Not importing snap volume"); + goto out; + } + ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo); if (0 == ret) { (void) glusterd_delete_stale_volume (old_volinfo, new_volinfo); @@ -2180,14 +3511,104 @@ out: return ret; } +int +glusterd_get_global_opt_version (dict_t *opts, uint32_t *version) +{ + int ret = -1; + char *version_str = NULL; + + ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str); + if (ret) + goto out; + + ret = gf_string2uint (version_str, version); + if (ret) + goto out; + ret = 0; +out: + return ret; +} + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str) +{ + int ret = -1; + char version_string[64] = {0}; + uint32_t version = 0; + + ret = glusterd_get_global_opt_version (opts, &version); + if (ret) + goto out; + version++; + snprintf (version_string, sizeof (version_string), "%"PRIu32, version); + *version_str = gf_strdup (version_string); + if (*version_str) + ret = 0; +out: + return ret; +} + +int32_t +glusterd_import_global_opts (dict_t *friend_data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + dict_t *import_options = NULL; + int count = 0; + uint32_t local_version = 0; + uint32_t remote_version = 0; + + this = THIS; + conf = this->private; + + ret = dict_get_int32 (friend_data, "global-opt-count", &count); + if (ret) { + //old version peer + ret = 0; + goto out; + } + + import_options = dict_new (); + if (!import_options) + goto out; + ret = import_prdict_dict (friend_data, import_options, "key", "val", + count, "global"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to import" + " global options"); + goto out; + } + + ret = glusterd_get_global_opt_version (conf->opts, &local_version); + if (ret) + goto out; + ret = glusterd_get_global_opt_version (import_options, &remote_version); + if (ret) + goto out; + if (remote_version > local_version) { + ret = glusterd_store_options (this, import_options); + if (ret) + goto out; + dict_unref (conf->opts); + conf->opts = dict_ref (import_options); + } + ret = 0; +out: + if (import_options) + dict_unref (import_options); + return ret; +} + int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status) +glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname) { int32_t ret = -1; int32_t count = 0; int i = 1; gf_boolean_t update = _gf_false; gf_boolean_t stale_nfs = _gf_false; + gf_boolean_t stale_shd = _gf_false; GF_ASSERT (vols); GF_ASSERT (status); @@ -2197,7 +3618,8 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) goto out; while (i <= count) { - ret = glusterd_compare_friend_volume (vols, i, status); + ret = glusterd_compare_friend_volume (vols, i, status, + hostname); if (ret) goto out; @@ -2212,16 +3634,23 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) } if (update) { - if (glusterd_is_nfs_started ()) + if (glusterd_is_nodesvc_running ("nfs")) stale_nfs = _gf_true; + if (glusterd_is_nodesvc_running ("glustershd")) + stale_shd = _gf_true; + ret = glusterd_import_global_opts (vols); + if (ret) + goto out; ret = glusterd_import_friend_volumes (vols); if (ret) goto out; if (_gf_false == glusterd_are_all_volumes_stopped ()) { - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (NULL); } else { if (stale_nfs) glusterd_nfs_server_stop (); + if (stale_shd) + glusterd_shd_stop (); } } @@ -2232,47 +3661,279 @@ out: return ret; } +/* Valid only in if service is 'local' to glusterd. + * pid can be -1, if reading pidfile failed */ gf_boolean_t -glusterd_is_nfs_started () +glusterd_is_service_running (char *pidfile, int *pid) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + FILE *file = NULL; + gf_boolean_t running = _gf_false; + int ret = 0; + int fno = 0; - this = THIS; - GF_ASSERT(this); + file = fopen (pidfile, "r+"); + if (!file) + goto out; + + fno = fileno (file); + ret = lockf (fno, F_TEST, 0); + if (ret == -1) + running = _gf_true; + if (!pid) + goto out; + + ret = fscanf (file, "%d", pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s, %s", + pidfile, strerror (errno)); + *pid = -1; + } + +out: + if (file) + fclose (file); + return running; +} + +void +glusterd_get_nodesvc_dir (char *server, char *workdir, + char *path, size_t len) +{ + GF_ASSERT (len == PATH_MAX); + snprintf (path, len, "%s/%s", workdir, server); +} +void +glusterd_get_nodesvc_rundir (char *server, char *workdir, + char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); + snprintf (path, len, "%s/run", dir); +} + +void +glusterd_get_nodesvc_pidfile (char *server, char *workdir, + char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_rundir (server, workdir, dir, sizeof (dir)); + snprintf (path, len, "%s/%s.pid", dir, server); +} + +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, + char *volfile, size_t len) +{ + char dir[PATH_MAX] = {0,}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); + snprintf (volfile, len, "%s/%s-server.vol", dir, server); +} + +void +glusterd_nodesvc_set_online_status (char *server, gf_boolean_t status) +{ + glusterd_conf_t *priv = NULL; + + GF_ASSERT (server); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + GF_ASSERT (priv->nfs); + + if (!strcmp("glustershd", server)) + priv->shd->online = status; + else if (!strcmp ("nfs", server)) + priv->nfs->online = status; +} + +gf_boolean_t +glusterd_is_nodesvc_online (char *server) +{ + glusterd_conf_t *conf = NULL; + gf_boolean_t online = _gf_false; + + GF_ASSERT (server); + conf = THIS->private; + GF_ASSERT (conf); + GF_ASSERT (conf->shd); + GF_ASSERT (conf->nfs); + + if (!strcmp (server, "glustershd")) + online = conf->shd->online; + else if (!strcmp (server, "nfs")) + online = conf->nfs->online; + + return online; +} + +int32_t +glusterd_nodesvc_set_socket_filepath (char *rundir, uuid_t uuid, + char *socketpath, int len) +{ + char sockfilepath[PATH_MAX] = {0,}; + + snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s", + rundir, uuid_utoa (uuid)); + + glusterd_set_socket_filepath (sockfilepath, socketpath, len); + return 0; +} + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) +{ + struct rpc_clnt *rpc = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + nodesrv_t *shd = NULL; + glusterd_volinfo_t *volinfo = NULL; + nodesrv_t *nfs = NULL; + + GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out); + GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out); + + if (pending_node->type == GD_NODE_BRICK) { + brickinfo = pending_node->node; + rpc = brickinfo->rpc; + + } else if (pending_node->type == GD_NODE_SHD) { + shd = pending_node->node; + rpc = shd->rpc; + + } else if (pending_node->type == GD_NODE_REBALANCE) { + volinfo = pending_node->node; + if (volinfo->rebal.defrag) + rpc = volinfo->rebal.defrag->rpc; + + } else if (pending_node->type == GD_NODE_NFS) { + nfs = pending_node->node; + rpc = nfs->rpc; + + } else { + GF_ASSERT (0); + } + +out: + return rpc; +} + +struct rpc_clnt* +glusterd_nodesvc_get_rpc (char *server) +{ + glusterd_conf_t *priv = NULL; + struct rpc_clnt *rpc = NULL; + + GF_ASSERT (server); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + GF_ASSERT (priv->nfs); + + if (!strcmp (server, "glustershd")) + rpc = priv->shd->rpc; + else if (!strcmp (server, "nfs")) + rpc = priv->nfs->rpc; + + return rpc; +} + +int32_t +glusterd_nodesvc_set_rpc (char *server, struct rpc_clnt *rpc) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT (this); priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + GF_ASSERT (priv->nfs); - GLUSTERD_GET_NFS_PIDFILE(pidfile); - ret = access (pidfile, F_OK); + if (!strcmp ("glustershd", server)) + priv->shd->rpc = rpc; + else if (!strcmp ("nfs", server)) + priv->nfs->rpc = rpc; - if (ret == 0) - return _gf_true; - else - return _gf_false; + return ret; } int32_t -glusterd_nfs_server_start () +glusterd_nodesvc_connect (char *server, char *socketpath) { + int ret = 0; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; + + rpc = glusterd_nodesvc_get_rpc (server); + + if (rpc == NULL) { + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, socketpath, + 600); + if (ret) + goto out; + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&rpc, options, + glusterd_nodesvc_rpc_notify, + server); + synclock_lock (&priv->big_lock); + if (ret) + goto out; + (void) glusterd_nodesvc_set_rpc (server, rpc); + } +out: + return ret; +} + +int32_t +glusterd_nodesvc_disconnect (char *server) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - char rundir[PATH_MAX] = {0,}; + struct rpc_clnt *rpc = NULL; + + rpc = glusterd_nodesvc_get_rpc (server); + (void)glusterd_nodesvc_set_rpc (server, NULL); + + if (rpc) + rpc_clnt_unref (rpc); + + return 0; +} + +int32_t +glusterd_nodesvc_start (char *server) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char pidfile[PATH_MAX] = {0,}; + char logfile[PATH_MAX] = {0,}; + char volfile[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + char sockfpath[PATH_MAX] = {0,}; + char volfileid[256] = {0}; + char glusterd_uuid_option[1024] = {0}; + char valgrind_logfile[PATH_MAX] = {0}; this = THIS; GF_ASSERT(this); priv = this->private; - GLUSTERD_GET_NFS_DIR(path, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); + glusterd_get_nodesvc_rundir (server, priv->workdir, + rundir, sizeof (rundir)); ret = mkdir (rundir, 0777); if ((ret == -1) && (EEXIST != errno)) { @@ -2281,21 +3942,130 @@ glusterd_nfs_server_start () goto out; } - GLUSTERD_GET_NFS_PIDFILE(pidfile); - glusterd_get_nfs_filepath (volfile); - + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + glusterd_get_nodesvc_volfile (server, priv->workdir, + volfile, sizeof (volfile)); ret = access (volfile, F_OK); if (ret) { - gf_log ("", GF_LOG_ERROR, "Nfs Volfile %s is not present", - volfile); + gf_log ("", GF_LOG_ERROR, "%s Volfile %s is not present", + server, volfile); goto out; } - snprintf (logfile, PATH_MAX, "%s/nfs.log", DEFAULT_LOG_FILE_DIRECTORY); + snprintf (logfile, PATH_MAX, "%s/%s.log", DEFAULT_LOG_FILE_DIRECTORY, + server); + snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); + + glusterd_nodesvc_set_socket_filepath (rundir, MY_UUID, + sockfpath, sizeof (sockfpath)); + + runinit (&runner); + + if (priv->valgrind) { + snprintf (valgrind_logfile, PATH_MAX, + "%s/valgrind-%s.log", + DEFAULT_LOG_FILE_DIRECTORY, + server); + + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); + } + + runner_add_args (&runner, SBIN_DIR"/glusterfs", + "-s", "localhost", + "--volfile-id", volfileid, + "-p", pidfile, + "-l", logfile, + "-S", sockfpath, NULL); + + if (!strcmp (server, "glustershd")) { + snprintf (glusterd_uuid_option, sizeof (glusterd_uuid_option), + "*replicate*.node-uuid=%s", uuid_utoa (MY_UUID)); + runner_add_args (&runner, "--xlator-option", + glusterd_uuid_option, NULL); + } + runner_log (&runner, "", GF_LOG_DEBUG, + "Starting the nfs/glustershd services"); + + ret = runner_run_nowait (&runner); + if (ret == 0) { + glusterd_nodesvc_connect (server, sockfpath); + } +out: + return ret; +} + +int +glusterd_nfs_server_start () +{ + return glusterd_nodesvc_start ("nfs"); +} + +int +glusterd_shd_start () +{ + return glusterd_nodesvc_start ("glustershd"); +} + +gf_boolean_t +glusterd_is_nodesvc_running (char *server) +{ + char pidfile[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + return glusterd_is_service_running (pidfile, NULL); +} + +int32_t +glusterd_nodesvc_unlink_socket_file (char *server) +{ + int ret = 0; + char sockfpath[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + glusterd_get_nodesvc_rundir (server, priv->workdir, + rundir, sizeof (rundir)); + + glusterd_nodesvc_set_socket_filepath (rundir, MY_UUID, + sockfpath, sizeof (sockfpath)); + + ret = unlink (sockfpath); + if (ret && (ENOENT == errno)) { + ret = 0; + } else { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to remove %s" + " error: %s", sockfpath, strerror (errno)); + } + + return ret; +} + +int32_t +glusterd_nodesvc_stop (char *server, int sig) +{ + char pidfile[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + int ret = 0; + + if (!glusterd_is_nodesvc_running (server)) + goto out; + + (void)glusterd_nodesvc_disconnect (server); - ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-f", volfile, - "-p", pidfile, "-l", logfile, NULL); + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + ret = glusterd_service_stop (server, pidfile, sig, _gf_true); + if (ret == 0) { + glusterd_nodesvc_set_online_status (server, _gf_false); + (void)glusterd_nodesvc_unlink_socket_file (server); + } out: return ret; } @@ -2318,28 +4088,120 @@ glusterd_nfs_pmap_deregister () else gf_log ("", GF_LOG_ERROR, "De-register NFSV3 is unsuccessful"); + if (pmap_unset (NLM_PROGRAM, NLMV4_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered NLM v4 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of NLM v4 failed"); + + if (pmap_unset (NLM_PROGRAM, NLMV1_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered NLM v1 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of NLM v1 failed"); + + if (pmap_unset (ACL_PROGRAM, ACLV3_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered ACL v3 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of ACL v3 failed"); } -int32_t +int glusterd_nfs_server_stop () { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int ret = 0; + gf_boolean_t deregister = _gf_false; + + if (glusterd_is_nodesvc_running ("nfs")) + deregister = _gf_true; + ret = glusterd_nodesvc_stop ("nfs", SIGKILL); + if (ret) + goto out; + if (deregister) + glusterd_nfs_pmap_deregister (); +out: + return ret; +} + +int +glusterd_shd_stop () +{ + return glusterd_nodesvc_stop ("glustershd", SIGTERM); +} + +int +glusterd_add_node_to_dict (char *server, dict_t *dict, int count, + dict_t *vol_opts) +{ + int ret = -1; + glusterd_conf_t *priv = THIS->private; char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; + gf_boolean_t running = _gf_false; + int pid = -1; + int port = 0; + char key[1024] = {0,}; + + glusterd_get_nodesvc_pidfile (server, priv->workdir, pidfile, + sizeof (pidfile)); + //Consider service to be running only when glusterd sees it Online + if (glusterd_is_nodesvc_online (server)) + running = glusterd_is_service_running (pidfile, &pid); + + /* For nfs-servers/self-heal-daemon setting + * brick<n>.hostname = "NFS Server" / "Self-heal Daemon" + * brick<n>.path = uuid + * brick<n>.port = 0 + * + * This might be confusing, but cli displays the name of + * the brick as hostname+path, so this will make more sense + * when output. + */ + snprintf (key, sizeof (key), "brick%d.hostname", count); + if (!strcmp (server, "nfs")) + ret = dict_set_str (dict, key, "NFS Server"); + else if (!strcmp (server, "glustershd")) + ret = dict_set_str (dict, key, "Self-heal Daemon"); + if (ret) + goto out; - this = THIS; - GF_ASSERT(this); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.path", count); + ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (MY_UUID))); + if (ret) + goto out; - priv = this->private; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.port", count); + /* Port is available only for the NFS server. + * Self-heal daemon doesn't provide any port for access + * by entities other than gluster. + */ + if (!strcmp (server, "nfs")) { + if (dict_get (vol_opts, "nfs.port")) { + ret = dict_get_int32 (vol_opts, "nfs.port", &port); + if (ret) + goto out; + } else + port = GF_NFS3_PORT; + } + ret = dict_set_int32 (dict, key, port); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.pid", count); + ret = dict_set_int32 (dict, key, pid); + if (ret) + goto out; - GLUSTERD_GET_NFS_DIR(path, priv); - GLUSTERD_GET_NFS_PIDFILE(pidfile); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.status", count); + ret = dict_set_int32 (dict, key, running); + if (ret) + goto out; - glusterd_service_stop ("nfsd", pidfile, SIGKILL, _gf_true); - glusterd_nfs_pmap_deregister (); - return 0; +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } int @@ -2370,32 +4232,231 @@ glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len) out: - if (tmp_host) - GF_FREE (tmp_host); + GF_FREE (tmp_host); return ret; } int -glusterd_check_generate_start_nfs () +glusterd_check_generate_start_service (int (*create_volfile) (), + int (*stop) (), int (*start) ()) { int ret = -1; - ret = glusterd_create_nfs_volfile (); + ret = create_volfile (); if (ret) goto out; - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; + ret = stop (); + if (ret) + goto out; + + ret = start (); +out: + return ret; +} + +int +glusterd_reconfigure_nodesvc (int (*create_volfile) ()) +{ + int ret = -1; + + ret = create_volfile (); + if (ret) + goto out; + + ret = glusterd_fetchspec_notify (THIS); +out: + return ret; +} + +int +glusterd_reconfigure_shd () +{ + int (*create_volfile) () = glusterd_create_shd_volfile; + return glusterd_reconfigure_nodesvc (create_volfile); +} + +int +glusterd_reconfigure_nfs () +{ + int ret = -1; + gf_boolean_t identical = _gf_false; + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + ret = glusterd_check_nfs_volfile_identical (&identical); + if (ret) + goto out; + + if (identical) { + ret = 0; + goto out; } - ret = glusterd_nfs_server_start (); + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_check_nfs_topology_identical (&identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to NFS volfile, so that NFS will be reconfigured. + */ + if (identical) { + ret = glusterd_create_nfs_volfile(); + if (ret == 0) {/* Only if above PASSES */ + ret = glusterd_fetchspec_notify (THIS); + } + goto out; + } + + /* + * NFS volfile's topology has been changed. NFS server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = glusterd_check_generate_start_nfs (); + +out: + return ret; +} + + +int +glusterd_check_generate_start_nfs () +{ + int ret = 0; + + ret = glusterd_check_generate_start_service (glusterd_create_nfs_volfile, + glusterd_nfs_server_stop, + glusterd_nfs_server_start); + return ret; +} + +int +glusterd_check_generate_start_shd () +{ + int ret = 0; + + ret = glusterd_check_generate_start_service (glusterd_create_shd_volfile, + glusterd_shd_stop, + glusterd_shd_start); + if (ret == -EINVAL) + ret = 0; + return ret; +} + +int +glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo, + int (*nfs_op) (), int (*shd_op) ()) +{ + int ret = 0; + + ret = nfs_op (); + if (ret) + goto out; + + if (volinfo && !glusterd_is_volume_replicate (volinfo)) + goto out; + + ret = shd_op (); + if (ret) + goto out; out: return ret; } int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_nfs_server_start, + glusterd_shd_start); +} + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_nfs_server_stop, + glusterd_shd_stop); +} + +gf_boolean_t +glusterd_are_all_volumes_stopped () +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; + +} + +gf_boolean_t +glusterd_all_replicate_volumes_stopped () +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (!glusterd_is_volume_replicate (voliter)) + continue; + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; +} + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo) +{ + int (*shd_op) () = NULL; + int (*nfs_op) () = NULL; + + shd_op = glusterd_check_generate_start_shd; + nfs_op = glusterd_check_generate_start_nfs; + if (glusterd_are_all_volumes_stopped ()) { + shd_op = glusterd_shd_stop; + nfs_op = glusterd_nfs_server_stop; + } else if (glusterd_all_replicate_volumes_stopped()) { + shd_op = glusterd_shd_stop; + } + return glusterd_nodesvcs_batch_op (volinfo, nfs_op, shd_op); +} + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_reconfigure_nfs, + glusterd_reconfigure_shd); +} + +int glusterd_volume_count_get (void) { glusterd_volinfo_t *tmp_volinfo = NULL; @@ -2437,9 +4498,9 @@ glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path, list_for_each_entry (volinfo, &priv->volumes, vol_list) { ret = glusterd_volume_brickinfo_get (uuid, hostname, path, - volinfo, - brickinfo); - if (!ret) + volinfo, brickinfo); + if (ret == 0) + /*Found*/ goto out; } out: @@ -2448,7 +4509,8 @@ out: int glusterd_brick_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait) { int ret = -1; xlator_t *this = NULL; @@ -2465,61 +4527,83 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, if (uuid_is_null (brickinfo->uuid)) { ret = glusterd_resolve_brick (brickinfo); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve brick: %s:%s", + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); goto out; } } - if (uuid_compare (brickinfo->uuid, conf->uuid)) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { ret = 0; goto out; } - ret = glusterd_volume_start_glusterfs (volinfo, brickinfo); + ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to start " - "glusterfs, ret: %d", ret); + gf_log (this->name, GF_LOG_ERROR, "Unable to start brick %s:%s", + brickinfo->hostname, brickinfo->path); goto out; } out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); return ret; } int glusterd_restart_bricks (glusterd_conf_t *conf) { - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int ret = 0; - gf_boolean_t start_nfs = _gf_false; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; + gf_boolean_t start_nodesvcs = _gf_false; + xlator_t *this = NULL; - GF_ASSERT (conf); + this = THIS; + GF_ASSERT (this); list_for_each_entry (volinfo, &conf->volumes, vol_list) { - //If volume status is not started, do not proceed - if (volinfo->status == GLUSTERD_STATUS_STARTED) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the volume %s", + volinfo->volname); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + glusterd_brick_start (volinfo, brickinfo, _gf_false); + } + } + + list_for_each_entry (snap, &conf->snapshots, snap_list) { + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the snap " + "volume %s", volinfo->volname); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - glusterd_brick_start (volinfo, brickinfo); + glusterd_brick_start (volinfo, brickinfo, + _gf_false); } - start_nfs = _gf_true; } } - if (start_nfs) - glusterd_check_generate_start_nfs (); + + if (start_nodesvcs) + glusterd_nodesvcs_handle_graph_change (NULL); + return ret; } -void +int _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) { + char *path_list = NULL; char *slave = NULL; int uuid_len = 0; + int ret = 0; char uuid_str[64] = {0}; glusterd_volinfo_t *volinfo = NULL; + char *conf_path = NULL; volinfo = data; GF_ASSERT (volinfo); @@ -2527,12 +4611,28 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) if (slave) slave ++; else - return; + return 0; uuid_len = (slave - value->data - 1); - strncpy (uuid_str, (char*)value->data, uuid_len); - glusterd_start_gsync (volinfo, slave, uuid_str, NULL); + + ret = glusterd_get_local_brickpaths (volinfo, &path_list); + + ret = dict_get_str (this, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + glusterd_start_gsync (volinfo, slave, path_list, conf_path, + uuid_str, NULL); + + GF_FREE (path_list); + path_list = NULL; + +out: + return ret; } int @@ -2556,6 +4656,15 @@ glusterd_restart_gsyncds (glusterd_conf_t *conf) return ret; } +inline int +glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo) +{ + int rcount = volinfo->replica_count; + int scount = volinfo->stripe_count; + + return (rcount ? rcount : 1) * (scount ? scount : 1); +} + int glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo) @@ -2572,7 +4681,7 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, list_for_each_entry (volinfo, &priv->volumes, vol_list) { list_for_each_entry (tmpbrkinfo, &volinfo->bricks, brick_list) { - if (localhost && glusterd_is_local_addr (tmpbrkinfo->hostname)) + if (localhost && !gf_is_local_addr (tmpbrkinfo->hostname)) continue; if (!strcmp(tmpbrkinfo->path, brickname) && (tmpbrkinfo->port == port)) { @@ -2584,9 +4693,23 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, return ret; } +glusterd_brickinfo_t* +glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos) +{ + glusterd_brickinfo_t *tmpbrkinfo = NULL; + + list_for_each_entry (tmpbrkinfo, &volinfo->bricks, + brick_list) { + if (pos == 0) + return tmpbrkinfo; + pos--; + } + return NULL; +} + void glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, - gf_brick_status_t status) + gf_brick_status_t status) { GF_ASSERT (brickinfo); brickinfo->status = status; @@ -2629,21 +4752,400 @@ out: return -1; } +#ifdef GF_LINUX_HOST_OS +int +glusterd_get_brick_root (char *path, char **mount_point) +{ + char *ptr = NULL; + char *mnt_pt = NULL; + struct stat brickstat = {0}; + struct stat buf = {0}; + + if (!path) + goto err; + mnt_pt = gf_strdup (path); + if (!mnt_pt) + goto err; + if (stat (mnt_pt, &brickstat)) + goto err; + + while ((ptr = strrchr (mnt_pt, '/')) && + ptr != mnt_pt) { + + *ptr = '\0'; + if (stat (mnt_pt, &buf)) { + gf_log (THIS->name, GF_LOG_ERROR, "error in " + "stat: %s", strerror (errno)); + goto err; + } + + if (brickstat.st_dev != buf.st_dev) { + *ptr = '/'; + break; + } + } + + if (ptr == mnt_pt) { + if (stat ("/", &buf)) { + gf_log (THIS->name, GF_LOG_ERROR, "error in " + "stat: %s", strerror (errno)); + goto err; + } + if (brickstat.st_dev == buf.st_dev) + strcpy (mnt_pt, "/"); + } + + *mount_point = mnt_pt; + return 0; + + err: + GF_FREE (mnt_pt); + return -1; +} + +static char* +glusterd_parse_inode_size (char *stream, char *pattern) +{ + char *needle = NULL; + char *trail = NULL; + + needle = strstr (stream, pattern); + if (!needle) + goto out; + + needle = nwstrtail (needle, pattern); + + trail = needle; + while (trail && isdigit (*trail)) trail++; + if (trail) + *trail = '\0'; + +out: + return needle; +} + +static int +glusterd_add_inode_size_to_dict (dict_t *dict, int count) +{ + int ret = -1; + char key[1024] = {0}; + char buffer[4096] = {0}; + char *inode_size = NULL; + char *device = NULL; + char *fs_name = NULL; + char *cur_word = NULL; + char *pattern = NULL; + char *trail = NULL; + runner_t runner = {0, }; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.device", count); + ret = dict_get_str (dict, key, &device); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.fs_name", count); + ret = dict_get_str (dict, key, &fs_name); + if (ret) + goto out; + + runinit (&runner); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + /* get inode size for xfs or ext2/3/4 */ + if (!strcmp (fs_name, "xfs")) { + + runner_add_args (&runner, "xfs_info", device, NULL); + pattern = "isize="; + + } else if (IS_EXT_FS(fs_name)) { + + runner_add_args (&runner, "tune2fs", "-l", device, NULL); + pattern = "Inode size:"; + + } else { + ret = 0; + gf_log (THIS->name, GF_LOG_INFO, "Skipped fetching " + "inode size for %s: FS type not recommended", + fs_name); + goto out; + } + + ret = runner_start (&runner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "could not get inode " + "size for %s : %s package missing", fs_name, + ((strcmp (fs_name, "xfs")) ? + "e2fsprogs" : "xfsprogs")); + goto out; + } + + for (;;) { + if (fgets (buffer, sizeof (buffer), + runner_chio (&runner, STDOUT_FILENO)) == NULL) + break; + trail = strrchr (buffer, '\n'); + if (trail) + *trail = '\0'; + + cur_word = glusterd_parse_inode_size (buffer, pattern); + if (cur_word) + break; + } + + ret = runner_end (&runner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "%s exited with non-zero " + "exit status", ((!strcmp (fs_name, "xfs")) ? + "xfs_info" : "tune2fs")); + goto out; + } + if (!cur_word) { + ret = -1; + gf_log (THIS->name, GF_LOG_ERROR, "Unable to retrieve inode " + "size using %s", + (!strcmp (fs_name, "xfs")? "xfs_info": "tune2fs")); + goto out; + } + + inode_size = gf_strdup (cur_word); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.inode_size", count); + + ret = dict_set_dynstr (dict, key, inode_size); + + out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "failed to get inode size"); + return ret; +} + +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab) +{ + struct mntent *entry = NULL; + + mtab = setmntent (_PATH_MOUNTED, "r"); + if (!mtab) + goto out; + + entry = getmntent (mtab); + + while (1) { + if (!entry) + goto out; + + if (!strcmp (entry->mnt_dir, mnt_pt) && + strcmp (entry->mnt_type, "rootfs")) + break; + entry = getmntent (mtab); + } + +out: + return entry; +} + +static int +glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, + dict_t *dict, int count) +{ + int ret = -1; + char key[1024] = {0}; + char base_key[1024] = {0}; + char *mnt_pt = NULL; + char *fs_name = NULL; + char *mnt_options = NULL; + char *device = NULL; + struct mntent *entry = NULL; + FILE *mtab = NULL; + + snprintf (base_key, sizeof (base_key), "brick%d", count); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) + goto out; + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { + ret = -1; + goto out; + } + + /* get device file */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.device", base_key); + + device = gf_strdup (entry->mnt_fsname); + ret = dict_set_dynstr (dict, key, device); + if (ret) + goto out; + + /* fs type */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.fs_name", base_key); + + fs_name = gf_strdup (entry->mnt_type); + ret = dict_set_dynstr (dict, key, fs_name); + if (ret) + goto out; + + /* mount options */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.mnt_options", base_key); + + mnt_options = gf_strdup (entry->mnt_opts); + ret = dict_set_dynstr (dict, key, mnt_options); + + out: + GF_FREE (mnt_pt); + if (mtab) + endmntent (mtab); + + return ret; +} + +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + char *mnt_pt = NULL; + char *device = NULL; + FILE *mtab = NULL; + struct mntent *entry = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickinfo); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point " + "for %s brick", brickinfo->path); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (NULL == entry) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mnt entry " + "for %s mount path", mnt_pt); + goto out; + } + + /* get the fs_name/device */ + device = gf_strdup (entry->mnt_fsname); + +out: + if (NULL != mtab) { + endmntent (mtab); + } + + return device; +} +#endif + +int +glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + dict_t *dict, int count) +{ + int ret = -1; + uint64_t memtotal = 0; + uint64_t memfree = 0; + uint64_t inodes_total = 0; + uint64_t inodes_free = 0; + uint64_t block_size = 0; + char key[1024] = {0}; + char base_key[1024] = {0}; + struct statvfs brickstat = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); + GF_ASSERT (dict); + + snprintf (base_key, sizeof (base_key), "brick%d", count); + + ret = statvfs (brickinfo->path, &brickstat); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "statfs error: %s ", + strerror (errno)); + goto out; + } + + /* file system block size */ + block_size = brickstat.f_bsize; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.block_size", base_key); + ret = dict_set_uint64 (dict, key, block_size); + if (ret) + goto out; + + /* free space in brick */ + memfree = brickstat.f_bfree * brickstat.f_bsize; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.free", base_key); + ret = dict_set_uint64 (dict, key, memfree); + if (ret) + goto out; + + /* total space of brick */ + memtotal = brickstat.f_blocks * brickstat.f_bsize; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.total", base_key); + ret = dict_set_uint64 (dict, key, memtotal); + if (ret) + goto out; + + /* inodes: total and free counts only for ext2/3/4 and xfs */ + inodes_total = brickstat.f_files; + if (inodes_total) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.total_inodes", base_key); + ret = dict_set_uint64 (dict, key, inodes_total); + if (ret) + goto out; + } + + inodes_free = brickstat.f_ffree; + if (inodes_free) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.free_inodes", base_key); + ret = dict_set_uint64 (dict, key, inodes_free); + if (ret) + goto out; + } +#ifdef GF_LINUX_HOST_OS + ret = glusterd_add_brick_mount_details (brickinfo, dict, count); + if (ret) + goto out; + + ret = glusterd_add_inode_size_to_dict (dict, count); +#endif + out: + if (ret) + gf_log (this->name, GF_LOG_DEBUG, "Error adding brick" + " detail to dict: %s", strerror (errno)); + return ret; +} + int32_t glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, dict_t *dict, int32_t count) { - int ret = -1; - char key[8192] = {0,}; - char base_key[8192] = {0}; - char pidfile[PATH_MAX] = {0}; - char path[PATH_MAX] = {0}; - FILE *file = NULL; - int32_t pid = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int ret = -1; + int32_t pid = -1; + int32_t brick_online = -1; + char key[1024] = {0}; + char base_key[1024] = {0}; + char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -2656,6 +5158,7 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, snprintf (base_key, sizeof (base_key), "brick%d", count); snprintf (key, sizeof (key), "%s.hostname", base_key); + ret = dict_set_str (dict, key, brickinfo->hostname); if (ret) goto out; @@ -2672,47 +5175,55 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + + brick_online = glusterd_is_service_running (pidfile, &pid); memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "%s.status", base_key); - ret = dict_set_int32 (dict, key, brickinfo->signed_in); + snprintf (key, sizeof (key), "%s.pid", base_key); + ret = dict_set_int32 (dict, key, pid); if (ret) goto out; - if (!brickinfo->signed_in) - goto out; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.status", base_key); + ret = dict_set_int32 (dict, key, brick_online); +out: + if (ret) + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); + return ret; +} - file = fopen (pidfile, "r+"); - if (!file) { - gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - ret = -1; - goto out; - } +int32_t +glusterd_get_all_volnames (dict_t *dict) +{ + int ret = -1; + int32_t vol_count = 0; + char key[256] = {0}; + glusterd_volinfo_t *entry = NULL; + glusterd_conf_t *priv = NULL; - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; - goto out; + priv = THIS->private; + GF_ASSERT (priv); + + list_for_each_entry (entry, &priv->volumes, vol_list) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "vol%d", vol_count); + ret = dict_set_str (dict, key, entry->volname); + if (ret) + goto out; + + vol_count++; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "%s.pid", base_key); - ret = dict_set_int32 (dict, key, pid); - if (ret) - goto out; + ret = dict_set_int32 (dict, "vol_count", vol_count); -out: + out: if (ret) - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - + gf_log (THIS->name, GF_LOG_ERROR, "failed to get all " + "volume names for status"); return ret; } @@ -2752,11 +5263,14 @@ glusterd_friend_find_by_uuid (uuid_t uuid, int ret = -1; glusterd_conf_t *priv = NULL; glusterd_peerinfo_t *entry = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (peerinfo); *peerinfo = NULL; - priv = THIS->private; + priv = this->private; GF_ASSERT (priv); @@ -2766,7 +5280,7 @@ glusterd_friend_find_by_uuid (uuid_t uuid, list_for_each_entry (entry, &priv->peers, uuid_list) { if (!uuid_compare (entry->uuid, uuid)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Friend found... state: %s", glusterd_friend_sm_state_name_get (entry->state.state)); *peerinfo = entry; @@ -2774,7 +5288,7 @@ glusterd_friend_find_by_uuid (uuid_t uuid, } } - gf_log ("glusterd", GF_LOG_DEBUG, "Friend with uuid: %s, not found", + gf_log (this->name, GF_LOG_DEBUG, "Friend with uuid: %s, not found", uuid_utoa (uuid)); return ret; } @@ -2794,12 +5308,15 @@ glusterd_friend_find_by_hostname (const char *hoststr, struct sockaddr_in *s4 = NULL; struct in_addr *in_addr = NULL; char hname[1024] = {0,}; + xlator_t *this = NULL; + + this = THIS; GF_ASSERT (hoststr); GF_ASSERT (peerinfo); *peerinfo = NULL; - priv = THIS->private; + priv = this->private; GF_ASSERT (priv); @@ -2807,7 +5324,7 @@ glusterd_friend_find_by_hostname (const char *hoststr, if (!strncasecmp (entry->hostname, hoststr, 1024)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Friend %s found.. state: %d", hoststr, entry->state.state); *peerinfo = entry; @@ -2815,9 +5332,10 @@ glusterd_friend_find_by_hostname (const char *hoststr, } } - ret = getaddrinfo(hoststr, NULL, NULL, &addr); + ret = getaddrinfo (hoststr, NULL, NULL, &addr); if (ret != 0) { - gf_log ("", GF_LOG_ERROR, "error in getaddrinfo: %s\n", + gf_log (this->name, GF_LOG_ERROR, + "error in getaddrinfo: %s\n", gai_strerror(ret)); goto out; } @@ -2846,7 +5364,7 @@ glusterd_friend_find_by_hostname (const char *hoststr, if (!strncasecmp (entry->hostname, host, 1024) || !strncasecmp (entry->hostname,hname, 1024)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Friend %s found.. state: %d", hoststr, entry->state.state); *peerinfo = entry; @@ -2857,7 +5375,7 @@ glusterd_friend_find_by_hostname (const char *hoststr, } out: - gf_log ("glusterd", GF_LOG_DEBUG, "Unable to find friend: %s", hoststr); + gf_log (this->name, GF_LOG_DEBUG, "Unable to find friend: %s", hoststr); if (addr) freeaddrinfo (addr); return -1; @@ -2881,23 +5399,26 @@ glusterd_hostname_to_uuid (char *hostname, uuid_t uuid) ret = glusterd_friend_find_by_hostname (hostname, &peerinfo); if (ret) { - ret = glusterd_is_local_addr (hostname); - if (ret) + if (gf_is_local_addr (hostname)) { + uuid_copy (uuid, MY_UUID); + ret = 0; + } else { + ret = 0; goto out; - else - uuid_copy (uuid, priv->uuid); + } } else { uuid_copy (uuid, peerinfo->uuid); } out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); return ret; } int glusterd_brick_stop (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick) { int ret = -1; xlator_t *this = NULL; @@ -2914,74 +5435,52 @@ glusterd_brick_stop (glusterd_volinfo_t *volinfo, if (uuid_is_null (brickinfo->uuid)) { ret = glusterd_resolve_brick (brickinfo); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve brick: %s:%s", + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); goto out; } } - if (uuid_compare (brickinfo->uuid, conf->uuid)) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { ret = 0; + if (del_brick) + glusterd_delete_brick (volinfo, brickinfo); goto out; } - gf_log ("", GF_LOG_INFO, "About to stop glusterfs" + gf_log (this->name, GF_LOG_DEBUG, "About to stop glusterfs" " for brick %s:%s", brickinfo->hostname, brickinfo->path); - ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo); + ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo, del_brick); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to remove" + gf_log (this->name, GF_LOG_CRITICAL, "Unable to stop" " brick: %s:%s", brickinfo->hostname, brickinfo->path); goto out; } out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); return ret; } int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo) { - return (volinfo->defrag != NULL); + return (volinfo->rebal.defrag != NULL); } -int -glusterd_is_replace_running (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo) +gf_boolean_t +glusterd_is_rb_ongoing (glusterd_volinfo_t *volinfo) { - int ret = 0; - char *src_hostname = NULL; - char *brick_hostname = NULL; + gf_boolean_t ret = _gf_false; - if (volinfo->src_brick) { - src_hostname = gf_strdup (volinfo->src_brick->hostname); - if (!src_hostname) { - ret = -1; - goto out; - } - } else { - gf_log ("glusterd", GF_LOG_DEBUG, - "replace brick is not running"); - goto out; - } + GF_ASSERT (volinfo); - brick_hostname = gf_strdup (brickinfo->hostname); - if (!brick_hostname) { - ret = -1; - goto out; - } - if (!glusterd_is_local_addr (src_hostname) && !glusterd_is_local_addr (brick_hostname)) { - if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) - ret = -1; - } + if (glusterd_is_rb_started (volinfo) || + glusterd_is_rb_paused (volinfo)) + ret = _gf_true; -out: - if (src_hostname) - GF_FREE (src_hostname); - if (brick_hostname) - GF_FREE (brick_hostname); return ret; } @@ -2990,7 +5489,6 @@ glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, char *op_errstr, size_t len) { glusterd_brickinfo_t *newbrickinfo = NULL; - glusterd_brickinfo_t *tmpbrkinfo = NULL; int ret = -1; gf_boolean_t is_allocated = _gf_false; glusterd_peerinfo_t *peerinfo = NULL; @@ -3007,7 +5505,7 @@ glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, GF_ASSERT (op_errstr); if (!brickinfo) { - ret = glusterd_brickinfo_from_brick (brick, &newbrickinfo); + ret = glusterd_brickinfo_new_from_brick (brick, &newbrickinfo); if (ret) goto out; is_allocated = _gf_true; @@ -3017,42 +5515,54 @@ glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, ret = glusterd_resolve_brick (newbrickinfo); if (ret) { - snprintf (op_errstr, len, "Host %s not a friend", - newbrickinfo->hostname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", op_errstr); + snprintf(op_errstr, len, "Host %s is not in \'Peer " + "in Cluster\' state", newbrickinfo->hostname); goto out; } - if (!uuid_compare (priv->uuid, newbrickinfo->uuid)) - goto brick_validation; - ret = glusterd_friend_find_by_uuid (newbrickinfo->uuid, &peerinfo); - if (ret) - goto out; - if ((!peerinfo->connected) || - (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) { - snprintf(op_errstr, len, "Host %s not connected", - newbrickinfo->hostname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", op_errstr); - ret = -1; - goto out; - } -brick_validation: - ret = glusterd_brickinfo_get (newbrickinfo->uuid, - newbrickinfo->hostname, - newbrickinfo->path, &tmpbrkinfo); - if (!ret) { - snprintf(op_errstr, len, "Brick: %s already in use", - brick); - gf_log ("", GF_LOG_ERROR, "%s", op_errstr); - ret = -1; - goto out; + if (!uuid_compare (MY_UUID, newbrickinfo->uuid)) { + /* brick is local */ + if (!glusterd_is_brickpath_available (newbrickinfo->uuid, + newbrickinfo->path)) { + snprintf(op_errstr, len, "Brick: %s not available." + " Brick may be containing or be contained " + "by an existing brick", brick); + ret = -1; + goto out; + } + } else { - ret = 0; + ret = glusterd_friend_find_by_uuid (newbrickinfo->uuid, + &peerinfo); + if (ret) { + snprintf (op_errstr, len, "Failed to find host %s", + newbrickinfo->hostname); + goto out; + } + + if ((!peerinfo->connected)) { + snprintf(op_errstr, len, "Host %s not connected", + newbrickinfo->hostname); + ret = -1; + goto out; + } + + if (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) { + snprintf(op_errstr, len, "Host %s is not in \'Peer " + "in Cluster\' state", + newbrickinfo->hostname); + ret = -1; + goto out; + } } + + ret = 0; out: - if (is_allocated && newbrickinfo) + if (is_allocated) glusterd_brickinfo_delete (newbrickinfo); - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + if (op_errstr[0] != '\0') + gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr); + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); return ret; } @@ -3060,8 +5570,8 @@ int glusterd_is_rb_started(glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_started:status=%d", volinfo->rb_status); - return (volinfo->rb_status == GF_RB_STATUS_STARTED); + "is_rb_started:status=%d", volinfo->rep_brick.rb_status); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_STARTED); } @@ -3069,9 +5579,9 @@ int glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_paused:status=%d", volinfo->rb_status); + "is_rb_paused:status=%d", volinfo->rep_brick.rb_status); - return (volinfo->rb_status == GF_RB_STATUS_PAUSED); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_PAUSED); } inline int @@ -3079,10 +5589,10 @@ glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status) { gf_log ("", GF_LOG_DEBUG, "setting status from %d to %d", - volinfo->rb_status, + volinfo->rep_brick.rb_status, status); - volinfo->rb_status = status; + volinfo->rep_brick.rb_status = status; return 0; } @@ -3090,142 +5600,192 @@ inline int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst) { - if (!volinfo->src_brick || !volinfo->dst_brick) + glusterd_replace_brick_t *rb = NULL; + + GF_ASSERT (volinfo); + + rb = &volinfo->rep_brick; + + if (!rb->src_brick || !rb->dst_brick) return -1; - if (strcmp (volinfo->src_brick->hostname, src->hostname) || - strcmp (volinfo->src_brick->path, src->path)) { + if (strcmp (rb->src_brick->hostname, src->hostname) || + strcmp (rb->src_brick->path, src->path)) { gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ"); return -1; } - if (strcmp (volinfo->dst_brick->hostname, dst->hostname) || - strcmp (volinfo->dst_brick->path, dst->path)) { + + if (strcmp (rb->dst_brick->hostname, dst->hostname) || + strcmp (rb->dst_brick->path, dst->path)) { gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ"); return -1; } + return 0; } -int -glusterd_brick_create_path (char *host, char *path, uuid_t uuid, mode_t mode, - char **op_errstr) +/*path needs to be absolute; works only on gfid, volume-id*/ +static int +glusterd_is_uuid_present (char *path, char *xattr, gf_boolean_t *present) { - int ret = -1; - char msg[2048] = {0}; - struct stat st_buf = {0}; - uuid_t gfid = {0,}; - uuid_t old_uuid = {0,}; - char old_uuid_buf[64] = {0,}; - - ret = stat (path, &st_buf); - if ((!ret) && (!S_ISDIR (st_buf.st_mode))) { - snprintf (msg, sizeof (msg), "brick %s:%s, " - "path %s is not a directory", host, path, path); - gf_log ("", GF_LOG_ERROR, "%s", msg); - ret = -1; + GF_ASSERT (path); + GF_ASSERT (xattr); + GF_ASSERT (present); + + int ret = -1; + uuid_t uid = {0,}; + + if (!path || !xattr || !present) goto out; - } else if (!ret) { - goto check_xattr; - } else { - ret = mkdir (path, mode); - if (ret) { - snprintf (msg, sizeof (msg), "brick: %s:%s, path " - "creation failed, reason: %s", - host, path, strerror(errno)); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + + ret = sys_lgetxattr (path, xattr, &uid, 16); + + if (ret >= 0) { + *present = _gf_true; + ret = 0; + goto out; + } + + switch (errno) { +#if defined(ENODATA) + case ENODATA: /* FALLTHROUGH */ +#endif +#if defined(ENOATTR) && (ENOATTR != ENODATA) + case ENOATTR: /* FALLTHROUGH */ +#endif + case ENOTSUP: + *present = _gf_false; + ret = 0; + break; + default: + break; + } +out: + return ret; +} + +/*path needs to be absolute*/ +static int +glusterd_is_path_in_use (char *path, gf_boolean_t *in_use, char **op_errstr) +{ + int i = 0; + int ret = -1; + gf_boolean_t used = _gf_false; + char dir[PATH_MAX] = {0,}; + char *curdir = NULL; + char msg[2048] = {0}; + char *keys[3] = {GFID_XATTR_KEY, + GF_XATTR_VOL_ID_KEY, + NULL}; + + GF_ASSERT (path); + if (!path) + goto out; + + strcpy (dir, path); + curdir = dir; + do { + for (i = 0; !used && keys[i]; i++) { + ret = glusterd_is_uuid_present (curdir, keys[i], &used); + if (ret) + goto out; + } + + if (used) + break; + + curdir = dirname (curdir); + if (!strcmp (curdir, ".")) goto out; + + + } while (strcmp (curdir, "/")); + + if (!strcmp (curdir, "/")) { + for (i = 0; !used && keys[i]; i++) { + ret = glusterd_is_uuid_present (curdir, keys[i], &used); + if (ret) + goto out; + } + } + + ret = 0; + *in_use = used; +out: + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get extended " + "attribute %s, reason: %s", keys[i], + strerror (errno)); + } + + if (*in_use) { + if (!strcmp (path, curdir)) { + snprintf (msg, sizeof (msg), "%s is already part of a " + "volume", path); } else { - goto check_xattr; + snprintf (msg, sizeof (msg), "parent directory %s is " + "already part of a volume", curdir); } } -/* To check if filesystem is read-only - and if it supports extended attributes */ -check_xattr: + if (strlen (msg)) { + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } + + return ret; +} + +int +glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force) +{ + int ret = -1; + char msg[2048] = {0,}; + gf_boolean_t in_use = _gf_false; + int flags = 0; + + /* Check for xattr support in backend fs */ ret = sys_lsetxattr (path, "trusted.glusterfs.test", "working", 8, 0); if (ret) { - snprintf (msg, sizeof (msg), "glusterfs is not" + snprintf (msg, sizeof (msg), "Glusterfs is not" " supported on brick: %s:%s.\nSetting" " extended attributes failed, reason:" " %s.", host, path, strerror(errno)); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); goto out; + } else { - /* Remove xattr *cannot* fail after setting it succeeded */ sys_lremovexattr (path, "trusted.glusterfs.test"); } - /* Now check if the export directory has some other 'gfid', - other than that of root '/' */ - ret = sys_lgetxattr (path, "trusted.gfid", gfid, 16); - if (ret == 16) { - if (__is_root_gfid (gfid) != 0) { - gf_log (THIS->name, GF_LOG_WARNING, - "%s: gfid (%s) is not that of glusterfs '/' ", - path, uuid_utoa (gfid)); - snprintf (msg, sizeof (msg), - "'%s:%s' gfid (%s) is not that of " - "glusterfs '/' ", host, path, uuid_utoa (gfid)); - ret = -1; - goto out; - } - } else if (ret != -1) { - /* Wrong 'gfid' is set, it should be error */ - ret = -1; - snprintf (msg, sizeof (msg), "'%s:%s' has wrong entry" - "for 'gfid'.", host, path); - goto out; - } else if ((ret == -1) && (errno != ENODATA)) { - /* Wrong 'gfid' is set, it should be error */ - snprintf (msg, sizeof (msg), "'%s:%s' has failed to fetch " - "'gfid' (%s)", host, path, strerror (errno)); + ret = glusterd_is_path_in_use (path, &in_use, op_errstr); + if (ret) goto out; - } - ret = 0; - if (!uuid) - goto out; - - /* This 'key' is set when the volume is started for the first time */ - ret = sys_lgetxattr (path, "trusted.glusterfs.volume-id", - old_uuid, 16); - if (ret == 16) { - if (uuid_compare (old_uuid, uuid)) { - uuid_utoa_r (old_uuid, old_uuid_buf); - gf_log (THIS->name, GF_LOG_WARNING, - "%s: mismatching volume-id (%s) recieved. " - "already is a part of volume %s ", - path, uuid_utoa (uuid), old_uuid_buf); - snprintf (msg, sizeof (msg), "'%s:%s' has been part of " - "a deleted volume with id %s. Please " - "re-create the brick directory.", - host, path, old_uuid_buf); - ret = -1; - goto out; - } - } else if (ret != -1) { - /* Wrong 'volume-id' is set, it should be error */ + if (in_use && !is_force) { ret = -1; - snprintf (msg, sizeof (msg), "'%s:%s' has wrong entry" - "for 'volume-id'.", host, path); - goto out; - } else if ((ret == -1) && (errno != ENODATA)) { - /* Wrong 'volume-id' is set, it should be error */ - snprintf (msg, sizeof (msg), "'%s:%s' : failed to fetch " - "'volume-id' (%s)", host, path, strerror (errno)); goto out; + } + + if (!is_force) + flags = XATTR_CREATE; + + ret = sys_lsetxattr (path, GF_XATTR_VOL_ID_KEY, uuid, 16, + flags); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to set extended " + "attributes %s, reason: %s", + GF_XATTR_VOL_ID_KEY, strerror (errno)); + goto out; } - /* if 'ret == -1' then 'volume-id' not set, seems to be a fresh - directory */ ret = 0; out: - if (msg[0] != '\0') + if (strlen (msg)) *op_errstr = gf_strdup (msg); - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); return ret; } @@ -3236,9 +5796,8 @@ glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict, { int ret = -1; char key[512] = {0}; - char timestr[256] = {0,}; + char timestr[64] = {0,}; char *str = NULL; - struct tm tm = {0}; GF_ASSERT (dict); GF_ASSERT (log); @@ -3267,9 +5826,8 @@ glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "log%d-time", count); - localtime_r ((const time_t*)&log->transitions[i].time, &tm); - memset (timestr, 0, sizeof (timestr)); - strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", &tm); + gf_time_fmt (timestr, sizeof timestr, log->transitions[i].time, + gf_timefmt_FT); str = gf_strdup (timestr); ret = dict_set_dynstr (dict, key, str); if (ret) @@ -3357,8 +5915,7 @@ glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log) { if (!log) return; - if (log->transitions) - GF_FREE (log->transitions); + GF_FREE (log->transitions); return; } @@ -3370,6 +5927,10 @@ glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, glusterd_sm_transition_t *transitions = NULL; int ret = -1; int next = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (log); if (!log) @@ -3392,18 +5953,18 @@ glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, if (log->count < log->size) log->count++; ret = 0; - gf_log ("glusterd", GF_LOG_DEBUG, "Transitioning from '%s' to '%s' " + gf_log (this->name, GF_LOG_DEBUG, "Transitioning from '%s' to '%s' " "due to event '%s'", log->state_name_get (old_state), log->state_name_get (new_state), log->event_name_get (event)); out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); return ret; } int glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname) + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port) { glusterd_peerinfo_t *new_peer = NULL; int ret = -1; @@ -3433,6 +5994,9 @@ glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, if (ret) goto out; + if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED) + new_peer->quorum_contrib = QUORUM_WAITING; + new_peer->port = port; *peerinfo = new_peer; out: if (ret && new_peer) @@ -3456,8 +6020,7 @@ glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo) } list_del_init (&peerinfo->uuid_list); - if (peerinfo->hostname) - GF_FREE (peerinfo->hostname); + GF_FREE (peerinfo->hostname); glusterd_sm_tr_log_delete (&peerinfo->sm_log); GF_FREE (peerinfo); peerinfo = NULL; @@ -3473,7 +6036,7 @@ glusterd_remove_pending_entry (struct list_head *list, void *elem) { glusterd_pending_node_t *pending_node = NULL; glusterd_pending_node_t *tmp = NULL; - int ret = -1; + int ret = 0; list_for_each_entry_safe (pending_node, tmp, list, list) { if (elem == pending_node->node) { @@ -3484,7 +6047,7 @@ glusterd_remove_pending_entry (struct list_head *list, void *elem) } } out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret); return ret; } @@ -3526,7 +6089,7 @@ glusterd_delete_volume (glusterd_volinfo_t *volinfo) ret = glusterd_volinfo_delete (volinfo); out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret); return ret; } @@ -3535,17 +6098,15 @@ glusterd_delete_brick (glusterd_volinfo_t* volinfo, glusterd_brickinfo_t *brickinfo) { int ret = 0; + char voldir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); -#ifdef DEBUG - ret = glusterd_volume_brickinfo_get (brickinfo->uuid, - brickinfo->hostname, - brickinfo->path, volinfo, NULL); - GF_ASSERT (0 == ret); -#endif + GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv); + glusterd_delete_volfile (volinfo, brickinfo); - glusterd_store_delete_brick (volinfo, brickinfo); + glusterd_store_delete_brick (brickinfo, voldir); glusterd_brickinfo_delete (brickinfo); volinfo->brick_count--; return ret; @@ -3567,28 +6128,92 @@ glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo) } int -mkdir_if_missing (char *path) +glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist) { - struct stat st = {0,}; - int ret = 0; + char **path_tokens = NULL; + char *tmp_path_list = NULL; + char path[PATH_MAX] = ""; + int32_t count = 0; + int32_t pathlen = 0; + int32_t total_len = 0; + int32_t ret = 0; + int i = 0; + glusterd_brickinfo_t *brickinfo = NULL; + + if ((!volinfo) || (!pathlist)) + goto out; + + path_tokens = GF_CALLOC (sizeof(char*), volinfo->brick_count, + gf_gld_mt_charptr); + if (!path_tokens) { + gf_log ("", GF_LOG_DEBUG, "Could not allocate memory."); + ret = -1; + goto out; + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + pathlen = snprintf (path, sizeof(path), + "--path=%s ", brickinfo->path); + if (pathlen < sizeof(path)) + path[pathlen] = '\0'; + else + path[sizeof(path)-1] = '\0'; + path_tokens[count] = gf_strdup (path); + if (!path_tokens[count]) { + gf_log ("", GF_LOG_DEBUG, + "Could not allocate memory."); + ret = -1; + goto out; + } + count++; + total_len += pathlen; + } + + tmp_path_list = GF_CALLOC (sizeof(char), total_len + 1, + gf_gld_mt_char); + if (!tmp_path_list) { + gf_log ("", GF_LOG_DEBUG, "Could not allocate memory."); + ret = -1; + goto out; + } - ret = mkdir (path, 0777); - if (!ret || errno == EEXIST) - ret = stat (path, &st); - if (ret == -1 || !S_ISDIR (st.st_mode)) - gf_log ("", GF_LOG_WARNING, "Failed to create the" - " directory %s", path); + for (i = 0; i < count; i++) + strcat (tmp_path_list, path_tokens[i]); + if (count) + *pathlist = tmp_path_list; + + ret = count; +out: + for (i = 0; i < count; i++) { + GF_FREE (path_tokens[i]); + path_tokens[i] = NULL; + } + + GF_FREE (path_tokens); + path_tokens = NULL; + + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, "No Local Bricks Present."); + GF_FREE (tmp_path_list); + tmp_path_list = NULL; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, - char *glusterd_uuid_str, char **op_errstr) + char *path_list, char *conf_path, + char *glusterd_uuid_str, + char **op_errstr) { int32_t ret = 0; int32_t status = 0; - char buf[PATH_MAX] = {0,}; char uuid_str [64] = {0}; runner_t runner = {0,}; xlator_t *this = NULL; @@ -3600,48 +6225,46 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, priv = this->private; GF_ASSERT (priv); - uuid_utoa_r (priv->uuid, uuid_str); - if (strcmp (uuid_str, glusterd_uuid_str)) - goto out; - - ret = gsync_status (master_vol->volname, slave, &status); - if (status == 0) - goto out; + uuid_utoa_r (MY_UUID, uuid_str); - snprintf (buf, PATH_MAX, "%s/"GEOREP"/%s", priv->workdir, master_vol->volname); - ret = mkdir_if_missing (buf); - if (ret) { - errcode = -1; + if (!path_list) { + ret = 0; + gf_log ("", GF_LOG_DEBUG, "No Bricks in this node." + " Not starting gsyncd."); goto out; } - snprintf (buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/%s", - master_vol->volname); - ret = mkdir_if_missing (buf); - if (ret) { - errcode = -1; + ret = gsync_status (master_vol->volname, slave, conf_path, &status); + if (status == 0) goto out; - } uuid_utoa_r (master_vol->volume_id, uuid_str); runinit (&runner); - runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + path_list, "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); runner_argprintf (&runner, ":%s", master_vol->volname); runner_add_args (&runner, slave, "--config-set", "session-owner", uuid_str, NULL); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret == -1) { errcode = -1; goto out; } runinit (&runner); - runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--monitor", "-c", NULL); - runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + path_list, "--monitor", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); runner_argprintf (&runner, ":%s", master_vol->volname); + runner_argprintf (&runner, "--glusterd-uuid=%s", + uuid_utoa (priv->uuid)); runner_add_arg (&runner, slave); + synclock_unlock (&priv->big_lock); ret = runner_run (&runner); + synclock_lock (&priv->big_lock); if (ret == -1) { gf_asprintf (op_errstr, GEOREP" start failed for %s %s", master_vol->volname, slave); @@ -3653,7 +6276,7 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, out: if ((ret != 0) && errcode == -1) { if (op_errstr) - *op_errstr = gf_strdup ("internal error, cannot start" + *op_errstr = gf_strdup ("internal error, cannot start " "the " GEOREP " session"); } @@ -3662,17 +6285,38 @@ out: } int32_t -glusterd_recreate_bricks (glusterd_conf_t *conf) +glusterd_recreate_volfiles (glusterd_conf_t *conf) { glusterd_volinfo_t *volinfo = NULL; int ret = 0; + int op_ret = 0; GF_ASSERT (conf); list_for_each_entry (volinfo, &conf->volumes, vol_list) { ret = generate_brick_volfiles (volinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate brick volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate trusted client volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles (volinfo, GF_CLIENT_OTHER); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate client volfiles for %s", + volinfo->volname); + op_ret = ret; + } } - return ret; + return op_ret; } int32_t @@ -3682,7 +6326,8 @@ glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) char *type = NULL; gf_boolean_t upgrade = _gf_false; gf_boolean_t downgrade = _gf_false; - gf_boolean_t regenerate_brick_volfiles = _gf_false; + gf_boolean_t regenerate_volfiles = _gf_false; + gf_boolean_t terminate = _gf_false; ret = dict_get_str (options, "upgrade", &type); if (!ret) { @@ -3694,7 +6339,7 @@ glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) goto out; } if (_gf_true == upgrade) - regenerate_brick_volfiles = _gf_true; + regenerate_volfiles = _gf_true; } ret = dict_get_str (options, "downgrade", &type); @@ -3717,10 +6362,2533 @@ glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) if (!upgrade && !downgrade) ret = 0; - if (regenerate_brick_volfiles) { - ret = glusterd_recreate_bricks (conf); + else + terminate = _gf_true; + if (regenerate_volfiles) { + ret = glusterd_recreate_volfiles (conf); + } +out: + if (terminate && (ret == 0)) + kill (getpid(), SIGTERM); + return ret; +} + +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t replicates = _gf_false; + if (volinfo && ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || + (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))) + replicates = _gf_true; + return replicates; +} + +int +glusterd_set_dump_options (char *dumpoptions_path, char *options, + int option_cnt) +{ + int ret = 0; + char *dup_options = NULL; + char *option = NULL; + char *tmpptr = NULL; + FILE *fp = NULL; + int nfs_cnt = 0; + + if (0 == option_cnt || + (option_cnt == 1 && (!strcmp (options, "nfs ")))) { + ret = 0; + goto out; + } + + fp = fopen (dumpoptions_path, "w"); + if (!fp) { + ret = -1; + goto out; + } + dup_options = gf_strdup (options); + gf_log ("", GF_LOG_INFO, "Received following statedump options: %s", + dup_options); + option = strtok_r (dup_options, " ", &tmpptr); + while (option) { + if (!strcmp (option, "nfs")) { + if (nfs_cnt > 0) { + unlink (dumpoptions_path); + ret = 0; + goto out; + } + nfs_cnt++; + option = strtok_r (NULL, " ", &tmpptr); + continue; + } + fprintf (fp, "%s=yes\n", option); + option = strtok_r (NULL, " ", &tmpptr); + } + +out: + if (fp) + fclose (fp); + GF_FREE (dup_options); + return ret; +} + +int +glusterd_brick_statedump (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *options, int option_cnt, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = {0,}; + char dumpoptions_path[PATH_MAX] = {0,}; + FILE *pidfile = NULL; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Cannot resolve brick %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + } + + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + ret = 0; + goto out; + } + + GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf); + + pidfile = fopen (pidfile_path, "r"); + if (!pidfile) { + gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", + pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf (pidfile, "%d", &pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + snprintf (dumpoptions_path, sizeof (dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error while parsing the statedump " + "options"); + ret = -1; + goto out; + } + + gf_log ("", GF_LOG_INFO, "Performing statedump on brick with pid %d", + pid); + + kill (pid, SIGUSR1); + + sleep (1); + ret = 0; +out: + unlink (dumpoptions_path); + if (pidfile) + fclose (pidfile); + return ret; +} + +int +glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + FILE *pidfile = NULL; + pid_t pid = -1; + char dumpoptions_path[PATH_MAX] = {0,}; + char *option = NULL; + char *tmpptr = NULL; + char *dup_options = NULL; + char msg[256] = {0,}; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + dup_options = gf_strdup (options); + option = strtok_r (dup_options, " ", &tmpptr); + if (strcmp (option, "nfs")) { + snprintf (msg, sizeof (msg), "for nfs statedump, options should" + " be after the key nfs"); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + GLUSTERD_GET_NFS_DIR (path, conf); + GLUSTERD_GET_NFS_PIDFILE (pidfile_path, path); + + pidfile = fopen (pidfile_path, "r"); + if (!pidfile) { + gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", + pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf (pidfile, "%d", &pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + snprintf (dumpoptions_path, sizeof (dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error while parsing the statedump " + "options"); + ret = -1; + goto out; + } + + gf_log ("", GF_LOG_INFO, "Performing statedump on nfs server with " + "pid %d", pid); + + kill (pid, SIGUSR1); + + sleep (1); + + ret = 0; +out: + if (pidfile) + fclose (pidfile); + unlink (dumpoptions_path); + GF_FREE (dup_options); + return ret; +} + +/* Checks if the given peer contains all the bricks belonging to the + * given volume. Returns true if it does else returns false + */ +gf_boolean_t +glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo, + uuid_t friend_uuid) +{ + gf_boolean_t ret = _gf_true; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (volinfo); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (friend_uuid, brickinfo->uuid)) { + ret = _gf_false; + break; + } + } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Remove all volumes which completely belong to given friend + */ +int +glusterd_friend_remove_cleanup_vols (uuid_t uuid) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + list_for_each_entry_safe (volinfo, tmp_volinfo, + &priv->volumes, vol_list) { + if (glusterd_friend_contains_vol_bricks (volinfo, uuid)) { + gf_log (THIS->name, GF_LOG_INFO, + "Deleting stale volume %s", volinfo->volname); + ret = glusterd_delete_volume (volinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Error deleting stale volume"); + goto out; + } + } + } + ret = 0; +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Check if the all peers are connected and befriended, except the peer + * specified (the peer being detached) + */ +gf_boolean_t +glusterd_chk_peers_connected_befriended (uuid_t skip_uuid) +{ + gf_boolean_t ret = _gf_true; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + + priv= THIS->private; + GF_ASSERT (priv); + + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + + if (!uuid_is_null (skip_uuid) && !uuid_compare (skip_uuid, + peerinfo->uuid)) + continue; + + if ((GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) + || !(peerinfo->connected)) { + ret = _gf_false; + break; + } + } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %s", + (ret?"TRUE":"FALSE")); + return ret; +} + +void +glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo, + gf_transport_type type) +{ + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + + if ((volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) && + (type == GF_TRANSPORT_RDMA)) + snprintf (filepath, PATH_MAX, "%s/%s.rdma-fuse.vol", + path, volinfo->volname); + else + snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol", + path, volinfo->volname); +} + +void +glusterd_get_trusted_client_filepath (char *filepath, + glusterd_volinfo_t *volinfo, + gf_transport_type type) +{ + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + + if ((volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) && + (type == GF_TRANSPORT_RDMA)) + snprintf (filepath, PATH_MAX, + "%s/trusted-%s.rdma-fuse.vol", + path, volinfo->volname); + else + snprintf (filepath, PATH_MAX, + "%s/trusted-%s-fuse.vol", + path, volinfo->volname); +} + +int +glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk) +{ + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX]; + int ret = -1; + pid_t pid; + + priv = THIS->private; + if (!priv) + return ret; + + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + + if (!glusterd_is_service_running (pidfile, &pid)) { + glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd, + cbk, volinfo->rebal.op); + } else { + glusterd_rebalance_rpc_create (volinfo, priv, cmd); + } + + return ret; +} + +int +glusterd_restart_rebalance (glusterd_conf_t *conf) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + char op_errstr[256]; + + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (!volinfo->rebal.defrag_cmd) + continue; + glusterd_volume_defrag_restart (volinfo, op_errstr, 256, + volinfo->rebal.defrag_cmd, NULL); + } + return ret; +} + +void +glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) +{ + glusterd_rebalance_t *rebal = NULL; + GF_ASSERT (volinfo); + + rebal = &volinfo->rebal; + rebal->rebalance_files = 0; + rebal->rebalance_data = 0; + rebal->lookedup_files = 0; + rebal->rebalance_failures = 0; + rebal->rebalance_time = 0; + rebal->skipped_files = 0; + +} + +/* Return hostname for given uuid if it exists + * else return NULL + */ +char * +glusterd_uuid_to_hostname (uuid_t uuid) +{ + char *hostname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *entry = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + if (!uuid_compare (MY_UUID, uuid)) { + hostname = gf_strdup ("localhost"); + } + if (!list_empty (&priv->peers)) { + list_for_each_entry (entry, &priv->peers, uuid_list) { + if (!uuid_compare (entry->uuid, uuid)) { + hostname = gf_strdup (entry->hostname); + break; + } + } + } + + return hostname; +} + +gf_boolean_t +glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + gf_boolean_t local = _gf_false; + int ret = 0; + glusterd_conf_t *conf = NULL; + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) + goto out; + } + conf = this->private; + local = !uuid_compare (brickinfo->uuid, MY_UUID); +out: + return local; +} +int +glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char *volid_str = NULL; + uuid_t vol_uid = {0, }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_str (op_dict, "vol-id", &volid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume id for " + "volume %s", volinfo->volname); + goto out; + } + ret = uuid_parse (volid_str, vol_uid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to parse volume id " + "for volume %s", volinfo->volname); + goto out; + } + + if (uuid_compare (vol_uid, volinfo->volume_id)) { + gf_log (this->name, GF_LOG_ERROR, "Volume ids of volume %s - %s" + " and %s - are different. Possibly a split brain among " + "peers.", volinfo->volname, volid_str, + uuid_utoa (volinfo->volume_id)); + ret = -1; + goto out; + } + +out: + return ret; +} + +int +glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, + dict_t *rsp_dict) +{ + int ret = 0; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; + uint64_t failures = 0; + uint64_t skipped = 0; + xlator_t *this = NULL; + double run_time = 0; + + this = THIS; + + ret = dict_get_uint64 (rsp_dict, "files", &files); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get file count"); + + ret = dict_get_uint64 (rsp_dict, "size", &size); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get size of xfer"); + + ret = dict_get_uint64 (rsp_dict, "lookups", &lookup); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get lookedup file count"); + + ret = dict_get_int32 (rsp_dict, "status", (int32_t *)&status); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get status"); + + ret = dict_get_uint64 (rsp_dict, "failures", &failures); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get failure count"); + + ret = dict_get_uint64 (rsp_dict, "skipped", &skipped); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get skipped count"); + + ret = dict_get_double (rsp_dict, "run-time", &run_time); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get run-time"); + + if (files) + volinfo->rebal.rebalance_files = files; + if (size) + volinfo->rebal.rebalance_data = size; + if (lookup) + volinfo->rebal.lookedup_files = lookup; + if (status) + volinfo->rebal.defrag_status = status; + if (failures) + volinfo->rebal.rebalance_failures = failures; + if (skipped) + volinfo->rebal.skipped_files = skipped; + if (run_time) + volinfo->rebal.rebalance_time = run_time; + + return ret; +} + +int +glusterd_check_topology_identical (const char *filename1, + const char *filename2, + gf_boolean_t *identical) +{ + int ret = -1; /* FAILURE */ + xlator_t *this = NULL; + FILE *fp1 = NULL; + FILE *fp2 = NULL; + glusterfs_graph_t *grph1 = NULL; + glusterfs_graph_t *grph2 = NULL; + + if ((!filename1) || (!filename2) || (!identical)) + goto out; + + this = THIS; + + errno = 0; /* RESET the errno */ + + /* fopen() the volfile1 to create the graph */ + fp1 = fopen (filename1, "r"); + if (fp1 == NULL) { + gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed " + "(%s)", filename1, strerror (errno)); + goto out; + } + + /* fopen() the volfile2 to create the graph */ + fp2 = fopen (filename2, "r"); + if (fp2 == NULL) { + gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed " + "(%s)", filename2, strerror (errno)); + goto out; } + + /* create the graph for filename1 */ + grph1 = glusterfs_graph_construct(fp1); + if (grph1 == NULL) + goto out; + + /* create the graph for filename2 */ + grph2 = glusterfs_graph_construct(fp2); + if (grph2 == NULL) + goto out; + + /* compare the graph topology */ + *identical = is_graph_topology_equal(grph1, grph2); + ret = 0; /* SUCCESS */ out: + if (fp1) + fclose(fp1); + if (fp2) + fclose(fp2); + if (grph1) + glusterfs_graph_destroy(grph1); + if (grph2) + glusterfs_graph_destroy(grph2); + + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int +glusterd_check_files_identical (char *filename1, char *filename2, + gf_boolean_t *identical) +{ + int ret = -1; + struct stat buf1 = {0,}; + struct stat buf2 = {0,}; + uint32_t cksum1 = 0; + uint32_t cksum2 = 0; + xlator_t *this = NULL; + + GF_ASSERT (filename1); + GF_ASSERT (filename2); + GF_ASSERT (identical); + + this = THIS; + + ret = stat (filename1, &buf1); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "stat on file: %s failed " + "(%s)", filename1, strerror (errno)); + goto out; + } + + ret = stat (filename2, &buf2); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "stat on file: %s failed " + "(%s)", filename2, strerror (errno)); + goto out; + } + + if (buf1.st_size != buf2.st_size) { + *identical = _gf_false; + goto out; + } + + ret = get_checksum_for_path (filename1, &cksum1); + if (ret) + goto out; + + + ret = get_checksum_for_path (filename2, &cksum2); + if (ret) + goto out; + + if (cksum1 != cksum2) + *identical = _gf_false; + else + *identical = _gf_true; + +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int +glusterd_volset_help (dict_t *dict, char **op_errstr) +{ + int ret = -1; + gf_boolean_t xml_out = _gf_false; + xlator_t *this = NULL; + + this = THIS; + + if (!dict) { + if (!(dict = glusterd_op_get_ctx ())) { + ret = 0; + goto out; + } + } + + if (dict_get (dict, "help" )) { + xml_out = _gf_false; + + } else if (dict_get (dict, "help-xml" )) { + xml_out = _gf_true; +#if (HAVE_LIB_XML) + ret = 0; +#else + gf_log (this->name, GF_LOG_ERROR, + "libxml not present in the system"); + if (op_errstr) + *op_errstr = gf_strdup ("Error: xml libraries not " + "present to produce " + "xml-output"); + goto out; +#endif + + } else { + goto out; + } + + ret = glusterd_get_volopt_content (dict, xml_out); + if (ret && op_errstr) + *op_errstr = gf_strdup ("Failed to get volume options help"); + out: + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, xdrproc_t xdrproc, + dict_t *dict) +{ + int ret = -1; + char *cmd = NULL; + int op_ret = 0; + char *op_errstr = NULL; + int op_errno = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + op_ret = arg->op_ret; + op_errstr = arg->op_errstr; + op_errno = arg->op_errno; + + ret = dict_get_str (dict, "cmd-str", &cmd); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to get command " + "string"); + + if (cmd) { + if (op_ret) + gf_cmd_log ("", "%s : FAILED %s %s", cmd, + (op_errstr)? ":" : " ", + (op_errstr)? op_errstr : " "); + else + gf_cmd_log ("", "%s : SUCCESS", cmd); + } + + glusterd_submit_reply (req, arg, payload, payloadcount, iobref, + (xdrproc_t) xdrproc); + if (dict) + dict_unref (dict); + + return ret; +} + +static int32_t +glusterd_append_gsync_status (dict_t *dst, dict_t *src) +{ + int ret = 0; + char *stop_msg = NULL; + + ret = dict_get_str (src, "gsync-status", &stop_msg); + if (ret) { + ret = 0; + goto out; + } + + ret = dict_set_dynstr (dst, "gsync-status", gf_strdup (stop_msg)); + if (ret) { + gf_log ("glusterd", GF_LOG_WARNING, "Unable to set the stop" + "message in the ctx dictionary"); + goto out; + } + + ret = 0; + out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; + } +static int32_t +glusterd_append_status_dicts (dict_t *dst, dict_t *src) +{ + int dst_count = 0; + int src_count = 0; + int i = 0; + int ret = 0; + char mst[PATH_MAX] = {0,}; + char slv[PATH_MAX] = {0, }; + char sts[PATH_MAX] = {0, }; + char nds[PATH_MAX] = {0, }; + char *mst_val = NULL; + char *slv_val = NULL; + char *sts_val = NULL; + char *nds_val = NULL; + + GF_ASSERT (dst); + + if (src == NULL) + goto out; + + ret = dict_get_int32 (dst, "gsync-count", &dst_count); + if (ret) + dst_count = 0; + + ret = dict_get_int32 (src, "gsync-count", &src_count); + if (ret || !src_count) { + gf_log ("", GF_LOG_DEBUG, "Source brick empty"); + ret = 0; + goto out; + } + + for (i = 1; i <= src_count; i++) { + snprintf (nds, sizeof(nds), "node%d", i); + snprintf (mst, sizeof(mst), "master%d", i); + snprintf (slv, sizeof(slv), "slave%d", i); + snprintf (sts, sizeof(sts), "status%d", i); + + ret = dict_get_str (src, nds, &nds_val); + if (ret) + goto out; + + ret = dict_get_str (src, mst, &mst_val); + if (ret) + goto out; + + ret = dict_get_str (src, slv, &slv_val); + if (ret) + goto out; + + ret = dict_get_str (src, sts, &sts_val); + if (ret) + goto out; + + snprintf (nds, sizeof(nds), "node%d", i+dst_count); + snprintf (mst, sizeof(mst), "master%d", i+dst_count); + snprintf (slv, sizeof(slv), "slave%d", i+dst_count); + snprintf (sts, sizeof(sts), "status%d", i+dst_count); + + ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val)); + if (ret) + goto out; + + ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val)); + if (ret) + goto out; + + ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val)); + if (ret) + goto out; + + ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val)); + if (ret) + goto out; + + } + + ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count); + + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +int32_t +glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr) +{ + dict_t *ctx = NULL; + int ret = 0; + char *conf_path = NULL; + + if (aggr) { + ctx = aggr; + + } else { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log ("", GF_LOG_ERROR, + "Operation Context is not present"); + GF_ASSERT (0); + } + } + + if (rsp_dict) { + ret = glusterd_append_status_dicts (ctx, rsp_dict); + if (ret) + goto out; + + ret = glusterd_append_gsync_status (ctx, rsp_dict); + if (ret) + goto out; + + ret = dict_get_str (rsp_dict, "conf_path", &conf_path); + if (!ret && conf_path) { + ret = dict_set_dynstr (ctx, "conf_path", + gf_strdup(conf_path)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store conf path."); + goto out; + } + } + } + if ((op_errstr) && (strcmp ("", op_errstr))) { + ret = dict_set_dynstr (ctx, "errstr", gf_strdup(op_errstr)); + if (ret) + goto out; + } + + ret = 0; + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret); + return ret; +} + +int32_t +glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int32_t src_port = 0; + int32_t dst_port = 0; + int ret = 0; + dict_t *ctx = NULL; + + + if (aggr) { + ctx = aggr; + + } else { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log ("", GF_LOG_ERROR, + "Operation Context is not present"); + GF_ASSERT (0); + } + } + + if (rsp_dict) { + ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port); + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, + "src-brick-port=%d found", src_port); + } + + ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port); + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, + "dst-brick-port=%d found", dst_port); + } + + } + + if (src_port) { + ret = dict_set_int32 (ctx, "src-brick-port", + src_port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set src-brick"); + goto out; + } + } + + if (dst_port) { + ret = dict_set_int32 (ctx, "dst-brick-port", + dst_port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set dst-brick"); + goto out; + } + + } + +out: + return ret; + +} + +int32_t +glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + + GF_ASSERT (rsp_dict); + + if (!rsp_dict) { + goto out; + } + + ret = glusterd_import_friend_volumes (rsp_dict); +out: + return ret; + +} + +static int +_profile_volume_add_friend_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = {0}; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + int brick_count = 0; + char brick_key[256]; + + if (strcmp (key, "count") == 0) + return 0; + sscanf (key, "%d%s", &brick_count, brick_key); + rsp_ctx = data; + new_value = data_copy (value); + GF_ASSERT (new_value); + snprintf (new_key, sizeof (new_key), "%d%s", + rsp_ctx->count + brick_count, brick_key); + dict_set (rsp_ctx->dict, new_key, new_value); + return 0; +} + +int +glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t brick_count = 0; + int32_t count = 0; + dict_t *ctx_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (rsp_dict, "count", &brick_count); + if (ret) { + ret = 0; //no bricks in the rsp + goto out; + } + + op = glusterd_op_get_op (); + GF_ASSERT (GD_OP_PROFILE_VOLUME == op); + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (); + } + + ret = dict_get_int32 (ctx_dict, "count", &count); + rsp_ctx.count = count; + rsp_ctx.dict = ctx_dict; + dict_foreach (rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx); + dict_del (ctx_dict, "count"); + ret = dict_set_int32 (ctx_dict, "count", count + brick_count); +out: + return ret; +} + +static int +glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + glusterd_status_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + char brick_key[1024] = {0,}; + char new_key[1024] = {0,}; + int32_t index = 0; + int32_t ret = 0; + + /* Skip the following keys, they are already present in the ctx_dict */ + if (!strcmp (key, "count") || !strcmp (key, "cmd") || + !strcmp (key, "brick-index-max") || !strcmp (key, "other-count")) + return 0; + + rsp_ctx = data; + new_value = data_copy (value); + GF_ASSERT (new_value); + + sscanf (key, "brick%d.%s", &index, brick_key); + + if (index > rsp_ctx->brick_index_max) { + snprintf (new_key, sizeof (new_key), "brick%d.%s", + index + rsp_ctx->other_count, brick_key); + } else { + strncpy (new_key, key, sizeof (new_key)); + new_key[sizeof (new_key) - 1] = 0; + } + + ret = dict_set (rsp_ctx->dict, new_key, new_value); + if (ret) + gf_log ("", GF_LOG_ERROR, "Unable to set key: %s in dict", + key); + + return 0; +} + +int +glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_status_rsp_conv_t rsp_ctx = {0}; + int32_t cmd = GF_CLI_STATUS_NONE; + int32_t node_count = 0; + int32_t other_count = 0; + int32_t brick_index_max = -1; + int32_t rsp_node_count = 0; + int32_t rsp_other_count = 0; + int vol_count = -1; + int i = 0; + dict_t *ctx_dict = NULL; + char key[PATH_MAX] = {0,}; + char *volname = NULL; + + GF_ASSERT (rsp_dict); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (GD_OP_STATUS_VOLUME); + + } + + ret = dict_get_int32 (ctx_dict, "cmd", &cmd); + if (ret) + goto out; + + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) { + ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count); + if (ret == 0) { + ret = dict_set_int32 (ctx_dict, "vol_count", + vol_count); + if (ret) + goto out; + + for (i = 0; i < vol_count; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "vol%d", i); + ret = dict_get_str (rsp_dict, key, &volname); + if (ret) + goto out; + + ret = dict_set_str (ctx_dict, key, volname); + if (ret) + goto out; + } + } + } + + if ((cmd & GF_CLI_STATUS_TASKS) != 0) { + dict_copy (rsp_dict, aggr); + ret = 0; + goto out; + } + + ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count); + if (ret) { + ret = 0; //no bricks in the rsp + goto out; + } + + ret = dict_get_int32 (rsp_dict, "other-count", &rsp_other_count); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to get other count from rsp_dict"); + goto out; + } + + ret = dict_get_int32 (ctx_dict, "count", &node_count); + ret = dict_get_int32 (ctx_dict, "other-count", &other_count); + if (!dict_get (ctx_dict, "brick-index-max")) { + ret = dict_get_int32 (rsp_dict, "brick-index-max", &brick_index_max); + if (ret) + goto out; + ret = dict_set_int32 (ctx_dict, "brick-index-max", brick_index_max); + if (ret) + goto out; + + } else { + ret = dict_get_int32 (ctx_dict, "brick-index-max", &brick_index_max); + } + + rsp_ctx.count = node_count; + rsp_ctx.brick_index_max = brick_index_max; + rsp_ctx.other_count = other_count; + rsp_ctx.dict = ctx_dict; + + dict_foreach (rsp_dict, glusterd_volume_status_add_peer_rsp, &rsp_ctx); + + ret = dict_set_int32 (ctx_dict, "count", node_count + rsp_node_count); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to update node count"); + goto out; + } + + ret = dict_set_int32 (ctx_dict, "other-count", + (other_count + rsp_other_count)); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to update other-count"); +out: + return ret; +} + +int +glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + char key[256] = {0,}; + char *node_uuid = NULL; + char *node_uuid_str = NULL; + char *volname = NULL; + dict_t *ctx_dict = NULL; + double elapsed_time = 0; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = GD_OP_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int32_t index = 0; + int32_t count = 0; + int32_t current_index = 2; + int32_t value32 = 0; + uint64_t value = 0; + char *peer_uuid_str = NULL; + + GF_ASSERT (rsp_dict); + conf = THIS->private; + + op = glusterd_op_get_op (); + GF_ASSERT ((GD_OP_REBALANCE == op) || + (GD_OP_DEFRAG_BRICK_VOLUME == op)); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (op); + + } + + if (!ctx_dict) + goto out; + + ret = dict_get_str (ctx_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + ret = dict_get_int32 (rsp_dict, "count", &index); + if (ret) + gf_log ("", GF_LOG_ERROR, "failed to get index"); + + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", index); + ret = dict_get_str (rsp_dict, key, &node_uuid); + if (!ret) { + node_uuid_str = gf_strdup (node_uuid); + + /* Finding the index of the node-uuid in the peer-list */ + list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + peer_uuid_str = gd_peer_uuid_str (peerinfo); + if (strcmp (peer_uuid_str, node_uuid_str) == 0) + break; + + current_index++; + } + + /* Setting the largest index value as the total count. */ + ret = dict_get_int32 (ctx_dict, "count", &count); + if (count < current_index) { + ret = dict_set_int32 (ctx_dict, "count", current_index); + if (ret) + gf_log ("", GF_LOG_ERROR, "Failed to set count"); + } + + /* Setting the same index for the node, as is in the peerlist.*/ + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", current_index); + ret = dict_set_dynstr (ctx_dict, key, node_uuid_str); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set node-uuid"); + } + } + + snprintf (key, 256, "files-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "files-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set the file count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "size-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "size-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set the size of migration"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set lookuped file count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "status-%d", index); + ret = dict_get_int32 (rsp_dict, key, &value32); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "status-%d", current_index); + ret = dict_set_int32 (ctx_dict, key, value32); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set status"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set failure count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set skipped count"); + } + } + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", index); + ret = dict_get_double (rsp_dict, key, &elapsed_time); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", current_index); + ret = dict_set_double (ctx_dict, key, elapsed_time); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set run-time"); + } + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char buf[PATH_MAX] = ""; + char *volname = NULL; + int ret = -1; + int config_command = 0; + uint64_t i = 0; + uint64_t value = 0; + uint64_t voldisplaycount = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "config-command", &config_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + switch (config_command) { + case GF_SNAP_CONFIG_DISPLAY: + ret = dict_get_uint64 (src, "snap-max-hard-limit", &value); + if (!ret) { + ret = dict_set_uint64 (dst, "snap-max-hard-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_hard_limit"); + goto out; + } + } else { + /* Received dummy response from other nodes */ + ret = 0; + goto out; + } + + ret = dict_get_uint64 (src, "snap-max-soft-limit", &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get snap_max_soft_limit"); + goto out; + } + + ret = dict_set_uint64 (dst, "snap-max-soft-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_soft_limit"); + goto out; + } + + ret = dict_get_uint64 (src, "voldisplaycount", + &voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get voldisplaycount"); + goto out; + } + + ret = dict_set_uint64 (dst, "voldisplaycount", + voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set voldisplaycount"); + goto out; + } + + for (i = 0; i < voldisplaycount; i++) { + snprintf (buf, sizeof(buf), "volume%ld-volname", i); + ret = dict_get_str (src, buf, &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_str (dst, buf, volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + } + + break; + default: + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (src, "missed_snap_count", + &src_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32 (dst, "missed_snap_count", + &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (src, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup (buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dst, name_buf, tmp_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_create_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snap_config_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + default: + // copy the response dictinary's contents to the dict to be + // sent back to the cli + dict_copy (src, dst); + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src) +{ + char output_name[PATH_MAX] = ""; + char *output = NULL; + int ret = 0; + int i = 0; + int len = 0; + int src_output_count = 0; + int dst_output_count = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "output_count", &dst_output_count); + + ret = dict_get_int32 (src, "output_count", &src_output_count); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "No output from source"); + ret = 0; + goto out; + } + + for (i = 1; i <= src_output_count; i++) { + len = snprintf (output_name, sizeof(output_name) - 1, + "output_%d", i); + output_name[len] = '\0'; + ret = dict_get_str (src, output_name, &output); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch %s", + output_name); + goto out; + } + + len = snprintf (output_name, sizeof(output_name) - 1, + "output_%d", i+dst_output_count); + output_name[len] = '\0'; + ret = dict_set_dynstr (dst, output_name, gf_strdup (output)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set %s", + output_name); + goto out; + } + } + + ret = dict_set_int32 (dst, "output_count", + dst_output_count+src_output_count); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_op_t op = GD_OP_NONE; + + op = glusterd_op_get_op (); + GF_ASSERT (aggr); + GF_ASSERT (rsp_dict); + + if (!aggr) + goto out; + dict_copy (rsp_dict, aggr); +out: + return ret; +} + +int +glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + dict_t *ctx_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + + GF_ASSERT (rsp_dict); + + op = glusterd_op_get_op (); + GF_ASSERT (GD_OP_HEAL_VOLUME == op); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (op); + } + + if (!ctx_dict) + goto out; + dict_copy (rsp_dict, ctx_dict); +out: + return ret; +} + +int +_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = {0}; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + + rsp_ctx = data; + new_value = data_copy (value); + GF_ASSERT (new_value); + snprintf (new_key, sizeof (new_key), "%d-%s", rsp_ctx->count, key); + dict_set (rsp_ctx->dict, new_key, new_value); + return 0; +} + +int +glusterd_profile_volume_brick_rsp (void *pending_entry, + dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t count = 0; + char brick[PATH_MAX+1024] = {0}; + char key[256] = {0}; + char *full_brick = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + GF_ASSERT (pending_entry); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_int32 (op_ctx, "count", &count); + if (ret) { + count = 1; + } else { + count++; + } + snprintf (key, sizeof (key), "%d-brick", count); + if (type == GD_NODE_BRICK) { + brickinfo = pending_entry; + snprintf (brick, sizeof (brick), "%s:%s", brickinfo->hostname, + brickinfo->path); + } else if (type == GD_NODE_NFS) { + snprintf (brick, sizeof (brick), "%s", uuid_utoa (MY_UUID)); + } + full_brick = gf_strdup (brick); + GF_ASSERT (full_brick); + ret = dict_set_dynstr (op_ctx, key, full_brick); + + rsp_ctx.count = count; + rsp_ctx.dict = op_ctx; + dict_foreach (rsp_dict, _profile_volume_add_brick_rsp, &rsp_ctx); + dict_del (op_ctx, "count"); + ret = dict_set_int32 (op_ctx, "count", count); + return ret; +} + +//input-key: <replica-id>:<child-id>-* +//output-key: <brick-id>-* +int +_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + rxl_end = strchr (key, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key) - strlen (rxl_end); + strncpy (int_str, key, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + if (!strcmp (rxl_child_end, "-status")) { + brickinfo = glusterd_get_brickinfo_by_position (volinfo, + brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, + brickinfo)) + goto out; + } + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; +} + +int +_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t + *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0,}; + char key_begin_string[128] = {0,}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *key_begin_str = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + key_begin_str = strchr (key, '-'); + if (!key_begin_str) + goto out; + + int_len = strlen (key) - strlen (key_begin_str); + strncpy (key_begin_string, key, int_len); + key_begin_string[int_len] = '\0'; + + rxl_end = strchr (key_begin_str + 1, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key_begin_str) - strlen (rxl_end) - 1; + strncpy (int_str, key_begin_str + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo)) + goto out; + + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string, + brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; + +} + +int +glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx, char **op_errstr) +{ + int ret = 0; + glusterd_heal_rsp_conv_t rsp_ctx = {0}; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + int heal_op = -1; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + + ret = dict_get_str (req_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32 (req_dict, "heal-op", &heal_op); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get heal_op"); + goto out; + } + + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + rsp_ctx.dict = op_ctx; + rsp_ctx.volinfo = volinfo; + rsp_ctx.this = THIS; + if (heal_op == GF_AFR_OP_STATISTICS) + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp_of_statistics, + &rsp_ctx); + else + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + + +out: + return ret; +} + +int +_status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = {0,}; + data_t *new_value = 0; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + + rsp_ctx = data; + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "brick%d.%s", rsp_ctx->count, key); + dict_set (rsp_ctx->dict, new_key, new_value); + + return 0; +} + +int +glusterd_status_volume_brick_rsp (dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t count = 0; + int index = 0; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (op_ctx, "count", &count); + if (ret) { + count = 0; + } else { + count++; + } + ret = dict_get_int32 (rsp_dict, "index", &index); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get node index"); + goto out; + } + dict_del (rsp_dict, "index"); + + rsp_ctx.count = index; + rsp_ctx.dict = op_ctx; + dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx); + ret = dict_set_int32 (op_ctx, "count", count); + +out: + return ret; +} + +int +glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[256] = {0,}; + int32_t i = 0; + char buf[1024] = {0,}; + char *node_str = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (req_dict); + + ret = dict_get_str (req_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + if (rsp_dict) { + ret = glusterd_defrag_volume_status_update (volinfo, + rsp_dict); + } + + if (!op_ctx) { + dict_copy (rsp_dict, op_ctx); + goto out; + } + + ret = dict_get_int32 (op_ctx, "count", &i); + i++; + + ret = dict_set_int32 (op_ctx, "count", i); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set count"); + + snprintf (buf, 1024, "%s", uuid_utoa (MY_UUID)); + node_str = gf_strdup (buf); + + snprintf (key, 256, "node-uuid-%d",i); + ret = dict_set_dynstr (op_ctx, key, node_str); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set node-uuid"); + + memset (key, 0 , 256); + snprintf (key, 256, "files-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set file count"); + + memset (key, 0 , 256); + snprintf (key, 256, "size-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_data); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set size of xfer"); + + memset (key, 0 , 256); + snprintf (key, 256, "lookups-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.lookedup_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set lookedup file count"); + + memset (key, 0 , 256); + snprintf (key, 256, "status-%d", i); + ret = dict_set_int32 (op_ctx, key, volinfo->rebal.defrag_status); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set status"); + + memset (key, 0 , 256); + snprintf (key, 256, "failures-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_failures); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set failure count"); + + memset (key, 0 , 256); + snprintf (key, 256, "skipped-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set skipped count"); + + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set run-time"); + +out: + return ret; +} +int32_t +glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, + glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type) +{ + int ret = 0; + + GF_ASSERT (op_errstr); + + switch (op) { + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_brick_rsp (pending_entry, + rsp_dict, op_ctx, + op_errstr, type); + break; + case GD_OP_STATUS_VOLUME: + ret = glusterd_status_volume_brick_rsp (rsp_dict, op_ctx, + op_errstr); + break; + + case GD_OP_DEFRAG_BRICK_VOLUME: + glusterd_defrag_volume_node_rsp (req_dict, + rsp_dict, op_ctx); + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_heal_volume_brick_rsp (req_dict, rsp_dict, + op_ctx, op_errstr); + break; + default: + break; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Should be used only when an operation is in progress, as that is the only + * time a lock_owner is set + */ +gf_boolean_t +is_origin_glusterd (dict_t *dict) +{ + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = {0,}; + uuid_t *originator_uuid = NULL; + + GF_ASSERT (dict); + + ret = dict_get_bin (dict, "originator_uuid", + (void **) &originator_uuid); + if (ret) { + /* If not originator_uuid has been set, then the command + * has been originated from a glusterd running on older version + * Hence fetching the lock owner */ + ret = glusterd_get_lock_owner (&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !uuid_compare (MY_UUID, lock_owner); + } else + ret = !uuid_compare (MY_UUID, *originator_uuid); + +out: + return ret; +} + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key) +{ + int ret = -1; + uuid_t task_id = {0,}; + char *uuid_str = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + + this = THIS; + GF_ASSERT (this); + + uuid_generate (task_id); + uuid_str = gf_strdup (uuid_utoa (task_id)); + if (!uuid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s in dict", + key); + goto out; + } + gf_log (this->name, GF_LOG_INFO, "Generated task-id %s for key %s", + uuid_str, key); + +out: + if (ret) + GF_FREE (uuid_str); + return ret; +} + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key) +{ + int ret = -1; + char tmp_str[40] = {0,}; + char *task_id_str = NULL; + + GF_ASSERT (dict); + GF_ASSERT (key); + + uuid_unparse (uuid, tmp_str); + task_id_str = gf_strdup (tmp_str); + if (!task_id_str) + return -1; + + ret = dict_set_dynstr (dict, key, task_id_str); + if (ret) { + GF_FREE (task_id_str); + gf_log (THIS->name, GF_LOG_ERROR, + "Error setting uuid in dict with key %s", key); + } + + return 0; +} + +int +_update_volume_op_versions (dict_t *this, char *key, data_t *value, void *data) +{ + int op_version = 0; + glusterd_volinfo_t *ctx = NULL; + gf_boolean_t enabled = _gf_true; + int ret = -1; + + GF_ASSERT (data); + ctx = data; + + op_version = glusterd_get_op_version_for_key (key); + + if (gd_is_xlator_option (key) || gd_is_boolean_option (key)) { + ret = gf_string2boolean (value->data, &enabled); + if (ret) + return 0; + + if (!enabled) + return 0; + } + + if (op_version > ctx->op_version) + ctx->op_version = op_version; + + if (gd_is_client_option (key) && + (op_version > ctx->client_op_version)) + ctx->client_op_version = op_version; + + return 0; +} + +void +gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) +{ + glusterd_conf_t *conf = NULL; + gf_boolean_t ob_enabled = _gf_false; + + GF_ASSERT (volinfo); + + conf = THIS->private; + GF_ASSERT (conf); + + /* Reset op-versions to minimum */ + volinfo->op_version = 1; + volinfo->client_op_version = 1; + + dict_foreach (volinfo->dict, _update_volume_op_versions, volinfo); + + /* Special case for open-behind + * If cluster op-version >= 2 and open-behind hasn't been explicitly + * disabled, volume op-versions must be updated to account for it + */ + + /* TODO: Remove once we have a general way to update automatically + * enabled features + */ + if (conf->op_version >= 2) { + ob_enabled = dict_get_str_boolean (volinfo->dict, + "performance.open-behind", + _gf_true); + if (ob_enabled) { + + if (volinfo->op_version < 2) + volinfo->op_version = 2; + if (volinfo->client_op_version < 2) + volinfo->client_op_version = 2; + } + } + + return; +} + +/* A task is committed/completed once the task-id for it is cleared */ +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo) +{ + GF_ASSERT (volinfo); + + if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) && + !uuid_is_null (volinfo->rebal.rebalance_id)) + return _gf_false; + + return _gf_true; +} + +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + gf_boolean_t ret = _gf_false; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) + continue; + + /*Found peer who owns the brick, return false + * if peer is not connected or not friend */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + *down_peerstr = gf_strdup (peerinfo->hostname); + gf_log ("", GF_LOG_DEBUG, "Peer %s is down. ", + peerinfo->hostname); + goto out; + } + } + } + + ret = _gf_true; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +gf_boolean_t +glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict) +{ + int ret = -1; + uint32_t cmd = GF_CLI_STATUS_NONE; + gf_boolean_t is_status_tasks = _gf_false; + + if (op != GD_OP_STATUS_VOLUME) + goto out; + + ret = dict_get_uint32 (dict, "cmd", &cmd); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get opcode"); + goto out; + } + + if (cmd & GF_CLI_STATUS_TASKS) + is_status_tasks = _gf_true; + +out: + return is_status_tasks; +} + +int +glusterd_compare_snap_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_snap_t *snap1 = NULL; + glusterd_snap_t *snap2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snap1 = list_entry(list1, glusterd_snap_t, snap_list); + snap2 = list_entry(list2, glusterd_snap_t, snap_list); + diff_time = difftime(snap1->time_stamp, snap2->time_stamp); + + return ((int)diff_time); +} + +int +glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *snapvol1 = NULL; + glusterd_volinfo_t *snapvol2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snapvol1 = list_entry(list1, glusterd_volinfo_t, snapvol_list); + snapvol2 = list_entry(list2, glusterd_volinfo_t, snapvol_list); + diff_time = difftime(snapvol1->snapshot->time_stamp, + snapvol2->snapshot->time_stamp); + + return ((int)diff_time); +} + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (missed_snapinfo); + + new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) + goto out; + + new_missed_snapinfo->node_snap_info = NULL; + INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); + INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_op); + + new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) + goto out; + + new_snap_op->brick_path = NULL; + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + INIT_LIST_HEAD (&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} |
